From f44216b95bb4b1e359875c6fa2f6212823678a41 Mon Sep 17 00:00:00 2001
From: dom <dom@local>
Date: Thu, 12 Feb 2026 23:12:39 +0100
Subject: [PATCH] =?UTF-8?q?feat:=20pass=20LLM=20hybride=20pour=20DAS=20+?=
 =?UTF-8?q?=20interface=20admin=20r=C3=A9f=C3=A9rentiels=20RAG?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Chantier 1 — Extraction DAS par LLM :
- Nouveau prompt expert DIM dans rag_search.py (extract_das_llm)
- Phase 4 dans cim10_extractor.py : détection DAS supplémentaires avant enrichissement RAG
- Cache persistant (clé hash du texte), validation CIM-10, déduplication
- Activé uniquement avec use_rag=True (--no-rag le désactive)

Chantier 2 — Admin référentiels :
- Config : REFERENTIELS_DIR, UPLOAD_MAX_SIZE_MB, ALLOWED_EXTENSIONS
- Chunking générique (PDF/CSV/Excel/TXT) + ajout incrémental FAISS dans rag_index.py
- ReferentielManager CRUD dans viewer/referentiels.py
- 5 routes Flask (listing, upload, indexation, suppression, rebuild)
- Template admin avec tableau interactif + lien sidebar

Fix : if cache → if cache is not None (OllamaCache vide évaluait à False)

410 tests passent (27 nouveaux, 0 régression).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/config.py                                |   3 +
 src/medical/cim10_extractor.py               |  77 +++++++
 src/medical/rag_index.py                     | 182 ++++++++++++++-
 src/medical/rag_search.py                    |  95 ++++++++
 src/viewer/app.py                            |  73 +++++-
 src/viewer/referentiels.py                   | 155 +++++++++++++
 src/viewer/templates/admin_referentiels.html | 220 +++++++++++++++++++
 src/viewer/templates/base.html               |   6 +
 tests/test_das_llm.py                        | 213 ++++++++++++++++++
 tests/test_referentiels.py                   | 179 +++++++++++++++
 10 files changed, 1197 insertions(+), 6 deletions(-)
 create mode 100644 src/viewer/referentiels.py
 create mode 100644 src/viewer/templates/admin_referentiels.html
 create mode 100644 tests/test_das_llm.py
 create mode 100644 tests/test_referentiels.py

diff --git a/src/config.py b/src/config.py
index 7fd1482..44afe9d 100644
--- a/src/config.py
+++ b/src/config.py
@@ -46,6 +46,9 @@ NUM_UM = "0000"
 # --- Configuration RAG ---
 
 RAG_INDEX_DIR = BASE_DIR / "data" / "rag_index"
+REFERENTIELS_DIR = BASE_DIR / "data" / "referentiels"
+UPLOAD_MAX_SIZE_MB = 50
+ALLOWED_EXTENSIONS = {".pdf", ".csv", ".xlsx", ".xls", ".txt"}
 CIM10_DICT_PATH = BASE_DIR / "data" / "cim10_dict.json"
 CCAM_DICT_PATH = BASE_DIR / "data" / "ccam_dict.json"
 CIM10_PDF = Path("/home/dom/ai/aivanov_CIM/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf")
diff --git a/src/medical/cim10_extractor.py b/src/medical/cim10_extractor.py
index 0b31926..b45f37b 100644
--- a/src/medical/cim10_extractor.py
+++ b/src/medical/cim10_extractor.py
@@ -112,6 +112,10 @@ def extract_medical_info(
     _extract_imagerie(anonymized_text, dossier)
     _extract_complications(anonymized_text, dossier, edsnlp_result)
 
+    # Phase 4 : pass LLM pour détecter des DAS supplémentaires
+    if use_rag:
+        _extract_das_llm(anonymized_text, dossier)
+
     if use_rag:
         _enrich_with_rag(dossier)
 
@@ -133,6 +137,79 @@ def extract_medical_info(
     return dossier
 
 
+def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
+    """Append LLM-detected secondary diagnoses (DAS) to ``dossier`` in place.
+
+    Best effort: failures are logged rather than raised. Each candidate is
+    cleaned and validated; CIM-10 codes already present on the dossier (DP or
+    DAS) are not added twice. Malformed LLM items are skipped individually.
+    """
+    try:
+        from .rag_search import extract_das_llm
+        from .ollama_cache import OllamaCache
+        from ..config import OLLAMA_CACHE_PATH, OLLAMA_MODEL
+    except ImportError:
+        logger.warning("Module RAG non disponible pour l'extraction DAS LLM")
+        return
+
+    try:
+        cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL)
+        # Clinical context passed to the prompt builder.
+        contexte = {
+            "sexe": dossier.sejour.sexe,
+            "age": dossier.sejour.age,
+            "duree_sejour": dossier.sejour.duree_sejour,
+            "imc": dossier.sejour.imc,
+            "antecedents": dossier.antecedents[:5],
+            "biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle],
+            "imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie],
+            "complications": dossier.complications,
+        }
+        # Known diagnoses: labels feed the prompt, codes drive deduplication.
+        existing_das = []
+        existing_codes = set()
+        if dossier.diagnostic_principal and dossier.diagnostic_principal.cim10_suggestion:
+            existing_codes.add(dossier.diagnostic_principal.cim10_suggestion)
+        for d in dossier.diagnostics_associes:
+            label = d.texte
+            if d.cim10_suggestion:
+                label += f" ({d.cim10_suggestion})"
+                existing_codes.add(d.cim10_suggestion)
+            existing_das.append(label)
+        dp_texte = dossier.diagnostic_principal.texte if dossier.diagnostic_principal else ""
+
+        das_results = extract_das_llm(text, contexte, existing_das, dp_texte, cache=cache)
+        cache.save()  # persist now so an error below cannot discard the LLM answer
+
+        added = 0
+        for das in das_results:
+            if not isinstance(das, dict) or not isinstance(das.get("texte"), str):
+                continue  # LLM output is untrusted JSON: skip malformed entries
+            texte = clean_diagnostic_text(das["texte"])
+            if not texte or not is_valid_diagnostic_text(texte):
+                continue
+            code = das.get("code_cim10")
+            if code:
+                code = normalize_code(code)
+                is_valid, _ = cim10_validate(code)
+                if not is_valid:
+                    logger.info("DAS LLM : code %s invalide pour « %s », ignoré", code, texte)
+                    continue
+                if code in existing_codes:
+                    continue
+                existing_codes.add(code)
+            dossier.diagnostics_associes.append(Diagnostic(
+                texte=texte,
+                cim10_suggestion=code,
+                justification=das.get("justification"),
+            ))
+            added += 1
+        if added:
+            logger.info("DAS LLM : %d diagnostics supplémentaires ajoutés", added)
+    except Exception:
+        logger.warning("Erreur lors de l'extraction DAS LLM", exc_info=True)
+
+
 def _enrich_with_rag(dossier: DossierMedical) -> None:
     """Enrichit les diagnostics via le RAG (FAISS + Ollama)."""
     try:
diff --git a/src/medical/rag_index.py b/src/medical/rag_index.py
index 9691f07..183ab0a 100644
--- a/src/medical/rag_index.py
+++ b/src/medical/rag_index.py
@@ -11,7 +11,7 @@ from typing import Optional
 
 import pdfplumber
 
-from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH
+from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH, REFERENTIELS_DIR
 
 logger = logging.getLogger(__name__)
 
@@ -482,3 +482,183 @@ def get_index() -> tuple | None:
 
     logger.info("Index FAISS chargé : %d vecteurs", _faiss_index.ntotal)
     return _faiss_index, _metadata
+
+
+# ---------------------------------------------------------------------------
+# Chunking générique pour fichiers utilisateur (référentiels)
+# ---------------------------------------------------------------------------
+
+def chunk_user_file(file_path: Path, doc_name: str) -> list[Chunk]:
+    """Split a user-supplied file into chunks ready for FAISS indexing.
+
+    The lower-cased file extension selects the chunker:
+    - ``.pdf``: PDF chunker
+    - ``.csv`` / ``.xlsx`` / ``.xls``: tabular chunker
+    - ``.txt``: plain-text chunker
+
+    Args:
+        file_path: Path of the file to split.
+        doc_name: Document name stored on each chunk.
+
+    Returns:
+        The chunks produced, or an empty list (with a warning logged) when
+        the extension is not supported.
+    """
+    extension = file_path.suffix.lower()
+    if extension == ".txt":
+        return _chunk_user_txt(file_path, doc_name)
+    if extension == ".pdf":
+        return _chunk_user_pdf(file_path, doc_name)
+    if extension in {".csv", ".xlsx", ".xls"}:
+        return _chunk_user_tabular(file_path, doc_name)
+    logger.warning("Extension non supportée pour chunking : %s", extension)
+    return []
+
+
+def _chunk_user_pdf(file_path: Path, doc_name: str) -> list[Chunk]:
+    """Split a user PDF into chunks of two text-bearing pages each.
+
+    Pages with no extractable text are skipped; ``Chunk.page`` is the number of
+    the first page that contributed text. Groups under 10 words are dropped.
+    Errors are logged and the chunks built so far are returned.
+    """
+    chunks: list[Chunk] = []
+    def flush(texts: list[str], first_page: int) -> None:
+        combined = "\n".join(texts)
+        if len(combined.split()) >= 10:
+            chunks.append(Chunk(text=combined, document=doc_name, page=first_page))
+
+    try:
+        with pdfplumber.open(file_path) as pdf:
+            page_texts: list[str] = []
+            start_page = 1
+            for page_num, page in enumerate(pdf.pages, start=1):
+                text = page.extract_text()
+                if not text:
+                    continue
+                if not page_texts:
+                    start_page = page_num  # first text-bearing page of the group
+                page_texts.append(text)
+                if len(page_texts) == 2:
+                    flush(page_texts, start_page)
+                    page_texts = []
+            if page_texts:
+                flush(page_texts, start_page)
+    except Exception:
+        logger.warning("Erreur lors du chunking PDF %s", file_path, exc_info=True)
+    logger.info("Référentiel PDF %s : %d chunks", doc_name, len(chunks))
+    return chunks
+
+
+def _chunk_user_tabular(file_path: Path, doc_name: str) -> list[Chunk]:
+    """Turn each row of a CSV/Excel file into one chunk (cells joined by " | ").
+
+    CSV is read as UTF-8 (BOM tolerated), then Latin-1; rows under 3 words are dropped.
+    """
+    chunks: list[Chunk] = []
+    try:
+        import pandas as pd
+        if file_path.suffix.lower() != ".csv":
+            df = pd.read_excel(file_path)
+        else:
+            try:
+                df = pd.read_csv(file_path, encoding="utf-8-sig", on_bad_lines="skip")
+            except UnicodeDecodeError:  # typical of cp1252/Latin-1 spreadsheet exports
+                df = pd.read_csv(file_path, encoding="latin-1", on_bad_lines="skip")
+        for row_number, (_, row) in enumerate(df.iterrows(), start=1):
+            text = " | ".join(str(v) for v in row.values if pd.notna(v))
+            if len(text.split()) >= 3:
+                chunks.append(Chunk(text=text, document=doc_name, page=row_number))
+    except Exception:
+        logger.warning("Erreur lors du chunking tabular %s", file_path, exc_info=True)
+    logger.info("Référentiel tabular %s : %d chunks", doc_name, len(chunks))
+    return chunks
+
+
+def _chunk_user_txt(file_path: Path, doc_name: str) -> list[Chunk]:
+    """Split a UTF-8 text file into one chunk per blank-line-separated paragraph.
+
+    Paragraphs under five words are dropped; ``Chunk.page`` is the paragraph's
+    1-based position among all paragraphs. Errors are logged, not raised.
+    """
+    chunks: list[Chunk] = []
+    try:
+        blocks = re.split(r"\n\s*\n", file_path.read_text(encoding="utf-8"))
+        for position, block in enumerate(blocks, start=1):
+            stripped = block.strip()
+            if len(stripped.split()) < 5:
+                continue
+            chunks.append(Chunk(text=stripped, document=doc_name, page=position))
+    except Exception:
+        logger.warning("Erreur lors du chunking TXT %s", file_path, exc_info=True)
+    logger.info("Référentiel TXT %s : %d chunks", doc_name, len(chunks))
+    return chunks
+
+
+def add_chunks_to_index(chunks: list[Chunk]) -> int:
+    """Append chunks to the on-disk FAISS index (incremental update).
+
+    Loads the index when both index files exist (otherwise starts a new one),
+    encodes the chunks, adds the vectors, and saves index and metadata.
+
+    Args:
+        chunks: Chunks to add.
+
+    Returns:
+        Number of chunks added (0 when ``chunks`` is empty).
+    """
+    if not chunks:
+        return 0
+
+    import faiss
+    import numpy as np
+    from .rag_search import _get_embed_model
+
+    index_path = RAG_INDEX_DIR / "faiss.index"
+    meta_path = RAG_INDEX_DIR / "metadata.json"
+
+    # Load the existing index or create a new one
+    if index_path.exists() and meta_path.exists():
+        faiss_idx = faiss.read_index(str(index_path))
+        metadata = json.loads(meta_path.read_text(encoding="utf-8"))
+    else:
+        model = _get_embed_model()
+        # Get the embedding dimension from a probe encoding
+        test_vec = model.encode(["test"], normalize_embeddings=True)
+        dim = test_vec.shape[1]
+        faiss_idx = faiss.IndexFlatIP(dim)
+        metadata = []
+
+    # Encode the new chunks (each text truncated to 2000 characters)
+    model = _get_embed_model()
+    texts = [c.text[:2000] for c in chunks]
+    embeddings = model.encode(texts, normalize_embeddings=True, batch_size=64)
+    embeddings = np.array(embeddings, dtype=np.float32)
+
+    # Add the vectors to the index
+    faiss_idx.add(embeddings)
+
+    # Append one metadata entry per chunk; "text" becomes an 800-char "extrait"
+    from dataclasses import asdict
+    for chunk in chunks:
+        meta = asdict(chunk)
+        meta["extrait"] = meta.pop("text")[:800]
+        metadata.append(meta)
+
+    # Save index and metadata
+    RAG_INDEX_DIR.mkdir(parents=True, exist_ok=True)
+    faiss.write_index(faiss_idx, str(index_path))
+    meta_path.write_text(json.dumps(metadata, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    # Invalidate the in-memory singleton to force a reload
+    reset_index()
+
+    logger.info("Index FAISS : %d chunks ajoutés (total : %d)", len(chunks), faiss_idx.ntotal)
+    return len(chunks)
+
+
+def reset_index() -> None:
+    """Reset the module-level FAISS index and metadata (meant to force a reload on next access)."""
+    global _faiss_index, _metadata
+    _faiss_index = None
+    _metadata = []
diff --git a/src/medical/rag_search.py b/src/medical/rag_search.py
index c4b221d..cca37c3 100644
--- a/src/medical/rag_search.py
+++ b/src/medical/rag_search.py
@@ -473,6 +473,101 @@ def enrich_acte(acte: ActeCCAM, contexte: dict, cache: OllamaCache | None = None
         logger.info("Ollama non disponible — sources FAISS CCAM conservées sans justification LLM")
 
 
+def _build_prompt_das_extraction(text: str, contexte: dict, existing_das: list[str], dp_texte: str) -> str:
+    """Build the prompt asking the LLM for additional DAS (secondary diagnoses)."""
+    ctx_str = _format_contexte(contexte)
+    existing_str = "\n".join(f"- {d}" for d in existing_das) if existing_das else "Aucun"
+
+    return f"""Tu es un médecin DIM (Département d'Information Médicale) expert en codage PMSI.
+Analyse le texte médical suivant et identifie les diagnostics associés significatifs (DAS) qui n'ont PAS encore été codés.
+
+RÈGLES IMPÉRATIVES :
+- Un DAS doit avoir mobilisé des ressources supplémentaires pendant le séjour
+- Ne PAS proposer de doublons avec les DAS déjà codés ci-dessous
+- Ne PAS proposer le diagnostic principal comme DAS
+- Ne PAS coder les symptômes (R00-R99) si un diagnostic précis les explique
+- Ne PAS coder les antécédents non pertinents pour le séjour
+- Privilégie les codes CIM-10 les plus SPÉCIFIQUES (4e ou 5e caractère)
+- Ne propose que des diagnostics CLAIREMENT mentionnés dans le texte
+
+DIAGNOSTIC PRINCIPAL : {dp_texte or "Non identifié"}
+
+DAS DÉJÀ CODÉS :
+{existing_str}
+
+CONTEXTE CLINIQUE :
+{ctx_str}
+
+TEXTE MÉDICAL :
+{text[:4000]}
+
+Réponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant ou après :
+{{
+  "diagnostics_supplementaires": [
+    {{
+      "texte": "description du diagnostic",
+      "code_cim10": "X99.9",
+      "justification": "pourquoi ce DAS est pertinent pour le séjour"
+    }}
+  ]
+}}
+
+Si aucun DAS supplémentaire n'est pertinent, retourne : {{"diagnostics_supplementaires": []}}"""
+
+
+def extract_das_llm(
+    text: str,
+    contexte: dict,
+    existing_das: list[str],
+    dp_texte: str,
+    cache: OllamaCache | None = None,
+) -> list[dict]:
+    """Ask the LLM for secondary diagnoses (DAS) missing from the dossier.
+
+    Args:
+        text: Full medical text (only its first 4000 characters are used).
+        contexte: Patient context (sex, age, ...).
+        existing_das: DAS already coded (text + code).
+        dp_texte: Text of the principal diagnosis.
+        cache: Optional Ollama cache, keyed on a hash of the truncated text.
+
+    Returns:
+        List of dicts {texte, code_cim10, justification}; empty when Ollama is
+        unavailable or its answer does not have the expected shape.
+    """
+    import hashlib
+
+    def das_items(payload):
+        # None flags a malformed payload; entries that are not dicts are dropped.
+        items = payload.get("diagnostics_supplementaires", []) if isinstance(payload, dict) else None
+        return [d for d in items if isinstance(d, dict)] if isinstance(items, list) else None
+
+    # Cache key: hash of exactly the text slice that is sent in the prompt.
+    text_hash = hashlib.md5(text[:4000].encode()).hexdigest()[:16]
+    cache_key_text = f"das_extract::{text_hash}"
+
+    if cache is not None:
+        cached = das_items(cache.get(cache_key_text, "das_llm"))
+        if cached is not None:  # a missing or malformed entry counts as a miss
+            logger.info("Cache hit pour extraction DAS LLM")
+            return cached
+
+    prompt = _build_prompt_das_extraction(text, contexte, existing_das, dp_texte)
+    result = call_ollama(prompt, temperature=0.1, max_tokens=2000)
+    if result is None:
+        logger.warning("Extraction DAS LLM : Ollama non disponible")
+        return []
+    das_list = das_items(result)
+    if das_list is None:
+        logger.warning("Extraction DAS LLM : format inattendu")
+        return []
+
+    if cache is not None:  # only well-formed answers are cached
+        cache.put(cache_key_text, "das_llm", result)
+    logger.info("Extraction DAS LLM : %d diagnostics supplémentaires détectés", len(das_list))
+    return das_list
+
+
 def enrich_dossier(dossier: DossierMedical) -> None:
     """Enrichit le DP et tous les DAS d'un dossier via le RAG.
 
diff --git a/src/viewer/app.py b/src/viewer/app.py
index 270dbfc..5723685 100644
--- a/src/viewer/app.py
+++ b/src/viewer/app.py
@@ -11,8 +11,11 @@ import requests
 from flask import Flask, abort, render_template, request, jsonify
 from markupsafe import Markup
 
-from ..config import STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical
+from werkzeug.utils import secure_filename
+
+from ..config import STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
 from .. import config as cfg
+from .referentiels import ReferentielManager
 
 logger = logging.getLogger(__name__)
 
@@ -271,12 +274,12 @@ def create_app() -> Flask:
     def reprocess(filepath: str):
         """Relance le traitement d'un dossier."""
         from ..main import process_pdf, write_outputs
-        
+
         dossier = load_dossier(filepath)
         source_file = dossier.source_file
         if not source_file:
             return jsonify({"error": "Fichier source introuvable"}), 400
-        
+
         # Chercher le PDF source dans input/
         input_dir = Path(__file__).parent.parent.parent / "input"
         pdf_path = None
@@ -284,10 +287,10 @@ def create_app() -> Flask:
             if p.is_file():
                 pdf_path = p
                 break
-        
+
         if not pdf_path:
             return jsonify({"error": f"PDF source '{source_file}' introuvable"}), 404
-        
+
         try:
             anonymized_text, new_dossier, report = process_pdf(pdf_path)
             stem = pdf_path.stem.replace(" ", "_")
@@ -300,4 +303,64 @@ def create_app() -> Flask:
             logger.exception("Erreur lors du retraitement")
             return jsonify({"error": str(e)}), 500
 
+    # ------------------------------------------------------------------
+    # Routes admin référentiels
+    # ------------------------------------------------------------------
+
+    ref_manager = ReferentielManager()
+
+    @app.route("/admin/referentiels")
+    def admin_referentiels():
+        """Render the admin page listing every registered referential."""
+        return render_template("admin_referentiels.html", referentiels=ref_manager.list_all(), max_size=UPLOAD_MAX_SIZE_MB)
+
+    @app.route("/admin/referentiels/upload", methods=["POST"])
+    def upload_referentiel():
+        """Store an uploaded reference file; invalid input is answered with a JSON 400."""
+        if "file" not in request.files:
+            return jsonify({"error": "Aucun fichier envoyé"}), 400
+        f = request.files["file"]
+        # secure_filename() may reduce a hostile or non-ASCII name to "", so the
+        # emptiness check is done on the sanitised name rather than the raw one.
+        filename = secure_filename(f.filename or "")
+        if not filename:
+            return jsonify({"error": "Nom de fichier vide"}), 400
+        try:
+            return jsonify({"ok": True, "referentiel": ref_manager.add_file(filename, f.read())})
+        except ValueError as e:
+            return jsonify({"error": str(e)}), 400
+
+    @app.route("/admin/referentiels/<ref_id>/index", methods=["POST"])
+    def index_referentiel(ref_id: str):
+        """Index one referential; ValueError maps to 404, any other error to 500."""
+        try:
+            return jsonify({"ok": True, "chunks": ref_manager.index_referentiel(ref_id)})
+        except ValueError as not_found:
+            return jsonify({"error": str(not_found)}), 404
+        except Exception as failure:
+            logger.exception("Erreur lors de l'indexation du référentiel %s", ref_id)
+            return jsonify({"error": str(failure)}), 500
+
+    @app.route("/admin/referentiels/<ref_id>", methods=["DELETE"])
+    def delete_referentiel(ref_id: str):
+        """Delete a referential; answers 404 when the id is unknown."""
+        removed = ref_manager.remove(ref_id)
+        return jsonify({"ok": True}) if removed else (jsonify({"error": "Référentiel introuvable"}), 404)
+
+    @app.route("/admin/referentiels/rebuild-index", methods=["POST"])
+    def rebuild_index():
+        """Call build_index(force=True), then re-index every referential marked "indexed"."""
+        try:
+            from ..medical.rag_index import build_index
+            build_index(force=True)
+            # Only referentials whose status is "indexed" are added back;
+            # the first failure aborts the request with a 500.
+            indexed_ids = [r["id"] for r in ref_manager.list_all() if r["status"] == "indexed"]
+            for indexed_id in indexed_ids:
+                ref_manager.index_referentiel(indexed_id)
+            return jsonify({"ok": True, "reindexed": len(indexed_ids)})
+        except Exception as failure:
+            logger.exception("Erreur lors du rebuild de l'index")
+            return jsonify({"error": str(failure)}), 500
+
     return app
diff --git a/src/viewer/referentiels.py b/src/viewer/referentiels.py
new file mode 100644
index 0000000..6aa1c16
--- /dev/null
+++ b/src/viewer/referentiels.py
@@ -0,0 +1,155 @@
+"""Gestionnaire de référentiels utilisateur pour le RAG."""
+
+from __future__ import annotations
+
+import json
+import logging
+import shutil
+import uuid
+from datetime import datetime
+from pathlib import Path
+
+from ..config import REFERENTIELS_DIR, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
+
+logger = logging.getLogger(__name__)
+
+
+class ReferentielManager:
+    """CRUD manager for user-supplied reference files used by the RAG.
+
+    Files live in REFERENTIELS_DIR next to an index.json holding their
+    metadata. FAISS vectors are only ever added, never removed, by this class.
+    """
+
+    def __init__(self, referentiels_dir: Path | None = None):
+        self._dir = referentiels_dir or REFERENTIELS_DIR
+        self._dir.mkdir(parents=True, exist_ok=True)
+        self._index_path = self._dir / "index.json"
+        self._index: list[dict] = self._load_index()
+
+    def _load_index(self) -> list[dict]:
+        """Read index.json; a missing, unreadable or non-list file yields []."""
+        try:
+            data = json.loads(self._index_path.read_text(encoding="utf-8"))
+        except FileNotFoundError:
+            return []
+        except (OSError, ValueError):  # ValueError covers JSON and UTF-8 decode errors
+            data = None
+        if isinstance(data, list):
+            return data
+        logger.warning("Index référentiels corrompu, réinitialisé")
+        return []
+
+    def _save_index(self) -> None:
+        """Write index.json through a temp file + rename so a crash cannot truncate it."""
+        tmp_path = self._index_path.with_name(self._index_path.name + ".tmp")
+        tmp_path.write_text(json.dumps(self._index, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp_path.replace(self._index_path)
+
+    def list_all(self) -> list[dict]:
+        """Return a shallow copy of the list of all referentials."""
+        return list(self._index)
+
+    def get(self, ref_id: str) -> dict | None:
+        """Return the referential with this ID, or None when unknown."""
+        for ref in self._index:
+            if ref["id"] == ref_id:
+                return ref
+        return None
+
+    def add_file(self, filename: str, file_data: bytes) -> dict:
+        """Store a new reference file and register it with status "uploaded".
+
+        Args:
+            filename: Original file name.
+            file_data: Binary content of the file.
+
+        Returns:
+            Metadata of the referential created.
+
+        Raises:
+            ValueError: Extension not allowed or size limit exceeded.
+        """
+        ext = Path(filename).suffix.lower()
+        if ext not in ALLOWED_EXTENSIONS:
+            raise ValueError(f"Extension '{ext}' non autorisée. Extensions valides : {ALLOWED_EXTENSIONS}")
+
+        size_mb = len(file_data) / (1024 * 1024)
+        if size_mb > UPLOAD_MAX_SIZE_MB:
+            raise ValueError(f"Fichier trop volumineux ({size_mb:.1f} Mo > {UPLOAD_MAX_SIZE_MB} Mo)")
+
+        ref_id = uuid.uuid4().hex[:12]
+        # Only the stem is reused, so directory parts of ``filename`` never reach the disk path.
+        safe_name = f"{ref_id}_{Path(filename).stem}{ext}"
+        (self._dir / safe_name).write_bytes(file_data)
+
+        ref = {
+            "id": ref_id,
+            "filename": filename,
+            "stored_name": safe_name,
+            "extension": ext,
+            "size_bytes": len(file_data),
+            "date_added": datetime.now().isoformat(),
+            "status": "uploaded",
+            "chunks_count": 0,
+        }
+        self._index.append(ref)
+        self._save_index()
+        logger.info("Référentiel ajouté : %s (%s)", filename, ref_id)
+        return ref
+
+    def remove(self, ref_id: str) -> bool:
+        """Delete a referential's file and metadata.
+
+        Vectors previously added to FAISS for it are not removed here.
+
+        Returns:
+            True if found and deleted, False otherwise.
+        """
+        ref = self.get(ref_id)
+        if not ref:
+            return False
+        file_path = self._dir / ref["stored_name"]
+        if file_path.exists():
+            file_path.unlink()
+
+        self._index = [r for r in self._index if r["id"] != ref_id]
+        self._save_index()
+        logger.info("Référentiel supprimé : %s (%s)", ref["filename"], ref_id)
+        return True
+
+    def index_referentiel(self, ref_id: str) -> int:
+        """Chunk a referential and append its chunks to the FAISS index.
+
+        Args:
+            ref_id: ID of the referential to index.
+
+        Returns:
+            Number of chunks indexed (0 sets the status to "empty").
+
+        Raises:
+            ValueError: Unknown referential, or its stored file is missing.
+        """
+        ref = self.get(ref_id)
+        if not ref:
+            raise ValueError(f"Référentiel {ref_id} introuvable")
+
+        file_path = self._dir / ref["stored_name"]
+        if not file_path.exists():
+            raise ValueError(f"Fichier {ref['stored_name']} introuvable")
+
+        from ..medical.rag_index import chunk_user_file, add_chunks_to_index
+
+        chunks = chunk_user_file(file_path, f"ref:{ref['filename']}")
+        if not chunks:
+            ref["status"] = "empty"
+            ref["chunks_count"] = 0
+            self._save_index()
+            return 0
+
+        count = add_chunks_to_index(chunks)
+        ref["status"] = "indexed"
+        ref["chunks_count"] = count
+        self._save_index()
+        logger.info("Référentiel indexé : %s → %d chunks", ref["filename"], count)
+        return count
diff --git a/src/viewer/templates/admin_referentiels.html b/src/viewer/templates/admin_referentiels.html
new file mode 100644
index 0000000..c84b5e2
--- /dev/null
+++ b/src/viewer/templates/admin_referentiels.html
@@ -0,0 +1,220 @@
+{% extends "base.html" %}
+
+{% block title %}Référentiels RAG{% endblock %}
+
+{% block sidebar %}
+<div class="group-title">Admin</div>
+<a href="/admin/referentiels" style="color:#60a5fa;font-weight:600;border-left-color:#3b82f6;">Référentiels RAG</a>
+<a href="/">Retour aux dossiers</a>
+{% endblock %}
+
+{% block content %}
+<h2>Référentiels RAG</h2>
+<p style="font-size:0.85rem;color:#64748b;margin-bottom:1.5rem;">
+  Ajoutez des documents de référence (PDF, CSV, Excel, TXT) pour enrichir la base de connaissances du RAG.
+</p>
+
+<!-- Zone upload -->
+<div class="card" style="margin-bottom:1.5rem;">
+  <h3>Ajouter un référentiel</h3>
+  <form id="upload-form" style="display:flex;gap:0.75rem;align-items:end;flex-wrap:wrap;margin-top:0.75rem;">
+    <div>
+      <label style="display:block;font-size:0.7rem;color:#64748b;text-transform:uppercase;letter-spacing:0.05em;font-weight:600;margin-bottom:0.25rem;">Fichier</label>
+      <input type="file" id="file-input" name="file" accept=".pdf,.csv,.xlsx,.xls,.txt"
+        style="font-size:0.85rem;padding:0.35rem;">
+    </div>
+    <button type="submit" id="upload-btn"
+      style="padding:0.5rem 1.25rem;border-radius:6px;border:none;background:#3b82f6;color:#fff;font-size:0.85rem;font-weight:600;cursor:pointer;">
+      Uploader
+    </button>
+    <span id="upload-status" style="font-size:0.8rem;"></span>
+  </form>
+  <p style="font-size:0.7rem;color:#94a3b8;margin-top:0.5rem;">
+    Extensions : .pdf, .csv, .xlsx, .xls, .txt — Max {{ max_size }} Mo
+  </p>
+</div>
+
+<!-- Tableau référentiels -->
+<div class="card">
+  <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:0.75rem;">
+    <h3>Référentiels indexés</h3>
+    <button id="rebuild-btn"
+      style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #e2e8f0;background:#fff;font-size:0.75rem;cursor:pointer;">
+      Rebuild complet
+    </button>
+  </div>
+
+  <table>
+    <thead>
+      <tr>
+        <th>Nom</th>
+        <th>Type</th>
+        <th>Taille</th>
+        <th>Date</th>
+        <th>Chunks</th>
+        <th>Statut</th>
+        <th>Actions</th>
+      </tr>
+    </thead>
+    <tbody id="ref-table">
+      {% for ref in referentiels %}
+      <tr id="row-{{ ref.id }}">
+        <td>{{ ref.filename }}</td>
+        <td><span class="badge" style="background:#f1f5f9;color:#334155;">{{ ref.extension }}</span></td>
+        <td>{{ "%.1f"|format(ref.size_bytes / 1024 / 1024) }} Mo</td>
+        <td style="font-size:0.8rem;">{{ ref.date_added[:10] }}</td>
+        <td>{{ ref.chunks_count }}</td>
+        <td>
+          {% if ref.status == 'indexed' %}
+            <span class="badge" style="background:#dcfce7;color:#16a34a;">Indexé</span>
+          {% elif ref.status == 'empty' %}
+            <span class="badge" style="background:#fef9c3;color:#ca8a04;">Vide</span>
+          {% else %}
+            <span class="badge" style="background:#f1f5f9;color:#64748b;">Uploadé</span>
+          {% endif %}
+        </td>
+        <td>
+          <button onclick="indexRef('{{ ref.id }}')" class="action-btn"
+            style="padding:2px 8px;border-radius:4px;border:1px solid #3b82f6;background:#eff6ff;color:#2563eb;font-size:0.75rem;cursor:pointer;margin-right:4px;">
+            Indexer
+          </button>
+          <button onclick="deleteRef('{{ ref.id }}')" class="action-btn"
+            style="padding:2px 8px;border-radius:4px;border:1px solid #fca5a5;background:#fef2f2;color:#dc2626;font-size:0.75rem;cursor:pointer;">
+            Supprimer
+          </button>
+        </td>
+      </tr>
+      {% endfor %}
+      {% if not referentiels %}
+      <tr id="empty-row">
+        <td colspan="7" style="text-align:center;color:#94a3b8;padding:2rem;">Aucun référentiel</td>
+      </tr>
+      {% endif %}
+    </tbody>
+  </table>
+</div>
+
+<div id="global-status" style="margin-top:1rem;font-size:0.8rem;"></div>
+{% endblock %}
+
+{% block scripts %}
+<script>
+(function() {
+  const uploadForm = document.getElementById('upload-form');
+  const fileInput = document.getElementById('file-input');
+  const uploadBtn = document.getElementById('upload-btn');
+  const uploadStatus = document.getElementById('upload-status');
+  const globalStatus = document.getElementById('global-status');
+  const rebuildBtn = document.getElementById('rebuild-btn');
+
+  uploadForm.addEventListener('submit', function(e) {
+    e.preventDefault();
+    const file = fileInput.files[0];
+    if (!file) { uploadStatus.textContent = 'Sélectionnez un fichier'; return; }
+
+    const fd = new FormData();
+    fd.append('file', file);
+
+    uploadBtn.disabled = true;
+    uploadBtn.innerHTML = '<span class="spinner"></span>';
+    uploadStatus.textContent = '';
+
+    fetch('/admin/referentiels/upload', { method: 'POST', body: fd })
+      .then(r => r.json())
+      .then(d => {
+        uploadBtn.disabled = false;
+        uploadBtn.textContent = 'Uploader';
+        if (d.ok) {
+          uploadStatus.style.color = '#16a34a';
+          uploadStatus.textContent = 'Uploadé';
+          setTimeout(() => location.reload(), 800);
+        } else {
+          uploadStatus.style.color = '#dc2626';
+          uploadStatus.textContent = d.error || 'Erreur';
+        }
+      })
+      .catch(() => {
+        uploadBtn.disabled = false;
+        uploadBtn.textContent = 'Uploader';
+        uploadStatus.style.color = '#dc2626';
+        uploadStatus.textContent = 'Erreur réseau';
+      });
+  });
+
+  window.indexRef = function(id) {
+    const btn = event.target;
+    btn.disabled = true;
+    btn.innerHTML = '<span class="spinner" style="border-color:rgba(37,99,235,0.3);border-top-color:#2563eb;width:10px;height:10px;"></span>';
+
+    fetch('/admin/referentiels/' + id + '/index', { method: 'POST' })
+      .then(r => r.json())
+      .then(d => {
+        if (d.ok) {
+          globalStatus.style.color = '#16a34a';
+          globalStatus.textContent = d.chunks + ' chunks indexés';
+          setTimeout(() => location.reload(), 800);
+        } else {
+          btn.disabled = false;
+          btn.textContent = 'Indexer';
+          globalStatus.style.color = '#dc2626';
+          globalStatus.textContent = d.error || 'Erreur';
+        }
+      })
+      .catch(() => {
+        btn.disabled = false;
+        btn.textContent = 'Indexer';
+        globalStatus.style.color = '#dc2626';
+        globalStatus.textContent = 'Erreur réseau';
+      });
+  };
+
+  window.deleteRef = function(id) {
+    if (!confirm('Supprimer ce référentiel ?')) return;
+
+    fetch('/admin/referentiels/' + id, { method: 'DELETE' })
+      .then(r => r.json())
+      .then(d => {
+        if (d.ok) {
+          const row = document.getElementById('row-' + id);
+          if (row) row.remove();
+          globalStatus.style.color = '#16a34a';
+          globalStatus.textContent = 'Supprimé';
+        } else {
+          globalStatus.style.color = '#dc2626';
+          globalStatus.textContent = d.error || 'Erreur';
+        }
+      })
+      .catch(() => {
+        globalStatus.style.color = '#dc2626';
+        globalStatus.textContent = 'Erreur réseau';
+      });
+  };
+
+  rebuildBtn.addEventListener('click', function() {
+    if (!confirm('Reconstruire l\'index FAISS complet ? Cela peut prendre plusieurs minutes.')) return;
+    rebuildBtn.disabled = true;
+    rebuildBtn.innerHTML = '<span class="spinner" style="border-color:rgba(0,0,0,0.2);border-top-color:#333;width:10px;height:10px;"></span> Rebuild…';
+
+    fetch('/admin/referentiels/rebuild-index', { method: 'POST' })
+      .then(r => r.json())
+      .then(d => {
+        rebuildBtn.disabled = false;
+        rebuildBtn.textContent = 'Rebuild complet';
+        if (d.ok) {
+          globalStatus.style.color = '#16a34a';
+          globalStatus.textContent = 'Index reconstruit (' + d.reindexed + ' référentiels réindexés)';
+        } else {
+          globalStatus.style.color = '#dc2626';
+          globalStatus.textContent = d.error || 'Erreur';
+        }
+      })
+      .catch(() => {
+        rebuildBtn.disabled = false;
+        rebuildBtn.textContent = 'Rebuild complet';
+        globalStatus.style.color = '#dc2626';
+        globalStatus.textContent = 'Erreur réseau';
+      });
+  });
+})();
+</script>
+{% endblock %}
diff --git a/src/viewer/templates/base.html b/src/viewer/templates/base.html
index 855ae8f..3585ce0 100644
--- a/src/viewer/templates/base.html
+++ b/src/viewer/templates/base.html
@@ -227,6 +227,12 @@
   <nav class="sidebar-nav" id="sidebar-nav">
     {% block sidebar %}{% endblock %}
   </nav>
+  <div class="sidebar-admin" style="border-top:1px solid #1e293b;padding:0.5rem 1rem;">
+    <a href="/admin/referentiels" style="display:block;color:#94a3b8;text-decoration:none;font-size:0.8rem;padding:0.35rem 0;transition:color 0.15s;"
+       onmouseover="this.style.color='#e2e8f0'" onmouseout="this.style.color='#94a3b8'">
+      Référentiels RAG
+    </a>
+  </div>
   <div class="sidebar-admin">
     <label for="model-select">Modèle Ollama</label>
     <select id="model-select"><option>Chargement…</option></select>
diff --git a/tests/test_das_llm.py b/tests/test_das_llm.py
new file mode 100644
index 0000000..54763d6
--- /dev/null
+++ b/tests/test_das_llm.py
@@ -0,0 +1,213 @@
+"""Tests for the LLM pass that extracts additional DAS."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from src.config import Diagnostic, DossierMedical, Sejour
+from src.medical.ollama_cache import OllamaCache
+
+
+class TestExtractDasLlm:
+    """Tests for extract_das_llm() in rag_search.py."""
+
+    def test_returns_das_from_llm(self):
+        """The LLM pass returns additional DAS."""
+        from src.medical.rag_search import extract_das_llm
+
+        mock_result = {
+            "diagnostics_supplementaires": [
+                {
+                    "texte": "Hypertension artérielle",
+                    "code_cim10": "I10",
+                    "justification": "HTA mentionnée dans le texte",
+                },
+            ]
+        }
+
+        with patch("src.medical.rag_search.call_ollama", return_value=mock_result):
+            result = extract_das_llm(
+                text="Patient hypertendu sous traitement",
+                contexte={"sexe": "M", "age": 65},
+                existing_das=["Diabète de type 2 (E11.9)"],
+                dp_texte="Pancréatite aiguë biliaire",
+            )
+
+        assert len(result) == 1
+        assert result[0]["code_cim10"] == "I10"
+        assert result[0]["texte"] == "Hypertension artérielle"
+
+    def test_returns_empty_when_ollama_unavailable(self):
+        """Returns an empty list when call_ollama yields None (Ollama unavailable)."""
+        from src.medical.rag_search import extract_das_llm
+
+        with patch("src.medical.rag_search.call_ollama", return_value=None):
+            result = extract_das_llm(
+                text="Texte médical",
+                contexte={},
+                existing_das=[],
+                dp_texte="",
+            )
+
+        assert result == []
+
+    def test_returns_empty_on_bad_format(self):
+        """Returns an empty list when the response format is unexpected."""
+        from src.medical.rag_search import extract_das_llm
+
+        with patch("src.medical.rag_search.call_ollama", return_value={"other_key": "value"}):
+            result = extract_das_llm(
+                text="Texte médical",
+                contexte={},
+                existing_das=[],
+                dp_texte="",
+            )
+
+        assert result == []
+
+    def test_cache_hit(self, tmp_path):
+        """The cache is used when available."""
+        from src.medical.rag_search import extract_das_llm
+
+        cache = OllamaCache(tmp_path / "cache.json", "test-model")
+
+        mock_result = {
+            "diagnostics_supplementaires": [
+                {"texte": "Anémie", "code_cim10": "D64.9", "justification": "test"},
+            ]
+        }
+
+        # First call: cache miss, Ollama is called
+        with patch("src.medical.rag_search.call_ollama", return_value=mock_result) as mock_call:
+            result1 = extract_das_llm(
+                text="Patient anémique Hb basse",
+                contexte={},
+                existing_das=[],
+                dp_texte="",
+                cache=cache,
+            )
+            assert mock_call.call_count == 1
+            assert len(result1) == 1
+
+        # Check that the cache now holds the entry
+        assert len(cache) > 0
+
+        # Second call: cache hit, Ollama is not called
+        with patch("src.medical.rag_search.call_ollama") as mock_call:
+            result2 = extract_das_llm(
+                text="Patient anémique Hb basse",
+                contexte={},
+                existing_das=[],
+                dp_texte="",
+                cache=cache,
+            )
+            mock_call.assert_not_called()
+
+        assert len(result2) == 1
+        assert result2[0]["code_cim10"] == "D64.9"
+
+    def test_prompt_includes_context(self):
+        """The prompt contains dp_texte, the existing DAS and the source text."""
+        from src.medical.rag_search import _build_prompt_das_extraction
+
+        prompt = _build_prompt_das_extraction(
+            text="Patient hypertendu diabétique",
+            contexte={"sexe": "F", "age": 72, "duree_sejour": 5},
+            existing_das=["Diabète de type 2 (E11.9)", "Obésité (E66.0)"],
+            dp_texte="Pancréatite aiguë biliaire",
+        )
+
+        assert "Pancréatite aiguë biliaire" in prompt
+        assert "Diabète de type 2 (E11.9)" in prompt
+        assert "Obésité (E66.0)" in prompt
+        assert "Patient hypertendu diabétique" in prompt
+
+
+class TestExtractDasLlmIntegration:
+    """Integration tests for the LLM DAS pass in cim10_extractor.py."""
+
+    def test_das_llm_called_when_use_rag_true(self):
+        """The LLM DAS pass is called when use_rag=True."""
+        from src.medical.cim10_extractor import extract_medical_info
+
+        parsed = {
+            "type": "CRH",
+            "patient": {"sexe": "M"},
+            "sejour": {},
+            "diagnostics": [
+                {"libelle": "Pancréatite aiguë biliaire", "code_cim10": "K85.1", "type": "principal"},
+            ],
+        }
+
+        with patch("src.medical.cim10_extractor._extract_das_llm") as mock_llm, \
+             patch("src.medical.cim10_extractor._enrich_with_rag"):
+            extract_medical_info(parsed, "texte médical", use_rag=True)
+            mock_llm.assert_called_once()
+
+    def test_das_llm_not_called_when_use_rag_false(self):
+        """The LLM DAS pass is NOT called when use_rag=False."""
+        from src.medical.cim10_extractor import extract_medical_info
+
+        parsed = {
+            "type": "CRH",
+            "patient": {"sexe": "M"},
+            "sejour": {},
+            "diagnostics": [
+                {"libelle": "Pancréatite aiguë biliaire", "code_cim10": "K85.1", "type": "principal"},
+            ],
+        }
+
+        with patch("src.medical.cim10_extractor._extract_das_llm") as mock_llm:
+            extract_medical_info(parsed, "texte médical", use_rag=False)
+            mock_llm.assert_not_called()
+
+    def test_das_llm_filters_invalid_codes(self):
+        """Invalid CIM-10 codes are filtered out during integration."""
+        from src.medical.cim10_extractor import _extract_das_llm
+
+        dossier = DossierMedical()
+        dossier.sejour = Sejour(sexe="M", age=50)
+        dossier.diagnostic_principal = Diagnostic(
+            texte="Pancréatite aiguë", cim10_suggestion="K85.9",
+        )
+
+        mock_result = [
+            {"texte": "Hypertension artérielle", "code_cim10": "I10", "justification": "ok"},
+            {"texte": "Diagnostic bidon", "code_cim10": "ZZZ.99", "justification": "invalide"},
+        ]
+
+        with patch("src.medical.rag_search.extract_das_llm", return_value=mock_result):
+            _extract_das_llm("texte médical", dossier)
+
+        # I10 is valid → added; ZZZ.99 is invalid → filtered out
+        codes = [d.cim10_suggestion for d in dossier.diagnostics_associes]
+        assert "I10" in codes
+        assert "ZZZ.99" not in codes
+
+    def test_das_llm_deduplicates(self):
+        """Codes already present in the DAS are not duplicated."""
+        from src.medical.cim10_extractor import _extract_das_llm
+
+        dossier = DossierMedical()
+        dossier.sejour = Sejour(sexe="M", age=50)
+        dossier.diagnostic_principal = Diagnostic(
+            texte="Pancréatite aiguë", cim10_suggestion="K85.9",
+        )
+        dossier.diagnostics_associes = [
+            Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I10"),
+        ]
+
+        mock_result = [
+            {"texte": "HTA essentielle", "code_cim10": "I10", "justification": "doublon"},
+            {"texte": "Obésité", "code_cim10": "E66.0", "justification": "nouveau"},
+        ]
+
+        with patch("src.medical.rag_search.extract_das_llm", return_value=mock_result):
+            _extract_das_llm("texte médical", dossier)
+
+        codes = [d.cim10_suggestion for d in dossier.diagnostics_associes]
+        assert codes.count("I10") == 1  # No duplicate
+        assert "E66.0" in codes  # New one added
diff --git a/tests/test_referentiels.py b/tests/test_referentiels.py
new file mode 100644
index 0000000..ae287e4
--- /dev/null
+++ b/tests/test_referentiels.py
@@ -0,0 +1,179 @@
+"""Tests for the referentiel manager and its associated Flask routes."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from src.viewer.referentiels import ReferentielManager
+from src.config import ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
+
+
+# ---------------------------------------------------------------------------
+# Tests ReferentielManager
+# ---------------------------------------------------------------------------
+
+class TestReferentielManager:
+
+    @pytest.fixture
+    def manager(self, tmp_path):
+        return ReferentielManager(tmp_path / "refs")
+
+    def test_add_file(self, manager):
+        ref = manager.add_file("guide.pdf", b"fake pdf content")
+        assert ref["filename"] == "guide.pdf"
+        assert ref["extension"] == ".pdf"
+        assert ref["status"] == "uploaded"
+        assert ref["size_bytes"] == len(b"fake pdf content")
+        assert ref["chunks_count"] == 0
+
+    def test_list_all(self, manager):
+        manager.add_file("a.txt", b"hello")
+        manager.add_file("b.csv", b"col1,col2")
+        assert len(manager.list_all()) == 2
+
+    def test_get(self, manager):
+        ref = manager.add_file("guide.pdf", b"content")
+        found = manager.get(ref["id"])
+        assert found is not None
+        assert found["filename"] == "guide.pdf"
+
+    def test_get_not_found(self, manager):
+        assert manager.get("nonexistent") is None
+
+    def test_remove(self, manager):
+        ref = manager.add_file("guide.pdf", b"content")
+        assert manager.remove(ref["id"]) is True
+        assert len(manager.list_all()) == 0
+        assert manager.get(ref["id"]) is None
+
+    def test_remove_not_found(self, manager):
+        assert manager.remove("nonexistent") is False
+
+    def test_add_file_invalid_extension(self, manager):
+        with pytest.raises(ValueError, match="Extension"):
+            manager.add_file("malware.exe", b"evil")
+
+    def test_add_file_too_large(self, manager):
+        big_data = b"x" * (UPLOAD_MAX_SIZE_MB * 1024 * 1024 + 1)  # one byte over the configured limit
+        with pytest.raises(ValueError, match="volumineux"):
+            manager.add_file("big.pdf", big_data)
+
+    def test_persistence(self, tmp_path):
+        """The index persists across instances."""
+        dir_path = tmp_path / "refs"
+        m1 = ReferentielManager(dir_path)
+        m1.add_file("a.txt", b"hello")
+
+        m2 = ReferentielManager(dir_path)
+        assert len(m2.list_all()) == 1
+        assert m2.list_all()[0]["filename"] == "a.txt"
+
+    def test_file_stored_on_disk(self, manager, tmp_path):  # NOTE(review): tmp_path is not used in this test body
+        ref = manager.add_file("test.txt", b"file content here")
+        stored_path = manager._dir / ref["stored_name"]
+        assert stored_path.exists()
+        assert stored_path.read_bytes() == b"file content here"
+
+    def test_remove_deletes_file(self, manager):
+        ref = manager.add_file("test.txt", b"content")
+        stored_path = manager._dir / ref["stored_name"]
+        assert stored_path.exists()
+        manager.remove(ref["id"])
+        assert not stored_path.exists()
+
+
+# ---------------------------------------------------------------------------
+# Generic chunking tests
+# ---------------------------------------------------------------------------
+
+class TestChunking:
+
+    def test_chunk_txt(self, tmp_path):
+        from src.medical.rag_index import chunk_user_file
+
+        txt_file = tmp_path / "test.txt"
+        txt_file.write_text(
+            "Premier paragraphe avec assez de mots pour le seuil.\n\n"
+            "Deuxième paragraphe avec encore plus de mots pour dépasser le minimum.\n\n"
+            "Court\n\n"
+            "Troisième paragraphe qui devrait aussi être un chunk valide.",
+            encoding="utf-8",
+        )
+
+        chunks = chunk_user_file(txt_file, "test_doc")
+        assert len(chunks) >= 2  # at least 2 paragraphs are long enough
+        assert all(c.document == "test_doc" for c in chunks)
+
+    def test_chunk_csv(self, tmp_path):
+        from src.medical.rag_index import chunk_user_file
+
+        csv_file = tmp_path / "test.csv"
+        csv_file.write_text(
+            "code,description,note\n"
+            "K85.1,Pancréatite aiguë biliaire,diagnostic fréquent\n"
+            "I10,Hypertension essentielle,comorbidité courante\n",
+            encoding="utf-8",
+        )
+
+        chunks = chunk_user_file(csv_file, "csv_doc")
+        assert len(chunks) == 2  # one chunk expected per data row; the header row is not counted
+        assert "K85.1" in chunks[0].text
+        assert "I10" in chunks[1].text
+
+    def test_chunk_unsupported_extension(self, tmp_path):
+        from src.medical.rag_index import chunk_user_file
+
+        bad_file = tmp_path / "test.xyz"
+        bad_file.write_text("content")
+
+        chunks = chunk_user_file(bad_file, "bad")
+        assert chunks == []
+
+
+# ---------------------------------------------------------------------------
+# Flask route tests
+# ---------------------------------------------------------------------------
+
+class TestReferentielRoutes:
+
+    @pytest.fixture
+    def app(self, tmp_path):
+        """Create a test Flask app. NOTE(review): tmp_path is never passed to create_app() — confirm the manager is isolated."""
+        from src.viewer.app import create_app
+        app = create_app()
+        app.config["TESTING"] = True
+        return app
+
+    @pytest.fixture
+    def client(self, app):
+        return app.test_client()
+
+    def test_admin_page_loads(self, client):
+        resp = client.get("/admin/referentiels")
+        assert resp.status_code == 200
+        assert "Référentiels RAG" in resp.data.decode()
+
+    def test_upload_no_file(self, client):
+        resp = client.post("/admin/referentiels/upload")
+        assert resp.status_code == 400
+        data = resp.get_json()
+        assert "error" in data
+
+    def test_upload_valid_file(self, client):
+        from io import BytesIO
+        data = {
+            "file": (BytesIO(b"test content"), "doc.txt"),
+        }
+        resp = client.post("/admin/referentiels/upload", data=data, content_type="multipart/form-data")
+        result = resp.get_json()
+        assert resp.status_code == 200
+        assert result["ok"] is True
+        assert result["referentiel"]["filename"] == "doc.txt"
+
+    def test_delete_nonexistent(self, client):
+        resp = client.delete("/admin/referentiels/nonexistent")
+        assert resp.status_code == 404