#!/usr/bin/env bash
#
# Bootstrap and launch the T2A application viewer.
#
# Steps: move to the script directory, create the Python virtual environment
# on first run, (re)install dependencies when requirements.txt is newer than
# the marker file, create the working directories, then start the viewer.
#
# Usage: ./run.sh

set -euo pipefail

cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

readonly VENV_DIR=".venv"
readonly DEPS_MARKER="${VENV_DIR}/.deps_installed"
readonly REQUIREMENTS="requirements.txt"

echo "🚀 Démarrage de l'application T2A..."

# Test for the activate script rather than the directory: an interrupted
# `python3 -m venv` leaves a directory behind that would otherwise be
# mistaken for a usable environment.
if [[ ! -f "${VENV_DIR}/bin/activate" ]]; then
  echo "📦 Création de l'environnement virtuel..."
  python3 -m venv "${VENV_DIR}"
fi

echo "🔧 Activation de l'environnement virtuel..."
# Activation scripts are not guaranteed to be nounset-clean; relax -u only
# while sourcing.
set +u
# shellcheck disable=SC1091
source "${VENV_DIR}/bin/activate"
set -u

# Reinstall only when the marker is missing or older than requirements.txt.
if [[ ! -f "${DEPS_MARKER}" || "${REQUIREMENTS}" -nt "${DEPS_MARKER}" ]]; then
  echo "📥 Installation des dépendances..."
  # `python -m pip` guarantees the venv interpreter's pip is the one used.
  python -m pip install -q --upgrade pip
  python -m pip install -q -r "${REQUIREMENTS}"
  touch -- "${DEPS_MARKER}"
else
  echo "✅ Dépendances déjà installées"
fi

# Working directories expected by the pipeline and the viewer.
mkdir -p input output/anonymized output/structured output/reports data/rag_index

echo ""
echo "✨ Application prête !"
echo ""
echo "📂 Répertoires :"
echo "   - input/  : Placez vos PDFs ici"
echo "   - output/ : Résultats du traitement"
echo ""
echo "🌐 Lancement du viewer sur http://localhost:5000"
echo ""
echo "   Appuyez sur Ctrl+C pour arrêter"
echo ""

# exec replaces the shell so Ctrl+C reaches the Python process directly.
exec python3 -m src.viewer
class GHMEstimation(BaseModel):
    """Heuristic GHM estimate stored on ``DossierMedical.ghm_estimation``."""

    # CMD code and its human-readable label.
    cmd: Optional[str] = None
    cmd_libelle: Optional[str] = None
    # Type letter: "C" / "M" / "K".
    type_ghm: Optional[str] = None
    # Severity level, 1-4.
    severite: int = 1
    # Approximate GHM code, e.g. "07C??2".
    ghm_approx: Optional[str] = None
    # Counters of CMA / CMS found for the dossier.
    cma_count: int = 0
    cms_count: int = 0
    # Free-text alerts raised while estimating.
    alertes: list[str] = Field(default_factory=list)


class ControleCPAM(BaseModel):
    """One CPAM (UCR) control row, plus the generated counter-argument."""

    # OGC number used to match the control with a dossier sub-directory.
    numero_ogc: int
    # Title, UCR argument and UCR decision as read from the control file.
    titre: str = ""
    arg_ucr: str = ""
    decision_ucr: str = ""
    # Codes proposed by the UCR (optional columns of the control file).
    dp_ucr: Optional[str] = None
    da_ucr: Optional[str] = None
    dr_ucr: Optional[str] = None
    actes_ucr: Optional[str] = None
    # Filled in after generation: counter-argument text and the RAG sources used.
    contre_argumentation: Optional[str] = None
    sources_reponse: list[RAGSource] = Field(default_factory=list)
# Header cells expected in the CPAM workbook. The first four are mandatory;
# the remaining *_UCR code columns are optional.
_EXPECTED_COLUMNS = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")


def _cell(row: tuple, idx: int | None):
    """Return ``row[idx]``, or None when the index is missing or past the row end."""
    if idx is None or idx >= len(row):
        return None
    return row[idx]


def _parse_ogc(value: object) -> int | None:
    """Convert an OGC cell to int; return None when it is not an integral number."""
    if isinstance(value, bool):
        # bool is an int subclass: a TRUE/FALSE cell must not become OGC 1/0.
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        # Refuse 17.5 (silent truncation) as well as nan/inf.
        return int(value) if value.is_integer() else None
    try:
        return int(str(value).strip())
    except ValueError:
        return None


def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
    """Read the CPAM control workbook and group its rows by OGC number.

    Only the first sheet is read; its first row is the header.

    Args:
        path: Path to the CPAM .xlsx file.

    Returns:
        Dict mapping each OGC number to the list of controls found for it.
        Empty when the file is missing, empty, or lacks a mandatory column.
    """
    path = Path(path)
    if not path.exists():
        logger.error("Fichier CPAM introuvable : %s", path)
        return {}

    wb = openpyxl.load_workbook(path, read_only=True)
    try:
        ws = wb[wb.sheetnames[0]]

        rows = ws.iter_rows(values_only=True)
        header = next(rows, None)
        if header is None:
            logger.error("Fichier CPAM vide : %s", path)
            return {}

        # Column name -> index. Header cells are coerced to str so a numeric
        # or date header cannot break the parsing.
        col_map: dict[str, int] = {}
        for i, col_name in enumerate(header):
            name = "" if col_name is None else str(col_name).strip()
            if name:
                col_map[name] = i

        missing = [c for c in _EXPECTED_COLUMNS[:4] if c not in col_map]
        if missing:
            logger.error("Colonnes manquantes dans le fichier CPAM : %s", missing)
            return {}

        result: dict[int, list[ControleCPAM]] = {}
        count = 0

        for row in rows:
            ogc_val = _cell(row, col_map["N° OGC"])
            if ogc_val is None:
                continue

            numero_ogc = _parse_ogc(ogc_val)
            if numero_ogc is None:
                logger.warning("N° OGC invalide ignoré : %s", ogc_val)
                continue

            controle = ControleCPAM(
                numero_ogc=numero_ogc,
                titre=str(_cell(row, col_map["Titre"]) or "").strip(),
                arg_ucr=str(_cell(row, col_map["Arg_UCR"]) or "").strip(),
                decision_ucr=str(_cell(row, col_map["Décision_UCR"]) or "").strip(),
                dp_ucr=_clean_optional(row, col_map.get("DP_UCR")),
                da_ucr=_clean_optional(row, col_map.get("DA_UCR")),
                dr_ucr=_clean_optional(row, col_map.get("DR_UCR")),
                actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")),
            )

            result.setdefault(numero_ogc, []).append(controle)
            count += 1
    finally:
        # A read-only workbook keeps the file handle open until closed.
        wb.close()

    logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", count, len(result))
    return result


def _clean_optional(row: tuple, idx: int | None) -> str | None:
    """Return the stripped text of an optional cell, or None when absent or blank."""
    val = _cell(row, idx)
    if val is None:
        return None
    text = str(val).strip()
    return text or None


def match_dossier_ogc(source_name: str, cpam_data: dict[int, list[ControleCPAM]]) -> list[ControleCPAM]:
    """Return the CPAM controls matching a dossier through its OGC prefix.

    The dossier name follows the pattern "17_23100690", where 17 is the OGC number.

    Args:
        source_name: Sub-directory name (e.g. "17_23100690").
        cpam_data: OGC -> controls mapping returned by parse_cpam_excel().

    Returns:
        The controls registered for that OGC, or an empty list.
    """
    prefix = re.match(r"^(\d+)_", source_name)
    if prefix is None:
        return []
    return cpam_data.get(int(prefix.group(1)), [])
query_parts.append(f"diagnostic principal {controle.dp_ucr}") + if controle.da_ucr: + query_parts.append(f"diagnostic associĂ© {controle.da_ucr}") + + # Tronquer l'argument CPAM pour ne garder que le coeur + arg_short = controle.arg_ucr[:300] if controle.arg_ucr else "" + if arg_short: + query_parts.append(arg_short) + + query = " ".join(query_parts) + if not query.strip(): + return [] + + return search_similar(query, top_k=8) + + +def _build_cpam_prompt( + dossier: DossierMedical, + controle: ControleCPAM, + sources: list[dict], +) -> str: + """Construit le prompt pour la contre-argumentation CPAM.""" + # RĂ©sumĂ© du dossier mĂ©dical + dossier_lines = [] + + if dossier.diagnostic_principal: + dp = dossier.diagnostic_principal + dp_code = f" ({dp.cim10_suggestion})" if dp.cim10_suggestion else "" + dossier_lines.append(f"- DP : {dp.texte}{dp_code}") + + if dossier.diagnostics_associes: + das_parts = [] + for das in dossier.diagnostics_associes: + code = f" ({das.cim10_suggestion})" if das.cim10_suggestion else "" + das_parts.append(f"{das.texte}{code}") + dossier_lines.append(f"- DAS : {', '.join(das_parts)}") + + if dossier.actes_ccam: + actes = [f"{a.texte} ({a.code_ccam_suggestion})" if a.code_ccam_suggestion else a.texte + for a in dossier.actes_ccam] + dossier_lines.append(f"- Actes CCAM : {', '.join(actes)}") + + sejour = dossier.sejour + if sejour.duree_sejour is not None: + dossier_lines.append(f"- DurĂ©e sĂ©jour : {sejour.duree_sejour} jours") + if sejour.sexe or sejour.age is not None: + patient_info = [] + if sejour.sexe: + patient_info.append(sejour.sexe) + if sejour.age is not None: + patient_info.append(f"{sejour.age} ans") + dossier_lines.append(f"- Patient : {', '.join(patient_info)}") + + if dossier.biologie_cle: + bio = [f"{b.test}: {b.valeur}" for b in dossier.biologie_cle[:5] if b.valeur] + if bio: + dossier_lines.append(f"- Biologie clĂ© : {', '.join(bio)}") + + if dossier.complications: + dossier_lines.append(f"- Complications : {', 
'.join(dossier.complications)}") + + dossier_str = "\n".join(dossier_lines) if dossier_lines else "Non disponible" + + # Codes contestĂ©s par la CPAM + codes_contestes = [] + if controle.dp_ucr: + codes_contestes.append(f"DP proposĂ© par UCR : {controle.dp_ucr}") + if controle.da_ucr: + codes_contestes.append(f"DA proposĂ©s par UCR : {controle.da_ucr}") + if controle.dr_ucr: + codes_contestes.append(f"DR proposĂ© par UCR : {controle.dr_ucr}") + if controle.actes_ucr: + codes_contestes.append(f"Actes proposĂ©s par UCR : {controle.actes_ucr}") + codes_str = "\n".join(codes_contestes) if codes_contestes else "Aucun code spĂ©cifique proposĂ©" + + # Sources RAG + sources_text = "" + for i, src in enumerate(sources, 1): + doc_name = { + "cim10": "CIM-10 FR 2026", + "cim10_alpha": "CIM-10 Index AlphabĂ©tique 2026", + "guide_methodo": "Guide MĂ©thodologique MCO 2026", + "ccam": "CCAM PMSI V4 2025", + }.get(src.get("document", ""), src.get("document", "")) + + code_info = f" (code: {src['code']})" if src.get("code") else "" + page_info = f" [page {src['page']}]" if src.get("page") else "" + + sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n" + sources_text += (src.get("extrait", "")[:800]) + "\n\n" + + return f"""Tu es un mĂ©decin DIM (DĂ©partement d'Information MĂ©dicale) expert en contentieux T2A. +Tu dois contre-argumenter la dĂ©cision de la CPAM (UCR) point par point, en t'appuyant sur le guide mĂ©thodologique et la CIM-10. 
+ +DOSSIER MÉDICAL DE L'ÉTABLISSEMENT : +{dossier_str} + +OBJET DU DÉSACCORD : {controle.titre} + +ARGUMENTATION DE LA CPAM (UCR) : +{controle.arg_ucr} + +DÉCISION UCR : {controle.decision_ucr} + +CODES CONTESTÉS : +{codes_str} + +SOURCES RÉGLEMENTAIRES (Guide mĂ©thodologique, CIM-10) : +{sources_text} + +CONSIGNES : +- Analyse objectivement l'argument de la CPAM +- Identifie les points oĂč la CPAM a raison (le cas Ă©chĂ©ant) +- Contre-argumente point par point en citant le guide mĂ©thodologique et la CIM-10 +- Cite les rĂ©fĂ©rences prĂ©cises (pages, articles, fascicules) +- Propose une conclusion et la position recommandĂ©e + +RĂ©ponds UNIQUEMENT avec un objet JSON au format suivant : +{{ + "analyse_contestation": "RĂ©sumĂ© de ce que conteste la CPAM", + "points_accord": "Points oĂč la CPAM a raison (ou 'Aucun' si non applicable)", + "contre_arguments": "Arguments point par point en faveur de l'Ă©tablissement", + "references": "RĂ©fĂ©rences guide mĂ©thodologique / CIM-10 citĂ©es", + "conclusion": "SynthĂšse et position recommandĂ©e" +}}""" + + +def _format_response(parsed: dict) -> str: + """Formate la rĂ©ponse LLM en texte lisible.""" + sections = [] + + analyse = parsed.get("analyse_contestation") + if analyse: + sections.append(f"ANALYSE DE LA CONTESTATION\n{analyse}") + + accord = parsed.get("points_accord") + if accord and accord.lower() not in ("aucun", "non applicable", "n/a", ""): + sections.append(f"POINTS D'ACCORD\n{accord}") + + contre = parsed.get("contre_arguments") + if contre: + sections.append(f"CONTRE-ARGUMENTS\n{contre}") + + refs = parsed.get("references") + if refs: + sections.append(f"REFERENCES\n{refs}") + + conclusion = parsed.get("conclusion") + if conclusion: + sections.append(f"CONCLUSION\n{conclusion}") + + return "\n\n".join(sections) + + +def generate_cpam_response( + dossier: DossierMedical, + controle: ControleCPAM, +) -> tuple[str, list[RAGSource]]: + """GĂ©nĂšre une contre-argumentation pour un contrĂŽle CPAM. 
def generate_cpam_response(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> tuple[str, list[RAGSource]]:
    """Generate a counter-argument for one CPAM control.

    Pipeline: targeted RAG search, prompt construction, Ollama call, then
    formatting of the JSON answer.

    Args:
        dossier: The analysed medical dossier.
        controle: The CPAM control to contest.

    Returns:
        Tuple (counter-argument text, RAG sources used). The text is "" when
        Ollama returns nothing usable; the sources are returned either way.
    """
    logger.info("CPAM : génération contre-argumentation pour OGC %d — %s",
                controle.numero_ogc, controle.titre)

    # 1. Targeted RAG search
    sources = _search_rag_for_control(controle, dossier)
    logger.info("  RAG : %d sources trouvées", len(sources))

    # 2. Prompt
    prompt = _build_cpam_prompt(dossier, controle, sources)

    # 3. LLM call
    result = call_ollama(prompt, temperature=0.1, max_tokens=3000)

    # 4. Convert the raw hits. `or ""` also covers keys present with a None
    #    value, which would otherwise fail on slicing.
    rag_sources = [
        RAGSource(
            document=s.get("document") or "",
            page=s.get("page"),
            code=s.get("code"),
            extrait=(s.get("extrait") or "")[:200],
        )
        for s in sources
    ]

    # The formatter expects a non-empty JSON object; anything else is treated
    # as "no answer" so the sources are still returned.
    if not isinstance(result, dict) or not result:
        logger.warning("  Ollama non disponible — contre-argumentation non générée")
        return "", rag_sources

    # 5. Format the answer
    text = _format_response(result)
    logger.info("  Contre-argumentation générée (%d caractères)", len(text))

    return text, rag_sources
# The counters in the fixed zone are two digits wide.
_MAX_ZONE_COUNT = 99

# Accepted input layouts, each with its (day, month, year) group indexes.
_DATE_PATTERNS = (
    (re.compile(r"(\d{1,2})/(\d{1,2})/(\d{4})(?!\d)"), (1, 2, 3)),  # DD/MM/YYYY
    (re.compile(r"(\d{4})-(\d{1,2})-(\d{1,2})(?!\d)"), (3, 2, 1)),  # YYYY-MM-DD
)

# Keyword -> RUM code tables, scanned in insertion order.
_MODE_ENTREE_CODES = {
    "domicile": "8",
    "mutation": "6",
    "transfert": "7",
    "urgences": "8",
    "urgence": "8",
}
_MODE_SORTIE_CODES = {
    "domicile": "8",
    "mutation": "6",
    "transfert": "7",
    "deces": "9",
    "décès": "9",
    "décédé": "9",
    "decede": "9",
}


def _format_cim10(code: str | None) -> str:
    """Format a CIM-10 code on 8 characters: dot removed, upper case, space padded."""
    if not code:
        return " " * 8
    return code.upper().replace(".", "").strip().ljust(8)[:8]


def _format_date(date_str: str | None) -> str:
    """Convert a DD/MM/YYYY or YYYY-MM-DD date to DDMMYYYY (8 characters).

    Trailing text such as a time is ignored. Unrecognised input and impossible
    calendar dates (e.g. 31/02/2024) give 8 spaces.
    """
    from datetime import date

    blank = " " * 8
    if not date_str:
        return blank
    text = date_str.strip()
    for pattern, (day_g, month_g, year_g) in _DATE_PATTERNS:
        found = pattern.match(text)
        if found is None:
            continue
        try:
            parsed = date(int(found.group(year_g)), int(found.group(month_g)), int(found.group(day_g)))
        except ValueError:
            # Looks like a date but is not one.
            return blank
        return f"{parsed.day:02d}{parsed.month:02d}{parsed.year:04d}"
    return blank


def _format_sex(sexe: str | None) -> str:
    """Convert a textual sex to the RUM code (1 = male, 2 = female, space = unknown)."""
    if not sexe:
        return " "
    label = sexe.strip().upper()
    if label in ("M", "MASCULIN", "HOMME", "H"):
        return "1"
    if label in ("F", "FEMININ", "FÉMININ", "FEMME"):
        return "2"
    return " "


def _map_mode(text: str | None, mapping: dict[str, str]) -> str:
    """Return the code of the first keyword of ``mapping`` found in ``text``, else a space."""
    if not text:
        return " "
    lowered = text.strip().lower()
    for keyword, code in mapping.items():
        if keyword in lowered:
            return code
    return " "


def _map_mode_entree(text: str | None) -> str:
    """Convert a textual admission mode to its 1-character RUM code."""
    return _map_mode(text, _MODE_ENTREE_CODES)


def _map_mode_sortie(text: str | None) -> str:
    """Convert a textual discharge mode to its 1-character RUM code."""
    return _map_mode(text, _MODE_SORTIE_CODES)


def _format_ccam_act(acte) -> str:
    """Format one CCAM act on 29 characters.

    Layout: code(7) + phase(1) + activity(1) + date(8) + doc/extension(12).
    """
    code = (acte.code_ccam_suggestion or "").upper().replace(" ", "").ljust(7)[:7]
    # NOTE(review): phase and activity are hard-coded to "1"; confirm these
    # defaults against the CCAM/RUM specification.
    phase = "1"
    activite = "1"
    return f"{code}{phase}{activite}{_format_date(acte.date)}{' ' * 12}"


def export_rum(dossier: DossierMedical, config: RUMConfig | None = None) -> str:
    """Build the full RUM text for one medical dossier.

    The counters are two digits wide, so at most 99 DAS and 99 acts are
    exported. Extra entries are dropped, with a warning, so the counters
    always match the number of variable zones that follow.

    Args:
        dossier: Dossier to export.
        config: Establishment settings (``finess``, ``num_um``); a default
            ``RUMConfig()`` is used when omitted.

    Returns:
        RUM V016 text: a 165-character fixed zone followed by the variable
        zones (8 characters per DAS, 29 per CCAM act).

    Raises:
        AssertionError: if the assembled fixed zone is not 165 characters long.
    """
    if config is None:
        config = RUMConfig()

    sejour = dossier.sejour
    dp = dossier.diagnostic_principal

    das_list = list(dossier.diagnostics_associes)
    actes_list = list(dossier.actes_ccam)
    if len(das_list) > _MAX_ZONE_COUNT or len(actes_list) > _MAX_ZONE_COUNT:
        import warnings

        warnings.warn(
            f"RUM: {len(das_list)} DAS / {len(actes_list)} actes, export limité à {_MAX_ZONE_COUNT}",
            stacklevel=2,
        )
        das_list = das_list[:_MAX_ZONE_COUNT]
        actes_list = actes_list[:_MAX_ZONE_COUNT]
    nb_das = len(das_list)
    nb_actes = len(actes_list)

    # Identifiers derived from the source file name.
    source = dossier.source_file or "UNKNOWN"
    num_rss = source.replace(".pdf", "").replace(" ", "_").ljust(20)[:20]
    num_admin = num_rss
    num_rum = source[:10].ljust(10)[:10]

    # Fixed zone (165 characters); positions are 1-based.
    parts = [
        " " * 2,                                # 1-2    : classification version (blank)
        " " * 6,                                # 3-8    : GHM (blank, filled by the grouper)
        " ",                                    # 9      : filler
        "016",                                  # 10-12  : format version
        " " * 3,                                # 13-15  : return code
        config.finess.ljust(9)[:9],             # 16-24  : FINESS
        "016",                                  # 25-27  : RUM version
        num_rss,                                # 28-47  : RSS number
        num_admin,                              # 48-67  : administrative number
        num_rum,                                # 68-77  : RUM number
        _format_date(None),                     # 78-85  : birth date (not available)
        _format_sex(sejour.sexe),               # 86     : sex
        config.num_um.ljust(4)[:4],             # 87-90  : medical unit number
        " " * 2,                                # 91-92  : authorisation type
        _format_date(sejour.date_entree),       # 93-100 : unit entry date
        _map_mode_entree(sejour.mode_entree),   # 101    : entry mode
        " ",                                    # 102    : origin
        _format_date(sejour.date_sortie),       # 103-110: unit exit date
        _map_mode_sortie(sejour.mode_sortie),   # 111    : exit mode
        " ",                                    # 112    : destination
        " " * 5,                                # 113-117: residence postcode
        " " * 4,                                # 118-121: newborn weight
        " " * 2,                                # 122-123: gestational age
        "00",                                   # 124-125: number of sessions
        f"{nb_das:02d}",                        # 126-127: number of DAS
        "00",                                   # 128-129: number of DAD
        f"{nb_actes:02d}",                      # 130-131: number of acts
        _format_cim10(dp.cim10_suggestion if dp else None),  # 132-139: DP
        " " * 8,                                # 140-147: DR
        " " * 3,                                # 148-150: IGS2
        " " * 15,                               # 151-165: reserved
    ]

    fixed = "".join(parts)
    # Raised explicitly so the check survives `python -O`.
    if len(fixed) != 165:
        raise AssertionError(f"Zone fixe RUM: attendu 165, obtenu {len(fixed)}")

    # Variable zones: DAS (8 characters each), then CCAM acts (29 each).
    variable_parts = [_format_cim10(das.cim10_suggestion) for das in das_list]
    variable_parts.extend(_format_ccam_act(acte) for acte in actes_list)

    return fixed + "".join(variable_parts)


def save_rum(dossier: DossierMedical, path: Path, config: RUMConfig | None = None) -> None:
    """Export a dossier in RUM format to ``path``, creating parent directories."""
    rum_text = export_rum(dossier, config)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(rum_text, encoding="utf-8")
process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag) dossier.source_file = pdf_path.name dossier.document_type = doc_type - dossier.processing_time_s = round(time.time() - t0, 2) logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal) logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam)) + # 8. Estimation GHM + try: + ghm = estimate_ghm(dossier) + dossier.ghm_estimation = ghm + logger.info(" GHM : CMD=%s, Type=%s, SĂ©vĂ©ritĂ©=%d → %s", + ghm.cmd or "?", ghm.type_ghm or "?", + ghm.severite, ghm.ghm_approx or "?") + except Exception: + logger.warning(" Erreur estimation GHM", exc_info=True) + + dossier.processing_time_s = round(time.time() - t0, 2) results.append((anonymized_text, dossier, report)) logger.info(" Temps total : %.2fs", time.time() - t0) @@ -120,6 +131,7 @@ def write_outputs( dossier: DossierMedical, report: AnonymizationReport, subdir: str | None = None, + export_rum_flag: bool = False, ) -> None: """Écrit les fichiers de sortie.""" anon_dir = ANONYMIZED_DIR / subdir if subdir else ANONYMIZED_DIR @@ -151,6 +163,17 @@ def write_outputs( ) logger.info(" → %s", report_path) + # Export RUM + if export_rum_flag: + from .export.rum_export import save_rum + rum_dir = OUTPUT_DIR / "rum" + if subdir: + rum_dir = rum_dir / subdir + rum_dir.mkdir(parents=True, exist_ok=True) + rum_path = rum_dir / f"{stem}_rum.txt" + save_rum(dossier, rum_path) + logger.info(" → %s", rum_path) + def main(input_path: str | None = None) -> None: """Point d'entrĂ©e principal.""" @@ -197,6 +220,16 @@ def main(input_path: str | None = None) -> None: action="store_true", help="Forcer la reconstruction de l'index FAISS", ) + parser.add_argument( + "--export-rum", + action="store_true", + help="Exporter les dossiers au format RUM V016 (pour groupeur ATIH)", + ) + parser.add_argument( + "--control-cpam", + metavar="PATH", + 
help="Fichier Excel de contrĂŽle CPAM (enrichit les dossiers avec contre-argumentation)", + ) args = parser.parse_args() if args.build_dict: @@ -226,6 +259,16 @@ def main(input_path: str | None = None) -> None: if args.no_rag: _use_rag = False + export_rum_flag = args.export_rum + + # Chargement contrĂŽle CPAM + cpam_data = None + if args.control_cpam: + from .control.cpam_parser import parse_cpam_excel + cpam_data = parse_cpam_excel(args.control_cpam) + if not cpam_data: + logger.warning("Aucun contrĂŽle CPAM chargĂ© depuis %s", args.control_cpam) + input_paths = args.input # Collecte des groupes (pdfs, subdir) Ă  traiter @@ -274,12 +317,13 @@ def main(input_path: str | None = None) -> None: multi = len(pdf_results) > 1 for part_idx, (anonymized_text, dossier, report) in enumerate(pdf_results): part_stem = f"{stem}_part{part_idx + 1}" if multi else stem - write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir) + write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir, export_rum_flag=export_rum_flag) group_dossiers.append(dossier) except Exception: logger.exception("Erreur lors du traitement de %s", pdf_path.name) # Fusion multi-PDFs si plusieurs documents dans le mĂȘme groupe + merged = None if len(group_dossiers) > 1 and subdir: try: from .medical.fusion import merge_dossiers @@ -287,13 +331,47 @@ def main(input_path: str | None = None) -> None: struct_dir = STRUCTURED_DIR / subdir struct_dir.mkdir(parents=True, exist_ok=True) merged_path = struct_dir / f"{subdir}_fusionne_cim10.json" + + # Export RUM du dossier fusionnĂ© + if export_rum_flag: + from .export.rum_export import save_rum + rum_dir = OUTPUT_DIR / "rum" / subdir + rum_dir.mkdir(parents=True, exist_ok=True) + rum_path = rum_dir / f"{subdir}_fusionne_rum.txt" + save_rum(merged, rum_path) + logger.info(" → RUM fusionnĂ© : %s", rum_path) + except Exception: + logger.exception("Erreur lors de la fusion du groupe %s", subdir) + merged = None + + # ContrĂŽle CPAM : 
def normalize_code(code: str) -> str:
    """Normalise a CIM-10 code: K810 -> K81.0, k85.1 -> K85.1.

    All whitespace is removed, including inside the code ("K 81.0"), and a
    dangling trailing dot is dropped ("K81." -> "K81"), so such variants map
    to the same dictionary key as the clean code.
    """
    code = "".join(code.split()).upper().rstrip(".")
    # Insert the dot when missing: K810 -> K81.0
    if len(code) > 3 and "." not in code:
        code = f"{code[:3]}.{code[3:]}"
    return code


def validate_code(code: str) -> tuple[bool, str]:
    """Check whether a CIM-10 code exists in the dictionary.

    The normalised form is tried first, then the raw upper-cased code.

    Returns:
        (is_valid, label); the label is empty when the code is invalid.
    """
    d = load_dict()
    for candidate in (normalize_code(code), code.upper().strip()):
        if candidate in d:
            return True, d[candidate]
    return False, ""
# Placeholder answers an LLM gives instead of a code (compared lower-cased).
_INVALID_CODE_PATTERNS = {"aucun", "none", "n/a", "non_codable", "aucun_code_valide", "inconnu"}


def _fallback_cim10(texte: str) -> str | None:
    """Look the diagnosis text up in the dictionary; return a code only if it validates."""
    code = dict_lookup(texte, domain_overrides=CIM10_MAP)
    if code:
        is_valid, _ = cim10_validate(code)
        if is_valid:
            return code
    return None


def _looks_like_cim10(value: str) -> bool:
    """Tell whether ``value`` starts like a CIM-10 code (one ASCII letter + two digits)."""
    text = value.strip()
    return len(text) >= 3 and text[0].isascii() and text[0].isalpha() and text[1:3].isdigit()


def _validate_cim10(dossier: DossierMedical) -> None:
    """Validate the CIM-10 codes suggested by Ollama against the dictionary.

    For the DP and each DAS carrying a suggestion:
      * placeholders and free text that is not shaped like a code are
        rejected: replaced by the dictionary fallback (confidence "medium")
        or cleared;
      * other suggestions are normalised (K810 -> K81.0) and checked. An
        unknown code is replaced by the fallback when one exists, otherwise
        kept with confidence "low".
    Each correction appends a message to ``dossier.alertes_codage``.
    """
    diags: list[tuple[str, Diagnostic]] = []
    if dossier.diagnostic_principal:
        diags.append(("DP", dossier.diagnostic_principal))
    diags.extend(("DAS", das) for das in dossier.diagnostics_associes)

    for type_diag, diag in diags:
        suggestion = diag.cim10_suggestion
        if not suggestion:
            continue

        # Not a code at all. Free text must not reach normalize_code(), which
        # would insert a dot into it and leave the result as the suggestion.
        if suggestion.lower().strip() in _INVALID_CODE_PATTERNS or not _looks_like_cim10(suggestion):
            fallback = _fallback_cim10(diag.texte)
            if fallback:
                dossier.alertes_codage.append(
                    f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {suggestion} » → fallback {fallback}"
                )
                diag.cim10_suggestion = fallback
                diag.cim10_confidence = "medium"
            else:
                dossier.alertes_codage.append(
                    f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {suggestion} »"
                )
                diag.cim10_suggestion = None
                diag.cim10_confidence = None
            continue

        # Normalise the layout (K810 -> K81.0), then check the dictionary.
        normalized = normalize_code(suggestion)
        diag.cim10_suggestion = normalized
        is_valid, _ = cim10_validate(normalized)
        if is_valid:
            continue

        fallback = _fallback_cim10(diag.texte)
        if fallback:
            dossier.alertes_codage.append(
                f"CIM-10 {type_diag} {normalized} ({diag.texte}) : code invalide → fallback {fallback}"
            )
            diag.cim10_suggestion = fallback
            diag.cim10_confidence = "medium"
        else:
            # Code-shaped but unknown: keep it and flag it as low confidence.
            dossier.alertes_codage.append(
                f"CIM-10 {type_diag} {normalized} ({diag.texte}) : code absent du dictionnaire CIM-10"
            )
            diag.cim10_confidence = "low"
dossier.alertes_codage.extend(alertes) diff --git a/src/medical/das_filter.py b/src/medical/das_filter.py index 737dca0..28c070d 100644 --- a/src/medical/das_filter.py +++ b/src/medical/das_filter.py @@ -33,9 +33,12 @@ def is_valid_diagnostic_text(text: str) -> bool: if re.match(r"^([a-zĂ -Ăż]{3,})\1+[a-zĂ -Ăż]*$", t, re.IGNORECASE): return False - # 5. Mots rĂ©pĂ©tĂ©s ≄ 3 fois : "SpontanĂ©e spontanĂ©e spontanĂ©e spontanĂ©e" + # 5. Mots rĂ©pĂ©tĂ©s : tous identiques ("Absence absence", "Anticoagulant anticoagulant") + # ou ≄ 3 occurrences du mĂȘme mot words = t.lower().split() - if words: + if len(words) >= 2: + if len(set(words)) == 1: + return False from collections import Counter counts = Counter(words) if counts.most_common(1)[0][1] >= 3: @@ -47,4 +50,27 @@ def is_valid_diagnostic_text(text: str) -> bool: if t in {"Isolement", "Pp 500"}: return False + # 7. Ponctuation initiale (artefacts OCR) : ", sans prĂ©cision" + if re.match(r'^[,.\-;:!)\]]\s', t): + return False + + # 8. Pattern "À X.X" / "A X.X" (valeurs numĂ©riques OCR) + if re.match(r'^[ÀA]\s+\d+([.,]\d+)?$', t): + return False + + # 9. Crochets (artefacts OCR) : "Episode [episode" + if '[' in t or ']' in t: + return False + + # 10. Termes de laboratoire isolĂ©s (un seul mot ≠ diagnostic) + _LAB_TERMS = {"hĂ©moglobine", "crĂ©atinine", "plaquettes", "leucocytes", "glycĂ©mie", + "natrĂ©mie", "kaliĂ©mie", "calcĂ©mie", "bilirubine", "albumine", + "fibrinogĂšne", "hĂ©matocrite", "cĂ©tonurie", "glycosurie"} + if t.lower() in _LAB_TERMS: + return False + + # 11. Fragments anatomiques courts sans pathologie : "Dans la vessie", "Le rein" + if re.match(r'^(Dans |La |Le |Les |Au |Aux )', t) and len(t) < 30: + return False + return True diff --git a/src/medical/ghm.py b/src/medical/ghm.py new file mode 100644 index 0000000..115c3d4 --- /dev/null +++ b/src/medical/ghm.py @@ -0,0 +1,215 @@ +"""Estimation heuristique du GHM (Groupe HomogĂšne de Malades). + +L'algorithme officiel (ATIH FG-MCO) est propriĂ©taire. 
Ce module fournit une +estimation approximative utile comme prĂ©-codage / aide au DIM : + 1. CMD depuis le DP (table de plages CIM-10) + 2. Type de prise en charge depuis les actes CCAM + 3. SĂ©vĂ©ritĂ© depuis les CMA/CMS + 4. Construction du code GHM approximatif +""" + +from __future__ import annotations + +import bisect +from typing import Optional + +from ..config import DossierMedical, GHMEstimation + + +# --------------------------------------------------------------------------- +# Table CIM-10 → CMD (CatĂ©gorie Majeure de Diagnostic) +# TriĂ©e par borne infĂ©rieure pour lookup par bisect. +# Format : (debut, fin, cmd, libelle) +# --------------------------------------------------------------------------- + +_CMD_RANGES: list[tuple[str, str, str, str]] = [ + ("A00", "A99", "18", "Maladies infectieuses et parasitaires"), + ("B00", "B19", "18", "Maladies infectieuses et parasitaires"), + ("B20", "B24", "25", "Maladies dues au VIH"), + ("B25", "B99", "18", "Maladies infectieuses et parasitaires"), + ("C00", "C97", "17", "Tumeurs malignes"), + ("D00", "D09", "17", "Tumeurs malignes"), + ("D10", "D48", "16", "Tumeurs bĂ©nignes, hĂ©mopathies"), + ("D50", "D89", "16", "Tumeurs bĂ©nignes, hĂ©mopathies"), + ("E00", "E07", "10", "Maladies endocriniennes"), + ("E10", "E14", "10", "Maladies endocriniennes"), + ("E15", "E46", "10", "Maladies endocriniennes"), + ("E47", "E90", "10", "Maladies endocriniennes"), + ("F00", "F09", "19", "Maladies mentales"), + ("F10", "F19", "20", "Troubles mentaux liĂ©s Ă  l'alcool et aux toxiques"), + ("F20", "F99", "19", "Maladies mentales"), + ("G00", "G99", "01", "Affections du systĂšme nerveux"), + ("H00", "H59", "02", "Affections de l'oeil"), + ("H60", "H95", "03", "Affections ORL"), + ("I00", "I99", "05", "Affections de l'appareil circulatoire"), + ("J00", "J99", "04", "Affections de l'appareil respiratoire"), + ("K00", "K67", "06", "Affections du tube digestif"), + ("K70", "K87", "07", "Affections hĂ©patobiliaires et 
pancrĂ©atiques"), + ("K90", "K93", "06", "Affections du tube digestif"), + ("L00", "L99", "09", "Affections de la peau"), + ("M00", "M99", "08", "Affections du systĂšme ostĂ©o-articulaire"), + ("N00", "N39", "11", "Affections du rein et des voies urinaires"), + ("N40", "N51", "12", "Affections de l'appareil gĂ©nital masculin"), + ("N60", "N98", "13", "Affections de l'appareil gĂ©nital fĂ©minin"), + ("N99", "N99", "11", "Affections du rein et des voies urinaires"), + ("O00", "O99", "14", "Grossesses, accouchements, post-partum"), + ("P00", "P96", "15", "Nouveau-nĂ©s, pĂ©riode pĂ©rinatale"), + ("Q00", "Q99", "15", "Nouveau-nĂ©s, pĂ©riode pĂ©rinatale"), + ("R00", "R99", "23", "Facteurs influençant l'Ă©tat de santĂ© (symptĂŽmes)"), + ("S00", "S99", "21", "Traumatismes"), + ("T00", "T19", "21", "Traumatismes"), + ("T20", "T32", "22", "BrĂ»lures"), + ("T33", "T98", "21", "Traumatismes"), + ("U00", "U99", "26", "CatĂ©gories spĂ©ciales"), + ("V00", "Y98", "24", "Causes externes"), + ("Z00", "Z99", "23", "Facteurs influençant l'Ă©tat de santĂ©"), +] + +# PrĂ©-calcul : liste triĂ©e des bornes infĂ©rieures pour bisect +_CMD_STARTS = [r[0] for r in _CMD_RANGES] + + +def find_cmd(code_cim10: str) -> tuple[Optional[str], Optional[str]]: + """Trouve la CMD correspondant Ă  un code CIM-10. + + Returns: + (cmd, libelle) ou (None, None) si non trouvĂ©. 
+ """ + if not code_cim10: + return None, None + + # Normaliser : majuscules, retirer le point + code = code_cim10.upper().replace(".", "").strip() + if len(code) < 3: + return None, None + + # Prendre les 3 premiers caractĂšres pour le lookup + code3 = code[:3] + + # bisect pour trouver la plage candidate + idx = bisect.bisect_right(_CMD_STARTS, code3) - 1 + if idx < 0: + return None, None + + debut, fin, cmd, libelle = _CMD_RANGES[idx] + if debut <= code3 <= fin: + return cmd, libelle + + return None, None + + +# --------------------------------------------------------------------------- +# PrĂ©fixes CCAM classants (chirurgicaux) +# Les codes CCAM commençant par ces lettres correspondent Ă  des organes +# et sont considĂ©rĂ©s chirurgicaux quand ils dĂ©signent un acte opĂ©ratoire. +# --------------------------------------------------------------------------- + +_CCAM_CHIRURGICAL_PREFIXES = {"H", "J", "K", "L", "N", "P", "Q"} + +# PrĂ©fixes interventionnels (imagerie, endoscopie) +_CCAM_INTERVENTIONNEL_PREFIXES = {"Z", "Y"} + + +def _detect_type_ghm(actes_ccam: list) -> str: + """DĂ©termine le type de prise en charge depuis les actes CCAM. + + Returns: + "C" (chirurgical), "K" (interventionnel) ou "M" (mĂ©dical). + """ + has_chirurgical = False + has_interventionnel = False + + for acte in actes_ccam: + code = acte.code_ccam_suggestion + if not code or len(code) < 4: + continue + + prefix = code[0].upper() + if prefix in _CCAM_CHIRURGICAL_PREFIXES: + has_chirurgical = True + break + if prefix in _CCAM_INTERVENTIONNEL_PREFIXES: + has_interventionnel = True + + if has_chirurgical: + return "C" + if has_interventionnel: + return "K" + return "M" + + +def _compute_severity(das_list: list) -> tuple[int, int, int]: + """Calcule le niveau de sĂ©vĂ©ritĂ© Ă  partir des DAS. 
+ + Returns: + (niveau, cma_count, cms_count) + """ + cma_count = 0 + cms_count = 0 + + for das in das_list: + if getattr(das, "est_cma", False): + cma_count += 1 + if getattr(das, "est_cms", False): + cms_count += 1 + + if cms_count >= 2: + niveau = 4 + elif cms_count >= 1 or cma_count >= 3: + niveau = 3 + elif cma_count >= 2: + niveau = 2 + else: + niveau = 1 + + return niveau, cma_count, cms_count + + +def estimate_ghm(dossier: DossierMedical) -> GHMEstimation: + """Estime le GHM d'un dossier mĂ©dical. + + Heuristique en 4 Ă©tapes : + 1. CMD depuis le DP + 2. Type de prise en charge depuis les actes CCAM + 3. SĂ©vĂ©ritĂ© depuis les CMA/CMS + 4. Construction du code approximatif + """ + estimation = GHMEstimation() + + # 1. CMD depuis le DP + dp = dossier.diagnostic_principal + dp_code = dp.cim10_suggestion if dp else None + + if not dp: + estimation.alertes.append("DP absent — CMD non dĂ©terminable") + elif not dp_code: + estimation.alertes.append("DP sans code CIM-10 — CMD non dĂ©terminable") + else: + cmd, libelle = find_cmd(dp_code) + if cmd: + estimation.cmd = cmd + estimation.cmd_libelle = libelle + else: + estimation.alertes.append(f"CMD inconnue pour le code {dp_code}") + + # Alerte DP symptomatique + code_letter = dp_code.upper().replace(".", "").strip()[:1] + if code_letter in ("R", "Z"): + estimation.alertes.append( + f"DP symptomatique ({dp_code}) — risque de CMD 23, impact tarif" + ) + + # 2. Type de prise en charge + estimation.type_ghm = _detect_type_ghm(dossier.actes_ccam) + + # 3. SĂ©vĂ©ritĂ© + niveau, cma_count, cms_count = _compute_severity(dossier.diagnostics_associes) + estimation.severite = niveau + estimation.cma_count = cma_count + estimation.cms_count = cms_count + + # 4. 
Code approximatif + if estimation.cmd and estimation.type_ghm: + estimation.ghm_approx = f"{estimation.cmd}{estimation.type_ghm}??{estimation.severite}" + + return estimation diff --git a/src/medical/ollama_cache.py b/src/medical/ollama_cache.py new file mode 100644 index 0000000..caf6c5f --- /dev/null +++ b/src/medical/ollama_cache.py @@ -0,0 +1,85 @@ +"""Cache persistant thread-safe pour les rĂ©sultats Ollama.""" + +from __future__ import annotations + +import json +import logging +import threading +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class OllamaCache: + """Cache JSON persistant pour Ă©viter les appels Ollama redondants. + + ClĂ© = (texte_diagnostic_normalisĂ©, type). + Le modĂšle Ollama est stockĂ© dans les mĂ©tadonnĂ©es : si le modĂšle change, + le cache est automatiquement invalidĂ©. + """ + + def __init__(self, cache_path: Path, model: str): + self._path = cache_path + self._model = model + self._lock = threading.Lock() + self._data: dict[str, dict] = {} + self._dirty = False + self._load() + + def _load(self) -> None: + """Charge le cache depuis le disque.""" + if not self._path.exists(): + logger.info("Cache Ollama : nouveau cache (%s)", self._path) + return + try: + raw = json.loads(self._path.read_text(encoding="utf-8")) + if raw.get("model") != self._model: + logger.info( + "Cache Ollama : modĂšle changĂ© (%s → %s), cache invalidĂ©", + raw.get("model"), self._model, + ) + return + self._data = raw.get("entries", {}) + logger.info("Cache Ollama : %d entrĂ©es chargĂ©es", len(self._data)) + except (json.JSONDecodeError, KeyError) as e: + logger.warning("Cache Ollama : fichier corrompu (%s), rĂ©initialisĂ©", e) + self._data = {} + + @staticmethod + def _make_key(texte: str, diag_type: str) -> str: + """Construit une clĂ© normalisĂ©e.""" + return f"{diag_type}::{texte.strip().lower()}" + + def get(self, texte: str, diag_type: str) -> dict | None: + """RĂ©cupĂšre un rĂ©sultat cachĂ©, ou None si absent.""" + key = 
self._make_key(texte, diag_type) + with self._lock: + return self._data.get(key) + + def put(self, texte: str, diag_type: str, result: dict) -> None: + """Stocke un rĂ©sultat dans le cache.""" + key = self._make_key(texte, diag_type) + with self._lock: + self._data[key] = result + self._dirty = True + + def save(self) -> None: + """Persiste le cache sur disque si modifiĂ©.""" + with self._lock: + if not self._dirty: + return + self._path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "model": self._model, + "entries": self._data, + } + self._path.write_text( + json.dumps(payload, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + self._dirty = False + logger.info("Cache Ollama : %d entrĂ©es sauvegardĂ©es", len(self._data)) + + def __len__(self) -> int: + with self._lock: + return len(self._data) diff --git a/src/medical/ollama_client.py b/src/medical/ollama_client.py new file mode 100644 index 0000000..7050869 --- /dev/null +++ b/src/medical/ollama_client.py @@ -0,0 +1,80 @@ +"""Client Ollama partagĂ© — appel LLM en mode JSON natif.""" + +from __future__ import annotations + +import json +import logging + +import requests + +from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT + +logger = logging.getLogger(__name__) + + +def parse_json_response(raw: str) -> dict | None: + """Parse une rĂ©ponse JSON d'Ollama, en gĂ©rant les blocs markdown.""" + text = raw.strip() + if text.startswith("```"): + first_nl = text.find("\n") + if first_nl != -1: + text = text[first_nl + 1:] + if text.rstrip().endswith("```"): + text = text.rstrip()[:-3] + text = text.strip() + + try: + return json.loads(text) + except json.JSONDecodeError: + logger.warning("Ollama : JSON invalide : %s", raw[:200]) + return None + + +def call_ollama( + prompt: str, + temperature: float = 0.1, + max_tokens: int = 2500, +) -> dict | None: + """Appelle Ollama en mode JSON natif avec retry. + + Args: + prompt: Le prompt Ă  envoyer. 
+ temperature: TempĂ©rature de gĂ©nĂ©ration (dĂ©faut: 0.1). + max_tokens: Nombre max de tokens (dĂ©faut: 2500). + + Returns: + Le dict JSON parsĂ©, ou None en cas d'erreur. + """ + for attempt in range(2): + try: + response = requests.post( + f"{OLLAMA_URL}/api/generate", + json={ + "model": OLLAMA_MODEL, + "prompt": prompt, + "stream": False, + "format": "json", + "options": { + "temperature": temperature, + "num_predict": max_tokens, + }, + }, + timeout=OLLAMA_TIMEOUT, + ) + response.raise_for_status() + raw = response.json().get("response", "") + result = parse_json_response(raw) + if result is not None: + return result + if attempt == 0: + logger.info("Ollama : retry aprĂšs Ă©chec de parsing") + except requests.ConnectionError: + logger.warning("Ollama non disponible (connexion refusĂ©e)") + return None + except requests.Timeout: + logger.warning("Ollama timeout aprĂšs %ds", OLLAMA_TIMEOUT) + return None + except (requests.RequestException, json.JSONDecodeError) as e: + logger.warning("Ollama erreur : %s", e) + return None + return None diff --git a/src/medical/rag_search.py b/src/medical/rag_search.py index 223358a..c4b221d 100644 --- a/src/medical/rag_search.py +++ b/src/medical/rag_search.py @@ -2,12 +2,17 @@ from __future__ import annotations -import json import logging +from concurrent.futures import ThreadPoolExecutor, as_completed -import requests - -from ..config import Diagnostic, DossierMedical, RAGSource, OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT +from ..config import ( + ActeCCAM, Diagnostic, DossierMedical, RAGSource, + OLLAMA_CACHE_PATH, OLLAMA_MAX_PARALLEL, OLLAMA_MODEL, +) +from .cim10_dict import normalize_code, validate_code as cim10_validate +from .ccam_dict import validate_code as ccam_validate +from .ollama_client import call_ollama, parse_json_response +from .ollama_cache import OllamaCache logger = logging.getLogger(__name__) @@ -85,6 +90,52 @@ def search_similar(query: str, top_k: int = 10) -> list[dict]: return final +def 
search_similar_ccam(query: str, top_k: int = 8) -> list[dict]: + """Recherche les passages CCAM les plus similaires dans l'index FAISS. + + MĂȘme logique que search_similar() mais priorise les sources CCAM. + """ + from .rag_index import get_index + import numpy as np + + result = get_index() + if result is None: + logger.warning("Index FAISS non disponible") + return [] + + faiss_index, metadata = result + + model = _get_embed_model() + query_vec = model.encode([query], normalize_embeddings=True) + query_vec = np.array(query_vec, dtype=np.float32) + + fetch_k = min(top_k * 2, faiss_index.ntotal) + scores, indices = faiss_index.search(query_vec, fetch_k) + + raw_results = [] + for score, idx in zip(scores[0], indices[0]): + if idx < 0: + continue + if float(score) < _MIN_SCORE: + continue + meta = metadata[idx].copy() + meta["score"] = float(score) + raw_results.append(meta) + + # Prioriser les sources CCAM (au moins 5 sur top_k) + ccam_results = [r for r in raw_results if r["document"] == "ccam"] + other_results = [r for r in raw_results if r["document"] != "ccam"] + + min_ccam = min(5, len(ccam_results)) + final = ccam_results[:min_ccam] + remaining_slots = top_k - len(final) + remaining = ccam_results[min_ccam:] + other_results + remaining.sort(key=lambda r: r["score"], reverse=True) + final.extend(remaining[:remaining_slots]) + + return final + + def _format_contexte(contexte: dict) -> str: """Formate le contexte patient de maniĂšre structurĂ©e pour le prompt.""" lines = [] @@ -193,31 +244,63 @@ RĂ©ponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant }}""" +def _build_prompt_ccam(texte: str, sources: list[dict], contexte: dict) -> str: + """Construit le prompt expert DIM pour le codage CCAM avec raisonnement structurĂ©.""" + sources_text = "" + for i, src in enumerate(sources, 1): + doc_name = { + "cim10": "CIM-10 FR 2026", + "cim10_alpha": "CIM-10 Index AlphabĂ©tique 2026", + "guide_methodo": "Guide MĂ©thodologique MCO 2026", + "ccam": 
"CCAM PMSI V4 2025", + }.get(src["document"], src["document"]) + + code_info = f" (code: {src['code']})" if src.get("code") else "" + page_info = f" [page {src['page']}]" if src.get("page") else "" + + sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n" + sources_text += (src.get("extrait", "")[:800]) + "\n\n" + + ctx_str = _format_contexte(contexte) + + return f"""Tu es un mĂ©decin DIM (DĂ©partement d'Information MĂ©dicale) expert en codage CCAM PMSI. +Tu dois coder l'acte chirurgical/mĂ©dical suivant en respectant STRICTEMENT la nomenclature CCAM. + +RÈGLES IMPÉRATIVES : +- Le code doit provenir UNIQUEMENT des sources CCAM fournies +- Un code CCAM est composĂ© de 4 lettres + 3 chiffres (ex: HMFC004) +- VĂ©rifie l'activitĂ© (1=acte technique, 4=anesthĂ©sie) et le regroupement +- Tiens compte du tarif secteur 1 pour valider la cohĂ©rence +- Si plusieurs codes sont possibles, choisis le plus spĂ©cifique Ă  l'acte dĂ©crit +- En cas de doute, indique confidence "low" plutĂŽt que de proposer un code inadaptĂ© + +ACTE À CODER : "{texte}" + +CONTEXTE CLINIQUE : +{ctx_str} + +SOURCES CCAM : +{sources_text} +RĂ©ponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant ou aprĂšs : +{{ + "analyse_acte": "que dĂ©crit cet acte sur le plan technique/chirurgical", + "codes_candidats": "quels codes CCAM des sources sont compatibles", + "discrimination": "pourquoi choisir ce code plutĂŽt qu'un autre (activitĂ©, regroupement, tarif)", + "code": "ABCD123", + "confidence": "high ou medium ou low", + "justification": "explication courte en français" +}}""" + + def _parse_ollama_response(raw: str) -> dict | None: - """Parse la rĂ©ponse JSON d'Ollama (mode JSON). - - Reconstitue le raisonnement Ă  partir des champs structurĂ©s. - """ - # Stripper les blocs markdown ```json ... 
``` que certains modĂšles ajoutent - text = raw.strip() - if text.startswith("```"): - first_nl = text.find("\n") - if first_nl != -1: - text = text[first_nl + 1:] - # Retirer la fence fermante seulement si elle existe en fin de texte - if text.rstrip().endswith("```"): - text = text.rstrip()[:-3] - text = text.strip() - - try: - parsed = json.loads(text) - except json.JSONDecodeError: - logger.warning("Ollama : JSON invalide : %s", raw[:200]) + """Parse la rĂ©ponse JSON d'Ollama et reconstitue le raisonnement structurĂ©.""" + parsed = parse_json_response(raw) + if parsed is None: return None # Reconstituer le raisonnement Ă  partir des champs structurĂ©s reasoning_parts = [] - for key in ("analyse_clinique", "codes_candidats", "discrimination", "regle_pmsi"): + for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"): val = parsed.pop(key, None) if val: titre = key.replace("_", " ").upper() @@ -229,59 +312,70 @@ def _parse_ollama_response(raw: str) -> dict | None: def _call_ollama(prompt: str) -> dict | None: - """Appelle Ollama (mode JSON) et parse la rĂ©ponse. 
Retry une fois si parsing Ă©choue.""" - for attempt in range(2): - try: - response = requests.post( - f"{OLLAMA_URL}/api/generate", - json={ - "model": OLLAMA_MODEL, - "prompt": prompt, - "stream": False, - "format": "json", - "options": { - "temperature": 0.1, - "num_predict": 2500, - }, - }, - timeout=OLLAMA_TIMEOUT, + """Appelle Ollama (mode JSON) et parse la rĂ©ponse avec reconstitution du raisonnement.""" + result = call_ollama(prompt, temperature=0.1, max_tokens=2500) + if result is None: + return None + # Reconstituer le raisonnement structurĂ© + reasoning_parts = [] + for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"): + val = result.pop(key, None) + if val: + titre = key.replace("_", " ").upper() + reasoning_parts.append(f"{titre} :\n{val}") + if reasoning_parts: + result["raisonnement"] = "\n\n".join(reasoning_parts) + return result + + +def _apply_llm_result_diagnostic(diagnostic: Diagnostic, llm_result: dict) -> None: + """Applique un rĂ©sultat LLM (frais ou cachĂ©) Ă  un Diagnostic.""" + code = llm_result.get("code") + confidence = llm_result.get("confidence") + justification = llm_result.get("justification") + raisonnement = llm_result.get("raisonnement") + + if code: + code = normalize_code(code) + is_valid, _ = cim10_validate(code) + if is_valid: + diagnostic.cim10_suggestion = code + else: + logger.warning( + "RAG : code Ollama %s invalide pour « %s », code ignorĂ©", + code, diagnostic.texte, ) - response.raise_for_status() - raw = response.json().get("response", "") - result = _parse_ollama_response(raw) - if result is not None: - return result - if attempt == 0: - logger.info("Ollama : retry aprĂšs Ă©chec de parsing") - except requests.ConnectionError: - logger.warning("Ollama non disponible (connexion refusĂ©e)") - return None - except requests.Timeout: - logger.warning("Ollama timeout aprĂšs %ds", OLLAMA_TIMEOUT) - return None - except (requests.RequestException, json.JSONDecodeError) as e: - 
logger.warning("Ollama erreur : %s", e) - return None - return None + if confidence in ("high", "medium", "low"): + diagnostic.cim10_confidence = confidence + if justification: + diagnostic.justification = justification + if raisonnement: + diagnostic.raisonnement = raisonnement def enrich_diagnostic( diagnostic: Diagnostic, contexte: dict, est_dp: bool = True, + cache: OllamaCache | None = None, ) -> None: """Enrichit un Diagnostic avec le RAG (FAISS + Ollama). Modifie le diagnostic en place. Fallback gracieux si FAISS ou Ollama Ă©chouent. """ - # 1. Recherche FAISS + diag_type = "dp" if est_dp else "das" + + # 1. VĂ©rifier le cache + cached = cache.get(diagnostic.texte, diag_type) if cache else None + + # 2. Recherche FAISS (toujours, pour les sources_rag fraĂźches) sources = search_similar(diagnostic.texte, top_k=10) if not sources: logger.debug("Aucune source RAG trouvĂ©e pour : %s", diagnostic.texte) return - # 2. Stocker les sources RAG + # 3. Stocker les sources RAG diagnostic.sources_rag = [ RAGSource( document=s["document"], @@ -292,30 +386,101 @@ def enrich_diagnostic( for s in sources ] - # 3. Appel Ollama pour justification avec raisonnement structurĂ© + # 4. Si cache hit, appliquer et court-circuiter Ollama + if cached is not None: + logger.info("Cache hit pour %s : « %s »", diag_type.upper(), diagnostic.texte) + _apply_llm_result_diagnostic(diagnostic, cached) + return + + # 5. 
Appel Ollama pour justification avec raisonnement structurĂ© prompt = _build_prompt(diagnostic.texte, sources, contexte, est_dp=est_dp) llm_result = _call_ollama(prompt) if llm_result: - code = llm_result.get("code") - confidence = llm_result.get("confidence") - justification = llm_result.get("justification") - raisonnement = llm_result.get("raisonnement") - - if code: - diagnostic.cim10_suggestion = code - if confidence in ("high", "medium", "low"): - diagnostic.cim10_confidence = confidence - if justification: - diagnostic.justification = justification - if raisonnement: - diagnostic.raisonnement = raisonnement + _apply_llm_result_diagnostic(diagnostic, llm_result) + if cache: + cache.put(diagnostic.texte, diag_type, llm_result) else: logger.info("Ollama non disponible — sources FAISS conservĂ©es sans justification LLM") +def _apply_llm_result_acte(acte: ActeCCAM, llm_result: dict) -> None: + """Applique un rĂ©sultat LLM (frais ou cachĂ©) Ă  un ActeCCAM.""" + code = llm_result.get("code") + confidence = llm_result.get("confidence") + justification = llm_result.get("justification") + raisonnement = llm_result.get("raisonnement") + + if code: + code = code.strip().upper() + is_valid, _ = ccam_validate(code) + if is_valid: + acte.code_ccam_suggestion = code + else: + logger.warning( + "RAG : code CCAM Ollama %s invalide pour « %s », code ignorĂ©", + code, acte.texte, + ) + if confidence in ("high", "medium", "low"): + acte.ccam_confidence = confidence + if justification: + acte.justification = justification + if raisonnement: + acte.raisonnement = raisonnement + + +def enrich_acte(acte: ActeCCAM, contexte: dict, cache: OllamaCache | None = None) -> None: + """Enrichit un ActeCCAM avec le RAG (FAISS + Ollama). + + Modifie l'acte en place. Fallback gracieux si FAISS ou Ollama Ă©chouent. + """ + # 1. VĂ©rifier le cache + cached = cache.get(acte.texte, "ccam") if cache else None + + # 2. 
Recherche FAISS (sources CCAM priorisĂ©es) + sources = search_similar_ccam(acte.texte, top_k=8) + + if not sources: + logger.debug("Aucune source RAG CCAM trouvĂ©e pour : %s", acte.texte) + return + + # 3. Stocker les sources RAG + acte.sources_rag = [ + RAGSource( + document=s["document"], + page=s.get("page"), + code=s.get("code"), + extrait=s.get("extrait", "")[:200], + ) + for s in sources + ] + + # 4. Si cache hit, appliquer et court-circuiter Ollama + if cached is not None: + logger.info("Cache hit pour CCAM : « %s »", acte.texte) + _apply_llm_result_acte(acte, cached) + return + + # 5. Appel Ollama pour justification avec raisonnement structurĂ© + prompt = _build_prompt_ccam(acte.texte, sources, contexte) + llm_result = _call_ollama(prompt) + + if llm_result: + _apply_llm_result_acte(acte, llm_result) + if cache: + cache.put(acte.texte, "ccam", llm_result) + else: + logger.info("Ollama non disponible — sources FAISS CCAM conservĂ©es sans justification LLM") + + def enrich_dossier(dossier: DossierMedical) -> None: - """Enrichit le DP et tous les DAS d'un dossier via le RAG.""" + """Enrichit le DP et tous les DAS d'un dossier via le RAG. + + Utilise un cache persistant et parallĂ©lise les appels Ollama + pour les DAS et actes CCAM (max_workers = OLLAMA_MAX_PARALLEL). 
+ """ + cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL) + contexte = { "sexe": dossier.sejour.sexe, "age": dossier.sejour.age, @@ -327,11 +492,12 @@ def enrich_dossier(dossier: DossierMedical) -> None: "complications": dossier.complications, } + # Phase 1 : DP seul (le contexte DAS en dĂ©pend) if dossier.diagnostic_principal: logger.info("RAG enrichissement DP : %s", dossier.diagnostic_principal.texte) - enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True) + enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True, cache=cache) - # Pour les DAS, ajouter le DP et les DAS existants au contexte pour cohĂ©rence + # Mettre Ă  jour le contexte avec le DP pour les DAS if dossier.diagnostic_principal: contexte["dp_texte"] = dossier.diagnostic_principal.texte contexte["das_codes_existants"] = [ @@ -340,6 +506,20 @@ def enrich_dossier(dossier: DossierMedical) -> None: if d.cim10_suggestion ] - for das in dossier.diagnostics_associes: - logger.info("RAG enrichissement DAS : %s", das.texte) - enrich_diagnostic(das, contexte, est_dp=False) + # Phase 2 : DAS + Actes en parallĂšle + das_list = dossier.diagnostics_associes + actes_list = dossier.actes_ccam + + if das_list or actes_list: + with ThreadPoolExecutor(max_workers=OLLAMA_MAX_PARALLEL) as executor: + futures = [] + for das in das_list: + logger.info("RAG enrichissement DAS : %s", das.texte) + futures.append(executor.submit(enrich_diagnostic, das, contexte, False, cache)) + for acte in actes_list: + logger.info("RAG enrichissement CCAM : %s", acte.texte) + futures.append(executor.submit(enrich_acte, acte, contexte, cache)) + for f in as_completed(futures): + f.result() # propage les exceptions + + cache.save() diff --git a/src/medical/severity.py b/src/medical/severity.py index 7f97909..086372d 100644 --- a/src/medical/severity.py +++ b/src/medical/severity.py @@ -158,7 +158,7 @@ def evaluate_severity(diagnostic) -> SeverityInfo: return info -def enrich_dossier_severity(dp, das_list: 
list) -> list[str]: +def enrich_dossier_severity(dp, das_list: list) -> tuple[list[str], int, int]: """Enrichit les diagnostics d'un dossier avec les informations de sĂ©vĂ©ritĂ©. Modifie les diagnostics en place (attributs est_cma, est_cms, niveau_severite). @@ -168,7 +168,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]: das_list: Liste des diagnostics associĂ©s. Returns: - Liste d'alertes de sĂ©vĂ©ritĂ© gĂ©nĂ©rĂ©es. + (alertes, cma_count, cms_count). """ alertes = [] @@ -181,6 +181,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]: # Évaluer chaque DAS cma_count = 0 + cms_count = 0 for das in das_list: if not das.cim10_suggestion: continue @@ -189,6 +190,10 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]: if info.est_cma_probable: das.est_cma = True cma_count += 1 + # CMS = CMA sĂ©vĂšre + if info.niveau_severite == "severe": + das.est_cms = True + cms_count += 1 alertes.append( f"CMA probable : '{das.texte}' ({das.cim10_suggestion}) — " f"sĂ©vĂ©ritĂ© {info.niveau_severite}" @@ -198,4 +203,4 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]: if cma_count >= 2: alertes.insert(0, f"{cma_count} CMA probables dĂ©tectĂ©es — impact potentiel sur le niveau de sĂ©vĂ©ritĂ© GHM") - return alertes + return alertes, cma_count, cms_count diff --git a/src/viewer/templates/detail.html b/src/viewer/templates/detail.html index 2056aa3..edada47 100644 --- a/src/viewer/templates/detail.html +++ b/src/viewer/templates/detail.html @@ -69,6 +69,125 @@ {% endif %} +{# ---- Estimation GHM ---- #} +{% if dossier.ghm_estimation %} +{% set ghm = dossier.ghm_estimation %} +
+

Estimation GHM

+
+ {% if ghm.cmd %} +
+ + {{ ghm.cmd }}{% if ghm.cmd_libelle %} — {{ ghm.cmd_libelle }}{% endif %} +
+ {% endif %} +
+ + {% if ghm.type_ghm == 'C' %} + C — Chirurgical + {% elif ghm.type_ghm == 'K' %} + K — Interventionnel + {% elif ghm.type_ghm == 'M' %} + M — MĂ©dical + {% endif %} +
+
+ + {% if ghm.severite <= 1 %} + Niveau {{ ghm.severite }} + {% elif ghm.severite == 2 %} + Niveau {{ ghm.severite }} + {% elif ghm.severite == 3 %} + Niveau {{ ghm.severite }} + {% else %} + Niveau {{ ghm.severite }} + {% endif %} +
+ {% if ghm.ghm_approx %} +
+ + {{ ghm.ghm_approx }} +
+ {% endif %} +
+ + {{ ghm.cma_count }} CMA, {{ ghm.cms_count }} CMS +
+
+ {% if ghm.alertes %} +
+ {% for alerte in ghm.alertes %} +
{{ alerte }}
+ {% endfor %} +
+ {% endif %} +
+ Estimation heuristique — le GHM dĂ©finitif nĂ©cessite le groupeur officiel ATIH +
+
+{% endif %} + +{# ---- ContrĂŽle CPAM ---- #} +{% if dossier.controles_cpam %} +
+

ContrĂŽle CPAM ({{ dossier.controles_cpam|length }})

+ {% for ctrl in dossier.controles_cpam %} +
+
+ OGC {{ ctrl.numero_ogc }} — {{ ctrl.titre }} + {% if 'retient' in ctrl.decision_ucr|lower %} + {{ ctrl.decision_ucr }} + {% elif 'confirme' in ctrl.decision_ucr|lower %} + {{ ctrl.decision_ucr }} + {% else %} + {{ ctrl.decision_ucr }} + {% endif %} +
+ + {# Argument CPAM #} + {% if ctrl.arg_ucr %} +
+
Argument CPAM
+ {{ ctrl.arg_ucr }} +
+ {% endif %} + + {# Codes contestés #} + {% if ctrl.dp_ucr or ctrl.da_ucr or ctrl.dr_ucr or ctrl.actes_ucr %} +
+
Codes contestés
+
+ {% if ctrl.dp_ucr %}DP: {{ ctrl.dp_ucr }}{% endif %} + {% if ctrl.da_ucr %}DA: {{ ctrl.da_ucr }}{% endif %} + {% if ctrl.dr_ucr %}DR: {{ ctrl.dr_ucr }}{% endif %} + {% if ctrl.actes_ucr %}Actes: {{ ctrl.actes_ucr }}{% endif %} +
+
+ {% endif %} + + {# Contre-argumentation #} + {% if ctrl.contre_argumentation %} +
+
Contre-argumentation
+
{{ ctrl.contre_argumentation }}
+
+ {% endif %} + + {# Sources RAG #} + {% if ctrl.sources_reponse %} +
+ Sources RAG ({{ ctrl.sources_reponse|length }}) + {% for src in ctrl.sources_reponse %} +
{{ src.document }}{% if src.code %} — {{ src.code }}{% endif %}{% if src.page %} [p.{{ src.page }}]{% endif %}
+{{ src.extrait or '' }}
+ {% endfor %} +
+ {% endif %} +
+ {% endfor %} +
+{% endif %} + {# ---- Alertes de codage ---- #} {% if dossier.alertes_codage %}
diff --git a/tests/test_cpam_parser.py b/tests/test_cpam_parser.py new file mode 100644 index 0000000..6cd7483 --- /dev/null +++ b/tests/test_cpam_parser.py @@ -0,0 +1,130 @@ +"""Tests pour le parser de contrĂŽle CPAM.""" + +import tempfile +from pathlib import Path + +import openpyxl +import pytest + +from src.config import ControleCPAM +from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel + + +def _create_test_xlsx(rows: list[tuple], path: Path) -> None: + """CrĂ©e un fichier xlsx de test avec les lignes donnĂ©es.""" + wb = openpyxl.Workbook() + ws = wb.active + ws.title = "OGC ContrĂŽle T2A" + ws.append(("N° OGC", "Titre", "Arg_UCR", "DĂ©cision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")) + for row in rows: + ws.append(row) + wb.save(path) + + +class TestParseCpamExcel: + def test_parse_basic(self, tmp_path): + xlsx = tmp_path / "test.xlsx" + _create_test_xlsx([ + (17, "DĂ©saccord sur les DAS", "Argument UCR...", "UCR retient", None, None, None, None), + (21, "DĂ©saccord sur le DP", "Autre argument", "UCR confirme avis", "K85.1", None, None, None), + ], xlsx) + + result = parse_cpam_excel(xlsx) + + assert 17 in result + assert 21 in result + assert len(result[17]) == 1 + assert len(result[21]) == 1 + assert result[17][0].titre == "DĂ©saccord sur les DAS" + assert result[17][0].decision_ucr == "UCR retient" + assert result[21][0].dp_ucr == "K85.1" + + def test_parse_multiple_same_ogc(self, tmp_path): + xlsx = tmp_path / "test.xlsx" + _create_test_xlsx([ + (17, "Titre 1", "Arg 1", "DĂ©cision 1", None, None, None, None), + (17, "Titre 2", "Arg 2", "DĂ©cision 2", None, None, None, None), + ], xlsx) + + result = parse_cpam_excel(xlsx) + + assert len(result[17]) == 2 + + def test_parse_empty_file(self, tmp_path): + xlsx = tmp_path / "empty.xlsx" + _create_test_xlsx([], xlsx) + + result = parse_cpam_excel(xlsx) + + assert result == {} + + def test_parse_nonexistent_file(self): + result = parse_cpam_excel("/nonexistent/path.xlsx") + assert result 
== {} + + def test_parse_optional_fields(self, tmp_path): + xlsx = tmp_path / "test.xlsx" + _create_test_xlsx([ + (42, "Titre", "Arg", "DĂ©cision", "E11.40", "G63.2", "E11.9", "ABCD123"), + ], xlsx) + + result = parse_cpam_excel(xlsx) + + ctrl = result[42][0] + assert ctrl.dp_ucr == "E11.40" + assert ctrl.da_ucr == "G63.2" + assert ctrl.dr_ucr == "E11.9" + assert ctrl.actes_ucr == "ABCD123" + + +class TestMatchDossierOGC: + def setup_method(self): + self.cpam_data = { + 17: [ControleCPAM(numero_ogc=17, titre="Test 17")], + 21: [ControleCPAM(numero_ogc=21, titre="Test 21")], + } + + def test_match_found(self): + result = match_dossier_ogc("17_23100690", self.cpam_data) + assert len(result) == 1 + assert result[0].numero_ogc == 17 + + def test_match_not_found(self): + result = match_dossier_ogc("15_23096332", self.cpam_data) + assert result == [] + + def test_match_no_prefix(self): + result = match_dossier_ogc("nodash", self.cpam_data) + assert result == [] + + def test_match_empty_data(self): + result = match_dossier_ogc("17_23100690", {}) + assert result == [] + + +class TestControleCPAMModel: + def test_serialization(self): + ctrl = ControleCPAM( + numero_ogc=17, + titre="DĂ©saccord sur les DAS", + arg_ucr="Argument...", + decision_ucr="UCR retient", + dp_ucr="K85.1", + ) + data = ctrl.model_dump() + assert data["numero_ogc"] == 17 + assert data["dp_ucr"] == "K85.1" + assert data["contre_argumentation"] is None + + def test_deserialization(self): + data = { + "numero_ogc": 21, + "titre": "Test", + "arg_ucr": "Arg", + "decision_ucr": "DĂ©cision", + "contre_argumentation": "Ma rĂ©ponse", + } + ctrl = ControleCPAM(**data) + assert ctrl.numero_ogc == 21 + assert ctrl.contre_argumentation == "Ma rĂ©ponse" + assert ctrl.sources_reponse == [] diff --git a/tests/test_cpam_response.py b/tests/test_cpam_response.py new file mode 100644 index 0000000..192d91a --- /dev/null +++ b/tests/test_cpam_response.py @@ -0,0 +1,146 @@ +"""Tests pour la gĂ©nĂ©ration de 
contre-argumentation CPAM.""" + +from unittest.mock import patch + +import pytest + +from src.config import ControleCPAM, Diagnostic, DossierMedical, RAGSource, Sejour +from src.control.cpam_response import _build_cpam_prompt, _format_response, generate_cpam_response + + +def _make_dossier() -> DossierMedical: + """CrĂ©e un dossier mĂ©dical de test.""" + return DossierMedical( + source_file="test.pdf", + document_type="crh", + sejour=Sejour(sexe="M", age=65, duree_sejour=5), + diagnostic_principal=Diagnostic( + texte="CholĂ©cystite aiguĂ«", + cim10_suggestion="K81.0", + ), + diagnostics_associes=[ + Diagnostic(texte="IlĂ©us rĂ©flexe", cim10_suggestion="K56.0"), + ], + ) + + +def _make_controle() -> ControleCPAM: + """CrĂ©e un contrĂŽle CPAM de test.""" + return ControleCPAM( + numero_ogc=17, + titre="DĂ©saccord sur les DAS", + arg_ucr="L'UCR confirme l'avis des mĂ©decins contrĂŽleurs au motif que le DAS K56.0 n'est pas justifiĂ©.", + decision_ucr="UCR confirme avis mĂ©decins contrĂŽleurs", + dp_ucr=None, + da_ucr="K56.0", + ) + + +class TestBuildPrompt: + def test_prompt_contains_dossier_info(self): + dossier = _make_dossier() + controle = _make_controle() + prompt = _build_cpam_prompt(dossier, controle, []) + + assert "CholĂ©cystite aiguĂ«" in prompt + assert "K81.0" in prompt + assert "IlĂ©us rĂ©flexe" in prompt + assert "65 ans" in prompt + + def test_prompt_contains_cpam_argument(self): + dossier = _make_dossier() + controle = _make_controle() + prompt = _build_cpam_prompt(dossier, controle, []) + + assert controle.arg_ucr in prompt + assert controle.decision_ucr in prompt + + def test_prompt_contains_codes_contestes(self): + dossier = _make_dossier() + controle = _make_controle() + prompt = _build_cpam_prompt(dossier, controle, []) + + assert "DA proposĂ©s par UCR : K56.0" in prompt + + def test_prompt_contains_rag_sources(self): + dossier = _make_dossier() + controle = _make_controle() + sources = [ + {"document": "guide_methodo", "page": 64, "extrait": 
"Texte du guide..."}, + {"document": "cim10", "code": "K56.0", "extrait": "IlĂ©us paralytique..."}, + ] + prompt = _build_cpam_prompt(dossier, controle, sources) + + assert "Guide MĂ©thodologique MCO 2026" in prompt + assert "CIM-10 FR 2026" in prompt + assert "page 64" in prompt + + +class TestFormatResponse: + def test_full_response(self): + parsed = { + "analyse_contestation": "La CPAM conteste le DAS K56.0", + "points_accord": "Aucun", + "contre_arguments": "Le guide mĂ©thodologique prĂ©cise...", + "references": "Guide mĂ©tho p.64", + "conclusion": "Le DAS est justifiĂ©", + } + text = _format_response(parsed) + + assert "ANALYSE DE LA CONTESTATION" in text + assert "CONTRE-ARGUMENTS" in text + assert "CONCLUSION" in text + # "Aucun" ne doit pas gĂ©nĂ©rer la section points d'accord + assert "POINTS D'ACCORD" not in text + + def test_partial_response(self): + parsed = { + "contre_arguments": "Arguments...", + "conclusion": "Conclusion...", + } + text = _format_response(parsed) + + assert "CONTRE-ARGUMENTS" in text + assert "CONCLUSION" in text + + def test_empty_response(self): + text = _format_response({}) + assert text == "" + + +class TestGenerateResponse: + @patch("src.control.cpam_response.call_ollama") + @patch("src.control.cpam_response._search_rag_for_control") + def test_generate_success(self, mock_rag, mock_ollama): + mock_rag.return_value = [ + {"document": "guide_methodo", "page": 64, "extrait": "Texte guide"}, + ] + mock_ollama.return_value = { + "analyse_contestation": "Analyse...", + "contre_arguments": "Contre-arguments...", + "conclusion": "Conclusion...", + } + + dossier = _make_dossier() + controle = _make_controle() + + text, sources = generate_cpam_response(dossier, controle) + + assert "Contre-arguments..." 
in text + assert len(sources) == 1 + assert sources[0].document == "guide_methodo" + mock_ollama.assert_called_once() + + @patch("src.control.cpam_response.call_ollama") + @patch("src.control.cpam_response._search_rag_for_control") + def test_generate_ollama_unavailable(self, mock_rag, mock_ollama): + mock_rag.return_value = [] + mock_ollama.return_value = None + + dossier = _make_dossier() + controle = _make_controle() + + text, sources = generate_cpam_response(dossier, controle) + + assert text == "" + assert sources == [] diff --git a/tests/test_das_filter.py b/tests/test_das_filter.py index 0967479..e9356be 100644 --- a/tests/test_das_filter.py +++ b/tests/test_das_filter.py @@ -104,3 +104,59 @@ class TestIsValidDiagnosticText: def test_accept_sepsis(self): assert is_valid_diagnostic_text("Sepsis sĂ©vĂšre") + + # --- RĂšgle 5 modifiĂ©e : mots dupliquĂ©s (2 mots identiques) --- + def test_reject_absence_absence(self): + assert not is_valid_diagnostic_text("Absence absence") + + def test_reject_anticoagulant_anticoagulant(self): + assert not is_valid_diagnostic_text("Anticoagulant anticoagulant") + + def test_reject_ventilation_ventilation(self): + assert not is_valid_diagnostic_text("Ventilation ventilation") + + # --- RĂšgle 7 : ponctuation initiale --- + def test_reject_comma_prefix(self): + assert not is_valid_diagnostic_text(", sans prĂ©cision") + + def test_reject_dash_prefix(self): + assert not is_valid_diagnostic_text("- masse musculaire") + + # --- RĂšgle 8 : valeurs numĂ©riques OCR "À X.X" --- + def test_reject_a_accent_value(self): + assert not is_valid_diagnostic_text("À 0.1") + + def test_reject_a_accent_value_3(self): + assert not is_valid_diagnostic_text("À 3.0") + + def test_reject_a_value(self): + assert not is_valid_diagnostic_text("A 12,5") + + # --- RĂšgle 9 : crochets (artefacts OCR) --- + def test_reject_bracket_fragment(self): + assert not is_valid_diagnostic_text("Episode [episode") + + def test_reject_closing_bracket(self): + assert not 
is_valid_diagnostic_text("valeur]") + + # --- RĂšgle 10 : termes de laboratoire isolĂ©s --- + def test_reject_hemoglobine(self): + assert not is_valid_diagnostic_text("HĂ©moglobine") + + def test_reject_creatinine(self): + assert not is_valid_diagnostic_text("CrĂ©atinine") + + def test_accept_hemoglobine_in_phrase(self): + """Un terme labo dans un contexte clinique est acceptĂ©.""" + assert is_valid_diagnostic_text("HĂ©moglobine basse avec anĂ©mie") + + # --- RĂšgle 11 : fragments anatomiques courts --- + def test_reject_dans_la_vessie(self): + assert not is_valid_diagnostic_text("Dans la vessie") + + def test_reject_le_rein(self): + assert not is_valid_diagnostic_text("Le rein") + + def test_accept_long_fragment(self): + """Un fragment long commençant par 'Dans' peut ĂȘtre lĂ©gitime.""" + assert is_valid_diagnostic_text("Dans le cadre d'une insuffisance rĂ©nale chronique terminale") diff --git a/tests/test_ghm.py b/tests/test_ghm.py new file mode 100644 index 0000000..e144e47 --- /dev/null +++ b/tests/test_ghm.py @@ -0,0 +1,189 @@ +"""Tests pour le module d'estimation GHM.""" + +import pytest + +from src.config import ActeCCAM, Diagnostic, DossierMedical +from src.medical.ghm import estimate_ghm, find_cmd, _detect_type_ghm, _compute_severity + + +class TestFindCMD: + def test_k85_hepatobilaire(self): + cmd, libelle = find_cmd("K85.1") + assert cmd == "07" + assert "hĂ©patobiliaire" in libelle.lower() or "pancrĂ©at" in libelle.lower() + + def test_j18_respiratoire(self): + cmd, _ = find_cmd("J18") + assert cmd == "04" + + def test_n17_renal(self): + cmd, _ = find_cmd("N17") + assert cmd == "11" + + def test_n40_genital_masculin(self): + cmd, _ = find_cmd("N40") + assert cmd == "12" + + def test_f10_toxicomanie(self): + cmd, _ = find_cmd("F10") + assert cmd == "20" + + def test_z00_facteurs(self): + cmd, _ = find_cmd("Z00") + assert cmd == "23" + + def test_k40_digestif(self): + cmd, _ = find_cmd("K40") + assert cmd == "06" + + def test_b20_vih(self): + cmd, _ = 
find_cmd("B20") + assert cmd == "25" + + def test_t25_brulures(self): + cmd, _ = find_cmd("T25") + assert cmd == "22" + + def test_s72_traumatismes(self): + cmd, _ = find_cmd("S72") + assert cmd == "21" + + def test_code_with_dot(self): + cmd, _ = find_cmd("K85.1") + assert cmd == "07" + + def test_code_lowercase(self): + cmd, _ = find_cmd("k85.1") + assert cmd == "07" + + def test_empty_code(self): + cmd, libelle = find_cmd("") + assert cmd is None + assert libelle is None + + def test_none_code(self): + cmd, libelle = find_cmd(None) + assert cmd is None + assert libelle is None + + def test_short_code(self): + cmd, libelle = find_cmd("K8") + assert cmd is None + + +class TestDetectTypeGHM: + def test_chirurgical(self): + actes = [ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004")] + assert _detect_type_ghm(actes) == "C" + + def test_interventionnel(self): + actes = [ActeCCAM(texte="Échographie", code_ccam_suggestion="ZCQM001")] + assert _detect_type_ghm(actes) == "K" + + def test_medical_no_actes(self): + assert _detect_type_ghm([]) == "M" + + def test_medical_no_code(self): + actes = [ActeCCAM(texte="Biopsie", code_ccam_suggestion=None)] + assert _detect_type_ghm(actes) == "M" + + def test_chirurgical_overrides_interventionnel(self): + actes = [ + ActeCCAM(texte="Écho", code_ccam_suggestion="ZCQM001"), + ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004"), + ] + assert _detect_type_ghm(actes) == "C" + + +class TestSeverityLevels: + def test_no_cma_level_1(self): + das = [Diagnostic(texte="HTA", cim10_suggestion="I10")] + niveau, cma, cms = _compute_severity(das) + assert niveau == 1 + + def test_two_cma_level_2(self): + das = [ + Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True), + Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True), + ] + niveau, cma, cms = _compute_severity(das) + assert niveau == 2 + assert cma == 2 + + def test_one_cms_level_3(self): + das = [ + Diagnostic(texte="Sepsis sĂ©vĂšre", 
cim10_suggestion="A41.9", est_cma=True, est_cms=True), + ] + niveau, cma, cms = _compute_severity(das) + assert niveau == 3 + assert cms == 1 + + def test_two_cms_level_4(self): + das = [ + Diagnostic(texte="Sepsis", cim10_suggestion="A41.9", est_cma=True, est_cms=True), + Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True, est_cms=True), + ] + niveau, cma, cms = _compute_severity(das) + assert niveau == 4 + assert cms == 2 + + def test_three_cma_level_3(self): + das = [ + Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True), + Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True), + Diagnostic(texte="DiabĂšte", cim10_suggestion="E11.9", est_cma=True), + ] + niveau, cma, cms = _compute_severity(das) + assert niveau == 3 + assert cma == 3 + + +class TestEstimateGHM: + def test_chirurgical_with_cma(self): + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="CholĂ©cystite", cim10_suggestion="K80.1"), + actes_ccam=[ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004")], + diagnostics_associes=[ + Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True), + Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True), + ], + ) + ghm = estimate_ghm(dossier) + assert ghm.cmd == "07" + assert ghm.type_ghm == "C" + assert ghm.severite == 2 + assert ghm.ghm_approx == "07C??2" + assert ghm.cma_count == 2 + + def test_medical_sans_actes(self): + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"), + ) + ghm = estimate_ghm(dossier) + assert ghm.cmd == "04" + assert ghm.type_ghm == "M" + assert ghm.severite == 1 + assert ghm.ghm_approx == "04M??1" + + def test_dp_absent(self): + dossier = DossierMedical() + ghm = estimate_ghm(dossier) + assert ghm.cmd is None + assert ghm.ghm_approx is None + assert any("DP absent" in a for a in ghm.alertes) + + def test_dp_sans_code(self): + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="Douleur 
thoracique"), + ) + ghm = estimate_ghm(dossier) + assert ghm.cmd is None + assert any("sans code" in a for a in ghm.alertes) + + def test_dp_symptomatique(self): + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="Douleur thoracique", cim10_suggestion="R07.4"), + ) + ghm = estimate_ghm(dossier) + assert ghm.cmd == "23" + assert any("symptomatique" in a for a in ghm.alertes) diff --git a/tests/test_ollama_cache.py b/tests/test_ollama_cache.py new file mode 100644 index 0000000..52bbd7e --- /dev/null +++ b/tests/test_ollama_cache.py @@ -0,0 +1,108 @@ +"""Tests unitaires pour le cache Ollama persistant.""" + +import json +import threading + +import pytest + +from src.medical.ollama_cache import OllamaCache + + +class TestOllamaCache: + def test_get_miss(self, tmp_path): + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + assert cache.get("HTA", "das") is None + + def test_put_and_get(self, tmp_path): + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + result = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"} + cache.put("HTA", "das", result) + assert cache.get("HTA", "das") == result + + def test_key_normalization(self, tmp_path): + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + result = {"code": "I10", "confidence": "high"} + cache.put(" HTA ", "das", result) + assert cache.get("hta", "das") == result + + def test_different_types_different_keys(self, tmp_path): + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + cache.put("DiabĂšte", "dp", {"code": "E11.9"}) + cache.put("DiabĂšte", "das", {"code": "E11.8"}) + assert cache.get("DiabĂšte", "dp")["code"] == "E11.9" + assert cache.get("DiabĂšte", "das")["code"] == "E11.8" + + def test_save_and_reload(self, tmp_path): + path = tmp_path / "cache.json" + cache = OllamaCache(path, "gemma3:12b") + cache.put("HTA", "das", {"code": "I10"}) + cache.save() + + assert path.exists() + + cache2 = OllamaCache(path, "gemma3:12b") + assert 
cache2.get("HTA", "das") == {"code": "I10"} + + def test_save_no_write_if_clean(self, tmp_path): + path = tmp_path / "cache.json" + cache = OllamaCache(path, "gemma3:12b") + cache.save() + assert not path.exists() + + def test_model_change_invalidates(self, tmp_path): + path = tmp_path / "cache.json" + cache = OllamaCache(path, "gemma3:12b") + cache.put("HTA", "das", {"code": "I10"}) + cache.save() + + cache2 = OllamaCache(path, "llama3:8b") + assert cache2.get("HTA", "das") is None + assert len(cache2) == 0 + + def test_corrupted_file(self, tmp_path): + path = tmp_path / "cache.json" + path.write_text("not valid json", encoding="utf-8") + + cache = OllamaCache(path, "gemma3:12b") + assert len(cache) == 0 + assert cache.get("HTA", "das") is None + + def test_len(self, tmp_path): + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + assert len(cache) == 0 + cache.put("HTA", "das", {"code": "I10"}) + assert len(cache) == 1 + cache.put("DiabĂšte", "dp", {"code": "E11.9"}) + assert len(cache) == 2 + + def test_thread_safety(self, tmp_path): + """Écriture concurrente depuis plusieurs threads.""" + cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b") + errors = [] + + def writer(i): + try: + cache.put(f"diag_{i}", "das", {"code": f"X{i:02d}"}) + except Exception as e: + errors.append(e) + + threads = [threading.Thread(target=writer, args=(i,)) for i in range(20)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors + assert len(cache) == 20 + + def test_json_format(self, tmp_path): + """Le fichier JSON contient le modĂšle et les entrĂ©es.""" + path = tmp_path / "cache.json" + cache = OllamaCache(path, "gemma3:12b") + cache.put("HTA", "das", {"code": "I10"}) + cache.save() + + raw = json.loads(path.read_text(encoding="utf-8")) + assert raw["model"] == "gemma3:12b" + assert "entries" in raw + assert len(raw["entries"]) == 1 diff --git a/tests/test_rag.py b/tests/test_rag.py index 27d39dc..184ec6b 100644 --- a/tests/test_rag.py 
+++ b/tests/test_rag.py @@ -7,7 +7,8 @@ from unittest.mock import patch, MagicMock import pytest -from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF +from src.config import RAGSource, Diagnostic, ActeCCAM, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF +from src.medical.ollama_cache import OllamaCache class TestRAGSource: @@ -494,6 +495,47 @@ class TestRAGSearchMocked: assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancrĂ©atite..." assert len(diag.sources_rag) == 1 + def test_enrich_diagnostic_invalid_code_ignored(self): + """Un code Ollama invalide ne remplace pas le code existant.""" + from src.medical.rag_search import enrich_diagnostic + + diag = Diagnostic(texte="PancrĂ©atite aiguĂ«", cim10_suggestion="K85.9") + mock_sources = [ + {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9}, + ] + mock_llm = { + "code": "X99.99", # code invalide + "confidence": "high", + "justification": "Hallucination", + } + + with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \ + patch("src.medical.rag_search._call_ollama", return_value=mock_llm): + enrich_diagnostic(diag, {"sexe": "M", "age": 50}) + + # Le code original est conservĂ© (pas remplacĂ© par le code invalide) + assert diag.cim10_suggestion == "K85.9" + + def test_enrich_diagnostic_normalizes_code(self): + """Un code Ollama sans point est normalisĂ© (K851 → K85.1).""" + from src.medical.rag_search import enrich_diagnostic + + diag = Diagnostic(texte="PancrĂ©atite aiguĂ« biliaire") + mock_sources = [ + {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9}, + ] + mock_llm = { + "code": "K851", # sans point + "confidence": "high", + "justification": "PancrĂ©atite biliaire", + } + + with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \ + patch("src.medical.rag_search._call_ollama", return_value=mock_llm): + enrich_diagnostic(diag, {"sexe": "F", 
"age": 43}) + + assert diag.cim10_suggestion == "K85.1" + def test_enrich_diagnostic_est_dp_flag(self): """Le flag est_dp est bien passĂ© Ă  _build_prompt.""" from src.medical.rag_search import enrich_diagnostic @@ -533,10 +575,12 @@ class TestEnrichDossier: captured_contexts = [] - def mock_enrich(diag, contexte, est_dp=True): + def mock_enrich(diag, contexte, est_dp=True, cache=None): captured_contexts.append(contexte.copy()) - with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich): + with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \ + patch("src.medical.rag_search.OllamaCache") as mock_cache_cls: + mock_cache_cls.return_value = MagicMock() enrich_dossier(dossier) assert len(captured_contexts) == 1 # DP seulement (pas de DAS) @@ -563,10 +607,12 @@ class TestEnrichDossier: captured = [] - def mock_enrich(diag, contexte, est_dp=True): + def mock_enrich(diag, contexte, est_dp=True, cache=None): captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")}) - with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich): + with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \ + patch("src.medical.rag_search.OllamaCache") as mock_cache_cls: + mock_cache_cls.return_value = MagicMock() enrich_dossier(dossier) assert len(captured) == 2 @@ -578,6 +624,149 @@ class TestEnrichDossier: assert captured[1]["dp_texte"] == "PancrĂ©atite aiguĂ« biliaire" +class TestNormalizeCode: + def test_insert_dot(self): + from src.medical.cim10_dict import normalize_code + assert normalize_code("K810") == "K81.0" + + def test_already_dotted(self): + from src.medical.cim10_dict import normalize_code + assert normalize_code("k85.1") == "K85.1" + + def test_three_chars(self): + from src.medical.cim10_dict import normalize_code + assert normalize_code("K85") == "K85" + + def test_strip_spaces(self): + from src.medical.cim10_dict import normalize_code + assert 
normalize_code(" E660 ") == "E66.0" + + +class TestValidateCodeCIM10: + def test_known_code(self): + from src.medical.cim10_dict import validate_code + is_valid, label = validate_code("K81.9") + assert is_valid is True + assert label # non vide + + def test_unknown_code(self): + from src.medical.cim10_dict import validate_code + is_valid, label = validate_code("Z99.99") + assert is_valid is False + assert label == "" + + def test_normalize_before_validate(self): + """K810 doit ĂȘtre normalisĂ© en K81.0 et trouvĂ©.""" + from src.medical.cim10_dict import validate_code + is_valid, label = validate_code("K810") + assert is_valid is True + + def test_three_char_code(self): + """Code parent sans point (K85) doit ĂȘtre validĂ©.""" + from src.medical.cim10_dict import validate_code + is_valid, label = validate_code("K85") + assert is_valid is True + + +class TestValidateCIM10PostProcessing: + def test_hallucination_rejected(self): + """Les codes hallucination (Aucun, N/A...) sont rejetĂ©s.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Aucun"), + ) + _validate_cim10(dossier) + assert dossier.diagnostic_principal.cim10_suggestion is None + assert any("rejetĂ©" in a for a in dossier.alertes_codage) + + def test_normalizes_format(self): + """K810 est normalisĂ© en K81.0.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="CholĂ©cystite", cim10_suggestion="K810"), + ) + _validate_cim10(dossier) + assert dossier.diagnostic_principal.cim10_suggestion == "K81.0" + + def test_invalid_code_gets_low_confidence(self): + """Un code inexistant reçoit confidence=low et une alerte.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostics_associes=[ + Diagnostic(texte="Chose bizarre", cim10_suggestion="Z99.99"), + ], + ) + _validate_cim10(dossier) + 
assert dossier.diagnostics_associes[0].cim10_confidence == "low" + assert any("absent du dictionnaire" in a for a in dossier.alertes_codage) + + def test_valid_code_unchanged(self): + """Un code valide n'est pas modifiĂ© et pas d'alerte.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="PancrĂ©atite", cim10_suggestion="K85.1"), + ) + _validate_cim10(dossier) + assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" + assert not any("CIM-10" in a for a in dossier.alertes_codage) + + def test_non_codable_rejected(self): + """'non_codable' est rejetĂ© comme hallucination.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostics_associes=[ + Diagnostic(texte="Truc", cim10_suggestion="non_codable"), + ], + ) + _validate_cim10(dossier) + assert dossier.diagnostics_associes[0].cim10_suggestion is None + + def test_hallucination_fallback_found(self): + """Hallucination rejetĂ©e mais fallback dictionnaire trouve un code.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="CholĂ©cystite aiguĂ«", cim10_suggestion="Aucun"), + ) + _validate_cim10(dossier) + assert dossier.diagnostic_principal.cim10_suggestion == "K81.0" + assert dossier.diagnostic_principal.cim10_confidence == "medium" + assert any("fallback" in a for a in dossier.alertes_codage) + + def test_invalid_code_fallback_found(self): + """Code invalide remplacĂ© par fallback dictionnaire.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostics_associes=[ + Diagnostic(texte="Hypertension artĂ©rielle", cim10_suggestion="I99.99"), + ], + ) + _validate_cim10(dossier) + assert dossier.diagnostics_associes[0].cim10_suggestion == "I10" + assert dossier.diagnostics_associes[0].cim10_confidence == "medium" + assert any("fallback" in a for a in 
dossier.alertes_codage) + + def test_invalid_code_no_fallback(self): + """Code invalide sans fallback possible → low confidence.""" + from src.medical.cim10_extractor import _validate_cim10 + + dossier = DossierMedical( + diagnostics_associes=[ + Diagnostic(texte="Chose bizarre inconnue", cim10_suggestion="Z99.99"), + ], + ) + _validate_cim10(dossier) + assert dossier.diagnostics_associes[0].cim10_suggestion == "Z99.99" + assert dossier.diagnostics_associes[0].cim10_confidence == "low" + assert any("absent du dictionnaire" in a for a in dossier.alertes_codage) + + class TestFormatContexte: """Tests pour _format_contexte.""" @@ -610,3 +799,241 @@ class TestFormatContexte: assert "TDM abdominal" in result assert "Ă©ruption cutanĂ©e" in result assert "PancrĂ©atite aiguĂ« biliaire" in result + + +class TestActeCCAMExtended: + def test_backward_compatible(self): + """Les nouveaux champs RAG sont optionnels — rĂ©trocompatible.""" + a = ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004") + assert a.texte == "CholĂ©cystectomie" + assert a.code_ccam_suggestion == "HMFC004" + assert a.ccam_confidence is None + assert a.justification is None + assert a.raisonnement is None + assert a.sources_rag == [] + + def test_with_rag_fields(self): + a = ActeCCAM( + texte="CholĂ©cystectomie par coelioscopie", + code_ccam_suggestion="HMFC004", + ccam_confidence="high", + justification="HMFC004 correspond Ă  la cholĂ©cystectomie par coelioscopie", + raisonnement="ANALYSE ACTE : CholĂ©cystectomie par voie coelioscopique...", + sources_rag=[ + RAGSource(document="ccam", page=10, code="HMFC004"), + ], + ) + assert a.ccam_confidence == "high" + assert a.justification is not None + assert len(a.sources_rag) == 1 + assert a.sources_rag[0].code == "HMFC004" + + def test_serialization_exclude_none(self): + a = ActeCCAM(texte="Test", code_ccam_suggestion="HMFC004") + data = a.model_dump(exclude_none=True) + assert "ccam_confidence" not in data + assert "justification" not in data + 
assert "raisonnement" not in data + assert "sources_rag" in data + + +class TestSearchSimilarCCAM: + def test_prioritizes_ccam(self): + """Les sources CCAM sont priorisĂ©es (au moins 5 sur 8).""" + from src.medical.rag_search import search_similar_ccam + import numpy as np + + mock_metadata = [] + for i in range(6): + mock_metadata.append({"document": "ccam", "code": f"HMFC00{i}", "page": i, "extrait": f"CCAM {i}"}) + for i in range(6): + mock_metadata.append({"document": "guide_methodo", "page": i + 10, "extrait": f"Guide {i}"}) + + mock_index = MagicMock() + mock_index.ntotal = 12 + scores = np.array([[0.9 - i * 0.03 for i in range(12)]], dtype=np.float32) + indices = np.array([list(range(12))], dtype=np.int64) + mock_index.search.return_value = (scores, indices) + + with patch("src.medical.rag_index.get_index", return_value=(mock_index, mock_metadata)), \ + patch("src.medical.rag_search._get_embed_model") as mock_model: + mock_model.return_value.encode.return_value = np.array([[0.1] * 768], dtype=np.float32) + results = search_similar_ccam("cholĂ©cystectomie", top_k=8) + + ccam_count = sum(1 for r in results if r["document"] == "ccam") + assert ccam_count >= 5, f"Seulement {ccam_count} sources CCAM sur {len(results)}" + + def test_no_index(self): + """search_similar_ccam retourne une liste vide si l'index n'existe pas.""" + from src.medical.rag_search import search_similar_ccam + + with patch("src.medical.rag_index.get_index", return_value=None): + results = search_similar_ccam("cholĂ©cystectomie") + assert results == [] + + +class TestEnrichActe: + def test_enrich_with_ollama(self): + """Enrichissement complet avec sources + Ollama.""" + from src.medical.rag_search import enrich_acte + + acte = ActeCCAM(texte="CholĂ©cystectomie par coelioscopie") + mock_sources = [ + { + "document": "ccam", + "page": 10, + "code": "HMFC004", + "extrait": "HMFC004 CholĂ©cystectomie par coelioscopie...", + "score": 0.92, + }, + ] + mock_llm = { + "code": "HMFC004", + 
"confidence": "high", + "justification": "CholĂ©cystectomie par coelioscopie = HMFC004", + "raisonnement": "ANALYSE ACTE : CholĂ©cystectomie par voie coelioscopique...", + } + + with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \ + patch("src.medical.rag_search._call_ollama", return_value=mock_llm), \ + patch("src.medical.rag_search.ccam_validate", return_value=(True, "CholĂ©cystectomie")): + enrich_acte(acte, {"sexe": "F", "age": 43}) + + assert acte.code_ccam_suggestion == "HMFC004" + assert acte.ccam_confidence == "high" + assert acte.justification == "CholĂ©cystectomie par coelioscopie = HMFC004" + assert acte.raisonnement is not None + assert len(acte.sources_rag) == 1 + + def test_enrich_no_sources(self): + """enrich_acte ne plante pas si aucune source trouvĂ©e.""" + from src.medical.rag_search import enrich_acte + + acte = ActeCCAM(texte="Acte inconnu", code_ccam_suggestion="ABCD123") + + with patch("src.medical.rag_search.search_similar_ccam", return_value=[]): + enrich_acte(acte, {"sexe": "M", "age": 50}) + + assert acte.sources_rag == [] + assert acte.justification is None + + def test_enrich_no_ollama(self): + """Enrichissement avec sources FAISS mais sans Ollama.""" + from src.medical.rag_search import enrich_acte + + acte = ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004") + mock_sources = [ + {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9}, + ] + + with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \ + patch("src.medical.rag_search._call_ollama", return_value=None): + enrich_acte(acte, {"sexe": "M", "age": 50}) + + assert len(acte.sources_rag) == 1 + assert acte.justification is None + assert acte.raisonnement is None + + def test_enrich_invalid_code(self): + """Un code CCAM invalide d'Ollama ne remplace pas le code existant.""" + from src.medical.rag_search import enrich_acte + + acte = ActeCCAM(texte="CholĂ©cystectomie", 
code_ccam_suggestion="HMFC004") + mock_sources = [ + {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9}, + ] + mock_llm = { + "code": "ZZZZ999", + "confidence": "high", + "justification": "Hallucination", + } + + with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \ + patch("src.medical.rag_search._call_ollama", return_value=mock_llm), \ + patch("src.medical.rag_search.ccam_validate", return_value=(False, "")): + enrich_acte(acte, {"sexe": "M", "age": 50}) + + # Le code original est conservĂ© + assert acte.code_ccam_suggestion == "HMFC004" + # Mais la confidence est quand mĂȘme affectĂ©e + assert acte.ccam_confidence == "high" + + +class TestEnrichDossierCCAM: + def test_enriches_actes(self): + """enrich_dossier enrichit aussi les actes CCAM.""" + from src.medical.rag_search import enrich_dossier + + dossier = DossierMedical( + diagnostic_principal=Diagnostic(texte="Lithiase vĂ©siculaire"), + actes_ccam=[ + ActeCCAM(texte="CholĂ©cystectomie par coelioscopie"), + ActeCCAM(texte="AnesthĂ©sie gĂ©nĂ©rale"), + ], + ) + + enriched = [] + + def mock_enrich_diag(diag, contexte, est_dp=True, cache=None): + pass + + def mock_enrich_acte(acte, contexte, cache=None): + enriched.append(acte.texte) + + with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich_diag), \ + patch("src.medical.rag_search.enrich_acte", side_effect=mock_enrich_acte), \ + patch("src.medical.rag_search.OllamaCache") as mock_cache_cls: + mock_cache_cls.return_value = MagicMock() + enrich_dossier(dossier) + + assert len(enriched) == 2 + assert "CholĂ©cystectomie par coelioscopie" in enriched + assert "AnesthĂ©sie gĂ©nĂ©rale" in enriched + + +class TestBuildPromptCCAM: + def test_prompt_contains_acte(self): + from src.medical.rag_search import _build_prompt_ccam + + sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 CholĂ©cystectomie"}] + contexte = {"sexe": "F", "age": 43} + prompt = 
_build_prompt_ccam("CholĂ©cystectomie par coelioscopie", sources, contexte) + + assert "CholĂ©cystectomie par coelioscopie" in prompt + assert "CCAM" in prompt + assert "analyse_acte" in prompt + assert "objet JSON" in prompt + + def test_prompt_contains_source_info(self): + from src.medical.rag_search import _build_prompt_ccam + + sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 CholĂ©cystectomie par coelioscopie"}] + contexte = {} + prompt = _build_prompt_ccam("CholĂ©cystectomie", sources, contexte) + + assert "CCAM PMSI V4 2025" in prompt + assert "HMFC004" in prompt + + +class TestParseOllamaResponseCCAM: + def test_parse_ccam_structured_json(self): + """Le parsing extrait analyse_acte dans le raisonnement.""" + from src.medical.rag_search import _parse_ollama_response + import json + + raw = json.dumps({ + "analyse_acte": "CholĂ©cystectomie par voie coelioscopique", + "codes_candidats": "HMFC004, HMFC003", + "discrimination": "HMFC004 est le code spĂ©cifique Ă  la coelioscopie", + "code": "HMFC004", + "confidence": "high", + "justification": "CholĂ©cystectomie coelioscopique = HMFC004", + }) + + result = _parse_ollama_response(raw) + assert result is not None + assert result["code"] == "HMFC004" + assert "raisonnement" in result + assert "ANALYSE ACTE" in result["raisonnement"] + assert "CODES CANDIDATS" in result["raisonnement"] + assert "analyse_acte" not in result diff --git a/tests/test_rum_export.py b/tests/test_rum_export.py new file mode 100644 index 0000000..75ea6e4 --- /dev/null +++ b/tests/test_rum_export.py @@ -0,0 +1,212 @@ +"""Tests pour le module d'export RUM V016.""" + +import pytest + +from src.config import ActeCCAM, Diagnostic, DossierMedical, Sejour +from src.export.rum_export import ( + RUMConfig, + export_rum, + _format_cim10, + _format_date, + _format_sex, + _format_ccam_act, + _map_mode_entree, + _map_mode_sortie, +) + + +class TestFormatHelpers: + def test_format_cim10_normal(self): + assert 
_format_cim10("K85.1") == "K851 " + assert len(_format_cim10("K85.1")) == 8 + + def test_format_cim10_short(self): + result = _format_cim10("J18") + assert result == "J18 " + assert len(result) == 8 + + def test_format_cim10_none(self): + assert _format_cim10(None) == " " + assert len(_format_cim10(None)) == 8 + + def test_format_cim10_empty(self): + assert _format_cim10("") == " " + + def test_format_date_ddmmyyyy(self): + assert _format_date("15/03/2025") == "15032025" + + def test_format_date_iso(self): + assert _format_date("2025-03-15") == "15032025" + + def test_format_date_none(self): + assert _format_date(None) == " " + assert len(_format_date(None)) == 8 + + def test_format_sex_masculin(self): + assert _format_sex("M") == "1" + assert _format_sex("Masculin") == "1" + assert _format_sex("H") == "1" + + def test_format_sex_feminin(self): + assert _format_sex("F") == "2" + assert _format_sex("FĂ©minin") == "2" + + def test_format_sex_none(self): + assert _format_sex(None) == " " + + def test_map_mode_entree(self): + assert _map_mode_entree("Domicile") == "8" + assert _map_mode_entree("Mutation") == "6" + assert _map_mode_entree("Transfert") == "7" + assert _map_mode_entree(None) == " " + + def test_map_mode_sortie(self): + assert _map_mode_sortie("Domicile") == "8" + assert _map_mode_sortie("DĂ©cĂšs") == "9" + assert _map_mode_sortie("Transfert") == "7" + assert _map_mode_sortie(None) == " " + + def test_format_ccam_act(self): + acte = ActeCCAM(texte="CholĂ©cystectomie", code_ccam_suggestion="HMFC004", date="15/03/2025") + result = _format_ccam_act(acte) + assert len(result) == 29 + assert result[:7] == "HMFC004" + assert result[7] == "1" # phase + assert result[8] == "1" # activitĂ© + assert result[9:17] == "15032025" # date + + +class TestExportRUM: + def _make_dossier(self, **kwargs): + defaults = dict( + source_file="test.pdf", + sejour=Sejour( + sexe="M", + date_entree="01/01/2025", + date_sortie="05/01/2025", + mode_entree="Domicile", + 
mode_sortie="Domicile", + ), + diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"), + diagnostics_associes=[ + Diagnostic(texte="HTA", cim10_suggestion="I10"), + ], + actes_ccam=[ + ActeCCAM(texte="Radio thorax", code_ccam_suggestion="ZBQK002", date="02/01/2025"), + ], + ) + defaults.update(kwargs) + return DossierMedical(**defaults) + + def test_fixed_zone_length(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + # La zone fixe fait 165 chars, plus DAS et actes + assert len(rum) >= 165 + + def test_fixed_zone_exact_165(self): + dossier = self._make_dossier(diagnostics_associes=[], actes_ccam=[]) + rum = export_rum(dossier) + assert len(rum) == 165 + + def test_version_format(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[9:12] == "016" # version format + assert rum[24:27] == "016" # version RUM + + def test_finess(self): + config = RUMConfig(finess="123456789") + dossier = self._make_dossier() + rum = export_rum(dossier, config) + assert rum[15:24] == "123456789" + + def test_sexe(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[85] == "1" # M + + def test_dates(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[92:100] == "01012025" # date entrĂ©e + assert rum[102:110] == "05012025" # date sortie + + def test_modes(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[100] == "8" # mode entrĂ©e domicile + assert rum[110] == "8" # mode sortie domicile + + def test_dp_field(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[131:139] == "J189 " + + def test_nb_das(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[125:127] == "01" + + def test_nb_actes(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + assert rum[129:131] == "01" + + def test_das_variable_zone(self): + dossier = self._make_dossier() + rum = 
export_rum(dossier) + # DAS commence Ă  pos 165, 8 chars + das_zone = rum[165:173] + assert das_zone == "I10 " + + def test_acte_variable_zone(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + # 1 DAS (8 chars) puis l'acte (29 chars) Ă  pos 173 + acte_zone = rum[173:202] + assert len(acte_zone) == 29 + assert acte_zone[:7] == "ZBQK002" + + def test_total_length(self): + dossier = self._make_dossier() + rum = export_rum(dossier) + # 165 + 1*8 (DAS) + 1*29 (acte) = 202 + assert len(rum) == 202 + + +class TestEdgeCases: + def test_no_dp(self): + dossier = DossierMedical(source_file="test.pdf") + rum = export_rum(dossier) + assert len(rum) == 165 + assert rum[131:139] == " " + + def test_no_sejour_data(self): + dossier = DossierMedical( + source_file="test.pdf", + diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="J18.9"), + ) + rum = export_rum(dossier) + assert len(rum) == 165 + assert rum[85] == " " # sexe vide + + def test_multiple_das_and_actes(self): + dossier = DossierMedical( + source_file="test.pdf", + diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="K85.1"), + diagnostics_associes=[ + Diagnostic(texte="D1", cim10_suggestion="I10"), + Diagnostic(texte="D2", cim10_suggestion="E11.9"), + Diagnostic(texte="D3", cim10_suggestion="I48.9"), + ], + actes_ccam=[ + ActeCCAM(texte="A1", code_ccam_suggestion="HMFC004", date="01/01/2025"), + ActeCCAM(texte="A2", code_ccam_suggestion="ZBQK002", date="02/01/2025"), + ], + ) + rum = export_rum(dossier) + # 165 + 3*8 + 2*29 = 165 + 24 + 58 = 247 + assert len(rum) == 247 + assert rum[125:127] == "03" # nb DAS + assert rum[129:131] == "02" # nb actes diff --git a/tests/test_severity.py b/tests/test_severity.py index d16d38e..fd82d93 100644 --- a/tests/test_severity.py +++ b/tests/test_severity.py @@ -90,7 +90,7 @@ class TestEnrichDossierSeverity: Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"), Diagnostic(texte="ObĂ©sitĂ©", cim10_suggestion="E66.0"), ] - 
alertes = enrich_dossier_severity(dp, das) + alertes, cma_count, cms_count = enrich_dossier_severity(dp, das) # I48.9 = CMA probable assert das[0].est_cma is True @@ -101,9 +101,21 @@ class TestEnrichDossierSeverity: # Au moins une alerte CMA assert any("CMA" in a for a in alertes) + assert cma_count >= 1 def test_dp_severity_set(self): dp = Diagnostic(texte="Sepsis sĂ©vĂšre", cim10_suggestion="A41.9") - alertes = enrich_dossier_severity(dp, []) + alertes, cma_count, cms_count = enrich_dossier_severity(dp, []) assert dp.niveau_severite == "severe" assert dp.est_cma is True + + def test_cms_detection(self): + """CMS dĂ©tectĂ© quand CMA + sĂ©vĂ©ritĂ© severe.""" + dp = Diagnostic(texte="PancrĂ©atite", cim10_suggestion="K85.1") + das = [ + Diagnostic(texte="Sepsis sĂ©vĂšre", cim10_suggestion="A41.9"), + ] + alertes, cma_count, cms_count = enrich_dossier_severity(dp, das) + assert das[0].est_cma is True + assert das[0].est_cms is True + assert cms_count == 1