feat: cache Ollama + parallélisation ThreadPool + filtrage DAS renforcé + modules GHM/CPAM/export RUM

- Cache persistant JSON thread-safe pour les résultats Ollama (invalidation par modèle)
- Parallélisation des appels Ollama (ThreadPoolExecutor, 2 workers)
- 6 nouvelles règles de filtrage DAS parasites (doublons, ponctuation, OCR, labo, fragments)
- Client Ollama centralisé (mode JSON natif + retry)
- Module GHM (estimation CMD/sévérité)
- Module contrôle CPAM (parser + contre-argumentation RAG)
- Export RUM (format RSS)
- Viewer enrichi (détail dossier)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-12 13:44:34 +01:00
parent a00e5f1147
commit a58398f5d4
25 changed files with 2872 additions and 97 deletions

45
run.sh Executable file
View File

@@ -0,0 +1,45 @@
#!/bin/bash
# Bootstrap script for the T2A application: prepares the Python virtual
# environment, installs dependencies when needed, creates the working
# directories and finally starts the viewer.
set -e
cd "$(dirname "$0")"

VENV_DIR=".venv"
DEPS_STAMP="${VENV_DIR}/.deps_installed"

echo "🚀 Démarrage de l'application T2A..."

# Create the virtual environment on first run only.
if [[ ! -d "$VENV_DIR" ]]; then
    echo "📦 Création de l'environnement virtuel..."
    python3 -m venv "$VENV_DIR"
fi

# Activate the virtual environment for the rest of the script.
echo "🔧 Activation de l'environnement virtuel..."
source "${VENV_DIR}/bin/activate"

# Dependencies are (re)installed unless the stamp file exists and is at least
# as recent as requirements.txt.
if [[ -f "$DEPS_STAMP" && ! "requirements.txt" -nt "$DEPS_STAMP" ]]; then
    echo "✅ Dépendances déjà installées"
else
    echo "📥 Installation des dépendances..."
    pip install -q --upgrade pip
    pip install -q -r requirements.txt
    touch "$DEPS_STAMP"
fi

# Create the directories the pipeline reads from and writes to.
mkdir -p input output/{anonymized,structured,reports} data/rag_index

# Startup banner (same lines as individual echo calls would print).
cat <<'EOF'

✨ Application prête !

📂 Répertoires :
 - input/ : Placez vos PDFs ici
 - output/ : Résultats du traitement

🌐 Lancement du viewer sur http://localhost:5000

 Appuyez sur Ctrl+C pour arrêter

EOF

# Start the viewer.
python3 -m src.viewer

View File

@@ -33,6 +33,14 @@ NER_CONFIDENCE_THRESHOLD = 0.80
# Base URL of the Ollama server (local default endpoint).
OLLAMA_URL = "http://localhost:11434"
# Name of the Ollama model to query.
OLLAMA_MODEL = "gemma3:12b"
# Request timeout — presumably in seconds; TODO confirm against the Ollama client.
OLLAMA_TIMEOUT = 120
# JSON file holding the persistent cache of Ollama results.
OLLAMA_CACHE_PATH = BASE_DIR / "data" / "ollama_cache.json"
# Upper bound on concurrent Ollama calls — presumably the thread-pool worker count; verify in the caller.
OLLAMA_MAX_PARALLEL = 2
# --- RUM / establishment configuration ---
# FINESS identifier used as the default for the RUM export; all zeros looks like a placeholder — TODO confirm.
FINESS = "000000000"
# Medical-unit number used as the default for the RUM export; placeholder-looking value — TODO confirm.
NUM_UM = "0000"
# --- Configuration RAG ---
@@ -83,6 +91,10 @@ class Diagnostic(BaseModel):
class ActeCCAM(BaseModel):
texte: str
code_ccam_suggestion: Optional[str] = None
ccam_confidence: Optional[str] = None
justification: Optional[str] = None
raisonnement: Optional[str] = None
sources_rag: list[RAGSource] = Field(default_factory=list)
date: Optional[str] = None
validite: Optional[str] = None # "valide" | "obsolete" | "non_verifie"
alertes: list[str] = Field(default_factory=list)
@@ -120,12 +132,38 @@ class DossierMedical(BaseModel):
complications: list[str] = Field(default_factory=list)
alertes_codage: list[str] = Field(default_factory=list)
source_files: list[str] = Field(default_factory=list)
ghm_estimation: Optional[GHMEstimation] = None
controles_cpam: list[ControleCPAM] = Field(default_factory=list)
processing_time_s: float | None = None
# --- Rapport d'anonymisation ---
class GHMEstimation(BaseModel):
    """Approximate GHM estimate stored on a dossier (``DossierMedical.ghm_estimation``)."""

    # CMD code and its human-readable label.
    cmd: Optional[str] = None
    cmd_libelle: Optional[str] = None
    type_ghm: Optional[str] = None  # "C" / "M" / "K"
    severite: int = 1  # 1-4 (range is documented only; no validator enforces it here)
    ghm_approx: Optional[str] = None  # e.g. "07C??2"
    # Counters — presumably the number of CMA / CMS diagnoses found; confirm in the GHM module.
    cma_count: int = 0
    cms_count: int = 0
    # Free-text warnings attached to the estimate.
    alertes: list[str] = Field(default_factory=list)
class ControleCPAM(BaseModel):
    """One CPAM (UCR) control row, optionally enriched with a generated counter-argument."""

    # OGC number; the only field without a default, so it is required.
    numero_ogc: int
    # Title, UCR argument and UCR decision of the control (empty string when not provided).
    titre: str = ""
    arg_ucr: str = ""
    decision_ucr: str = ""
    # Optional UCR code fields (DP / DA / DR / acts), kept as raw text.
    dp_ucr: Optional[str] = None
    da_ucr: Optional[str] = None
    dr_ucr: Optional[str] = None
    actes_ucr: Optional[str] = None
    # Generated counter-argument text and the RAG sources that supported it.
    contre_argumentation: Optional[str] = None
    sources_reponse: list[RAGSource] = Field(default_factory=list)
class AnonymizationReport(BaseModel):
source_file: str
total_replacements: int = 0

0
src/control/__init__.py Normal file
View File

115
src/control/cpam_parser.py Normal file
View File

@@ -0,0 +1,115 @@
"""Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC."""
from __future__ import annotations
import logging
import re
from pathlib import Path
import openpyxl
from ..config import ControleCPAM
logger = logging.getLogger(__name__)
# Columns expected in the Excel file. The first four are mandatory
# (parse_cpam_excel rejects the file without them); the others are optional.
_EXPECTED_COLUMNS = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
    """Read the CPAM (UCR) control workbook and group its rows by OGC number.

    Only the first worksheet is read. Its first row is the header; the columns
    "N° OGC", "Titre", "Arg_UCR" and "Décision_UCR" are mandatory, the other
    entries of ``_EXPECTED_COLUMNS`` are optional. Rows whose OGC cell is empty
    or cannot be converted to ``int`` are skipped.

    Args:
        path: Path to the CPAM ``.xlsx`` file.

    Returns:
        Mapping OGC number -> controls found for that OGC, in sheet order.
        Empty when the file is missing, has no header row, or lacks a
        mandatory column (an error is logged in each case).
    """
    path = Path(path)
    if not path.exists():
        logger.error("Fichier CPAM introuvable : %s", path)
        return {}

    wb = openpyxl.load_workbook(path, read_only=True)
    try:
        ws = wb[wb.sheetnames[0]]

        # Read the header row.
        rows = ws.iter_rows(values_only=True)
        header = next(rows, None)
        if header is None:
            logger.error("Fichier CPAM vide : %s", path)
            return {}

        # Map column name -> index. Header cells may hold non-string values
        # (numbers, dates), so they are converted before stripping.
        col_map: dict[str, int] = {}
        for i, col_name in enumerate(header):
            if col_name is None:
                continue
            name = str(col_name).strip()
            if name:
                col_map[name] = i

        # Check the mandatory columns.
        missing = [c for c in _EXPECTED_COLUMNS[:4] if c not in col_map]
        if missing:
            logger.error("Colonnes manquantes dans le fichier CPAM : %s", missing)
            return {}

        def cell(row: tuple, name: str):
            """Return the raw value of column *name*, or None if this row is too short."""
            idx = col_map.get(name)
            if idx is None or idx >= len(row):
                return None
            return row[idx]

        result: dict[int, list[ControleCPAM]] = {}
        count = 0

        for row in rows:
            ogc_val = cell(row, "N° OGC")
            if ogc_val is None:
                continue
            try:
                numero_ogc = int(ogc_val)
            except (ValueError, TypeError):
                logger.warning("N° OGC invalide ignoré : %s", ogc_val)
                continue

            controle = ControleCPAM(
                numero_ogc=numero_ogc,
                titre=str(cell(row, "Titre") or "").strip(),
                arg_ucr=str(cell(row, "Arg_UCR") or "").strip(),
                decision_ucr=str(cell(row, "Décision_UCR") or "").strip(),
                dp_ucr=_clean_optional(row, col_map.get("DP_UCR")),
                da_ucr=_clean_optional(row, col_map.get("DA_UCR")),
                dr_ucr=_clean_optional(row, col_map.get("DR_UCR")),
                actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")),
            )
            result.setdefault(numero_ogc, []).append(controle)
            count += 1

        logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", count, len(result))
        return result
    finally:
        # Read-only workbooks keep the underlying file open until closed explicitly.
        wb.close()
def _clean_optional(row: tuple, idx: int | None) -> str | None:
"""Extrait une valeur optionnelle depuis une ligne Excel."""
if idx is None or idx >= len(row):
return None
val = row[idx]
if val is None:
return None
val = str(val).strip()
return val if val else None
def match_dossier_ogc(source_name: str, cpam_data: dict[int, list[ControleCPAM]]) -> list[ControleCPAM]:
    """Look up the CPAM controls of a dossier from the OGC prefix of its name.

    The dossier name follows the pattern "17_23100690", where 17 is the
    OGC number.

    Args:
        source_name: Name of the sub-directory (e.g. "17_23100690").
        cpam_data: Mapping OGC -> controls returned by parse_cpam_excel().

    Returns:
        The controls registered for that OGC, or an empty list when the name
        has no numeric prefix or the OGC is unknown.
    """
    prefix = re.match(r"^(\d+)_", source_name)
    return cpam_data.get(int(prefix[1]), []) if prefix else []

View File

@@ -0,0 +1,228 @@
"""Génération de contre-argumentation pour les contrôles CPAM via RAG + Ollama."""
from __future__ import annotations
import logging
from ..config import ControleCPAM, DossierMedical, RAGSource
from ..medical.ollama_client import call_ollama
logger = logging.getLogger(__name__)
def _search_rag_for_control(controle: ControleCPAM, dossier: DossierMedical) -> list[dict]:
    """Run a RAG search focused on the subject of the disagreement.

    The query concatenates, when present, the control title, the DP and DA
    proposed by the UCR, and the first 300 characters of the UCR argument.
    Returns an empty list when the RAG search module cannot be imported or
    when no query text could be built. ``dossier`` is currently not used.
    """
    try:
        from ..medical.rag_search import search_similar
    except Exception:
        logger.warning("Index RAG non disponible pour la contre-argumentation")
        return []

    # Candidate fragments, in query order; empty ones are dropped below.
    fragments = [
        controle.titre,
        f"diagnostic principal {controle.dp_ucr}" if controle.dp_ucr else "",
        f"diagnostic associé {controle.da_ucr}" if controle.da_ucr else "",
        # Keep only the beginning of the CPAM argument.
        controle.arg_ucr[:300] if controle.arg_ucr else "",
    ]
    query = " ".join(fragment for fragment in fragments if fragment)

    if query.strip():
        return search_similar(query, top_k=8)
    return []
def _build_cpam_prompt(
dossier: DossierMedical,
controle: ControleCPAM,
sources: list[dict],
) -> str:
"""Construit le prompt pour la contre-argumentation CPAM."""
# Résumé du dossier médical
dossier_lines = []
if dossier.diagnostic_principal:
dp = dossier.diagnostic_principal
dp_code = f" ({dp.cim10_suggestion})" if dp.cim10_suggestion else ""
dossier_lines.append(f"- DP : {dp.texte}{dp_code}")
if dossier.diagnostics_associes:
das_parts = []
for das in dossier.diagnostics_associes:
code = f" ({das.cim10_suggestion})" if das.cim10_suggestion else ""
das_parts.append(f"{das.texte}{code}")
dossier_lines.append(f"- DAS : {', '.join(das_parts)}")
if dossier.actes_ccam:
actes = [f"{a.texte} ({a.code_ccam_suggestion})" if a.code_ccam_suggestion else a.texte
for a in dossier.actes_ccam]
dossier_lines.append(f"- Actes CCAM : {', '.join(actes)}")
sejour = dossier.sejour
if sejour.duree_sejour is not None:
dossier_lines.append(f"- Durée séjour : {sejour.duree_sejour} jours")
if sejour.sexe or sejour.age is not None:
patient_info = []
if sejour.sexe:
patient_info.append(sejour.sexe)
if sejour.age is not None:
patient_info.append(f"{sejour.age} ans")
dossier_lines.append(f"- Patient : {', '.join(patient_info)}")
if dossier.biologie_cle:
bio = [f"{b.test}: {b.valeur}" for b in dossier.biologie_cle[:5] if b.valeur]
if bio:
dossier_lines.append(f"- Biologie clé : {', '.join(bio)}")
if dossier.complications:
dossier_lines.append(f"- Complications : {', '.join(dossier.complications)}")
dossier_str = "\n".join(dossier_lines) if dossier_lines else "Non disponible"
# Codes contestés par la CPAM
codes_contestes = []
if controle.dp_ucr:
codes_contestes.append(f"DP proposé par UCR : {controle.dp_ucr}")
if controle.da_ucr:
codes_contestes.append(f"DA proposés par UCR : {controle.da_ucr}")
if controle.dr_ucr:
codes_contestes.append(f"DR proposé par UCR : {controle.dr_ucr}")
if controle.actes_ucr:
codes_contestes.append(f"Actes proposés par UCR : {controle.actes_ucr}")
codes_str = "\n".join(codes_contestes) if codes_contestes else "Aucun code spécifique proposé"
# Sources RAG
sources_text = ""
for i, src in enumerate(sources, 1):
doc_name = {
"cim10": "CIM-10 FR 2026",
"cim10_alpha": "CIM-10 Index Alphabétique 2026",
"guide_methodo": "Guide Méthodologique MCO 2026",
"ccam": "CCAM PMSI V4 2025",
}.get(src.get("document", ""), src.get("document", ""))
code_info = f" (code: {src['code']})" if src.get("code") else ""
page_info = f" [page {src['page']}]" if src.get("page") else ""
sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n"
sources_text += (src.get("extrait", "")[:800]) + "\n\n"
return f"""Tu es un médecin DIM (Département d'Information Médicale) expert en contentieux T2A.
Tu dois contre-argumenter la décision de la CPAM (UCR) point par point, en t'appuyant sur le guide méthodologique et la CIM-10.
DOSSIER MÉDICAL DE L'ÉTABLISSEMENT :
{dossier_str}
OBJET DU DÉSACCORD : {controle.titre}
ARGUMENTATION DE LA CPAM (UCR) :
{controle.arg_ucr}
DÉCISION UCR : {controle.decision_ucr}
CODES CONTESTÉS :
{codes_str}
SOURCES RÉGLEMENTAIRES (Guide méthodologique, CIM-10) :
{sources_text}
CONSIGNES :
- Analyse objectivement l'argument de la CPAM
- Identifie les points où la CPAM a raison (le cas échéant)
- Contre-argumente point par point en citant le guide méthodologique et la CIM-10
- Cite les références précises (pages, articles, fascicules)
- Propose une conclusion et la position recommandée
Réponds UNIQUEMENT avec un objet JSON au format suivant :
{{
"analyse_contestation": "Résumé de ce que conteste la CPAM",
"points_accord": "Points où la CPAM a raison (ou 'Aucun' si non applicable)",
"contre_arguments": "Arguments point par point en faveur de l'établissement",
"references": "Références guide méthodologique / CIM-10 citées",
"conclusion": "Synthèse et position recommandée"
}}"""
def _format_response(parsed: dict) -> str:
"""Formate la réponse LLM en texte lisible."""
sections = []
analyse = parsed.get("analyse_contestation")
if analyse:
sections.append(f"ANALYSE DE LA CONTESTATION\n{analyse}")
accord = parsed.get("points_accord")
if accord and accord.lower() not in ("aucun", "non applicable", "n/a", ""):
sections.append(f"POINTS D'ACCORD\n{accord}")
contre = parsed.get("contre_arguments")
if contre:
sections.append(f"CONTRE-ARGUMENTS\n{contre}")
refs = parsed.get("references")
if refs:
sections.append(f"REFERENCES\n{refs}")
conclusion = parsed.get("conclusion")
if conclusion:
sections.append(f"CONCLUSION\n{conclusion}")
return "\n\n".join(sections)
def generate_cpam_response(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> tuple[str, list[RAGSource]]:
    """Generate a counter-argument for one CPAM control.

    Steps: targeted RAG search, prompt construction, Ollama call, then
    formatting of the JSON answer.

    Args:
        dossier: The analysed medical dossier.
        controle: The CPAM control to contest.

    Returns:
        Tuple (counter-argument text, RAG sources used). The text is empty
        when Ollama returned no result; the sources are returned either way.
    """
    # A separator between the OGC number and the title keeps the log readable.
    logger.info("CPAM : génération contre-argumentation pour OGC %d — %s",
                controle.numero_ogc, controle.titre)

    # 1. Targeted RAG search
    sources = _search_rag_for_control(controle, dossier)
    logger.info(" RAG : %d sources trouvées", len(sources))

    # 2. Prompt construction
    prompt = _build_cpam_prompt(dossier, controle, sources)

    # 3. Ollama call
    result = call_ollama(prompt, temperature=0.1, max_tokens=3000)

    # 4. Convert the raw RAG hits; excerpts are shortened to 200 characters.
    rag_sources = [
        RAGSource(
            document=s.get("document", ""),
            page=s.get("page"),
            code=s.get("code"),
            # "extrait" may be present but None: fall back to an empty string.
            extrait=(s.get("extrait") or "")[:200],
        )
        for s in sources
    ]

    if result is None:
        logger.warning(" Ollama non disponible — contre-argumentation non générée")
        return "", rag_sources

    # 5. Format the answer
    text = _format_response(result)
    logger.info(" Contre-argumentation générée (%d caractères)", len(text))
    return text, rag_sources

0
src/export/__init__.py Normal file
View File

190
src/export/rum_export.py Normal file
View File

@@ -0,0 +1,190 @@
"""Export au format RUM (Résumé d'Unité Médicale) V016 pour le groupeur ATIH.
Génère une ligne fixe de 165 caractères suivie de zones variables
(DAS en 8 chars, actes CCAM en 29 chars chacun).
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from ..config import FINESS, NUM_UM, DossierMedical
@dataclass
class RUMConfig:
    """Establishment identifiers written into the fixed zone of a RUM record."""

    # FINESS identifier; export_rum pads/truncates it to 9 characters.
    finess: str = FINESS
    # Medical-unit number; export_rum pads/truncates it to 4 characters.
    num_um: str = NUM_UM
def _format_cim10(code: str | None) -> str:
"""Formate un code CIM-10 sur 8 caractères (sans point, padded)."""
if not code:
return " " * 8
clean = code.upper().replace(".", "").strip()
return clean.ljust(8)[:8]
def _format_date(date_str: str | None) -> str:
"""Convertit une date DD/MM/YYYY ou YYYY-MM-DD en DDMMYYYY (8 chars)."""
if not date_str:
return " " * 8
date_str = date_str.strip()
# Format DD/MM/YYYY
m = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str)
if m:
return f"{m.group(1)}{m.group(2)}{m.group(3)}"
# Format YYYY-MM-DD
m = re.match(r"(\d{4})-(\d{2})-(\d{2})", date_str)
if m:
return f"{m.group(3)}{m.group(2)}{m.group(1)}"
return " " * 8
def _format_sex(sexe: str | None) -> str:
"""Convertit le sexe en code RUM (1=M, 2=F)."""
if not sexe:
return " "
s = sexe.strip().upper()
if s in ("M", "MASCULIN", "HOMME", "H"):
return "1"
if s in ("F", "FEMININ", "FÉMININ", "FEMME"):
return "2"
return " "
def _map_mode_entree(text: str | None) -> str:
"""Convertit le mode d'entrée textuel en code RUM (1 char)."""
if not text:
return " "
t = text.strip().lower()
mapping = {
"domicile": "8",
"mutation": "6",
"transfert": "7",
"urgences": "8",
"urgence": "8",
}
for key, code in mapping.items():
if key in t:
return code
return " "
def _map_mode_sortie(text: str | None) -> str:
"""Convertit le mode de sortie textuel en code RUM (1 char)."""
if not text:
return " "
t = text.strip().lower()
mapping = {
"domicile": "8",
"mutation": "6",
"transfert": "7",
"deces": "9",
"décès": "9",
"décédé": "9",
"decede": "9",
}
for key, code in mapping.items():
if key in t:
return code
return " "
def _format_ccam_act(acte) -> str:
    """Render one CCAM act as a 29-character zone.

    Layout: code(7) + phase(1) + activity(1) + date(8) + doc/extension(12).
    Phase and activity are always "1" and the extension is left blank.
    """
    raw_code = acte.code_ccam_suggestion or ""
    code_zone = raw_code.upper().replace(" ", "").ljust(7)[:7]
    return "".join((
        code_zone,
        "1",                      # phase
        "1",                      # activity
        _format_date(acte.date),  # act date, blank when unknown
        " " * 12,                 # doc/extension
    ))
def export_rum(dossier: DossierMedical, config: RUMConfig | None = None) -> str:
    """Build the complete RUM text for one medical dossier.

    The record is a 165-character fixed zone followed by variable zones: one
    8-character zone per DAS, then one 29-character zone per CCAM act.

    Only coded DAS and acts are exported, and at most 99 of each. The counters
    in the fixed zone are two digits wide and must match the number of
    variable zones that follow; a blank zone carries no code.

    Args:
        dossier: Dossier to export.
        config: Establishment identifiers; defaults to ``RUMConfig()``.

    Returns:
        Text in RUM V016 layout (165 fixed characters + variable zones).
    """
    if config is None:
        config = RUMConfig()

    sejour = dossier.sejour
    dp = dossier.diagnostic_principal

    # Keep the counters and the emitted zones in sync.
    max_zones = 99
    das_list = [
        das for das in dossier.diagnostics_associes
        if (das.cim10_suggestion or "").strip()
    ][:max_zones]
    actes_list = [
        acte for acte in dossier.actes_ccam
        if (acte.code_ccam_suggestion or "").strip()
    ][:max_zones]
    nb_das = len(das_list)
    nb_actes = len(actes_list)

    # Identifiers derived from the source file name.
    source = dossier.source_file or "UNKNOWN"
    num_rss = source.replace(".pdf", "").replace(" ", "_").ljust(20)[:20]
    num_admin = num_rss
    num_rum = source[:10].ljust(10)[:10]

    # Fixed zone (165 characters)
    parts = [
        " " * 2,                                # 1-2    : classification version (blank)
        " " * 6,                                # 3-8    : GHM (blank, filled by the grouper)
        " ",                                    # 9      : filler
        "016",                                  # 10-12  : format version
        " " * 3,                                # 13-15  : return code
        config.finess.ljust(9)[:9],             # 16-24  : FINESS
        "016",                                  # 25-27  : RUM version
        num_rss,                                # 28-47  : RSS number
        num_admin,                              # 48-67  : administrative number
        num_rum,                                # 68-77  : RUM number
        _format_date(None),                     # 78-85  : birth date (not available)
        _format_sex(sejour.sexe),               # 86     : sex
        config.num_um.ljust(4)[:4],             # 87-90  : medical-unit number
        " " * 2,                                # 91-92  : authorisation type
        _format_date(sejour.date_entree),       # 93-100 : unit entry date
        _map_mode_entree(sejour.mode_entree),   # 101    : admission mode
        " ",                                    # 102    : origin
        _format_date(sejour.date_sortie),       # 103-110: unit exit date
        _map_mode_sortie(sejour.mode_sortie),   # 111    : discharge mode
        " ",                                    # 112    : destination
        " " * 5,                                # 113-117: residence postcode
        " " * 4,                                # 118-121: newborn weight
        " " * 2,                                # 122-123: gestational age
        "00",                                   # 124-125: number of sessions
        f"{nb_das:02d}",                        # 126-127: number of DAS
        "00",                                   # 128-129: number of DAD
        f"{nb_actes:02d}",                      # 130-131: number of acts
        _format_cim10(dp.cim10_suggestion if dp else None),  # 132-139: DP
        " " * 8,                                # 140-147: DR
        " " * 3,                                # 148-150: IGS2
        " " * 15,                               # 151-165: reserved
    ]
    fixed = "".join(parts)
    # Internal invariant: every field above is padded/cut to its width.
    assert len(fixed) == 165, f"Zone fixe RUM: attendu 165, obtenu {len(fixed)}"

    # Variable zones: DAS (8 chars each), then CCAM acts (29 chars each).
    variable_parts = [_format_cim10(das.cim10_suggestion) for das in das_list]
    variable_parts.extend(_format_ccam_act(acte) for acte in actes_list)

    return fixed + "".join(variable_parts)
def save_rum(dossier: DossierMedical, path: Path, config: RUMConfig | None = None) -> None:
    """Write the RUM export of a dossier to ``path`` (UTF-8).

    The record is built first; parent directories are then created as needed
    and the file is overwritten if it already exists.
    """
    content = export_rum(dossier, config)
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)

View File

@@ -10,13 +10,14 @@ import time
from pathlib import Path
from .anonymization.anonymizer import Anonymizer
from .config import ANONYMIZED_DIR, REPORTS_DIR, STRUCTURED_DIR, AnonymizationReport, DossierMedical
from .config import ANONYMIZED_DIR, OUTPUT_DIR, REPORTS_DIR, STRUCTURED_DIR, AnonymizationReport, DossierMedical
from .extraction.document_classifier import classify
from .extraction.crh_parser import parse_crh
from .extraction.document_splitter import split_documents
from .extraction.pdf_extractor import extract_text
from .extraction.trackare_parser import parse_trackare
from .medical.cim10_extractor import extract_medical_info
from .medical.ghm import estimate_ghm
logging.basicConfig(
level=logging.INFO,
@@ -84,10 +85,20 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag)
dossier.source_file = pdf_path.name
dossier.document_type = doc_type
dossier.processing_time_s = round(time.time() - t0, 2)
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam))
# 8. Estimation GHM
try:
ghm = estimate_ghm(dossier)
dossier.ghm_estimation = ghm
logger.info(" GHM : CMD=%s, Type=%s, Sévérité=%d%s",
ghm.cmd or "?", ghm.type_ghm or "?",
ghm.severite, ghm.ghm_approx or "?")
except Exception:
logger.warning(" Erreur estimation GHM", exc_info=True)
dossier.processing_time_s = round(time.time() - t0, 2)
results.append((anonymized_text, dossier, report))
logger.info(" Temps total : %.2fs", time.time() - t0)
@@ -120,6 +131,7 @@ def write_outputs(
dossier: DossierMedical,
report: AnonymizationReport,
subdir: str | None = None,
export_rum_flag: bool = False,
) -> None:
"""Écrit les fichiers de sortie."""
anon_dir = ANONYMIZED_DIR / subdir if subdir else ANONYMIZED_DIR
@@ -151,6 +163,17 @@ def write_outputs(
)
logger.info("%s", report_path)
# Export RUM
if export_rum_flag:
from .export.rum_export import save_rum
rum_dir = OUTPUT_DIR / "rum"
if subdir:
rum_dir = rum_dir / subdir
rum_dir.mkdir(parents=True, exist_ok=True)
rum_path = rum_dir / f"{stem}_rum.txt"
save_rum(dossier, rum_path)
logger.info("%s", rum_path)
def main(input_path: str | None = None) -> None:
"""Point d'entrée principal."""
@@ -197,6 +220,16 @@ def main(input_path: str | None = None) -> None:
action="store_true",
help="Forcer la reconstruction de l'index FAISS",
)
parser.add_argument(
"--export-rum",
action="store_true",
help="Exporter les dossiers au format RUM V016 (pour groupeur ATIH)",
)
parser.add_argument(
"--control-cpam",
metavar="PATH",
help="Fichier Excel de contrôle CPAM (enrichit les dossiers avec contre-argumentation)",
)
args = parser.parse_args()
if args.build_dict:
@@ -226,6 +259,16 @@ def main(input_path: str | None = None) -> None:
if args.no_rag:
_use_rag = False
export_rum_flag = args.export_rum
# Chargement contrôle CPAM
cpam_data = None
if args.control_cpam:
from .control.cpam_parser import parse_cpam_excel
cpam_data = parse_cpam_excel(args.control_cpam)
if not cpam_data:
logger.warning("Aucun contrôle CPAM chargé depuis %s", args.control_cpam)
input_paths = args.input
# Collecte des groupes (pdfs, subdir) à traiter
@@ -274,12 +317,13 @@ def main(input_path: str | None = None) -> None:
multi = len(pdf_results) > 1
for part_idx, (anonymized_text, dossier, report) in enumerate(pdf_results):
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir)
write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir, export_rum_flag=export_rum_flag)
group_dossiers.append(dossier)
except Exception:
logger.exception("Erreur lors du traitement de %s", pdf_path.name)
# Fusion multi-PDFs si plusieurs documents dans le même groupe
merged = None
if len(group_dossiers) > 1 and subdir:
try:
from .medical.fusion import merge_dossiers
@@ -287,13 +331,47 @@ def main(input_path: str | None = None) -> None:
struct_dir = STRUCTURED_DIR / subdir
struct_dir.mkdir(parents=True, exist_ok=True)
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
# Export RUM du dossier fusionné
if export_rum_flag:
from .export.rum_export import save_rum
rum_dir = OUTPUT_DIR / "rum" / subdir
rum_dir.mkdir(parents=True, exist_ok=True)
rum_path = rum_dir / f"{subdir}_fusionne_rum.txt"
save_rum(merged, rum_path)
logger.info(" → RUM fusionné : %s", rum_path)
except Exception:
logger.exception("Erreur lors de la fusion du groupe %s", subdir)
merged = None
# Contrôle CPAM : enrichir le dossier principal (fusionné ou dernier)
if cpam_data and subdir:
from .control.cpam_parser import match_dossier_ogc
controles = match_dossier_ogc(subdir, cpam_data)
if controles:
from .control.cpam_response import generate_cpam_response
target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
if target:
logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
for ctrl in controles:
text, sources = generate_cpam_response(target, ctrl)
ctrl.contre_argumentation = text
ctrl.sources_reponse = sources
target.controles_cpam = controles
# Écrire le dossier fusionné (après enrichissement CPAM éventuel)
if merged is not None and subdir:
try:
struct_dir = STRUCTURED_DIR / subdir
struct_dir.mkdir(parents=True, exist_ok=True)
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
merged_path.write_text(
merged.model_dump_json(indent=2, exclude_none=True),
encoding="utf-8",
)
logger.info(" → Dossier fusionné : %s", merged_path)
except Exception:
logger.exception("Erreur lors de la fusion du groupe %s", subdir)
logger.exception("Erreur écriture dossier fusionné %s", subdir)
logger.info("Terminé.")

View File

@@ -173,6 +173,32 @@ def lookup(
return None
def normalize_code(code: str) -> str:
    """Normalise a CIM-10 code: K810 → K81.0, k85.1 → K85.1.

    The code is stripped and upper-cased; when it is longer than three
    characters and has no dot, a dot is inserted after the third character.
    """
    cleaned = code.strip().upper()
    if "." in cleaned or len(cleaned) <= 3:
        return cleaned
    return f"{cleaned[:3]}.{cleaned[3:]}"
def validate_code(code: str) -> tuple[bool, str]:
    """Check whether a CIM-10 code exists in the dictionary.

    The normalised (dotted) form is tried first, then the raw upper-cased
    code as given.

    Returns:
        (is_valid, label) — the label is empty when the code is unknown.
    """
    known_codes = load_dict()
    for candidate in (normalize_code(code), code.upper().strip()):
        if candidate in known_codes:
            return True, known_codes[candidate]
    return False, ""
def reset_cache() -> None:
"""Réinitialise les caches (utile pour les tests)."""
global _dict_cache, _normalized_cache

View File

@@ -9,7 +9,7 @@ from typing import Optional
logger = logging.getLogger(__name__)
from .cim10_dict import lookup as dict_lookup, normalize_text
from .cim10_dict import lookup as dict_lookup, normalize_text, normalize_code, validate_code as cim10_validate
from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text
from ..config import (
@@ -118,6 +118,9 @@ def extract_medical_info(
# Post-processing : validation des codes CCAM contre le dictionnaire
_validate_ccam(dossier)
# Post-processing : validation des codes CIM-10 contre le dictionnaire
_validate_cim10(dossier)
# Post-processing : exclusions symptôme vs diagnostic précis
_apply_exclusion_rules(dossier)
@@ -663,6 +666,68 @@ def _validate_ccam(dossier: DossierMedical) -> None:
)
# Lower-case placeholder answers that are rejected as CIM-10 codes
# (compared against the stripped, lower-cased suggestion in _validate_cim10).
_INVALID_CODE_PATTERNS = {"aucun", "none", "n/a", "non_codable", "aucun_code_valide", "inconnu"}
def _fallback_cim10(texte: str) -> str | None:
    """Look for a CIM-10 code for a diagnosis text through the dictionary.

    Returns the code found by the dictionary lookup only if it also passes
    dictionary validation; otherwise None.
    """
    candidate = dict_lookup(texte, domain_overrides=CIM10_MAP)
    if not candidate:
        return None
    known, _label = cim10_validate(candidate)
    return candidate if known else None
def _validate_cim10(dossier: DossierMedical) -> None:
    """Validate the suggested CIM-10 codes of a dossier against the dictionary.

    For the DP and every DAS that carries a suggestion:
    - a placeholder answer (see ``_INVALID_CODE_PATTERNS``) is rejected;
    - any other code is normalised (K810 → K81.0) and looked up.
    When the code is rejected or unknown, a dictionary fallback based on the
    diagnosis text is tried. A fallback replaces the code with "medium"
    confidence; without one, a rejected code is cleared and an unknown code is
    kept with "low" confidence. An alert is recorded in every such case.
    """
    entries = []
    if dossier.diagnostic_principal:
        entries.append(("DP", dossier.diagnostic_principal))
    entries.extend(("DAS", das) for das in dossier.diagnostics_associes)

    for type_diag, diag in entries:
        if not diag.cim10_suggestion:
            continue

        rejected = diag.cim10_suggestion.lower().strip() in _INVALID_CODE_PATTERNS
        if not rejected:
            # Normalise the layout, then check the dictionary.
            diag.cim10_suggestion = normalize_code(diag.cim10_suggestion)
            valid, _label = cim10_validate(diag.cim10_suggestion)
            if valid:
                continue

        replacement = _fallback_cim10(diag.texte)

        # The alert is built before the diagnostic is modified so that it
        # quotes the code that was originally suggested.
        if rejected and replacement:
            alert = f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} » → fallback {replacement}"
        elif rejected:
            alert = f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} »"
        elif replacement:
            alert = f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code invalide → fallback {replacement}"
        else:
            alert = f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code absent du dictionnaire CIM-10"
        dossier.alertes_codage.append(alert)

        if replacement:
            diag.cim10_suggestion = replacement
            diag.cim10_confidence = "medium"
        elif rejected:
            diag.cim10_suggestion = None
            diag.cim10_confidence = None
        else:
            diag.cim10_confidence = "low"
def _find_act_date(text: str, act_pattern: str) -> str | None:
"""Trouve la date associée à un acte."""
# Chercher "acte le DD/MM" ou "acte le DD/MM/YYYY"
@@ -705,7 +770,7 @@ def _apply_severity_rules(dossier: DossierMedical) -> None:
"""Enrichit les diagnostics avec les informations de sévérité heuristique."""
try:
from .severity import enrich_dossier_severity
alertes = enrich_dossier_severity(
alertes, _cma_count, _cms_count = enrich_dossier_severity(
dossier.diagnostic_principal, dossier.diagnostics_associes,
)
dossier.alertes_codage.extend(alertes)

View File

@@ -33,9 +33,12 @@ def is_valid_diagnostic_text(text: str) -> bool:
if re.match(r"^([a-zà-ÿ]{3,})\1+[a-zà-ÿ]*$", t, re.IGNORECASE):
return False
# 5. Mots répétés ≥ 3 fois : "Spontanée spontanée spontanée spontanée"
# 5. Mots répétés : tous identiques ("Absence absence", "Anticoagulant anticoagulant")
# ou ≥ 3 occurrences du même mot
words = t.lower().split()
if words:
if len(words) >= 2:
if len(set(words)) == 1:
return False
from collections import Counter
counts = Counter(words)
if counts.most_common(1)[0][1] >= 3:
@@ -47,4 +50,27 @@ def is_valid_diagnostic_text(text: str) -> bool:
if t in {"Isolement", "Pp 500"}:
return False
# 7. Ponctuation initiale (artefacts OCR) : ", sans précision"
if re.match(r'^[,.\-;:!)\]]\s', t):
return False
# 8. Pattern "À X.X" / "A X.X" (valeurs numériques OCR)
if re.match(r'^[ÀA]\s+\d+([.,]\d+)?$', t):
return False
# 9. Crochets (artefacts OCR) : "Episode [episode"
if '[' in t or ']' in t:
return False
# 10. Termes de laboratoire isolés (un seul mot ≠ diagnostic)
_LAB_TERMS = {"hémoglobine", "créatinine", "plaquettes", "leucocytes", "glycémie",
"natrémie", "kaliémie", "calcémie", "bilirubine", "albumine",
"fibrinogène", "hématocrite", "cétonurie", "glycosurie"}
if t.lower() in _LAB_TERMS:
return False
# 11. Fragments anatomiques courts sans pathologie : "Dans la vessie", "Le rein"
if re.match(r'^(Dans |La |Le |Les |Au |Aux )', t) and len(t) < 30:
return False
return True

215
src/medical/ghm.py Normal file
View File

@@ -0,0 +1,215 @@
"""Estimation heuristique du GHM (Groupe Homogène de Malades).
L'algorithme officiel (ATIH FG-MCO) est propriétaire. Ce module fournit une
estimation approximative utile comme pré-codage / aide au DIM :
1. CMD depuis le DP (table de plages CIM-10)
2. Type de prise en charge depuis les actes CCAM
3. Sévérité depuis les CMA/CMS
4. Construction du code GHM approximatif
"""
from __future__ import annotations
import bisect
from typing import Optional
from ..config import DossierMedical, GHMEstimation
# ---------------------------------------------------------------------------
# Table CIM-10 → CMD (Catégorie Majeure de Diagnostic)
# Triée par borne inférieure pour lookup par bisect.
# Format : (debut, fin, cmd, libelle)
# ---------------------------------------------------------------------------
_CMD_RANGES: list[tuple[str, str, str, str]] = [
    ("A00", "A99", "18", "Maladies infectieuses et parasitaires"),
    ("B00", "B19", "18", "Maladies infectieuses et parasitaires"),
    ("B20", "B24", "25", "Maladies dues au VIH"),
    ("B25", "B99", "18", "Maladies infectieuses et parasitaires"),
    ("C00", "C97", "17", "Tumeurs malignes"),
    ("D00", "D09", "17", "Tumeurs malignes"),
    ("D10", "D48", "16", "Tumeurs bénignes, hémopathies"),
    ("D50", "D89", "16", "Tumeurs bénignes, hémopathies"),
    ("E00", "E07", "10", "Maladies endocriniennes"),
    ("E10", "E14", "10", "Maladies endocriniennes"),
    ("E15", "E46", "10", "Maladies endocriniennes"),
    ("E47", "E90", "10", "Maladies endocriniennes"),
    ("F00", "F09", "19", "Maladies mentales"),
    ("F10", "F19", "20", "Troubles mentaux liés à l'alcool et aux toxiques"),
    ("F20", "F99", "19", "Maladies mentales"),
    ("G00", "G99", "01", "Affections du système nerveux"),
    ("H00", "H59", "02", "Affections de l'oeil"),
    ("H60", "H95", "03", "Affections ORL"),
    ("I00", "I99", "05", "Affections de l'appareil circulatoire"),
    ("J00", "J99", "04", "Affections de l'appareil respiratoire"),
    ("K00", "K67", "06", "Affections du tube digestif"),
    ("K70", "K87", "07", "Affections hépatobiliaires et pancréatiques"),
    ("K90", "K93", "06", "Affections du tube digestif"),
    ("L00", "L99", "09", "Affections de la peau"),
    ("M00", "M99", "08", "Affections du système ostéo-articulaire"),
    ("N00", "N39", "11", "Affections du rein et des voies urinaires"),
    ("N40", "N51", "12", "Affections de l'appareil génital masculin"),
    ("N60", "N98", "13", "Affections de l'appareil génital féminin"),
    ("N99", "N99", "11", "Affections du rein et des voies urinaires"),
    ("O00", "O99", "14", "Grossesses, accouchements, post-partum"),
    ("P00", "P96", "15", "Nouveau-nés, période périnatale"),
    ("Q00", "Q99", "15", "Nouveau-nés, période périnatale"),
    ("R00", "R99", "23", "Facteurs influençant l'état de santé (symptômes)"),
    ("S00", "S99", "21", "Traumatismes"),
    ("T00", "T19", "21", "Traumatismes"),
    ("T20", "T32", "22", "Brûlures"),
    ("T33", "T98", "21", "Traumatismes"),
    ("U00", "U99", "26", "Catégories spéciales"),
    ("V00", "Y98", "24", "Causes externes"),
    ("Z00", "Z99", "23", "Facteurs influençant l'état de santé"),
]

# Lower bounds only, in table order, so bisect can locate the candidate range.
_CMD_STARTS = [lower for lower, _upper, _cmd, _label in _CMD_RANGES]


def find_cmd(code_cim10: str) -> tuple[Optional[str], Optional[str]]:
    """Return the CMD and its label for a CIM-10 code.

    The code is upper-cased, dots are dropped and surrounding whitespace is
    stripped; only its first three characters are used for the lookup.

    Args:
        code_cim10: CIM-10 code such as ``"K85.1"``.

    Returns:
        ``(cmd, label)`` when the three-character category falls inside one of
        the ``_CMD_RANGES`` rows, otherwise ``(None, None)`` (empty input,
        fewer than three characters, or a category in a gap of the table).
    """
    not_found: tuple[Optional[str], Optional[str]] = (None, None)
    if not code_cim10:
        return not_found

    normalized = code_cim10.upper().replace(".", "").strip()
    if len(normalized) < 3:
        return not_found
    category = normalized[:3]

    # The only row that can contain `category` is the last one whose lower
    # bound is <= category, i.e. the row just before the insertion point.
    position = bisect.bisect_right(_CMD_STARTS, category)
    if position == 0:
        return not_found

    lower, upper, cmd, label = _CMD_RANGES[position - 1]
    return (cmd, label) if lower <= category <= upper else not_found
# ---------------------------------------------------------------------------
# Préfixes CCAM classants (chirurgicaux)
# Les codes CCAM commençant par ces lettres correspondent à des organes
# et sont considérés chirurgicaux quand ils désignent un acte opératoire.
# ---------------------------------------------------------------------------
_CCAM_CHIRURGICAL_PREFIXES = {"H", "J", "K", "L", "N", "P", "Q"}
# Préfixes interventionnels (imagerie, endoscopie)
_CCAM_INTERVENTIONNEL_PREFIXES = {"Z", "Y"}
def _detect_type_ghm(actes_ccam: list) -> str:
"""Détermine le type de prise en charge depuis les actes CCAM.
Returns:
"C" (chirurgical), "K" (interventionnel) ou "M" (médical).
"""
has_chirurgical = False
has_interventionnel = False
for acte in actes_ccam:
code = acte.code_ccam_suggestion
if not code or len(code) < 4:
continue
prefix = code[0].upper()
if prefix in _CCAM_CHIRURGICAL_PREFIXES:
has_chirurgical = True
break
if prefix in _CCAM_INTERVENTIONNEL_PREFIXES:
has_interventionnel = True
if has_chirurgical:
return "C"
if has_interventionnel:
return "K"
return "M"
def _compute_severity(das_list: list) -> tuple[int, int, int]:
"""Calcule le niveau de sévérité à partir des DAS.
Returns:
(niveau, cma_count, cms_count)
"""
cma_count = 0
cms_count = 0
for das in das_list:
if getattr(das, "est_cma", False):
cma_count += 1
if getattr(das, "est_cms", False):
cms_count += 1
if cms_count >= 2:
niveau = 4
elif cms_count >= 1 or cma_count >= 3:
niveau = 3
elif cma_count >= 2:
niveau = 2
else:
niveau = 1
return niveau, cma_count, cms_count
def estimate_ghm(dossier: DossierMedical) -> GHMEstimation:
    """Build a heuristic GHM estimation for a medical record.

    Steps:
        1. CMD from the principal diagnosis (DP) code via ``find_cmd``.
        2. Care type from the CCAM acts via ``_detect_type_ghm``.
        3. Severity from the associated diagnoses via ``_compute_severity``.
        4. Approximate code ``<cmd><type>??<severity>`` when both CMD and
           type are known.

    Problems met on the way (missing DP, DP without code, unknown CMD, DP code
    starting with R or Z) are appended to ``alertes`` instead of raising.
    """
    result = GHMEstimation()

    # 1. CMD from the principal diagnosis
    principal = dossier.diagnostic_principal
    principal_code = principal.cim10_suggestion if principal else None

    if principal and principal_code:
        cmd, cmd_label = find_cmd(principal_code)
        if cmd:
            result.cmd = cmd
            result.cmd_libelle = cmd_label
        else:
            result.alertes.append(f"CMD inconnue pour le code {principal_code}")

        # Flag DP codes whose first letter is R or Z.
        first_letter = principal_code.upper().replace(".", "").strip()[:1]
        if first_letter in ("R", "Z"):
            result.alertes.append(
                f"DP symptomatique ({principal_code}) — risque de CMD 23, impact tarif"
            )
    elif principal:
        result.alertes.append("DP sans code CIM-10 — CMD non déterminable")
    else:
        result.alertes.append("DP absent — CMD non déterminable")

    # 2. Care type
    result.type_ghm = _detect_type_ghm(dossier.actes_ccam)

    # 3. Severity
    level, cma_total, cms_total = _compute_severity(dossier.diagnostics_associes)
    result.severite = level
    result.cma_count = cma_total
    result.cms_count = cms_total

    # 4. Approximate code ("??" stands for the part this heuristic does not estimate)
    if result.cmd and result.type_ghm:
        result.ghm_approx = f"{result.cmd}{result.type_ghm}??{result.severite}"

    return result

View File

@@ -0,0 +1,85 @@
"""Cache persistant thread-safe pour les résultats Ollama."""
from __future__ import annotations
import json
import logging
import threading
from pathlib import Path
logger = logging.getLogger(__name__)


class OllamaCache:
    """Thread-safe, JSON-backed persistent cache for Ollama results.

    Entries are keyed by ``"<diag_type>::<normalised text>"``. The model name
    is stored next to the entries; a file written for another model is ignored
    at load time, so its entries are never served.
    """

    def __init__(self, cache_path: Path, model: str):
        """Create the cache and immediately load ``cache_path`` if it exists."""
        self._path = cache_path
        self._model = model
        self._lock = threading.Lock()
        self._data: dict[str, dict] = {}
        self._dirty = False
        self._load()

    def _load(self) -> None:
        """Load entries from disk; any unreadable or malformed file yields an empty cache."""
        if not self._path.exists():
            logger.info("Cache Ollama : nouveau cache (%s)", self._path)
            return
        try:
            raw = json.loads(self._path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Cache Ollama : fichier corrompu (%s), réinitialisé", e)
            return
        if not isinstance(raw, dict):
            # Valid JSON but not an object (e.g. a list): treat as corrupt.
            logger.warning(
                "Cache Ollama : fichier corrompu (%s), réinitialisé",
                "racine JSON inattendue",
            )
            return
        if raw.get("model") != self._model:
            logger.info(
                "Cache Ollama : modèle changé (%s → %s), cache invalidé",
                raw.get("model"), self._model,
            )
            return
        entries = raw.get("entries", {})
        if not isinstance(entries, dict):
            logger.warning(
                "Cache Ollama : fichier corrompu (%s), réinitialisé",
                "champ 'entries' inattendu",
            )
            return
        self._data = entries
        logger.info("Cache Ollama : %d entrées chargées", len(self._data))

    @staticmethod
    def _make_key(texte: str, diag_type: str) -> str:
        """Build the lookup key: type prefix plus stripped, lower-cased text."""
        return f"{diag_type}::{texte.strip().lower()}"

    def get(self, texte: str, diag_type: str) -> dict | None:
        """Return the cached result for this text/type, or ``None`` when absent."""
        key = self._make_key(texte, diag_type)
        with self._lock:
            return self._data.get(key)

    def put(self, texte: str, diag_type: str, result: dict) -> None:
        """Store a result in memory and mark the cache as needing a save."""
        key = self._make_key(texte, diag_type)
        with self._lock:
            self._data[key] = result
            self._dirty = True

    def save(self) -> None:
        """Write the cache to disk if it changed since the last load/save.

        The payload is written to a sibling ``.tmp`` file which then replaces
        the target, so an interrupted write cannot leave a truncated cache.
        """
        with self._lock:
            if not self._dirty:
                return
            self._path.parent.mkdir(parents=True, exist_ok=True)
            payload = {
                "model": self._model,
                "entries": self._data,
            }
            tmp_path = self._path.with_name(self._path.name + ".tmp")
            tmp_path.write_text(
                json.dumps(payload, ensure_ascii=False, indent=2),
                encoding="utf-8",
            )
            tmp_path.replace(self._path)
            self._dirty = False
            logger.info("Cache Ollama : %d entrées sauvegardées", len(self._data))

    def __len__(self) -> int:
        """Number of entries currently held in memory."""
        with self._lock:
            return len(self._data)

    def __bool__(self) -> bool:
        """Always truthy.

        With ``__len__`` alone an empty cache would be falsy, and callers that
        guard with ``if cache:`` would never store the first entry.
        """
        return True

View File

@@ -0,0 +1,80 @@
"""Client Ollama partagé — appel LLM en mode JSON natif."""
from __future__ import annotations
import json
import logging
import requests
from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
logger = logging.getLogger(__name__)


def parse_json_response(raw: str) -> dict | None:
    """Parse an Ollama answer into a dict, tolerating a markdown code fence.

    A leading fence line (```` ``` ```` or ```` ```json ````) and a trailing
    fence are removed before decoding. A fence opened and closed on a single
    line without a language tag is handled too.

    Args:
        raw: Raw text returned by the model.

    Returns:
        The decoded JSON object, or ``None`` when the text is not valid JSON
        or decodes to something other than an object (list, string, number…),
        since callers rely on dict methods.
    """
    text = raw.strip()
    if text.startswith("```"):
        first_nl = text.find("\n")
        if first_nl != -1:
            # Drop the whole opening fence line, language tag included.
            text = text[first_nl + 1:]
        else:
            # Fence and payload share one line: drop just the backticks.
            text = text[3:]
        if text.rstrip().endswith("```"):
            text = text.rstrip()[:-3]
        text = text.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        logger.warning("Ollama : JSON invalide : %s", raw[:200])
        return None
    if not isinstance(parsed, dict):
        logger.warning("Ollama : JSON valide mais non-objet : %s", raw[:200])
        return None
    return parsed
def call_ollama(
    prompt: str,
    temperature: float = 0.1,
    max_tokens: int = 2500,
) -> dict | None:
    """Send a prompt to Ollama's ``/api/generate`` endpoint in JSON mode.

    The request is made at most twice: a second attempt happens only when the
    first answer could not be parsed by ``parse_json_response``. Any request
    error (connection, timeout, HTTP status, undecodable body) ends the call
    immediately.

    Args:
        prompt: Prompt text to send.
        temperature: Sampling temperature (default 0.1).
        max_tokens: Value sent as ``num_predict`` (default 2500).

    Returns:
        The parsed JSON answer, or ``None`` on any failure.
    """
    endpoint = f"{OLLAMA_URL}/api/generate"
    payload = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "format": "json",
        "options": {
            "temperature": temperature,
            "num_predict": max_tokens,
        },
    }

    for is_first_attempt in (True, False):
        try:
            response = requests.post(endpoint, json=payload, timeout=OLLAMA_TIMEOUT)
            response.raise_for_status()
            raw_text = response.json().get("response", "")
        except requests.ConnectionError:
            logger.warning("Ollama non disponible (connexion refusée)")
            return None
        except requests.Timeout:
            logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
            return None
        except (requests.RequestException, json.JSONDecodeError) as exc:
            logger.warning("Ollama erreur : %s", exc)
            return None

        parsed = parse_json_response(raw_text)
        if parsed is not None:
            return parsed
        if is_first_attempt:
            logger.info("Ollama : retry après échec de parsing")

    return None

View File

@@ -2,12 +2,17 @@
from __future__ import annotations
import json
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from ..config import Diagnostic, DossierMedical, RAGSource, OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
from ..config import (
ActeCCAM, Diagnostic, DossierMedical, RAGSource,
OLLAMA_CACHE_PATH, OLLAMA_MAX_PARALLEL, OLLAMA_MODEL,
)
from .cim10_dict import normalize_code, validate_code as cim10_validate
from .ccam_dict import validate_code as ccam_validate
from .ollama_client import call_ollama, parse_json_response
from .ollama_cache import OllamaCache
logger = logging.getLogger(__name__)
@@ -85,6 +90,52 @@ def search_similar(query: str, top_k: int = 10) -> list[dict]:
return final
def search_similar_ccam(query: str, top_k: int = 8) -> list[dict]:
    """Search the FAISS index for passages similar to ``query``, favouring CCAM sources.

    Up to ``2 * top_k`` neighbours are fetched, hits scoring below
    ``_MIN_SCORE`` are dropped, then the result is assembled as:

    1. the best CCAM hits, up to ``min(5, top_k)`` of them;
    2. the remaining hits (CCAM or not) by decreasing score, until ``top_k``
       items are reached.

    Args:
        query: Free-text description of the act.
        top_k: Maximum number of passages to return.

    Returns:
        At most ``top_k`` metadata dicts, each with an added ``"score"`` key.
        Empty when ``top_k`` is not positive, or when the index is unavailable
        or empty.
    """
    from .rag_index import get_index
    import numpy as np

    if top_k <= 0:
        return []

    result = get_index()
    if result is None:
        logger.warning("Index FAISS non disponible")
        return []

    faiss_index, metadata = result

    fetch_k = min(top_k * 2, faiss_index.ntotal)
    if fetch_k <= 0:
        # Empty index: nothing to search.
        return []

    model = _get_embed_model()
    query_vec = model.encode([query], normalize_embeddings=True)
    query_vec = np.array(query_vec, dtype=np.float32)

    scores, indices = faiss_index.search(query_vec, fetch_k)

    raw_results = []
    for score, idx in zip(scores[0], indices[0]):
        # Negative indices mark slots for which no neighbour was returned.
        if idx < 0:
            continue
        if float(score) < _MIN_SCORE:
            continue
        meta = metadata[idx].copy()
        meta["score"] = float(score)
        raw_results.append(meta)

    ccam_results = [r for r in raw_results if r["document"] == "ccam"]
    other_results = [r for r in raw_results if r["document"] != "ccam"]

    # Reserve up to 5 slots for CCAM hits, never more than top_k: otherwise
    # the slot count below turns negative and the slice appends nearly all
    # remaining hits.
    min_ccam = min(5, top_k, len(ccam_results))
    final = ccam_results[:min_ccam]

    remaining_slots = top_k - len(final)
    remaining = ccam_results[min_ccam:] + other_results
    remaining.sort(key=lambda r: r["score"], reverse=True)
    final.extend(remaining[:remaining_slots])

    return final
def _format_contexte(contexte: dict) -> str:
"""Formate le contexte patient de manière structurée pour le prompt."""
lines = []
@@ -193,31 +244,63 @@ Réponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant
}}"""
def _build_prompt_ccam(texte: str, sources: list[dict], contexte: dict) -> str:
    """Build the DIM-expert prompt asking the model to pick a CCAM code with structured reasoning.

    Args:
        texte: Description of the act to code; inserted verbatim in the prompt.
        sources: Retrieved passages. Each dict must have a ``"document"`` key;
            ``"code"``, ``"page"`` and ``"extrait"`` are optional.
        contexte: Patient/stay context, rendered by ``_format_contexte``.

    Returns:
        The full prompt text (in French), ending with the JSON answer template.
    """
    # Render every source as a header line followed by its excerpt.
    sources_text = ""
    for i, src in enumerate(sources, 1):
        # Map internal document ids to readable titles; unknown ids are shown as-is.
        doc_name = {
            "cim10": "CIM-10 FR 2026",
            "cim10_alpha": "CIM-10 Index Alphabétique 2026",
            "guide_methodo": "Guide Méthodologique MCO 2026",
            "ccam": "CCAM PMSI V4 2025",
        }.get(src["document"], src["document"])
        code_info = f" (code: {src['code']})" if src.get("code") else ""
        page_info = f" [page {src['page']}]" if src.get("page") else ""
        sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n"
        # Excerpts are capped at 800 characters.
        sources_text += (src.get("extrait", "")[:800]) + "\n\n"
    ctx_str = _format_contexte(contexte)
    # Doubled braces in the template below are literal braces in the f-string.
    return f"""Tu es un médecin DIM (Département d'Information Médicale) expert en codage CCAM PMSI.
Tu dois coder l'acte chirurgical/médical suivant en respectant STRICTEMENT la nomenclature CCAM.
RÈGLES IMPÉRATIVES :
- Le code doit provenir UNIQUEMENT des sources CCAM fournies
- Un code CCAM est composé de 4 lettres + 3 chiffres (ex: HMFC004)
- Vérifie l'activité (1=acte technique, 4=anesthésie) et le regroupement
- Tiens compte du tarif secteur 1 pour valider la cohérence
- Si plusieurs codes sont possibles, choisis le plus spécifique à l'acte décrit
- En cas de doute, indique confidence "low" plutôt que de proposer un code inadapté
ACTE À CODER : "{texte}"
CONTEXTE CLINIQUE :
{ctx_str}
SOURCES CCAM :
{sources_text}
Réponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant ou après :
{{
"analyse_acte": "que décrit cet acte sur le plan technique/chirurgical",
"codes_candidats": "quels codes CCAM des sources sont compatibles",
"discrimination": "pourquoi choisir ce code plutôt qu'un autre (activité, regroupement, tarif)",
"code": "ABCD123",
"confidence": "high ou medium ou low",
"justification": "explication courte en français"
}}"""
def _parse_ollama_response(raw: str) -> dict | None:
"""Parse la réponse JSON d'Ollama (mode JSON).
Reconstitue le raisonnement à partir des champs structurés.
"""
# Stripper les blocs markdown ```json ... ``` que certains modèles ajoutent
text = raw.strip()
if text.startswith("```"):
first_nl = text.find("\n")
if first_nl != -1:
text = text[first_nl + 1:]
# Retirer la fence fermante seulement si elle existe en fin de texte
if text.rstrip().endswith("```"):
text = text.rstrip()[:-3]
text = text.strip()
try:
parsed = json.loads(text)
except json.JSONDecodeError:
logger.warning("Ollama : JSON invalide : %s", raw[:200])
"""Parse la réponse JSON d'Ollama et reconstitue le raisonnement structuré."""
parsed = parse_json_response(raw)
if parsed is None:
return None
# Reconstituer le raisonnement à partir des champs structurés
reasoning_parts = []
for key in ("analyse_clinique", "codes_candidats", "discrimination", "regle_pmsi"):
for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"):
val = parsed.pop(key, None)
if val:
titre = key.replace("_", " ").upper()
@@ -229,59 +312,70 @@ def _parse_ollama_response(raw: str) -> dict | None:
def _call_ollama(prompt: str) -> dict | None:
"""Appelle Ollama (mode JSON) et parse la réponse. Retry une fois si parsing échoue."""
for attempt in range(2):
try:
response = requests.post(
f"{OLLAMA_URL}/api/generate",
json={
"model": OLLAMA_MODEL,
"prompt": prompt,
"stream": False,
"format": "json",
"options": {
"temperature": 0.1,
"num_predict": 2500,
},
},
timeout=OLLAMA_TIMEOUT,
"""Appelle Ollama (mode JSON) et parse la réponse avec reconstitution du raisonnement."""
result = call_ollama(prompt, temperature=0.1, max_tokens=2500)
if result is None:
return None
# Reconstituer le raisonnement structuré
reasoning_parts = []
for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"):
val = result.pop(key, None)
if val:
titre = key.replace("_", " ").upper()
reasoning_parts.append(f"{titre} :\n{val}")
if reasoning_parts:
result["raisonnement"] = "\n\n".join(reasoning_parts)
return result
def _apply_llm_result_diagnostic(diagnostic: Diagnostic, llm_result: dict) -> None:
"""Applique un résultat LLM (frais ou caché) à un Diagnostic."""
code = llm_result.get("code")
confidence = llm_result.get("confidence")
justification = llm_result.get("justification")
raisonnement = llm_result.get("raisonnement")
if code:
code = normalize_code(code)
is_valid, _ = cim10_validate(code)
if is_valid:
diagnostic.cim10_suggestion = code
else:
logger.warning(
"RAG : code Ollama %s invalide pour « %s », code ignoré",
code, diagnostic.texte,
)
response.raise_for_status()
raw = response.json().get("response", "")
result = _parse_ollama_response(raw)
if result is not None:
return result
if attempt == 0:
logger.info("Ollama : retry après échec de parsing")
except requests.ConnectionError:
logger.warning("Ollama non disponible (connexion refusée)")
return None
except requests.Timeout:
logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
return None
except (requests.RequestException, json.JSONDecodeError) as e:
logger.warning("Ollama erreur : %s", e)
return None
return None
if confidence in ("high", "medium", "low"):
diagnostic.cim10_confidence = confidence
if justification:
diagnostic.justification = justification
if raisonnement:
diagnostic.raisonnement = raisonnement
def enrich_diagnostic(
diagnostic: Diagnostic,
contexte: dict,
est_dp: bool = True,
cache: OllamaCache | None = None,
) -> None:
"""Enrichit un Diagnostic avec le RAG (FAISS + Ollama).
Modifie le diagnostic en place. Fallback gracieux si FAISS ou Ollama échouent.
"""
# 1. Recherche FAISS
diag_type = "dp" if est_dp else "das"
# 1. Vérifier le cache
cached = cache.get(diagnostic.texte, diag_type) if cache else None
# 2. Recherche FAISS (toujours, pour les sources_rag fraîches)
sources = search_similar(diagnostic.texte, top_k=10)
if not sources:
logger.debug("Aucune source RAG trouvée pour : %s", diagnostic.texte)
return
# 2. Stocker les sources RAG
# 3. Stocker les sources RAG
diagnostic.sources_rag = [
RAGSource(
document=s["document"],
@@ -292,30 +386,101 @@ def enrich_diagnostic(
for s in sources
]
# 3. Appel Ollama pour justification avec raisonnement structuré
# 4. Si cache hit, appliquer et court-circuiter Ollama
if cached is not None:
logger.info("Cache hit pour %s : « %s »", diag_type.upper(), diagnostic.texte)
_apply_llm_result_diagnostic(diagnostic, cached)
return
# 5. Appel Ollama pour justification avec raisonnement structuré
prompt = _build_prompt(diagnostic.texte, sources, contexte, est_dp=est_dp)
llm_result = _call_ollama(prompt)
if llm_result:
code = llm_result.get("code")
confidence = llm_result.get("confidence")
justification = llm_result.get("justification")
raisonnement = llm_result.get("raisonnement")
if code:
diagnostic.cim10_suggestion = code
if confidence in ("high", "medium", "low"):
diagnostic.cim10_confidence = confidence
if justification:
diagnostic.justification = justification
if raisonnement:
diagnostic.raisonnement = raisonnement
_apply_llm_result_diagnostic(diagnostic, llm_result)
if cache:
cache.put(diagnostic.texte, diag_type, llm_result)
else:
logger.info("Ollama non disponible — sources FAISS conservées sans justification LLM")
def _apply_llm_result_acte(acte: ActeCCAM, llm_result: dict) -> None:
"""Applique un résultat LLM (frais ou caché) à un ActeCCAM."""
code = llm_result.get("code")
confidence = llm_result.get("confidence")
justification = llm_result.get("justification")
raisonnement = llm_result.get("raisonnement")
if code:
code = code.strip().upper()
is_valid, _ = ccam_validate(code)
if is_valid:
acte.code_ccam_suggestion = code
else:
logger.warning(
"RAG : code CCAM Ollama %s invalide pour « %s », code ignoré",
code, acte.texte,
)
if confidence in ("high", "medium", "low"):
acte.ccam_confidence = confidence
if justification:
acte.justification = justification
if raisonnement:
acte.raisonnement = raisonnement
def enrich_acte(acte: ActeCCAM, contexte: dict, cache: OllamaCache | None = None) -> None:
    """Enrich a CCAM act in place using retrieval (FAISS) plus the LLM (Ollama).

    Flow:
        1. Look the act up in the cache, when one is given.
        2. Retrieve CCAM-prioritised sources; stop if none are found.
        3. Store the sources on the act (excerpts cut to 200 characters).
        4. On a cache hit, apply the cached answer and skip the LLM call.
        5. Otherwise call the LLM, apply its answer and cache it.

    When the LLM returns nothing, the retrieved sources are kept and no coding
    fields are changed.

    Args:
        acte: Act to enrich.
        contexte: Patient/stay context passed to the prompt builder.
        cache: Optional result cache shared across calls.
    """
    # ``OllamaCache`` defines ``__len__``, so a plain truthiness test is False
    # for an empty cache and nothing would ever be stored: test for None.
    has_cache = cache is not None

    # 1. Cache lookup
    cached = cache.get(acte.texte, "ccam") if has_cache else None

    # 2. FAISS search (CCAM sources first)
    sources = search_similar_ccam(acte.texte, top_k=8)
    if not sources:
        logger.debug("Aucune source RAG CCAM trouvée pour : %s", acte.texte)
        return

    # 3. Keep the retrieved sources on the act
    acte.sources_rag = [
        RAGSource(
            document=s["document"],
            page=s.get("page"),
            code=s.get("code"),
            extrait=s.get("extrait", "")[:200],
        )
        for s in sources
    ]

    # 4. Cache hit: apply and skip Ollama
    if cached is not None:
        logger.info("Cache hit pour CCAM : « %s »", acte.texte)
        _apply_llm_result_acte(acte, cached)
        return

    # 5. Ask Ollama for a code with structured reasoning
    prompt = _build_prompt_ccam(acte.texte, sources, contexte)
    llm_result = _call_ollama(prompt)

    if llm_result:
        _apply_llm_result_acte(acte, llm_result)
        if has_cache:
            cache.put(acte.texte, "ccam", llm_result)
    else:
        logger.info("Ollama non disponible — sources FAISS CCAM conservées sans justification LLM")
def enrich_dossier(dossier: DossierMedical) -> None:
"""Enrichit le DP et tous les DAS d'un dossier via le RAG."""
"""Enrichit le DP et tous les DAS d'un dossier via le RAG.
Utilise un cache persistant et parallélise les appels Ollama
pour les DAS et actes CCAM (max_workers = OLLAMA_MAX_PARALLEL).
"""
cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL)
contexte = {
"sexe": dossier.sejour.sexe,
"age": dossier.sejour.age,
@@ -327,11 +492,12 @@ def enrich_dossier(dossier: DossierMedical) -> None:
"complications": dossier.complications,
}
# Phase 1 : DP seul (le contexte DAS en dépend)
if dossier.diagnostic_principal:
logger.info("RAG enrichissement DP : %s", dossier.diagnostic_principal.texte)
enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True)
enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True, cache=cache)
# Pour les DAS, ajouter le DP et les DAS existants au contexte pour cohérence
# Mettre à jour le contexte avec le DP pour les DAS
if dossier.diagnostic_principal:
contexte["dp_texte"] = dossier.diagnostic_principal.texte
contexte["das_codes_existants"] = [
@@ -340,6 +506,20 @@ def enrich_dossier(dossier: DossierMedical) -> None:
if d.cim10_suggestion
]
for das in dossier.diagnostics_associes:
logger.info("RAG enrichissement DAS : %s", das.texte)
enrich_diagnostic(das, contexte, est_dp=False)
# Phase 2 : DAS + Actes en parallèle
das_list = dossier.diagnostics_associes
actes_list = dossier.actes_ccam
if das_list or actes_list:
with ThreadPoolExecutor(max_workers=OLLAMA_MAX_PARALLEL) as executor:
futures = []
for das in das_list:
logger.info("RAG enrichissement DAS : %s", das.texte)
futures.append(executor.submit(enrich_diagnostic, das, contexte, False, cache))
for acte in actes_list:
logger.info("RAG enrichissement CCAM : %s", acte.texte)
futures.append(executor.submit(enrich_acte, acte, contexte, cache))
for f in as_completed(futures):
f.result() # propage les exceptions
cache.save()

View File

@@ -158,7 +158,7 @@ def evaluate_severity(diagnostic) -> SeverityInfo:
return info
def enrich_dossier_severity(dp, das_list: list) -> list[str]:
def enrich_dossier_severity(dp, das_list: list) -> tuple[list[str], int, int]:
"""Enrichit les diagnostics d'un dossier avec les informations de sévérité.
Modifie les diagnostics en place (attributs est_cma, est_cms, niveau_severite).
@@ -168,7 +168,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
das_list: Liste des diagnostics associés.
Returns:
Liste d'alertes de sévérité générées.
(alertes, cma_count, cms_count).
"""
alertes = []
@@ -181,6 +181,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
# Évaluer chaque DAS
cma_count = 0
cms_count = 0
for das in das_list:
if not das.cim10_suggestion:
continue
@@ -189,6 +190,10 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
if info.est_cma_probable:
das.est_cma = True
cma_count += 1
# CMS = CMA sévère
if info.niveau_severite == "severe":
das.est_cms = True
cms_count += 1
alertes.append(
f"CMA probable : '{das.texte}' ({das.cim10_suggestion}) — "
f"sévérité {info.niveau_severite}"
@@ -198,4 +203,4 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
if cma_count >= 2:
alertes.insert(0, f"{cma_count} CMA probables détectées — impact potentiel sur le niveau de sévérité GHM")
return alertes
return alertes, cma_count, cms_count

View File

@@ -69,6 +69,125 @@
</div>
{% endif %}
{# ---- Estimation GHM ---- #}
{% if dossier.ghm_estimation %}
{% set ghm = dossier.ghm_estimation %}
<div class="card section" style="border-left:4px solid #8b5cf6;">
<h3 style="color:#6d28d9;">Estimation GHM</h3>
<div class="info-grid">
{% if ghm.cmd %}
<div class="info-item">
<label>CMD</label>
<span><strong>{{ ghm.cmd }}</strong>{% if ghm.cmd_libelle %} — {{ ghm.cmd_libelle }}{% endif %}</span>
</div>
{% endif %}
<div class="info-item">
<label>Type</label>
{% if ghm.type_ghm == 'C' %}
<span class="badge" style="background:#fee2e2;color:#dc2626;">C — Chirurgical</span>
{% elif ghm.type_ghm == 'K' %}
<span class="badge" style="background:#fef3c7;color:#92400e;">K — Interventionnel</span>
{% elif ghm.type_ghm == 'M' %}
<span class="badge" style="background:#dbeafe;color:#1d4ed8;">M — Médical</span>
{% endif %}
</div>
<div class="info-item">
<label>Sévérité</label>
{% if ghm.severite <= 1 %}
<span class="badge" style="background:#d1fae5;color:#065f46;">Niveau {{ ghm.severite }}</span>
{% elif ghm.severite == 2 %}
<span class="badge" style="background:#fef3c7;color:#92400e;">Niveau {{ ghm.severite }}</span>
{% elif ghm.severite == 3 %}
<span class="badge" style="background:#fed7aa;color:#9a3412;">Niveau {{ ghm.severite }}</span>
{% else %}
<span class="badge" style="background:#fee2e2;color:#dc2626;">Niveau {{ ghm.severite }}</span>
{% endif %}
</div>
{% if ghm.ghm_approx %}
<div class="info-item">
<label>Code GHM approx.</label>
<code style="font-size:1.1rem;font-weight:700;letter-spacing:0.05em;">{{ ghm.ghm_approx }}</code>
</div>
{% endif %}
<div class="info-item">
<label>CMA / CMS</label>
<span>{{ ghm.cma_count }} CMA, {{ ghm.cms_count }} CMS</span>
</div>
</div>
{% if ghm.alertes %}
<div style="margin-top:0.75rem;">
{% for alerte in ghm.alertes %}
<div style="font-size:0.8rem;color:#c2410c;margin-bottom:0.2rem;">{{ alerte }}</div>
{% endfor %}
</div>
{% endif %}
<div style="margin-top:0.75rem;font-size:0.7rem;color:#94a3b8;font-style:italic;">
Estimation heuristique — le GHM définitif nécessite le groupeur officiel ATIH
</div>
</div>
{% endif %}
{# ---- Contrôle CPAM ---- #}
{% if dossier.controles_cpam %}
<div class="card section" style="border-left:4px solid #f59e0b;">
<h3 style="color:#b45309;">Contrôle CPAM ({{ dossier.controles_cpam|length }})</h3>
{% for ctrl in dossier.controles_cpam %}
<div style="margin-bottom:1.5rem;{% if not loop.last %}border-bottom:1px solid #e2e8f0;padding-bottom:1rem;{% endif %}">
<div style="display:flex;align-items:center;gap:0.5rem;margin-bottom:0.5rem;">
<strong>OGC {{ ctrl.numero_ogc }} — {{ ctrl.titre }}</strong>
{% if 'retient' in ctrl.decision_ucr|lower %}
<span class="badge" style="background:#d1fae5;color:#065f46;">{{ ctrl.decision_ucr }}</span>
{% elif 'confirme' in ctrl.decision_ucr|lower %}
<span class="badge" style="background:#fee2e2;color:#dc2626;">{{ ctrl.decision_ucr }}</span>
{% else %}
<span class="badge" style="background:#e0e7ff;color:#3730a3;">{{ ctrl.decision_ucr }}</span>
{% endif %}
</div>
{# Argument CPAM #}
{% if ctrl.arg_ucr %}
<div style="border-left:3px solid #f59e0b;padding:0.5rem 0.75rem;background:#fffbeb;margin-bottom:0.75rem;font-size:0.85rem;color:#78350f;">
<div style="font-size:0.7rem;color:#92400e;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Argument CPAM</div>
{{ ctrl.arg_ucr }}
</div>
{% endif %}
{# Codes contestés #}
{% if ctrl.dp_ucr or ctrl.da_ucr or ctrl.dr_ucr or ctrl.actes_ucr %}
<div style="margin-bottom:0.75rem;">
<div style="font-size:0.7rem;color:#64748b;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Codes contestés</div>
<div style="display:flex;gap:0.5rem;flex-wrap:wrap;">
{% if ctrl.dp_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DP: {{ ctrl.dp_ucr }}</span>{% endif %}
{% if ctrl.da_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DA: {{ ctrl.da_ucr }}</span>{% endif %}
{% if ctrl.dr_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DR: {{ ctrl.dr_ucr }}</span>{% endif %}
{% if ctrl.actes_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">Actes: {{ ctrl.actes_ucr }}</span>{% endif %}
</div>
</div>
{% endif %}
{# Contre-argumentation #}
{% if ctrl.contre_argumentation %}
<div style="border-left:3px solid #3b82f6;padding:0.5rem 0.75rem;background:#eff6ff;margin-bottom:0.75rem;font-size:0.85rem;color:#1e3a5f;">
<div style="font-size:0.7rem;color:#1d4ed8;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Contre-argumentation</div>
<pre style="white-space:pre-wrap;font-family:inherit;margin:0;">{{ ctrl.contre_argumentation }}</pre>
</div>
{% endif %}
{# Sources RAG #}
{% if ctrl.sources_reponse %}
<details>
<summary style="font-size:0.8rem;color:#64748b;">Sources RAG ({{ ctrl.sources_reponse|length }})</summary>
{% for src in ctrl.sources_reponse %}
<pre style="font-size:0.75rem;">{{ src.document }}{% if src.code %} — {{ src.code }}{% endif %}{% if src.page %} [p.{{ src.page }}]{% endif %}
{{ src.extrait or '' }}</pre>
{% endfor %}
</details>
{% endif %}
</div>
{% endfor %}
</div>
{% endif %}
{# ---- Alertes de codage ---- #}
{% if dossier.alertes_codage %}
<div class="card section" style="border-left:4px solid #f97316;background:#fff7ed;">

130
tests/test_cpam_parser.py Normal file
View File

@@ -0,0 +1,130 @@
"""Tests pour le parser de contrôle CPAM."""
import tempfile
from pathlib import Path
import openpyxl
import pytest
from src.config import ControleCPAM
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a one-sheet test workbook: the CPAM header row followed by ``rows``."""
    header = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "OGC Contrôle T2A"
    for line in (header, *rows):
        sheet.append(line)
    workbook.save(path)
class TestParseCpamExcel:
    """Checks for ``parse_cpam_excel`` on small generated workbooks."""

    def test_parse_basic(self, tmp_path):
        """Two rows with distinct OGC numbers give one control per number."""
        workbook_path = tmp_path / "test.xlsx"
        lines = [
            (17, "Désaccord sur les DAS", "Argument UCR...", "UCR retient", None, None, None, None),
            (21, "Désaccord sur le DP", "Autre argument", "UCR confirme avis", "K85.1", None, None, None),
        ]
        _create_test_xlsx(lines, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        for ogc in (17, 21):
            assert ogc in parsed
            assert len(parsed[ogc]) == 1
        das_control = parsed[17][0]
        assert das_control.titre == "Désaccord sur les DAS"
        assert das_control.decision_ucr == "UCR retient"
        assert parsed[21][0].dp_ucr == "K85.1"

    def test_parse_multiple_same_ogc(self, tmp_path):
        """Rows sharing an OGC number are grouped under the same key."""
        workbook_path = tmp_path / "test.xlsx"
        lines = [
            (17, "Titre 1", "Arg 1", "Décision 1", None, None, None, None),
            (17, "Titre 2", "Arg 2", "Décision 2", None, None, None, None),
        ]
        _create_test_xlsx(lines, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert len(parsed[17]) == 2

    def test_parse_empty_file(self, tmp_path):
        """A workbook holding only the header row parses to an empty dict."""
        workbook_path = tmp_path / "empty.xlsx"
        _create_test_xlsx([], workbook_path)

        assert parse_cpam_excel(workbook_path) == {}

    def test_parse_nonexistent_file(self):
        """A missing file parses to an empty dict."""
        assert parse_cpam_excel("/nonexistent/path.xlsx") == {}

    def test_parse_optional_fields(self, tmp_path):
        """The four optional code columns are carried over as given."""
        workbook_path = tmp_path / "test.xlsx"
        _create_test_xlsx(
            [(42, "Titre", "Arg", "Décision", "E11.40", "G63.2", "E11.9", "ABCD123")],
            workbook_path,
        )

        control = parse_cpam_excel(workbook_path)[42][0]

        assert control.dp_ucr == "E11.40"
        assert control.da_ucr == "G63.2"
        assert control.dr_ucr == "E11.9"
        assert control.actes_ucr == "ABCD123"
class TestMatchDossierOGC:
    """Tests for ``match_dossier_ogc`` (dossier name -> CPAM controls)."""

    def setup_method(self):
        """Build a small CPAM mapping with controls for OGC 17 and 21."""
        self.cpam_data = {
            17: [ControleCPAM(numero_ogc=17, titre="Test 17")],
            21: [ControleCPAM(numero_ogc=21, titre="Test 21")],
        }

    def test_match_found(self):
        """A dossier name starting with "17_" matches the OGC 17 control."""
        result = match_dossier_ogc("17_23100690", self.cpam_data)
        assert len(result) == 1
        assert result[0].numero_ogc == 17

    def test_match_not_found(self):
        """A prefix absent from the mapping gives an empty list."""
        result = match_dossier_ogc("15_23096332", self.cpam_data)
        assert result == []

    def test_match_no_prefix(self):
        """A name without a numeric prefix gives an empty list."""
        result = match_dossier_ogc("nodash", self.cpam_data)
        assert result == []

    def test_match_empty_data(self):
        """An empty CPAM mapping gives an empty list."""
        result = match_dossier_ogc("17_23100690", {})
        assert result == []
class TestControleCPAMModel:
    """Round-trip tests for the ``ControleCPAM`` model."""

    def test_serialization(self):
        """``model_dump`` exposes set fields and a None counter-argument."""
        ctrl = ControleCPAM(
            numero_ogc=17,
            titre="Désaccord sur les DAS",
            arg_ucr="Argument...",
            decision_ucr="UCR retient",
            dp_ucr="K85.1",
        )
        data = ctrl.model_dump()
        assert data["numero_ogc"] == 17
        assert data["dp_ucr"] == "K85.1"
        assert data["contre_argumentation"] is None

    def test_deserialization(self):
        """Building from a dict keeps given fields; sources default to []."""
        data = {
            "numero_ogc": 21,
            "titre": "Test",
            "arg_ucr": "Arg",
            "decision_ucr": "Décision",
            "contre_argumentation": "Ma réponse",
        }
        ctrl = ControleCPAM(**data)
        assert ctrl.numero_ogc == 21
        assert ctrl.contre_argumentation == "Ma réponse"
        assert ctrl.sources_reponse == []

146
tests/test_cpam_response.py Normal file
View File

@@ -0,0 +1,146 @@
"""Tests pour la génération de contre-argumentation CPAM."""
from unittest.mock import patch
import pytest
from src.config import ControleCPAM, Diagnostic, DossierMedical, RAGSource, Sejour
from src.control.cpam_response import _build_cpam_prompt, _format_response, generate_cpam_response
def _make_dossier() -> DossierMedical:
    """Build the medical dossier fixture shared by the tests in this file.

    Returns:
        A ``DossierMedical`` with a stay, a principal diagnosis (K81.0) and
        one associated diagnosis (K56.0).
    """
    return DossierMedical(
        source_file="test.pdf",
        document_type="crh",
        sejour=Sejour(sexe="M", age=65, duree_sejour=5),
        diagnostic_principal=Diagnostic(
            texte="Cholécystite aiguë",
            cim10_suggestion="K81.0",
        ),
        diagnostics_associes=[
            Diagnostic(texte="Iléus réflexe", cim10_suggestion="K56.0"),
        ],
    )
def _make_controle() -> ControleCPAM:
    """Build the CPAM control fixture shared by the tests in this file.

    Returns:
        A ``ControleCPAM`` for OGC 17 contesting the associated diagnosis
        K56.0, with no principal-diagnosis proposal.
    """
    return ControleCPAM(
        numero_ogc=17,
        titre="Désaccord sur les DAS",
        arg_ucr="L'UCR confirme l'avis des médecins contrôleurs au motif que le DAS K56.0 n'est pas justifié.",
        decision_ucr="UCR confirme avis médecins contrôleurs",
        dp_ucr=None,
        da_ucr="K56.0",
    )
class TestBuildPrompt:
    """Tests for the text produced by ``_build_cpam_prompt``."""

    def test_prompt_contains_dossier_info(self):
        """The prompt embeds the diagnoses, their codes and the patient age."""
        dossier = _make_dossier()
        controle = _make_controle()
        prompt = _build_cpam_prompt(dossier, controle, [])
        assert "Cholécystite aiguë" in prompt
        assert "K81.0" in prompt
        assert "Iléus réflexe" in prompt
        assert "65 ans" in prompt

    def test_prompt_contains_cpam_argument(self):
        """The prompt embeds the UCR argument and decision verbatim."""
        dossier = _make_dossier()
        controle = _make_controle()
        prompt = _build_cpam_prompt(dossier, controle, [])
        assert controle.arg_ucr in prompt
        assert controle.decision_ucr in prompt

    def test_prompt_contains_codes_contestes(self):
        """The prompt lists the associated-diagnosis codes proposed by UCR."""
        dossier = _make_dossier()
        controle = _make_controle()
        prompt = _build_cpam_prompt(dossier, controle, [])
        assert "DA proposés par UCR : K56.0" in prompt

    def test_prompt_contains_rag_sources(self):
        """RAG sources appear under their display names, with page numbers."""
        dossier = _make_dossier()
        controle = _make_controle()
        sources = [
            {"document": "guide_methodo", "page": 64, "extrait": "Texte du guide..."},
            {"document": "cim10", "code": "K56.0", "extrait": "Iléus paralytique..."},
        ]
        prompt = _build_cpam_prompt(dossier, controle, sources)
        assert "Guide Méthodologique MCO 2026" in prompt
        assert "CIM-10 FR 2026" in prompt
        assert "page 64" in prompt
class TestFormatResponse:
    """Tests for ``_format_response`` (parsed LLM dict -> sectioned text)."""

    def test_full_response(self):
        """All sections render, except points of agreement set to "Aucun"."""
        parsed = {
            "analyse_contestation": "La CPAM conteste le DAS K56.0",
            "points_accord": "Aucun",
            "contre_arguments": "Le guide méthodologique précise...",
            "references": "Guide métho p.64",
            "conclusion": "Le DAS est justifié",
        }
        text = _format_response(parsed)
        assert "ANALYSE DE LA CONTESTATION" in text
        assert "CONTRE-ARGUMENTS" in text
        assert "CONCLUSION" in text
        # "Aucun" (French for "none") must not produce the points-of-agreement section.
        assert "POINTS D'ACCORD" not in text

    def test_partial_response(self):
        """Only the sections present in the parsed dict are required."""
        parsed = {
            "contre_arguments": "Arguments...",
            "conclusion": "Conclusion...",
        }
        text = _format_response(parsed)
        assert "CONTRE-ARGUMENTS" in text
        assert "CONCLUSION" in text

    def test_empty_response(self):
        """An empty parsed dict formats to the empty string."""
        text = _format_response({})
        assert text == ""
class TestGenerateResponse:
    """Tests for ``generate_cpam_response`` with RAG and Ollama patched out."""

    # Decorators apply bottom-up: the innermost patch (_search_rag_for_control)
    # is the first mock argument, call_ollama the second.
    @patch("src.control.cpam_response.call_ollama")
    @patch("src.control.cpam_response._search_rag_for_control")
    def test_generate_success(self, mock_rag, mock_ollama):
        """A successful LLM call returns formatted text plus the RAG sources."""
        mock_rag.return_value = [
            {"document": "guide_methodo", "page": 64, "extrait": "Texte guide"},
        ]
        mock_ollama.return_value = {
            "analyse_contestation": "Analyse...",
            "contre_arguments": "Contre-arguments...",
            "conclusion": "Conclusion...",
        }
        dossier = _make_dossier()
        controle = _make_controle()
        text, sources = generate_cpam_response(dossier, controle)
        assert "Contre-arguments..." in text
        assert len(sources) == 1
        assert sources[0].document == "guide_methodo"
        mock_ollama.assert_called_once()

    @patch("src.control.cpam_response.call_ollama")
    @patch("src.control.cpam_response._search_rag_for_control")
    def test_generate_ollama_unavailable(self, mock_rag, mock_ollama):
        """When the LLM call returns None the result is empty text and sources."""
        mock_rag.return_value = []
        mock_ollama.return_value = None
        dossier = _make_dossier()
        controle = _make_controle()
        text, sources = generate_cpam_response(dossier, controle)
        assert text == ""
        assert sources == []

View File

@@ -104,3 +104,59 @@ class TestIsValidDiagnosticText:
def test_accept_sepsis(self):
    """A genuine clinical label ("Sepsis sévère") is accepted."""
    assert is_valid_diagnostic_text("Sepsis sévère")
# --- Règle 5 modifiée : mots dupliqués (2 mots identiques) ---
def test_reject_absence_absence(self):
    """Two identical words differing only by case are rejected."""
    assert not is_valid_diagnostic_text("Absence absence")
def test_reject_anticoagulant_anticoagulant(self):
    """The duplicated word "Anticoagulant anticoagulant" is rejected."""
    assert not is_valid_diagnostic_text("Anticoagulant anticoagulant")
def test_reject_ventilation_ventilation(self):
    """The duplicated word "Ventilation ventilation" is rejected."""
    assert not is_valid_diagnostic_text("Ventilation ventilation")
# --- Règle 7 : ponctuation initiale ---
def test_reject_comma_prefix(self):
    """Text starting with a comma is rejected."""
    assert not is_valid_diagnostic_text(", sans précision")
def test_reject_dash_prefix(self):
    """Text starting with a dash is rejected."""
    assert not is_valid_diagnostic_text("- masse musculaire")
# --- Règle 8 : valeurs numériques OCR "À X.X" ---
def test_reject_a_accent_value(self):
    """An accented "À" followed by a decimal value is rejected."""
    assert not is_valid_diagnostic_text("À 0.1")
def test_reject_a_accent_value_3(self):
    """A second "À <number>" sample ("À 3.0") is rejected."""
    assert not is_valid_diagnostic_text("À 3.0")
def test_reject_a_value(self):
    """An unaccented "A" followed by a comma-decimal value is rejected."""
    assert not is_valid_diagnostic_text("A 12,5")
# --- Règle 9 : crochets (artefacts OCR) ---
def test_reject_bracket_fragment(self):
    """Text containing an opening square bracket is rejected."""
    assert not is_valid_diagnostic_text("Episode [episode")
def test_reject_closing_bracket(self):
    """Text containing a closing square bracket is rejected."""
    assert not is_valid_diagnostic_text("valeur]")
# --- Règle 10 : termes de laboratoire isolés ---
def test_reject_hemoglobine(self):
    """The lone laboratory term "Hémoglobine" is rejected."""
    assert not is_valid_diagnostic_text("Hémoglobine")
def test_reject_creatinine(self):
    """The lone laboratory term "Créatinine" is rejected."""
    assert not is_valid_diagnostic_text("Créatinine")
def test_accept_hemoglobine_in_phrase(self):
    """A laboratory term used within a clinical phrase is accepted."""
    assert is_valid_diagnostic_text("Hémoglobine basse avec anémie")
# --- Règle 11 : fragments anatomiques courts ---
def test_reject_dans_la_vessie(self):
    """The short anatomical fragment "Dans la vessie" is rejected."""
    assert not is_valid_diagnostic_text("Dans la vessie")
def test_reject_le_rein(self):
    """The short anatomical fragment "Le rein" is rejected."""
    assert not is_valid_diagnostic_text("Le rein")
def test_accept_long_fragment(self):
    """A long phrase starting with "Dans" can be legitimate and is accepted."""
    assert is_valid_diagnostic_text("Dans le cadre d'une insuffisance rénale chronique terminale")

189
tests/test_ghm.py Normal file
View File

@@ -0,0 +1,189 @@
"""Tests pour le module d'estimation GHM."""
import pytest
from src.config import ActeCCAM, Diagnostic, DossierMedical
from src.medical.ghm import estimate_ghm, find_cmd, _detect_type_ghm, _compute_severity
class TestFindCMD:
    """Tests for ``find_cmd`` (CIM-10 code -> CMD number and label)."""

    def test_k85_hepatobilaire(self):
        """K85.1 maps to CMD 07 with a hepatobiliary/pancreatic label."""
        cmd, libelle = find_cmd("K85.1")
        assert cmd == "07"
        assert "hépatobiliaire" in libelle.lower() or "pancréat" in libelle.lower()

    def test_j18_respiratoire(self):
        """J18 maps to CMD 04."""
        cmd, _ = find_cmd("J18")
        assert cmd == "04"

    def test_n17_renal(self):
        """N17 maps to CMD 11."""
        cmd, _ = find_cmd("N17")
        assert cmd == "11"

    def test_n40_genital_masculin(self):
        """N40 maps to CMD 12."""
        cmd, _ = find_cmd("N40")
        assert cmd == "12"

    def test_f10_toxicomanie(self):
        """F10 maps to CMD 20."""
        cmd, _ = find_cmd("F10")
        assert cmd == "20"

    def test_z00_facteurs(self):
        """Z00 maps to CMD 23."""
        cmd, _ = find_cmd("Z00")
        assert cmd == "23"

    def test_k40_digestif(self):
        """K40 maps to CMD 06."""
        cmd, _ = find_cmd("K40")
        assert cmd == "06"

    def test_b20_vih(self):
        """B20 maps to CMD 25."""
        cmd, _ = find_cmd("B20")
        assert cmd == "25"

    def test_t25_brulures(self):
        """T25 maps to CMD 22."""
        cmd, _ = find_cmd("T25")
        assert cmd == "22"

    def test_s72_traumatismes(self):
        """S72 maps to CMD 21."""
        cmd, _ = find_cmd("S72")
        assert cmd == "21"

    def test_code_with_dot(self):
        """A dotted code is accepted."""
        cmd, _ = find_cmd("K85.1")
        assert cmd == "07"

    def test_code_lowercase(self):
        """A lowercase code resolves like its uppercase form."""
        cmd, _ = find_cmd("k85.1")
        assert cmd == "07"

    def test_empty_code(self):
        """An empty code gives (None, None)."""
        cmd, libelle = find_cmd("")
        assert cmd is None
        assert libelle is None

    def test_none_code(self):
        """A None code gives (None, None)."""
        cmd, libelle = find_cmd(None)
        assert cmd is None
        assert libelle is None

    def test_short_code(self):
        """A code shorter than three characters has no CMD."""
        cmd, libelle = find_cmd("K8")
        assert cmd is None
class TestDetectTypeGHM:
    """Tests for ``_detect_type_ghm`` (acts -> "C", "K" or "M")."""

    def test_chirurgical(self):
        """The act HMFC004 is expected to give type "C"."""
        actes = [ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")]
        assert _detect_type_ghm(actes) == "C"

    def test_interventionnel(self):
        """The act ZCQM001 is expected to give type "K"."""
        actes = [ActeCCAM(texte="Échographie", code_ccam_suggestion="ZCQM001")]
        assert _detect_type_ghm(actes) == "K"

    def test_medical_no_actes(self):
        """No acts at all gives type "M"."""
        assert _detect_type_ghm([]) == "M"

    def test_medical_no_code(self):
        """An act without a CCAM code gives type "M"."""
        actes = [ActeCCAM(texte="Biopsie", code_ccam_suggestion=None)]
        assert _detect_type_ghm(actes) == "M"

    def test_chirurgical_overrides_interventionnel(self):
        """With both kinds of act present, "C" takes precedence over "K"."""
        actes = [
            ActeCCAM(texte="Écho", code_ccam_suggestion="ZCQM001"),
            ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
        ]
        assert _detect_type_ghm(actes) == "C"
class TestSeverityLevels:
    """Tests for ``_compute_severity`` (returns level, CMA count, CMS count)."""

    def test_no_cma_level_1(self):
        """An associated diagnosis with no CMA flag gives level 1."""
        das = [Diagnostic(texte="HTA", cim10_suggestion="I10")]
        niveau, cma, cms = _compute_severity(das)
        assert niveau == 1

    def test_two_cma_level_2(self):
        """Two CMA diagnoses give level 2."""
        das = [
            Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
        ]
        niveau, cma, cms = _compute_severity(das)
        assert niveau == 2
        assert cma == 2

    def test_one_cms_level_3(self):
        """A single CMS diagnosis gives level 3."""
        das = [
            Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9", est_cma=True, est_cms=True),
        ]
        niveau, cma, cms = _compute_severity(das)
        assert niveau == 3
        assert cms == 1

    def test_two_cms_level_4(self):
        """Two CMS diagnoses give level 4."""
        das = [
            Diagnostic(texte="Sepsis", cim10_suggestion="A41.9", est_cma=True, est_cms=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True, est_cms=True),
        ]
        niveau, cma, cms = _compute_severity(das)
        assert niveau == 4
        assert cms == 2

    def test_three_cma_level_3(self):
        """Three CMA diagnoses (no CMS) give level 3."""
        das = [
            Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
            Diagnostic(texte="Diabète", cim10_suggestion="E11.9", est_cma=True),
        ]
        niveau, cma, cms = _compute_severity(das)
        assert niveau == 3
        assert cma == 3
class TestEstimateGHM:
    """End-to-end tests for ``estimate_ghm`` on whole dossiers."""

    def test_chirurgical_with_cma(self):
        """K80.1 with act HMFC004 and two CMA gives the estimate "07C??2"."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K80.1"),
            actes_ccam=[ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")],
            diagnostics_associes=[
                Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
                Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
            ],
        )
        ghm = estimate_ghm(dossier)
        assert ghm.cmd == "07"
        assert ghm.type_ghm == "C"
        assert ghm.severite == 2
        assert ghm.ghm_approx == "07C??2"
        assert ghm.cma_count == 2

    def test_medical_sans_actes(self):
        """J18.9 with no acts and no associated diagnoses gives "04M??1"."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"),
        )
        ghm = estimate_ghm(dossier)
        assert ghm.cmd == "04"
        assert ghm.type_ghm == "M"
        assert ghm.severite == 1
        assert ghm.ghm_approx == "04M??1"

    def test_dp_absent(self):
        """Without a principal diagnosis no GHM is estimated and an alert is raised."""
        dossier = DossierMedical()
        ghm = estimate_ghm(dossier)
        assert ghm.cmd is None
        assert ghm.ghm_approx is None
        assert any("DP absent" in a for a in ghm.alertes)

    def test_dp_sans_code(self):
        """A principal diagnosis lacking a code gives no CMD and an alert."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Douleur thoracique"),
        )
        ghm = estimate_ghm(dossier)
        assert ghm.cmd is None
        assert any("sans code" in a for a in ghm.alertes)

    def test_dp_symptomatique(self):
        """The symptom code R07.4 maps to CMD 23 and raises a "symptomatique" alert."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Douleur thoracique", cim10_suggestion="R07.4"),
        )
        ghm = estimate_ghm(dossier)
        assert ghm.cmd == "23"
        assert any("symptomatique" in a for a in ghm.alertes)

108
tests/test_ollama_cache.py Normal file
View File

@@ -0,0 +1,108 @@
"""Tests unitaires pour le cache Ollama persistant."""
import json
import threading
import pytest
from src.medical.ollama_cache import OllamaCache
class TestOllamaCache:
    """Unit tests for the persistent ``OllamaCache``."""

    def test_get_miss(self, tmp_path):
        """Looking up a key that was never stored returns None."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert cache.get("HTA", "das") is None

    def test_put_and_get(self, tmp_path):
        """A stored result is returned unchanged."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        result = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"}
        cache.put("HTA", "das", result)
        assert cache.get("HTA", "das") == result

    def test_key_normalization(self, tmp_path):
        """Keys ignore surrounding whitespace and letter case."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        result = {"code": "I10", "confidence": "high"}
        cache.put(" HTA ", "das", result)
        assert cache.get("hta", "das") == result

    def test_different_types_different_keys(self, tmp_path):
        """The same text stored under two types keeps two separate entries."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        cache.put("Diabète", "dp", {"code": "E11.9"})
        cache.put("Diabète", "das", {"code": "E11.8"})
        assert cache.get("Diabète", "dp")["code"] == "E11.9"
        assert cache.get("Diabète", "das")["code"] == "E11.8"

    def test_save_and_reload(self, tmp_path):
        """Entries saved to disk are visible to a new cache on the same path."""
        path = tmp_path / "cache.json"
        cache = OllamaCache(path, "gemma3:12b")
        cache.put("HTA", "das", {"code": "I10"})
        cache.save()
        assert path.exists()
        cache2 = OllamaCache(path, "gemma3:12b")
        assert cache2.get("HTA", "das") == {"code": "I10"}

    def test_save_no_write_if_clean(self, tmp_path):
        """Saving a cache with no changes does not create the file."""
        path = tmp_path / "cache.json"
        cache = OllamaCache(path, "gemma3:12b")
        cache.save()
        assert not path.exists()

    def test_model_change_invalidates(self, tmp_path):
        """Reopening the file with a different model name drops all entries."""
        path = tmp_path / "cache.json"
        cache = OllamaCache(path, "gemma3:12b")
        cache.put("HTA", "das", {"code": "I10"})
        cache.save()
        cache2 = OllamaCache(path, "llama3:8b")
        assert cache2.get("HTA", "das") is None
        assert len(cache2) == 0

    def test_corrupted_file(self, tmp_path):
        """An unparseable cache file results in an empty cache."""
        path = tmp_path / "cache.json"
        path.write_text("not valid json", encoding="utf-8")
        cache = OllamaCache(path, "gemma3:12b")
        assert len(cache) == 0
        assert cache.get("HTA", "das") is None

    def test_len(self, tmp_path):
        """``len`` reports the number of stored entries."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert len(cache) == 0
        cache.put("HTA", "das", {"code": "I10"})
        assert len(cache) == 1
        cache.put("Diabète", "dp", {"code": "E11.9"})
        assert len(cache) == 2

    def test_thread_safety(self, tmp_path):
        """Concurrent writes from several threads all land in the cache."""
        cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        errors = []

        def writer(i):
            # Collect exceptions instead of raising: an exception in a worker
            # thread would not fail the test on its own.
            try:
                cache.put(f"diag_{i}", "das", {"code": f"X{i:02d}"})
            except Exception as e:
                errors.append(e)

        threads = [threading.Thread(target=writer, args=(i,)) for i in range(20)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert not errors
        assert len(cache) == 20

    def test_json_format(self, tmp_path):
        """The saved JSON file holds the model name and the entries."""
        path = tmp_path / "cache.json"
        cache = OllamaCache(path, "gemma3:12b")
        cache.put("HTA", "das", {"code": "I10"})
        cache.save()
        raw = json.loads(path.read_text(encoding="utf-8"))
        assert raw["model"] == "gemma3:12b"
        assert "entries" in raw
        assert len(raw["entries"]) == 1

View File

@@ -7,7 +7,8 @@ from unittest.mock import patch, MagicMock
import pytest
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.config import RAGSource, Diagnostic, ActeCCAM, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.medical.ollama_cache import OllamaCache
class TestRAGSource:
@@ -494,6 +495,47 @@ class TestRAGSearchMocked:
assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancréatite..."
assert len(diag.sources_rag) == 1
def test_enrich_diagnostic_invalid_code_ignored(self):
    """An invalid code returned by Ollama does not replace the existing code."""
    from src.medical.rag_search import enrich_diagnostic
    diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
    mock_sources = [
        {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
    ]
    mock_llm = {
        "code": "X99.99",  # invalid code
        "confidence": "high",
        "justification": "Hallucination",
    }
    with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \
            patch("src.medical.rag_search._call_ollama", return_value=mock_llm):
        enrich_diagnostic(diag, {"sexe": "M", "age": 50})
    # The original code is kept (not replaced by the invalid one).
    assert diag.cim10_suggestion == "K85.9"
def test_enrich_diagnostic_normalizes_code(self):
    """A dotless code returned by Ollama is normalized (K851 -> K85.1)."""
    from src.medical.rag_search import enrich_diagnostic
    diag = Diagnostic(texte="Pancréatite aiguë biliaire")
    mock_sources = [
        {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
    ]
    mock_llm = {
        "code": "K851",  # no dot
        "confidence": "high",
        "justification": "Pancréatite biliaire",
    }
    with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \
            patch("src.medical.rag_search._call_ollama", return_value=mock_llm):
        enrich_diagnostic(diag, {"sexe": "F", "age": 43})
    assert diag.cim10_suggestion == "K85.1"
def test_enrich_diagnostic_est_dp_flag(self):
"""Le flag est_dp est bien passé à _build_prompt."""
from src.medical.rag_search import enrich_diagnostic
@@ -533,10 +575,12 @@ class TestEnrichDossier:
captured_contexts = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured_contexts.append(contexte.copy())
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured_contexts) == 1 # DP seulement (pas de DAS)
@@ -563,10 +607,12 @@ class TestEnrichDossier:
captured = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")})
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured) == 2
@@ -578,6 +624,149 @@ class TestEnrichDossier:
assert captured[1]["dp_texte"] == "Pancréatite aiguë biliaire"
class TestNormalizeCode:
    """Tests for ``normalize_code`` (canonical CIM-10 code formatting)."""

    def test_insert_dot(self):
        """A four-character code gets a dot after the third character."""
        from src.medical.cim10_dict import normalize_code
        assert normalize_code("K810") == "K81.0"

    def test_already_dotted(self):
        """An already dotted code is only upper-cased."""
        from src.medical.cim10_dict import normalize_code
        assert normalize_code("k85.1") == "K85.1"

    def test_three_chars(self):
        """A three-character code is returned as is."""
        from src.medical.cim10_dict import normalize_code
        assert normalize_code("K85") == "K85"

    def test_strip_spaces(self):
        """Surrounding whitespace is removed before formatting."""
        from src.medical.cim10_dict import normalize_code
        assert normalize_code(" E660 ") == "E66.0"
class TestValidateCodeCIM10:
    """Tests for ``validate_code`` (returns a validity flag and a label)."""

    def test_known_code(self):
        """A known code is valid and comes with a non-empty label."""
        from src.medical.cim10_dict import validate_code
        is_valid, label = validate_code("K81.9")
        assert is_valid is True
        assert label  # non-empty

    def test_unknown_code(self):
        """An unknown code is invalid and its label is the empty string."""
        from src.medical.cim10_dict import validate_code
        is_valid, label = validate_code("Z99.99")
        assert is_valid is False
        assert label == ""

    def test_normalize_before_validate(self):
        """K810 must be normalized to K81.0 and then found."""
        from src.medical.cim10_dict import validate_code
        is_valid, label = validate_code("K810")
        assert is_valid is True

    def test_three_char_code(self):
        """A dotless parent code (K85) must be accepted."""
        from src.medical.cim10_dict import validate_code
        is_valid, label = validate_code("K85")
        assert is_valid is True
class TestValidateCIM10PostProcessing:
    """Tests for the ``_validate_cim10`` post-processing pass over a dossier."""

    def test_hallucination_rejected(self):
        """Hallucinated codes ("Aucun", "N/A", ...) are rejected."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Aucun"),
        )
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion is None
        assert any("rejeté" in a for a in dossier.alertes_codage)

    def test_normalizes_format(self):
        """K810 is normalized to K81.0."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K810"),
        )
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"

    def test_invalid_code_gets_low_confidence(self):
        """A nonexistent code gets confidence "low" and an alert."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="Chose bizarre", cim10_suggestion="Z99.99"),
            ],
        )
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_confidence == "low"
        assert any("absent du dictionnaire" in a for a in dossier.alertes_codage)

    def test_valid_code_unchanged(self):
        """A valid code is left untouched and raises no alert."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1"),
        )
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert not any("CIM-10" in a for a in dossier.alertes_codage)

    def test_non_codable_rejected(self):
        """The value "non_codable" is rejected as a hallucination."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="Truc", cim10_suggestion="non_codable"),
            ],
        )
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_suggestion is None

    def test_hallucination_fallback_found(self):
        """A rejected hallucination is replaced when the dictionary fallback finds a code."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="Aucun"),
        )
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"
        assert dossier.diagnostic_principal.cim10_confidence == "medium"
        assert any("fallback" in a for a in dossier.alertes_codage)

    def test_invalid_code_fallback_found(self):
        """An invalid code is replaced by the dictionary fallback."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I99.99"),
            ],
        )
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_suggestion == "I10"
        assert dossier.diagnostics_associes[0].cim10_confidence == "medium"
        assert any("fallback" in a for a in dossier.alertes_codage)

    def test_invalid_code_no_fallback(self):
        """An invalid code with no possible fallback is kept at low confidence."""
        from src.medical.cim10_extractor import _validate_cim10
        dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="Chose bizarre inconnue", cim10_suggestion="Z99.99"),
            ],
        )
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_suggestion == "Z99.99"
        assert dossier.diagnostics_associes[0].cim10_confidence == "low"
        assert any("absent du dictionnaire" in a for a in dossier.alertes_codage)
class TestFormatContexte:
"""Tests pour _format_contexte."""
@@ -610,3 +799,241 @@ class TestFormatContexte:
assert "TDM abdominal" in result
assert "éruption cutanée" in result
assert "Pancréatite aiguë biliaire" in result
class TestActeCCAMExtended:
    """Tests for the RAG-related fields on ``ActeCCAM``."""

    def test_backward_compatible(self):
        """The new RAG fields are optional, so older constructor calls still work."""
        a = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        assert a.texte == "Cholécystectomie"
        assert a.code_ccam_suggestion == "HMFC004"
        assert a.ccam_confidence is None
        assert a.justification is None
        assert a.raisonnement is None
        assert a.sources_rag == []

    def test_with_rag_fields(self):
        """All RAG fields can be set explicitly at construction."""
        a = ActeCCAM(
            texte="Cholécystectomie par coelioscopie",
            code_ccam_suggestion="HMFC004",
            ccam_confidence="high",
            justification="HMFC004 correspond à la cholécystectomie par coelioscopie",
            raisonnement="ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
            sources_rag=[
                RAGSource(document="ccam", page=10, code="HMFC004"),
            ],
        )
        assert a.ccam_confidence == "high"
        assert a.justification is not None
        assert len(a.sources_rag) == 1
        assert a.sources_rag[0].code == "HMFC004"

    def test_serialization_exclude_none(self):
        """``exclude_none`` drops unset RAG fields but keeps the sources list."""
        a = ActeCCAM(texte="Test", code_ccam_suggestion="HMFC004")
        data = a.model_dump(exclude_none=True)
        assert "ccam_confidence" not in data
        assert "justification" not in data
        assert "raisonnement" not in data
        # sources_rag defaults to an empty list, not None, so it is kept.
        assert "sources_rag" in data
class TestSearchSimilarCCAM:
    """Tests for ``search_similar_ccam`` with the index and embedder mocked."""

    def test_prioritizes_ccam(self):
        """CCAM sources are prioritized (at least 5 out of 8)."""
        from src.medical.rag_search import search_similar_ccam
        import numpy as np
        # 6 CCAM entries followed by 6 guide entries, at index positions 0..11.
        mock_metadata = []
        for i in range(6):
            mock_metadata.append({"document": "ccam", "code": f"HMFC00{i}", "page": i, "extrait": f"CCAM {i}"})
        for i in range(6):
            mock_metadata.append({"document": "guide_methodo", "page": i + 10, "extrait": f"Guide {i}"})
        mock_index = MagicMock()
        mock_index.ntotal = 12
        # Strictly decreasing scores, so the mocked search returns positions in order.
        scores = np.array([[0.9 - i * 0.03 for i in range(12)]], dtype=np.float32)
        indices = np.array([list(range(12))], dtype=np.int64)
        mock_index.search.return_value = (scores, indices)
        with patch("src.medical.rag_index.get_index", return_value=(mock_index, mock_metadata)), \
                patch("src.medical.rag_search._get_embed_model") as mock_model:
            mock_model.return_value.encode.return_value = np.array([[0.1] * 768], dtype=np.float32)
            results = search_similar_ccam("cholécystectomie", top_k=8)
        ccam_count = sum(1 for r in results if r["document"] == "ccam")
        assert ccam_count >= 5, f"Seulement {ccam_count} sources CCAM sur {len(results)}"

    def test_no_index(self):
        """``search_similar_ccam`` returns an empty list when no index exists."""
        from src.medical.rag_search import search_similar_ccam
        with patch("src.medical.rag_index.get_index", return_value=None):
            results = search_similar_ccam("cholécystectomie")
        assert results == []
class TestEnrichActe:
    """Tests for ``enrich_acte`` with search, Ollama and validation patched."""

    def test_enrich_with_ollama(self):
        """Full enrichment: sources found and an Ollama answer available."""
        from src.medical.rag_search import enrich_acte
        acte = ActeCCAM(texte="Cholécystectomie par coelioscopie")
        mock_sources = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004 Cholécystectomie par coelioscopie...",
                "score": 0.92,
            },
        ]
        mock_llm = {
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie par coelioscopie = HMFC004",
            "raisonnement": "ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
        }
        with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \
                patch("src.medical.rag_search._call_ollama", return_value=mock_llm), \
                patch("src.medical.rag_search.ccam_validate", return_value=(True, "Cholécystectomie")):
            enrich_acte(acte, {"sexe": "F", "age": 43})
        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence == "high"
        assert acte.justification == "Cholécystectomie par coelioscopie = HMFC004"
        assert acte.raisonnement is not None
        assert len(acte.sources_rag) == 1

    def test_enrich_no_sources(self):
        """``enrich_acte`` does not crash when no source is found."""
        from src.medical.rag_search import enrich_acte
        acte = ActeCCAM(texte="Acte inconnu", code_ccam_suggestion="ABCD123")
        with patch("src.medical.rag_search.search_similar_ccam", return_value=[]):
            enrich_acte(acte, {"sexe": "M", "age": 50})
        assert acte.sources_rag == []
        assert acte.justification is None

    def test_enrich_no_ollama(self):
        """Enrichment with FAISS sources but no Ollama answer."""
        from src.medical.rag_search import enrich_acte
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        mock_sources = [
            {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9},
        ]
        with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \
                patch("src.medical.rag_search._call_ollama", return_value=None):
            enrich_acte(acte, {"sexe": "M", "age": 50})
        assert len(acte.sources_rag) == 1
        assert acte.justification is None
        assert acte.raisonnement is None

    def test_enrich_invalid_code(self):
        """An invalid CCAM code from Ollama does not replace the existing code."""
        from src.medical.rag_search import enrich_acte
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        mock_sources = [
            {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9},
        ]
        mock_llm = {
            "code": "ZZZZ999",
            "confidence": "high",
            "justification": "Hallucination",
        }
        with patch("src.medical.rag_search.search_similar_ccam", return_value=mock_sources), \
                patch("src.medical.rag_search._call_ollama", return_value=mock_llm), \
                patch("src.medical.rag_search.ccam_validate", return_value=(False, "")):
            enrich_acte(acte, {"sexe": "M", "age": 50})
        # The original code is kept.
        assert acte.code_ccam_suggestion == "HMFC004"
        # The confidence is still assigned, though.
        assert acte.ccam_confidence == "high"
class TestEnrichDossierCCAM:
    """Tests that ``enrich_dossier`` also processes CCAM acts."""

    def test_enriches_actes(self):
        """``enrich_dossier`` enriches the CCAM acts as well."""
        from src.medical.rag_search import enrich_dossier
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Lithiase vésiculaire"),
            actes_ccam=[
                ActeCCAM(texte="Cholécystectomie par coelioscopie"),
                ActeCCAM(texte="Anesthésie générale"),
            ],
        )
        enriched = []

        def mock_enrich_diag(diag, contexte, est_dp=True, cache=None):
            pass

        def mock_enrich_acte(acte, contexte, cache=None):
            # Record which acts were handed over for enrichment.
            enriched.append(acte.texte)

        with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich_diag), \
                patch("src.medical.rag_search.enrich_acte", side_effect=mock_enrich_acte), \
                patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
            mock_cache_cls.return_value = MagicMock()
            enrich_dossier(dossier)
        assert len(enriched) == 2
        assert "Cholécystectomie par coelioscopie" in enriched
        assert "Anesthésie générale" in enriched
class TestBuildPromptCCAM:
    """Tests for the text produced by ``_build_prompt_ccam``."""

    def test_prompt_contains_acte(self):
        """The prompt embeds the act text and the JSON output instructions."""
        from src.medical.rag_search import _build_prompt_ccam
        sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie"}]
        contexte = {"sexe": "F", "age": 43}
        prompt = _build_prompt_ccam("Cholécystectomie par coelioscopie", sources, contexte)
        assert "Cholécystectomie par coelioscopie" in prompt
        assert "CCAM" in prompt
        assert "analyse_acte" in prompt
        assert "objet JSON" in prompt

    def test_prompt_contains_source_info(self):
        """The prompt names the source document and includes its code."""
        from src.medical.rag_search import _build_prompt_ccam
        sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie par coelioscopie"}]
        contexte = {}
        prompt = _build_prompt_ccam("Cholécystectomie", sources, contexte)
        assert "CCAM PMSI V4 2025" in prompt
        assert "HMFC004" in prompt
class TestParseOllamaResponseCCAM:
    """Tests for ``_parse_ollama_response`` on CCAM-shaped JSON answers."""

    def test_parse_ccam_structured_json(self):
        """Parsing folds ``analyse_acte`` and related keys into the reasoning text."""
        from src.medical.rag_search import _parse_ollama_response
        import json
        raw = json.dumps({
            "analyse_acte": "Cholécystectomie par voie coelioscopique",
            "codes_candidats": "HMFC004, HMFC003",
            "discrimination": "HMFC004 est le code spécifique à la coelioscopie",
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie coelioscopique = HMFC004",
        })
        result = _parse_ollama_response(raw)
        assert result is not None
        assert result["code"] == "HMFC004"
        assert "raisonnement" in result
        assert "ANALYSE ACTE" in result["raisonnement"]
        assert "CODES CANDIDATS" in result["raisonnement"]
        # The raw structured key is consumed, not passed through.
        assert "analyse_acte" not in result

212
tests/test_rum_export.py Normal file
View File

@@ -0,0 +1,212 @@
"""Tests pour le module d'export RUM V016."""
import pytest
from src.config import ActeCCAM, Diagnostic, DossierMedical, Sejour
from src.export.rum_export import (
RUMConfig,
export_rum,
_format_cim10,
_format_date,
_format_sex,
_format_ccam_act,
_map_mode_entree,
_map_mode_sortie,
)
class TestFormatHelpers:
    """Unit tests for the fixed-width formatting helpers of the RUM export.

    Expected fixed-width values are written with ``str.ljust`` / ``" " * n``
    rather than literals with trailing blanks, so the field width is explicit
    and cannot be lost when whitespace is collapsed.
    """

    def test_format_cim10_normal(self):
        # The dot is dropped and the code is right-padded to 8 characters.
        assert _format_cim10("K85.1") == "K851".ljust(8)
        assert len(_format_cim10("K85.1")) == 8

    def test_format_cim10_short(self):
        result = _format_cim10("J18")
        assert result == "J18".ljust(8)
        assert len(result) == 8

    def test_format_cim10_none(self):
        assert _format_cim10(None) == " " * 8
        assert len(_format_cim10(None)) == 8

    def test_format_cim10_empty(self):
        # NOTE(review): assumes an empty code yields the same 8-blank field as
        # None (fixed-width output) — confirm against _format_cim10.
        assert _format_cim10("") == " " * 8

    def test_format_date_ddmmyyyy(self):
        assert _format_date("15/03/2025") == "15032025"

    def test_format_date_iso(self):
        assert _format_date("2025-03-15") == "15032025"

    def test_format_date_none(self):
        assert _format_date(None) == " " * 8
        assert len(_format_date(None)) == 8

    def test_format_sex_masculin(self):
        assert _format_sex("M") == "1"
        assert _format_sex("Masculin") == "1"
        assert _format_sex("H") == "1"

    def test_format_sex_feminin(self):
        assert _format_sex("F") == "2"
        assert _format_sex("Féminin") == "2"

    def test_format_sex_none(self):
        # Single-character field: one blank.
        assert _format_sex(None) == " "

    def test_map_mode_entree(self):
        assert _map_mode_entree("Domicile") == "8"
        assert _map_mode_entree("Mutation") == "6"
        assert _map_mode_entree("Transfert") == "7"
        assert _map_mode_entree(None) == " "

    def test_map_mode_sortie(self):
        assert _map_mode_sortie("Domicile") == "8"
        assert _map_mode_sortie("Décès") == "9"
        assert _map_mode_sortie("Transfert") == "7"
        assert _map_mode_sortie(None) == " "

    def test_format_ccam_act(self):
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="15/03/2025")
        result = _format_ccam_act(acte)
        assert len(result) == 29
        assert result[:7] == "HMFC004"
        assert result[7] == "1"  # phase
        assert result[8] == "1"  # activity
        assert result[9:17] == "15032025"  # date
class TestExportRUM:
    """Positional checks on the record returned by ``export_rum``.

    The tests index into the returned string: a 165-character fixed zone,
    then 8 characters per associated diagnosis (DAS) and 29 per CCAM act.
    """

    def _make_dossier(self, **kwargs):
        """Build a reference dossier; keyword arguments override the defaults."""
        defaults = dict(
            source_file="test.pdf",
            sejour=Sejour(
                sexe="M",
                date_entree="01/01/2025",
                date_sortie="05/01/2025",
                mode_entree="Domicile",
                mode_sortie="Domicile",
            ),
            diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"),
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(texte="Radio thorax", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        defaults.update(kwargs)
        return DossierMedical(**defaults)

    def test_fixed_zone_length(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # The fixed zone is 165 chars, followed by DAS and acts.
        assert len(rum) >= 165

    def test_fixed_zone_exact_165(self):
        dossier = self._make_dossier(diagnostics_associes=[], actes_ccam=[])
        rum = export_rum(dossier)
        assert len(rum) == 165

    def test_version_format(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[9:12] == "016"  # format version
        assert rum[24:27] == "016"  # RUM version

    def test_finess(self):
        config = RUMConfig(finess="123456789")
        dossier = self._make_dossier()
        rum = export_rum(dossier, config)
        assert rum[15:24] == "123456789"

    def test_sexe(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[85] == "1"  # M

    def test_dates(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[92:100] == "01012025"  # admission date
        assert rum[102:110] == "05012025"  # discharge date

    def test_modes(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[100] == "8"  # admission mode: home
        assert rum[110] == "8"  # discharge mode: home

    def test_dp_field(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # An 8-character slice can only equal an 8-character value: pad the code.
        assert rum[131:139] == "J189".ljust(8)

    def test_nb_das(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[125:127] == "01"

    def test_nb_actes(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[129:131] == "01"

    def test_das_variable_zone(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # DAS starts at position 165 and is 8 chars wide.
        das_zone = rum[165:173]
        assert das_zone == "I10".ljust(8)

    def test_acte_variable_zone(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # 1 DAS (8 chars), then the act (29 chars) at position 173.
        acte_zone = rum[173:202]
        assert len(acte_zone) == 29
        assert acte_zone[:7] == "ZBQK002"

    def test_total_length(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # 165 + 1*8 (DAS) + 1*29 (act) = 202
        assert len(rum) == 202
class TestEdgeCases:
    """``export_rum`` on dossiers with missing or multiple elements."""

    def test_no_dp(self):
        dossier = DossierMedical(source_file="test.pdf")
        rum = export_rum(dossier)
        assert len(rum) == 165
        # The principal-diagnosis field is an 8-character slice: expect 8 blanks.
        assert rum[131:139] == " " * 8

    def test_no_sejour_data(self):
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="J18.9"),
        )
        rum = export_rum(dossier)
        assert len(rum) == 165
        assert rum[85] == " "  # sex left blank

    def test_multiple_das_and_actes(self):
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="K85.1"),
            diagnostics_associes=[
                Diagnostic(texte="D1", cim10_suggestion="I10"),
                Diagnostic(texte="D2", cim10_suggestion="E11.9"),
                Diagnostic(texte="D3", cim10_suggestion="I48.9"),
            ],
            actes_ccam=[
                ActeCCAM(texte="A1", code_ccam_suggestion="HMFC004", date="01/01/2025"),
                ActeCCAM(texte="A2", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        rum = export_rum(dossier)
        # 165 + 3*8 + 2*29 = 165 + 24 + 58 = 247
        assert len(rum) == 247
        assert rum[125:127] == "03"  # DAS count
        assert rum[129:131] == "02"  # act count

View File

@@ -90,7 +90,7 @@ class TestEnrichDossierSeverity:
Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"),
Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
]
alertes = enrich_dossier_severity(dp, das)
alertes, cma_count, cms_count = enrich_dossier_severity(dp, das)
# I48.9 = CMA probable
assert das[0].est_cma is True
@@ -101,9 +101,21 @@ class TestEnrichDossierSeverity:
# Au moins une alerte CMA
assert any("CMA" in a for a in alertes)
assert cma_count >= 1
def test_dp_severity_set(self):
    """The principal diagnosis gets its severity level and CMA flag set."""
    dp = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
    # Single call, unpacking the 3-tuple; the stale pre-change call
    # (``alertes = enrich_dossier_severity(dp, [])``) ran the function twice.
    alertes, cma_count, cms_count = enrich_dossier_severity(dp, [])
    assert dp.niveau_severite == "severe"
    assert dp.est_cma is True
def test_cms_detection(self):
    """CMS is detected when a diagnosis is CMA with severity "severe"."""
    principal = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
    sepsis = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
    associes = [sepsis]

    alertes, cma_count, cms_count = enrich_dossier_severity(principal, associes)

    premier = associes[0]
    assert premier.est_cma is True
    assert premier.est_cms is True
    assert cms_count == 1