feat: scoring DP déterministe + parser CPAM nouveau format + sections CRH

- Nouveau module dp_scoring.py : shortlist, scoring multi-critères, select_dp,
  LLM one-shot fallback avec garde-fous (négation, comorbidité, Z/R-codes)
- Parser CPAM : auto-détection format legacy/ucr_extract, 6 nouveaux champs
  ControleCPAM (codes_etablissement, libelle, codes_retenus, ghm_ghs)
- CRH parser : 3 nouvelles sections (diag_sortie, diag_principal, synthese)
- Prompt DP_LLM_ONESHOT externalisé dans templates.py
- Propagation dp_selection dans fusion.py
- 808 tests passent (dont 21 nouveaux CPAM + 77 dp_scoring + 8 CRH)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-23 22:28:59 +01:00
parent 540e0cb400
commit aa501789fd
12 changed files with 2370 additions and 43 deletions

View File

@@ -82,6 +82,32 @@ EMBEDDING_MODEL = os.environ.get("T2A_EMBEDDING_MODEL", "dangvantuan/sentence-ca
RERANKER_MODEL = os.environ.get("T2A_RERANKER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2") RERANKER_MODEL = os.environ.get("T2A_RERANKER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
# --- Scoring DP (Diagnostic Principal) ---
DP_SCORING_WEIGHTS: dict[str, int] = {
"section_diag_sortie": 4,
"section_diag_principal": 4,
"section_motif_hospitalisation": 3,
"section_conclusion": 2,
"section_synthese": 2,
"section_edsnlp": 1,
"section_regex": 1,
"proof_excerpt": 2, # excerpt non-vide + page
"negation": -4, # "pas de", "absence de", "éliminé"
"conditional": -3, # "suspect", "probable", "?"
"z_code_dp": -2, # sauf whitelist
"r_code_dp": -2, # symptôme en DP
"comorbidity_weak": -3, # comorbidité banale (toutes sections, sauf preuve PEC)
}
DP_REVIEW_THRESHOLD: int = 2 # delta minimum top1-top2 pour éviter REVIEW
# Z-codes admis en DP (soins itératifs, surveillance, nouveau-né, rééducation, etc.)
DP_Z_CODE_WHITELIST: frozenset[str] = frozenset({
"Z51.1", "Z51.0", "Z38", "Z50.1", "Z43", "Z45", "Z09", "Z54",
"Z75", "Z03", "Z04", "Z08",
})
# --- Modèles de données CIM-10 --- # --- Modèles de données CIM-10 ---
@@ -128,6 +154,28 @@ class Diagnostic(BaseModel):
source_excerpt: Optional[str] = None # extrait du texte source (~200 chars) source_excerpt: Optional[str] = None # extrait du texte source (~200 chars)
class DPCandidate(BaseModel):
    """A single Diagnostic Principal (DP) candidate with its scoring trace."""
    code: Optional[str] = None  # CIM-10 code when known (may be None for text-only candidates)
    label: str  # human-readable diagnosis text
    source_section: str  # "diag_sortie" | "diag_principal" | "conclusion" | "synthese" | "motif_hospitalisation" | "edsnlp" | "regex"
    source_excerpt: Optional[str] = None  # short excerpt (~200 chars) backing the candidate
    source_page: Optional[int] = None  # page of the excerpt in the source document, when known
    confidence_raw: Optional[str] = None  # "high" | "medium" | "low"
    score: int = 0  # total deterministic score (sum of score_details values)
    score_details: dict[str, int] = Field(default_factory=dict)  # criterion name -> weight applied
    is_negated: bool = False  # a negation pattern precedes the label in the text
    is_conditional: bool = False  # an uncertainty pattern ("suspect", "probable", "?") surrounds the label
    dp_code_original_llm: Optional[str] = None  # original code proposed by the LLM (before normalisation)
    dp_code_normalized: bool = False  # True when the code was normalised (parent/fallback)
class DPSelection(BaseModel):
    """Outcome of the DP selection step: verdict plus the ranked candidates."""
    verdict: str = "confirmed"  # "confirmed" | "review"
    candidates: list[DPCandidate] = Field(default_factory=list)  # ranked by decreasing score; callers read index 0 as the winner
    winner_reason: Optional[str] = None  # human-readable justification for the verdict
    llm_tiebreak: Optional[dict] = None  # LLM tie-break details when the one-shot fallback ran — TODO confirm exact payload shape
class ActeCCAM(BaseModel): class ActeCCAM(BaseModel):
texte: str texte: str
code_ccam_suggestion: Optional[str] = None code_ccam_suggestion: Optional[str] = None
@@ -183,6 +231,7 @@ class DossierMedical(BaseModel):
document_type: str = "" document_type: str = ""
sejour: Sejour = Field(default_factory=Sejour) sejour: Sejour = Field(default_factory=Sejour)
diagnostic_principal: Optional[Diagnostic] = None diagnostic_principal: Optional[Diagnostic] = None
dp_selection: Optional[DPSelection] = None
diagnostics_associes: list[Diagnostic] = Field(default_factory=list) diagnostics_associes: list[Diagnostic] = Field(default_factory=list)
actes_ccam: list[ActeCCAM] = Field(default_factory=list) actes_ccam: list[ActeCCAM] = Field(default_factory=list)
antecedents: list[Antecedent] = Field(default_factory=list) antecedents: list[Antecedent] = Field(default_factory=list)
@@ -248,6 +297,13 @@ class ControleCPAM(BaseModel):
da_ucr: Optional[str] = None da_ucr: Optional[str] = None
dr_ucr: Optional[str] = None dr_ucr: Optional[str] = None
actes_ucr: Optional[str] = None actes_ucr: Optional[str] = None
# Champs enrichis (format ucr_extract)
codes_etablissement: Optional[str] = None
libelle_etablissement: Optional[str] = None
codes_controleurs: Optional[str] = None
libelle_controleurs: Optional[str] = None
codes_retenus: Optional[str] = None
ghm_ghs: Optional[str] = None
contre_argumentation: Optional[str] = None contre_argumentation: Optional[str] = None
response_data: Optional[dict] = None response_data: Optional[dict] = None
sources_reponse: list[RAGSource] = Field(default_factory=list) sources_reponse: list[RAGSource] = Field(default_factory=list)

View File

@@ -1,4 +1,12 @@
"""Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC.""" """Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC.
Supporte deux formats :
- **Ancien** (ogc_structure) : colonnes N° OGC, Titre, Arg_UCR, Décision_UCR, DP_UCR, DA_UCR, DR_UCR, Actes_UCR
- **Nouveau** (ucr_extract) : colonnes N° OGC, Type désaccord, Codes Établissement, Codes Contrôleurs,
Décision UCR, Codes retenus, GHM / GHS, Texte décision, etc.
Le format est auto-détecté à partir des en-têtes de la première ligne.
"""
from __future__ import annotations from __future__ import annotations
@@ -12,18 +20,15 @@ from ..config import ControleCPAM
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Colonnes attendues dans le fichier Excel # Colonnes obligatoires par format
_EXPECTED_COLUMNS = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR") _LEGACY_REQUIRED = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR")
_NEW_REQUIRED = ("N° OGC", "Type désaccord", "Décision UCR", "Texte décision")
def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]: def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
"""Lit le fichier Excel de contrôle CPAM et retourne un dict OGC -> liste de contrôles. """Lit le fichier Excel de contrôle CPAM et retourne un dict OGC -> liste de contrôles.
Args: Auto-détecte le format (ancien ogc_structure vs nouveau ucr_extract).
path: Chemin vers le fichier .xlsx CPAM.
Returns:
Dict avec le numéro OGC comme clé et la liste des contrôles associés.
""" """
path = Path(path) path = Path(path)
if not path.exists(): if not path.exists():
@@ -33,33 +38,53 @@ def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
wb = openpyxl.load_workbook(path, read_only=True) wb = openpyxl.load_workbook(path, read_only=True)
ws = wb[wb.sheetnames[0]] ws = wb[wb.sheetnames[0]]
# Lire l'en-tête
rows = ws.iter_rows(values_only=True) rows = ws.iter_rows(values_only=True)
header = next(rows, None) header = next(rows, None)
if header is None: if header is None:
logger.error("Fichier CPAM vide : %s", path) logger.error("Fichier CPAM vide : %s", path)
wb.close()
return {} return {}
# Construire le mapping colonne -> index
col_map = {} col_map = {}
for i, col_name in enumerate(header): for i, col_name in enumerate(header):
if col_name: if col_name:
col_map[col_name.strip()] = i col_map[str(col_name).strip()] = i
# Vérifier les colonnes requises # Auto-détection du format
missing = [c for c in _EXPECTED_COLUMNS[:4] if c not in col_map] is_new = all(c in col_map for c in _NEW_REQUIRED)
if missing: is_legacy = all(c in col_map for c in _LEGACY_REQUIRED)
logger.error("Colonnes manquantes dans le fichier CPAM : %s", missing)
if is_new:
logger.info("CPAM : format ucr_extract détecté")
result = _parse_new_format(rows, col_map)
elif is_legacy:
logger.info("CPAM : format ogc_structure (ancien) détecté")
result = _parse_legacy_format(rows, col_map)
else:
missing_new = [c for c in _NEW_REQUIRED if c not in col_map]
missing_leg = [c for c in _LEGACY_REQUIRED if c not in col_map]
logger.error(
"Format CPAM non reconnu. Colonnes trouvées : %s. "
"Manquantes (nouveau) : %s, (ancien) : %s",
list(col_map.keys()), missing_new, missing_leg,
)
wb.close()
return {} return {}
wb.close()
total = sum(len(v) for v in result.values())
logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", total, len(result))
return result
def _parse_legacy_format(rows, col_map: dict[str, int]) -> dict[int, list[ControleCPAM]]:
"""Parse l'ancien format ogc_structure."""
result: dict[int, list[ControleCPAM]] = {} result: dict[int, list[ControleCPAM]] = {}
count = 0
for row in rows: for row in rows:
ogc_val = row[col_map["N° OGC"]] ogc_val = row[col_map["N° OGC"]]
if ogc_val is None: if ogc_val is None:
continue continue
try: try:
numero_ogc = int(ogc_val) numero_ogc = int(ogc_val)
except (ValueError, TypeError): except (ValueError, TypeError):
@@ -76,11 +101,104 @@ def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
dr_ucr=_clean_optional(row, col_map.get("DR_UCR")), dr_ucr=_clean_optional(row, col_map.get("DR_UCR")),
actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")), actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")),
) )
result.setdefault(numero_ogc, []).append(controle) result.setdefault(numero_ogc, []).append(controle)
count += 1
logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", count, len(result)) return result
def _parse_new_format(rows, col_map: dict[str, int]) -> dict[int, list[ControleCPAM]]:
    """Parse le nouveau format ucr_extract.

    Column mapping:
        N° OGC                → numero_ogc
        Type désaccord        → titre (e.g. "Désaccord sur le DP")
        Texte décision        → arg_ucr
        Décision UCR          → decision_ucr (Favorable / Défavorable)
        Codes Contrôleurs     → dp_ucr / da_ucr depending on Type désaccord
        Codes Établissement   → codes_etablissement
        Libellé Établissement → libelle_etablissement
        Libellé Contrôleurs   → libelle_controleurs
        Codes retenus         → codes_retenus
        GHM / GHS             → ghm_ghs

    Args:
        rows: iterator over the remaining worksheet rows (header already consumed).
        col_map: column name -> index mapping built from the header row.

    Returns:
        Dict numero_ogc -> list of ControleCPAM; rows with a missing or
        non-integer N° OGC are skipped (with a warning for invalid values).
    """
    result: dict[int, list[ControleCPAM]] = {}
    # Required columns — KeyError here means the format detection was wrong.
    idx_ogc = col_map["N° OGC"]
    idx_type = col_map["Type désaccord"]
    idx_decision = col_map["Décision UCR"]
    idx_texte = col_map["Texte décision"]
    # Optional columns: absent in some exports, hence .get().
    idx_codes_etab = col_map.get("Codes Établissement")
    idx_lib_etab = col_map.get("Libellé Établissement")
    idx_codes_ctrl = col_map.get("Codes Contrôleurs")
    idx_lib_ctrl = col_map.get("Libellé Contrôleurs")
    idx_codes_ret = col_map.get("Codes retenus")
    idx_ghm = col_map.get("GHM / GHS")
    for row in rows:
        ogc_val = row[idx_ogc]
        if ogc_val is None:
            continue
        try:
            numero_ogc = int(ogc_val)
        except (ValueError, TypeError):
            logger.warning("N° OGC invalide ignoré : %s", ogc_val)
            continue
        type_desaccord = str(row[idx_type] or "").strip()
        decision = str(row[idx_decision] or "").strip()
        texte_decision = str(row[idx_texte] or "").strip()
        codes_ctrl = _clean_optional(row, idx_codes_ctrl)
        codes_etab = _clean_optional(row, idx_codes_etab)
        # Build a human-readable title from the disagreement type.
        if type_desaccord == "DP":
            titre = "Désaccord sur le DP"
        elif type_desaccord == "DAS":
            titre = "Désaccord sur les DAS"
        elif type_desaccord == "DP+DAS":
            titre = "Désaccord sur le DP et les DAS"
        else:
            titre = f"Désaccord : {type_desaccord}" if type_desaccord else ""
        # Map the decision to the wording expected by cpam_response.
        # lower() hoisted: previously recomputed for every comparison.
        decision_lower = decision.lower()
        if decision_lower.startswith("favorable"):
            decision_ucr = "UCR retient"
        elif decision_lower.startswith(("défavorable", "defavorable")):
            decision_ucr = "UCR confirme avis médecins contrôleurs"
        else:
            decision_ucr = decision
        # Distribute the contrôleur codes between DP and DAS.
        dp_ucr = None
        da_ucr = None
        if type_desaccord == "DP":
            dp_ucr = codes_ctrl
        elif type_desaccord == "DAS":
            da_ucr = codes_ctrl
        elif type_desaccord == "DP+DAS" and codes_ctrl:
            # The contrôleur codes may mix DP and DAS.
            # Convention: the first code is the DP, the rest are DAS.
            parts = [c.strip() for c in codes_ctrl.split(",") if c.strip()]
            dp_ucr = parts[0] if parts else None
            da_ucr = ",".join(parts[1:]) if len(parts) > 1 else None
        controle = ControleCPAM(
            numero_ogc=numero_ogc,
            titre=titre,
            arg_ucr=texte_decision,
            decision_ucr=decision_ucr,
            dp_ucr=dp_ucr,
            da_ucr=da_ucr,
            codes_etablissement=codes_etab,
            libelle_etablissement=_clean_optional(row, idx_lib_etab),
            codes_controleurs=codes_ctrl,
            libelle_controleurs=_clean_optional(row, idx_lib_ctrl),
            codes_retenus=_clean_optional(row, idx_codes_ret),
            ghm_ghs=_clean_optional(row, idx_ghm),
        )
        result.setdefault(numero_ogc, []).append(controle)
    return result

View File

@@ -113,12 +113,19 @@ def _extract_medical_content(text: str, result: dict) -> None:
result["contenu_medical"] = m.group(1).strip() result["contenu_medical"] = m.group(1).strip()
# Sections spécifiques # Sections spécifiques
# Note : les terminaisons incluent les en-têtes des sections suivantes
# pour éviter la capture excessive (une section s'arrête quand la suivante commence).
_DIAG_HEADERS = r"Diagnostic(?:s)?\s+(?:de\s+sortie|retenu|principal)|Problème\s+principal|Synthèse|En\s+résumé|En\s+synthèse"
section_patterns = [ section_patterns = [
("motif_hospitalisation", r"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:Antécédents|Histoire|Examen|Au total|Devenir|TTT)|$)"), ("motif_hospitalisation", r"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:Antécédents|Histoire|Examen|Au total|Devenir|TTT|" + _DIAG_HEADERS + r")|$)"),
("antecedents", r"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir)|$)"), ("antecedents", r"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
("histoire_maladie", r"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir)|$)"), ("histoire_maladie", r"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
("examen_clinique", r"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir)|$)"), ("examen_clinique", r"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
("conclusion", r"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement)|$)"), ("conclusion", r"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|" + _DIAG_HEADERS + r")|$)"),
# Sections à fort signal DP (avant traitement_sortie pour priorité)
("diag_sortie", r"(?:Diagnostic(?:s)?\s+de\s+sortie|Diagnostic(?:s)?\s+retenu(?:s)?(?:\s+(?:à\s+la\s+sortie))?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Synthèse|En\s+résumé|Rédigé|Cordialement)|$)"),
("diag_principal", r"(?:Diagnostic\s+principal|Problème\s+principal)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Diagnostic(?:s)?\s+(?:de\s+sortie|retenu|associé)|Devenir|TTT|Traitement|Synthèse|En\s+résumé|Rédigé|Cordialement)|$)"),
("synthese", r"(?:Synthèse|En\s+résumé|En\s+synthèse)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Rédigé|Cordialement)|$)"),
("traitement_sortie", r"(?:TTT de sortie|Traitement de sortie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement)|$)"), ("traitement_sortie", r"(?:TTT de sortie|Traitement de sortie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement)|$)"),
("devenir", r"(?:Devenir)\s*[:\s]*\n?(.*?)(?=\n\s*(?:TTT|Traitement|Rédigé|Cordialement)|$)"), ("devenir", r"(?:Devenir)\s*[:\s]*\n?(.*?)(?=\n\s*(?:TTT|Traitement|Rédigé|Cordialement)|$)"),
] ]

View File

@@ -116,7 +116,7 @@ def extract_medical_info(
search_text = raw_text or anonymized_text search_text = raw_text or anonymized_text
_extract_sejour(parsed_data, dossier) _extract_sejour(parsed_data, dossier)
_extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result) _extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result, use_rag=use_rag)
_extract_actes(anonymized_text, dossier) _extract_actes(anonymized_text, dossier)
_extract_antecedents(anonymized_text, dossier) _extract_antecedents(anonymized_text, dossier)
_extract_traitements(parsed_data, anonymized_text, dossier, edsnlp_result) _extract_traitements(parsed_data, anonymized_text, dossier, edsnlp_result)
@@ -306,6 +306,7 @@ def _extract_diagnostics(
text: str, text: str,
dossier: DossierMedical, dossier: DossierMedical,
edsnlp_result: Optional[EdsnlpResult] = None, edsnlp_result: Optional[EdsnlpResult] = None,
use_rag: bool = False,
) -> None: ) -> None:
"""Extrait le diagnostic principal et les diagnostics associés.""" """Extrait le diagnostic principal et les diagnostics associés."""
text_lower = text.lower() text_lower = text.lower()
@@ -342,21 +343,52 @@ def _extract_diagnostics(
if not ent.negation and not ent.hypothese: if not ent.negation and not ent.hypothese:
edsnlp_codes[ent.code] = ent.texte edsnlp_codes[ent.code] = ent.texte
# Si pas de DP depuis le codage, chercher dans le texte # Si pas de DP depuis le codage, utiliser le scoring multi-candidats
if not dossier.diagnostic_principal: if not dossier.diagnostic_principal:
# D'abord essayer le fallback regex (plus précis pour les patterns spécifiques) from .dp_scoring import build_dp_shortlist, score_candidates, select_dp, llm_dp_fallback
dp = _find_diagnostic_principal(text_lower, conclusion)
if dp: candidates = build_dp_shortlist(parsed, text, edsnlp_result, dossier)
dossier.diagnostic_principal = dp candidates = score_candidates(candidates, dossier, full_text=text)
elif edsnlp_codes: selection = select_dp(candidates, dossier, use_llm=use_rag)
# Utiliser la première entité CIM-10 edsnlp comme DP
code, texte = next(iter(edsnlp_codes.items())) # Fallback LLM : si scoring déterministe → REVIEW et LLM autorisé
texte_clean = texte.capitalize() if use_rag and selection.verdict == "review":
if is_valid_diagnostic_text(texte_clean): # Instrumentation : dp_pre_llm
dossier.diagnostic_principal = Diagnostic( pre_code = selection.candidates[0].code if selection.candidates else None
texte=texte_clean, cim10_suggestion=code, pre_section = selection.candidates[0].source_section if selection.candidates else None
source="edsnlp", is_comorbidity_trigger = "comorbidité banale" in (selection.winner_reason or "")
) logger.info(
"DP pre-LLM: code=%s section=%s trigger_comorbidity_fallback=%s",
pre_code, pre_section, is_comorbidity_trigger,
)
llm_selection = llm_dp_fallback(
parsed, text, dossier,
dp_candidates=candidates,
edsnlp_result=edsnlp_result,
)
# Fusionner candidats LLM + déterministes (LLM en tête)
if llm_selection.candidates:
all_candidates = list(llm_selection.candidates)
if selection.candidates:
all_candidates.extend(selection.candidates)
llm_selection.candidates = all_candidates
selection = llm_selection
# Instrumentation : dp_post_llm
post_code = selection.candidates[0].code if selection.candidates else None
logger.info("DP post-LLM: code=%s verdict=%s", post_code, selection.verdict)
dossier.dp_selection = selection
if selection.candidates:
winner = selection.candidates[0]
dossier.diagnostic_principal = Diagnostic(
texte=winner.label,
cim10_suggestion=winner.code,
source=winner.source_section,
source_page=winner.source_page,
source_excerpt=winner.source_excerpt,
)
# Diagnostics associés depuis le texte (regex) # Diagnostics associés depuis le texte (regex)
das = _find_diagnostics_associes(text_lower, conclusion, dossier) das = _find_diagnostics_associes(text_lower, conclusion, dossier)

844
src/medical/dp_scoring.py Normal file
View File

@@ -0,0 +1,844 @@
"""Scoring déterministe du Diagnostic Principal (DP) pour les CRH.
Collecte les candidats DP depuis les sections CRH parsées, les entités edsnlp
et les regex, puis applique un scoring multi-critères pour sélectionner le
meilleur candidat ou signaler une ambiguïté (verdict REVIEW).
Fallback LLM one-shot : si use_llm=True et verdict REVIEW, un appel unique
au LLM voit les sections fortes et propose dp_code + evidence en un seul pass.
"""
from __future__ import annotations
import logging
import re
from typing import Optional
from ..config import (
DossierMedical,
DPCandidate,
DPSelection,
DP_REVIEW_THRESHOLD,
DP_SCORING_WEIGHTS,
DP_Z_CODE_WHITELIST,
)
from .cim10_dict import normalize_code, normalize_text, validate_code as cim10_validate
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Patterns de négation et conditionnel
# ---------------------------------------------------------------------------
_NEGATION_PATTERNS = re.compile(
r"(?:pas\s+de|absence\s+d[e']|non\s+retenu|exclu[es]?|"
r"[ée]limin[ée]|n[ée]gatif|aucun[e]?\s|sans\s)",
re.IGNORECASE,
)
_CONDITIONAL_PATTERNS = re.compile(
r"(?:suspect[ée]?|probable|hypothèse|hypothese|\?\s*$|"
r"[àa]\s+confirmer|[ée]ventuel(?:le)?|possiblement|"
r"ne\s+peut\s+(?:pas\s+)?[êe]tre\s+exclu)",
re.IGNORECASE,
)
# Regex pour extraire des codes CIM-10 explicites dans du texte
# Exige le format avec point (X##.#) pour éviter les faux positifs 3-char :
# P02 (diététique), N34 (mutation N34S), T36 (T36.7°C = température)
# Les codes 3-char sans point sont trop ambigus en texte libre.
# CIM10_MAP gère les correspondances terme→code pour les diagnostics courants.
_CIM10_CODE_RE = re.compile(r"\b([A-Z]\d{2}\.\d{1,2})(?![A-Za-z°])")
# Codes de comorbidité banals : pénalisés en DP (toutes sections)
# Presque toujours DAS, même s'ils apparaissent en conclusion/motif
_COMORBIDITY_PREFIXES = ("I10", "E66.", "E78.", "E11.", "D64.9")
# Patterns de preuve explicite de PEC principale (exception comorbidité)
# Ex: "hospitalisé pour HTA maligne", "prise en charge de l'obésité morbide"
_PEC_PROOF_RE = re.compile(
r"(?:hospitalis[ée]e?\s+pour"
r"|prise\s+en\s+charge"
r"|admission\s+pour"
r"|adress[ée]e?\s+pour)",
re.IGNORECASE,
)
# Sections à fort signal DP
# NB : dans ce corpus CRH, "diag_sortie"/"diag_principal" n'existent quasiment
# jamais. "conclusion" et "synthese" SONT les sections diagnostiques de fait.
_STRONG_SECTIONS = frozenset({
"motif", "motif_hospitalisation",
"diag_sortie", "diagnostics_retenus", "diag_principal",
"conclusion", "synthese",
})
# Mapping de normalisation : noms libres renvoyés par le LLM → clés de section
_SECTION_NORMALIZE_MAP = {
# conclusion
"conclusion": "conclusion",
"conclusions": "conclusion",
"au total": "conclusion",
# synthese
"synthèse": "synthese",
"synthese": "synthese",
"synthèse du séjour": "synthese",
"synthese du sejour": "synthese",
"synthèse du dossier": "synthese",
"synthese du dossier": "synthese",
"synthèse clinique": "synthese",
"synthese clinique": "synthese",
"en résumé": "synthese",
"en resume": "synthese",
"en synthèse": "synthese",
"en synthese": "synthese",
"résumé": "synthese",
"resume": "synthese",
# motif_hospitalisation
"motif": "motif_hospitalisation",
"motif d'hospitalisation": "motif_hospitalisation",
"motif d'admission": "motif_hospitalisation",
"motif de consultation": "motif_hospitalisation",
"motif_hospitalisation": "motif_hospitalisation",
"motif hospitalisation": "motif_hospitalisation",
"admission": "motif_hospitalisation",
"motif d'entrée": "motif_hospitalisation",
"motif d'entree": "motif_hospitalisation",
# diag_sortie
"diagnostic de sortie": "diag_sortie",
"diagnostics de sortie": "diag_sortie",
"diag_sortie": "diag_sortie",
"diag sortie": "diag_sortie",
# diagnostics_retenus
"diagnostic retenu": "diagnostics_retenus",
"diagnostics retenus": "diagnostics_retenus",
"diagnostic retenu à la sortie": "diagnostics_retenus",
"diagnostics retenus à la sortie": "diagnostics_retenus",
"diagnostics_retenus": "diagnostics_retenus",
# diag_principal
"diagnostic principal": "diag_principal",
"diag_principal": "diag_principal",
"diag principal": "diag_principal",
"problème principal": "diag_principal",
"probleme principal": "diag_principal",
# histoire_maladie
"histoire de la maladie": "histoire_maladie",
"histoire_maladie": "histoire_maladie",
"histoire maladie": "histoire_maladie",
"hdm": "histoire_maladie",
# evolution
"evolution dans le service": "evolution",
"évolution dans le service": "evolution",
"evolution": "evolution",
"évolution": "evolution",
# examen
"examen clinique": "examen_clinique",
"examen_clinique": "examen_clinique",
# actes
"indication opératoire": "indication_operatoire",
"indication operatoire": "indication_operatoire",
"prise en charge chirurgicale": "indication_operatoire",
"actes réalisés": "actes",
"actes realises": "actes",
"actes": "actes",
# administratif / bruit → "autres"
"sections cliniques": "autres",
"sections_cliniques": "autres",
"sections fortes du dossier": "autres",
"secrétariat": "autres",
"secretariat": "autres",
"médecine interne": "autres",
"medecine interne": "autres",
"médecin": "autres",
"medecin": "autres",
"courrier": "autres",
"courrier de sortie": "autres",
"compte rendu": "autres",
"compte-rendu": "autres",
"dossier médical": "autres",
"dossier medical": "autres",
"observations": "autres",
}
# Fallback par mots-clés quand la correspondance exacte échoue.
# Paires (mot-clé(s), section_normalisée) testées dans l'ordre — premier match gagne.
_SECTION_KEYWORD_FALLBACKS: list[tuple[tuple[str, ...], str]] = [
# diagnostic + sortie/retenu → diag_sortie / diagnostics_retenus
(("diagnostic", "sortie"), "diag_sortie"),
(("diagnostic", "retenu"), "diagnostics_retenus"),
# conclusion / synthese
(("conclusion",), "conclusion"),
(("synthese",), "synthese"),
(("synthèse",), "synthese"),
(("au total",), "synthese"),
(("en résumé",), "synthese"),
# motif / admission
(("motif",), "motif_hospitalisation"),
(("admission",), "motif_hospitalisation"),
]
def _normalize_evidence_section(raw_section: str) -> str:
    """Map a free-form section name (as returned by the LLM) to a canonical key.

    Lookup order:
      1. aggressive cleanup (lowercase, strip brackets/quotes/colons);
      2. exact match in _SECTION_NORMALIZE_MAP, also with underscores
         replaced by spaces;
      3. keyword fallback via _SECTION_KEYWORD_FALLBACKS.
    The cleaned key itself is returned when nothing matches.
    """
    if not raw_section:
        return ""
    # Aggressive cleanup: brackets, quotes and colons removed.
    cleaned = re.sub(r"[\[\]\"':]+", "", raw_section.lower().strip()).strip()
    # Exact lookup, trying the key as-is then with underscores -> spaces.
    for variant in (cleaned, cleaned.replace("_", " ")):
        mapped = _SECTION_NORMALIZE_MAP.get(variant)
        if mapped:
            return mapped
    # Keyword fallback: first rule whose keywords all occur wins.
    return next(
        (
            section
            for keywords, section in _SECTION_KEYWORD_FALLBACKS
            if all(kw in cleaned for kw in keywords)
        ),
        cleaned,
    )
def _is_comorbidity_code(code: str) -> bool:
    """Return True when *code* is a banal comorbidity (I10, E66.x, E78.x, E11.x, D64.9).

    These codes are almost always DAS, so they are penalised as DP candidates.
    """
    # str.startswith accepts a tuple of prefixes: one C-level call
    # instead of a Python-level any() loop over _COMORBIDITY_PREFIXES.
    return code.startswith(_COMORBIDITY_PREFIXES)
def _has_explicit_pec_proof(label: str, full_text: str) -> bool:
    """Tell whether *full_text* explicitly states this comorbidity is the MAIN
    reason for care.

    Looks for trigger phrases matched by _PEC_PROOF_RE ("hospitalisé pour",
    "prise en charge", "admission pour", ...) with the label appearing within
    the 100 characters that follow the trigger.
    """
    if not label or not full_text:
        return False
    haystack = full_text.lower()
    needle = label.lower()
    return any(
        needle in haystack[m.end():m.end() + 100]
        for m in _PEC_PROOF_RE.finditer(haystack)
    )
# ---------------------------------------------------------------------------
# 1. Construction de la shortlist
# ---------------------------------------------------------------------------
def build_dp_shortlist(
    parsed: dict,
    text: str,
    edsnlp_result,
    dossier: DossierMedical,
) -> list[DPCandidate]:
    """Collect DP candidates from the parsed CRH sections, edsnlp entities and
    a regex fallback, then deduplicate by CIM-10 code keeping the strongest
    section.

    Args:
        parsed: parser output; only parsed["sections"] is read here.
        text: full document text (used by the regex fallback).
        edsnlp_result: optional edsnlp output with .cim10_entities — assumed
            entities carry .code/.texte/.negation/.hypothese; confirm upstream.
        dossier: current medical record (unused here, kept for interface symmetry).

    Returns:
        Deduplicated list of DPCandidate, one per CIM-10 code (or label).
    """
    # Local imports avoid a circular dependency with the extractor modules.
    from .cim10_extractor import CIM10_MAP, _find_diagnostic_principal
    from .das_filter import is_valid_diagnostic_text, clean_diagnostic_text
    candidates: list[DPCandidate] = []
    sections = parsed.get("sections", {})
    # Section priority, strongest signal first (also drives dedup below).
    section_priority = [
        "diag_sortie", "diag_principal", "motif_hospitalisation",
        "conclusion", "synthese",
    ]
    # 1. High-signal CRH sections
    for section_key in section_priority:
        section_text = sections.get(section_key, "")
        if not section_text:
            continue
        section_norm = normalize_text(section_text)
        # 1a. Explicit CIM-10 codes written in the section text
        for m in _CIM10_CODE_RE.finditer(section_text):
            code = normalize_code(m.group(1))
            is_valid, label = cim10_validate(code)
            if is_valid:
                excerpt = _extract_excerpt(section_text, m.start())
                candidates.append(DPCandidate(
                    code=code,
                    label=label,
                    source_section=section_key,
                    source_excerpt=excerpt,
                ))
        # 1b. CIM10_MAP only (curated map of common DPs).
        # dict_lookup is deliberately NOT used: the full dictionary (10K+
        # entries) produces false positives via substring matches on free text.
        for terme, code in CIM10_MAP.items():
            if normalize_text(terme) in section_norm:
                candidates.append(DPCandidate(
                    code=code,
                    label=terme.capitalize(),
                    source_section=section_key,
                    source_excerpt=section_text[:200].strip(),
                ))
                break  # longest-match: CIM10_MAP is ordered specific -> generic
    # 2. edsnlp entities (negated or hypothetical mentions are skipped)
    if edsnlp_result:
        for ent in edsnlp_result.cim10_entities:
            if ent.negation or ent.hypothese:
                continue
            texte = clean_diagnostic_text(ent.texte.capitalize())
            if not is_valid_diagnostic_text(texte):
                continue
            candidates.append(DPCandidate(
                code=ent.code,
                label=texte,
                source_section="edsnlp",
            ))
    # 3. Regex fallback (_find_diagnostic_principal over the whole text)
    text_lower = text.lower()
    conclusion = sections.get("conclusion", "")
    dp_regex = _find_diagnostic_principal(text_lower, conclusion)
    if dp_regex:
        candidates.append(DPCandidate(
            code=dp_regex.cim10_suggestion,
            label=dp_regex.texte,
            source_section="regex",
            source_excerpt=dp_regex.source_excerpt,
        ))
    # 4. Dedup by CIM-10 code: keep the strongest section per code
    candidates = _dedup_by_code(candidates, section_priority)
    return candidates
def _extract_excerpt(text: str, pos: int, window: int = 100) -> str:
"""Extrait ~200 chars autour d'une position dans le texte."""
start = max(0, pos - window)
end = min(len(text), pos + window)
return text[start:end].strip()
def _dedup_by_code(
candidates: list[DPCandidate],
section_priority: list[str],
) -> list[DPCandidate]:
"""Déduplique par code CIM-10, garde la section la plus forte."""
priority_map = {s: i for i, s in enumerate(section_priority)}
# Ajouter edsnlp et regex en bas de priorité
priority_map.setdefault("edsnlp", len(section_priority))
priority_map.setdefault("regex", len(section_priority) + 1)
seen: dict[str, DPCandidate] = {}
for c in candidates:
key = c.code or c.label.lower()
if key not in seen:
seen[key] = c
else:
existing = seen[key]
existing_prio = priority_map.get(existing.source_section, 99)
new_prio = priority_map.get(c.source_section, 99)
if new_prio < existing_prio:
seen[key] = c
return list(seen.values())
# ---------------------------------------------------------------------------
# 2. Scoring des candidats
# ---------------------------------------------------------------------------
def score_candidates(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    full_text: str = "",
) -> list[DPCandidate]:
    """Apply the deterministic multi-criteria scoring grid to each candidate.

    Candidates are mutated in place (score, score_details, is_negated,
    is_conditional) and the list is returned sorted by decreasing score.

    Args:
        candidates: shortlist produced by build_dp_shortlist.
        dossier: current medical record (unused here, kept for interface symmetry).
        full_text: complete document text, used for negation/conditional
            detection and for the explicit-PEC comorbidity exception.
    """
    weights = DP_SCORING_WEIGHTS
    for cand in candidates:
        breakdown: dict[str, int] = {}
        # 1. Section bonus (diag_sortie / conclusion / ... weighted differently).
        section_bonus = weights.get(f"section_{cand.source_section}", 0)
        if section_bonus:
            breakdown["section"] = section_bonus
        # 2. Evidence bonus: a non-empty excerpt counts as proof.
        if cand.source_excerpt and weights.get("proof_excerpt", 0):
            breakdown["proof_excerpt"] = weights["proof_excerpt"]
        if full_text and cand.label:
            # 3. Negation penalty — narrow window BEFORE the label only.
            before = _get_prefix_window(full_text, cand.label, chars_before=60)
            if before and _NEGATION_PATTERNS.search(before):
                cand.is_negated = True
                if weights.get("negation", 0):
                    breakdown["negation"] = weights["negation"]
            # 4. Conditional penalty — narrow window BEFORE + AFTER the label.
            around = _get_context_window(full_text, cand.label, radius=80)
            if around and _CONDITIONAL_PATTERNS.search(around):
                cand.is_conditional = True
                if weights.get("conditional", 0):
                    breakdown["conditional"] = weights["conditional"]
        code = cand.code or ""
        # 5. Z-code penalty, unless whitelisted (iterative care, newborn, ...).
        if code.startswith("Z") and not _is_z_code_whitelisted(code):
            if weights.get("z_code_dp", 0):
                breakdown["z_code_dp"] = weights["z_code_dp"]
        # 6. R-code (symptom) penalty.
        if code.startswith("R") and weights.get("r_code_dp", 0):
            breakdown["r_code_dp"] = weights["r_code_dp"]
        # 7. Banal-comorbidity penalty, applied whatever the section.
        if code and _is_comorbidity_code(code):
            penalty = weights.get("comorbidity_weak", 0)
            if penalty:
                breakdown["comorbidity_weak"] = penalty
            # Exception: explicit proof of main care compensates the penalty.
            if full_text and _has_explicit_pec_proof(cand.label, full_text):
                breakdown["comorbidity_pec_proof"] = abs(penalty) if penalty else 3
        cand.score_details = breakdown
        cand.score = sum(breakdown.values())
    # Stable descending sort: ties keep their shortlist order.
    candidates.sort(key=lambda c: c.score, reverse=True)
    return candidates
def _get_prefix_window(text: str, label: str, chars_before: int = 60) -> str:
"""Retourne les N caractères AVANT la première occurrence du label.
Sert à détecter les négations qui précèdent directement le diagnostic
("pas de pancréatite" vs "pancréatite ... pas de complication").
"""
text_lower = text.lower()
label_lower = label.lower()
pos = text_lower.find(label_lower)
if pos < 0:
text_norm = normalize_text(text)
label_norm = normalize_text(label)
pos = text_norm.find(label_norm)
if pos < 0:
return ""
start = max(0, pos - chars_before)
return text_norm[start:pos]
start = max(0, pos - chars_before)
return text_lower[start:pos]
def _get_context_window(text: str, label: str, radius: int = 200) -> str:
"""Retourne une fenêtre de texte autour de la première occurrence du label."""
text_lower = text.lower()
label_lower = label.lower()
pos = text_lower.find(label_lower)
if pos < 0:
# Essayer avec le texte normalisé
text_norm = normalize_text(text)
label_norm = normalize_text(label)
pos = text_norm.find(label_norm)
if pos < 0:
return ""
start = max(0, pos - radius)
end = min(len(text_norm), pos + len(label_norm) + radius)
return text_norm[start:end]
start = max(0, pos - radius)
end = min(len(text), pos + len(label) + radius)
return text[start:end].lower()
def _is_z_code_whitelisted(code: str) -> bool:
    """Tell whether a Z-code may stand as DP (prefix match against the whitelist)."""
    # str.startswith accepts a tuple of prefixes: one C-level call instead
    # of a Python loop over the frozenset.
    return code.startswith(tuple(DP_Z_CODE_WHITELIST))
# ---------------------------------------------------------------------------
# 3. Sélection du DP
# ---------------------------------------------------------------------------
def select_dp(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    use_llm: bool = False,
) -> DPSelection:
    """Pick the DP among the scored candidates.

    Returns verdict="confirmed" when the top1/top2 score gap is wide
    enough, "review" when the choice remains ambiguous.
    """
    if not candidates:
        return DPSelection(verdict="review", winner_reason="aucun candidat DP trouvé")

    # Universal anti-comorbidity guard: a banal comorbidity ranked first
    # forces REVIEW unless an explicit proof of principal care was found
    # earlier ("hospitalisé pour", "prise en charge de", ...).
    leader = candidates[0]
    if leader.code and _is_comorbidity_code(leader.code):
        if "comorbidity_pec_proof" not in leader.score_details:
            logger.info(
                "Comorbidité-banale DP : %s (%s, section=%s) → REVIEW + fallback LLM",
                leader.code, leader.label, leader.source_section,
            )
            return DPSelection(
                verdict="review",
                candidates=candidates[:3],
                winner_reason=f"comorbidité banale {leader.code} sans preuve PEC ({leader.source_section})",
            )

    if len(candidates) == 1:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason="candidat unique",
        )

    runner_up = candidates[1]
    gap = leader.score - runner_up.score
    if gap >= DP_REVIEW_THRESHOLD:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason=f"score {leader.score} vs {runner_up.score} (delta {gap})",
        )

    # Gap below threshold — try the LLM tiebreaker, but only on exact ties.
    if use_llm and leader.score == runner_up.score:
        tiebreak = _llm_tiebreak(leader, runner_up, dossier)
        if tiebreak and tiebreak.get("winner") in ("A", "B"):
            if tiebreak["winner"] == "B":
                # Put the winner first (in-place swap, visible to the caller).
                candidates[0], candidates[1] = candidates[1], candidates[0]
            return DPSelection(
                verdict="confirmed",
                candidates=candidates,
                winner_reason=f"LLM tiebreak: {tiebreak.get('reason', '')}",
                llm_tiebreak=tiebreak,
            )

    return DPSelection(
        verdict="review",
        candidates=candidates[:3],
        winner_reason=f"delta insuffisant: {leader.score} vs {runner_up.score} (delta {gap} < seuil {DP_REVIEW_THRESHOLD})",
    )
# ---------------------------------------------------------------------------
# 4. Tiebreaker LLM (optionnel)
# ---------------------------------------------------------------------------
def _llm_tiebreak(
    candidate_a: DPCandidate,
    candidate_b: DPCandidate,
    dossier: DossierMedical,
) -> dict | None:
    """Ask the LLM to break the tie between two DP candidates with equal scores.

    Returns {"winner": "A"|"B", "reason": str} or None when the LLM is
    unavailable, raises, or answers with anything unusable.
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_TIEBREAK
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le tiebreaker DP")
        return None

    motif = ""
    if dossier.sejour and dossier.sejour.mode_entree:
        motif = dossier.sejour.mode_entree

    def _format_candidate(c: DPCandidate) -> str:
        """Render one candidate as a single human-readable prompt line."""
        parts = [c.label]
        if c.code:
            parts.append(f"({c.code})")
        parts.append(f"[section: {c.source_section}, score: {c.score}]")
        if c.source_excerpt:
            parts.append(f'extrait: "{c.source_excerpt[:150]}"')
        # Bug fix: join with spaces — "".join mashed the fragments together,
        # e.g. 'Pancréatite aiguë biliaire(K85.1)[section: ...]' in the prompt.
        return " ".join(parts)

    candidat_a_str = _format_candidate(candidate_a)
    candidat_b_str = _format_candidate(candidate_b)
    sections_fortes = "Non disponible"  # NOTE(review): strong sections not wired in yet

    prompt = DP_TIEBREAK.format(
        motif=motif or "Non renseigné",
        candidat_a=candidat_a_str,
        candidat_b=candidat_b_str,
        sections_fortes=sections_fortes,
    )
    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=500, role="coding")
    except Exception:
        logger.warning("Erreur LLM tiebreaker DP", exc_info=True)
        return None

    if not result or not isinstance(result, dict):
        return None
    winner = result.get("winner")
    if winner not in ("A", "B"):
        return None
    return {"winner": winner, "reason": result.get("reason", "")}
# ---------------------------------------------------------------------------
# 5. LLM Fallback one-shot — proposition DP quand le scoring déterministe échoue
# ---------------------------------------------------------------------------
def _build_strong_sections_text(parsed: dict) -> str:
"""Construit le texte des sections fortes pour le prompt LLM one-shot.
Sections fortes : motif, diag_sortie, diag_principal, diagnostics_retenus,
conclusion, synthese. PAS histoire_maladie ni examen_clinique (= bruit).
"""
sections = parsed.get("sections", {})
_STRONG_ORDER = [
("motif_hospitalisation", 500),
("diag_sortie", 600), ("diagnostics_retenus", 600),
("diag_principal", 600),
("conclusion", 600), ("synthese", 600),
]
parts = []
for key, max_len in _STRONG_ORDER:
val = sections.get(key, "")
if val:
parts.append(f"[{key}] {val[:max_len]}")
return "\n".join(parts) or "Aucune section forte"
def _build_motif(parsed: dict, dossier: DossierMedical) -> str:
"""Extrait le motif d'hospitalisation pour le prompt LLM."""
motif = ""
if dossier.sejour and dossier.sejour.mode_entree:
motif = dossier.sejour.mode_entree
if not motif:
motif = parsed.get("sections", {}).get("motif_hospitalisation", "")[:300] or "Non renseigné"
return motif
def _build_actes(dossier: DossierMedical) -> str:
"""Construit la liste des actes pour le prompt LLM."""
parts = []
for a in dossier.actes_ccam[:5]:
label = a.texte
if a.code_ccam_suggestion:
label += f" ({a.code_ccam_suggestion})"
parts.append(label)
return ", ".join(parts) or "Non renseignés"
def _validate_and_normalize_code(dp_code: str, pool_codes: set[str] | None = None) -> tuple[str, str | None, bool]:
    """Validate a CIM-10 code, falling back to its parent codes when needed.

    Returns (final_code, original_code_if_normalized, is_valid). The
    second element is only set when a parent (X00 or X00.9) was
    substituted for the requested code.
    """
    dp_code = normalize_code(dp_code)
    parent3 = dp_code[:3]
    parent9 = f"{parent3}.9"

    # Membership in the candidate pool wins over dictionary validation.
    if pool_codes is not None:
        if dp_code in pool_codes:
            return dp_code, None, True
        if parent3 in pool_codes:
            return parent3, dp_code, True
        if parent9 in pool_codes:
            return parent9, dp_code, True

    # Direct CIM-10 dictionary validation, then the parent fallbacks.
    if cim10_validate(dp_code)[0]:
        return dp_code, None, True
    if cim10_validate(parent3)[0]:
        return parent3, dp_code, True
    if cim10_validate(parent9)[0]:
        return parent9, dp_code, True
    return dp_code, None, False
def _apply_guardrails(
    dp_code: str,
    candidate: DPCandidate,
    evidence_section: str,
    evidence_excerpt: str,
    confidence: str,
) -> DPSelection:
    """Apply the deterministic guardrails to an LLM-proposed DP candidate.

    Verdict is "confirmed" only when the evidence comes from a strong
    section, carries a non-empty excerpt, and the LLM confidence is
    "high"; anything else yields "review".

    Side effect: fills candidate.score / candidate.score_details with a
    synthetic confidence-based score.
    """
    is_strong_section = evidence_section in _STRONG_SECTIONS
    has_evidence = bool(evidence_excerpt and evidence_excerpt.strip())
    is_high_conf = confidence == "high"

    # Synthetic score derived from the declared LLM confidence.
    confidence_scores = {"high": 3, "medium": 2, "low": 1}
    candidate.score = confidence_scores.get(confidence, 1)
    candidate.score_details = {"llm_confidence": candidate.score}

    # GF-1: empty evidence excerpt → REVIEW
    if not has_evidence:
        logger.info("LLM fallback DP : pas d'extrait preuve pour %s, REVIEW", dp_code)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason="LLM fallback: evidence_excerpt vide",
        )

    # GF-2: banal comorbidity AND weak section → REVIEW
    if _is_comorbidity_code(dp_code) and not is_strong_section:
        logger.info("LLM fallback DP : comorbidité %s hors section forte (%s), REVIEW", dp_code, evidence_section)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: comorbidité {dp_code} hors section forte",
        )

    # GF-3: CONFIRMED requires strong section + high confidence
    if not is_strong_section or not is_high_conf:
        reasons = []
        if not is_strong_section:
            reasons.append(f"section faible ({evidence_section})")
        if not is_high_conf:
            reasons.append(f"confidence {confidence}")
        reason_str = " + ".join(reasons)
        logger.info("LLM fallback DP : %s pour %s, REVIEW", reason_str, dp_code)
        # Bug fix: a separator was missing between the code and the reason,
        # producing strings like "LLM fallback: K85.9section faible (...)".
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: {dp_code} — {reason_str}",
        )

    # All conditions met → CONFIRMED
    return DPSelection(
        verdict="confirmed", candidates=[candidate],
        winner_reason=f"LLM fallback: {dp_code} ({confidence}, {evidence_section})",
    )
def llm_dp_fallback(
    parsed: dict,
    text: str,
    dossier: DossierMedical,
    dp_candidates: list[DPCandidate] | None = None,
    edsnlp_result=None,
) -> DPSelection:
    """One-shot LLM call to identify and code the DP.

    The LLM directly sees the strong CRH sections and must answer in a
    single call with: dp_code, dp_label, evidence_section,
    evidence_excerpt, confidence. Must only be called when use_llm=True
    AND the deterministic selection ended with verdict="review".

    Args:
        parsed: Parsed document dict (expects a "sections" mapping).
        text: Full document text (unused in this path).
        dossier: Structured medical record (stay data, CCAM acts).
        dp_candidates: Unused here; kept for interface parity.
        edsnlp_result: Unused here; kept for interface parity.

    Returns:
        A DPSelection whose verdict reflects the deterministic guardrails
        applied to the LLM answer (never a blind "confirmed").
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_LLM_ONESHOT
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le fallback DP LLM")
        return DPSelection(verdict="review", winner_reason="LLM non disponible")

    # Prompt context: motive, strong sections, acts.
    motif = _build_motif(parsed, dossier)
    sections_fortes = _build_strong_sections_text(parsed)
    actes = _build_actes(dossier)
    prompt = DP_LLM_ONESHOT.format(
        motif=motif, sections_fortes=sections_fortes, actes=actes,
    )
    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=800, role="coding")
    except Exception:
        logger.warning("Erreur LLM fallback DP", exc_info=True)
        return DPSelection(verdict="review", winner_reason="erreur LLM fallback DP")
    if not result or not isinstance(result, dict):
        return DPSelection(verdict="review", winner_reason="réponse LLM invalide")

    dp_code = result.get("dp_code", "")
    dp_label = result.get("dp_label", "")
    confidence = result.get("confidence", "low")
    evidence_section_raw = result.get("evidence_section", "")
    evidence_excerpt = result.get("evidence_excerpt", "")
    # Map the free-form section name returned by the LLM onto canonical keys.
    evidence_section = _normalize_evidence_section(evidence_section_raw)
    logger.info(
        "LLM oneshot: code=%s label='%s' section=%s confidence=%s",
        dp_code, dp_label[:60], evidence_section, confidence,
    )
    if not dp_code:
        return DPSelection(
            verdict="review",
            winner_reason="LLM: aucun code DP proposé",
        )

    # Validate the CIM-10 code; a parent code (X00 / X00.9) may be substituted.
    dp_code, dp_code_original, is_valid = _validate_and_normalize_code(dp_code)
    if not is_valid:
        return DPSelection(
            verdict="review",
            winner_reason=f"code invalide {dp_code}",
        )
    if dp_code_original:
        # Bug fix: the format string was "%s%s", gluing both codes together
        # in the log ("K85.10K85.1"); add the arrow separator used elsewhere.
        logger.info("LLM oneshot: normalisation %s → %s", dp_code_original, dp_code)

    # Resolve the final label from the CIM-10 dictionary when the LLM gave none.
    _, dict_label = cim10_validate(dp_code)

    # Build the candidate carrying the full provenance of the LLM answer.
    source_tag = f"llm_oneshot ({evidence_section})" if evidence_section else "llm_oneshot"
    candidate = DPCandidate(
        code=dp_code,
        label=dp_label or dict_label or "",
        source_section=source_tag,
        source_excerpt=evidence_excerpt,
        confidence_raw=confidence,
        dp_code_original_llm=dp_code_original,
        dp_code_normalized=dp_code_original is not None,
    )
    # Deterministic guardrails decide confirmed vs review.
    return _apply_guardrails(dp_code, candidate, evidence_section, evidence_excerpt, confidence)

View File

@@ -188,6 +188,17 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
# Diagnostic principal : le plus spécifique # Diagnostic principal : le plus spécifique
merged.diagnostic_principal = _prefer_most_specific_dp(dossiers) merged.diagnostic_principal = _prefer_most_specific_dp(dossiers)
# Propager dp_selection depuis le dossier source du DP retenu
if merged.diagnostic_principal:
for d in dossiers:
if (
d.diagnostic_principal
and d.diagnostic_principal.cim10_suggestion == merged.diagnostic_principal.cim10_suggestion
and d.dp_selection is not None
):
merged.dp_selection = d.dp_selection
break
# Collecter tous les DAS + DP non retenus comme DAS # Collecter tous les DAS + DP non retenus comme DAS
all_das: list[Diagnostic] = [] all_das: list[Diagnostic] = []
for d in dossiers: for d in dossiers:

View File

@@ -7,6 +7,8 @@ from .templates import (
QC_VALIDATION, QC_VALIDATION,
CPAM_EXTRACTION, CPAM_EXTRACTION,
CPAM_ARGUMENTATION, CPAM_ARGUMENTATION,
DP_TIEBREAK,
DP_LLM_ONESHOT,
CPAM_ADVERSARIAL, CPAM_ADVERSARIAL,
) )
@@ -17,5 +19,7 @@ __all__ = [
"QC_VALIDATION", "QC_VALIDATION",
"CPAM_EXTRACTION", "CPAM_EXTRACTION",
"CPAM_ARGUMENTATION", "CPAM_ARGUMENTATION",
"DP_TIEBREAK",
"DP_LLM_ONESHOT",
"CPAM_ADVERSARIAL", "CPAM_ADVERSARIAL",
] ]

View File

@@ -300,7 +300,79 @@ Réponds UNIQUEMENT avec un objet JSON au format suivant :
}}""" }}"""
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# 7. CPAM passe 3 — validation adversariale (relecture critique) # 7. DP Tiebreaker — départage entre deux candidats DP à scores proches
# ---------------------------------------------------------------------------
# Rôle : coding | Temperature : 0.0 | Max tokens : 500
# Fichier d'origine : src/medical/dp_scoring.py → _llm_tiebreak()
# Variables : motif, candidat_a, candidat_b, sections_fortes
DP_TIEBREAK = """\
Tu es un médecin DIM expert. Deux diagnostics sont candidats au poste de Diagnostic Principal (DP).
Le DP doit refléter le motif principal de prise en charge qui a mobilisé le plus de ressources pendant le séjour.
MOTIF D'HOSPITALISATION : {motif}
CANDIDAT A : {candidat_a}
CANDIDAT B : {candidat_b}
SECTIONS DU CRH À FORT SIGNAL :
{sections_fortes}
Choisis le candidat le plus approprié comme DP selon les critères ATIH :
1. Motif principal de prise en charge du séjour
2. Ressources mobilisées (actes, biologie, traitement)
3. Spécificité du code CIM-10 (préférer le plus spécifique)
Réponds UNIQUEMENT en JSON :
{{
"winner": "A" ou "B",
"reason": "explication courte en français"
}}"""
# ---------------------------------------------------------------------------
# 7b. DP LLM One-shot — identification + codage CIM-10 du DP en un appel
# ---------------------------------------------------------------------------
# Rôle : coding | Temperature : 0.0 | Max tokens : 800
# Fichier d'origine : src/medical/dp_scoring.py → llm_dp_fallback()
# Variables : motif, sections_fortes, actes
DP_LLM_ONESHOT = """\
Tu es un médecin DIM (Département d'Information Médicale) expert en codage PMSI.
Identifie le Diagnostic Principal (DP) et code-le en CIM-10 avec le code le plus SPÉCIFIQUE (4e ou 5e caractère).
DÉFINITION DU DP (Guide méthodologique ATIH) :
Le DP est le diagnostic qui a mobilisé l'essentiel des ressources du séjour. C'est la pathologie ACTIVE, TRAITÉE, RETENUE en fin de séjour — pas le symptôme d'entrée si un diagnostic étiologique a été posé.
CE QUE TU NE CHERCHES PAS :
- Les comorbidités chroniques de fond (hypertension, obésité, diabète équilibré, dyslipidémie, anémie chronique) SAUF si elles sont DÉCOMPENSÉES et constituent le motif d'hospitalisation
- Les antécédents stables non traités activement pendant ce séjour
- Les facteurs de risque (tabac, alcool, sédentarité)
MÉTHODE :
1. Lis le motif d'hospitalisation → pourquoi le patient est arrivé
2. Lis la conclusion/synthèse → quel diagnostic a été retenu après le séjour
3. Identifie la pathologie ACTIVE traitée, puis code-la en CIM-10
4. Préfère le code le plus spécifique (ex: K85.1 > K85.9 > K85)
5. Cite la SECTION et l'EXTRAIT exact qui prouvent ton choix
MOTIF D'HOSPITALISATION : {motif}
SECTIONS CLINIQUES (fortes uniquement) :
{sections_fortes}
ACTES RÉALISÉS : {actes}
Réponds UNIQUEMENT en JSON :
{{
"dp_code": "X00.0",
"dp_label": "libellé officiel CIM-10 en français",
"evidence_section": "nom exact de la section source",
"evidence_excerpt": "extrait EXACT copié du texte (2-3 lignes max)",
"confidence": "high ou medium ou low"
}}"""
# ---------------------------------------------------------------------------
# 8. CPAM passe 3 — validation adversariale (relecture critique)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Rôle : validation | Temperature : 0.0 | Max tokens : 800 # Rôle : validation | Temperature : 0.0 | Max tokens : 800
# Fichier d'origine : src/control/cpam_response.py → _validate_adversarial() # Fichier d'origine : src/control/cpam_response.py → _validate_adversarial()

View File

@@ -9,13 +9,32 @@ import pytest
from src.config import ControleCPAM from src.config import ControleCPAM
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
# En-têtes
_LEGACY_HEADER = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
_NEW_HEADER = (
"N° OGC", "Type désaccord", "Codes Établissement", "Libellé Établissement",
"Codes Contrôleurs", "Libellé Contrôleurs", "Décision UCR", "Codes retenus",
"GHM / GHS", "Texte décision",
)
def _create_test_xlsx(rows: list[tuple], path: Path) -> None: def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
"""Crée un fichier xlsx de test avec les lignes données.""" """Crée un fichier xlsx de test au format legacy."""
wb = openpyxl.Workbook() wb = openpyxl.Workbook()
ws = wb.active ws = wb.active
ws.title = "OGC Contrôle T2A" ws.title = "OGC Contrôle T2A"
ws.append(("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")) ws.append(_LEGACY_HEADER)
for row in rows:
ws.append(row)
wb.save(path)
def _create_new_format_xlsx(rows: list[tuple], path: Path) -> None:
"""Crée un fichier xlsx de test au format ucr_extract (nouveau)."""
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "UCR Extract"
ws.append(_NEW_HEADER)
for row in rows: for row in rows:
ws.append(row) ws.append(row)
wb.save(path) wb.save(path)
@@ -128,3 +147,292 @@ class TestControleCPAMModel:
assert ctrl.numero_ogc == 21 assert ctrl.numero_ogc == 21
assert ctrl.contre_argumentation == "Ma réponse" assert ctrl.contre_argumentation == "Ma réponse"
assert ctrl.sources_reponse == [] assert ctrl.sources_reponse == []
    def test_new_fields_defaults(self):
        """The 6 new ucr_extract fields default to None."""
        ctrl = ControleCPAM(numero_ogc=1)
        assert ctrl.codes_etablissement is None
        assert ctrl.libelle_etablissement is None
        assert ctrl.codes_controleurs is None
        assert ctrl.libelle_controleurs is None
        assert ctrl.codes_retenus is None
        assert ctrl.ghm_ghs is None
    def test_new_fields_serialization(self):
        """The ucr_extract fields round-trip through model_dump."""
        ctrl = ControleCPAM(
            numero_ogc=10,
            titre="Désaccord sur le DP",
            codes_etablissement="K85.1",
            libelle_etablissement="Pancréatite aiguë biliaire",
            codes_controleurs="K85.9",
            libelle_controleurs="Pancréatite aiguë, sans précision",
            codes_retenus="K85.1",
            ghm_ghs="06M091 / 1854",
        )
        data = ctrl.model_dump()
        assert data["codes_etablissement"] == "K85.1"
        assert data["libelle_etablissement"] == "Pancréatite aiguë biliaire"
        assert data["codes_controleurs"] == "K85.9"
        assert data["libelle_controleurs"] == "Pancréatite aiguë, sans précision"
        assert data["codes_retenus"] == "K85.1"
        assert data["ghm_ghs"] == "06M091 / 1854"
class TestParseNewFormat:
    """Tests for the ucr_extract (new) CPAM export format."""

    def test_parse_basic_dp(self, tmp_path):
        """Basic parsing — DP disagreement carrying 'Codes Contrôleurs'."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            # N° OGC, Type, Codes Étab, Lib Étab, Codes Ctrl, Lib Ctrl, Décision, Codes ret, GHM, Texte
            (17, "DP", "K85.1", "Pancréatite aiguë biliaire", "K85.9",
             "Pancréatite aiguë SAI", "Défavorable", "K85.9", "06M091 / 1854",
             "Le contrôleur ne retient pas K85.1"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert 17 in result
        ctrl = result[17][0]
        assert ctrl.numero_ogc == 17
        assert ctrl.titre == "Désaccord sur le DP"
        assert ctrl.dp_ucr == "K85.9"
        assert ctrl.da_ucr is None
        assert ctrl.arg_ucr == "Le contrôleur ne retient pas K85.1"
        assert ctrl.decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_parse_basic_das(self, tmp_path):
        """Parsing — DAS disagreement maps controller codes to da_ucr."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (21, "DAS", "E11.40,G63.2", "Diabète+neuropathie", "E11.40",
             "Diabète type 2", "Favorable", "E11.40,G63.2", None,
             "L'UCR retient les codes initiaux"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        ctrl = result[21][0]
        assert ctrl.titre == "Désaccord sur les DAS"
        assert ctrl.dp_ucr is None
        assert ctrl.da_ucr == "E11.40"
        assert ctrl.decision_ucr == "UCR retient"

    def test_parse_dp_plus_das(self, tmp_path):
        """DP+DAS: first controller code → dp_ucr, the rest → da_ucr."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (30, "DP+DAS", "K85.1,E11.40", "...", "K85.9,G63.2,I10",
             "...", "Défavorable", "K85.9,G63.2,I10", None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        ctrl = result[30][0]
        assert ctrl.titre == "Désaccord sur le DP et les DAS"
        assert ctrl.dp_ucr == "K85.9"
        assert ctrl.da_ucr == "G63.2,I10"

    def test_parse_dp_plus_das_single_code(self, tmp_path):
        """DP+DAS with a single code → everything in dp_ucr, no da_ucr."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (31, "DP+DAS", "K85.1", "...", "K85.9",
             "...", "Favorable", None, None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        ctrl = result[31][0]
        assert ctrl.dp_ucr == "K85.9"
        assert ctrl.da_ucr is None

    def test_new_fields_populated(self, tmp_path):
        """The 6 enriched fields are populated from their columns."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (42, "DP", "E11.40", "Diabète type 2 avec complications",
             "E11.9", "Diabète type 2 sans complication",
             "Défavorable", "E11.9", "05M092 / 1780", "Argumentation contrôleur"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        ctrl = result[42][0]
        assert ctrl.codes_etablissement == "E11.40"
        assert ctrl.libelle_etablissement == "Diabète type 2 avec complications"
        assert ctrl.codes_controleurs == "E11.9"
        assert ctrl.libelle_controleurs == "Diabète type 2 sans complication"
        assert ctrl.codes_retenus == "E11.9"
        assert ctrl.ghm_ghs == "05M092 / 1780"

    def test_decision_favorable(self, tmp_path):
        """'Favorable' maps to 'UCR retient'."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (10, "DP", None, None, None, None, "Favorable", None, None, "OK"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[10][0].decision_ucr == "UCR retient"

    def test_decision_defavorable(self, tmp_path):
        """'Défavorable' maps to 'UCR confirme avis médecins contrôleurs'."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (11, "DAS", None, None, None, None, "Défavorable", None, None, "KO"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[11][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_defavorable_no_accent(self, tmp_path):
        """'Defavorable' (unaccented) gets the same mapping."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (12, "DP", None, None, None, None, "Defavorable", None, None, "KO"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[12][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_unknown_passthrough(self, tmp_path):
        """An unknown decision value is passed through unchanged."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (13, "DP", None, None, None, None, "Partielle", None, None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[13][0].decision_ucr == "Partielle"

    def test_type_desaccord_unknown(self, tmp_path):
        """An unknown disagreement type yields the 'Désaccord : XXX' title."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (14, "Actes", None, None, None, None, "Favorable", None, None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[14][0].titre == "Désaccord : Actes"

    def test_type_desaccord_empty(self, tmp_path):
        """An empty disagreement type yields an empty title."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (15, "", None, None, None, None, "Favorable", None, None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result[15][0].titre == ""

    def test_multiple_ogc_new_format(self, tmp_path):
        """Several OGC entries in the new format are grouped by OGC number."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (10, "DP", None, None, "K85.9", None, "Favorable", None, None, "Arg 1"),
            (20, "DAS", None, None, "E11.40", None, "Défavorable", None, None, "Arg 2"),
            (10, "DAS", None, None, "G63.2", None, "Favorable", None, None, "Arg 3"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert len(result) == 2
        assert len(result[10]) == 2
        assert len(result[20]) == 1
        assert result[10][0].dp_ucr == "K85.9"
        assert result[10][1].da_ucr == "G63.2"

    def test_empty_new_format(self, tmp_path):
        """A new-format file with only headers parses to an empty dict."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result == {}

    def test_ogc_none_skipped(self, tmp_path):
        """Rows with a None OGC number are ignored."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (None, "DP", None, None, None, None, "Favorable", None, None, "Texte"),
            (10, "DP", None, None, "K85.1", None, "Favorable", None, None, "OK"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert len(result) == 1
        assert 10 in result

    def test_ogc_invalid_skipped(self, tmp_path):
        """A non-numeric OGC number is ignored."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            ("ABC", "DP", None, None, None, None, "Favorable", None, None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result == {}
class TestAutoDetection:
    """Tests for the legacy/ucr_extract format auto-detection."""

    def test_detects_legacy(self, tmp_path):
        """The legacy format is detected from its headers."""
        xlsx = tmp_path / "legacy.xlsx"
        _create_test_xlsx([
            (17, "Titre", "Arg", "Décision", None, None, None, None),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert 17 in result
        assert result[17][0].titre == "Titre"

    def test_detects_new(self, tmp_path):
        """The new format is detected from its headers."""
        xlsx = tmp_path / "new.xlsx"
        _create_new_format_xlsx([
            (17, "DP", "K85.1", "Label", "K85.9", "Label2",
             "Favorable", "K85.1", None, "Texte"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert 17 in result
        assert result[17][0].titre == "Désaccord sur le DP"

    def test_unknown_format_returns_empty(self, tmp_path):
        """Unrecognized headers yield an empty dict."""
        xlsx = tmp_path / "unknown.xlsx"
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.append(("Col1", "Col2", "Col3"))
        ws.append((1, "val", "val"))
        wb.save(xlsx)
        result = parse_cpam_excel(xlsx)
        assert result == {}

    def test_new_format_priority_over_legacy(self, tmp_path):
        """When both column sets are present, the new format wins."""
        xlsx = tmp_path / "both.xlsx"
        wb = openpyxl.Workbook()
        ws = wb.active
        # Headers mixing both formats in a single sheet.
        ws.append((
            "N° OGC", "Titre", "Arg_UCR", "Décision_UCR",
            "Type désaccord", "Décision UCR", "Texte décision",
            "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR",
        ))
        ws.append((17, "Titre", "Arg", "Déc legacy", "DP", "Favorable", "Texte nouveau",
                   "K85.1", None, None, None))
        wb.save(xlsx)
        result = parse_cpam_excel(xlsx)
        assert 17 in result
        # The new format takes priority → title built from "Type désaccord".
        assert result[17][0].titre == "Désaccord sur le DP"
        # arg_ucr comes from "Texte décision" (new), not "Arg_UCR" (legacy).
        assert result[17][0].arg_ucr == "Texte nouveau"

710
tests/test_dp_scoring.py Normal file
View File

@@ -0,0 +1,710 @@
"""Tests pour le module de scoring DP (Diagnostic Principal)."""
import pytest
from src.config import (
DossierMedical,
Diagnostic,
DPCandidate,
DPSelection,
DP_SCORING_WEIGHTS,
DP_REVIEW_THRESHOLD,
Sejour,
)
from src.medical.dp_scoring import (
build_dp_shortlist,
score_candidates,
select_dp,
_get_context_window,
_is_z_code_whitelisted,
_is_comorbidity_code,
_has_explicit_pec_proof,
_dedup_by_code,
_normalize_evidence_section,
)
# --- Helpers ---
def _make_parsed(sections: dict | None = None, diagnostics: list | None = None) -> dict:
return {
"type": "crh",
"patient": {"sexe": "M"},
"sejour": {},
"diagnostics": diagnostics or [],
"sections": sections or {},
}
def _make_candidate(
    code: str = "K85.1",
    label: str = "Pancréatite aiguë biliaire",
    source_section: str = "diag_sortie",
    **kwargs,
) -> DPCandidate:
    """Build a DPCandidate with sensible defaults; extra kwargs pass through."""
    return DPCandidate(code=code, label=label, source_section=source_section, **kwargs)
# === Tests build_dp_shortlist ===
class TestBuildDPShortlist:
    """Tests for build_dp_shortlist: candidate harvesting from every source."""

    def test_from_diag_sortie_with_cim10_code(self):
        """An inline CIM-10 code in diag_sortie becomes a candidate."""
        parsed = _make_parsed(sections={
            "diag_sortie": "Pancréatite aiguë biliaire K85.1",
        })
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", None, dossier)
        codes = [c.code for c in candidates]
        assert "K85.1" in codes

    def test_from_diag_principal_section(self):
        """An inline CIM-10 code in diag_principal becomes a candidate."""
        parsed = _make_parsed(sections={
            "diag_principal": "Embolie pulmonaire I26.9",
        })
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", None, dossier)
        codes = [c.code for c in candidates]
        assert "I26.9" in codes

    def test_from_conclusion_via_cim10_map(self):
        """A label in the conclusion is resolved to a code via the CIM-10 map."""
        parsed = _make_parsed(sections={
            "conclusion": "pancréatite aiguë biliaire, bonne évolution",
        })
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", None, dossier)
        codes = [c.code for c in candidates]
        assert "K85.1" in codes

    def test_from_regex_fallback(self):
        """Without sections, the regex fallback on raw text still finds a candidate."""
        parsed = _make_parsed(sections={})
        text = "Au total : pancréatite aiguë biliaire.\nDevenir : retour."
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, text, None, dossier)
        codes = [c.code for c in candidates]
        assert "K85.1" in codes

    def test_from_edsnlp(self):
        """EDS-NLP entities feed the shortlist."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity
        parsed = _make_parsed(sections={})
        edsnlp = EdsnlpResult(cim10_entities=[
            CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False),
        ])
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", edsnlp, dossier)
        codes = [c.code for c in candidates]
        assert "R10.4" in codes

    def test_edsnlp_negated_excluded(self):
        """Negated EDS-NLP entities are excluded from the shortlist."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity
        parsed = _make_parsed(sections={})
        edsnlp = EdsnlpResult(cim10_entities=[
            CIM10Entity(texte="fièvre", code="R50.9", negation=True),
        ])
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", edsnlp, dossier)
        codes = [c.code for c in candidates]
        assert "R50.9" not in codes

    def test_dedup_keeps_strongest_section(self):
        """If the same code comes from diag_sortie and conclusion, keep diag_sortie."""
        parsed = _make_parsed(sections={
            "diag_sortie": "Pancréatite K85.1",
            "conclusion": "pancréatite K85.1 bonne évolution",
        })
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "", None, dossier)
        k85_candidates = [c for c in candidates if c.code == "K85.1"]
        assert len(k85_candidates) == 1
        assert k85_candidates[0].source_section == "diag_sortie"

    def test_empty_sections_returns_empty(self):
        """No sections and no diagnosis-bearing text → empty shortlist."""
        parsed = _make_parsed(sections={})
        dossier = DossierMedical()
        candidates = build_dp_shortlist(parsed, "Patient en bon état.", None, dossier)
        assert candidates == []
# === Tests score_candidates ===
class TestScoreCandidates:
    """Unit tests for the multi-criteria scorer: section bonuses, proof bonus,
    negation/conditional penalties and Z/R-code penalties."""

    def test_section_bonus_diag_sortie(self):
        ranked = score_candidates(
            [_make_candidate(source_section="diag_sortie")], DossierMedical()
        )
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_diag_sortie"]

    def test_section_bonus_conclusion(self):
        ranked = score_candidates(
            [_make_candidate(source_section="conclusion")], DossierMedical()
        )
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_conclusion"]

    def test_section_bonus_edsnlp(self):
        ranked = score_candidates(
            [_make_candidate(source_section="edsnlp")], DossierMedical()
        )
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_edsnlp"]

    def test_proof_excerpt_bonus(self):
        cand = _make_candidate(source_excerpt="Pancréatite aiguë biliaire confirmée au scanner")
        ranked = score_candidates([cand], DossierMedical())
        assert ranked[0].score_details.get("proof_excerpt") == DP_SCORING_WEIGHTS["proof_excerpt"]

    def test_no_proof_bonus_without_excerpt(self):
        ranked = score_candidates([_make_candidate(source_excerpt=None)], DossierMedical())
        assert "proof_excerpt" not in ranked[0].score_details

    def test_negation_penalty(self):
        ranked = score_candidates(
            [_make_candidate(label="Fièvre")],
            DossierMedical(),
            full_text="Pas de fièvre constatée.",
        )
        assert ranked[0].is_negated is True
        assert ranked[0].score_details.get("negation") == DP_SCORING_WEIGHTS["negation"]

    def test_conditional_penalty(self):
        cand = _make_candidate(label="Embolie pulmonaire", code="I26.9")
        ranked = score_candidates(
            [cand],
            DossierMedical(),
            full_text="Embolie pulmonaire suspectée, à confirmer par angioscanner.",
        )
        assert ranked[0].is_conditional is True
        assert ranked[0].score_details.get("conditional") == DP_SCORING_WEIGHTS["conditional"]

    def test_z_code_penalty(self):
        cand = _make_candidate(code="Z76.0", label="Bilan de santé", source_section="conclusion")
        ranked = score_candidates([cand], DossierMedical())
        assert ranked[0].score_details.get("z_code_dp") == DP_SCORING_WEIGHTS["z_code_dp"]

    def test_z_code_whitelist_no_penalty(self):
        cand = _make_candidate(code="Z51.1", label="Chimiothérapie", source_section="conclusion")
        ranked = score_candidates([cand], DossierMedical())
        assert "z_code_dp" not in ranked[0].score_details

    def test_r_code_penalty(self):
        cand = _make_candidate(code="R10.4", label="Douleur abdominale", source_section="edsnlp")
        ranked = score_candidates([cand], DossierMedical())
        assert ranked[0].score_details.get("r_code_dp") == DP_SCORING_WEIGHTS["r_code_dp"]

    def test_sort_by_score_descending(self):
        strong = _make_candidate(code="K85.1", source_section="diag_sortie")
        weak = _make_candidate(code="R10.4", label="Douleur", source_section="edsnlp")
        ranked = score_candidates([weak, strong], DossierMedical())
        assert ranked[0].code == "K85.1"  # diag_sortie score > edsnlp

    def test_combined_scoring(self):
        """Score = section bonus + proof - negation penalties."""
        cand = _make_candidate(
            code="K85.1",
            source_section="diag_sortie",
            source_excerpt="Pancréatite aiguë",
        )
        ranked = score_candidates([cand], DossierMedical())
        assert ranked[0].score == (
            DP_SCORING_WEIGHTS["section_diag_sortie"] + DP_SCORING_WEIGHTS["proof_excerpt"]
        )
# === Tests select_dp ===
class TestSelectDP:
    """Tests of the final verdict (confirmed vs review) based on the score delta."""

    def test_no_candidates_returns_review(self):
        assert select_dp([], DossierMedical()).verdict == "review"

    def test_single_candidate_confirmed(self):
        sole = _make_candidate()
        sole.score = 6
        decision = select_dp([sole], DossierMedical())
        assert decision.verdict == "confirmed"
        assert decision.winner_reason == "candidat unique"

    def test_clear_winner_confirmed(self):
        top = _make_candidate(code="K85.1")
        top.score = 6
        runner_up = _make_candidate(code="R10.4", label="Douleur", source_section="edsnlp")
        runner_up.score = 1
        decision = select_dp([top, runner_up], DossierMedical())
        assert decision.verdict == "confirmed"
        assert "delta" in decision.winner_reason

    def test_close_scores_returns_review(self):
        top = _make_candidate(code="K85.1")
        top.score = 3
        runner_up = _make_candidate(code="K80.5", label="Lithiase", source_section="conclusion")
        runner_up.score = 2
        assert select_dp([top, runner_up], DossierMedical()).verdict == "review"

    def test_review_returns_top3(self):
        pool = [_make_candidate(code=f"K8{i}.{i}", label=f"Diag {i}") for i in range(5)]
        for rank, cand in enumerate(pool):
            cand.score = 5 - rank
        # delta between top1 and top2 = 1, < DP_REVIEW_THRESHOLD
        decision = select_dp(pool, DossierMedical())
        assert decision.verdict == "review"
        assert len(decision.candidates) <= 3
# === Tests utilitaires ===
class TestContextWindow:
    """Tests for extracting a context window around a diagnostic label."""

    def test_finds_label_in_text(self):
        snippet = _get_context_window(
            "Patient admis pour pancréatite aiguë biliaire confirmée.",
            "pancréatite aiguë",
            radius=50,
        )
        assert "pancréatite" in snippet.lower()

    def test_returns_empty_when_not_found(self):
        snippet = _get_context_window("Patient en bon état.", "embolie pulmonaire")
        assert snippet == ""
class TestZCodeWhitelist:
    """Tests for the whitelist of Z-codes that are acceptable as DP."""

    def test_z51_1_whitelisted(self):
        assert _is_z_code_whitelisted("Z51.1") is True

    def test_z45_prefix_whitelisted(self):
        # Z45.80 matches via the "Z45" whitelist entry (prefix match).
        assert _is_z_code_whitelisted("Z45.80") is True

    def test_z76_not_whitelisted(self):
        assert _is_z_code_whitelisted("Z76.0") is False
class TestDedupByCode:
    """Tests for code-level deduplication driven by section priority."""

    def test_dedup_same_code_keeps_strongest(self):
        weak = _make_candidate(code="K85.1", source_section="conclusion")
        strong = _make_candidate(code="K85.1", source_section="diag_sortie")
        order = ["diag_sortie", "diag_principal", "motif_hospitalisation", "conclusion", "synthese"]
        kept = _dedup_by_code([weak, strong], order)
        assert len(kept) == 1
        assert kept[0].source_section == "diag_sortie"

    def test_dedup_different_codes_kept(self):
        first = _make_candidate(code="K85.1")
        second = _make_candidate(code="K80.5", label="Lithiase")
        kept = _dedup_by_code([first, second], ["diag_sortie"])
        assert len(kept) == 2
# === Tests intégration légère ===
class TestDPScoringIntegration:
    """Light integration tests: extraction pipeline + one-shot LLM fallback.

    The LLM is always mocked (patch on ``call_ollama``); these tests mainly
    exercise the guardrails that downgrade an LLM answer to REVIEW.
    """

    def test_crh_with_diag_sortie_section(self):
        """A CRH with a 'Diagnostic de sortie' section produces a dp_selection."""
        from src.medical.cim10_extractor import extract_medical_info
        parsed = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {
                "diag_sortie": "Pancréatite aiguë biliaire K85.1",
            },
        }
        text = "Diagnostic de sortie :\nPancréatite aiguë biliaire K85.1\n\nTraitement de sortie :\nParacétamol"
        dossier = extract_medical_info(parsed, text)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert dossier.dp_selection is not None
        # Strong section + explicit code → deterministic scoring confirms without LLM.
        assert dossier.dp_selection.verdict == "confirmed"

    def test_llm_fallback_confirmed_high_strong_section(self):
        """One-shot LLM CONFIRMED: high confidence + strong evidence section."""
        from unittest.mock import patch
        from src.medical.cim10_extractor import extract_medical_info
        parsed = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {
                "conclusion": "Pancréatite aiguë biliaire avec HTA connue.",
            },
        }
        text = "Conclusion : Pancréatite aiguë biliaire avec HTA connue."
        # Shape of the mocked one-shot LLM answer consumed by the fallback.
        mock_result = {
            "dp_code": "K85.1",
            "dp_label": "Pancréatite aiguë biliaire",
            "evidence_section": "conclusion",
            "evidence_excerpt": "Pancréatite aiguë biliaire",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            dossier = extract_medical_info(parsed, text, use_rag=True)
        assert dossier.dp_selection is not None
        assert dossier.dp_selection.verdict == "confirmed"
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"

    def test_llm_fallback_confirmed_conclusion_section(self):
        """One-shot LLM CONFIRMED: 'conclusion' counts as a strong section."""
        from unittest.mock import patch
        from src.medical.cim10_extractor import extract_medical_info
        parsed = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"conclusion": "Pneumopathie avec insuffisance rénale aiguë."},
        }
        text = "Conclusion : Pneumopathie avec insuffisance rénale aiguë."
        mock_result = {
            "dp_code": "J18.9",
            "dp_label": "Pneumopathie, sans précision",
            "evidence_section": "conclusion",
            "evidence_excerpt": "Pneumopathie avec insuffisance rénale aiguë",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            dossier = extract_medical_info(parsed, text, use_rag=True)
        assert dossier.dp_selection is not None
        assert dossier.dp_selection.verdict == "confirmed"
        assert dossier.diagnostic_principal is not None

    def test_llm_fallback_review_weak_section(self):
        """One-shot LLM REVIEW: evidence from histoire_maladie (weak section) → guardrail."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate
        parsed = {"type": "crh", "sections": {"histoire_maladie": "Dyspnée aiguë."}}
        text = "Histoire de la maladie : Dyspnée aiguë."
        dossier = DossierMedical()
        dp_candidates = [DPCandidate(code="R06.0", label="Dyspnée", source_section="edsnlp")]
        # High confidence is not enough: the evidence section is weak.
        mock_result = {
            "dp_code": "R06.0",
            "dp_label": "Dyspnée",
            "evidence_section": "histoire_maladie",
            "evidence_excerpt": "Dyspnée aiguë",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            selection = llm_dp_fallback(parsed, text, dossier, dp_candidates=dp_candidates)
        assert selection.verdict == "review"
        assert len(selection.candidates) >= 1

    def test_llm_fallback_review_low_confidence(self):
        """One-shot LLM REVIEW: confidence=medium → guardrail."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate
        parsed = {"type": "crh", "sections": {"conclusion": "HTA connue, diabète équilibré."}}
        text = "Conclusion : HTA connue, diabète équilibré."
        dossier = DossierMedical()
        dp_candidates = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]
        mock_result = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "conclusion",
            "evidence_excerpt": "HTA connue",
            "confidence": "medium",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            selection = llm_dp_fallback(parsed, text, dossier, dp_candidates=dp_candidates)
        assert selection.verdict == "review"
        # The review reason mentions the insufficient confidence level.
        assert "confidence medium" in selection.winner_reason

    def test_llm_fallback_guardrail_no_evidence(self):
        """Guardrail: LLM returns an empty evidence excerpt → REVIEW."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate
        parsed = {"type": "crh", "sections": {"conclusion": "Pancréatite."}}
        text = "Conclusion : Pancréatite."
        dossier = DossierMedical()
        dp_candidates = [DPCandidate(code="K85.9", label="Pancréatite", source_section="edsnlp")]
        mock_result = {
            "dp_code": "K85.9",
            "dp_label": "Pancréatite aiguë",
            "evidence_section": "conclusion",
            "evidence_excerpt": "",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            selection = llm_dp_fallback(parsed, text, dossier, dp_candidates=dp_candidates)
        assert selection.verdict == "review"

    def test_llm_fallback_guardrail_comorbidity_weak_section(self):
        """Guardrail: banal comorbidity (HTA) in a non-strong section → REVIEW."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate
        parsed = {"type": "crh", "sections": {"histoire_maladie": "Patient hypertendu."}}
        text = "Histoire de la maladie : Patient hypertendu."
        dossier = DossierMedical()
        dp_candidates = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]
        mock_result = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "histoire_maladie",
            "evidence_excerpt": "Patient hypertendu",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            selection = llm_dp_fallback(parsed, text, dossier, dp_candidates=dp_candidates)
        assert selection.verdict == "review"

    def test_llm_fallback_comorbidity_in_strong_section(self):
        """I10 in a strong section + high confidence → CONFIRMED (guardrail GF-2 does not block)."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate
        parsed = {"type": "crh", "sections": {"motif_hospitalisation": "HTA maligne."}}
        text = "Motif d'hospitalisation : HTA maligne."
        dossier = DossierMedical()
        dp_candidates = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]
        mock_result = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "motif_hospitalisation",
            "evidence_excerpt": "HTA maligne",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=mock_result):
            selection = llm_dp_fallback(parsed, text, dossier, dp_candidates=dp_candidates)
        assert selection.verdict == "confirmed"
        assert selection.candidates[0].code == "I10"

    def test_no_llm_fallback_without_use_rag(self):
        """Without use_rag, the LLM fallback is NOT triggered."""
        from src.medical.cim10_extractor import extract_medical_info
        parsed = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"conclusion": "Bonne évolution."},
        }
        text = "Conclusion : Bonne évolution."
        dossier = extract_medical_info(parsed, text, use_rag=False)
        # Without use_rag → no LLM fallback → review verdict
        assert dossier.dp_selection is not None
        assert dossier.dp_selection.verdict == "review"

    def test_trackare_dp_bypasses_scoring(self):
        """A Trackare document with an already-coded DP does NOT trigger scoring."""
        from src.medical.cim10_extractor import extract_medical_info
        parsed = {
            "type": "trackare",
            "patient": {"sexe": "F"},
            "sejour": {"date_entree": "01/01/2024", "date_sortie": "05/01/2024"},
            "diagnostics": [
                {"type": "Principal", "code_cim10": "K80.5", "libelle": "Calcul des canaux biliaires"},
            ],
        }
        text = "Calcul des canaux biliaires."
        dossier = extract_medical_info(parsed, text)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K80.5"
        assert dossier.dp_selection is None  # Trackare DP, no scoring
# === Tests comorbidité-banale DP ===
class TestComorbidityGuard:
    """Banal-comorbidity rule: I10/E66.x/E78.x/E11.x/D64.9 as DP → REVIEW
    unless there is explicit proof it was the main reason for care (PEC)."""

    def test_is_comorbidity_expanded(self):
        """The expanded list covers I10, E66.*, E78.*, E11.*, D64.9."""
        assert _is_comorbidity_code("I10") is True
        assert _is_comorbidity_code("E66.0") is True
        assert _is_comorbidity_code("E66.9") is True
        assert _is_comorbidity_code("E78.0") is True
        assert _is_comorbidity_code("E11.9") is True
        assert _is_comorbidity_code("E11.0") is True
        assert _is_comorbidity_code("D64.9") is True
        # Not comorbidities
        assert _is_comorbidity_code("D64.0") is False
        assert _is_comorbidity_code("E10.9") is False
        assert _is_comorbidity_code("K85.1") is False

    def test_sole_comorbidity_review(self):
        """Single comorbidity candidate → REVIEW (even from a strong section)."""
        c = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        c.score = 4
        # Hand-crafted score_details: the comorbidity_weak key is what select_dp inspects.
        c.score_details = {"section": 2, "proof_excerpt": 2, "comorbidity_weak": -3}
        sel = select_dp([c], DossierMedical())
        assert sel.verdict == "review"
        assert "comorbidité banale" in sel.winner_reason

    def test_comorbidity_top1_multi_review(self):
        """Comorbidity ranked top1 among several candidates → REVIEW."""
        c1 = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        c1.score = 3
        c1.score_details = {"section": 3, "comorbidity_weak": -3}
        c2 = _make_candidate(code="K85.1", label="Pancréatite", source_section="edsnlp")
        c2.score = 1
        sel = select_dp([c1, c2], DossierMedical())
        assert sel.verdict == "review"
        assert "comorbidité banale" in sel.winner_reason

    def test_comorbidity_with_pec_proof_confirmed(self):
        """Comorbidity + explicit care-management (PEC) proof → CONFIRMED."""
        c = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        c.score = 3
        c.score_details = {"section": 3, "comorbidity_weak": -3, "comorbidity_pec_proof": 3}
        sel = select_dp([c], DossierMedical())
        assert sel.verdict == "confirmed"
        assert sel.winner_reason == "candidat unique"

    def test_non_comorbidity_sole_confirmed(self):
        """Single non-comorbidity candidate → CONFIRMED (not affected by the rule)."""
        c = _make_candidate(code="K85.1", label="Pancréatite", source_section="conclusion")
        c.score = 4
        sel = select_dp([c], DossierMedical())
        assert sel.verdict == "confirmed"

    def test_score_comorbidity_penalty_strong_section(self):
        """Comorbidity is penalised even in a strong section (conclusion)."""
        c = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        scored = score_candidates([c], DossierMedical())
        assert "comorbidity_weak" in scored[0].score_details
        assert scored[0].score_details["comorbidity_weak"] == DP_SCORING_WEIGHTS["comorbidity_weak"]

    def test_score_comorbidity_penalty_motif(self):
        """Comorbidity is penalised in motif_hospitalisation too."""
        c = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        scored = score_candidates([c], DossierMedical())
        assert "comorbidity_weak" in scored[0].score_details

    def test_pec_proof_detected(self):
        """PEC proof found in the text → bonus key present in score_details."""
        c = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        text = "Patient hospitalisé pour hta maligne résistante au traitement."
        scored = score_candidates([c], DossierMedical(), full_text=text)
        assert "comorbidity_pec_proof" in scored[0].score_details
        assert scored[0].score_details["comorbidity_pec_proof"] > 0

    def test_pec_proof_not_found(self):
        """No PEC proof in the text → no bonus."""
        c = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        text = "Patient obèse, pneumopathie communautaire."
        scored = score_candidates([c], DossierMedical(), full_text=text)
        assert "comorbidity_pec_proof" not in scored[0].score_details

    def test_has_explicit_pec_proof_hospitalized(self):
        """Detects 'hospitalisé pour' + label."""
        assert _has_explicit_pec_proof("hta", "Patient hospitalisé pour HTA maligne.") is True

    def test_has_explicit_pec_proof_prise_en_charge(self):
        """Detects 'prise en charge' + label."""
        assert _has_explicit_pec_proof("obésité", "Prise en charge de l'obésité morbide.") is True

    def test_has_explicit_pec_proof_absent(self):
        """No PEC proof for a label that is never mentioned."""
        assert _has_explicit_pec_proof("hta", "Patient admis pour douleur thoracique.") is False

    def test_has_explicit_pec_proof_admission(self):
        """Detects 'admission pour' + label."""
        assert _has_explicit_pec_proof("diabète", "Admission pour diabète déséquilibré.") is True
class TestSectionNormalization:
    """Tests for _normalize_evidence_section — robust section-name normalization."""

    # --- Existing exact matches ---
    def test_exact_conclusion(self):
        normalized = _normalize_evidence_section("conclusion")
        assert normalized == "conclusion"

    def test_exact_synthese(self):
        normalized = _normalize_evidence_section("synthèse")
        assert normalized == "synthese"

    def test_exact_motif_hospitalisation(self):
        normalized = _normalize_evidence_section("motif_hospitalisation")
        assert normalized == "motif_hospitalisation"

    # --- New exact aliases ---
    def test_synthese_du_sejour(self):
        normalized = _normalize_evidence_section("synthèse du séjour")
        assert normalized == "synthese"

    def test_synthese_du_sejour_ascii(self):
        normalized = _normalize_evidence_section("synthese du sejour")
        assert normalized == "synthese"

    def test_conclusions_pluriel(self):
        normalized = _normalize_evidence_section("conclusions")
        assert normalized == "conclusion"

    def test_secretariat_to_autres(self):
        normalized = _normalize_evidence_section("secrétariat")
        assert normalized == "autres"

    def test_medecine_interne_to_autres(self):
        normalized = _normalize_evidence_section("médecine interne")
        assert normalized == "autres"

    def test_sections_cliniques_to_autres(self):
        normalized = _normalize_evidence_section("sections cliniques")
        assert normalized == "autres"

    # --- Bracket/quote stripping ---
    def test_brackets_conclusion(self):
        normalized = _normalize_evidence_section("[conclusion]")
        assert normalized == "conclusion"

    def test_brackets_motif(self):
        normalized = _normalize_evidence_section("[motif_hospitalisation]")
        assert normalized == "motif_hospitalisation"

    def test_colon_conclusion(self):
        normalized = _normalize_evidence_section("conclusion:")
        assert normalized == "conclusion"

    def test_quotes_synthese(self):
        normalized = _normalize_evidence_section('"synthèse"')
        assert normalized == "synthese"

    # --- Keyword fallback ---
    def test_keyword_conclusion_du_sejour(self):
        normalized = _normalize_evidence_section("conclusion du séjour")
        assert normalized == "conclusion"

    def test_keyword_synthese_medicale(self):
        normalized = _normalize_evidence_section("synthèse médicale du dossier")
        assert normalized == "synthese"

    def test_keyword_diagnostic_de_sortie_variant(self):
        normalized = _normalize_evidence_section("diagnostic(s) de sortie")
        assert normalized == "diag_sortie"

    def test_keyword_diagnostic_retenu_variant(self):
        normalized = _normalize_evidence_section("diagnostics retenus à la sortie")
        assert normalized == "diagnostics_retenus"

    def test_keyword_motif_admission(self):
        normalized = _normalize_evidence_section("motif d'admission aux urgences")
        assert normalized == "motif_hospitalisation"

    # --- Edge cases ---
    def test_empty_string(self):
        assert _normalize_evidence_section("") == ""

    def test_none_like_empty(self):
        assert _normalize_evidence_section(" ") == ""

    def test_unknown_section_passthrough(self):
        """Unknown section without any keyword → cleaned passthrough."""
        assert _normalize_evidence_section("biologie") == "biologie"

    def test_sections_fortes_du_dossier(self):
        """Administrative alias observed during benchmarking."""
        assert _normalize_evidence_section("sections fortes du dossier") == "autres"

View File

@@ -109,6 +109,139 @@ de masse 34.370"""
assert result["signes_vitaux"]["imc"] == 34.370 assert result["signes_vitaux"]["imc"] == 34.370
class TestCRHParserDiagSections:
    """Tests for the new high-DP-signal sections (diag_sortie, diag_principal, synthese)."""

    def test_parse_diag_sortie(self):
        text = """Mon cher confrère,
Votre patient a été hospitalisé du 01/01/2024 au 05/01/2024.
Diagnostic de sortie :
Pancréatite aiguë biliaire (K85.1)
Traitement de sortie :
Paracétamol"""
        result = parse_crh(text)
        assert "diag_sortie" in result["sections"]
        assert "K85.1" in result["sections"]["diag_sortie"]

    def test_parse_diagnostics_retenus(self):
        # "Diagnostics retenus" is an alias header that also maps to diag_sortie.
        text = """Conclusion :
Bonne évolution.
Diagnostics retenus :
- Cholécystite aiguë lithiasique
- Lithiase vésiculaire
Traitement de sortie :
Paracétamol"""
        result = parse_crh(text)
        assert "diag_sortie" in result["sections"]
        assert "Cholécystite" in result["sections"]["diag_sortie"]

    def test_parse_diag_principal(self):
        text = """Examen clinique :
Abdomen souple.
Diagnostic principal :
Embolie pulmonaire segmentaire droite
Diagnostics de sortie :
EP + TVP"""
        result = parse_crh(text)
        assert "diag_principal" in result["sections"]
        assert "Embolie pulmonaire" in result["sections"]["diag_principal"]

    def test_parse_probleme_principal(self):
        # "Problème principal" is an alias header that maps to diag_principal.
        text = """Examen clinique :
Patient stable.
Problème principal :
Insuffisance cardiaque décompensée
Devenir : retour à domicile."""
        result = parse_crh(text)
        assert "diag_principal" in result["sections"]
        assert "Insuffisance cardiaque" in result["sections"]["diag_principal"]

    def test_parse_synthese(self):
        text = """Examen clinique :
RAS.
Synthèse :
Patient de 75 ans hospitalisé pour AVC ischémique sylvien droit.
Traitement de sortie :
Aspirine"""
        result = parse_crh(text)
        assert "synthese" in result["sections"]
        assert "AVC" in result["sections"]["synthese"]

    def test_existing_sections_preserved(self):
        """The 7 pre-existing sections are still captured."""
        text = """pour le motif suivant:
Pancréatite aiguë
Antécédents :
HTA, diabète
Histoire de la maladie
Douleur abdominale brutale
Examen clinique
Abdomen défense en HCD
Au total :
Pancréatite aiguë biliaire
TTT de sortie :
Paracétamol
Devenir :
Retour à domicile"""
        result = parse_crh(text)
        assert "motif_hospitalisation" in result["sections"]
        assert "antecedents" in result["sections"]
        assert "histoire_maladie" in result["sections"]
        assert "examen_clinique" in result["sections"]
        assert "conclusion" in result["sections"]
        assert "traitement_sortie" in result["sections"]
        assert "devenir" in result["sections"]

    def test_diag_sortie_multiline(self):
        # A bulleted multi-line list must be captured in full (all three codes).
        text = """Au total :
Bonne évolution.
Diagnostic de sortie :
- Pancréatite aiguë biliaire K85.1
- Lithiase vésiculaire K80.2
- Obésité E66.0
Traitement de sortie :
Paracétamol"""
        result = parse_crh(text)
        assert "diag_sortie" in result["sections"]
        section = result["sections"]["diag_sortie"]
        assert "K85.1" in section
        assert "K80.2" in section
        assert "E66.0" in section

    def test_conclusion_does_not_overflow_into_diag_sortie(self):
        text = """Au total :
Pancréatite aiguë biliaire, évolution favorable.
Diagnostic de sortie :
Pancréatite aiguë biliaire K85.1
Traitement de sortie :
Paracétamol"""
        result = parse_crh(text)
        assert "conclusion" in result["sections"]
        assert "diag_sortie" in result["sections"]
        # The conclusion must NOT contain the diag_sortie text
        assert "K85.1" not in result["sections"]["conclusion"]
class TestCleanPersonName:
    def test_clean_simple(self):
        assert _clean_person_name("Sarah DUTREY") == "Sarah DUTREY"

View File

@@ -653,6 +653,38 @@ class TestBackwardCompatAntecedent:
assert all(isinstance(c, Complication) for c in dossier.complications) assert all(isinstance(c, Complication) for c in dossier.complications)
class TestDPSelectionIntegration:
    """Integration tests for the DP scoring hookup in the extraction pipeline."""

    def test_crh_dp_selection_populated(self):
        """A CRH without a Trackare DP triggers scoring and populates dp_selection."""
        payload = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }
        raw_text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour."
        dossier = extract_medical_info(payload, raw_text)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert dossier.dp_selection is not None
        assert len(dossier.dp_selection.candidates) >= 1

    def test_dp_selection_serialization(self):
        """dp_selection serializes to JSON-compatible data via model_dump()."""
        from src.config import DPCandidate, DPSelection

        selection = DPSelection(
            verdict="confirmed",
            candidates=[DPCandidate(code="K85.1", label="Test", source_section="regex")],
            winner_reason="candidat unique",
        )
        dumped = selection.model_dump()
        assert dumped["verdict"] == "confirmed"
        assert len(dumped["candidates"]) == 1
        assert dumped["candidates"][0]["code"] == "K85.1"
class TestSourceTrackingFields:
    """Tests que les champs source_page/source_excerpt existent sur les modèles."""