feat: scoring DP déterministe + parser CPAM nouveau format + sections CRH
- Nouveau module dp_scoring.py : shortlist, scoring multi-critères, select_dp, LLM one-shot fallback avec garde-fous (négation, comorbidité, Z/R-codes) - Parser CPAM : auto-détection format legacy/ucr_extract, 6 nouveaux champs ControleCPAM (codes_etablissement, libelle, codes_retenus, ghm_ghs) - CRH parser : 3 nouvelles sections (diag_sortie, diag_principal, synthese) - Prompt DP_LLM_ONESHOT externalisé dans templates.py - Propagation dp_selection dans fusion.py - 808 tests passent (dont 21 nouveaux CPAM + 77 dp_scoring + 8 CRH) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -82,6 +82,32 @@ EMBEDDING_MODEL = os.environ.get("T2A_EMBEDDING_MODEL", "dangvantuan/sentence-ca
|
||||
|
||||
RERANKER_MODEL = os.environ.get("T2A_RERANKER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
|
||||
|
||||
# --- DP (Principal Diagnosis) scoring ---

# Additive weights for the deterministic DP scoring.
# "section_*" entries are bonuses keyed on the candidate's source section;
# negative entries are penalties applied per candidate.
DP_SCORING_WEIGHTS: dict[str, int] = {
    "section_diag_sortie": 4,
    "section_diag_principal": 4,
    "section_motif_hospitalisation": 3,
    "section_conclusion": 2,
    "section_synthese": 2,
    "section_edsnlp": 1,
    "section_regex": 1,
    "proof_excerpt": 2,  # non-empty excerpt + page
    "negation": -4,  # "pas de", "absence de", "éliminé"
    "conditional": -3,  # "suspect", "probable", "?"
    "z_code_dp": -2,  # except whitelist below
    "r_code_dp": -2,  # symptom code used as DP
    "comorbidity_weak": -3,  # trivial comorbidity (all sections, unless explicit care proof)
}
DP_REVIEW_THRESHOLD: int = 2  # minimum top1-top2 score delta to avoid a REVIEW verdict

# Z-codes allowed as DP (iterative care, surveillance, newborn, rehabilitation, etc.)
DP_Z_CODE_WHITELIST: frozenset[str] = frozenset({
    "Z51.1", "Z51.0", "Z38", "Z50.1", "Z43", "Z45", "Z09", "Z54",
    "Z75", "Z03", "Z04", "Z08",
})
|
||||
|
||||
|
||||
# --- Modèles de données CIM-10 ---
|
||||
|
||||
|
||||
@@ -128,6 +154,28 @@ class Diagnostic(BaseModel):
|
||||
source_excerpt: Optional[str] = None # extrait du texte source (~200 chars)
|
||||
|
||||
|
||||
class DPCandidate(BaseModel):
    """A candidate Principal Diagnosis (DP) collected from a CRH document.

    Candidates come from parsed CRH sections, edsnlp entities or regex
    fallbacks; they are then scored and ranked to select the DP.
    """

    code: Optional[str] = None  # CIM-10 code (None when only free text is known)
    label: str  # human-readable diagnosis wording
    source_section: str  # "diag_sortie" | "diag_principal" | "conclusion" | "synthese" | "motif_hospitalisation" | "edsnlp" | "regex"
    source_excerpt: Optional[str] = None  # source-text excerpt backing the candidate
    source_page: Optional[int] = None  # page of the excerpt, when known
    confidence_raw: Optional[str] = None  # "high" | "medium" | "low"
    score: int = 0  # deterministic multi-criteria score
    score_details: dict[str, int] = Field(default_factory=dict)  # per-criterion score contributions
    is_negated: bool = False  # label appears in a negated context
    is_conditional: bool = False  # label appears as suspected/probable
    dp_code_original_llm: Optional[str] = None  # original code proposed by the LLM (before normalization)
    dp_code_normalized: bool = False  # True if the code was normalized (parent/fallback)
|
||||
|
||||
|
||||
class DPSelection(BaseModel):
    """Outcome of the DP selection step (deterministic scoring, optionally
    followed by the one-shot LLM tie-break)."""

    verdict: str = "confirmed"  # "confirmed" | "review"
    candidates: list[DPCandidate] = Field(default_factory=list)  # ranked candidates, winner first
    winner_reason: Optional[str] = None  # human-readable justification for the winner
    llm_tiebreak: Optional[dict] = None  # raw LLM tie-break payload, when the fallback ran
|
||||
|
||||
|
||||
class ActeCCAM(BaseModel):
|
||||
texte: str
|
||||
code_ccam_suggestion: Optional[str] = None
|
||||
@@ -183,6 +231,7 @@ class DossierMedical(BaseModel):
|
||||
document_type: str = ""
|
||||
sejour: Sejour = Field(default_factory=Sejour)
|
||||
diagnostic_principal: Optional[Diagnostic] = None
|
||||
dp_selection: Optional[DPSelection] = None
|
||||
diagnostics_associes: list[Diagnostic] = Field(default_factory=list)
|
||||
actes_ccam: list[ActeCCAM] = Field(default_factory=list)
|
||||
antecedents: list[Antecedent] = Field(default_factory=list)
|
||||
@@ -248,6 +297,13 @@ class ControleCPAM(BaseModel):
|
||||
da_ucr: Optional[str] = None
|
||||
dr_ucr: Optional[str] = None
|
||||
actes_ucr: Optional[str] = None
|
||||
# Champs enrichis (format ucr_extract)
|
||||
codes_etablissement: Optional[str] = None
|
||||
libelle_etablissement: Optional[str] = None
|
||||
codes_controleurs: Optional[str] = None
|
||||
libelle_controleurs: Optional[str] = None
|
||||
codes_retenus: Optional[str] = None
|
||||
ghm_ghs: Optional[str] = None
|
||||
contre_argumentation: Optional[str] = None
|
||||
response_data: Optional[dict] = None
|
||||
sources_reponse: list[RAGSource] = Field(default_factory=list)
|
||||
|
||||
@@ -1,4 +1,12 @@
|
||||
"""Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC."""
|
||||
"""Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC.
|
||||
|
||||
Supporte deux formats :
|
||||
- **Ancien** (ogc_structure) : colonnes N° OGC, Titre, Arg_UCR, Décision_UCR, DP_UCR, DA_UCR, DR_UCR, Actes_UCR
|
||||
- **Nouveau** (ucr_extract) : colonnes N° OGC, Type désaccord, Codes Établissement, Codes Contrôleurs,
|
||||
Décision UCR, Codes retenus, GHM / GHS, Texte décision, etc.
|
||||
|
||||
Le format est auto-détecté à partir des en-têtes de la première ligne.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -12,18 +20,15 @@ from ..config import ControleCPAM
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Colonnes attendues dans le fichier Excel
|
||||
_EXPECTED_COLUMNS = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
|
||||
# Colonnes obligatoires par format
|
||||
_LEGACY_REQUIRED = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR")
|
||||
_NEW_REQUIRED = ("N° OGC", "Type désaccord", "Décision UCR", "Texte décision")
|
||||
|
||||
|
||||
def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
|
||||
"""Lit le fichier Excel de contrôle CPAM et retourne un dict OGC -> liste de contrôles.
|
||||
|
||||
Args:
|
||||
path: Chemin vers le fichier .xlsx CPAM.
|
||||
|
||||
Returns:
|
||||
Dict avec le numéro OGC comme clé et la liste des contrôles associés.
|
||||
Auto-détecte le format (ancien ogc_structure vs nouveau ucr_extract).
|
||||
"""
|
||||
path = Path(path)
|
||||
if not path.exists():
|
||||
@@ -33,33 +38,53 @@ def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
|
||||
wb = openpyxl.load_workbook(path, read_only=True)
|
||||
ws = wb[wb.sheetnames[0]]
|
||||
|
||||
# Lire l'en-tête
|
||||
rows = ws.iter_rows(values_only=True)
|
||||
header = next(rows, None)
|
||||
if header is None:
|
||||
logger.error("Fichier CPAM vide : %s", path)
|
||||
wb.close()
|
||||
return {}
|
||||
|
||||
# Construire le mapping colonne -> index
|
||||
col_map = {}
|
||||
for i, col_name in enumerate(header):
|
||||
if col_name:
|
||||
col_map[col_name.strip()] = i
|
||||
col_map[str(col_name).strip()] = i
|
||||
|
||||
# Vérifier les colonnes requises
|
||||
missing = [c for c in _EXPECTED_COLUMNS[:4] if c not in col_map]
|
||||
if missing:
|
||||
logger.error("Colonnes manquantes dans le fichier CPAM : %s", missing)
|
||||
# Auto-détection du format
|
||||
is_new = all(c in col_map for c in _NEW_REQUIRED)
|
||||
is_legacy = all(c in col_map for c in _LEGACY_REQUIRED)
|
||||
|
||||
if is_new:
|
||||
logger.info("CPAM : format ucr_extract détecté")
|
||||
result = _parse_new_format(rows, col_map)
|
||||
elif is_legacy:
|
||||
logger.info("CPAM : format ogc_structure (ancien) détecté")
|
||||
result = _parse_legacy_format(rows, col_map)
|
||||
else:
|
||||
missing_new = [c for c in _NEW_REQUIRED if c not in col_map]
|
||||
missing_leg = [c for c in _LEGACY_REQUIRED if c not in col_map]
|
||||
logger.error(
|
||||
"Format CPAM non reconnu. Colonnes trouvées : %s. "
|
||||
"Manquantes (nouveau) : %s, (ancien) : %s",
|
||||
list(col_map.keys()), missing_new, missing_leg,
|
||||
)
|
||||
wb.close()
|
||||
return {}
|
||||
|
||||
wb.close()
|
||||
total = sum(len(v) for v in result.values())
|
||||
logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", total, len(result))
|
||||
return result
|
||||
|
||||
|
||||
def _parse_legacy_format(rows, col_map: dict[str, int]) -> dict[int, list[ControleCPAM]]:
|
||||
"""Parse l'ancien format ogc_structure."""
|
||||
result: dict[int, list[ControleCPAM]] = {}
|
||||
count = 0
|
||||
|
||||
for row in rows:
|
||||
ogc_val = row[col_map["N° OGC"]]
|
||||
if ogc_val is None:
|
||||
continue
|
||||
|
||||
try:
|
||||
numero_ogc = int(ogc_val)
|
||||
except (ValueError, TypeError):
|
||||
@@ -76,11 +101,104 @@ def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
|
||||
dr_ucr=_clean_optional(row, col_map.get("DR_UCR")),
|
||||
actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")),
|
||||
)
|
||||
|
||||
result.setdefault(numero_ogc, []).append(controle)
|
||||
count += 1
|
||||
|
||||
logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", count, len(result))
|
||||
return result
|
||||
|
||||
|
||||
def _parse_new_format(rows, col_map: dict[str, int]) -> dict[int, list[ControleCPAM]]:
    """Parse the new ucr_extract format.

    Column mapping:
        N° OGC                → numero_ogc
        Type désaccord        → titre (e.g. "Désaccord sur le DP")
        Texte décision        → arg_ucr
        Décision UCR          → decision_ucr (Favorable / Défavorable)
        Codes Contrôleurs     → dp_ucr / da_ucr depending on Type désaccord
        Codes Établissement   → codes_etablissement
        Libellé Établissement → libelle_etablissement
        Libellé Contrôleurs   → libelle_controleurs
        Codes retenus         → codes_retenus
        GHM / GHS             → ghm_ghs

    Args:
        rows: Iterator over the remaining worksheet rows (header already consumed).
        col_map: Mapping of stripped header name -> column index.

    Returns:
        Dict keyed by OGC number with the list of parsed controls.
    """
    result: dict[int, list[ControleCPAM]] = {}

    # Required columns (presence validated by the caller's format auto-detection).
    idx_ogc = col_map["N° OGC"]
    idx_type = col_map["Type désaccord"]
    idx_decision = col_map["Décision UCR"]
    idx_texte = col_map["Texte décision"]
    # Optional enriched columns: a missing header yields a None index,
    # which _clean_optional treats as "no value".
    idx_codes_etab = col_map.get("Codes Établissement")
    idx_lib_etab = col_map.get("Libellé Établissement")
    idx_codes_ctrl = col_map.get("Codes Contrôleurs")
    idx_lib_ctrl = col_map.get("Libellé Contrôleurs")
    idx_codes_ret = col_map.get("Codes retenus")
    idx_ghm = col_map.get("GHM / GHS")

    for row in rows:
        ogc_val = row[idx_ogc]
        if ogc_val is None:
            continue  # blank / separator row
        try:
            numero_ogc = int(ogc_val)
        except (ValueError, TypeError):
            logger.warning("N° OGC invalide ignoré : %s", ogc_val)
            continue

        type_desaccord = str(row[idx_type] or "").strip()
        decision = str(row[idx_decision] or "").strip()
        texte_decision = str(row[idx_texte] or "").strip()
        codes_ctrl = _clean_optional(row, idx_codes_ctrl)
        codes_etab = _clean_optional(row, idx_codes_etab)

        # Build a readable title from the disagreement type.
        if type_desaccord == "DP":
            titre = "Désaccord sur le DP"
        elif type_desaccord == "DAS":
            titre = "Désaccord sur les DAS"
        elif type_desaccord == "DP+DAS":
            titre = "Désaccord sur le DP et les DAS"
        else:
            titre = f"Désaccord : {type_desaccord}" if type_desaccord else ""

        # Map the decision onto the wording expected by cpam_response.
        # Lowercase once; startswith accepts a tuple of prefixes.
        decision_lower = decision.lower()
        if decision_lower.startswith("favorable"):
            decision_ucr = "UCR retient"
        elif decision_lower.startswith(("défavorable", "defavorable")):
            decision_ucr = "UCR confirme avis médecins contrôleurs"
        else:
            decision_ucr = decision  # unknown wording: keep verbatim

        # Route controller codes to dp_ucr / da_ucr based on the type.
        dp_ucr = None
        da_ucr = None
        if type_desaccord == "DP":
            dp_ucr = codes_ctrl
        elif type_desaccord == "DAS":
            da_ucr = codes_ctrl
        elif type_desaccord == "DP+DAS":
            # Controller codes may mix DP and DAS.
            # Convention: first code is the DP, the rest are DAS.
            if codes_ctrl:
                parts = [c.strip() for c in codes_ctrl.split(",") if c.strip()]
                dp_ucr = parts[0] if parts else None
                da_ucr = ",".join(parts[1:]) if len(parts) > 1 else None

        controle = ControleCPAM(
            numero_ogc=numero_ogc,
            titre=titre,
            arg_ucr=texte_decision,
            decision_ucr=decision_ucr,
            dp_ucr=dp_ucr,
            da_ucr=da_ucr,
            codes_etablissement=codes_etab,
            libelle_etablissement=_clean_optional(row, idx_lib_etab),
            codes_controleurs=codes_ctrl,
            libelle_controleurs=_clean_optional(row, idx_lib_ctrl),
            codes_retenus=_clean_optional(row, idx_codes_ret),
            ghm_ghs=_clean_optional(row, idx_ghm),
        )
        result.setdefault(numero_ogc, []).append(controle)

    return result
|
||||
|
||||
|
||||
|
||||
@@ -113,12 +113,19 @@ def _extract_medical_content(text: str, result: dict) -> None:
|
||||
result["contenu_medical"] = m.group(1).strip()
|
||||
|
||||
# Sections spécifiques
|
||||
# Note : les terminaisons incluent les en-têtes des sections suivantes
|
||||
# pour éviter la capture excessive (une section s'arrête quand la suivante commence).
|
||||
_DIAG_HEADERS = r"Diagnostic(?:s)?\s+(?:de\s+sortie|retenu|principal)|Problème\s+principal|Synthèse|En\s+résumé|En\s+synthèse"
|
||||
section_patterns = [
|
||||
("motif_hospitalisation", r"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:Antécédents|Histoire|Examen|Au total|Devenir|TTT)|$)"),
|
||||
("antecedents", r"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir)|$)"),
|
||||
("histoire_maladie", r"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir)|$)"),
|
||||
("examen_clinique", r"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir)|$)"),
|
||||
("conclusion", r"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement)|$)"),
|
||||
("motif_hospitalisation", r"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:Antécédents|Histoire|Examen|Au total|Devenir|TTT|" + _DIAG_HEADERS + r")|$)"),
|
||||
("antecedents", r"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
|
||||
("histoire_maladie", r"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
|
||||
("examen_clinique", r"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir|" + _DIAG_HEADERS + r")|$)"),
|
||||
("conclusion", r"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|" + _DIAG_HEADERS + r")|$)"),
|
||||
# Sections à fort signal DP (avant traitement_sortie pour priorité)
|
||||
("diag_sortie", r"(?:Diagnostic(?:s)?\s+de\s+sortie|Diagnostic(?:s)?\s+retenu(?:s)?(?:\s+(?:à\s+la\s+sortie))?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Synthèse|En\s+résumé|Rédigé|Cordialement)|$)"),
|
||||
("diag_principal", r"(?:Diagnostic\s+principal|Problème\s+principal)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Diagnostic(?:s)?\s+(?:de\s+sortie|retenu|associé)|Devenir|TTT|Traitement|Synthèse|En\s+résumé|Rédigé|Cordialement)|$)"),
|
||||
("synthese", r"(?:Synthèse|En\s+résumé|En\s+synthèse)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Rédigé|Cordialement)|$)"),
|
||||
("traitement_sortie", r"(?:TTT de sortie|Traitement de sortie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement)|$)"),
|
||||
("devenir", r"(?:Devenir)\s*[:\s]*\n?(.*?)(?=\n\s*(?:TTT|Traitement|Rédigé|Cordialement)|$)"),
|
||||
]
|
||||
|
||||
@@ -116,7 +116,7 @@ def extract_medical_info(
|
||||
search_text = raw_text or anonymized_text
|
||||
|
||||
_extract_sejour(parsed_data, dossier)
|
||||
_extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result)
|
||||
_extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result, use_rag=use_rag)
|
||||
_extract_actes(anonymized_text, dossier)
|
||||
_extract_antecedents(anonymized_text, dossier)
|
||||
_extract_traitements(parsed_data, anonymized_text, dossier, edsnlp_result)
|
||||
@@ -306,6 +306,7 @@ def _extract_diagnostics(
|
||||
text: str,
|
||||
dossier: DossierMedical,
|
||||
edsnlp_result: Optional[EdsnlpResult] = None,
|
||||
use_rag: bool = False,
|
||||
) -> None:
|
||||
"""Extrait le diagnostic principal et les diagnostics associés."""
|
||||
text_lower = text.lower()
|
||||
@@ -342,20 +343,51 @@ def _extract_diagnostics(
|
||||
if not ent.negation and not ent.hypothese:
|
||||
edsnlp_codes[ent.code] = ent.texte
|
||||
|
||||
# Si pas de DP depuis le codage, chercher dans le texte
|
||||
# Si pas de DP depuis le codage, utiliser le scoring multi-candidats
|
||||
if not dossier.diagnostic_principal:
|
||||
# D'abord essayer le fallback regex (plus précis pour les patterns spécifiques)
|
||||
dp = _find_diagnostic_principal(text_lower, conclusion)
|
||||
if dp:
|
||||
dossier.diagnostic_principal = dp
|
||||
elif edsnlp_codes:
|
||||
# Utiliser la première entité CIM-10 edsnlp comme DP
|
||||
code, texte = next(iter(edsnlp_codes.items()))
|
||||
texte_clean = texte.capitalize()
|
||||
if is_valid_diagnostic_text(texte_clean):
|
||||
from .dp_scoring import build_dp_shortlist, score_candidates, select_dp, llm_dp_fallback
|
||||
|
||||
candidates = build_dp_shortlist(parsed, text, edsnlp_result, dossier)
|
||||
candidates = score_candidates(candidates, dossier, full_text=text)
|
||||
selection = select_dp(candidates, dossier, use_llm=use_rag)
|
||||
|
||||
# Fallback LLM : si scoring déterministe → REVIEW et LLM autorisé
|
||||
if use_rag and selection.verdict == "review":
|
||||
# Instrumentation : dp_pre_llm
|
||||
pre_code = selection.candidates[0].code if selection.candidates else None
|
||||
pre_section = selection.candidates[0].source_section if selection.candidates else None
|
||||
is_comorbidity_trigger = "comorbidité banale" in (selection.winner_reason or "")
|
||||
logger.info(
|
||||
"DP pre-LLM: code=%s section=%s trigger_comorbidity_fallback=%s",
|
||||
pre_code, pre_section, is_comorbidity_trigger,
|
||||
)
|
||||
|
||||
llm_selection = llm_dp_fallback(
|
||||
parsed, text, dossier,
|
||||
dp_candidates=candidates,
|
||||
edsnlp_result=edsnlp_result,
|
||||
)
|
||||
# Fusionner candidats LLM + déterministes (LLM en tête)
|
||||
if llm_selection.candidates:
|
||||
all_candidates = list(llm_selection.candidates)
|
||||
if selection.candidates:
|
||||
all_candidates.extend(selection.candidates)
|
||||
llm_selection.candidates = all_candidates
|
||||
selection = llm_selection
|
||||
|
||||
# Instrumentation : dp_post_llm
|
||||
post_code = selection.candidates[0].code if selection.candidates else None
|
||||
logger.info("DP post-LLM: code=%s verdict=%s", post_code, selection.verdict)
|
||||
|
||||
dossier.dp_selection = selection
|
||||
if selection.candidates:
|
||||
winner = selection.candidates[0]
|
||||
dossier.diagnostic_principal = Diagnostic(
|
||||
texte=texte_clean, cim10_suggestion=code,
|
||||
source="edsnlp",
|
||||
texte=winner.label,
|
||||
cim10_suggestion=winner.code,
|
||||
source=winner.source_section,
|
||||
source_page=winner.source_page,
|
||||
source_excerpt=winner.source_excerpt,
|
||||
)
|
||||
|
||||
# Diagnostics associés depuis le texte (regex)
|
||||
|
||||
844
src/medical/dp_scoring.py
Normal file
844
src/medical/dp_scoring.py
Normal file
@@ -0,0 +1,844 @@
|
||||
"""Scoring déterministe du Diagnostic Principal (DP) pour les CRH.
|
||||
|
||||
Collecte les candidats DP depuis les sections CRH parsées, les entités edsnlp
|
||||
et les regex, puis applique un scoring multi-critères pour sélectionner le
|
||||
meilleur candidat ou signaler une ambiguïté (verdict REVIEW).
|
||||
|
||||
Fallback LLM one-shot : si use_llm=True et verdict REVIEW, un appel unique
|
||||
au LLM voit les sections fortes et propose dp_code + evidence en un seul pass.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from ..config import (
|
||||
DossierMedical,
|
||||
DPCandidate,
|
||||
DPSelection,
|
||||
DP_REVIEW_THRESHOLD,
|
||||
DP_SCORING_WEIGHTS,
|
||||
DP_Z_CODE_WHITELIST,
|
||||
)
|
||||
from .cim10_dict import normalize_code, normalize_text, validate_code as cim10_validate
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Negation and conditional (hedged) wording patterns
# ---------------------------------------------------------------------------

# French negation phrasings ("pas de", "absence de", "non retenu", "exclu",
# "éliminé", "négatif", "aucun", "sans") used to penalize candidates that
# only appear in a negated context.
_NEGATION_PATTERNS = re.compile(
    r"(?:pas\s+de|absence\s+d[e']|non\s+retenu|exclu[es]?|"
    r"[ée]limin[ée]|n[ée]gatif|aucun[e]?\s|sans\s)",
    re.IGNORECASE,
)

# Hedged / uncertain wording ("suspecté", "probable", trailing "?",
# "à confirmer", "éventuel", "possiblement", "ne peut être exclu").
_CONDITIONAL_PATTERNS = re.compile(
    r"(?:suspect[ée]?|probable|hypothèse|hypothese|\?\s*$|"
    r"[àa]\s+confirmer|[ée]ventuel(?:le)?|possiblement|"
    r"ne\s+peut\s+(?:pas\s+)?[êe]tre\s+exclu)",
    re.IGNORECASE,
)

# Regex extracting explicit CIM-10 codes from free text.
# Requires the dotted form (X##.#) to avoid 3-char false positives:
# P02 (dietetics), N34 (N34S mutation), T36 (T36.7°C = temperature).
# Bare 3-char codes are too ambiguous in free text.
# CIM10_MAP handles term→code mappings for common diagnoses.
_CIM10_CODE_RE = re.compile(r"\b([A-Z]\d{2}\.\d{1,2})(?![A-Za-z°])")
|
||||
|
||||
# Trivial comorbidity codes: penalized as DP (in every section).
# Almost always DAS, even when they appear in conclusion/motif.
_COMORBIDITY_PREFIXES = ("I10", "E66.", "E78.", "E11.", "D64.9")

# Patterns giving explicit proof of principal care (comorbidity exception).
# E.g. "hospitalisé pour HTA maligne", "prise en charge de l'obésité morbide"
_PEC_PROOF_RE = re.compile(
    r"(?:hospitalis[ée]e?\s+pour"
    r"|prise\s+en\s+charge"
    r"|admission\s+pour"
    r"|adress[ée]e?\s+pour)",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
# Sections carrying a strong DP signal.
# NB: in this CRH corpus, "diag_sortie"/"diag_principal" almost never
# exist. "conclusion" and "synthese" ARE the de-facto diagnostic sections.
_STRONG_SECTIONS = frozenset({
    "motif", "motif_hospitalisation",
    "diag_sortie", "diagnostics_retenus", "diag_principal",
    "conclusion", "synthese",
})
|
||||
|
||||
# Normalization mapping: free-form section names returned by the LLM → section keys
_SECTION_NORMALIZE_MAP = {
    # conclusion
    "conclusion": "conclusion",
    "conclusions": "conclusion",
    "au total": "conclusion",
    # synthese
    "synthèse": "synthese",
    "synthese": "synthese",
    "synthèse du séjour": "synthese",
    "synthese du sejour": "synthese",
    "synthèse du dossier": "synthese",
    "synthese du dossier": "synthese",
    "synthèse clinique": "synthese",
    "synthese clinique": "synthese",
    "en résumé": "synthese",
    "en resume": "synthese",
    "en synthèse": "synthese",
    "en synthese": "synthese",
    "résumé": "synthese",
    "resume": "synthese",
    # motif_hospitalisation
    "motif": "motif_hospitalisation",
    "motif d'hospitalisation": "motif_hospitalisation",
    "motif d'admission": "motif_hospitalisation",
    "motif de consultation": "motif_hospitalisation",
    "motif_hospitalisation": "motif_hospitalisation",
    "motif hospitalisation": "motif_hospitalisation",
    "admission": "motif_hospitalisation",
    "motif d'entrée": "motif_hospitalisation",
    "motif d'entree": "motif_hospitalisation",
    # diag_sortie
    "diagnostic de sortie": "diag_sortie",
    "diagnostics de sortie": "diag_sortie",
    "diag_sortie": "diag_sortie",
    "diag sortie": "diag_sortie",
    # diagnostics_retenus
    "diagnostic retenu": "diagnostics_retenus",
    "diagnostics retenus": "diagnostics_retenus",
    "diagnostic retenu à la sortie": "diagnostics_retenus",
    "diagnostics retenus à la sortie": "diagnostics_retenus",
    "diagnostics_retenus": "diagnostics_retenus",
    # diag_principal
    "diagnostic principal": "diag_principal",
    "diag_principal": "diag_principal",
    "diag principal": "diag_principal",
    "problème principal": "diag_principal",
    "probleme principal": "diag_principal",
    # histoire_maladie
    "histoire de la maladie": "histoire_maladie",
    "histoire_maladie": "histoire_maladie",
    "histoire maladie": "histoire_maladie",
    "hdm": "histoire_maladie",
    # evolution
    "evolution dans le service": "evolution",
    "évolution dans le service": "evolution",
    "evolution": "evolution",
    "évolution": "evolution",
    # examen
    "examen clinique": "examen_clinique",
    "examen_clinique": "examen_clinique",
    # procedures
    "indication opératoire": "indication_operatoire",
    "indication operatoire": "indication_operatoire",
    "prise en charge chirurgicale": "indication_operatoire",
    "actes réalisés": "actes",
    "actes realises": "actes",
    "actes": "actes",
    # administrative / noise → "autres"
    "sections cliniques": "autres",
    "sections_cliniques": "autres",
    "sections fortes du dossier": "autres",
    "secrétariat": "autres",
    "secretariat": "autres",
    "médecine interne": "autres",
    "medecine interne": "autres",
    "médecin": "autres",
    "medecin": "autres",
    "courrier": "autres",
    "courrier de sortie": "autres",
    "compte rendu": "autres",
    "compte-rendu": "autres",
    "dossier médical": "autres",
    "dossier medical": "autres",
    "observations": "autres",
}
|
||||
|
||||
# Keyword fallback used when the exact lookup fails.
# (keyword(s), normalized_section) pairs tested in order — first match wins.
_SECTION_KEYWORD_FALLBACKS: list[tuple[tuple[str, ...], str]] = [
    # diagnostic + sortie/retenu → diag_sortie / diagnostics_retenus
    (("diagnostic", "sortie"), "diag_sortie"),
    (("diagnostic", "retenu"), "diagnostics_retenus"),
    # conclusion / synthese
    (("conclusion",), "conclusion"),
    (("synthese",), "synthese"),
    (("synthèse",), "synthese"),
    (("au total",), "synthese"),
    (("en résumé",), "synthese"),
    # motif / admission
    (("motif",), "motif_hospitalisation"),
    (("admission",), "motif_hospitalisation"),
]
|
||||
|
||||
|
||||
def _normalize_evidence_section(raw_section: str) -> str:
    """Normalize a section name returned by the LLM to a standard key.

    1. Cleanup: lower-case, strip, drop brackets/colons/quotes.
    2. Exact lookup in _SECTION_NORMALIZE_MAP (also with "_" → " ").
    3. Keyword fallback via _SECTION_KEYWORD_FALLBACKS.

    Returns the cleaned key itself when nothing matches.
    """
    if not raw_section:
        return ""

    # Aggressive cleanup: brackets, quotes and colons are formatting noise.
    cleaned = re.sub(r"[\[\]\"':]+", "", raw_section.lower().strip()).strip()

    # Exact match: raw key first, then an underscore-to-space variant.
    for lookup in (cleaned, cleaned.replace("_", " ")):
        mapped = _SECTION_NORMALIZE_MAP.get(lookup)
        if mapped:
            return mapped

    # Keyword fallback: first pair whose keywords all occur in the key wins.
    for keywords, section in _SECTION_KEYWORD_FALLBACKS:
        if all(kw in cleaned for kw in keywords):
            return section

    return cleaned
|
||||
|
||||
|
||||
def _is_comorbidity_code(code: str) -> bool:
    """Return True for a trivial comorbidity code (I10, E66.x, E78.x, E11.x, D64.9)."""
    # str.startswith accepts a tuple of prefixes: one call instead of a loop.
    return code.startswith(_COMORBIDITY_PREFIXES)
|
||||
|
||||
|
||||
def _has_explicit_pec_proof(label: str, full_text: str) -> bool:
    """Check whether the text explicitly proves this comorbidity is the
    PRINCIPAL reason for the stay.

    Looks for "hospitalisé pour", "prise en charge", "admission pour", etc.
    followed by the comorbidity label within a 100-character window.
    """
    if not (full_text and label):
        return False
    haystack = full_text.lower()
    needle = label.lower()
    # Accept as soon as one care-pattern hit is closely followed by the label.
    return any(
        needle in haystack[m.end():m.end() + 100]
        for m in _PEC_PROOF_RE.finditer(haystack)
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# 1. Shortlist construction
# ---------------------------------------------------------------------------

def build_dp_shortlist(
    parsed: dict,
    text: str,
    edsnlp_result,
    dossier: DossierMedical,
) -> list[DPCandidate]:
    """Collect DP candidates from the parsed CRH sections, edsnlp and regexes.

    Deduplicates by CIM-10 code, keeping the strongest section.

    Args:
        parsed: Parsed CRH dict; only its "sections" sub-dict is read here.
        text: Full document text, used for the regex fallback.
        edsnlp_result: Optional edsnlp output with .cim10_entities — may be None.
        dossier: Current dossier (accepted for interface symmetry; not read here).

    Returns:
        Deduplicated list of DPCandidate, in collection order.
    """
    from .cim10_extractor import CIM10_MAP, _find_diagnostic_principal
    from .das_filter import is_valid_diagnostic_text, clean_diagnostic_text

    candidates: list[DPCandidate] = []
    sections = parsed.get("sections", {})

    # Section priority order (decreasing)
    section_priority = [
        "diag_sortie", "diag_principal", "motif_hospitalisation",
        "conclusion", "synthese",
    ]

    # 1. Strong-signal CRH sections
    for section_key in section_priority:
        section_text = sections.get(section_key, "")
        if not section_text:
            continue

        section_norm = normalize_text(section_text)

        # 1a. Explicit CIM-10 codes in the section text
        for m in _CIM10_CODE_RE.finditer(section_text):
            code = normalize_code(m.group(1))
            is_valid, label = cim10_validate(code)
            if is_valid:
                excerpt = _extract_excerpt(section_text, m.start())
                candidates.append(DPCandidate(
                    code=code,
                    label=label,
                    source_section=section_key,
                    source_excerpt=excerpt,
                ))

        # 1b. CIM10_MAP only (curated for common DPs)
        # We do NOT use dict_lookup: the full dictionary (10K+ entries)
        # yields substring-match false positives on free text.
        for terme, code in CIM10_MAP.items():
            if normalize_text(terme) in section_norm:
                candidates.append(DPCandidate(
                    code=code,
                    label=terme.capitalize(),
                    source_section=section_key,
                    source_excerpt=section_text[:200].strip(),
                ))
                break  # longest-match: CIM10_MAP is ordered specific→generic

    # 2. edsnlp entities
    if edsnlp_result:
        for ent in edsnlp_result.cim10_entities:
            if ent.negation or ent.hypothese:
                continue  # negated/hypothetical mentions are never DP candidates
            texte = clean_diagnostic_text(ent.texte.capitalize())
            if not is_valid_diagnostic_text(texte):
                continue
            candidates.append(DPCandidate(
                code=ent.code,
                label=texte,
                source_section="edsnlp",
            ))

    # 3. Regex fallback (_find_diagnostic_principal on the full text)
    text_lower = text.lower()
    conclusion = sections.get("conclusion", "")
    dp_regex = _find_diagnostic_principal(text_lower, conclusion)
    if dp_regex:
        candidates.append(DPCandidate(
            code=dp_regex.cim10_suggestion,
            label=dp_regex.texte,
            source_section="regex",
            source_excerpt=dp_regex.source_excerpt,
        ))

    # 4. Dedup by CIM-10 code: keep the strongest section
    candidates = _dedup_by_code(candidates, section_priority)

    return candidates
|
||||
|
||||
|
||||
def _extract_excerpt(text: str, pos: int, window: int = 100) -> str:
|
||||
"""Extrait ~200 chars autour d'une position dans le texte."""
|
||||
start = max(0, pos - window)
|
||||
end = min(len(text), pos + window)
|
||||
return text[start:end].strip()
|
||||
|
||||
|
||||
def _dedup_by_code(
    candidates: list[DPCandidate],
    section_priority: list[str],
) -> list[DPCandidate]:
    """Deduplicate candidates by CIM-10 code, keeping the strongest section.

    A candidate without a code is keyed on its lowercased label. The first
    occurrence wins ties; a later candidate replaces it only when its
    section ranks strictly higher (lower index) in *section_priority*.
    """
    rank = {name: idx for idx, name in enumerate(section_priority)}
    # edsnlp / regex sources always rank below the explicit section list.
    rank.setdefault("edsnlp", len(section_priority))
    rank.setdefault("regex", len(section_priority) + 1)

    best: dict[str, DPCandidate] = {}
    for cand in candidates:
        key = cand.code or cand.label.lower()
        current = best.get(key)
        if current is None:
            best[key] = cand
            continue
        if rank.get(cand.source_section, 99) < rank.get(current.source_section, 99):
            best[key] = cand

    return list(best.values())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Scoring des candidats
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def score_candidates(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    full_text: str = "",
) -> list[DPCandidate]:
    """Apply the deterministic multi-criteria DP score to each candidate.

    Each candidate is mutated in place: ``score_details`` receives one entry
    per triggered criterion (weights from ``DP_SCORING_WEIGHTS``), ``score``
    is their sum, and ``is_negated`` / ``is_conditional`` are set when the
    corresponding patterns match near the label.

    Args:
        candidates: Candidates produced by the extraction step.
        dossier: Medical record; not read in this function — kept for
            signature stability with the rest of the pipeline.
        full_text: Full document text, used for negation/conditional detection.

    Returns:
        The same list, sorted by decreasing score.
    """
    for c in candidates:
        details: dict[str, int] = {}

        # 1. Section bonus (e.g. discharge-diagnosis sections score highest).
        section_key = f"section_{c.source_section}"
        section_bonus = DP_SCORING_WEIGHTS.get(section_key, 0)
        if section_bonus:
            details["section"] = section_bonus

        # 2. Evidence bonus: a non-empty source excerpt backs the candidate.
        if c.source_excerpt:
            proof = DP_SCORING_WEIGHTS.get("proof_excerpt", 0)
            if proof:
                details["proof_excerpt"] = proof

        # 3. Negation penalty — narrow window BEFORE the label only, so
        # "pas de pancréatite" triggers but "pancréatite ... pas de
        # complication" does not.
        if full_text and c.label:
            prefix = _get_prefix_window(full_text, c.label, chars_before=60)
            if prefix and _NEGATION_PATTERNS.search(prefix):
                c.is_negated = True
                penalty = DP_SCORING_WEIGHTS.get("negation", 0)
                if penalty:
                    details["negation"] = penalty

        # 4. Conditional penalty — narrow window BEFORE and AFTER the label
        # ("suspect", "probable", "?").
        if full_text and c.label:
            window = _get_context_window(full_text, c.label, radius=80)
            if window and _CONDITIONAL_PATTERNS.search(window):
                c.is_conditional = True
                penalty = DP_SCORING_WEIGHTS.get("conditional", 0)
                if penalty:
                    details["conditional"] = penalty

        # 5. Z-code penalty: Z-codes are rarely valid DPs unless whitelisted.
        if c.code and c.code.startswith("Z"):
            if not _is_z_code_whitelisted(c.code):
                penalty = DP_SCORING_WEIGHTS.get("z_code_dp", 0)
                if penalty:
                    details["z_code_dp"] = penalty

        # 6. R-code penalty: a symptom code is a weak DP.
        if c.code and c.code.startswith("R"):
            penalty = DP_SCORING_WEIGHTS.get("r_code_dp", 0)
            if penalty:
                details["r_code_dp"] = penalty

        # 7. Banal-comorbidity penalty (applies in every section).
        if c.code and _is_comorbidity_code(c.code):
            penalty = DP_SCORING_WEIGHTS.get("comorbidity_weak", 0)
            if penalty:
                details["comorbidity_weak"] = penalty
            # Exception: explicit proof that the comorbidity was the main
            # care focus fully offsets the penalty (same magnitude).
            if full_text and _has_explicit_pec_proof(c.label, full_text):
                details["comorbidity_pec_proof"] = abs(penalty) if penalty else 3

        c.score_details = details
        c.score = sum(details.values())

    # Sort by decreasing score (stable: insertion order breaks ties).
    candidates.sort(key=lambda c: -c.score)
    return candidates
|
||||
|
||||
|
||||
def _get_prefix_window(text: str, label: str, chars_before: int = 60) -> str:
|
||||
"""Retourne les N caractères AVANT la première occurrence du label.
|
||||
|
||||
Sert à détecter les négations qui précèdent directement le diagnostic
|
||||
("pas de pancréatite" vs "pancréatite ... pas de complication").
|
||||
"""
|
||||
text_lower = text.lower()
|
||||
label_lower = label.lower()
|
||||
pos = text_lower.find(label_lower)
|
||||
if pos < 0:
|
||||
text_norm = normalize_text(text)
|
||||
label_norm = normalize_text(label)
|
||||
pos = text_norm.find(label_norm)
|
||||
if pos < 0:
|
||||
return ""
|
||||
start = max(0, pos - chars_before)
|
||||
return text_norm[start:pos]
|
||||
start = max(0, pos - chars_before)
|
||||
return text_lower[start:pos]
|
||||
|
||||
|
||||
def _get_context_window(text: str, label: str, radius: int = 200) -> str:
|
||||
"""Retourne une fenêtre de texte autour de la première occurrence du label."""
|
||||
text_lower = text.lower()
|
||||
label_lower = label.lower()
|
||||
pos = text_lower.find(label_lower)
|
||||
if pos < 0:
|
||||
# Essayer avec le texte normalisé
|
||||
text_norm = normalize_text(text)
|
||||
label_norm = normalize_text(label)
|
||||
pos = text_norm.find(label_norm)
|
||||
if pos < 0:
|
||||
return ""
|
||||
start = max(0, pos - radius)
|
||||
end = min(len(text_norm), pos + len(label_norm) + radius)
|
||||
return text_norm[start:end]
|
||||
start = max(0, pos - radius)
|
||||
end = min(len(text), pos + len(label) + radius)
|
||||
return text[start:end].lower()
|
||||
|
||||
|
||||
def _is_z_code_whitelisted(code: str) -> bool:
    """Return True if *code* begins with any whitelisted Z-code prefix.

    Prefix matching lets sub-codes (e.g. "Z51.10") inherit the whitelisting
    of their parent entry ("Z51.1") in ``DP_Z_CODE_WHITELIST``.
    """
    # Idiom: str.startswith accepts a tuple of prefixes — one C-level call
    # instead of an explicit Python loop over the frozenset.
    return code.startswith(tuple(DP_Z_CODE_WHITELIST))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Sélection du DP
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def select_dp(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    use_llm: bool = False,
) -> DPSelection:
    """Select the DP among the scored candidates.

    Decision logic, in order:
      * no candidate -> "review";
      * top candidate is a banal comorbidity without explicit proof of
        principal care -> "review" (universal anti-comorbidity guard);
      * single candidate -> "confirmed";
      * top1 - top2 score delta >= DP_REVIEW_THRESHOLD -> "confirmed";
      * exact score tie and use_llm=True -> LLM tiebreak, else "review".

    Args:
        candidates: Candidates already scored (expected in descending score
            order, as produced by score_candidates).
        dossier: Medical record, forwarded to the LLM tiebreaker.
        use_llm: Allow an LLM call to break an exact score tie.

    Returns:
        DPSelection with verdict "confirmed" or "review"; on "review" only
        the top 3 candidates are kept.
    """
    if not candidates:
        return DPSelection(verdict="review", winner_reason="aucun candidat DP trouvé")

    # Universal anti-comorbidity guard: a banal comorbidity as DP -> REVIEW
    # unless there is explicit proof of principal care ("hospitalisé pour",
    # "prise en charge de").
    top = candidates[0]
    if top.code and _is_comorbidity_code(top.code):
        has_pec = "comorbidity_pec_proof" in top.score_details
        if not has_pec:
            logger.info(
                "Comorbidité-banale DP : %s (%s, section=%s) → REVIEW + fallback LLM",
                top.code, top.label, top.source_section,
            )
            return DPSelection(
                verdict="review",
                candidates=candidates[:3],
                winner_reason=f"comorbidité banale {top.code} sans preuve PEC ({top.source_section})",
            )

    if len(candidates) == 1:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason="candidat unique",
        )

    top1 = candidates[0]
    top2 = candidates[1]
    delta = top1.score - top2.score

    if delta >= DP_REVIEW_THRESHOLD:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason=f"score {top1.score} vs {top2.score} (delta {delta})",
        )

    # Delta below threshold — attempt the LLM tiebreaker, but only on an
    # EXACT tie (a small non-zero delta still goes to review).
    if use_llm and top1.score == top2.score:
        tiebreak = _llm_tiebreak(top1, top2, dossier)
        if tiebreak and tiebreak.get("winner") in ("A", "B"):
            if tiebreak["winner"] == "B":
                # Swap in place so the winner comes first. NOTE: this
                # mutates the caller's list.
                candidates[0], candidates[1] = candidates[1], candidates[0]
            return DPSelection(
                verdict="confirmed",
                candidates=candidates,
                winner_reason=f"LLM tiebreak: {tiebreak.get('reason', '')}",
                llm_tiebreak=tiebreak,
            )

    return DPSelection(
        verdict="review",
        candidates=candidates[:3],
        winner_reason=f"delta insuffisant: {top1.score} vs {top2.score} (delta {delta} < seuil {DP_REVIEW_THRESHOLD})",
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Tiebreaker LLM (optionnel)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _llm_tiebreak(
    candidate_a: DPCandidate,
    candidate_b: DPCandidate,
    dossier: DossierMedical,
) -> dict | None:
    """Ask the LLM to break a tie between two equally-scored DP candidates.

    Returns:
        ``{"winner": "A"|"B", "reason": str}`` on success, or None on any
        failure (missing LLM module, call error, malformed response) so the
        caller can fall back to a "review" verdict.
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_TIEBREAK
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le tiebreaker DP")
        return None

    # Admission motive, when the structured stay data carries it.
    motif = ""
    if dossier.sejour and dossier.sejour.mode_entree:
        motif = dossier.sejour.mode_entree

    def _format_candidate(c: DPCandidate) -> str:
        # One-line description: label, optional code, provenance, excerpt.
        parts = [c.label]
        if c.code:
            parts.append(f"({c.code})")
        parts.append(f"[section: {c.source_section}, score: {c.score}]")
        if c.source_excerpt:
            parts.append(f'extrait: "{c.source_excerpt[:150]}"')
        return " — ".join(parts)

    candidat_a_str = _format_candidate(candidate_a)
    candidat_b_str = _format_candidate(candidate_b)

    # NOTE(review): strong sections are not wired through to the tiebreaker
    # yet; the prompt receives a fixed placeholder.
    sections_fortes = "Non disponible"

    prompt = DP_TIEBREAK.format(
        motif=motif or "Non renseigné",
        candidat_a=candidat_a_str,
        candidat_b=candidat_b_str,
        sections_fortes=sections_fortes,
    )

    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=500, role="coding")
    except Exception:
        logger.warning("Erreur LLM tiebreaker DP", exc_info=True)
        return None

    if not result or not isinstance(result, dict):
        return None

    # Only accept a strict "A"/"B" answer; anything else means no decision.
    winner = result.get("winner")
    if winner not in ("A", "B"):
        return None

    return {"winner": winner, "reason": result.get("reason", "")}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. LLM Fallback one-shot — proposition DP quand le scoring déterministe échoue
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_strong_sections_text(parsed: dict) -> str:
|
||||
"""Construit le texte des sections fortes pour le prompt LLM one-shot.
|
||||
|
||||
Sections fortes : motif, diag_sortie, diag_principal, diagnostics_retenus,
|
||||
conclusion, synthese. PAS histoire_maladie ni examen_clinique (= bruit).
|
||||
"""
|
||||
sections = parsed.get("sections", {})
|
||||
_STRONG_ORDER = [
|
||||
("motif_hospitalisation", 500),
|
||||
("diag_sortie", 600), ("diagnostics_retenus", 600),
|
||||
("diag_principal", 600),
|
||||
("conclusion", 600), ("synthese", 600),
|
||||
]
|
||||
parts = []
|
||||
for key, max_len in _STRONG_ORDER:
|
||||
val = sections.get(key, "")
|
||||
if val:
|
||||
parts.append(f"[{key}] {val[:max_len]}")
|
||||
return "\n".join(parts) or "Aucune section forte"
|
||||
|
||||
|
||||
def _build_motif(parsed: dict, dossier: DossierMedical) -> str:
    """Resolve the admission motive used in the LLM prompt.

    Prefers the structured ``sejour.mode_entree`` field, then the parsed
    "motif_hospitalisation" section (truncated to 300 chars), then a
    "Non renseigné" placeholder.
    """
    sejour = dossier.sejour
    motif = sejour.mode_entree if sejour and sejour.mode_entree else ""
    if not motif:
        sections = parsed.get("sections", {})
        motif = sections.get("motif_hospitalisation", "")[:300] or "Non renseigné"
    return motif
|
||||
|
||||
|
||||
def _build_actes(dossier: DossierMedical) -> str:
    """Render up to five CCAM acts as a comma-separated string for the prompt."""
    rendered = []
    for acte in dossier.actes_ccam[:5]:
        suffix = f" ({acte.code_ccam_suggestion})" if acte.code_ccam_suggestion else ""
        rendered.append(acte.texte + suffix)
    return ", ".join(rendered) or "Non renseignés"
|
||||
|
||||
|
||||
def _validate_and_normalize_code(dp_code: str, pool_codes: set[str] | None = None) -> tuple[str, str | None, bool]:
    """Validate and normalise a CIM-10 code, relaxing to a parent if needed.

    Resolution order: exact pool membership, pool membership of the 3-char
    parent or its ".9" ("unspecified") child, direct CIM-10 validation,
    then CIM-10 validation of the parent / ".9" child.

    Args:
        dp_code: Raw code (e.g. from the LLM); normalised first.
        pool_codes: Optional pool of allowed codes to match against.

    Returns:
        ``(code, original_if_normalised, is_valid)`` where
        ``original_if_normalised`` is the input code when it had to be
        relaxed to a parent code, else None.
    """
    # Fix removed a dead local: the original assigned dp_code_original = None
    # and never read it (every return spells its values literally).
    dp_code = normalize_code(dp_code)

    # Exact membership in the candidate pool wins outright.
    if pool_codes is not None and dp_code in pool_codes:
        return dp_code, None, True

    parent3 = dp_code[:3]        # 3-char category, e.g. "K85"
    parent9 = f"{parent3}.9"     # "unspecified" child, e.g. "K85.9"

    # Pool match on the parent category or its ".9" child.
    if pool_codes is not None:
        if parent3 in pool_codes:
            return parent3, dp_code, True
        if parent9 in pool_codes:
            return parent9, dp_code, True

    # Direct CIM-10 dictionary validation.
    is_valid, _ = cim10_validate(dp_code)
    if is_valid:
        return dp_code, None, True

    # Fall back to the parent category, then its ".9" child.
    is_valid_p, _ = cim10_validate(parent3)
    if is_valid_p:
        return parent3, dp_code, True

    is_valid_9, _ = cim10_validate(parent9)
    if is_valid_9:
        return parent9, dp_code, True

    return dp_code, None, False
|
||||
|
||||
|
||||
def _apply_guardrails(
    dp_code: str,
    candidate: DPCandidate,
    evidence_section: str,
    evidence_excerpt: str,
    confidence: str,
) -> DPSelection:
    """Apply deterministic guardrails to an LLM-proposed DP candidate.

    Guardrails, in order:
      GF-1: empty evidence excerpt -> "review";
      GF-2: banal comorbidity outside a strong section -> "review";
      GF-3: "confirmed" only with strong section AND high confidence.

    Side effect: ``candidate.score`` / ``score_details`` are set from the
    LLM confidence (high=3, medium=2, low=1).

    Returns:
        DPSelection with verdict "confirmed" or "review".
    """
    is_strong_section = evidence_section in _STRONG_SECTIONS
    has_evidence = bool(evidence_excerpt and evidence_excerpt.strip())
    is_high_conf = confidence == "high"

    # Synthetic score derived from the LLM confidence level (unknown -> 1).
    confidence_scores = {"high": 3, "medium": 2, "low": 1}
    candidate.score = confidence_scores.get(confidence, 1)
    candidate.score_details = {"llm_confidence": candidate.score}

    # GF-1: no evidence excerpt -> REVIEW.
    if not has_evidence:
        logger.info("LLM fallback DP : pas d'extrait preuve pour %s, REVIEW", dp_code)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason="LLM fallback: evidence_excerpt vide",
        )

    # GF-2: banal comorbidity AND weak section -> REVIEW.
    if _is_comorbidity_code(dp_code) and not is_strong_section:
        logger.info("LLM fallback DP : comorbidité %s hors section forte (%s), REVIEW", dp_code, evidence_section)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: comorbidité {dp_code} hors section forte",
        )

    # GF-3: CONFIRMED requires strong section + high confidence; otherwise
    # collect the failing criteria for an explicit review reason.
    if not is_strong_section or not is_high_conf:
        reasons = []
        if not is_strong_section:
            reasons.append(f"section faible ({evidence_section})")
        if not is_high_conf:
            reasons.append(f"confidence {confidence}")
        reason_str = " + ".join(reasons)
        logger.info("LLM fallback DP : %s pour %s, REVIEW", reason_str, dp_code)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: {dp_code} — {reason_str}",
        )

    # All conditions met -> CONFIRMED.
    return DPSelection(
        verdict="confirmed", candidates=[candidate],
        winner_reason=f"LLM fallback: {dp_code} ({confidence}, {evidence_section})",
    )
|
||||
|
||||
|
||||
|
||||
def llm_dp_fallback(
    parsed: dict,
    text: str,
    dossier: DossierMedical,
    dp_candidates: list[DPCandidate] | None = None,
    edsnlp_result=None,
) -> DPSelection:
    """One-shot LLM call to identify and code the DP.

    The LLM sees the strong sections of the discharge summary directly and
    must answer in a single call with: dp_code, dp_label, evidence_section,
    evidence_excerpt, confidence. The answer then goes through the
    deterministic guardrails (``_apply_guardrails``).

    Should only be called when use_llm=True AND the deterministic verdict
    was "review".

    Args:
        parsed: Parsed document (expects a "sections" mapping).
        text: Full document text. Not read in this function — kept for API
            symmetry with the deterministic path.
        dossier: Medical record (admission motive + CCAM acts for the prompt).
        dp_candidates: Unused here — presumably reserved for constraining
            the LLM to a candidate pool (TODO confirm).
        edsnlp_result: Unused here.

    Returns:
        DPSelection with verdict "confirmed" or "review".
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_LLM_ONESHOT
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le fallback DP LLM")
        return DPSelection(verdict="review", winner_reason="LLM non disponible")

    # Prompt context: motive, strong sections, acts.
    motif = _build_motif(parsed, dossier)
    sections_fortes = _build_strong_sections_text(parsed)
    actes = _build_actes(dossier)

    prompt = DP_LLM_ONESHOT.format(
        motif=motif, sections_fortes=sections_fortes, actes=actes,
    )

    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=800, role="coding")
    except Exception:
        logger.warning("Erreur LLM fallback DP", exc_info=True)
        return DPSelection(verdict="review", winner_reason="erreur LLM fallback DP")

    if not result or not isinstance(result, dict):
        return DPSelection(verdict="review", winner_reason="réponse LLM invalide")

    dp_code = result.get("dp_code", "")
    dp_label = result.get("dp_label", "")
    confidence = result.get("confidence", "low")
    evidence_section_raw = result.get("evidence_section", "")
    evidence_excerpt = result.get("evidence_excerpt", "")

    # Map the free-text section name onto a canonical section key.
    evidence_section = _normalize_evidence_section(evidence_section_raw)

    logger.info(
        "LLM oneshot: code=%s label='%s' section=%s confidence=%s",
        dp_code, dp_label[:60], evidence_section, confidence,
    )

    if not dp_code:
        return DPSelection(
            verdict="review",
            winner_reason="LLM: aucun code DP proposé",
        )

    # CIM-10 validation / normalisation (may relax to a parent code).
    dp_code, dp_code_original, is_valid = _validate_and_normalize_code(dp_code)
    if not is_valid:
        return DPSelection(
            verdict="review",
            winner_reason=f"code invalide {dp_code}",
        )
    if dp_code_original:
        logger.info("LLM oneshot: normalisation %s → %s", dp_code_original, dp_code)

    # Resolve the final label (official dictionary label as fallback).
    _, dict_label = cim10_validate(dp_code)

    # Build the candidate carrying full provenance.
    source_tag = f"llm_oneshot ({evidence_section})" if evidence_section else "llm_oneshot"

    candidate = DPCandidate(
        code=dp_code,
        label=dp_label or dict_label or "",
        source_section=source_tag,
        source_excerpt=evidence_excerpt,
        confidence_raw=confidence,
        dp_code_original_llm=dp_code_original,
        dp_code_normalized=dp_code_original is not None,
    )

    # Deterministic guardrails decide confirmed vs review.
    return _apply_guardrails(dp_code, candidate, evidence_section, evidence_excerpt, confidence)
|
||||
@@ -188,6 +188,17 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
|
||||
# Diagnostic principal : le plus spécifique
|
||||
merged.diagnostic_principal = _prefer_most_specific_dp(dossiers)
|
||||
|
||||
# Propager dp_selection depuis le dossier source du DP retenu
|
||||
if merged.diagnostic_principal:
|
||||
for d in dossiers:
|
||||
if (
|
||||
d.diagnostic_principal
|
||||
and d.diagnostic_principal.cim10_suggestion == merged.diagnostic_principal.cim10_suggestion
|
||||
and d.dp_selection is not None
|
||||
):
|
||||
merged.dp_selection = d.dp_selection
|
||||
break
|
||||
|
||||
# Collecter tous les DAS + DP non retenus comme DAS
|
||||
all_das: list[Diagnostic] = []
|
||||
for d in dossiers:
|
||||
|
||||
@@ -7,6 +7,8 @@ from .templates import (
|
||||
QC_VALIDATION,
|
||||
CPAM_EXTRACTION,
|
||||
CPAM_ARGUMENTATION,
|
||||
DP_TIEBREAK,
|
||||
DP_LLM_ONESHOT,
|
||||
CPAM_ADVERSARIAL,
|
||||
)
|
||||
|
||||
@@ -17,5 +19,7 @@ __all__ = [
|
||||
"QC_VALIDATION",
|
||||
"CPAM_EXTRACTION",
|
||||
"CPAM_ARGUMENTATION",
|
||||
"DP_TIEBREAK",
|
||||
"DP_LLM_ONESHOT",
|
||||
"CPAM_ADVERSARIAL",
|
||||
]
|
||||
|
||||
@@ -300,7 +300,79 @@ Réponds UNIQUEMENT avec un objet JSON au format suivant :
|
||||
}}"""
|
||||
|
||||
# ---------------------------------------------------------------------------
# 7. DP Tiebreaker — départage entre deux candidats DP à scores proches
# ---------------------------------------------------------------------------
|
||||
# Rôle : coding | Temperature : 0.0 | Max tokens : 500
|
||||
# Fichier d'origine : src/medical/dp_scoring.py → _llm_tiebreak()
|
||||
# Variables : motif, candidat_a, candidat_b, sections_fortes
|
||||
|
||||
DP_TIEBREAK = """\
|
||||
Tu es un médecin DIM expert. Deux diagnostics sont candidats au poste de Diagnostic Principal (DP).
|
||||
Le DP doit refléter le motif principal de prise en charge qui a mobilisé le plus de ressources pendant le séjour.
|
||||
|
||||
MOTIF D'HOSPITALISATION : {motif}
|
||||
|
||||
CANDIDAT A : {candidat_a}
|
||||
CANDIDAT B : {candidat_b}
|
||||
|
||||
SECTIONS DU CRH À FORT SIGNAL :
|
||||
{sections_fortes}
|
||||
|
||||
Choisis le candidat le plus approprié comme DP selon les critères ATIH :
|
||||
1. Motif principal de prise en charge du séjour
|
||||
2. Ressources mobilisées (actes, biologie, traitement)
|
||||
3. Spécificité du code CIM-10 (préférer le plus spécifique)
|
||||
|
||||
Réponds UNIQUEMENT en JSON :
|
||||
{{
|
||||
"winner": "A" ou "B",
|
||||
"reason": "explication courte en français"
|
||||
}}"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 7b. DP LLM One-shot — identification + codage CIM-10 du DP en un appel
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rôle : coding | Temperature : 0.0 | Max tokens : 800
|
||||
# Fichier d'origine : src/medical/dp_scoring.py → llm_dp_fallback()
|
||||
# Variables : motif, sections_fortes, actes
|
||||
|
||||
DP_LLM_ONESHOT = """\
|
||||
Tu es un médecin DIM (Département d'Information Médicale) expert en codage PMSI.
|
||||
Identifie le Diagnostic Principal (DP) et code-le en CIM-10 avec le code le plus SPÉCIFIQUE (4e ou 5e caractère).
|
||||
|
||||
DÉFINITION DU DP (Guide méthodologique ATIH) :
|
||||
Le DP est le diagnostic qui a mobilisé l'essentiel des ressources du séjour. C'est la pathologie ACTIVE, TRAITÉE, RETENUE en fin de séjour — pas le symptôme d'entrée si un diagnostic étiologique a été posé.
|
||||
|
||||
CE QUE TU NE CHERCHES PAS :
|
||||
- Les comorbidités chroniques de fond (hypertension, obésité, diabète équilibré, dyslipidémie, anémie chronique) SAUF si elles sont DÉCOMPENSÉES et constituent le motif d'hospitalisation
|
||||
- Les antécédents stables non traités activement pendant ce séjour
|
||||
- Les facteurs de risque (tabac, alcool, sédentarité)
|
||||
|
||||
MÉTHODE :
|
||||
1. Lis le motif d'hospitalisation → pourquoi le patient est arrivé
|
||||
2. Lis la conclusion/synthèse → quel diagnostic a été retenu après le séjour
|
||||
3. Identifie la pathologie ACTIVE traitée, puis code-la en CIM-10
|
||||
4. Préfère le code le plus spécifique (ex: K85.1 > K85.9 > K85)
|
||||
5. Cite la SECTION et l'EXTRAIT exact qui prouvent ton choix
|
||||
|
||||
MOTIF D'HOSPITALISATION : {motif}
|
||||
|
||||
SECTIONS CLINIQUES (fortes uniquement) :
|
||||
{sections_fortes}
|
||||
|
||||
ACTES RÉALISÉS : {actes}
|
||||
|
||||
Réponds UNIQUEMENT en JSON :
|
||||
{{
|
||||
"dp_code": "X00.0",
|
||||
"dp_label": "libellé officiel CIM-10 en français",
|
||||
"evidence_section": "nom exact de la section source",
|
||||
"evidence_excerpt": "extrait EXACT copié du texte (2-3 lignes max)",
|
||||
"confidence": "high ou medium ou low"
|
||||
}}"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 8. CPAM passe 3 — validation adversariale (relecture critique)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rôle : validation | Temperature : 0.0 | Max tokens : 800
|
||||
# Fichier d'origine : src/control/cpam_response.py → _validate_adversarial()
|
||||
|
||||
@@ -9,13 +9,32 @@ import pytest
|
||||
from src.config import ControleCPAM
|
||||
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
|
||||
|
||||
# Column headers for the two supported CPAM export formats.
# Legacy OGC control sheet.
_LEGACY_HEADER = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
# New "ucr_extract" format produced by recent UCR exports.
_NEW_HEADER = (
    "N° OGC", "Type désaccord", "Codes Établissement", "Libellé Établissement",
    "Codes Contrôleurs", "Libellé Contrôleurs", "Décision UCR", "Codes retenus",
    "GHM / GHS", "Texte décision",
)
|
||||
|
||||
|
||||
def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
    """Create a legacy-format test xlsx file with the given data rows."""
    # Fix: the merged source carried a duplicated docstring (dead string
    # statement) and a duplicated header append (which would write the
    # header row twice); keep a single docstring and a single
    # ws.append(_LEGACY_HEADER).
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "OGC Contrôle T2A"
    ws.append(_LEGACY_HEADER)
    for row in rows:
        ws.append(row)
    wb.save(path)
||||
|
||||
|
||||
def _create_new_format_xlsx(rows: list[tuple], path: Path) -> None:
    """Create a test xlsx file in the new ucr_extract format."""
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "UCR Extract"
    sheet.append(_NEW_HEADER)
    for record in rows:
        sheet.append(record)
    workbook.save(path)
|
||||
@@ -128,3 +147,292 @@ class TestControleCPAMModel:
|
||||
assert ctrl.numero_ogc == 21
|
||||
assert ctrl.contre_argumentation == "Ma réponse"
|
||||
assert ctrl.sources_reponse == []
|
||||
|
||||
def test_new_fields_defaults(self):
    """The 6 new ucr_extract fields default to None."""
    ctrl = ControleCPAM(numero_ogc=1)
    assert ctrl.codes_etablissement is None
    assert ctrl.libelle_etablissement is None
    assert ctrl.codes_controleurs is None
    assert ctrl.libelle_controleurs is None
    assert ctrl.codes_retenus is None
    assert ctrl.ghm_ghs is None
|
||||
|
||||
def test_new_fields_serialization(self):
    """The ucr_extract fields appear in model_dump output."""
    ctrl = ControleCPAM(
        numero_ogc=10,
        titre="Désaccord sur le DP",
        codes_etablissement="K85.1",
        libelle_etablissement="Pancréatite aiguë biliaire",
        codes_controleurs="K85.9",
        libelle_controleurs="Pancréatite aiguë, sans précision",
        codes_retenus="K85.1",
        ghm_ghs="06M091 / 1854",
    )
    data = ctrl.model_dump()
    assert data["codes_etablissement"] == "K85.1"
    assert data["libelle_etablissement"] == "Pancréatite aiguë biliaire"
    assert data["codes_controleurs"] == "K85.9"
    assert data["libelle_controleurs"] == "Pancréatite aiguë, sans précision"
    assert data["codes_retenus"] == "K85.1"
    assert data["ghm_ghs"] == "06M091 / 1854"
|
||||
|
||||
|
||||
class TestParseNewFormat:
|
||||
"""Tests pour le format ucr_extract (nouveau)."""
|
||||
|
||||
def test_parse_basic_dp(self, tmp_path):
    """Basic parsing — DP disagreement with controller codes."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        # OGC no., type, estab. codes, estab. label, ctrl codes, ctrl label,
        # decision, retained codes, GHM, decision text
        (17, "DP", "K85.1", "Pancréatite aiguë biliaire", "K85.9",
         "Pancréatite aiguë SAI", "Défavorable", "K85.9", "06M091 / 1854",
         "Le contrôleur ne retient pas K85.1"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)

    assert 17 in result
    ctrl = result[17][0]
    assert ctrl.numero_ogc == 17
    assert ctrl.titre == "Désaccord sur le DP"
    assert ctrl.dp_ucr == "K85.9"
    assert ctrl.da_ucr is None
    assert ctrl.arg_ucr == "Le contrôleur ne retient pas K85.1"
    assert ctrl.decision_ucr == "UCR confirme avis médecins contrôleurs"
|
||||
|
||||
def test_parse_basic_das(self, tmp_path):
    """Parsing — DAS disagreement."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (21, "DAS", "E11.40,G63.2", "Diabète+neuropathie", "E11.40",
         "Diabète type 2", "Favorable", "E11.40,G63.2", None,
         "L'UCR retient les codes initiaux"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)

    ctrl = result[21][0]
    assert ctrl.titre == "Désaccord sur les DAS"
    assert ctrl.dp_ucr is None
    assert ctrl.da_ucr == "E11.40"
    assert ctrl.decision_ucr == "UCR retient"
|
||||
|
||||
def test_parse_dp_plus_das(self, tmp_path):
    """DP+DAS: first controller code -> dp_ucr, remainder -> da_ucr."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (30, "DP+DAS", "K85.1,E11.40", "...", "K85.9,G63.2,I10",
         "...", "Défavorable", "K85.9,G63.2,I10", None, "Texte"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)

    ctrl = result[30][0]
    assert ctrl.titre == "Désaccord sur le DP et les DAS"
    assert ctrl.dp_ucr == "K85.9"
    assert ctrl.da_ucr == "G63.2,I10"
|
||||
|
||||
def test_parse_dp_plus_das_single_code(self, tmp_path):
    """DP+DAS with a single code -> everything in dp_ucr, no da_ucr."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (31, "DP+DAS", "K85.1", "...", "K85.9",
         "...", "Favorable", None, None, "Texte"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)

    ctrl = result[31][0]
    assert ctrl.dp_ucr == "K85.9"
    assert ctrl.da_ucr is None
|
||||
|
||||
def test_new_fields_populated(self, tmp_path):
    """The 6 enriched fields are populated from their columns."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (42, "DP", "E11.40", "Diabète type 2 avec complications",
         "E11.9", "Diabète type 2 sans complication",
         "Défavorable", "E11.9", "05M092 / 1780", "Argumentation contrôleur"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)

    ctrl = result[42][0]
    assert ctrl.codes_etablissement == "E11.40"
    assert ctrl.libelle_etablissement == "Diabète type 2 avec complications"
    assert ctrl.codes_controleurs == "E11.9"
    assert ctrl.libelle_controleurs == "Diabète type 2 sans complication"
    assert ctrl.codes_retenus == "E11.9"
    assert ctrl.ghm_ghs == "05M092 / 1780"
|
||||
|
||||
def test_decision_favorable(self, tmp_path):
    """Favorable -> mapped to 'UCR retient'."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (10, "DP", None, None, None, None, "Favorable", None, None, "OK"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)
    assert result[10][0].decision_ucr == "UCR retient"
|
||||
|
||||
def test_decision_defavorable(self, tmp_path):
    """Défavorable -> mapped to 'UCR confirme avis médecins contrôleurs'."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (11, "DAS", None, None, None, None, "Défavorable", None, None, "KO"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)
    assert result[11][0].decision_ucr == "UCR confirme avis médecins contrôleurs"
|
||||
|
||||
def test_decision_defavorable_no_accent(self, tmp_path):
    """Defavorable (without accent) -> same mapping."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (12, "DP", None, None, None, None, "Defavorable", None, None, "KO"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)
    assert result[12][0].decision_ucr == "UCR confirme avis médecins contrôleurs"
|
||||
|
||||
def test_decision_unknown_passthrough(self, tmp_path):
    """Unknown decision value -> passed through unchanged."""
    xlsx = tmp_path / "new.xlsx"
    _create_new_format_xlsx([
        (13, "DP", None, None, None, None, "Partielle", None, None, "Texte"),
    ], xlsx)

    result = parse_cpam_excel(xlsx)
    assert result[13][0].decision_ucr == "Partielle"
|
||||
|
||||
def test_type_desaccord_unknown(self, tmp_path):
|
||||
"""Type désaccord inconnu → titre 'Désaccord : XXX'."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([
|
||||
(14, "Actes", None, None, None, None, "Favorable", None, None, "Texte"),
|
||||
], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
assert result[14][0].titre == "Désaccord : Actes"
|
||||
|
||||
def test_type_desaccord_empty(self, tmp_path):
|
||||
"""Type désaccord vide → titre vide."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([
|
||||
(15, "", None, None, None, None, "Favorable", None, None, "Texte"),
|
||||
], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
assert result[15][0].titre == ""
|
||||
|
||||
def test_multiple_ogc_new_format(self, tmp_path):
|
||||
"""Plusieurs OGC dans le nouveau format."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([
|
||||
(10, "DP", None, None, "K85.9", None, "Favorable", None, None, "Arg 1"),
|
||||
(20, "DAS", None, None, "E11.40", None, "Défavorable", None, None, "Arg 2"),
|
||||
(10, "DAS", None, None, "G63.2", None, "Favorable", None, None, "Arg 3"),
|
||||
], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
|
||||
assert len(result) == 2
|
||||
assert len(result[10]) == 2
|
||||
assert len(result[20]) == 1
|
||||
assert result[10][0].dp_ucr == "K85.9"
|
||||
assert result[10][1].da_ucr == "G63.2"
|
||||
|
||||
def test_empty_new_format(self, tmp_path):
|
||||
"""Fichier nouveau format vide (seulement en-têtes)."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
assert result == {}
|
||||
|
||||
def test_ogc_none_skipped(self, tmp_path):
|
||||
"""Lignes avec N° OGC None sont ignorées."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([
|
||||
(None, "DP", None, None, None, None, "Favorable", None, None, "Texte"),
|
||||
(10, "DP", None, None, "K85.1", None, "Favorable", None, None, "OK"),
|
||||
], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
assert len(result) == 1
|
||||
assert 10 in result
|
||||
|
||||
def test_ogc_invalid_skipped(self, tmp_path):
|
||||
"""N° OGC non-numérique est ignoré."""
|
||||
xlsx = tmp_path / "new.xlsx"
|
||||
_create_new_format_xlsx([
|
||||
("ABC", "DP", None, None, None, None, "Favorable", None, None, "Texte"),
|
||||
], xlsx)
|
||||
|
||||
result = parse_cpam_excel(xlsx)
|
||||
assert result == {}
|
||||
|
||||
|
||||
class TestAutoDetection:
    """Auto-detection of the CPAM Excel layout (legacy vs new format)."""

    def test_detects_legacy(self, tmp_path):
        """Legacy headers are recognized and parsed with the legacy mapping."""
        workbook = tmp_path / "legacy.xlsx"
        _create_test_xlsx(
            [(17, "Titre", "Arg", "Décision", None, None, None, None)],
            workbook,
        )

        parsed = parse_cpam_excel(workbook)
        assert 17 in parsed
        assert parsed[17][0].titre == "Titre"

    def test_detects_new(self, tmp_path):
        """New-format headers are recognized and parsed accordingly."""
        workbook = tmp_path / "new.xlsx"
        _create_new_format_xlsx(
            [(17, "DP", "K85.1", "Label", "K85.9", "Label2",
              "Favorable", "K85.1", None, "Texte")],
            workbook,
        )

        parsed = parse_cpam_excel(workbook)
        assert 17 in parsed
        assert parsed[17][0].titre == "Désaccord sur le DP"

    def test_unknown_format_returns_empty(self, tmp_path):
        """Unrecognized headers yield an empty dict."""
        workbook = tmp_path / "unknown.xlsx"
        wb = openpyxl.Workbook()
        sheet = wb.active
        sheet.append(("Col1", "Col2", "Col3"))
        sheet.append((1, "val", "val"))
        wb.save(workbook)

        assert parse_cpam_excel(workbook) == {}

    def test_new_format_priority_over_legacy(self, tmp_path):
        """When both header sets are present, the new format wins."""
        workbook = tmp_path / "both.xlsx"
        wb = openpyxl.Workbook()
        sheet = wb.active
        # Header row mixing both layouts on purpose.
        sheet.append((
            "N° OGC", "Titre", "Arg_UCR", "Décision_UCR",
            "Type désaccord", "Décision UCR", "Texte décision",
            "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR",
        ))
        sheet.append((17, "Titre", "Arg", "Déc legacy", "DP", "Favorable", "Texte nouveau",
                      "K85.1", None, None, None))
        wb.save(workbook)

        parsed = parse_cpam_excel(workbook)

        assert 17 in parsed
        # New format takes priority → title is built from "Type désaccord".
        assert parsed[17][0].titre == "Désaccord sur le DP"
        # arg_ucr comes from "Texte décision" (new), not "Arg_UCR" (legacy).
        assert parsed[17][0].arg_ucr == "Texte nouveau"
||||
710
tests/test_dp_scoring.py
Normal file
710
tests/test_dp_scoring.py
Normal file
@@ -0,0 +1,710 @@
|
||||
"""Tests pour le module de scoring DP (Diagnostic Principal)."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import (
|
||||
DossierMedical,
|
||||
Diagnostic,
|
||||
DPCandidate,
|
||||
DPSelection,
|
||||
DP_SCORING_WEIGHTS,
|
||||
DP_REVIEW_THRESHOLD,
|
||||
Sejour,
|
||||
)
|
||||
from src.medical.dp_scoring import (
|
||||
build_dp_shortlist,
|
||||
score_candidates,
|
||||
select_dp,
|
||||
_get_context_window,
|
||||
_is_z_code_whitelisted,
|
||||
_is_comorbidity_code,
|
||||
_has_explicit_pec_proof,
|
||||
_dedup_by_code,
|
||||
_normalize_evidence_section,
|
||||
)
|
||||
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
def _make_parsed(sections: dict | None = None, diagnostics: list | None = None) -> dict:
|
||||
return {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": diagnostics or [],
|
||||
"sections": sections or {},
|
||||
}
|
||||
|
||||
|
||||
def _make_candidate(
    code: str = "K85.1",
    label: str = "Pancréatite aiguë biliaire",
    source_section: str = "diag_sortie",
    **kwargs,
) -> DPCandidate:
    """Build a DPCandidate with sensible defaults for the scoring tests."""
    return DPCandidate(
        code=code,
        label=label,
        source_section=source_section,
        **kwargs,
    )
||||
|
||||
# === Tests build_dp_shortlist ===
|
||||
|
||||
class TestBuildDPShortlist:
    """Shortlist construction from sections, regex fallback, and EDS-NLP."""

    def test_from_diag_sortie_with_cim10_code(self):
        doc = _make_parsed(sections={"diag_sortie": "Pancréatite aiguë biliaire K85.1"})
        shortlist = build_dp_shortlist(doc, "", None, DossierMedical())
        assert "K85.1" in {c.code for c in shortlist}

    def test_from_diag_principal_section(self):
        doc = _make_parsed(sections={"diag_principal": "Embolie pulmonaire I26.9"})
        shortlist = build_dp_shortlist(doc, "", None, DossierMedical())
        assert "I26.9" in {c.code for c in shortlist}

    def test_from_conclusion_via_cim10_map(self):
        doc = _make_parsed(
            sections={"conclusion": "pancréatite aiguë biliaire, bonne évolution"}
        )
        shortlist = build_dp_shortlist(doc, "", None, DossierMedical())
        assert "K85.1" in {c.code for c in shortlist}

    def test_from_regex_fallback(self):
        doc = _make_parsed(sections={})
        full_text = "Au total : pancréatite aiguë biliaire.\nDevenir : retour."
        shortlist = build_dp_shortlist(doc, full_text, None, DossierMedical())
        assert "K85.1" in {c.code for c in shortlist}

    def test_from_edsnlp(self):
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        doc = _make_parsed(sections={})
        nlp_result = EdsnlpResult(cim10_entities=[
            CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False),
        ])
        shortlist = build_dp_shortlist(doc, "", nlp_result, DossierMedical())
        assert "R10.4" in {c.code for c in shortlist}

    def test_edsnlp_negated_excluded(self):
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        doc = _make_parsed(sections={})
        nlp_result = EdsnlpResult(cim10_entities=[
            CIM10Entity(texte="fièvre", code="R50.9", negation=True),
        ])
        shortlist = build_dp_shortlist(doc, "", nlp_result, DossierMedical())
        assert "R50.9" not in {c.code for c in shortlist}

    def test_dedup_keeps_strongest_section(self):
        """Same code from diag_sortie and conclusion → diag_sortie kept."""
        doc = _make_parsed(sections={
            "diag_sortie": "Pancréatite K85.1",
            "conclusion": "pancréatite K85.1 bonne évolution",
        })
        shortlist = build_dp_shortlist(doc, "", None, DossierMedical())
        matches = [c for c in shortlist if c.code == "K85.1"]
        assert len(matches) == 1
        assert matches[0].source_section == "diag_sortie"

    def test_empty_sections_returns_empty(self):
        doc = _make_parsed(sections={})
        shortlist = build_dp_shortlist(doc, "Patient en bon état.", None, DossierMedical())
        assert shortlist == []
||||
|
||||
# === Tests score_candidates ===
|
||||
|
||||
class TestScoreCandidates:
    """Multi-criteria scoring of DP candidates (bonuses and penalties)."""

    def test_section_bonus_diag_sortie(self):
        ranked = score_candidates([_make_candidate(source_section="diag_sortie")], DossierMedical())
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_diag_sortie"]

    def test_section_bonus_conclusion(self):
        ranked = score_candidates([_make_candidate(source_section="conclusion")], DossierMedical())
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_conclusion"]

    def test_section_bonus_edsnlp(self):
        ranked = score_candidates([_make_candidate(source_section="edsnlp")], DossierMedical())
        assert ranked[0].score_details.get("section") == DP_SCORING_WEIGHTS["section_edsnlp"]

    def test_proof_excerpt_bonus(self):
        candidate = _make_candidate(source_excerpt="Pancréatite aiguë biliaire confirmée au scanner")
        ranked = score_candidates([candidate], DossierMedical())
        assert ranked[0].score_details.get("proof_excerpt") == DP_SCORING_WEIGHTS["proof_excerpt"]

    def test_no_proof_bonus_without_excerpt(self):
        ranked = score_candidates([_make_candidate(source_excerpt=None)], DossierMedical())
        assert "proof_excerpt" not in ranked[0].score_details

    def test_negation_penalty(self):
        candidate = _make_candidate(label="Fièvre")
        ranked = score_candidates(
            [candidate], DossierMedical(), full_text="Pas de fièvre constatée."
        )
        assert ranked[0].is_negated is True
        assert ranked[0].score_details.get("negation") == DP_SCORING_WEIGHTS["negation"]

    def test_conditional_penalty(self):
        candidate = _make_candidate(label="Embolie pulmonaire", code="I26.9")
        ranked = score_candidates(
            [candidate],
            DossierMedical(),
            full_text="Embolie pulmonaire suspectée, à confirmer par angioscanner.",
        )
        assert ranked[0].is_conditional is True
        assert ranked[0].score_details.get("conditional") == DP_SCORING_WEIGHTS["conditional"]

    def test_z_code_penalty(self):
        candidate = _make_candidate(code="Z76.0", label="Bilan de santé", source_section="conclusion")
        ranked = score_candidates([candidate], DossierMedical())
        assert ranked[0].score_details.get("z_code_dp") == DP_SCORING_WEIGHTS["z_code_dp"]

    def test_z_code_whitelist_no_penalty(self):
        candidate = _make_candidate(code="Z51.1", label="Chimiothérapie", source_section="conclusion")
        ranked = score_candidates([candidate], DossierMedical())
        assert "z_code_dp" not in ranked[0].score_details

    def test_r_code_penalty(self):
        candidate = _make_candidate(code="R10.4", label="Douleur abdominale", source_section="edsnlp")
        ranked = score_candidates([candidate], DossierMedical())
        assert ranked[0].score_details.get("r_code_dp") == DP_SCORING_WEIGHTS["r_code_dp"]

    def test_sort_by_score_descending(self):
        strong = _make_candidate(code="K85.1", source_section="diag_sortie")
        weak = _make_candidate(code="R10.4", label="Douleur", source_section="edsnlp")
        ranked = score_candidates([weak, strong], DossierMedical())
        # diag_sortie outranks edsnlp, so the strong candidate comes first.
        assert ranked[0].code == "K85.1"

    def test_combined_scoring(self):
        """Total score = section bonus + proof bonus (no penalties here)."""
        candidate = _make_candidate(
            code="K85.1",
            source_section="diag_sortie",
            source_excerpt="Pancréatite aiguë",
        )
        ranked = score_candidates([candidate], DossierMedical())
        expected = DP_SCORING_WEIGHTS["section_diag_sortie"] + DP_SCORING_WEIGHTS["proof_excerpt"]
        assert ranked[0].score == expected
||||
|
||||
# === Tests select_dp ===
|
||||
|
||||
class TestSelectDP:
    """Final DP selection and the REVIEW delta threshold."""

    def test_no_candidates_returns_review(self):
        assert select_dp([], DossierMedical()).verdict == "review"

    def test_single_candidate_confirmed(self):
        only = _make_candidate()
        only.score = 6
        selection = select_dp([only], DossierMedical())
        assert selection.verdict == "confirmed"
        assert selection.winner_reason == "candidat unique"

    def test_clear_winner_confirmed(self):
        strong = _make_candidate(code="K85.1")
        strong.score = 6
        weak = _make_candidate(code="R10.4", label="Douleur", source_section="edsnlp")
        weak.score = 1
        selection = select_dp([strong, weak], DossierMedical())
        assert selection.verdict == "confirmed"
        assert "delta" in selection.winner_reason

    def test_close_scores_returns_review(self):
        top = _make_candidate(code="K85.1")
        top.score = 3
        runner_up = _make_candidate(code="K80.5", label="Lithiase", source_section="conclusion")
        runner_up.score = 2
        assert select_dp([top, runner_up], DossierMedical()).verdict == "review"

    def test_review_returns_top3(self):
        pool = [
            _make_candidate(code=f"K8{i}.{i}", label=f"Diag {i}")
            for i in range(5)
        ]
        for rank, candidate in enumerate(pool):
            candidate.score = 5 - rank
        # Top1-top2 delta is 1, below DP_REVIEW_THRESHOLD → review with top-3.
        selection = select_dp(pool, DossierMedical())
        assert selection.verdict == "review"
        assert len(selection.candidates) <= 3
||||
|
||||
# === Tests utilitaires ===
|
||||
|
||||
class TestContextWindow:
    """Extraction of a text window around a diagnosis label."""

    def test_finds_label_in_text(self):
        full_text = "Patient admis pour pancréatite aiguë biliaire confirmée."
        snippet = _get_context_window(full_text, "pancréatite aiguë", radius=50)
        assert "pancréatite" in snippet.lower()

    def test_returns_empty_when_not_found(self):
        snippet = _get_context_window("Patient en bon état.", "embolie pulmonaire")
        assert snippet == ""
||||
|
||||
class TestZCodeWhitelist:
    """Whitelisting of Z-codes allowed as a principal diagnosis."""

    def test_z51_1_whitelisted(self):
        assert _is_z_code_whitelisted("Z51.1") is True

    def test_z45_prefix_whitelisted(self):
        # Prefix match: Z45 covers Z45.80.
        assert _is_z_code_whitelisted("Z45.80") is True

    def test_z76_not_whitelisted(self):
        assert _is_z_code_whitelisted("Z76.0") is False
|
||||
class TestDedupByCode:
    """Deduplication of candidates sharing the same CIM-10 code."""

    def test_dedup_same_code_keeps_strongest(self):
        weaker = _make_candidate(code="K85.1", source_section="conclusion")
        stronger = _make_candidate(code="K85.1", source_section="diag_sortie")
        priority = ["diag_sortie", "diag_principal", "motif_hospitalisation", "conclusion", "synthese"]
        deduped = _dedup_by_code([weaker, stronger], priority)
        assert len(deduped) == 1
        assert deduped[0].source_section == "diag_sortie"

    def test_dedup_different_codes_kept(self):
        first = _make_candidate(code="K85.1")
        second = _make_candidate(code="K80.5", label="Lithiase")
        deduped = _dedup_by_code([first, second], ["diag_sortie"])
        assert len(deduped) == 2
||||
|
||||
# === Tests intégration légère ===
|
||||
|
||||
class TestDPScoringIntegration:
    """Light integration tests through extract_medical_info / llm_dp_fallback."""

    def test_crh_with_diag_sortie_section(self):
        """A CRH with a 'Diagnostic de sortie' section produces a dp_selection."""
        from src.medical.cim10_extractor import extract_medical_info

        doc = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"diag_sortie": "Pancréatite aiguë biliaire K85.1"},
        }
        full_text = (
            "Diagnostic de sortie :\nPancréatite aiguë biliaire K85.1\n\n"
            "Traitement de sortie :\nParacétamol"
        )

        record = extract_medical_info(doc, full_text)
        assert record.diagnostic_principal is not None
        assert record.diagnostic_principal.cim10_suggestion == "K85.1"
        assert record.dp_selection is not None
        assert record.dp_selection.verdict == "confirmed"

    def test_llm_fallback_confirmed_high_strong_section(self):
        """LLM one-shot CONFIRMED: high confidence + strong section."""
        from unittest.mock import patch
        from src.medical.cim10_extractor import extract_medical_info

        doc = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"conclusion": "Pancréatite aiguë biliaire avec HTA connue."},
        }
        full_text = "Conclusion : Pancréatite aiguë biliaire avec HTA connue."

        llm_payload = {
            "dp_code": "K85.1",
            "dp_label": "Pancréatite aiguë biliaire",
            "evidence_section": "conclusion",
            "evidence_excerpt": "Pancréatite aiguë biliaire",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            record = extract_medical_info(doc, full_text, use_rag=True)

        assert record.dp_selection is not None
        assert record.dp_selection.verdict == "confirmed"
        assert record.diagnostic_principal is not None
        assert record.diagnostic_principal.cim10_suggestion == "K85.1"

    def test_llm_fallback_confirmed_conclusion_section(self):
        """LLM one-shot CONFIRMED: 'conclusion' counts as a strong section."""
        from unittest.mock import patch
        from src.medical.cim10_extractor import extract_medical_info

        doc = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"conclusion": "Pneumopathie avec insuffisance rénale aiguë."},
        }
        full_text = "Conclusion : Pneumopathie avec insuffisance rénale aiguë."

        llm_payload = {
            "dp_code": "J18.9",
            "dp_label": "Pneumopathie, sans précision",
            "evidence_section": "conclusion",
            "evidence_excerpt": "Pneumopathie avec insuffisance rénale aiguë",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            record = extract_medical_info(doc, full_text, use_rag=True)

        assert record.dp_selection is not None
        assert record.dp_selection.verdict == "confirmed"
        assert record.diagnostic_principal is not None

    def test_llm_fallback_review_weak_section(self):
        """LLM one-shot REVIEW: evidence from histoire_maladie (weak section)."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate

        doc = {"type": "crh", "sections": {"histoire_maladie": "Dyspnée aiguë."}}
        full_text = "Histoire de la maladie : Dyspnée aiguë."
        record = DossierMedical()
        shortlist = [DPCandidate(code="R06.0", label="Dyspnée", source_section="edsnlp")]

        llm_payload = {
            "dp_code": "R06.0",
            "dp_label": "Dyspnée",
            "evidence_section": "histoire_maladie",
            "evidence_excerpt": "Dyspnée aiguë",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            selection = llm_dp_fallback(doc, full_text, record, dp_candidates=shortlist)

        assert selection.verdict == "review"
        assert len(selection.candidates) >= 1

    def test_llm_fallback_review_low_confidence(self):
        """LLM one-shot REVIEW: confidence=medium triggers the guardrail."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate

        doc = {"type": "crh", "sections": {"conclusion": "HTA connue, diabète équilibré."}}
        full_text = "Conclusion : HTA connue, diabète équilibré."
        record = DossierMedical()
        shortlist = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]

        llm_payload = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "conclusion",
            "evidence_excerpt": "HTA connue",
            "confidence": "medium",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            selection = llm_dp_fallback(doc, full_text, record, dp_candidates=shortlist)

        assert selection.verdict == "review"
        assert "confidence medium" in selection.winner_reason

    def test_llm_fallback_guardrail_no_evidence(self):
        """Guardrail: empty evidence excerpt from the LLM → REVIEW."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate

        doc = {"type": "crh", "sections": {"conclusion": "Pancréatite."}}
        full_text = "Conclusion : Pancréatite."
        record = DossierMedical()
        shortlist = [DPCandidate(code="K85.9", label="Pancréatite", source_section="edsnlp")]

        llm_payload = {
            "dp_code": "K85.9",
            "dp_label": "Pancréatite aiguë",
            "evidence_section": "conclusion",
            "evidence_excerpt": "",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            selection = llm_dp_fallback(doc, full_text, record, dp_candidates=shortlist)

        assert selection.verdict == "review"

    def test_llm_fallback_guardrail_comorbidity_weak_section(self):
        """Guardrail: HTA evidenced in a non-strong section → REVIEW."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate

        doc = {"type": "crh", "sections": {"histoire_maladie": "Patient hypertendu."}}
        full_text = "Histoire de la maladie : Patient hypertendu."
        record = DossierMedical()
        shortlist = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]

        llm_payload = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "histoire_maladie",
            "evidence_excerpt": "Patient hypertendu",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            selection = llm_dp_fallback(doc, full_text, record, dp_candidates=shortlist)

        assert selection.verdict == "review"

    def test_llm_fallback_comorbidity_in_strong_section(self):
        """I10 in a strong section + high confidence → CONFIRMED (GF-2 passes)."""
        from unittest.mock import patch
        from src.medical.dp_scoring import llm_dp_fallback
        from src.config import DossierMedical, DPCandidate

        doc = {"type": "crh", "sections": {"motif_hospitalisation": "HTA maligne."}}
        full_text = "Motif d'hospitalisation : HTA maligne."
        record = DossierMedical()
        shortlist = [DPCandidate(code="I10", label="HTA", source_section="edsnlp")]

        llm_payload = {
            "dp_code": "I10",
            "dp_label": "Hypertension essentielle",
            "evidence_section": "motif_hospitalisation",
            "evidence_excerpt": "HTA maligne",
            "confidence": "high",
        }
        with patch("src.medical.ollama_client.call_ollama", return_value=llm_payload):
            selection = llm_dp_fallback(doc, full_text, record, dp_candidates=shortlist)

        assert selection.verdict == "confirmed"
        assert selection.candidates[0].code == "I10"

    def test_no_llm_fallback_without_use_rag(self):
        """Without use_rag the LLM fallback is NOT triggered."""
        from src.medical.cim10_extractor import extract_medical_info

        doc = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
            "sections": {"conclusion": "Bonne évolution."},
        }
        full_text = "Conclusion : Bonne évolution."

        record = extract_medical_info(doc, full_text, use_rag=False)
        # No use_rag → no LLM fallback → review verdict.
        assert record.dp_selection is not None
        assert record.dp_selection.verdict == "review"

    def test_trackare_dp_bypasses_scoring(self):
        """A Trackare document with a coded DP does NOT trigger scoring."""
        from src.medical.cim10_extractor import extract_medical_info

        doc = {
            "type": "trackare",
            "patient": {"sexe": "F"},
            "sejour": {"date_entree": "01/01/2024", "date_sortie": "05/01/2024"},
            "diagnostics": [
                {"type": "Principal", "code_cim10": "K80.5", "libelle": "Calcul des canaux biliaires"},
            ],
        }
        full_text = "Calcul des canaux biliaires."

        record = extract_medical_info(doc, full_text)
        assert record.diagnostic_principal is not None
        assert record.diagnostic_principal.cim10_suggestion == "K80.5"
        assert record.dp_selection is None  # Trackare DP → scoring bypassed
|
||||
# === Tests comorbidité-banale DP ===
|
||||
|
||||
class TestComorbidityGuard:
    """Banal-comorbidity rule: I10/E66.x/E78.x/E11.x/D64.9 as DP → REVIEW
    unless there is explicit proof of a principal management (PEC)."""

    def test_is_comorbidity_expanded(self):
        """The expanded list covers I10, E66.*, E78.*, E11.*, D64.9."""
        for code in ("I10", "E66.0", "E66.9", "E78.0", "E11.9", "E11.0", "D64.9"):
            assert _is_comorbidity_code(code) is True
        # Not comorbidities.
        for code in ("D64.0", "E10.9", "K85.1"):
            assert _is_comorbidity_code(code) is False

    def test_sole_comorbidity_review(self):
        """A lone comorbidity candidate → REVIEW (even from a strong section)."""
        lone = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        lone.score = 4
        lone.score_details = {"section": 2, "proof_excerpt": 2, "comorbidity_weak": -3}
        selection = select_dp([lone], DossierMedical())
        assert selection.verdict == "review"
        assert "comorbidité banale" in selection.winner_reason

    def test_comorbidity_top1_multi_review(self):
        """A comorbidity ranked first among several candidates → REVIEW."""
        top = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        top.score = 3
        top.score_details = {"section": 3, "comorbidity_weak": -3}
        other = _make_candidate(code="K85.1", label="Pancréatite", source_section="edsnlp")
        other.score = 1
        selection = select_dp([top, other], DossierMedical())
        assert selection.verdict == "review"
        assert "comorbidité banale" in selection.winner_reason

    def test_comorbidity_with_pec_proof_confirmed(self):
        """Comorbidity plus PEC proof → CONFIRMED."""
        lone = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        lone.score = 3
        lone.score_details = {"section": 3, "comorbidity_weak": -3, "comorbidity_pec_proof": 3}
        selection = select_dp([lone], DossierMedical())
        assert selection.verdict == "confirmed"
        assert selection.winner_reason == "candidat unique"

    def test_non_comorbidity_sole_confirmed(self):
        """A lone non-comorbidity candidate stays CONFIRMED (unaffected)."""
        lone = _make_candidate(code="K85.1", label="Pancréatite", source_section="conclusion")
        lone.score = 4
        assert select_dp([lone], DossierMedical()).verdict == "confirmed"

    def test_score_comorbidity_penalty_strong_section(self):
        """The comorbidity penalty applies even in a strong section (conclusion)."""
        candidate = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        ranked = score_candidates([candidate], DossierMedical())
        assert "comorbidity_weak" in ranked[0].score_details
        assert ranked[0].score_details["comorbidity_weak"] == DP_SCORING_WEIGHTS["comorbidity_weak"]

    def test_score_comorbidity_penalty_motif(self):
        """The comorbidity penalty also applies in motif_hospitalisation."""
        candidate = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        ranked = score_candidates([candidate], DossierMedical())
        assert "comorbidity_weak" in ranked[0].score_details

    def test_pec_proof_detected(self):
        """A PEC proof found in the text adds a bonus to score_details."""
        candidate = _make_candidate(code="I10", label="Hta", source_section="motif_hospitalisation")
        ranked = score_candidates(
            [candidate],
            DossierMedical(),
            full_text="Patient hospitalisé pour hta maligne résistante au traitement.",
        )
        assert "comorbidity_pec_proof" in ranked[0].score_details
        assert ranked[0].score_details["comorbidity_pec_proof"] > 0

    def test_pec_proof_not_found(self):
        """No PEC proof in the text → no bonus."""
        candidate = _make_candidate(code="E66.0", label="Obésité", source_section="conclusion")
        ranked = score_candidates(
            [candidate],
            DossierMedical(),
            full_text="Patient obèse, pneumopathie communautaire.",
        )
        assert "comorbidity_pec_proof" not in ranked[0].score_details

    def test_has_explicit_pec_proof_hospitalized(self):
        """Detects 'hospitalisé pour' followed by the label."""
        assert _has_explicit_pec_proof("hta", "Patient hospitalisé pour HTA maligne.") is True

    def test_has_explicit_pec_proof_prise_en_charge(self):
        """Detects 'prise en charge' followed by the label."""
        assert _has_explicit_pec_proof("obésité", "Prise en charge de l'obésité morbide.") is True

    def test_has_explicit_pec_proof_absent(self):
        """No PEC proof for a label that is never mentioned."""
        assert _has_explicit_pec_proof("hta", "Patient admis pour douleur thoracique.") is False

    def test_has_explicit_pec_proof_admission(self):
        """Detects 'admission pour' followed by the label."""
        assert _has_explicit_pec_proof("diabète", "Admission pour diabète déséquilibré.") is True
|
||||
class TestSectionNormalization:
    """Tests for _normalize_evidence_section -- robust section-name normalization."""

    # --- Pre-existing exact matches ---

    def test_exact_conclusion(self):
        got = _normalize_evidence_section("conclusion")
        assert got == "conclusion"

    def test_exact_synthese(self):
        got = _normalize_evidence_section("synthèse")
        assert got == "synthese"

    def test_exact_motif_hospitalisation(self):
        got = _normalize_evidence_section("motif_hospitalisation")
        assert got == "motif_hospitalisation"

    # --- New exact aliases ---

    def test_synthese_du_sejour(self):
        got = _normalize_evidence_section("synthèse du séjour")
        assert got == "synthese"

    def test_synthese_du_sejour_ascii(self):
        got = _normalize_evidence_section("synthese du sejour")
        assert got == "synthese"

    def test_conclusions_pluriel(self):
        got = _normalize_evidence_section("conclusions")
        assert got == "conclusion"

    def test_secretariat_to_autres(self):
        got = _normalize_evidence_section("secrétariat")
        assert got == "autres"

    def test_medecine_interne_to_autres(self):
        got = _normalize_evidence_section("médecine interne")
        assert got == "autres"

    def test_sections_cliniques_to_autres(self):
        got = _normalize_evidence_section("sections cliniques")
        assert got == "autres"

    # --- Bracket/quote stripping ---

    def test_brackets_conclusion(self):
        got = _normalize_evidence_section("[conclusion]")
        assert got == "conclusion"

    def test_brackets_motif(self):
        got = _normalize_evidence_section("[motif_hospitalisation]")
        assert got == "motif_hospitalisation"

    def test_colon_conclusion(self):
        got = _normalize_evidence_section("conclusion:")
        assert got == "conclusion"

    def test_quotes_synthese(self):
        got = _normalize_evidence_section('"synthèse"')
        assert got == "synthese"

    # --- Keyword-based fallback ---

    def test_keyword_conclusion_du_sejour(self):
        got = _normalize_evidence_section("conclusion du séjour")
        assert got == "conclusion"

    def test_keyword_synthese_medicale(self):
        got = _normalize_evidence_section("synthèse médicale du dossier")
        assert got == "synthese"

    def test_keyword_diagnostic_de_sortie_variant(self):
        got = _normalize_evidence_section("diagnostic(s) de sortie")
        assert got == "diag_sortie"

    def test_keyword_diagnostic_retenu_variant(self):
        got = _normalize_evidence_section("diagnostics retenus à la sortie")
        assert got == "diagnostics_retenus"

    def test_keyword_motif_admission(self):
        got = _normalize_evidence_section("motif d'admission aux urgences")
        assert got == "motif_hospitalisation"

    # --- Edge cases ---

    def test_empty_string(self):
        assert _normalize_evidence_section("") == ""

    def test_none_like_empty(self):
        assert _normalize_evidence_section(" ") == ""

    def test_unknown_section_passthrough(self):
        """An unknown section with no keyword hit passes through cleaned."""
        got = _normalize_evidence_section("biologie")
        assert got == "biologie"

    def test_sections_fortes_du_dossier(self):
        """Administrative alias observed in the benchmark corpus."""
        got = _normalize_evidence_section("sections fortes du dossier")
        assert got == "autres"
|
||||
@@ -109,6 +109,139 @@ de masse 34.370"""
|
||||
assert result["signes_vitaux"]["imc"] == 34.370
|
||||
|
||||
|
||||
class TestCRHParserDiagSections:
    """Tests for the newly parsed CRH sections carrying a strong DP signal."""

    def test_parse_diag_sortie(self):
        text = """Mon cher confrère,
Votre patient a été hospitalisé du 01/01/2024 au 05/01/2024.

Diagnostic de sortie :
Pancréatite aiguë biliaire (K85.1)

Traitement de sortie :
Paracétamol"""
        sections = parse_crh(text)["sections"]
        assert "diag_sortie" in sections
        assert "K85.1" in sections["diag_sortie"]

    def test_parse_diagnostics_retenus(self):
        text = """Conclusion :
Bonne évolution.

Diagnostics retenus :
- Cholécystite aiguë lithiasique
- Lithiase vésiculaire

Traitement de sortie :
Paracétamol"""
        sections = parse_crh(text)["sections"]
        assert "diag_sortie" in sections
        assert "Cholécystite" in sections["diag_sortie"]

    def test_parse_diag_principal(self):
        text = """Examen clinique :
Abdomen souple.

Diagnostic principal :
Embolie pulmonaire segmentaire droite

Diagnostics de sortie :
EP + TVP"""
        sections = parse_crh(text)["sections"]
        assert "diag_principal" in sections
        assert "Embolie pulmonaire" in sections["diag_principal"]

    def test_parse_probleme_principal(self):
        text = """Examen clinique :
Patient stable.

Problème principal :
Insuffisance cardiaque décompensée

Devenir : retour à domicile."""
        sections = parse_crh(text)["sections"]
        assert "diag_principal" in sections
        assert "Insuffisance cardiaque" in sections["diag_principal"]

    def test_parse_synthese(self):
        text = """Examen clinique :
RAS.

Synthèse :
Patient de 75 ans hospitalisé pour AVC ischémique sylvien droit.

Traitement de sortie :
Aspirine"""
        sections = parse_crh(text)["sections"]
        assert "synthese" in sections
        assert "AVC" in sections["synthese"]

    def test_existing_sections_preserved(self):
        """The 7 pre-existing sections are still captured."""
        text = """pour le motif suivant:
Pancréatite aiguë

Antécédents :
HTA, diabète

Histoire de la maladie
Douleur abdominale brutale

Examen clinique
Abdomen défense en HCD

Au total :
Pancréatite aiguë biliaire

TTT de sortie :
Paracétamol

Devenir :
Retour à domicile"""
        sections = parse_crh(text)["sections"]
        expected = (
            "motif_hospitalisation",
            "antecedents",
            "histoire_maladie",
            "examen_clinique",
            "conclusion",
            "traitement_sortie",
            "devenir",
        )
        for name in expected:
            assert name in sections

    def test_diag_sortie_multiline(self):
        text = """Au total :
Bonne évolution.

Diagnostic de sortie :
- Pancréatite aiguë biliaire K85.1
- Lithiase vésiculaire K80.2
- Obésité E66.0

Traitement de sortie :
Paracétamol"""
        sections = parse_crh(text)["sections"]
        assert "diag_sortie" in sections
        captured = sections["diag_sortie"]
        for code in ("K85.1", "K80.2", "E66.0"):
            assert code in captured

    def test_conclusion_does_not_overflow_into_diag_sortie(self):
        text = """Au total :
Pancréatite aiguë biliaire, évolution favorable.

Diagnostic de sortie :
Pancréatite aiguë biliaire K85.1

Traitement de sortie :
Paracétamol"""
        sections = parse_crh(text)["sections"]
        assert "conclusion" in sections
        assert "diag_sortie" in sections
        # The conclusion must NOT absorb the diag_sortie text.
        assert "K85.1" not in sections["conclusion"]
|
||||
|
||||
|
||||
class TestCleanPersonName:
|
||||
def test_clean_simple(self):
    """A plain 'First LAST' name passes through unchanged."""
    cleaned = _clean_person_name("Sarah DUTREY")
    assert cleaned == "Sarah DUTREY"
|
||||
|
||||
@@ -653,6 +653,38 @@ class TestBackwardCompatAntecedent:
|
||||
assert all(isinstance(c, Complication) for c in dossier.complications)
|
||||
|
||||
|
||||
class TestDPSelectionIntegration:
    """Integration tests for the DP scoring step inside the extraction pipeline."""

    def test_crh_dp_selection_populated(self):
        """A CRH with no Trackare DP triggers scoring and populates dp_selection."""
        parsed = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }
        raw_text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour."
        dossier = extract_medical_info(parsed, raw_text)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert dossier.dp_selection is not None
        assert len(dossier.dp_selection.candidates) >= 1

    def test_dp_selection_serialization(self):
        """dp_selection serializes to JSON-compatible data via model_dump()."""
        from src.config import DPCandidate, DPSelection

        selection = DPSelection(
            verdict="confirmed",
            candidates=[DPCandidate(code="K85.1", label="Test", source_section="regex")],
            winner_reason="candidat unique",
        )
        dumped = selection.model_dump()
        assert dumped["verdict"] == "confirmed"
        assert len(dumped["candidates"]) == 1
        assert dumped["candidates"][0]["code"] == "K85.1"
|
||||
|
||||
|
||||
class TestSourceTrackingFields:
|
||||
"""Tests que les champs source_page/source_excerpt existent sur les modèles."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user