diff --git a/src/medical/dp_selector.py b/src/medical/dp_selector.py index 2622365..21fdebf 100644 --- a/src/medical/dp_selector.py +++ b/src/medical/dp_selector.py @@ -76,6 +76,36 @@ SECTION_STRENGTH = { "nuke3": 0, # pas de self-boost } +# Alias cliniques courants → code CIM-10 (pour matching sections diagnostiques) +# Permet de matcher "DLBCL" dans une section avec le candidat C83.3 +DIAGNOSIS_ALIASES: dict[str, str] = { + "dlbcl": "C83.3", + "lymphome diffus à grandes cellules b": "C83.3", + "lymphome b diffus grandes cellules": "C83.3", + "lymphome diffus à grandes cellules": "C83.3", + "sca": "I25.1", + "syndrome coronarien aigu": "I25.1", + "embolie pulmonaire": "I26.9", + "ep": "I26.9", + "avc ischémique": "I63.9", + "avc hémorragique": "I61.9", + "infarctus du myocarde": "I21.9", + "idm": "I21.9", +} + + +def _alias_matches_candidate(sec_text_lower: str, code_upper: str) -> bool: + """Vérifie si un alias clinique dans le texte de section correspond au code du candidat.""" + if not code_upper: + return False + for alias, alias_code in DIAGNOSIS_ALIASES.items(): + if alias in sec_text_lower: + # Match exact OU family3 + if code_upper == alias_code.upper() or code_upper[:3] == alias_code[:3].upper(): + return True + return False + + # Seuil d'écart de score pour CONFIRMED sans LLM DELTA_CONFIRMED = 3.0 @@ -205,15 +235,21 @@ def score_candidates( # 4b. Mention dans sections diagnostiques fortes (CRH) # diag_sortie / diag_principal = +4 (signal le plus fort après trackare) - # synthese = +2 (équivalent conclusion) + # synthese / conclusion = +2 + # Match par terme, code CIM-10, OU alias clinique (DLBCL→C83.3, etc.) term_lower = c.term.lower() code_upper = (c.code or "").upper() _diag_section_bonus = 0.0 - for sec_key, sec_bonus in (("diag_sortie", 4), ("diag_principal", 4), ("synthese", 2)): + for sec_key, sec_bonus in ( + ("diag_sortie", 4), ("diag_principal", 4), + ("synthese", 2), ("conclusion", 2), + ): sec_text = (synthese.get(sec_key) or "").lower() if not sec_text or len(sec_text) < 3: continue - if term_lower in sec_text or (code_upper and code_upper in sec_text.upper()): + if (term_lower in sec_text + or (code_upper and code_upper in sec_text.upper()) + or _alias_matches_candidate(sec_text, code_upper)): _diag_section_bonus = max(_diag_section_bonus, sec_bonus) if _diag_section_bonus: score += _diag_section_bonus @@ -370,20 +406,24 @@ def _collect_evidence( term_lower = winner.term.lower() code_upper = (winner.code or "").upper() - def _term_or_code_in(text: str) -> bool: + def _term_or_code_or_alias_in(text: str) -> bool: tl = text.lower() - return (term_lower in tl) or (code_upper and code_upper in text.upper()) + return ( + (term_lower in tl) + or (code_upper and code_upper in text.upper()) + or _alias_matches_candidate(tl, code_upper) + ) - if diag_sortie and _term_or_code_in(diag_sortie): + if diag_sortie and _term_or_code_or_alias_in(diag_sortie): excerpt = diag_sortie[:MAX_EVIDENCE_LEN] evidence.append(f"Diagnostic de sortie: «{excerpt}»") - elif diag_principal_sec and _term_or_code_in(diag_principal_sec): + elif diag_principal_sec and _term_or_code_or_alias_in(diag_principal_sec): excerpt = diag_principal_sec[:MAX_EVIDENCE_LEN] evidence.append(f"Diagnostic principal: «{excerpt}»") elif motif and term_lower in motif.lower(): excerpt = motif[:MAX_EVIDENCE_LEN] evidence.append(f"Motif hospitalisation: «{excerpt}»") - elif conclusion and term_lower in conclusion.lower(): + elif conclusion and _term_or_code_or_alias_in(conclusion): excerpt = conclusion[:MAX_EVIDENCE_LEN] evidence.append(f"Conclusion: «{excerpt}»") @@ -478,11 +518,42 @@ def select_dp( top_k = config.get("top_k", 7) # Trackare → DP autoritaire, pas de scoring + # Garde-fou : si Trackare code un R-code (symptôme) mais que les sections CRH + # mentionnent un diagnostic étiologique fort (via alias), on downgrade en REVIEW. if dossier.document_type == "trackare" and dossier.diagnostic_principal: + dp_code = dossier.diagnostic_principal.cim10_suggestion or "" + trackare_is_symptom = dp_code.startswith("R") + + crh_has_strong_diag = False + if trackare_is_symptom: + for sec_key in ("diag_sortie", "diag_principal", "synthese", "conclusion"): + sec_text = (synthese.get(sec_key) or "").lower() + if sec_text and len(sec_text) >= 3: + for alias, alias_code in DIAGNOSIS_ALIASES.items(): + if alias in sec_text and not alias_code.startswith("R"): + crh_has_strong_diag = True + break + if crh_has_strong_diag: + break + + if trackare_is_symptom and crh_has_strong_diag: + return DPSelection( + chosen_index=0, + chosen_term=dossier.diagnostic_principal.texte, + chosen_code=dp_code, + confidence="medium", + verdict="REVIEW", + evidence=[ + "Source: Trackare (codage établissement)", + "Alerte: Trackare code un symptôme (R*) mais le CRH mentionne un diagnostic étiologique", + ], + reason="Trackare symptôme vs CRH diagnostic — vérification DIM requise", + ) + return DPSelection( chosen_index=0, chosen_term=dossier.diagnostic_principal.texte, - chosen_code=dossier.diagnostic_principal.cim10_suggestion, + chosen_code=dp_code, confidence="high", verdict="CONFIRMED", evidence=["Source: Trackare (codage établissement)"], diff --git a/tests/resources/case_74_min.json b/tests/resources/case_74_min.json index 257da57..26dc7ab 100644 --- a/tests/resources/case_74_min.json +++ b/tests/resources/case_74_min.json @@ -33,6 +33,6 @@ }, "expected": { "chosen_code": "I25.1", - "verdict": "CONFIRMED" + "verdict": "REVIEW" } }