feat: alias diagnostiques CIM-10 + scoring conclusion + garde-fou Trackare R-code

- DIAGNOSIS_ALIASES : mapping acronymes cliniques → CIM-10 (DLBCL→C83.3, SCA→I25.1, EP→I26.9, IDM→I21.9, etc.) - Scoring 4b étendu : conclusion (+2) ajouté aux sections diagnostiques, matching par alias en plus du terme/code - _collect_evidence : détection alias dans les sections pour preuves plus complètes - Garde-fou Trackare : si DP est un R-code (symptôme) et que les sections CRH mentionnent un diagnostic étiologique via alias → verdict REVIEW au lieu de CONFIRMED, alerte DIM - Case 74 : verdict attendu REVIEW (conclusion mentionne les 2 diagnostics, delta insuffisant) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 14:35:15 +01:00
parent 07c267539c
commit 06a1be5425
2 changed files with 81 additions and 10 deletions
--- a/src/medical/dp_selector.py
+++ b/src/medical/dp_selector.py
@@ -76,6 +76,36 @@ SECTION_STRENGTH = {
    "nuke3": 0,  # pas de self-boost
 }

+# Alias cliniques courants → code CIM-10 (pour matching sections diagnostiques)
+# Permet de matcher "DLBCL" dans une section avec le candidat C83.3
+DIAGNOSIS_ALIASES: dict[str, str] = {
+    "dlbcl": "C83.3",
+    "lymphome diffus à grandes cellules b": "C83.3",
+    "lymphome b diffus grandes cellules": "C83.3",
+    "lymphome diffus à grandes cellules": "C83.3",
+    "sca": "I25.1",
+    "syndrome coronarien aigu": "I25.1",
+    "embolie pulmonaire": "I26.9",
+    "ep": "I26.9",
+    "avc ischémique": "I63.9",
+    "avc hémorragique": "I61.9",
+    "infarctus du myocarde": "I21.9",
+    "idm": "I21.9",
+}
+
+
+def _alias_matches_candidate(sec_text_lower: str, code_upper: str) -> bool:
+    """Vérifie si un alias clinique dans le texte de section correspond au code du candidat."""
+    if not code_upper:
+        return False
+    for alias, alias_code in DIAGNOSIS_ALIASES.items():
+        if alias in sec_text_lower:
+            # Match exact OU family3
+            if code_upper == alias_code.upper() or code_upper[:3] == alias_code[:3].upper():
+                return True
+    return False
+
+
 # Seuil d'écart de score pour CONFIRMED sans LLM
 DELTA_CONFIRMED = 3.0

@@ -205,15 +235,21 @@ def score_candidates(

        # 4b. Mention dans sections diagnostiques fortes (CRH)
        #     diag_sortie / diag_principal = +4 (signal le plus fort après trackare)
-        #     synthese = +2 (équivalent conclusion)
+        #     synthese / conclusion = +2
+        #     Match par terme, code CIM-10, OU alias clinique (DLBCL→C83.3, etc.)
        term_lower = c.term.lower()
        code_upper = (c.code or "").upper()
        _diag_section_bonus = 0.0
-        for sec_key, sec_bonus in (("diag_sortie", 4), ("diag_principal", 4), ("synthese", 2)):
+        for sec_key, sec_bonus in (
+            ("diag_sortie", 4), ("diag_principal", 4),
+            ("synthese", 2), ("conclusion", 2),
+        ):
            sec_text = (synthese.get(sec_key) or "").lower()
            if not sec_text or len(sec_text) < 3:
                continue
-            if term_lower in sec_text or (code_upper and code_upper in sec_text.upper()):
+            if (term_lower in sec_text
+                    or (code_upper and code_upper in sec_text.upper())
+                    or _alias_matches_candidate(sec_text, code_upper)):
                _diag_section_bonus = max(_diag_section_bonus, sec_bonus)
        if _diag_section_bonus:
            score += _diag_section_bonus
@@ -370,20 +406,24 @@ def _collect_evidence(
    term_lower = winner.term.lower()
    code_upper = (winner.code or "").upper()

-    def _term_or_code_in(text: str) -> bool:
+    def _term_or_code_or_alias_in(text: str) -> bool:
        tl = text.lower()
-        return (term_lower in tl) or (code_upper and code_upper in text.upper())
+        return (
+            (term_lower in tl)
+            or (code_upper and code_upper in text.upper())
+            or _alias_matches_candidate(tl, code_upper)
+        )

-    if diag_sortie and _term_or_code_in(diag_sortie):
+    if diag_sortie and _term_or_code_or_alias_in(diag_sortie):
        excerpt = diag_sortie[:MAX_EVIDENCE_LEN]
        evidence.append(f"Diagnostic de sortie: «{excerpt}»")
-    elif diag_principal_sec and _term_or_code_in(diag_principal_sec):
+    elif diag_principal_sec and _term_or_code_or_alias_in(diag_principal_sec):
        excerpt = diag_principal_sec[:MAX_EVIDENCE_LEN]
        evidence.append(f"Diagnostic principal: «{excerpt}»")
    elif motif and term_lower in motif.lower():
        excerpt = motif[:MAX_EVIDENCE_LEN]
        evidence.append(f"Motif hospitalisation: «{excerpt}»")
-    elif conclusion and term_lower in conclusion.lower():
+    elif conclusion and _term_or_code_or_alias_in(conclusion):
        excerpt = conclusion[:MAX_EVIDENCE_LEN]
        evidence.append(f"Conclusion: «{excerpt}»")

@@ -478,11 +518,42 @@ def select_dp(
    top_k = config.get("top_k", 7)

    # Trackare → DP autoritaire, pas de scoring
+    # Garde-fou : si Trackare code un R-code (symptôme) mais que les sections CRH
+    # mentionnent un diagnostic étiologique fort (via alias), on downgrade en REVIEW.
    if dossier.document_type == "trackare" and dossier.diagnostic_principal:
+        dp_code = dossier.diagnostic_principal.cim10_suggestion or ""
+        trackare_is_symptom = dp_code.startswith("R")
+
+        crh_has_strong_diag = False
+        if trackare_is_symptom:
+            for sec_key in ("diag_sortie", "diag_principal", "synthese", "conclusion"):
+                sec_text = (synthese.get(sec_key) or "").lower()
+                if sec_text and len(sec_text) >= 3:
+                    for alias, alias_code in DIAGNOSIS_ALIASES.items():
+                        if alias in sec_text and not alias_code.startswith("R"):
+                            crh_has_strong_diag = True
+                            break
+                if crh_has_strong_diag:
+                    break
+
+        if trackare_is_symptom and crh_has_strong_diag:
+            return DPSelection(
+                chosen_index=0,
+                chosen_term=dossier.diagnostic_principal.texte,
+                chosen_code=dp_code,
+                confidence="medium",
+                verdict="REVIEW",
+                evidence=[
+                    "Source: Trackare (codage établissement)",
+                    "Alerte: Trackare code un symptôme (R*) mais le CRH mentionne un diagnostic étiologique",
+                ],
+                reason="Trackare symptôme vs CRH diagnostic — vérification DIM requise",
+            )
+
        return DPSelection(
            chosen_index=0,
            chosen_term=dossier.diagnostic_principal.texte,
-            chosen_code=dossier.diagnostic_principal.cim10_suggestion,
+            chosen_code=dp_code,
            confidence="high",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],