feat: alias diagnostiques CIM-10 + scoring conclusion + garde-fou Trackare R-code

- DIAGNOSIS_ALIASES : mapping acronymes cliniques → CIM-10 (DLBCL→C83.3, SCA→I25.1, EP→I26.9, IDM→I21.9, etc.)
- Scoring 4b étendu : conclusion (+2) ajouté aux sections diagnostiques, matching par alias en plus du terme/code
- _collect_evidence : détection alias dans les sections pour preuves plus complètes
- Garde-fou Trackare : si DP est un R-code (symptôme) et que les sections CRH mentionnent un diagnostic étiologique via alias → verdict REVIEW au lieu de CONFIRMED, alerte DIM
- Case 74 : verdict attendu REVIEW (conclusion mentionne les 2 diagnostics, delta insuffisant)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-24 14:35:15 +01:00
parent 07c267539c
commit 06a1be5425
2 changed files with 81 additions and 10 deletions

View File

@@ -76,6 +76,36 @@ SECTION_STRENGTH = {
"nuke3": 0, # pas de self-boost
}
# Alias cliniques courants → code CIM-10 (pour matching sections diagnostiques)
# Permet de matcher "DLBCL" dans une section avec le candidat C83.3
DIAGNOSIS_ALIASES: dict[str, str] = {
"dlbcl": "C83.3",
"lymphome diffus à grandes cellules b": "C83.3",
"lymphome b diffus grandes cellules": "C83.3",
"lymphome diffus à grandes cellules": "C83.3",
"sca": "I25.1",
"syndrome coronarien aigu": "I25.1",
"embolie pulmonaire": "I26.9",
"ep": "I26.9",
"avc ischémique": "I63.9",
"avc hémorragique": "I61.9",
"infarctus du myocarde": "I21.9",
"idm": "I21.9",
}
def _alias_matches_candidate(sec_text_lower: str, code_upper: str) -> bool:
"""Vérifie si un alias clinique dans le texte de section correspond au code du candidat."""
if not code_upper:
return False
for alias, alias_code in DIAGNOSIS_ALIASES.items():
if alias in sec_text_lower:
# Match exact OU family3
if code_upper == alias_code.upper() or code_upper[:3] == alias_code[:3].upper():
return True
return False
# Seuil d'écart de score pour CONFIRMED sans LLM
DELTA_CONFIRMED = 3.0
@@ -205,15 +235,21 @@ def score_candidates(
# 4b. Mention dans sections diagnostiques fortes (CRH)
# diag_sortie / diag_principal = +4 (signal le plus fort après trackare)
# synthese = +2 (équivalent conclusion)
# synthese / conclusion = +2
# Match par terme, code CIM-10, OU alias clinique (DLBCL→C83.3, etc.)
term_lower = c.term.lower()
code_upper = (c.code or "").upper()
_diag_section_bonus = 0.0
for sec_key, sec_bonus in (("diag_sortie", 4), ("diag_principal", 4), ("synthese", 2)):
for sec_key, sec_bonus in (
("diag_sortie", 4), ("diag_principal", 4),
("synthese", 2), ("conclusion", 2),
):
sec_text = (synthese.get(sec_key) or "").lower()
if not sec_text or len(sec_text) < 3:
continue
if term_lower in sec_text or (code_upper and code_upper in sec_text.upper()):
if (term_lower in sec_text
or (code_upper and code_upper in sec_text.upper())
or _alias_matches_candidate(sec_text, code_upper)):
_diag_section_bonus = max(_diag_section_bonus, sec_bonus)
if _diag_section_bonus:
score += _diag_section_bonus
@@ -370,20 +406,24 @@ def _collect_evidence(
term_lower = winner.term.lower()
code_upper = (winner.code or "").upper()
def _term_or_code_in(text: str) -> bool:
def _term_or_code_or_alias_in(text: str) -> bool:
tl = text.lower()
return (term_lower in tl) or (code_upper and code_upper in text.upper())
return (
(term_lower in tl)
or (code_upper and code_upper in text.upper())
or _alias_matches_candidate(tl, code_upper)
)
if diag_sortie and _term_or_code_in(diag_sortie):
if diag_sortie and _term_or_code_or_alias_in(diag_sortie):
excerpt = diag_sortie[:MAX_EVIDENCE_LEN]
evidence.append(f"Diagnostic de sortie: «{excerpt}»")
elif diag_principal_sec and _term_or_code_in(diag_principal_sec):
elif diag_principal_sec and _term_or_code_or_alias_in(diag_principal_sec):
excerpt = diag_principal_sec[:MAX_EVIDENCE_LEN]
evidence.append(f"Diagnostic principal: «{excerpt}»")
elif motif and term_lower in motif.lower():
excerpt = motif[:MAX_EVIDENCE_LEN]
evidence.append(f"Motif hospitalisation: «{excerpt}»")
elif conclusion and term_lower in conclusion.lower():
elif conclusion and _term_or_code_or_alias_in(conclusion):
excerpt = conclusion[:MAX_EVIDENCE_LEN]
evidence.append(f"Conclusion: «{excerpt}»")
@@ -478,11 +518,42 @@ def select_dp(
top_k = config.get("top_k", 7)
# Trackare → DP autoritaire, pas de scoring
# Garde-fou : si Trackare code un R-code (symptôme) mais que les sections CRH
# mentionnent un diagnostic étiologique fort (via alias), on downgrade en REVIEW.
if dossier.document_type == "trackare" and dossier.diagnostic_principal:
dp_code = dossier.diagnostic_principal.cim10_suggestion or ""
trackare_is_symptom = dp_code.startswith("R")
crh_has_strong_diag = False
if trackare_is_symptom:
for sec_key in ("diag_sortie", "diag_principal", "synthese", "conclusion"):
sec_text = (synthese.get(sec_key) or "").lower()
if sec_text and len(sec_text) >= 3:
for alias, alias_code in DIAGNOSIS_ALIASES.items():
if alias in sec_text and not alias_code.startswith("R"):
crh_has_strong_diag = True
break
if crh_has_strong_diag:
break
if trackare_is_symptom and crh_has_strong_diag:
return DPSelection(
chosen_index=0,
chosen_term=dossier.diagnostic_principal.texte,
chosen_code=dp_code,
confidence="medium",
verdict="REVIEW",
evidence=[
"Source: Trackare (codage établissement)",
"Alerte: Trackare code un symptôme (R*) mais le CRH mentionne un diagnostic étiologique",
],
reason="Trackare symptôme vs CRH diagnostic — vérification DIM requise",
)
return DPSelection(
chosen_index=0,
chosen_term=dossier.diagnostic_principal.texte,
chosen_code=dossier.diagnostic_principal.cim10_suggestion,
chosen_code=dp_code,
confidence="high",
verdict="CONFIRMED",
evidence=["Source: Trackare (codage établissement)"],