# NOTE: reconstructed from a whitespace-collapsed git patch that adds this
# module as tests/test_dp_finalizer.py. The same patch also bumps the
# generation timestamp of docs/gold_debug/NUKE3_GOLD_TOP_ERRORS.md
# (2026-02-24 14:34 -> 2026-02-24 17:44, in both the header and the footer).
"""Tests for the DP finalizer: arbitration between Trackare and CRH-only.

No mocks. Synthetic fixtures built from real objects (DossierMedical,
DPSelection, DPCandidate).
"""

from __future__ import annotations

import pytest

from src.config import DossierMedical, DPSelection, DPCandidate, Diagnostic, Sejour
from src.medical.dp_finalizer import finalize_dp, decide_dp_final


# ── Helpers ────────────────────────────────────────────────────────────


def _sel(
    code: str,
    term: str = "",
    verdict: str = "REVIEW",
    confidence: str = "medium",
    evidence: list[str] | None = None,
    reason: str = "",
    candidates: list[DPCandidate] | None = None,
) -> DPSelection:
    """Build a minimal DPSelection.

    ``term`` falls back to ``code`` when empty; ``evidence`` and
    ``candidates`` fall back to a fresh empty list when falsy.
    """
    fields = {
        "chosen_code": code,
        "chosen_term": term if term else code,
        "verdict": verdict,
        "confidence": confidence,
        "evidence": evidence if evidence else [],
        "reason": reason,
        "candidates": candidates if candidates else [],
    }
    return DPSelection(**fields)


def _cand(code: str, term: str = "", score: float = 0.0) -> DPCandidate:
    """Build a DPCandidate at index 0; ``term`` defaults to ``code`` when empty."""
    label = term if term else code
    return DPCandidate(index=0, term=label, code=code, score=score)


def _dossier(
    doc_type: str = "crh",
    dp_code: str | None = None,
    dp_selection: DPSelection | None = None,
    existing_flags: dict | None = None,
    existing_alertes: list[str] | None = None,
) -> DossierMedical:
    """Build a DossierMedical around a default-constructed Sejour.

    When ``dp_code`` is truthy, a Diagnostic using it as both text and
    CIM-10 suggestion becomes the principal diagnosis; otherwise the
    principal diagnosis is None. Non-empty ``existing_flags`` and
    ``existing_alertes`` are copied onto ``quality_flags`` and
    ``alertes_codage`` respectively.
    """
    principal = Diagnostic(texte=dp_code, cim10_suggestion=dp_code) if dp_code else None
    dossier = DossierMedical(
        document_type=doc_type,
        sejour=Sejour(),
        diagnostic_principal=principal,
        dp_selection=dp_selection,
    )
    # Assign copies so the dossier never holds the caller's own container.
    if existing_flags:
        dossier.quality_flags = existing_flags.copy()
    if existing_alertes:
        dossier.alertes_codage = existing_alertes.copy()
    return dossier


# ===================================================================
# R1 — CRH CONFIRMED overrides Trackare
# ===================================================================


class TestR1CrhConfirmedOverridesTrackare:
    """Rule R1 scenarios: both sources present, CRH selection CONFIRMED."""

    def test_crh_confirmed_with_evidence_wins(self):
        """R1: a CONFIRMED CRH code with evidence is chosen over a different Trackare code."""
        trackare_sel = _sel(
            "I10",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
            reason="DP Trackare — source d'autorité",
        )
        crh_sel = _sel(
            "I26.9",
            term="Embolie pulmonaire",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «Embolie pulmonaire»"],
            reason="CRH CONFIRMED",
        )

        final, flags, alerts = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "I26.9"
        assert final.verdict == "CONFIRMED"
        assert flags.get("override_trackare_by_crh_confirmed") is True
        # At least one alert must mention Trackare together with "écarté".
        assert any("Trackare" in msg and "écarté" in msg for msg in alerts)

    def test_crh_confirmed_coherent_no_override_flag(self):
        """R1: when both sources agree on the code, no override flag is raised."""
        trackare_sel = _sel(
            "I26.9",
            verdict="CONFIRMED",
            evidence=["Source: Trackare"],
        )
        crh_sel = _sel(
            "I26.9",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «Embolie pulmonaire»"],
        )

        final, flags, _ = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "I26.9"
        assert "override_trackare_by_crh_confirmed" not in flags
        assert flags.get("crh_confirmed_coherent") is True


# ===================================================================
# R2 — Non-symptom Trackare code corroborated by the CRH
# ===================================================================


class TestR2TrackareCorroborated:
    """Rule R2 scenarios: the CRH candidates back up the Trackare code."""

    def test_trackare_corroborated_exact_match(self):
        """R2: Trackare I26.9 with I26.9 among the CRH candidates gives CONFIRMED."""
        crh_candidates = [
            _cand("I26.9", term="Embolie pulmonaire", score=5.0),
            _cand("Q53.9", term="Cryptorchidie", score=2.0),
        ]
        trackare_sel = _sel(
            "I26.9",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )
        crh_sel = _sel("I26.9", term="EP", verdict="REVIEW", candidates=crh_candidates)

        final, flags, _ = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "I26.9"
        assert final.verdict == "CONFIRMED"
        assert flags.get("trackare_confirmed_by_crh") is True
        # The final evidence must carry a corroboration entry.
        assert any("corroboré" in item for item in final.evidence)

    def test_trackare_corroborated_family3(self):
        """R2: Trackare I26.0 with CRH I26.9 keeps I26.0 and is CONFIRMED.

        The two codes differ only after the shared ``I26`` prefix.
        """
        trackare_sel = _sel("I26.0", verdict="CONFIRMED", evidence=["Source: Trackare"])
        crh_sel = _sel("I26.9", verdict="REVIEW", candidates=[_cand("I26.9")])

        final, flags, _ = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "I26.0"
        assert final.verdict == "CONFIRMED"
        assert flags.get("trackare_confirmed_by_crh") is True


# ===================================================================
# R3 — Trackare symptom code (R*) versus an etiological CRH code
# ===================================================================


class TestR3TrackareSymptom:
    """Rule R3 scenarios: Trackare holds an R-code, the CRH a non-R code."""

    def test_trackare_symptom_overridden_by_crh_confirmed(self):
        """R3: Trackare R59.0 with CRH CONFIRMED C83.3 and evidence is overridden."""
        trackare_sel = _sel(
            "R59.0",
            term="Adénopathie",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )
        crh_sel = _sel(
            "C83.3",
            term="DLBCL",
            verdict="CONFIRMED",
            evidence=["Conclusion: «DLBCL en progression»"],
        )

        final, flags, alerts = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "C83.3"
        assert final.verdict == "CONFIRMED"
        assert flags.get("trackare_symptom_overridden") is True
        # One alert must name both the discarded and the retained code.
        assert any("R59.0" in msg and "C83.3" in msg for msg in alerts)

    def test_trackare_symptom_review_when_crh_not_confirmed(self):
        """R3: Trackare R59.0 with CRH REVIEW C83.3 keeps R59.0 under REVIEW."""
        trackare_sel = _sel(
            "R59.0",
            term="Adénopathie",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )
        crh_sel = _sel(
            "C83.3",
            term="DLBCL",
            verdict="REVIEW",
            evidence=["Scores proches"],
        )

        final, flags, alerts = decide_dp_final(trackare_sel, crh_sel)

        assert final.chosen_code == "R59.0"  # the Trackare code is kept
        assert final.verdict == "REVIEW"
        assert final.confidence == "medium"
        assert flags.get("trackare_symptom_vs_crh_diagnosis") is True
        assert any("vérification DIM" in msg for msg in alerts)

    def test_trackare_symptom_review_when_crh_weak_evidence(self):
        """R3: Trackare R06.0 with CRH CONFIRMED but empty evidence gives REVIEW."""
        trackare_sel = _sel(
            "R06.0",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )
        # CONFIRMED verdict, but no supporting evidence at all.
        crh_sel = _sel("J18.9", verdict="CONFIRMED", evidence=[])

        final, flags, _ = decide_dp_final(trackare_sel, crh_sel)

        assert final.verdict == "REVIEW"
        assert flags.get("trackare_symptom_vs_crh_diagnosis") is True


# ===================================================================
# R4 — Ambiguous case / weak evidence
# ===================================================================


class TestR4Ambiguous:
    """Rule R4 scenario: the two sources disagree and nothing corroborates."""

    def test_trackare_non_r_crh_review_different_code(self):
        """R4: Trackare K85.1 with an unrelated CRH REVIEW code (N17.9) is ambiguous."""
        # K85.9 would share the K85 family with K85.1 and trigger R2 instead,
        # so the CRH side uses a genuinely different code.
        trackare_sel = _sel(
            "K85.1",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )
        crh_sel = _sel(
            "N17.9",
            verdict="REVIEW",
            candidates=[_cand("N17.9", term="IRA", score=3.0)],
        )

        final, flags, alerts = decide_dp_final(trackare_sel, crh_sel)

        assert final.verdict == "REVIEW"
        assert flags.get("review_ambiguous") is True
        # Case-insensitive match on the alert wording.
        assert any("ambigu" in msg.lower() for msg in alerts)


# ===================================================================
# R5 — Z-codes / R-codes that must not be auto-confirmed
# ===================================================================


class TestR5Interdictions:
    """Rule R5 scenarios: Z-code and R-code principal diagnoses."""

    @staticmethod
    def _dyspnea_crh() -> "DPSelection":
        """CONFIRMED CRH selection on R06.0 with one R and one non-R candidate."""
        return _sel(
            "R06.0",
            term="Dyspnée",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «Dyspnée»"],
            candidates=[
                _cand("R06.0", term="Dyspnée", score=5.0),
                _cand("J18.9", term="Pneumopathie", score=3.0),
            ],
        )

    def test_z_code_never_confirmed(self):
        """R5: Z95.5 (expected to be off the whitelist) is forced to REVIEW."""
        trackare_sel = _sel(
            "Z95.5",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )

        final, flags, _ = decide_dp_final(trackare_sel, None)

        assert final.verdict == "REVIEW"
        assert flags.get("z_code_dp_review") is True

    def test_z_code_whitelisted_stays_confirmed(self):
        """R5: Z51.1 (expected to be whitelisted) stays CONFIRMED."""
        trackare_sel = _sel(
            "Z51.1",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )

        final, flags, _ = decide_dp_final(trackare_sel, None)

        assert final.verdict == "CONFIRMED"
        assert "z_code_dp_review" not in flags

    def test_r_code_not_allowed_if_non_r_candidate(self):
        """R5: R06.0 CONFIRMED with a non-R candidate (J18.9) is downgraded to REVIEW."""
        crh_sel = self._dyspnea_crh()

        final, flags, _ = decide_dp_final(None, crh_sel, allow_symptom_dp=False)

        assert final.verdict == "REVIEW"
        assert flags.get("r_code_dp_with_non_r_candidate") is True

    def test_r_code_allowed_when_flag_true(self):
        """R5: the same R06.0 selection stays CONFIRMED with allow_symptom_dp=True."""
        crh_sel = self._dyspnea_crh()

        final, flags, _ = decide_dp_final(None, crh_sel, allow_symptom_dp=True)

        assert final.verdict == "CONFIRMED"
        assert "r_code_dp_with_non_r_candidate" not in flags
# ===================================================================
# Degenerate cases — pass-through
# ===================================================================


class TestPassThrough:
    """Scenarios where at most one DP source is available."""

    def test_no_trackare_crh_only(self):
        """CRH-only input is passed through and flagged ``crh_only_mode``."""
        crh_sel = _sel(
            "A87.0",
            term="Méningite entérovirus",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «Méningite à entérovirus»"],
        )

        final, flags, _ = decide_dp_final(None, crh_sel)

        assert final.chosen_code == "A87.0"
        assert final.verdict == "CONFIRMED"
        assert flags.get("crh_only_mode") is True

    def test_no_crh_trackare_only(self):
        """Trackare-only input on a non-Z, non-R code stays CONFIRMED."""
        trackare_sel = _sel(
            "K81.0",
            term="Cholécystite",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
        )

        final, flags, _ = decide_dp_final(trackare_sel, None)

        assert final.chosen_code == "K81.0"
        assert final.verdict == "CONFIRMED"
        assert flags.get("trackare_only_mode") is True

    def test_no_sources_returns_review(self):
        """With no source at all the result is REVIEW, flagged, with an alert."""
        final, flags, alerts = decide_dp_final(None, None)

        assert final.verdict == "REVIEW"
        assert flags.get("no_dp_source") is True
        assert len(alerts) > 0


# ===================================================================
# finalize_dp() integration — full dossier
# ===================================================================


class TestFinalizeDp:
    """End-to-end checks of finalize_dp() on dossiers built by _dossier()."""

    def test_quality_flags_merge(self):
        """Flags already on the dossier are not overwritten by the finalizer."""
        crh_sel = _sel(
            "A87.0",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «Méningite»"],
        )
        dossier = _dossier(
            doc_type="crh",
            dp_selection=crh_sel,
            existing_flags={"my_existing_flag": True},
        )

        finalize_dp(dossier)

        assert dossier.quality_flags["my_existing_flag"] is True
        assert dossier.quality_flags.get("crh_only_mode") is True

    def test_alertes_codage_appended(self):
        """Alerts already on the dossier survive finalize_dp.

        Also checks that ``dp_trackare`` and ``dp_final`` are populated for
        a Trackare dossier.
        """
        trackare_sel = _sel(
            "R59.0",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
            reason="DP Trackare — source d'autorité",
        )
        # Only a Trackare selection is attached to the dossier; no CRH
        # selection takes part in this scenario.
        dossier = _dossier(
            doc_type="trackare",
            dp_code="R59.0",
            dp_selection=trackare_sel,
            existing_alertes=["Alerte existante"],
        )

        finalize_dp(dossier)

        assert "Alerte existante" in dossier.alertes_codage
        assert dossier.dp_trackare is not None
        assert dossier.dp_final is not None

    def test_dp_trackare_and_crh_only_set(self):
        """A CRH dossier gets ``dp_crh_only`` and ``dp_final`` set, ``dp_trackare`` None."""
        crh_sel = _sel(
            "K85.1",
            term="Pancréatite biliaire",
            verdict="CONFIRMED",
            evidence=["Conclusion: «Pancréatite aiguë biliaire»"],
        )
        dossier = _dossier(doc_type="crh", dp_selection=crh_sel)

        finalize_dp(dossier)

        assert dossier.dp_trackare is None  # no Trackare source here
        assert dossier.dp_crh_only is not None
        assert dossier.dp_crh_only.chosen_code == "K85.1"
        assert dossier.dp_final is not None
        assert dossier.dp_final.chosen_code == "K85.1"

    def test_trackare_dossier_sets_dp_trackare(self):
        """A Trackare dossier gets ``dp_trackare`` set and ``dp_crh_only`` None."""
        trackare_sel = _sel(
            "K81.0",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
            reason="DP Trackare — source d'autorité",
        )
        dossier = _dossier(doc_type="trackare", dp_code="K81.0", dp_selection=trackare_sel)

        finalize_dp(dossier)

        assert dossier.dp_trackare is not None
        assert dossier.dp_trackare.chosen_code == "K81.0"
        assert dossier.dp_crh_only is None
        # Guard first so a missing dp_final fails as an assertion rather
        # than as an AttributeError on the next line.
        assert dossier.dp_final is not None
        assert dossier.dp_final.chosen_code == "K81.0"
        assert dossier.dp_final.verdict == "CONFIRMED"

    def test_serializable(self):
        """The dossier can still be dumped to a plain dict after finalize_dp."""
        crh_sel = _sel(
            "I26.9",
            verdict="CONFIRMED",
            evidence=["Diagnostic de sortie: «EP»"],
            candidates=[_cand("I26.9", term="EP", score=6.0)],
        )
        dossier = _dossier(doc_type="crh", dp_selection=crh_sel)
        finalize_dp(dossier)

        # NOTE(review): this exercises the dict dump only; a JSON encoding
        # step would be needed to really prove JSON serialisability.
        dumped = dossier.model_dump(exclude_none=True)

        assert "dp_final" in dumped
        assert dumped["dp_final"]["chosen_code"] == "I26.9"
        assert "quality_flags" in dumped