From 77ffbc56d4c3c5206302f26188cc218ad4cc4a93 Mon Sep 17 00:00:00 2001 From: dom Date: Fri, 20 Feb 2026 11:01:06 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20CODE=5FCORRECTIONS=2012=20r=C3=A8gles?= =?UTF-8?q?=20d=C3=A9terministes=20+=20sentinel=20REJECT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - CODE_CORRECTIONS passe de 1 à 12 règles (corrections + rejets) - REJECT_SENTINEL pour codes trop vagues (R69, R69.8, Z53.9, B96.9, D71.9) ou inexistants - Corrections : J96.0→J96.00, I50.9→I50.1 (IC gauche), N17.9→N17.0 (NTA), E11.9→E11.65 (DT2 insuline), K92.2→K92.0 (hématémèse), G40.9→G40.3 (épilepsie) - _apply_code_corrections() gère REJECT : DP→None, DAS→supprimé + alerte - 22 tests (2 de structure + 20 de corrections, rejets, non-corrections) Co-Authored-By: Claude Opus 4.6 --- src/medical/das_filter.py | 69 ++++++++++++++++++++++-- src/medical/validation_pipeline.py | 41 +++++++++++--- tests/test_das_filter.py | 87 ++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 10 deletions(-) diff --git a/src/medical/das_filter.py b/src/medical/das_filter.py index dc312e1..a3ddc29 100644 --- a/src/medical/das_filter.py +++ b/src/medical/das_filter.py @@ -3,14 +3,73 @@ import re import unicodedata +# Sentinel indiquant qu'un code doit être rejeté (supprimé du dossier) +REJECT_SENTINEL = "__REJECT__" + # Corrections de codes CIM-10 systématiquement mal attribués par le LLM -# D55.9 (anémie enzymatique) est proposé pour "Anémie" non qualifiée → D64.9 +# correct_code = None → le code est rejeté (REJECT_SENTINEL retourné) CODE_CORRECTIONS: dict[str, dict] = { "D55.9": { "correct_code": "D64.9", - "condition_texte": r"^an[ée]mie$", # uniquement si texte = "Anémie" seul + "condition_texte": r"^an[ée]mie$", "reason": "Anémie non qualifiée → D64.9 (sans précision), pas D55.9 (enzymatique)", }, + "J96.0": { + "correct_code": "J96.00", + "condition_texte": r"insuffisance respiratoire aig", + "reason": "J96.0 invalide en CIM-10 FR → J96.00 
(insuffisance respiratoire aiguë)", + }, + "I50.9": { + "correct_code": "I50.1", + "condition_texte": r"insuffisance cardiaque gauche|oap|œdème aigu", + "reason": "IC gauche / OAP → I50.1 (insuffisance ventriculaire gauche), pas I50.9 (SAI)", + }, + "N17.9": { + "correct_code": "N17.0", + "condition_texte": r"nécrose tubulaire|nta", + "reason": "Nécrose tubulaire aiguë → N17.0 (avec NTA), pas N17.9 (SAI)", + }, + "E11.9": { + "correct_code": "E11.65", + "condition_texte": r"diab[èe]te.*insuline|trait[ée].*insuline", + "reason": "DT2 traité par insuline → E11.65, pas E11.9 (SAI)", + }, + "K92.2": { + "correct_code": "K92.0", + "condition_texte": r"h[ée]mat[ée]m[eè]se", + "reason": "Hématémèse → K92.0, pas K92.2 (hémorragie GI SAI)", + }, + "G40.9": { + "correct_code": "G40.3", + "condition_texte": r"^[ée]pilepsie$", + "reason": "Épilepsie SAI → G40.3 (généralisée idiopathique), pas G40.9", + }, + # --- Rejets (codes trop vagues ou inexistants) --- + "R69": { + "correct_code": None, + "condition_texte": r".*", + "reason": "R69 trop vague, jamais justifié en DP/DAS", + }, + "R69.8": { + "correct_code": None, + "condition_texte": r".*", + "reason": "R69.8 n'existe pas en CIM-10 FR", + }, + "Z53.9": { + "correct_code": None, + "condition_texte": r".*", + "reason": "Z53.9 trop vague pour un codage PMSI", + }, + "B96.9": { + "correct_code": None, + "condition_texte": r"^agent bact[ée]rien", + "reason": "B96.9 SAI trop vague sans identification de l'agent", + }, + "D71.9": { + "correct_code": None, + "condition_texte": r".*", + "reason": "D71.9 n'existe pas en CIM-10 FR", + }, } @@ -205,11 +264,15 @@ def correct_known_miscodes(code: str, texte: str) -> str | None: """Corrige les codes CIM-10 systématiquement mal attribués par le LLM. Returns: - Le code corrigé, ou None si pas de correction nécessaire. + - Le code corrigé si une correction s'applique. + - REJECT_SENTINEL si le code doit être supprimé. + - None si pas de correction nécessaire. 
""" correction = CODE_CORRECTIONS.get(code) if not correction: return None if re.match(correction["condition_texte"], texte.strip(), re.IGNORECASE): + if correction["correct_code"] is None: + return REJECT_SENTINEL return correction["correct_code"] return None diff --git a/src/medical/validation_pipeline.py b/src/medical/validation_pipeline.py index b9ac03b..12f1740 100644 --- a/src/medical/validation_pipeline.py +++ b/src/medical/validation_pipeline.py @@ -6,7 +6,7 @@ import logging from .cim10_dict import lookup as dict_lookup, normalize_code, validate_code as cim10_validate from .ccam_dict import validate_code as ccam_validate -from .das_filter import correct_known_miscodes, apply_semantic_dedup +from .das_filter import correct_known_miscodes, apply_semantic_dedup, REJECT_SENTINEL from ..config import Diagnostic, DossierMedical from .diagnostic_extraction import CIM10_MAP @@ -92,19 +92,46 @@ def _validate_cim10(dossier: DossierMedical) -> None: def _apply_code_corrections(dossier: DossierMedical) -> None: - """Corrige les codes CIM-10 systématiquement mal attribués par le LLM.""" - all_diags = [] - if dossier.diagnostic_principal: - all_diags.append(dossier.diagnostic_principal) - all_diags.extend(dossier.diagnostics_associes) + """Corrige les codes CIM-10 systématiquement mal attribués par le LLM. 
- for diag in all_diags: + Si un code est marqué REJECT_SENTINEL, il est retiré : + - DP : code mis à None (pas de suppression du diagnostic principal) + - DAS : diagnostic supprimé de la liste + """ + # DP + if dossier.diagnostic_principal and dossier.diagnostic_principal.cim10_suggestion: + dp = dossier.diagnostic_principal + corrected = correct_known_miscodes(dp.cim10_suggestion, dp.texte) + if corrected == REJECT_SENTINEL: + logger.info(" Code rejeté : %s pour « %s » (DP) — code supprimé", dp.cim10_suggestion, dp.texte) + dossier.alertes_codage.append( + f"Code rejeté : {dp.cim10_suggestion} ({dp.texte}) — trop vague ou inexistant" + ) + dp.cim10_suggestion = None + dp.cim10_confidence = None + elif corrected: + logger.info(" Code corrigé : %s → %s pour « %s »", dp.cim10_suggestion, corrected, dp.texte) + dp.cim10_suggestion = corrected + + # DAS + das_to_keep = [] + for diag in dossier.diagnostics_associes: if not diag.cim10_suggestion: + das_to_keep.append(diag) continue corrected = correct_known_miscodes(diag.cim10_suggestion, diag.texte) + if corrected == REJECT_SENTINEL: + logger.info(" Code rejeté : %s pour « %s » (DAS) — diagnostic supprimé", diag.cim10_suggestion, diag.texte) + dossier.alertes_codage.append( + f"Code rejeté : {diag.cim10_suggestion} ({diag.texte}) — trop vague ou inexistant" + ) + continue # ne pas ajouter à das_to_keep if corrected: logger.info(" Code corrigé : %s → %s pour « %s »", diag.cim10_suggestion, corrected, diag.texte) diag.cim10_suggestion = corrected + das_to_keep.append(diag) + + dossier.diagnostics_associes = das_to_keep def _apply_exclusion_rules(dossier: DossierMedical) -> None: diff --git a/tests/test_das_filter.py b/tests/test_das_filter.py index 03c10eb..f30c0bb 100644 --- a/tests/test_das_filter.py +++ b/tests/test_das_filter.py @@ -8,6 +8,8 @@ from src.medical.das_filter import ( correct_known_miscodes, expand_medical_abbreviations, SEMANTIC_REDUNDANCIES, + CODE_CORRECTIONS, + REJECT_SENTINEL, ) @@ -330,3 +332,88 @@ 
class TestExpandMedicalAbbreviations: def test_unknown_unchanged(self): """Texte non-abréviation reste inchangé.""" assert expand_medical_abbreviations("Pancréatite aiguë") == "Pancréatite aiguë" + + +class TestCodeCorrectionsStructure: + """Vérifie le format et la complétude de CODE_CORRECTIONS.""" + + def test_has_12_rules(self): + assert len(CODE_CORRECTIONS) == 12 + + def test_all_rules_have_required_fields(self): + for code, rule in CODE_CORRECTIONS.items(): + assert "correct_code" in rule, f"{code}: manque 'correct_code'" + assert "condition_texte" in rule, f"{code}: manque 'condition_texte'" + assert "reason" in rule, f"{code}: manque 'reason'" + + +class TestCodeCorrectionsParametrized: + """Tests paramétrés pour les 12 règles CODE_CORRECTIONS.""" + + # --- Corrections (code → code) --- + + def test_d55_9_anemie(self): + assert correct_known_miscodes("D55.9", "Anémie") == "D64.9" + + def test_j96_0_insuf_resp(self): + assert correct_known_miscodes("J96.0", "Insuffisance respiratoire aiguë") == "J96.00" + + def test_i50_9_ic_gauche(self): + assert correct_known_miscodes("I50.9", "Insuffisance cardiaque gauche") == "I50.1" + + def test_i50_9_oap(self): + assert correct_known_miscodes("I50.9", "OAP") == "I50.1" + + def test_n17_9_nta(self): + assert correct_known_miscodes("N17.9", "Nécrose tubulaire aiguë") == "N17.0" + + def test_n17_9_nta_abbreviation(self): + assert correct_known_miscodes("N17.9", "NTA") == "N17.0" + + def test_e11_9_diabete_insuline(self): + assert correct_known_miscodes("E11.9", "Diabète traité par insuline") == "E11.65" + + def test_k92_2_hematemese(self): + assert correct_known_miscodes("K92.2", "Hématémèse") == "K92.0" + + def test_g40_9_epilepsie(self): + assert correct_known_miscodes("G40.9", "Épilepsie") == "G40.3" + + # --- Rejets (REJECT_SENTINEL) --- + + def test_r69_reject(self): + assert correct_known_miscodes("R69", "Maladie SAI") == REJECT_SENTINEL + + def test_r69_8_reject(self): + assert correct_known_miscodes("R69.8", 
"Quelque chose") == REJECT_SENTINEL + + def test_z53_9_reject(self): + assert correct_known_miscodes("Z53.9", "Procédure non effectuée") == REJECT_SENTINEL + + def test_d71_9_reject(self): + assert correct_known_miscodes("D71.9", "Anomalie leucocytes") == REJECT_SENTINEL + + def test_b96_9_reject_agent_bacterien(self): + assert correct_known_miscodes("B96.9", "Agent bactérien") == REJECT_SENTINEL + + def test_b96_9_no_reject_specific(self): + """B96.9 avec texte spécifique → pas de rejet.""" + assert correct_known_miscodes("B96.9", "Staphylococcus aureus") is None + + # --- Non-correction quand le texte ne matche pas --- + + def test_d55_9_qualified_no_correction(self): + assert correct_known_miscodes("D55.9", "Anémie hémolytique") is None + + def test_i50_9_unqualified_no_correction(self): + """I50.9 sans mention IC gauche → pas de correction.""" + assert correct_known_miscodes("I50.9", "Insuffisance cardiaque") is None + + def test_e11_9_no_insuline_no_correction(self): + assert correct_known_miscodes("E11.9", "Diabète de type 2") is None + + def test_g40_9_qualified_no_correction(self): + assert correct_known_miscodes("G40.9", "Épilepsie focale") is None + + def test_unknown_code_no_correction(self): + assert correct_known_miscodes("Z99.0", "Test") is None