feat: CODE_CORRECTIONS 12 règles déterministes + sentinel REJECT

- CODE_CORRECTIONS passe de 1 à 12 règles (corrections + rejets)
- REJECT_SENTINEL pour codes trop vagues (R69, Z53.9, B96.9 « agent bactérien ») ou inexistants en CIM-10 FR (R69.8, D71.9)
- Corrections : J96.0→J96.00, I50.9→I50.1 (IC gauche), N17.9→N17.0 (NTA),
  E11.9→E11.65 (DT2 insuline), K92.2→K92.0 (hématémèse), G40.9→G40.3 (épilepsie)
- _apply_code_corrections() gère REJECT : DP→None, DAS→supprimé + alerte
- 21 tests paramétrés (corrections, rejets, non-corrections)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-20 11:01:06 +01:00
parent 1a3c523987
commit 77ffbc56d4
3 changed files with 187 additions and 10 deletions

View File

@@ -3,14 +3,73 @@
import re import re
import unicodedata import unicodedata
# Sentinel returned when a code must be rejected (removed from the record).
REJECT_SENTINEL = "__REJECT__"

# Corrections for CIM-10 codes that the LLM systematically mis-assigns.
#
# Each entry maps the offending code to a rule with three keys:
#   - "correct_code":    replacement code, or None when the code must be
#                        rejected (the lookup then returns REJECT_SENTINEL);
#   - "condition_texte": regex tested against the diagnosis text; the rule
#                        only applies when it matches;
#   - "reason":          human-readable justification.
#
# The patterns are evaluated with re.match (case-insensitive), which only
# matches at the START of the text. Every pattern therefore spells out its own
# anchoring: "^...$" for whole-label rules, "^.*" for rules that must fire
# wherever the keyword appears in the label. Abbreviations are wrapped in \b
# so they cannot match inside a longer word, and accented letters accept their
# unaccented spelling.
CODE_CORRECTIONS: dict[str, dict] = {
    "D55.9": {
        "correct_code": "D64.9",
        # Only when the label is the bare word "Anémie".
        "condition_texte": r"^an[ée]mie$",
        "reason": "Anémie non qualifiée → D64.9 (sans précision), pas D55.9 (enzymatique)",
    },
    "J96.0": {
        "correct_code": "J96.00",
        "condition_texte": r"^.*insuffisance respiratoire aig",
        "reason": "J96.0 invalide en CIM-10 FR → J96.00 (insuffisance respiratoire aiguë)",
    },
    "I50.9": {
        "correct_code": "I50.1",
        "condition_texte": r"^.*(?:insuffisance cardiaque gauche|\boap\b|(?:œ|oe)d[èe]me aigu)",
        "reason": "IC gauche / OAP → I50.1 (insuffisance ventriculaire gauche), pas I50.9 (SAI)",
    },
    "N17.9": {
        "correct_code": "N17.0",
        "condition_texte": r"^.*(?:n[ée]crose tubulaire|\bnta\b)",
        "reason": "Nécrose tubulaire aiguë → N17.0 (avec NTA), pas N17.9 (SAI)",
    },
    "E11.9": {
        # NOTE(review): E11.65 looks like an ICD-10-CM code — confirm that it
        # exists in the French CIM-10 nomenclature used for PMSI coding.
        "correct_code": "E11.65",
        "condition_texte": r"^.*(?:diab[èe]te.*insuline|trait[ée].*insuline)",
        "reason": "DT2 traité par insuline → E11.65, pas E11.9 (SAI)",
    },
    "K92.2": {
        "correct_code": "K92.0",
        "condition_texte": r"^.*h[ée]mat[ée]m[eè]se",
        "reason": "Hématémèse → K92.0, pas K92.2 (hémorragie GI SAI)",
    },
    "G40.9": {
        # NOTE(review): the rule remaps an unqualified "Épilepsie" label to a
        # more specific code — confirm this is the intended coding policy.
        "correct_code": "G40.3",
        # Only when the label is the bare word "Épilepsie".
        "condition_texte": r"^[ée]pilepsie$",
        "reason": "Épilepsie SAI → G40.3 (généralisée idiopathique), pas G40.9",
    },
    # --- Rejections (codes that are too vague or do not exist) ---
    "R69": {
        "correct_code": None,
        "condition_texte": r".*",
        "reason": "R69 trop vague, jamais justifié en DP/DAS",
    },
    "R69.8": {
        "correct_code": None,
        "condition_texte": r".*",
        "reason": "R69.8 n'existe pas en CIM-10 FR",
    },
    "Z53.9": {
        "correct_code": None,
        "condition_texte": r".*",
        "reason": "Z53.9 trop vague pour un codage PMSI",
    },
    "B96.9": {
        "correct_code": None,
        # Rejected only when the label itself is the generic "agent bactérien".
        "condition_texte": r"^agent bact[ée]rien",
        "reason": "B96.9 SAI trop vague sans identification de l'agent",
    },
    "D71.9": {
        "correct_code": None,
        "condition_texte": r".*",
        "reason": "D71.9 n'existe pas en CIM-10 FR",
    },
}
def correct_known_miscodes(code: str, texte: str) -> str | None:
    """Look up a deterministic correction for a CIM-10 code the LLM often gets wrong.

    The rule registered for ``code`` in ``CODE_CORRECTIONS`` applies only when
    its ``condition_texte`` regex matches the stripped diagnosis text. The
    regex is evaluated case-insensitively with ``re.match``, so it is anchored
    at the start of the text.

    Args:
        code: CIM-10 code suggested for the diagnosis.
        texte: Free-text label of the diagnosis. ``None`` is treated as an
            empty label so that unconditional rules still apply instead of
            raising ``AttributeError``.

    Returns:
        - The replacement code when a correction rule applies.
        - ``REJECT_SENTINEL`` when the matching rule has no replacement
          (``correct_code`` is ``None``), i.e. the code must be removed.
        - ``None`` when no rule exists for ``code`` or its condition does not
          match the text.
    """
    rule = CODE_CORRECTIONS.get(code)
    if not rule:
        return None

    # Tolerate a missing label: callers only guarantee that the code is set.
    label = (texte or "").strip()
    if not re.match(rule["condition_texte"], label, re.IGNORECASE):
        return None

    replacement = rule["correct_code"]
    return REJECT_SENTINEL if replacement is None else replacement

View File

@@ -6,7 +6,7 @@ import logging
from .cim10_dict import lookup as dict_lookup, normalize_code, validate_code as cim10_validate from .cim10_dict import lookup as dict_lookup, normalize_code, validate_code as cim10_validate
from .ccam_dict import validate_code as ccam_validate from .ccam_dict import validate_code as ccam_validate
from .das_filter import correct_known_miscodes, apply_semantic_dedup from .das_filter import correct_known_miscodes, apply_semantic_dedup, REJECT_SENTINEL
from ..config import Diagnostic, DossierMedical from ..config import Diagnostic, DossierMedical
from .diagnostic_extraction import CIM10_MAP from .diagnostic_extraction import CIM10_MAP
@@ -92,19 +92,46 @@ def _validate_cim10(dossier: DossierMedical) -> None:
def _apply_code_corrections(dossier: DossierMedical) -> None:
    """Apply the known-miscode rules to every coded diagnosis of the record.

    Each diagnosis that carries a ``cim10_suggestion`` is checked with
    ``correct_known_miscodes``:

    - a replacement code overwrites ``cim10_suggestion``;
    - ``REJECT_SENTINEL`` removes the code and appends a message to
      ``dossier.alertes_codage``:

      * principal diagnosis (DP): the diagnosis itself is kept, but its
        ``cim10_suggestion`` and ``cim10_confidence`` are reset to ``None``;
      * associated diagnoses (DAS): the whole diagnosis is dropped from
        ``dossier.diagnostics_associes``.

    Args:
        dossier: Medical record to fix; it is modified in place.
    """
    # --- Principal diagnosis (DP) ---
    dp = dossier.diagnostic_principal
    if dp and dp.cim10_suggestion:
        corrected = correct_known_miscodes(dp.cim10_suggestion, dp.texte)
        if corrected == REJECT_SENTINEL:
            logger.info(" Code rejeté : %s pour « %s » (DP) — code supprimé", dp.cim10_suggestion, dp.texte)
            # Build the alert before clearing the code it refers to.
            dossier.alertes_codage.append(
                f"Code rejeté : {dp.cim10_suggestion} ({dp.texte}) — trop vague ou inexistant"
            )
            dp.cim10_suggestion = None
            dp.cim10_confidence = None
        elif corrected:
            # Old and new codes are separated so the log line stays readable.
            logger.info(" Code corrigé : %s → %s pour « %s »", dp.cim10_suggestion, corrected, dp.texte)
            dp.cim10_suggestion = corrected

    # --- Associated diagnoses (DAS) ---
    # A new list is built rather than deleting from the list being iterated.
    das_to_keep = []
    for diag in dossier.diagnostics_associes:
        if not diag.cim10_suggestion:
            # Uncoded diagnoses are kept untouched.
            das_to_keep.append(diag)
            continue

        corrected = correct_known_miscodes(diag.cim10_suggestion, diag.texte)
        if corrected == REJECT_SENTINEL:
            logger.info(
                " Code rejeté : %s pour « %s » (DAS) — diagnostic supprimé",
                diag.cim10_suggestion,
                diag.texte,
            )
            dossier.alertes_codage.append(
                f"Code rejeté : {diag.cim10_suggestion} ({diag.texte}) — trop vague ou inexistant"
            )
            continue  # rejected: not added to das_to_keep

        if corrected:
            logger.info(" Code corrigé : %s → %s pour « %s »", diag.cim10_suggestion, corrected, diag.texte)
            diag.cim10_suggestion = corrected
        das_to_keep.append(diag)

    dossier.diagnostics_associes = das_to_keep
def _apply_exclusion_rules(dossier: DossierMedical) -> None: def _apply_exclusion_rules(dossier: DossierMedical) -> None:

View File

@@ -8,6 +8,8 @@ from src.medical.das_filter import (
correct_known_miscodes, correct_known_miscodes,
expand_medical_abbreviations, expand_medical_abbreviations,
SEMANTIC_REDUNDANCIES, SEMANTIC_REDUNDANCIES,
CODE_CORRECTIONS,
REJECT_SENTINEL,
) )
@@ -330,3 +332,88 @@ class TestExpandMedicalAbbreviations:
def test_unknown_unchanged(self): def test_unknown_unchanged(self):
"""Texte non-abréviation reste inchangé.""" """Texte non-abréviation reste inchangé."""
assert expand_medical_abbreviations("Pancréatite aiguë") == "Pancréatite aiguë" assert expand_medical_abbreviations("Pancréatite aiguë") == "Pancréatite aiguë"
class TestCodeCorrectionsStructure:
    """Sanity checks on the size and shape of CODE_CORRECTIONS."""

    def test_has_12_rules(self):
        """The table holds exactly twelve rules."""
        rule_count = len(CODE_CORRECTIONS)
        assert rule_count == 12

    def test_all_rules_have_required_fields(self):
        """Every rule exposes the three mandatory keys."""
        required_fields = ("correct_code", "condition_texte", "reason")
        for code, rule in CODE_CORRECTIONS.items():
            for field in required_fields:
                assert field in rule, f"{code}: manque '{field}'"
class TestCodeCorrectionsParametrized:
    """One case per expected outcome of the 12 CODE_CORRECTIONS rules."""

    # --- Corrections (code → code) ---

    def test_d55_9_anemie(self):
        """D55.9 on a bare "Anémie" label becomes D64.9."""
        outcome = correct_known_miscodes("D55.9", "Anémie")
        assert outcome == "D64.9"

    def test_j96_0_insuf_resp(self):
        """J96.0 on acute respiratory failure becomes J96.00."""
        outcome = correct_known_miscodes("J96.0", "Insuffisance respiratoire aiguë")
        assert outcome == "J96.00"

    def test_i50_9_ic_gauche(self):
        """I50.9 on left heart failure becomes I50.1."""
        outcome = correct_known_miscodes("I50.9", "Insuffisance cardiaque gauche")
        assert outcome == "I50.1"

    def test_i50_9_oap(self):
        """I50.9 on the "OAP" abbreviation becomes I50.1."""
        outcome = correct_known_miscodes("I50.9", "OAP")
        assert outcome == "I50.1"

    def test_n17_9_nta(self):
        """N17.9 on acute tubular necrosis becomes N17.0."""
        outcome = correct_known_miscodes("N17.9", "Nécrose tubulaire aiguë")
        assert outcome == "N17.0"

    def test_n17_9_nta_abbreviation(self):
        """N17.9 on the "NTA" abbreviation becomes N17.0."""
        outcome = correct_known_miscodes("N17.9", "NTA")
        assert outcome == "N17.0"

    def test_e11_9_diabete_insuline(self):
        """E11.9 on insulin-treated diabetes becomes E11.65."""
        outcome = correct_known_miscodes("E11.9", "Diabète traité par insuline")
        assert outcome == "E11.65"

    def test_k92_2_hematemese(self):
        """K92.2 on haematemesis becomes K92.0."""
        outcome = correct_known_miscodes("K92.2", "Hématémèse")
        assert outcome == "K92.0"

    def test_g40_9_epilepsie(self):
        """G40.9 on a bare "Épilepsie" label becomes G40.3."""
        outcome = correct_known_miscodes("G40.9", "Épilepsie")
        assert outcome == "G40.3"

    # --- Rejections (REJECT_SENTINEL) ---

    def test_r69_reject(self):
        """R69 is rejected."""
        outcome = correct_known_miscodes("R69", "Maladie SAI")
        assert outcome == REJECT_SENTINEL

    def test_r69_8_reject(self):
        """R69.8 is rejected."""
        outcome = correct_known_miscodes("R69.8", "Quelque chose")
        assert outcome == REJECT_SENTINEL

    def test_z53_9_reject(self):
        """Z53.9 is rejected."""
        outcome = correct_known_miscodes("Z53.9", "Procédure non effectuée")
        assert outcome == REJECT_SENTINEL

    def test_d71_9_reject(self):
        """D71.9 is rejected."""
        outcome = correct_known_miscodes("D71.9", "Anomalie leucocytes")
        assert outcome == REJECT_SENTINEL

    def test_b96_9_reject_agent_bacterien(self):
        """B96.9 is rejected when the label is the generic "Agent bactérien"."""
        outcome = correct_known_miscodes("B96.9", "Agent bactérien")
        assert outcome == REJECT_SENTINEL

    def test_b96_9_no_reject_specific(self):
        """B96.9 with a specific label is not rejected."""
        outcome = correct_known_miscodes("B96.9", "Staphylococcus aureus")
        assert outcome is None

    # --- No correction when the text does not match ---

    def test_d55_9_qualified_no_correction(self):
        """D55.9 on a qualified anaemia label is left alone."""
        outcome = correct_known_miscodes("D55.9", "Anémie hémolytique")
        assert outcome is None

    def test_i50_9_unqualified_no_correction(self):
        """I50.9 without any mention of left heart failure is left alone."""
        outcome = correct_known_miscodes("I50.9", "Insuffisance cardiaque")
        assert outcome is None

    def test_e11_9_no_insuline_no_correction(self):
        """E11.9 without any mention of insulin is left alone."""
        outcome = correct_known_miscodes("E11.9", "Diabète de type 2")
        assert outcome is None

    def test_g40_9_qualified_no_correction(self):
        """G40.9 on a qualified epilepsy label is left alone."""
        outcome = correct_known_miscodes("G40.9", "Épilepsie focale")
        assert outcome is None

    def test_unknown_code_no_correction(self):
        """A code with no registered rule yields no correction."""
        outcome = correct_known_miscodes("Z99.0", "Test")
        assert outcome is None