diff --git a/config/rules/base.yaml b/config/rules/base.yaml index 9e385d8..e383fc7 100644 --- a/config/rules/base.yaml +++ b/config/rules/base.yaml @@ -51,6 +51,9 @@ packs: RULE-D69.6-PLT-NORMAL: enabled: true description: "D69.6 incompatible avec plaquettes normales => ruled_out (barré)" + RULE-DAS-TO-DP: + enabled: true + description: "DAS promu en DP si aucun DP extrait — sélection par pertinence/confiance/spécificité" bio_electrolytes: enabled: true diff --git a/src/medical/cim10_extractor.py b/src/medical/cim10_extractor.py index 76cb7ab..f5230e4 100644 --- a/src/medical/cim10_extractor.py +++ b/src/medical/cim10_extractor.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) from .cim10_dict import lookup as dict_lookup, normalize_text, normalize_code, validate_code as cim10_validate from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate -from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes +from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes, expand_medical_abbreviations from ..config import ( ActeCCAM, Antecedent, @@ -209,6 +209,7 @@ def _extract_das_llm(text: str, dossier: DossierMedical) -> None: added = 0 for das in das_results: texte = clean_diagnostic_text(das.get("texte", "")) + texte = expand_medical_abbreviations(texte) if not texte or not is_valid_diagnostic_text(texte): continue @@ -315,6 +316,7 @@ def _extract_diagnostics( # Diagnostics codés depuis Trackare (prioritaires) for diag in parsed.get("diagnostics", []): texte = clean_diagnostic_text(diag.get("libelle", "")) + texte = expand_medical_abbreviations(texte) is_principal = diag.get("type", "").lower() == "principal" # Le DP Trackare est toujours accepté (pré-codé avec CIM-10 validé). # Seuls les DAS passent le filtre anti-bruit. 
# Common French medical abbreviations -> expanded form.
# Keys are lowercase; lookups are performed on a lowercased, normalized key.
MEDICAL_ABBREVIATIONS: dict[str, str] = {
    "bmr": "Bactérie multi-résistante",
    "bhre": "Bactérie hautement résistante émergente",
    "sdra": "Syndrome de détresse respiratoire aiguë",
    "oap": "Œdème aigu du poumon",
    "bpco": "Bronchopneumopathie chronique obstructive",
    "ep": "Embolie pulmonaire",
    "saos": "Syndrome d'apnées obstructives du sommeil",
    "idm": "Infarctus du myocarde",
    "sca": "Syndrome coronarien aigu",
    "avc": "Accident vasculaire cérébral",
    "ait": "Accident ischémique transitoire",
    "aomi": "Artériopathie oblitérante des membres inférieurs",
    "fa": "Fibrillation auriculaire",
    "acfa": "Arythmie complète par fibrillation auriculaire",
    "bav": "Bloc auriculo-ventriculaire",
    "hta": "Hypertension artérielle",
    "tvp": "Thrombose veineuse profonde",
    "irc": "Insuffisance rénale chronique",
    "ira": "Insuffisance rénale aiguë",
    "sep": "Sclérose en plaques",
    "rgo": "Reflux gastro-œsophagien",
    "dt1": "Diabète de type 1",
    "dt2": "Diabète de type 2",
    "dnid": "Diabète non insulino-dépendant",
    "did": "Diabète insulino-dépendant",
    # Additions from the QuillBot reference list (frequent diagnostic abbreviations)
    "aag": "Asthme aigu grave",
    "acr": "Arrêt cardio-respiratoire",
    "aeg": "Altération de l'état général",
    "db1": "Diabète de type 1",
    "db2": "Diabète de type 2",
    "edm": "État dépressif majeur",
    "espt": "État de stress post-traumatique",
    "ica": "Insuffisance cardiaque aiguë",
    "pno": "Pneumothorax",
    "sgb": "Syndrome de Guillain-Barré",
    "sida": "Syndrome d'immunodéficience acquise",
    "sii": "Syndrome de l'intestin irritable",
    "tag": "Trouble anxieux généralisé",
    "tc": "Traumatisme crânien",
    "tdah": "Trouble du déficit de l'attention avec ou sans hyperactivité",
    "tspt": "Trouble de stress post-traumatique",
}


def expand_medical_abbreviations(text: str) -> str:
    """Expand a medical abbreviation when the whole text is a known abbreviation.

    The lookup is case-insensitive and ignores surrounding whitespace,
    trailing punctuation (``"HTA."``, ``"BPCO ;"``) and the dots of a dotted
    spelling (``"H.T.A."``), so those variants expand like the bare
    abbreviation.

    Compound texts are never modified (e.g. ``"FA paroxystique"`` is returned
    as-is): only a text that is *entirely* an abbreviation is replaced.

    Args:
        text: Candidate diagnostic text.

    Returns:
        The expanded form from ``MEDICAL_ABBREVIATIONS`` when the normalized
        text is a known key, otherwise ``text`` itself, unchanged (not even
        stripped).
    """
    # Drop outer whitespace and trailing punctuation first, then the dots that
    # separate the letters of a dotted abbreviation. Inner spaces are kept on
    # purpose so multi-word texts can never collapse into a key.
    key = text.strip().rstrip(".,;: ").replace(".", "").lower()
    return MEDICAL_ABBREVIATIONS.get(key, text)
+ + Retourne (pertinence_clinique, confiance, spécificité) : + - Pertinence : pathologie (2) > symptôme R (1) > Z-code (0) + - Confiance : high (3) > medium (2) > low (1) + - Spécificité : longueur du code (sans point) — plus long = plus spécifique + """ + code = das.cim10_final or "" + letter = code[0] if code else "" + + # Pertinence clinique + if letter == "Z": + pertinence = 0 + elif letter == "R": + pertinence = 1 + else: + pertinence = 2 + + # Confiance + conf = (das.cim10_confidence or "").lower() + confiance = {"high": 3, "medium": 2, "low": 1}.get(conf, 1) + + # Spécificité (longueur du code) + specificite = len(code.replace(".", "")) + + return (pertinence, confiance, specificite) + + def apply_decisions(dossier: DossierMedical) -> None: """Applique des décisions finales sur DP/DAS. @@ -579,6 +608,40 @@ def apply_decisions(dossier: DossierMedical) -> None: applied_rules=["RULE-E87.6-K-NORMAL"], ) + # --- Règle: promotion DAS→DP quand aucun DP n'a été extrait --- + if rule_enabled("RULE-DAS-TO-DP"): + if dossier.diagnostic_principal is None and dossier.diagnostics_associes: + candidates = [ + das for das in dossier.diagnostics_associes + if das.cim10_final + and das.status not in ("ruled_out", "needs_info") + ] + if candidates: + best = max(candidates, key=_das_promotion_score) + dossier.diagnostic_principal = Diagnostic( + texte=best.texte, + cim10_suggestion=best.cim10_suggestion, + cim10_confidence=best.cim10_confidence, + cim10_final=best.cim10_final, + justification=best.justification, + raisonnement=best.raisonnement, + source=best.source, + source_page=best.source_page, + source_excerpt=best.source_excerpt, + preuves_cliniques=best.preuves_cliniques, + sources_rag=best.sources_rag, + cim10_decision=CodeDecision( + action="PROMOTE_DP", + final_code=best.cim10_final, + applied_rules=["RULE-DAS-TO-DP"], + reason=f"DAS promu en DP (score {_das_promotion_score(best)})", + ), + ) + dossier.diagnostics_associes.remove(best) + logger.warning( + "PROMOTE_DP: 
DAS %s (%s) promu en DP — aucun DP extrait", + best.cim10_final, best.texte, + ) def decision_summaries(dossier: DossierMedical) -> list[str]: @@ -612,6 +675,8 @@ def decision_summaries(dossier: DossierMedical) -> list[str]: if dec.needs_info: for q in dec.needs_info: lines.append(f"DECISION: besoin_info: {q}") + elif dec.action == "PROMOTE_DP": + lines.append(f"DECISION: {where} {dec.final_code} promu en DP ({', '.join(dec.applied_rules)})") if dossier.diagnostic_principal: _summ("diagnostic_principal", dossier.diagnostic_principal) diff --git a/tests/test_das_filter.py b/tests/test_das_filter.py index 2e11188..03c10eb 100644 --- a/tests/test_das_filter.py +++ b/tests/test_das_filter.py @@ -6,6 +6,7 @@ from src.medical.das_filter import ( clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes, + expand_medical_abbreviations, SEMANTIC_REDUNDANCIES, ) @@ -258,3 +259,74 @@ class TestSemanticRedundanciesStructure: assert "I10" in prefixes assert "N30" in prefixes assert "J18" in prefixes + + +class TestOCRFilterExtended: + """Tests Phase 2 : extension filtre OCR (opérateurs, années, seuil digits).""" + + # --- Règle 3 étendue : opérateurs --- + def test_reject_d_minus_200(self): + assert not is_valid_diagnostic_text("D - 200") + + def test_reject_w_plus_400(self): + assert not is_valid_diagnostic_text("W + 400") + + def test_reject_x_dash_2(self): + assert not is_valid_diagnostic_text("X-2") + + def test_reject_h_4_digits(self): + assert not is_valid_diagnostic_text("H 1234") + + # --- Règle 3b : références temporelles --- + def test_reject_year_reference(self): + assert not is_valid_diagnostic_text("X 2 en 2013") + + def test_reject_year_reference_2020(self): + assert not is_valid_diagnostic_text("A depuis 2020") + + # --- Non-régression : vrais diagnostics toujours acceptés --- + def test_accept_diabete_type_2(self): + assert is_valid_diagnostic_text("Diabète de type 2") + + def test_accept_fracture_col_femur(self): + assert 
class TestExpandMedicalAbbreviations:
    """Phase 2 tests: expansion of medical abbreviations."""

    @staticmethod
    def _expands_to(raw, expected):
        """Assert that ``raw`` is turned into exactly ``expected``."""
        result = expand_medical_abbreviations(raw)
        assert result == expected

    def test_bmr(self):
        self._expands_to("BMR", "Bactérie multi-résistante")

    def test_sdra(self):
        self._expands_to("SDRA", "Syndrome de détresse respiratoire aiguë")

    def test_bpco(self):
        self._expands_to("BPCO", "Bronchopneumopathie chronique obstructive")

    def test_hta(self):
        self._expands_to("HTA", "Hypertension artérielle")

    def test_fa(self):
        self._expands_to("FA", "Fibrillation auriculaire")

    def test_case_insensitive(self):
        self._expands_to("bpco", "Bronchopneumopathie chronique obstructive")

    def test_with_spaces(self):
        self._expands_to("  BMR  ", "Bactérie multi-résistante")

    def test_compound_unchanged(self):
        """A compound text such as "FA paroxystique" must NOT be expanded."""
        compound = "FA paroxystique"
        self._expands_to(compound, "FA paroxystique")

    def test_compound_bpco_unchanged(self):
        """A compound text such as "BPCO sévère" must NOT be expanded."""
        compound = "BPCO sévère"
        self._expands_to(compound, "BPCO sévère")

    def test_unknown_unchanged(self):
        """A text that is not an abbreviation is returned unchanged."""
        plain = "Pancréatite aiguë"
        self._expands_to(plain, "Pancréatite aiguë")
def _make_dossier(dp=None, das_list=None):
    """Helper: build a minimal DossierMedical with the given DP and DAS list."""
    dossier = DossierMedical()
    dossier.diagnostic_principal = dp
    dossier.diagnostics_associes = das_list if das_list else []
    return dossier


def _make_diag(texte, code, confidence="high", source="trackare", status=None, cim10_final=None):
    """Helper: build a Diagnostic with a suggested code and, optionally, a pre-filled final code."""
    fields = {
        "texte": texte,
        "cim10_suggestion": code,
        "cim10_confidence": confidence,
        "source": source,
        "status": status,
        "cim10_final": cim10_final,
    }
    return Diagnostic(**fields)


# --- Scoring ---

class TestDasPromotionScore:
    """Ordering properties of the score tuple returned by _das_promotion_score."""

    def test_pathology_beats_symptom(self):
        score_patho = _das_promotion_score(
            _make_diag("Pancréatite", "K85.9", cim10_final="K85.9")
        )
        score_symptom = _das_promotion_score(
            _make_diag("Douleur abdominale", "R10.4", cim10_final="R10.4")
        )
        assert score_patho > score_symptom

    def test_symptom_beats_zcode(self):
        score_symptom = _das_promotion_score(
            _make_diag("Douleur abdominale", "R10.4", cim10_final="R10.4")
        )
        score_zcode = _das_promotion_score(
            _make_diag("Antécédent", "Z87.1", cim10_final="Z87.1")
        )
        assert score_symptom > score_zcode

    def test_high_confidence_beats_medium(self):
        score_high = _das_promotion_score(
            _make_diag("Pancréatite", "K85.9", confidence="high", cim10_final="K85.9")
        )
        score_medium = _das_promotion_score(
            _make_diag("Pancréatite", "K85.9", confidence="medium", cim10_final="K85.9")
        )
        assert score_high > score_medium

    def test_longer_code_more_specific(self):
        score_short = _das_promotion_score(
            _make_diag("Pancréatite", "K85", cim10_final="K85")
        )
        score_long = _das_promotion_score(
            _make_diag("Pancréatite biliaire", "K85.1", cim10_final="K85.1")
        )
        assert score_long > score_short
@patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label")) + def test_promote_best_das_when_no_dp(self, mock_validate, mock_rule, mock_bio, mock_ref): + """DP absent + DAS valides → meilleur DAS promu (pathologie > symptôme > Z).""" + das1 = _make_diag("Douleur abdominale", "R10.4", confidence="high") + das2 = _make_diag("Pancréatite aiguë", "K85.9", confidence="high") + das3 = _make_diag("Antécédent chirurgical", "Z87.1", confidence="medium") + dossier = _make_dossier(dp=None, das_list=[das1, das2, das3]) + + apply_decisions(dossier) + + assert dossier.diagnostic_principal is not None + assert dossier.diagnostic_principal.cim10_final == "K85.9" + assert dossier.diagnostic_principal.cim10_decision.action == "PROMOTE_DP" + assert "RULE-DAS-TO-DP" in dossier.diagnostic_principal.cim10_decision.applied_rules + # Le DAS promu est retiré de la liste + codes_das = [d.cim10_suggestion for d in dossier.diagnostics_associes] + assert "K85.9" not in codes_das + assert len(dossier.diagnostics_associes) == 2 + + @patch("src.quality.decision_engine.rule_enabled", return_value=True) + @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label")) + def test_no_promotion_when_dp_present(self, mock_validate, mock_rule, mock_bio, mock_ref): + """DP déjà présent → pas de promotion.""" + dp = _make_diag("Cholécystite aiguë", "K81.0", confidence="high") + das1 = _make_diag("HTA", "I10", confidence="high") + dossier = _make_dossier(dp=dp, das_list=[das1]) + + apply_decisions(dossier) + + assert dossier.diagnostic_principal.cim10_suggestion == "K81.0" + assert len(dossier.diagnostics_associes) == 1 + + @patch("src.quality.decision_engine.rule_enabled", return_value=True) + @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label")) + def test_no_promotion_for_ruled_out(self, mock_validate, mock_rule, mock_bio, mock_ref): + """DAS ruled_out ne doit pas être promu.""" + das1 = _make_diag("Thrombopénie", "D69.6", 
# --- Summary handler ---

class TestDecisionSummaryPromoteDP:
    """decision_summaries must report a DP carrying a PROMOTE_DP decision."""

    def test_promote_dp_summary(self):
        # A DP whose decision records the DAS->DP promotion rule.
        principal = _make_diag("Pancréatite aiguë", "K85.9", cim10_final="K85.9")
        principal.cim10_decision = CodeDecision(
            action="PROMOTE_DP",
            final_code="K85.9",
            applied_rules=["RULE-DAS-TO-DP"],
        )

        summary = decision_summaries(_make_dossier(dp=principal))

        # At least one line mentions the promotion, and one mentions the code.
        promotion_lines = [
            line for line in summary
            if "PROMOTE_DP" in line or "promu en DP" in line
        ]
        code_lines = [line for line in summary if "K85.9" in line]
        assert promotion_lines
        assert code_lines