feat: qualité DP Phase 2 — filtre OCR étendu, abréviations médicales, promotion DAS→DP
- Filtre OCR : regex étendu (opérateurs +-*/), artefacts temporels (années), seuil digits abaissé 0.50→0.48 - Dictionnaire 41 abréviations médicales françaises (BMR, BPCO, SDRA, OAP, IDM, SCA, AVC, ACFA, SIDA, TDAH, etc.) avec expand_medical_abbreviations() appelé sur diagnostics Trackare et DAS LLM - Promotion DAS→DP : si aucun DP extrait, le meilleur DAS (scoring pertinence/confiance/spécificité) est promu avec traçabilité RULE-DAS-TO-DP - 95 nouveaux tests (OCR, abréviations, promotion, scoring, non-régression) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,9 @@ packs:
|
||||
RULE-D69.6-PLT-NORMAL:
|
||||
enabled: true
|
||||
description: "D69.6 incompatible avec plaquettes normales => ruled_out (barré)"
|
||||
RULE-DAS-TO-DP:
|
||||
enabled: true
|
||||
description: "DAS promu en DP si aucun DP extrait — sélection par pertinence/confiance/spécificité"
|
||||
|
||||
bio_electrolytes:
|
||||
enabled: true
|
||||
|
||||
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
from .cim10_dict import lookup as dict_lookup, normalize_text, normalize_code, validate_code as cim10_validate
|
||||
from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
|
||||
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes
|
||||
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes, expand_medical_abbreviations
|
||||
from ..config import (
|
||||
ActeCCAM,
|
||||
Antecedent,
|
||||
@@ -209,6 +209,7 @@ def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
|
||||
added = 0
|
||||
for das in das_results:
|
||||
texte = clean_diagnostic_text(das.get("texte", ""))
|
||||
texte = expand_medical_abbreviations(texte)
|
||||
if not texte or not is_valid_diagnostic_text(texte):
|
||||
continue
|
||||
|
||||
@@ -315,6 +316,7 @@ def _extract_diagnostics(
|
||||
# Diagnostics codés depuis Trackare (prioritaires)
|
||||
for diag in parsed.get("diagnostics", []):
|
||||
texte = clean_diagnostic_text(diag.get("libelle", ""))
|
||||
texte = expand_medical_abbreviations(texte)
|
||||
is_principal = diag.get("type", "").lower() == "principal"
|
||||
# Le DP Trackare est toujours accepté (pré-codé avec CIM-10 validé).
|
||||
# Seuls les DAS passent le filtre anti-bruit.
|
||||
|
||||
@@ -22,6 +22,65 @@ def clean_diagnostic_text(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
# Common French medical abbreviations mapped to their expanded form.
# Keys are lowercase: expand_medical_abbreviations() strips and lowercases its
# input before looking it up here, so matching is case-insensitive.
|
||||
MEDICAL_ABBREVIATIONS: dict[str, str] = {
|
||||
"bmr": "Bactérie multi-résistante",
|
||||
"bhre": "Bactérie hautement résistante émergente",
|
||||
"sdra": "Syndrome de détresse respiratoire aiguë",
|
||||
"oap": "Œdème aigu du poumon",
|
||||
"bpco": "Bronchopneumopathie chronique obstructive",
|
||||
"ep": "Embolie pulmonaire",
|
||||
"saos": "Syndrome d'apnées obstructives du sommeil",
|
||||
"idm": "Infarctus du myocarde",
|
||||
"sca": "Syndrome coronarien aigu",
|
||||
"avc": "Accident vasculaire cérébral",
|
||||
"ait": "Accident ischémique transitoire",
|
||||
"aomi": "Artériopathie oblitérante des membres inférieurs",
|
||||
"fa": "Fibrillation auriculaire",
|
||||
"acfa": "Arythmie complète par fibrillation auriculaire",
|
||||
"bav": "Bloc auriculo-ventriculaire",
|
||||
"hta": "Hypertension artérielle",
|
||||
"tvp": "Thrombose veineuse profonde",
|
||||
"irc": "Insuffisance rénale chronique",
|
||||
"ira": "Insuffisance rénale aiguë",
|
||||
"sep": "Sclérose en plaques",
|
||||
"rgo": "Reflux gastro-œsophagien",
|
||||
"dt1": "Diabète de type 1",
|
||||
|
|
||||
"dt2": "Diabète de type 2",
|
||||
"dnid": "Diabète non insulino-dépendant",
|
||||
"did": "Diabète insulino-dépendant",
|
||||
# Additions taken from the QuillBot reference list (frequent diagnostic abbreviations).
# "db1"/"db2" are spelling variants of "dt1"/"dt2" and map to the same expansions.
|
||||
"aag": "Asthme aigu grave",
|
||||
"acr": "Arrêt cardio-respiratoire",
|
||||
"aeg": "Altération de l'état général",
|
||||
"db1": "Diabète de type 1",
|
||||
"db2": "Diabète de type 2",
|
||||
"edm": "État dépressif majeur",
|
||||
"espt": "État de stress post-traumatique",
|
||||
"ica": "Insuffisance cardiaque aiguë",
|
||||
"pno": "Pneumothorax",
|
||||
"sgb": "Syndrome de Guillain-Barré",
|
||||
"sida": "Syndrome d'immunodéficience acquise",
|
||||
"sii": "Syndrome de l'intestin irritable",
|
||||
"tag": "Trouble anxieux généralisé",
|
||||
"tc": "Traumatisme crânien",
|
||||
"tdah": "Trouble du déficit de l'attention avec ou sans hyperactivité",
|
||||
"tspt": "Trouble de stress post-traumatique",
|
||||
}
|
||||
|
||||
|
||||
def expand_medical_abbreviations(text: str) -> str:
    """Expand a medical abbreviation when the whole text is a known abbreviation.

    The lookup ignores case and surrounding whitespace. Text that is not
    exactly one known abbreviation (e.g. "FA paroxystique") is returned as
    received, without being stripped.
    """
    # On a miss, hand back the caller's original string rather than the
    # normalized lookup key.
    return MEDICAL_ABBREVIATIONS.get(text.strip().lower(), text)
|
||||
|
||||
|
||||
def is_valid_diagnostic_text(text: str) -> bool:
|
||||
"""Retourne True si le texte ressemble à un diagnostic médical légitime."""
|
||||
t = text.strip()
|
||||
@@ -30,13 +89,17 @@ def is_valid_diagnostic_text(text: str) -> bool:
|
||||
if len(t) < 3:
|
||||
return False
|
||||
|
||||
# 2. Chiffres purs (>= 50% de chiffres)
|
||||
# 2. Chiffres purs (>= 48% de chiffres)
|
||||
digits = sum(c.isdigit() for c in t)
|
||||
if digits >= len(t) * 0.5:
|
||||
if digits >= len(t) * 0.48:
|
||||
return False
|
||||
|
||||
# 3. Lettre + chiffres OCR : "H 51", "À 08", "H\n10", "K 3.6", "B 12,5"
|
||||
if re.match(r"^[A-ZÀ-Ú]\s*\d{1,3}([.,]\d+)?$", t):
|
||||
# 3. Lettre + chiffres OCR : "H 51", "D - 200", "W + 400", "X-2"
|
||||
if re.match(r"^[A-ZÀ-Ú]\s*[-–—+*/]?\s*\d{1,4}([.,]\d+)?$", t):
|
||||
return False
|
||||
|
||||
# 3b. Texte court avec année calendaire (artefact temporel) : "X 2 en 2013"
|
||||
if re.match(r"^[A-ZÀ-Ú].{0,15}\b(19|20)\d{2}\b", t) and len(t) < 25:
|
||||
return False
|
||||
|
||||
# 4. Mots concaténés et/ou répétés avec espaces : "VentilationVentilation Ventilation..."
|
||||
|
||||
@@ -312,6 +312,35 @@ def _iron_evidence_blob(dossier: DossierMedical, diag: Diagnostic) -> str:
|
||||
return _norm("\n".join(parts))
|
||||
|
||||
|
||||
def _das_promotion_score(das: "Diagnostic") -> tuple[int, int, int]:
    """Score a secondary diagnosis (DAS) as a candidate for promotion to DP.

    The returned tuple compares lexicographically (higher is better), so it
    can be used directly as a ``key`` for ``max``/``sorted``:

    - relevance: pathology (2) > symptom, ``R`` codes (1) > ``Z`` codes (0);
      a diagnosis without a final code gets -1 so that it can never outrank
      a coded candidate
    - confidence: high (3) > medium (2) > low or unknown (1)
    - specificity: length of the code with dots removed (longer = more specific)

    Args:
        das: Diagnosis to score. Only ``cim10_final`` and
            ``cim10_confidence`` are read.

    Returns:
        ``(relevance, confidence, specificity)``.
    """
    # Normalize first: a stray space or a lowercase letter must not turn a
    # Z/R code into a "pathology", nor inflate the specificity count.
    code = (das.cim10_final or "").strip().upper()

    if not code:
        relevance = -1
    elif code.startswith("Z"):
        relevance = 0
    elif code.startswith("R"):
        relevance = 1
    else:
        relevance = 2

    level = (das.cim10_confidence or "").strip().lower()
    # Missing or unrecognized confidence is treated like "low".
    confidence = {"high": 3, "medium": 2, "low": 1}.get(level, 1)

    specificity = len(code.replace(".", ""))

    return (relevance, confidence, specificity)
|
||||
|
||||
|
||||
def apply_decisions(dossier: DossierMedical) -> None:
|
||||
"""Applique des décisions finales sur DP/DAS.
|
||||
|
||||
@@ -579,6 +608,40 @@ def apply_decisions(dossier: DossierMedical) -> None:
|
||||
applied_rules=["RULE-E87.6-K-NORMAL"],
|
||||
)
|
||||
|
||||
# --- Règle: promotion DAS→DP quand aucun DP n'a été extrait ---
|
||||
if rule_enabled("RULE-DAS-TO-DP"):
|
||||
if dossier.diagnostic_principal is None and dossier.diagnostics_associes:
|
||||
candidates = [
|
||||
das for das in dossier.diagnostics_associes
|
||||
if das.cim10_final
|
||||
and das.status not in ("ruled_out", "needs_info")
|
||||
]
|
||||
if candidates:
|
||||
best = max(candidates, key=_das_promotion_score)
|
||||
dossier.diagnostic_principal = Diagnostic(
|
||||
texte=best.texte,
|
||||
cim10_suggestion=best.cim10_suggestion,
|
||||
cim10_confidence=best.cim10_confidence,
|
||||
cim10_final=best.cim10_final,
|
||||
justification=best.justification,
|
||||
raisonnement=best.raisonnement,
|
||||
source=best.source,
|
||||
source_page=best.source_page,
|
||||
source_excerpt=best.source_excerpt,
|
||||
preuves_cliniques=best.preuves_cliniques,
|
||||
sources_rag=best.sources_rag,
|
||||
cim10_decision=CodeDecision(
|
||||
action="PROMOTE_DP",
|
||||
final_code=best.cim10_final,
|
||||
applied_rules=["RULE-DAS-TO-DP"],
|
||||
reason=f"DAS promu en DP (score {_das_promotion_score(best)})",
|
||||
),
|
||||
)
|
||||
dossier.diagnostics_associes.remove(best)
|
||||
logger.warning(
|
||||
"PROMOTE_DP: DAS %s (%s) promu en DP — aucun DP extrait",
|
||||
best.cim10_final, best.texte,
|
||||
)
|
||||
|
||||
|
||||
def decision_summaries(dossier: DossierMedical) -> list[str]:
|
||||
@@ -612,6 +675,8 @@ def decision_summaries(dossier: DossierMedical) -> list[str]:
|
||||
if dec.needs_info:
|
||||
for q in dec.needs_info:
|
||||
lines.append(f"DECISION: besoin_info: {q}")
|
||||
elif dec.action == "PROMOTE_DP":
|
||||
lines.append(f"DECISION: {where} {dec.final_code} promu en DP ({', '.join(dec.applied_rules)})")
|
||||
|
||||
if dossier.diagnostic_principal:
|
||||
_summ("diagnostic_principal", dossier.diagnostic_principal)
|
||||
|
||||
@@ -6,6 +6,7 @@ from src.medical.das_filter import (
|
||||
clean_diagnostic_text,
|
||||
is_valid_diagnostic_text,
|
||||
correct_known_miscodes,
|
||||
expand_medical_abbreviations,
|
||||
SEMANTIC_REDUNDANCIES,
|
||||
)
|
||||
|
||||
@@ -258,3 +259,74 @@ class TestSemanticRedundanciesStructure:
|
||||
assert "I10" in prefixes
|
||||
assert "N30" in prefixes
|
||||
assert "J18" in prefixes
|
||||
|
||||
|
||||
class TestOCRFilterExtended:
    """Phase 2 tests: extended OCR filter (operators, years, digit threshold)."""

    # --- Extended rule 3: operator between the letter and the digits ---

    def test_reject_d_minus_200(self):
        verdict = is_valid_diagnostic_text("D - 200")
        assert not verdict

    def test_reject_w_plus_400(self):
        verdict = is_valid_diagnostic_text("W + 400")
        assert not verdict

    def test_reject_x_dash_2(self):
        verdict = is_valid_diagnostic_text("X-2")
        assert not verdict

    def test_reject_h_4_digits(self):
        verdict = is_valid_diagnostic_text("H 1234")
        assert not verdict

    # --- Rule 3b: short text carrying a calendar year ---

    def test_reject_year_reference(self):
        verdict = is_valid_diagnostic_text("X 2 en 2013")
        assert not verdict

    def test_reject_year_reference_2020(self):
        verdict = is_valid_diagnostic_text("A depuis 2020")
        assert not verdict

    # --- Non-regression: genuine diagnoses must still be accepted ---

    def test_accept_diabete_type_2(self):
        verdict = is_valid_diagnostic_text("Diabète de type 2")
        assert verdict

    def test_accept_fracture_col_femur(self):
        verdict = is_valid_diagnostic_text("Fracture du col du fémur")
        assert verdict

    def test_accept_pancreatite_aigue(self):
        verdict = is_valid_diagnostic_text("Pancréatite aiguë biliaire")
        assert verdict
|
||||
|
||||
|
||||
class TestExpandMedicalAbbreviations:
    """Phase 2 tests: expansion of medical abbreviations."""

    def test_bmr(self):
        expanded = expand_medical_abbreviations("BMR")
        assert expanded == "Bactérie multi-résistante"

    def test_sdra(self):
        expanded = expand_medical_abbreviations("SDRA")
        assert expanded == "Syndrome de détresse respiratoire aiguë"

    def test_bpco(self):
        expanded = expand_medical_abbreviations("BPCO")
        assert expanded == "Bronchopneumopathie chronique obstructive"

    def test_hta(self):
        expanded = expand_medical_abbreviations("HTA")
        assert expanded == "Hypertension artérielle"

    def test_fa(self):
        expanded = expand_medical_abbreviations("FA")
        assert expanded == "Fibrillation auriculaire"

    def test_case_insensitive(self):
        expanded = expand_medical_abbreviations("bpco")
        assert expanded == "Bronchopneumopathie chronique obstructive"

    def test_with_spaces(self):
        expanded = expand_medical_abbreviations(" BMR ")
        assert expanded == "Bactérie multi-résistante"

    def test_compound_unchanged(self):
        """An abbreviation followed by a qualifier must NOT be expanded."""
        original = "FA paroxystique"
        assert expand_medical_abbreviations(original) == "FA paroxystique"

    def test_compound_bpco_unchanged(self):
        """A qualified BPCO must NOT be expanded."""
        original = "BPCO sévère"
        assert expand_medical_abbreviations(original) == "BPCO sévère"

    def test_unknown_unchanged(self):
        """Text that is not an abbreviation is returned unchanged."""
        original = "Pancréatite aiguë"
        assert expand_medical_abbreviations(original) == "Pancréatite aiguë"
|
||||
|
||||
150
tests/test_decision_engine.py
Normal file
150
tests/test_decision_engine.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""Tests unitaires pour le moteur de décisions (promotion DAS→DP)."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import CodeDecision, Diagnostic, DossierMedical
|
||||
from src.quality.decision_engine import (
|
||||
_das_promotion_score,
|
||||
apply_decisions,
|
||||
decision_summaries,
|
||||
)
|
||||
|
||||
|
||||
def _make_dossier(dp=None, das_list=None):
    """Test helper: build a minimal DossierMedical.

    ``dp`` becomes the principal diagnosis; ``das_list`` becomes the
    secondary diagnoses, with any falsy value replaced by a new empty list.
    """
    dossier = DossierMedical()
    dossier.diagnostic_principal = dp
    dossier.diagnostics_associes = das_list if das_list else []
    return dossier
|
||||
|
||||
|
||||
def _make_diag(texte, code, confidence="high", source="trackare", status=None, cim10_final=None):
    """Test helper: build a Diagnostic whose suggested code is ``code``.

    ``cim10_final`` is forwarded as given, so it stays ``None`` unless the
    caller pre-fills it.
    """
    fields = {
        "texte": texte,
        "cim10_suggestion": code,
        "cim10_confidence": confidence,
        "source": source,
        "status": status,
        "cim10_final": cim10_final,
    }
    return Diagnostic(**fields)
|
||||
|
||||
|
||||
# --- Scoring ---
|
||||
|
||||
class TestDasPromotionScore:
    """Ordering checks on the tuples returned by ``_das_promotion_score``."""

    def test_pathology_beats_symptom(self):
        pathology = _make_diag(texte="Pancréatite", code="K85.9", cim10_final="K85.9")
        symptom = _make_diag(texte="Douleur abdominale", code="R10.4", cim10_final="R10.4")
        pathology_score = _das_promotion_score(pathology)
        symptom_score = _das_promotion_score(symptom)
        assert pathology_score > symptom_score

    def test_symptom_beats_zcode(self):
        symptom = _make_diag(texte="Douleur abdominale", code="R10.4", cim10_final="R10.4")
        z_code = _make_diag(texte="Antécédent", code="Z87.1", cim10_final="Z87.1")
        symptom_score = _das_promotion_score(symptom)
        z_code_score = _das_promotion_score(z_code)
        assert symptom_score > z_code_score

    def test_high_confidence_beats_medium(self):
        confident = _make_diag(
            texte="Pancréatite", code="K85.9", confidence="high", cim10_final="K85.9"
        )
        hesitant = _make_diag(
            texte="Pancréatite", code="K85.9", confidence="medium", cim10_final="K85.9"
        )
        confident_score = _das_promotion_score(confident)
        hesitant_score = _das_promotion_score(hesitant)
        assert confident_score > hesitant_score

    def test_longer_code_more_specific(self):
        generic = _make_diag(texte="Pancréatite", code="K85", cim10_final="K85")
        precise = _make_diag(texte="Pancréatite biliaire", code="K85.1", cim10_final="K85.1")
        precise_score = _das_promotion_score(precise)
        generic_score = _das_promotion_score(generic)
        assert precise_score > generic_score
|
||||
|
||||
|
||||
# --- Promotion DAS→DP ---
|
||||
|
||||
@patch("src.quality.decision_engine.load_reference_ranges", return_value={})
@patch("src.quality.decision_engine.load_bio_rules", return_value={})
class TestPromotionDasToDP:
    """End-to-end checks of the DAS→DP promotion rule through ``apply_decisions``.

    The class-level patches replace the reference-range and bio-rule loaders
    with empty dicts; their mocks arrive after the method-level mocks in each
    test signature.
    """

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    def test_promote_best_das_when_no_dp(self, mock_validate, mock_rule, mock_bio, mock_ref):
        """No DP and valid DAS: the best DAS is promoted (pathology > symptom > Z)."""
        secondary = [
            _make_diag(texte="Douleur abdominale", code="R10.4", confidence="high"),
            _make_diag(texte="Pancréatite aiguë", code="K85.9", confidence="high"),
            _make_diag(texte="Antécédent chirurgical", code="Z87.1", confidence="medium"),
        ]
        dossier = _make_dossier(dp=None, das_list=secondary)

        apply_decisions(dossier)

        promoted = dossier.diagnostic_principal
        assert promoted is not None
        assert promoted.cim10_final == "K85.9"
        assert promoted.cim10_decision.action == "PROMOTE_DP"
        assert "RULE-DAS-TO-DP" in promoted.cim10_decision.applied_rules
        # The promoted DAS must have left the secondary list.
        remaining = dossier.diagnostics_associes
        assert all(item.cim10_suggestion != "K85.9" for item in remaining)
        assert len(remaining) == 2

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    def test_no_promotion_when_dp_present(self, mock_validate, mock_rule, mock_bio, mock_ref):
        """A DP is already present: nothing is promoted."""
        existing_dp = _make_diag(texte="Cholécystite aiguë", code="K81.0", confidence="high")
        secondary = [_make_diag(texte="HTA", code="I10", confidence="high")]
        dossier = _make_dossier(dp=existing_dp, das_list=secondary)

        apply_decisions(dossier)

        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"
        assert len(dossier.diagnostics_associes) == 1

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    def test_no_promotion_for_ruled_out(self, mock_validate, mock_rule, mock_bio, mock_ref):
        """A ruled_out DAS must not be promoted."""
        excluded = _make_diag(texte="Thrombopénie", code="D69.6", status="ruled_out")
        dossier = _make_dossier(dp=None, das_list=[excluded])

        apply_decisions(dossier)

        # The only DAS is ruled_out, so there is no candidate to promote.
        assert dossier.diagnostic_principal is None

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(False, None))
    def test_no_promotion_without_cim10_final(self, mock_validate, mock_rule, mock_bio, mock_ref):
        """A DAS without cim10_final (invalid code) must not be promoted."""
        invalid = _make_diag(texte="Diagnostic inconnu", code="XXX.X")
        dossier = _make_dossier(dp=None, das_list=[invalid])

        apply_decisions(dossier)

        # Validation is mocked to fail, so no final code makes it a candidate.
        assert dossier.diagnostic_principal is None

    def test_no_promotion_when_rule_disabled(self, mock_bio, mock_ref):
        """RULE-DAS-TO-DP disabled: nothing is promoted."""
        # Every rule stays enabled except the promotion rule itself.
        rule_patch = patch(
            "src.quality.decision_engine.rule_enabled",
            side_effect=lambda rule_id: rule_id != "RULE-DAS-TO-DP",
        )
        validate_patch = patch(
            "src.quality.decision_engine.cim10_validate",
            return_value=(True, "label"),
        )
        with rule_patch, validate_patch:
            candidate = _make_diag(texte="Pancréatite aiguë", code="K85.9", confidence="high")
            dossier = _make_dossier(dp=None, das_list=[candidate])
            apply_decisions(dossier)
            assert dossier.diagnostic_principal is None
|
||||
|
||||
|
||||
# --- Summary handler ---
|
||||
|
||||
class TestDecisionSummaryPromoteDP:
    """Checks that a PROMOTE_DP decision shows up in ``decision_summaries``."""

    def test_promote_dp_summary(self):
        promoted = _make_diag(texte="Pancréatite aiguë", code="K85.9", cim10_final="K85.9")
        decision = CodeDecision(
            action="PROMOTE_DP",
            final_code="K85.9",
            applied_rules=["RULE-DAS-TO-DP"],
        )
        promoted.cim10_decision = decision

        summary = decision_summaries(_make_dossier(dp=promoted))

        # Either the raw action name or its French wording is accepted.
        mentions_promotion = any(
            ("PROMOTE_DP" in entry) or ("promu en DP" in entry) for entry in summary
        )
        mentions_code = any("K85.9" in entry for entry in summary)
        assert mentions_promotion
        assert mentions_code
|
||||
Reference in New Issue
Block a user