- das_filter: regex anti-répétition gère les espaces entre mots concaténés
("VentilationVentilation Ventilation..." désormais rejeté)
- cim10_extractor: regex antécédents s'arrête à "Signes Vitaux" (ne capture
plus le tableau de surveillance)
- Nouveau _is_valid_antecedent() filtre noms de service, mots de surveillance
isolés, infos admin (RPPS), répétitions, Mode de vie
- 28 nouveaux tests (TestIsValidAntecedent + das_filter repetition)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
544 lines
19 KiB
Python
544 lines
19 KiB
Python
"""Tests pour le module d'extraction médicale CIM-10."""
|
|
|
|
import pytest
|
|
|
|
from src.config import DossierMedical, Diagnostic
|
|
from src.medical.cim10_extractor import (
|
|
extract_medical_info,
|
|
_lookup_cim10,
|
|
_is_abnormal,
|
|
_is_valid_antecedent,
|
|
)
|
|
from src.medical.cim10_dict import normalize_text, load_dict, lookup, reset_cache
|
|
from src.extraction.document_classifier import classify, classify_with_confidence
|
|
|
|
|
|
class TestCIM10Lookup:
    """Checks that `_lookup_cim10` maps French diagnosis labels to CIM-10 codes."""

    def test_pancréatite_lithiasique(self):
        """Acute lithiasic pancreatitis is expected to map to K85.1."""
        code = _lookup_cim10("pancréatite aiguë lithiasique")
        assert code == "K85.1"

    def test_lithiase_choledoque(self):
        """Common bile duct lithiasis is expected to map to K80.5."""
        code = _lookup_cim10("lithiase du cholédoque")
        assert code == "K80.5"

    def test_eruption_medicamenteuse(self):
        """Drug eruption is expected to map to L27.0."""
        code = _lookup_cim10("éruption médicamenteuse")
        assert code == "L27.0"

    def test_obesite(self):
        """Obesity is expected to map to E66.0."""
        code = _lookup_cim10("obésité")
        assert code == "E66.0"

    def test_unknown(self):
        """A label this lookup does not cover is expected to yield None."""
        code = _lookup_cim10("grippe")
        assert code is None
|
|
|
|
|
|
class TestIsAbnormal:
    """Checks the three outcomes of `_is_abnormal`: True, False and None."""

    def test_lipasemie_high(self):
        """A lipase value of 6000 is flagged as abnormal."""
        verdict = _is_abnormal("Lipasémie", "6000")
        assert verdict is True

    def test_crp_normal(self):
        """A CRP of 3 is reported as not abnormal."""
        verdict = _is_abnormal("CRP", "3")
        assert verdict is False

    def test_crp_high(self):
        """A CRP of 12 is flagged as abnormal."""
        verdict = _is_abnormal("CRP", "12")
        assert verdict is True

    def test_troponine_negative(self):
        """A textual 'négative' troponin result is reported as not abnormal."""
        verdict = _is_abnormal("Troponine", "négative")
        assert verdict is False

    def test_unknown_test(self):
        """An unrecognised test name yields None rather than a boolean."""
        verdict = _is_abnormal("TestInconnu", "42")
        assert verdict is None
|
|
|
|
|
|
class TestExtractMedicalInfo:
    """End-to-end checks of `extract_medical_info` on small synthetic inputs."""

    @staticmethod
    def _crh_parsed() -> dict:
        """Return a fresh minimal parsed CRH payload.

        The payload describes a male patient with an empty stay and no
        diagnostics. A new dict is built on every call so that tests never
        share a mutable object.
        """
        return {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }

    def test_extract_from_trackare(self):
        """A Trackare payload plus free text fills every section asserted below."""
        parsed = {
            "type": "trackare",
            "patient": {
                "sexe": "F",
                "date_naissance": "23/02/1980",
                "imc": 34.37,
                "poids_kg": 90.2,
                "taille_cm": 162,
            },
            "sejour": {
                "date_entree": "25/02/2023",
                "date_sortie": "03/03/2023",
            },
            "urgences": {"mode_entree": "Urgences"},
            "diagnostics": [
                {
                    "type": "Principal",
                    "statut": "actif",
                    "code_cim10": "K80.5",
                    "libelle": "Calcul des canaux biliaires",
                }
            ],
            "signes_vitaux": {"imc": 34.37, "poids_kg": 90.2, "taille_cm": 162},
        }
        text = """Pancréatite aiguë lithiasique.
Cholécystectomie par cœlioscopie le 01/03.
Cholangiographie retrouvant une lithiase du bas cholédoque.
TDM à J3 retrouve : Absence de signe de gravité. Score de Balthazar à 0.
Éruption cutanée érythémateuse. Réaction au tramadol.
IMC: 34.370
TTT de sortie :
Paracétamol et Acupan si besoin
Cétirizine

Devenir : sortie le 03/03."""

        dossier = extract_medical_info(parsed, text)

        # Stay
        assert dossier.sejour.sexe == "F"
        # Consistent with a birth date of 23/02/1980 and an admission on 25/02/2023.
        assert dossier.sejour.age == 43
        # 25/02/2023 -> 03/03/2023
        assert dossier.sejour.duree_sejour == 6
        # Compare floats with a tolerance instead of exact equality.
        assert dossier.sejour.imc == pytest.approx(34.37)

        # Principal diagnosis (DP)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K80.5"

        # Associated diagnoses (DAS)
        codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
        assert "L27.0" in codes  # drug eruption
        assert "E66.0" in codes  # obesity

        # Procedures
        acte_codes = {a.code_ccam_suggestion for a in dossier.actes_ccam}
        assert "HMFC004" in acte_codes  # cholecystectomy
        assert "ZCQK002" in acte_codes  # CT scan (TDM)

        # Discharge treatments
        meds = [t.medicament for t in dossier.traitements_sortie]
        assert any("Paracétamol" in m for m in meds)
        assert any("Cétirizine" in m for m in meds)

        # Lab results
        tests = {b.test for b in dossier.biologie_cle}
        assert "Troponine" not in tests  # not mentioned in this minimal text

        # Imaging
        assert len(dossier.imagerie) >= 1
        assert any("Balthazar" in (i.score or "") for i in dossier.imagerie)

        # Complications
        assert any("cutanée" in c.lower() for c in dossier.complications)

    def test_extract_without_edsnlp(self):
        """Extraction still works when no edsnlp result is supplied."""
        text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol 1g matin et soir\n\nDevenir : retour."

        dossier = extract_medical_info(self._crh_parsed(), text, edsnlp_result=None)

        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert len(dossier.traitements_sortie) >= 1

    def test_extract_with_edsnlp_result(self):
        """edsnlp entities enrich the diagnostics."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        text = "Patient admis pour douleur abdominale."
        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False),
            ],
            drug_entities=[],
        )

        dossier = extract_medical_info(
            self._crh_parsed(), text, edsnlp_result=edsnlp_result
        )

        # The principal diagnosis should be found through edsnlp.
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "R10.4"

    def test_negated_edsnlp_entities_ignored(self):
        """Entities negated by edsnlp are not kept as diagnoses."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        text = "Pas de fièvre. Patient en bon état."
        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="fièvre", code="R50.9", negation=True),
            ],
        )

        dossier = extract_medical_info(
            self._crh_parsed(), text, edsnlp_result=edsnlp_result
        )

        # The negated entity must not show up as any diagnosis.
        all_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
        if dossier.diagnostic_principal:
            all_codes.add(dossier.diagnostic_principal.cim10_suggestion)
        assert "R50.9" not in all_codes

    def test_drug_atc_enrichment(self):
        """ATC codes from edsnlp are attached to the matching treatments."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, DrugEntity

        text = "TTT de sortie :\nParacétamol 1g matin\n\nDevenir : retour."
        edsnlp_result = EdsnlpResult(
            drug_entities=[
                DrugEntity(texte="Paracétamol", code_atc="N02BE01", negation=False),
            ],
        )

        dossier = extract_medical_info(
            self._crh_parsed(), text, edsnlp_result=edsnlp_result
        )

        assert len(dossier.traitements_sortie) >= 1
        paracetamol = next(
            (t for t in dossier.traitements_sortie if "Paracétamol" in t.medicament),
            None,
        )
        assert paracetamol is not None
        assert paracetamol.code_atc == "N02BE01"

    def test_edsnlp_negation_for_complications(self):
        """edsnlp negation filters the complications list."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        text = "Pas de fièvre ni d'infection. Bonne évolution."
        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="fièvre", code="R50.9", negation=True),
                CIM10Entity(texte="infection", code="A49.9", negation=True),
            ],
        )

        dossier = extract_medical_info(
            self._crh_parsed(), text, edsnlp_result=edsnlp_result
        )

        # Fever and infection are negated, so neither may appear as a complication.
        # NOTE(review): this is an exact-match check on whole entries; a longer
        # entry that merely contains "fièvre" would not be caught. Confirm
        # whether a substring check is wanted here.
        complication_terms = [c.lower() for c in dossier.complications]
        assert "fièvre" not in complication_terms
        assert "infection" not in complication_terms
|
|
|
|
|
|
# === Nouveaux tests : dictionnaire CIM-10, normalisation, robustesse ===
|
|
|
|
|
|
class TestCIM10Dict:
    """Checks on the full CIM-10 dictionary returned by `load_dict`."""

    def test_load_dict_not_empty(self):
        """The loaded dictionary holds more than 10000 entries."""
        entries = load_dict()
        assert len(entries) > 10000

    def test_known_codes_present(self):
        """A handful of codes used elsewhere in these tests are present as keys."""
        entries = load_dict()
        for expected_code in ("K85.1", "K80.5", "I10", "E66.0", "L27.0"):
            assert expected_code in entries

    def test_labels_non_empty(self):
        """The first 100 entries all carry a truthy label."""
        sample = list(load_dict().items())[:100]
        for code, label in sample:
            assert label, f"Label vide pour {code}"
|
|
|
|
|
|
class TestNormalizeText:
    """Checks `normalize_text` on accents, letter case and whitespace."""

    def test_accents_removed(self):
        """Accented letters are replaced by their unaccented form."""
        normalized = normalize_text("Pancréatite")
        assert normalized == "pancreatite"

    def test_lowercase(self):
        """Upper-case input comes back lower-cased and unaccented."""
        normalized = normalize_text("PANCRÉATITE AIGUË")
        assert normalized == "pancreatite aigue"

    def test_whitespace_collapsed(self):
        """Surrounding whitespace does not survive normalization."""
        # NOTE(review): as written, this input only has single leading and
        # trailing spaces; confirm whether interior runs of spaces were intended.
        normalized = normalize_text(" pancréatite aiguë ")
        assert normalized == "pancreatite aigue"

    def test_trema(self):
        """A diaeresis is dropped like any other diacritic."""
        normalized = normalize_text("aigüe")
        assert normalized == "aigue"

    def test_mixed(self):
        """Mixed case and several accents are handled in one pass."""
        normalized = normalize_text("Éruption Cutanée Médicamenteuse")
        assert normalized == "eruption cutanee medicamenteuse"
|
|
|
|
|
|
class TestDictLookup:
    """Checks `lookup`: domain-override priority, exact match and substring match."""

    def test_domain_override_priority(self):
        """An entry supplied through `domain_overrides` is returned by `lookup`."""
        overrides = {"pancréatite aiguë biliaire": "K85.1"}
        code = lookup("pancréatite aiguë biliaire", domain_overrides=overrides)
        assert code == "K85.1"

    def test_exact_normalized_match(self):
        """A label that matches a dictionary entry exactly resolves to its code."""
        # "Hypertension essentielle (primitive)" is the exact label of I10.
        code = lookup("Hypertension essentielle (primitive)")
        assert code == "I10"

    def test_substring_match(self):
        """A known label embedded in a longer sentence is still found."""
        code = lookup("patient avec cholécystite aiguë sévère")
        assert code == "K81.0"

    def test_unknown_returns_none(self):
        """Text that matches nothing yields None."""
        code = lookup("texte complètement inconnu xyz123")
        assert code is None

    def test_accent_insensitive(self):
        """The search ignores accents."""
        code = lookup("pancreatite aigue d'origine biliaire")
        assert code == "K85.1"
|
|
|
|
|
|
class TestDiagnosticAccentVariations:
    """Checks diagnosis detection across accent and case variations."""

    def _extract(self, text: str) -> DossierMedical:
        """Run `extract_medical_info` on `text` with a minimal CRH payload."""
        return extract_medical_info(
            {
                "type": "crh",
                "patient": {"sexe": "M"},
                "sejour": {},
                "diagnostics": [],
            },
            text,
        )

    def test_pancreatite_sans_accents(self):
        """The unaccented spelling still yields K85.1 as principal diagnosis."""
        principal = self._extract(
            "Pancreatite aigue biliaire.\nDevenir : retour."
        ).diagnostic_principal
        assert principal is not None
        assert principal.cim10_suggestion == "K85.1"

    def test_pancreatite_trema(self):
        """The 'aigüe' spelling still yields K85.1 as principal diagnosis."""
        principal = self._extract(
            "Pancréatite aigüe biliaire.\nDevenir : retour."
        ).diagnostic_principal
        assert principal is not None
        assert principal.cim10_suggestion == "K85.1"

    def test_pancreatite_majuscules(self):
        """The all-caps spelling still yields K85.1 as principal diagnosis."""
        principal = self._extract(
            "PANCREATITE AIGUE BILIAIRE.\nDevenir : retour."
        ).diagnostic_principal
        assert principal is not None
        assert principal.cim10_suggestion == "K85.1"

    def test_hta_as_das(self):
        """Hypertension is detected as an associated diagnosis even without accents."""
        dossier = self._extract(
            "Douleur abdominale.\nhypertension arterielle connue.\nDevenir : retour."
        )
        associated_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
        assert "I10" in associated_codes
|
|
|
|
|
|
class TestBiologieEdgeCases:
    """Checks lab-result extraction across notation variants."""

    def _extract_bio(self, text: str) -> list:
        """Return `biologie_cle` extracted from `text` with a minimal CRH payload."""
        minimal_crh = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }
        return extract_medical_info(minimal_crh, text).biologie_cle

    def test_crp_with_unit(self):
        """'CRP=45 mg/L' yields a CRP result valued '45'."""
        results = self._extract_bio("CRP=45 mg/L")
        assert any(r.test == "CRP" and r.valeur == "45" for r in results)

    def test_lipasemie_ui_l(self):
        """'Lipasémie à 850 UI/L' yields a Lipasémie result valued '850'."""
        results = self._extract_bio("Lipasémie à 850 UI/L")
        assert any(r.test == "Lipasémie" and r.valeur == "850" for r in results)

    def test_troponine_us(self):
        """'Troponine us négative' yields a Troponine result valued 'négative'."""
        results = self._extract_bio("Troponine us négative")
        assert any(r.test == "Troponine" and r.valeur == "négative" for r in results)

    def test_hb_shorthand(self):
        """The 'Hb' shorthand is reported under the name 'Hémoglobine'."""
        results = self._extract_bio("Hb = 11.5 g/dL")
        assert any(r.test == "Hémoglobine" and r.valeur == "11.5" for r in results)

    def test_tgo_alias(self):
        """The 'TGO' alias is reported under the name 'ASAT'."""
        results = self._extract_bio("TGO = 120 UI/L")
        assert any(r.test == "ASAT" and r.valeur == "120" for r in results)

    def test_creatinine(self):
        """'Créatinine à 95 µmol/L' yields a Créatinine result valued '95'."""
        results = self._extract_bio("Créatinine à 95 µmol/L")
        assert any(r.test == "Créatinine" and r.valeur == "95" for r in results)
|
|
|
|
|
|
class TestTraitementEdgeCases:
    """Checks extraction of discharge treatments."""

    def _extract_ttt(self, text: str) -> list:
        """Return `traitements_sortie` extracted from `text` with a minimal CRH payload."""
        minimal_crh = {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }
        return extract_medical_info(minimal_crh, text).traitements_sortie

    def test_more_than_10_medications(self):
        """The former limit of 10 treatments no longer applies."""
        meds = "\n".join(f"Médicament{i} 100mg matin" for i in range(15))
        treatments = self._extract_ttt(f"TTT de sortie :\n{meds}\n\nDevenir : retour.")
        assert len(treatments) >= 15

    def test_posologie_sachet(self):
        """A line dosed in sachets is still picked up as a treatment."""
        treatments = self._extract_ttt(
            "TTT de sortie :\nMovicol 1 sachet matin\n\nDevenir : retour."
        )
        assert len(treatments) >= 1

    def test_posologie_x_par_jour(self):
        """An 'Nx/jour' dosage produces a non-None posology."""
        treatments = self._extract_ttt(
            "TTT de sortie :\nParacétamol 1g 3x/jour\n\nDevenir : retour."
        )
        assert len(treatments) >= 1
        first = treatments[0]
        assert first.posologie is not None

    def test_stop_on_footer(self):
        """A line following the doctor's signature is not kept as a treatment."""
        treatments = self._extract_ttt(
            "TTT de sortie :\nParacétamol\nDoliprane\nDr Martin signature\nAutre médicament\n\nDevenir : retour."
        )
        names = [t.medicament for t in treatments]
        assert "Autre médicament" not in names

    def test_pendant_x_jours(self):
        """A 'pendant N jours' duration is kept inside the posology."""
        treatments = self._extract_ttt(
            "TTT de sortie :\nAmoxicilline 1g pendant 7 jours\n\nDevenir : retour."
        )
        assert len(treatments) >= 1
        first = treatments[0]
        assert first.posologie is not None
        assert "7 jours" in first.posologie
|
|
|
|
|
|
class TestIsValidAntecedent:
    """Checks `_is_valid_antecedent`, the filter for spurious Trackare antecedents."""

    # --- Genuine antecedents (accepted) ---

    def test_accept_syndrome(self):
        accepted = _is_valid_antecedent("Syndrome anxio depressif")
        assert accepted

    def test_accept_fracture(self):
        accepted = _is_valid_antecedent("fracture des deux humérus en 2017")
        assert accepted

    def test_accept_hta_diabete(self):
        accepted = _is_valid_antecedent("HTA, diabète type 2")
        assert accepted

    def test_accept_bilan_neurologique(self):
        accepted = _is_valid_antecedent(
            "Bilan neurologique: IRMc leucopathie vasculaire"
        )
        assert accepted

    # --- Trackare monitoring noise (rejected) ---

    def test_reject_ventilation_concat(self):
        """Concatenated repetitions of one word are rejected."""
        accepted = _is_valid_antecedent(
            "VentilationVentilationVentilation VentilationVentilationVentilation"
        )
        assert not accepted

    def test_reject_spontanee_repeated(self):
        accepted = _is_valid_antecedent(
            "spontanée spontanée spontanée spontanée spontanée"
        )
        assert not accepted

    def test_reject_air_repeated(self):
        accepted = _is_valid_antecedent("Air Air Air Air Air Air Air")
        assert not accepted

    def test_reject_ambiant_repeated(self):
        accepted = _is_valid_antecedent("ambiant ambiant ambiant ambiant")
        assert not accepted

    def test_reject_en_repeated(self):
        accepted = _is_valid_antecedent("EN EN EN EN")
        assert not accepted

    def test_reject_moyenne_single(self):
        accepted = _is_valid_antecedent("Moyenne")
        assert not accepted

    def test_reject_ventilation_single(self):
        accepted = _is_valid_antecedent("Ventilation")
        assert not accepted

    def test_reject_echelle_single(self):
        accepted = _is_valid_antecedent("Echelle")
        assert not accepted

    def test_reject_glycemie_single(self):
        accepted = _is_valid_antecedent("Glycémie")
        assert not accepted

    def test_reject_capillaire_single(self):
        accepted = _is_valid_antecedent("capillaire")
        assert not accepted

    def test_reject_gauche_single(self):
        accepted = _is_valid_antecedent("Gauche")
        assert not accepted

    # --- Administrative noise (rejected) ---

    def test_reject_service_name_caps(self):
        accepted = _is_valid_antecedent("MEDECINE INTERNE ET")
        assert not accepted

    def test_reject_immunologie_caps(self):
        accepted = _is_valid_antecedent("IMMUNOLOGIE CLINIQUE")
        assert not accepted

    def test_reject_rpps(self):
        accepted = _is_valid_antecedent("N° RPPS [RPPS_7]")
        assert not accepted

    def test_reject_medecin_hospitalier(self):
        accepted = _is_valid_antecedent("[MEDECIN] Hospitalier")
        assert not accepted

    def test_reject_mode_de_vie(self):
        accepted = _is_valid_antecedent("Mode de vie : divorcée, une fille")
        assert not accepted

    def test_reject_texte_libre(self):
        accepted = _is_valid_antecedent("(texte libre)")
        assert not accepted

    # --- Edge cases ---

    def test_reject_too_short(self):
        accepted = _is_valid_antecedent("de Bo")
        assert not accepted

    def test_reject_starts_with_digit(self):
        accepted = _is_valid_antecedent("97,00 100,00 98,00")
        assert not accepted

    def test_reject_empty(self):
        accepted = _is_valid_antecedent("")
        assert not accepted
|
|
|
|
|
|
class TestClassifierConfidence:
    """Checks `classify_with_confidence` and the string-returning `classify`."""

    def test_high_confidence_trackare(self):
        """Text full of Trackare markers is classified as 'trackare' with confidence >= 0.7."""
        outcome = classify_with_confidence(
            "Dossier Patient\nIPP: 12345\nDétails épisode\nEpisode No: 67890\nSignes vitaux\n"
        )
        assert outcome.doc_type == "trackare"
        assert outcome.confidence >= 0.7

    def test_high_confidence_crh(self):
        """A letter-style discharge summary is classified as 'crh' with confidence >= 0.7."""
        outcome = classify_with_confidence(
            "Mon cher confrère,\nCompte rendu d'hospitalisation\nVotre patient a été admis dans le service de gastro\n"
        )
        assert outcome.doc_type == "crh"
        assert outcome.confidence >= 0.7

    def test_ambiguous_case(self):
        """Text without specific markers gets a confidence of at most 0.6."""
        outcome = classify_with_confidence(
            "Document médical quelconque sans marqueurs spécifiques."
        )
        assert outcome.confidence <= 0.6

    def test_backward_compatible(self):
        """`classify()` still returns a plain string, either 'crh' or 'trackare'."""
        label = classify("Dossier Patient\nIPP: 12345\n")
        assert isinstance(label, str)
        assert label in ("crh", "trackare")
|