"""Tests pour le module d'extraction médicale CIM-10.""" import pytest from src.config import DossierMedical, Diagnostic from src.medical.cim10_extractor import ( extract_medical_info, _lookup_cim10, _is_abnormal, ) from src.medical.cim10_dict import normalize_text, load_dict, lookup, reset_cache from src.extraction.document_classifier import classify, classify_with_confidence class TestCIM10Lookup: def test_pancréatite_lithiasique(self): assert _lookup_cim10("pancréatite aiguë lithiasique") == "K85.1" def test_lithiase_choledoque(self): assert _lookup_cim10("lithiase du cholédoque") == "K80.5" def test_eruption_medicamenteuse(self): assert _lookup_cim10("éruption médicamenteuse") == "L27.0" def test_obesite(self): assert _lookup_cim10("obésité") == "E66.0" def test_unknown(self): assert _lookup_cim10("grippe") is None class TestIsAbnormal: def test_lipasemie_high(self): assert _is_abnormal("Lipasémie", "6000") is True def test_crp_normal(self): assert _is_abnormal("CRP", "3") is False def test_crp_high(self): assert _is_abnormal("CRP", "12") is True def test_troponine_negative(self): assert _is_abnormal("Troponine", "négative") is False def test_unknown_test(self): assert _is_abnormal("TestInconnu", "42") is None class TestExtractMedicalInfo: def test_extract_from_trackare(self): parsed = { "type": "trackare", "patient": { "sexe": "F", "date_naissance": "23/02/1980", "imc": 34.37, "poids_kg": 90.2, "taille_cm": 162, }, "sejour": { "date_entree": "25/02/2023", "date_sortie": "03/03/2023", }, "urgences": {"mode_entree": "Urgences"}, "diagnostics": [ { "type": "Principal", "statut": "actif", "code_cim10": "K80.5", "libelle": "Calcul des canaux biliaires", } ], "signes_vitaux": {"imc": 34.37, "poids_kg": 90.2, "taille_cm": 162}, } text = """Pancréatite aiguë lithiasique. Cholécystectomie par cœlioscopie le 01/03. Cholangiographie retrouvant une lithiase du bas cholédoque. TDM à J3 retrouve : Absence de signe de gravité. Score de Balthazar à 0. Éruption cutanée érythémateuse. Réaction au tramadol. IMC: 34.370 TTT de sortie : Paracétamol et Acupan si besoin Cétirizine Devenir : sortie le 03/03.""" dossier = extract_medical_info(parsed, text) # Séjour assert dossier.sejour.sexe == "F" assert dossier.sejour.age == 43 assert dossier.sejour.duree_sejour == 6 assert dossier.sejour.imc == 34.37 # DP assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K80.5" # DAS codes = {d.cim10_suggestion for d in dossier.diagnostics_associes} assert "L27.0" in codes # Éruption médicamenteuse assert "E66.0" in codes # Obésité # Actes acte_codes = {a.code_ccam_suggestion for a in dossier.actes_ccam} assert "HMFC004" in acte_codes # Cholécystectomie assert "ZCQK002" in acte_codes # TDM # Traitements meds = [t.medicament for t in dossier.traitements_sortie] assert any("Paracétamol" in m for m in meds) assert any("Cétirizine" in m for m in meds) # Bio tests = {b.test for b in dossier.biologie_cle} assert "Troponine" not in tests # pas dans ce texte minimal # Imagerie assert len(dossier.imagerie) >= 1 assert any("Balthazar" in (i.score or "") for i in dossier.imagerie) # Complications assert any("cutanée" in c.lower() for c in dossier.complications) def test_extract_without_edsnlp(self): """Vérifie que l'extraction fonctionne sans résultat edsnlp.""" parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol 1g matin et soir\n\nDevenir : retour." dossier = extract_medical_info(parsed, text, edsnlp_result=None) assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" assert len(dossier.traitements_sortie) >= 1 def test_extract_with_edsnlp_result(self): """Vérifie que les résultats edsnlp enrichissent les diagnostics.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity, DrugEntity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Patient admis pour douleur abdominale." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False), ], drug_entities=[], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # Le DP devrait être trouvé via edsnlp assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "R10.4" def test_negated_edsnlp_entities_ignored(self): """Vérifie que les entités niées par edsnlp ne sont pas retenues.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pas de fièvre. Patient en bon état." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="fièvre", code="R50.9", negation=True), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # L'entité niée ne doit pas apparaître comme diagnostic all_codes = set() if dossier.diagnostic_principal: all_codes.add(dossier.diagnostic_principal.cim10_suggestion) for d in dossier.diagnostics_associes: all_codes.add(d.cim10_suggestion) assert "R50.9" not in all_codes def test_drug_atc_enrichment(self): """Vérifie que les codes ATC edsnlp sont ajoutés aux traitements.""" from src.medical.edsnlp_pipeline import EdsnlpResult, DrugEntity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "TTT de sortie :\nParacétamol 1g matin\n\nDevenir : retour." edsnlp_result = EdsnlpResult( drug_entities=[ DrugEntity(texte="Paracétamol", code_atc="N02BE01", negation=False), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) assert len(dossier.traitements_sortie) >= 1 paracetamol = next( (t for t in dossier.traitements_sortie if "Paracétamol" in t.medicament), None ) assert paracetamol is not None assert paracetamol.code_atc == "N02BE01" def test_edsnlp_negation_for_complications(self): """Vérifie que la négation edsnlp filtre les complications.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pas de fièvre ni d'infection. Bonne évolution." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="fièvre", code="R50.9", negation=True), CIM10Entity(texte="infection", code="A49.9", negation=True), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # Fièvre et infection sont niées, ne doivent pas apparaître dans complications complication_terms = [c.lower() for c in dossier.complications] assert "fièvre" not in complication_terms assert "infection" not in complication_terms # === Nouveaux tests : dictionnaire CIM-10, normalisation, robustesse === class TestCIM10Dict: """Tests pour le chargement du dictionnaire CIM-10 complet.""" def test_load_dict_not_empty(self): d = load_dict() assert len(d) > 10000 def test_known_codes_present(self): d = load_dict() assert "K85.1" in d assert "K80.5" in d assert "I10" in d assert "E66.0" in d assert "L27.0" in d def test_labels_non_empty(self): d = load_dict() for code, label in list(d.items())[:100]: assert label, f"Label vide pour {code}" class TestNormalizeText: """Tests pour normalize_text : accents, casse, whitespace.""" def test_accents_removed(self): assert normalize_text("Pancréatite") == "pancreatite" def test_lowercase(self): assert normalize_text("PANCRÉATITE AIGUË") == "pancreatite aigue" def test_whitespace_collapsed(self): assert normalize_text(" pancréatite aiguë ") == "pancreatite aigue" def test_trema(self): assert normalize_text("aigüe") == "aigue" def test_mixed(self): assert normalize_text("Éruption Cutanée Médicamenteuse") == "eruption cutanee medicamenteuse" class TestDictLookup: """Tests pour lookup : priorité domain override, match exact, substring.""" def test_domain_override_priority(self): """CIM10_MAP (override) a priorité sur le dictionnaire complet.""" override = {"pancréatite aiguë biliaire": "K85.1"} result = lookup("pancréatite aiguë biliaire", domain_overrides=override) assert result == "K85.1" def test_exact_normalized_match(self): """Match exact normalisé dans le dictionnaire complet.""" # "Hypertension essentielle (primitive)" est le label exact de I10 result = lookup("Hypertension essentielle (primitive)") assert result == "I10" def test_substring_match(self): """Match substring normalisé.""" result = lookup("patient avec cholécystite aiguë sévère") assert result == "K81.0" def test_unknown_returns_none(self): result = lookup("texte complètement inconnu xyz123") assert result is None def test_accent_insensitive(self): """La recherche ignore les accents.""" result = lookup("pancreatite aigue d'origine biliaire") assert result == "K85.1" class TestDiagnosticAccentVariations: """Tests pour la détection de diagnostics avec variations d'accents.""" def _extract(self, text: str) -> DossierMedical: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } return extract_medical_info(parsed, text) def test_pancreatite_sans_accents(self): dossier = self._extract("Pancreatite aigue biliaire.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_pancreatite_trema(self): dossier = self._extract("Pancréatite aigüe biliaire.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_pancreatite_majuscules(self): dossier = self._extract("PANCREATITE AIGUE BILIAIRE.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_hta_as_das(self): """HTA détectée comme DAS même sans accent.""" dossier = self._extract("Douleur abdominale.\nhypertension arterielle connue.\nDevenir : retour.") codes = {d.cim10_suggestion for d in dossier.diagnostics_associes} assert "I10" in codes class TestBiologieEdgeCases: """Tests pour l'extraction biologie avec variantes.""" def _extract_bio(self, text: str) -> list: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } dossier = extract_medical_info(parsed, text) return dossier.biologie_cle def test_crp_with_unit(self): bio = self._extract_bio("CRP=45 mg/L") assert any(b.test == "CRP" and b.valeur == "45" for b in bio) def test_lipasemie_ui_l(self): bio = self._extract_bio("Lipasémie à 850 UI/L") assert any(b.test == "Lipasémie" and b.valeur == "850" for b in bio) def test_troponine_us(self): bio = self._extract_bio("Troponine us négative") assert any(b.test == "Troponine" and b.valeur == "négative" for b in bio) def test_hb_shorthand(self): bio = self._extract_bio("Hb = 11.5 g/dL") assert any(b.test == "Hémoglobine" and b.valeur == "11.5" for b in bio) def test_tgo_alias(self): bio = self._extract_bio("TGO = 120 UI/L") assert any(b.test == "ASAT" and b.valeur == "120" for b in bio) def test_creatinine(self): bio = self._extract_bio("Créatinine à 95 µmol/L") assert any(b.test == "Créatinine" and b.valeur == "95" for b in bio) class TestTraitementEdgeCases: """Tests pour l'extraction des traitements.""" def _extract_ttt(self, text: str) -> list: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } dossier = extract_medical_info(parsed, text) return dossier.traitements_sortie def test_more_than_10_medications(self): """Vérifie que la limite de 10 est supprimée.""" meds = "\n".join(f"Médicament{i} 100mg matin" for i in range(15)) text = f"TTT de sortie :\n{meds}\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 15 def test_posologie_sachet(self): text = "TTT de sortie :\nMovicol 1 sachet matin\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 def test_posologie_x_par_jour(self): text = "TTT de sortie :\nParacétamol 1g 3x/jour\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 assert ttt[0].posologie is not None def test_stop_on_footer(self): text = "TTT de sortie :\nParacétamol\nDoliprane\nDr Martin signature\nAutre médicament\n\nDevenir : retour." ttt = self._extract_ttt(text) meds = [t.medicament for t in ttt] assert "Autre médicament" not in meds def test_pendant_x_jours(self): text = "TTT de sortie :\nAmoxicilline 1g pendant 7 jours\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 assert ttt[0].posologie is not None assert "7 jours" in ttt[0].posologie class TestClassifierConfidence: """Tests pour classify_with_confidence.""" def test_high_confidence_trackare(self): text = "Dossier Patient\nIPP: 12345\nDétails épisode\nEpisode No: 67890\nSignes vitaux\n" result = classify_with_confidence(text) assert result.doc_type == "trackare" assert result.confidence >= 0.7 def test_high_confidence_crh(self): text = "Mon cher confrère,\nCompte rendu d'hospitalisation\nVotre patient a été admis dans le service de gastro\n" result = classify_with_confidence(text) assert result.doc_type == "crh" assert result.confidence >= 0.7 def test_ambiguous_case(self): text = "Document médical quelconque sans marqueurs spécifiques." result = classify_with_confidence(text) assert result.confidence <= 0.6 def test_backward_compatible(self): """classify() retourne toujours une string.""" text = "Dossier Patient\nIPP: 12345\n" result = classify(text) assert isinstance(result, str) assert result in ("crh", "trackare")