"""Tests pour le module d'extraction médicale CIM-10.""" import pytest from src.config import DossierMedical, Diagnostic, Antecedent, Complication from src.medical.cim10_extractor import ( extract_medical_info, _lookup_cim10, _is_abnormal, _is_valid_antecedent, ) from src.medical.cim10_dict import normalize_text, load_dict, lookup, reset_cache from src.extraction.document_classifier import classify, classify_with_confidence class TestCIM10Lookup: def test_pancréatite_lithiasique(self): assert _lookup_cim10("pancréatite aiguë lithiasique") == "K85.1" def test_lithiase_choledoque(self): assert _lookup_cim10("lithiase du cholédoque") == "K80.5" def test_eruption_medicamenteuse(self): assert _lookup_cim10("éruption médicamenteuse") == "L27.0" def test_obesite(self): assert _lookup_cim10("obésité") == "E66.0" def test_unknown(self): assert _lookup_cim10("grippe") is None class TestIsAbnormal: def test_lipasemie_high(self): assert _is_abnormal("Lipasémie", "6000") is True def test_crp_normal(self): assert _is_abnormal("CRP", "3") is False def test_crp_high(self): assert _is_abnormal("CRP", "12") is True def test_troponine_negative(self): assert _is_abnormal("Troponine", "négative") is False def test_unknown_test(self): assert _is_abnormal("TestInconnu", "42") is None class TestExtractMedicalInfo: def test_extract_from_trackare(self): parsed = { "type": "trackare", "patient": { "sexe": "F", "date_naissance": "23/02/1980", "imc": 34.37, "poids_kg": 90.2, "taille_cm": 162, }, "sejour": { "date_entree": "25/02/2023", "date_sortie": "03/03/2023", }, "urgences": {"mode_entree": "Urgences"}, "diagnostics": [ { "type": "Principal", "statut": "actif", "code_cim10": "K80.5", "libelle": "Calcul des canaux biliaires", } ], "signes_vitaux": {"imc": 34.37, "poids_kg": 90.2, "taille_cm": 162}, } text = """Pancréatite aiguë lithiasique. Cholécystectomie par cœlioscopie le 01/03. Cholangiographie retrouvant une lithiase du bas cholédoque. TDM à J3 retrouve : Absence de signe de gravité. Score de Balthazar à 0. Éruption cutanée érythémateuse. Réaction au tramadol. IMC: 34.370 TTT de sortie : Paracétamol et Acupan si besoin Cétirizine Devenir : sortie le 03/03.""" dossier = extract_medical_info(parsed, text) # Séjour assert dossier.sejour.sexe == "F" assert dossier.sejour.age == 43 assert dossier.sejour.duree_sejour == 6 assert dossier.sejour.imc == 34.37 # DP assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K80.5" # DAS codes = {d.cim10_suggestion for d in dossier.diagnostics_associes} assert "L27.0" in codes # Éruption médicamenteuse assert "E66.0" in codes # Obésité # Actes acte_codes = {a.code_ccam_suggestion for a in dossier.actes_ccam} assert "HMFC004" in acte_codes # Cholécystectomie assert "ZCQK002" in acte_codes # TDM # Traitements meds = [t.medicament for t in dossier.traitements_sortie] assert any("Paracétamol" in m for m in meds) assert any("Cétirizine" in m for m in meds) # Bio tests = {b.test for b in dossier.biologie_cle} assert "Troponine" not in tests # pas dans ce texte minimal # Imagerie assert len(dossier.imagerie) >= 1 assert any("Balthazar" in (i.score or "") for i in dossier.imagerie) # Complications assert any("cutanée" in c.texte.lower() for c in dossier.complications) def test_extract_without_edsnlp(self): """Vérifie que l'extraction fonctionne sans résultat edsnlp.""" parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol 1g matin et soir\n\nDevenir : retour." dossier = extract_medical_info(parsed, text, edsnlp_result=None) assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" assert len(dossier.traitements_sortie) >= 1 def test_extract_with_edsnlp_result(self): """Vérifie que les résultats edsnlp enrichissent les diagnostics.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity, DrugEntity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Patient admis pour douleur abdominale." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False), ], drug_entities=[], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # Le DP devrait être trouvé via edsnlp assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "R10.4" def test_negated_edsnlp_entities_ignored(self): """Vérifie que les entités niées par edsnlp ne sont pas retenues.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pas de fièvre. Patient en bon état." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="fièvre", code="R50.9", negation=True), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # L'entité niée ne doit pas apparaître comme diagnostic all_codes = set() if dossier.diagnostic_principal: all_codes.add(dossier.diagnostic_principal.cim10_suggestion) for d in dossier.diagnostics_associes: all_codes.add(d.cim10_suggestion) assert "R50.9" not in all_codes def test_drug_atc_enrichment(self): """Vérifie que les codes ATC edsnlp sont ajoutés aux traitements.""" from src.medical.edsnlp_pipeline import EdsnlpResult, DrugEntity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "TTT de sortie :\nParacétamol 1g matin\n\nDevenir : retour." edsnlp_result = EdsnlpResult( drug_entities=[ DrugEntity(texte="Paracétamol", code_atc="N02BE01", negation=False), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) assert len(dossier.traitements_sortie) >= 1 paracetamol = next( (t for t in dossier.traitements_sortie if "Paracétamol" in t.medicament), None ) assert paracetamol is not None assert paracetamol.code_atc == "N02BE01" def test_edsnlp_negation_for_complications(self): """Vérifie que la négation edsnlp filtre les complications.""" from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pas de fièvre ni d'infection. Bonne évolution." edsnlp_result = EdsnlpResult( cim10_entities=[ CIM10Entity(texte="fièvre", code="R50.9", negation=True), CIM10Entity(texte="infection", code="A49.9", negation=True), ], ) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) # Fièvre et infection sont niées, ne doivent pas apparaître dans complications complication_terms = [c.texte.lower() for c in dossier.complications] assert "fièvre" not in complication_terms assert "infection" not in complication_terms # === Nouveaux tests : dictionnaire CIM-10, normalisation, robustesse === class TestCIM10Dict: """Tests pour le chargement du dictionnaire CIM-10 complet.""" def test_load_dict_not_empty(self): d = load_dict() assert len(d) > 10000 def test_known_codes_present(self): d = load_dict() assert "K85.1" in d assert "K80.5" in d assert "I10" in d assert "E66.0" in d assert "L27.0" in d def test_labels_non_empty(self): d = load_dict() for code, label in list(d.items())[:100]: assert label, f"Label vide pour {code}" class TestNormalizeText: """Tests pour normalize_text : accents, casse, whitespace.""" def test_accents_removed(self): assert normalize_text("Pancréatite") == "pancreatite" def test_lowercase(self): assert normalize_text("PANCRÉATITE AIGUË") == "pancreatite aigue" def test_whitespace_collapsed(self): assert normalize_text(" pancréatite aiguë ") == "pancreatite aigue" def test_trema(self): assert normalize_text("aigüe") == "aigue" def test_mixed(self): assert normalize_text("Éruption Cutanée Médicamenteuse") == "eruption cutanee medicamenteuse" class TestDictLookup: """Tests pour lookup : priorité domain override, match exact, substring.""" def test_domain_override_priority(self): """CIM10_MAP (override) a priorité sur le dictionnaire complet.""" override = {"pancréatite aiguë biliaire": "K85.1"} result = lookup("pancréatite aiguë biliaire", domain_overrides=override) assert result == "K85.1" def test_exact_normalized_match(self): """Match exact normalisé dans le dictionnaire complet.""" # "Hypertension essentielle (primitive)" est le label exact de I10 result = lookup("Hypertension essentielle (primitive)") assert result == "I10" def test_substring_match(self): """Match substring normalisé.""" result = lookup("patient avec cholécystite aiguë sévère") assert result == "K81.0" def test_unknown_returns_none(self): result = lookup("texte complètement inconnu xyz123") assert result is None def test_accent_insensitive(self): """La recherche ignore les accents.""" result = lookup("pancreatite aigue d'origine biliaire") assert result == "K85.1" class TestDiagnosticAccentVariations: """Tests pour la détection de diagnostics avec variations d'accents.""" def _extract(self, text: str) -> DossierMedical: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } return extract_medical_info(parsed, text) def test_pancreatite_sans_accents(self): dossier = self._extract("Pancreatite aigue biliaire.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_pancreatite_trema(self): dossier = self._extract("Pancréatite aigüe biliaire.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_pancreatite_majuscules(self): dossier = self._extract("PANCREATITE AIGUE BILIAIRE.\nDevenir : retour.") assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" def test_hta_as_das(self): """HTA détectée comme DAS même sans accent.""" dossier = self._extract("Douleur abdominale.\nhypertension arterielle connue.\nDevenir : retour.") codes = {d.cim10_suggestion for d in dossier.diagnostics_associes} assert "I10" in codes class TestBiologieEdgeCases: """Tests pour l'extraction biologie avec variantes.""" def _extract_bio(self, text: str) -> list: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } dossier = extract_medical_info(parsed, text) return dossier.biologie_cle def test_crp_with_unit(self): bio = self._extract_bio("CRP=45 mg/L") assert any(b.test == "CRP" and b.valeur == "45" for b in bio) def test_lipasemie_ui_l(self): bio = self._extract_bio("Lipasémie à 850 UI/L") assert any(b.test == "Lipasémie" and b.valeur == "850" for b in bio) def test_troponine_us(self): bio = self._extract_bio("Troponine us négative") assert any(b.test == "Troponine" and b.valeur == "négative" for b in bio) def test_hb_shorthand(self): bio = self._extract_bio("Hb = 11.5 g/dL") assert any(b.test == "Hémoglobine" and b.valeur == "11.5" for b in bio) def test_tgo_alias(self): bio = self._extract_bio("TGO = 120 UI/L") assert any(b.test == "ASAT" and b.valeur == "120" for b in bio) def test_creatinine(self): bio = self._extract_bio("Créatinine à 95 µmol/L") assert any(b.test == "Créatinine" and b.valeur == "95" for b in bio) class TestTraitementEdgeCases: """Tests pour l'extraction des traitements.""" def _extract_ttt(self, text: str) -> list: parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } dossier = extract_medical_info(parsed, text) return dossier.traitements_sortie def test_more_than_10_medications(self): """Vérifie que la limite de 10 est supprimée.""" meds = "\n".join(f"Médicament{i} 100mg matin" for i in range(15)) text = f"TTT de sortie :\n{meds}\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 15 def test_posologie_sachet(self): text = "TTT de sortie :\nMovicol 1 sachet matin\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 def test_posologie_x_par_jour(self): text = "TTT de sortie :\nParacétamol 1g 3x/jour\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 assert ttt[0].posologie is not None def test_stop_on_footer(self): text = "TTT de sortie :\nParacétamol\nDoliprane\nDr Martin signature\nAutre médicament\n\nDevenir : retour." ttt = self._extract_ttt(text) meds = [t.medicament for t in ttt] assert "Autre médicament" not in meds def test_pendant_x_jours(self): text = "TTT de sortie :\nAmoxicilline 1g pendant 7 jours\n\nDevenir : retour." ttt = self._extract_ttt(text) assert len(ttt) >= 1 assert ttt[0].posologie is not None assert "7 jours" in ttt[0].posologie class TestIsValidAntecedent: """Tests pour le filtre d'antécédents parasites Trackare.""" # --- Vrais antécédents (acceptés) --- def test_accept_syndrome(self): assert _is_valid_antecedent("Syndrome anxio depressif") def test_accept_fracture(self): assert _is_valid_antecedent("fracture des deux humérus en 2017") def test_accept_hta_diabete(self): assert _is_valid_antecedent("HTA, diabète type 2") def test_accept_bilan_neurologique(self): assert _is_valid_antecedent("Bilan neurologique: IRMc leucopathie vasculaire") # --- Bruit surveillance Trackare (rejetés) --- def test_reject_ventilation_concat(self): assert not _is_valid_antecedent( "VentilationVentilationVentilation VentilationVentilationVentilation" ) def test_reject_spontanee_repeated(self): assert not _is_valid_antecedent( "spontanée spontanée spontanée spontanée spontanée" ) def test_reject_air_repeated(self): assert not _is_valid_antecedent("Air Air Air Air Air Air Air") def test_reject_ambiant_repeated(self): assert not _is_valid_antecedent("ambiant ambiant ambiant ambiant") def test_reject_en_repeated(self): assert not _is_valid_antecedent("EN EN EN EN") def test_reject_moyenne_single(self): assert not _is_valid_antecedent("Moyenne") def test_reject_ventilation_single(self): assert not _is_valid_antecedent("Ventilation") def test_reject_echelle_single(self): assert not _is_valid_antecedent("Echelle") def test_reject_glycemie_single(self): assert not _is_valid_antecedent("Glycémie") def test_reject_capillaire_single(self): assert not _is_valid_antecedent("capillaire") def test_reject_gauche_single(self): assert not _is_valid_antecedent("Gauche") # --- Bruit administratif (rejetés) --- def test_reject_service_name_caps(self): assert not _is_valid_antecedent("MEDECINE INTERNE ET") def test_reject_immunologie_caps(self): assert not _is_valid_antecedent("IMMUNOLOGIE CLINIQUE") def test_reject_rpps(self): assert not _is_valid_antecedent("N° RPPS [RPPS_7]") def test_reject_medecin_hospitalier(self): assert not _is_valid_antecedent("[MEDECIN] Hospitalier") def test_reject_mode_de_vie(self): assert not _is_valid_antecedent("Mode de vie : divorcée, une fille") def test_reject_texte_libre(self): assert not _is_valid_antecedent("(texte libre)") # --- Artefacts CRH colonne gauche (médecins) --- def test_reject_medecin_tag_start(self): assert not _is_valid_antecedent( "[MEDECIN] hospitalier - Syndrome anxio depressif suivi Dr [MEDECIN_39]" ) def test_reject_medecin_assistant(self): assert not _is_valid_antecedent( "[MEDECIN] Assistant des Hôpitaux de Lyon - Bilan neurologique" ) def test_reject_medecin_contractuel(self): assert not _is_valid_antecedent("[MEDECIN] hospitalier contractuel") def test_reject_dr_medecin_tag(self): assert not _is_valid_antecedent("Dr [MEDECIN_7] (Caradoc)") def test_reject_dr_chef_clinique(self): assert not _is_valid_antecedent( "Dr [MEDECIN_37] Chef de Clinique des Hôpitaux aucune aide" ) def test_reject_de_bordeaux(self): assert not _is_valid_antecedent("de Bordeaux") def test_reject_de_lyon(self): assert not _is_valid_antecedent("de Lyon") def test_reject_secretariat(self): assert not _is_valid_antecedent("Secrétariat : [TEL_3] - fracture en 2017") def test_reject_aucune_aide(self): assert not _is_valid_antecedent("aucune aide, pas d'ide, pas d'aide ménagère") def test_accept_de_long_medical(self): """'de' suivi d'une vraie description médicale longue passe.""" assert _is_valid_antecedent("dégénérescence maculaire liée à l'âge") # --- Cas limites --- def test_reject_too_short(self): assert not _is_valid_antecedent("de Bo") def test_reject_starts_with_digit(self): assert not _is_valid_antecedent("97,00 100,00 98,00") def test_reject_empty(self): assert not _is_valid_antecedent("") class TestClassifierConfidence: """Tests pour classify_with_confidence.""" def test_high_confidence_trackare(self): text = "Dossier Patient\nIPP: 12345\nDétails épisode\nEpisode No: 67890\nSignes vitaux\n" result = classify_with_confidence(text) assert result.doc_type == "trackare" assert result.confidence >= 0.7 def test_high_confidence_crh(self): text = "Mon cher confrère,\nCompte rendu d'hospitalisation\nVotre patient a été admis dans le service de gastro\n" result = classify_with_confidence(text) assert result.doc_type == "crh" assert result.confidence >= 0.7 def test_ambiguous_case(self): text = "Document médical quelconque sans marqueurs spécifiques." result = classify_with_confidence(text) assert result.confidence <= 0.6 def test_backward_compatible(self): """classify() retourne toujours une string.""" text = "Dossier Patient\nIPP: 12345\n" result = classify(text) assert isinstance(result, str) assert result in ("crh", "trackare") class TestBackwardCompatAntecedent: """Tests de rétrocompatibilité pour les antécédents et complications.""" def test_old_format_string_list(self): """Charger un vieux JSON avec antecedents: ["HTA", "Diabète"].""" d = DossierMedical.model_validate({ "antecedents": ["HTA", "Diabète type 2"], "complications": ["Fièvre"], }) assert len(d.antecedents) == 2 assert isinstance(d.antecedents[0], Antecedent) assert d.antecedents[0].texte == "HTA" assert d.antecedents[1].texte == "Diabète type 2" assert len(d.complications) == 1 assert isinstance(d.complications[0], Complication) assert d.complications[0].texte == "Fièvre" def test_new_format_object_list(self): """Charger un nouveau JSON avec antecedents: [{texte: "HTA", source_page: 1}].""" d = DossierMedical.model_validate({ "antecedents": [{"texte": "HTA", "source_page": 2, "source_excerpt": "contexte HTA"}], "complications": [{"texte": "Fièvre", "source_page": 3}], }) assert d.antecedents[0].texte == "HTA" assert d.antecedents[0].source_page == 2 assert d.antecedents[0].source_excerpt == "contexte HTA" assert d.complications[0].source_page == 3 def test_mixed_format(self): """Un mélange de strings et d'objets est converti correctement.""" d = DossierMedical.model_validate({ "antecedents": ["HTA", {"texte": "Diabète", "source_page": 1}], }) assert len(d.antecedents) == 2 assert d.antecedents[0].texte == "HTA" assert d.antecedents[0].source_page is None assert d.antecedents[1].texte == "Diabète" assert d.antecedents[1].source_page == 1 def test_empty_list(self): d = DossierMedical.model_validate({"antecedents": [], "complications": []}) assert d.antecedents == [] assert d.complications == [] def test_antecedent_extraction_produces_objects(self): """L'extraction produit bien des objets Antecedent.""" parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Antécédents :\n- Diabète type 2\n- Hypertension artérielle\n\nHistoire de la maladie" dossier = extract_medical_info(parsed, text) assert len(dossier.antecedents) >= 1 assert all(isinstance(a, Antecedent) for a in dossier.antecedents) textes = [a.texte for a in dossier.antecedents] assert "Diabète type 2" in textes def test_complication_extraction_produces_objects(self): """L'extraction produit bien des objets Complication.""" parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Patient avec fièvre post-opératoire." dossier = extract_medical_info(parsed, text) assert all(isinstance(c, Complication) for c in dossier.complications) class TestSourceTrackingFields: """Tests que les champs source_page/source_excerpt existent sur les modèles.""" def test_biologie_source_fields(self): from src.config import BiologieCle b = BiologieCle(test="CRP", valeur="45", source_page=2, source_excerpt="CRP=45") assert b.source_page == 2 assert b.source_excerpt == "CRP=45" def test_imagerie_source_fields(self): from src.config import Imagerie i = Imagerie(type="TDM", source_page=3) assert i.source_page == 3 def test_traitement_source_fields(self): from src.config import Traitement t = Traitement(medicament="Paracétamol", source_page=4) assert t.source_page == 4 def test_acte_source_fields(self): from src.config import ActeCCAM a = ActeCCAM(texte="Cholécystectomie", source_page=5) assert a.source_page == 5 def test_antecedent_source_fields(self): a = Antecedent(texte="HTA", source_page=1, source_excerpt="Antécédents: HTA") assert a.source_page == 1 def test_complication_source_fields(self): c = Complication(texte="Fièvre", source_page=2) assert c.source_page == 2