feat: pipeline T2A - anonymisation, extraction CIM-10 et intégration edsnlp
Pipeline complet de traitement de documents médicaux PDF :
- Extraction texte (pdfplumber) et classification (Trackare/CRH)
- Anonymisation multi-couche (regex + NER CamemBERT + sweep)
- Extraction médicale CIM-10 hybride : edsnlp (AP-HP) enrichit les diagnostics, médicaments (codes ATC via Romedi) et négation, avec fallback regex pour les patterns spécifiques
- Fix sentencepiece pinné à <0.2.0 pour compatibilité CamemBERT

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
238
tests/test_medical.py
Normal file
238
tests/test_medical.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""Tests pour le module d'extraction médicale CIM-10."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import DossierMedical, Diagnostic
|
||||
from src.medical.cim10_extractor import (
|
||||
extract_medical_info,
|
||||
_lookup_cim10,
|
||||
_is_abnormal,
|
||||
)
|
||||
|
||||
|
||||
class TestCIM10Lookup:
    """Checks the CIM-10 code returned by ``_lookup_cim10`` for free-text labels."""

    def test_pancréatite_lithiasique(self):
        """Acute gallstone pancreatitis is expected to map to K85.1."""
        code = _lookup_cim10("pancréatite aiguë lithiasique")
        assert code == "K85.1"

    def test_lithiase_choledoque(self):
        """Common bile duct lithiasis is expected to map to K80.5."""
        code = _lookup_cim10("lithiase du cholédoque")
        assert code == "K80.5"

    def test_eruption_medicamenteuse(self):
        """A drug eruption is expected to map to L27.0."""
        code = _lookup_cim10("éruption médicamenteuse")
        assert code == "L27.0"

    def test_obesite(self):
        """Obesity is expected to map to E66.0."""
        code = _lookup_cim10("obésité")
        assert code == "E66.0"

    def test_unknown(self):
        """A label with no expected match ("grippe") must yield ``None``."""
        code = _lookup_cim10("grippe")
        assert code is None
|
||||
|
||||
|
||||
class TestIsAbnormal:
    """Checks the verdict returned by ``_is_abnormal`` for (test name, value) pairs."""

    def test_lipasemie_high(self):
        """A lipasemia value of 6000 must be flagged as abnormal."""
        verdict = _is_abnormal("Lipasémie", "6000")
        assert verdict is True

    def test_crp_normal(self):
        """A CRP value of 3 must be reported as not abnormal."""
        verdict = _is_abnormal("CRP", "3")
        assert verdict is False

    def test_crp_high(self):
        """A CRP value of 12 must be flagged as abnormal."""
        verdict = _is_abnormal("CRP", "12")
        assert verdict is True

    def test_troponine_negative(self):
        """A textual "négative" troponin result must be reported as not abnormal."""
        verdict = _is_abnormal("Troponine", "négative")
        assert verdict is False

    def test_unknown_test(self):
        """An unrecognised test name must yield ``None`` rather than a boolean."""
        verdict = _is_abnormal("TestInconnu", "42")
        assert verdict is None
|
||||
|
||||
|
||||
class TestExtractMedicalInfo:
    """End-to-end checks of ``extract_medical_info`` with and without edsnlp input."""

    @staticmethod
    def _minimal_crh_parsed():
        """Return a fresh minimal parsed "crh" document.

        A new dict is built on every call so tests never share mutable state.
        The leading underscore keeps pytest from collecting it as a test.
        """
        return {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }

    def test_extract_from_trackare(self):
        """Extract a dossier from a "trackare" parsed document plus free text."""
        parsed = {
            "type": "trackare",
            "patient": {
                "sexe": "F",
                "date_naissance": "23/02/1980",
                "imc": 34.37,
                "poids_kg": 90.2,
                "taille_cm": 162,
            },
            "sejour": {
                "date_entree": "25/02/2023",
                "date_sortie": "03/03/2023",
            },
            "urgences": {"mode_entree": "Urgences"},
            "diagnostics": [
                {
                    "type": "Principal",
                    "statut": "actif",
                    "code_cim10": "K80.5",
                    "libelle": "Calcul des canaux biliaires",
                }
            ],
            "signes_vitaux": {"imc": 34.37, "poids_kg": 90.2, "taille_cm": 162},
        }
        text = """Pancréatite aiguë lithiasique.
Cholécystectomie par cœlioscopie le 01/03.
Cholangiographie retrouvant une lithiase du bas cholédoque.
TDM à J3 retrouve : Absence de signe de gravité. Score de Balthazar à 0.
Éruption cutanée érythémateuse. Réaction au tramadol.
IMC: 34.370
TTT de sortie :
Paracétamol et Acupan si besoin
Cétirizine

Devenir : sortie le 03/03."""

        dossier = extract_medical_info(parsed, text)

        # Stay
        assert dossier.sejour.sexe == "F"
        assert dossier.sejour.age == 43
        assert dossier.sejour.duree_sejour == 6
        # Float field: compare with a tolerance instead of exact equality.
        assert dossier.sejour.imc == pytest.approx(34.37)

        # Principal diagnosis
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K80.5"

        # Associated diagnoses
        codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
        assert "L27.0" in codes  # drug eruption
        assert "E66.0" in codes  # obesity

        # Procedures
        acte_codes = {a.code_ccam_suggestion for a in dossier.actes_ccam}
        assert "HMFC004" in acte_codes  # cholecystectomy
        assert "ZCQK002" in acte_codes  # CT scan

        # Discharge treatments
        meds = [t.medicament for t in dossier.traitements_sortie]
        assert any("Paracétamol" in m for m in meds)
        assert any("Cétirizine" in m for m in meds)

        # Lab results
        tests = {b.test for b in dossier.biologie_cle}
        assert "Troponine" not in tests  # not present in this minimal text

        # Imaging
        assert len(dossier.imagerie) >= 1
        assert any("Balthazar" in (i.score or "") for i in dossier.imagerie)

        # Complications
        assert any("cutanée" in c.lower() for c in dossier.complications)

    def test_extract_without_edsnlp(self):
        """Check that extraction works when no edsnlp result is supplied."""
        parsed = self._minimal_crh_parsed()
        text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol 1g matin et soir\n\nDevenir : retour."

        dossier = extract_medical_info(parsed, text, edsnlp_result=None)
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert len(dossier.traitements_sortie) >= 1

    def test_extract_with_edsnlp_result(self):
        """Check that edsnlp results enrich the diagnoses."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        parsed = self._minimal_crh_parsed()
        text = "Patient admis pour douleur abdominale."

        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False),
            ],
            drug_entities=[],
        )

        dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
        # The principal diagnosis should be found through edsnlp.
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "R10.4"

    def test_negated_edsnlp_entities_ignored(self):
        """Check that entities negated by edsnlp are not retained as diagnoses."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        parsed = self._minimal_crh_parsed()
        text = "Pas de fièvre. Patient en bon état."

        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="fièvre", code="R50.9", negation=True),
            ],
        )

        dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
        # The negated entity must not show up as any diagnosis, principal or associated.
        all_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
        if dossier.diagnostic_principal:
            all_codes.add(dossier.diagnostic_principal.cim10_suggestion)
        assert "R50.9" not in all_codes

    def test_drug_atc_enrichment(self):
        """Check that edsnlp ATC codes are attached to the treatments."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, DrugEntity

        parsed = self._minimal_crh_parsed()
        text = "TTT de sortie :\nParacétamol 1g matin\n\nDevenir : retour."

        edsnlp_result = EdsnlpResult(
            drug_entities=[
                DrugEntity(texte="Paracétamol", code_atc="N02BE01", negation=False),
            ],
        )

        dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
        assert len(dossier.traitements_sortie) >= 1
        paracetamol = next(
            (t for t in dossier.traitements_sortie if "Paracétamol" in t.medicament), None
        )
        assert paracetamol is not None
        assert paracetamol.code_atc == "N02BE01"

    def test_edsnlp_negation_for_complications(self):
        """Check that edsnlp negation filters the complications."""
        from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity

        parsed = self._minimal_crh_parsed()
        text = "Pas de fièvre ni d'infection. Bonne évolution."

        edsnlp_result = EdsnlpResult(
            cim10_entities=[
                CIM10Entity(texte="fièvre", code="R50.9", negation=True),
                CIM10Entity(texte="infection", code="A49.9", negation=True),
            ],
        )

        dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
        # Fever and infection are negated and must not appear among the complications.
        # Use a substring check (as the trackare test does for "cutanée"): exact list
        # membership would let an entry that merely contains the term slip through.
        complication_terms = [c.lower() for c in dossier.complications]
        for negated_term in ("fièvre", "infection"):
            assert not any(negated_term in term for term in complication_terms)
|
||||
Reference in New Issue
Block a user