feat: pipeline T2A - anonymisation, extraction CIM-10 et intégration edsnlp
Pipeline complet de traitement de documents médicaux PDF :
- Extraction texte (pdfplumber) et classification (Trackare/CRH)
- Anonymisation multi-couche (regex + NER CamemBERT + sweep)
- Extraction médicale CIM-10 hybride : edsnlp (AP-HP) enrichit les diagnostics, médicaments (codes ATC via Romedi) et négation, avec fallback regex pour les patterns spécifiques
- Fix sentencepiece pinné à <0.2.0 pour compatibilité CamemBERT

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
197
tests/test_anonymization.py
Normal file
197
tests/test_anonymization.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""Tests pour le module d'anonymisation."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.anonymization.entity_registry import EntityRegistry
|
||||
from src.anonymization.regex_patterns import (
|
||||
CRH_FOOTER_IPP_EPISODE,
|
||||
CRH_FOOTER_PATIENT_PATTERN,
|
||||
DATE_NAISSANCE_PATTERN,
|
||||
DR_NAME_PATTERN,
|
||||
EMAIL_PATTERN,
|
||||
EPISODE_PATTERN,
|
||||
FOOTER_PATIENT_PATTERN,
|
||||
IPP_PATTERN,
|
||||
NOTE_AUTHOR_PATTERN,
|
||||
PHONE_PATTERN,
|
||||
RPPS_PATTERN,
|
||||
)
|
||||
|
||||
|
||||
class TestRegexPatterns:
    """Run each anonymisation regex against one representative sample string."""

    def test_ipp_with_colon(self):
        """``IPP:`` followed by digits exposes the number in group 1."""
        found = IPP_PATTERN.search("IPP: 01306172")
        assert found is not None
        assert found.group(1) == "01306172"

    def test_ipp_without_colon(self):
        """``IPP`` without a colon still exposes the number in group 1."""
        found = IPP_PATTERN.search("IPP 01306172")
        assert found is not None
        assert found.group(1) == "01306172"

    def test_ipp_in_parentheses(self):
        """A parenthesised number is exposed through group 2."""
        found = IPP_PATTERN.search("(01306172 )")
        assert found is not None
        assert found.group(2) == "01306172"

    def test_episode_no(self):
        """The ``Episode No:`` form exposes the number in group 1."""
        found = EPISODE_PATTERN.search("Episode No: 23042753")
        assert found is not None
        assert found.group(1) == "23042753"

    def test_episode_n_degree(self):
        """The ``N° Episode`` form exposes the number in group 2."""
        found = EPISODE_PATTERN.search("N° Episode 23042753")
        assert found is not None
        assert found.group(2) == "23042753"

    def test_phone_dots(self):
        """A dot-separated phone number is matched in full."""
        sample = "06.25.39.26.82"
        found = PHONE_PATTERN.search(sample)
        assert found is not None
        assert found.group(0) == "06.25.39.26.82"

    def test_phone_spaces(self):
        """A space-separated phone number produces a match."""
        found = PHONE_PATTERN.search("05 59 44 35 35")
        assert found is not None

    def test_email(self):
        """An e-mail address is matched in full."""
        sample = "faudemar@ch-cotebasque.fr"
        found = EMAIL_PATTERN.search(sample)
        assert found is not None
        assert found.group(0) == "faudemar@ch-cotebasque.fr"

    def test_rpps(self):
        """The RPPS number is exposed in group 1."""
        found = RPPS_PATTERN.search("RPPS : 10100532760")
        assert found is not None
        assert found.group(1) == "10100532760"

    def test_date_naissance_nee_le(self):
        """``née le <date>`` exposes the date in group 1."""
        found = DATE_NAISSANCE_PATTERN.search("née le 23/02/1980")
        assert found is not None
        assert found.group(1) == "23/02/1980"

    def test_date_naissance_ne_e_le(self):
        """``Né(e) le <date>`` exposes the date in group 1."""
        found = DATE_NAISSANCE_PATTERN.search("Né(e) le 23/02/1980")
        assert found is not None
        assert found.group(1) == "23/02/1980"

    def test_date_naissance_field(self):
        """``Date de naissance: <date>`` exposes the date in group 1."""
        found = DATE_NAISSANCE_PATTERN.search("Date de naissance: 23/02/1980")
        assert found is not None
        assert found.group(1) == "23/02/1980"

    def test_dr_name(self):
        """``Dr`` plus initial and surname: the surname appears in group 1."""
        found = DR_NAME_PATTERN.search("Dr F. AUDEMAR")
        assert found is not None
        assert "AUDEMAR" in found.group(1)

    def test_dr_name_docteur(self):
        """``Docteur`` plus surname and first name: the surname appears in group 1."""
        found = DR_NAME_PATTERN.search("Docteur AUDEMAR Franck")
        assert found is not None
        assert "AUDEMAR" in found.group(1)

    def test_note_author(self):
        """The author of a ``Note IDE`` line is exposed in group 1."""
        found = NOTE_AUTHOR_PATTERN.search("Note IDE Annie GUIRESSE Non algique")
        assert found is not None
        assert found.group(1) == "Annie GUIRESSE"

    def test_footer_patient_trackare(self):
        """A Trackare-style footer yields the patient name in group 1."""
        footer = "Patient: CLIER NARBAIS AUDREY - Date de naissance: 23/02/1980"
        found = FOOTER_PATIENT_PATTERN.search(footer)
        assert found is not None
        assert "CLIER" in found.group(1)

    def test_crh_footer_patient(self):
        """A CRH-style footer yields the patient name in group 1."""
        footer = "Patient(e) : CLIER AUDREY NARBAIS Né(e) le 23/02/1980"
        found = CRH_FOOTER_PATIENT_PATTERN.search(footer)
        assert found is not None
        assert "CLIER" in found.group(1)

    def test_crh_footer_ipp_episode(self):
        """A CRH footer yields the IPP in group 1 and the episode in group 2."""
        footer = "IPP 01306172 / N° Episode 23042753 (MEDECINE GASTRO B2 HC)"
        found = CRH_FOOTER_IPP_EPISODE.search(footer)
        assert found is not None
        assert found.group(1) == "01306172"
        assert found.group(2) == "23042753"
|
||||
|
||||
|
||||
class TestEntityRegistry:
    """Checks the pseudonyms handed out by ``EntityRegistry``."""

    def test_register_returns_pseudo(self):
        """The first patient registered receives ``[PATIENT_1]``."""
        assert EntityRegistry().register("Jean Dupont", "patient") == "[PATIENT_1]"

    def test_register_same_entity_returns_same(self):
        """Registering the same entity twice yields the same pseudonym."""
        registry = EntityRegistry()
        first = registry.register("Jean Dupont", "patient")
        second = registry.register("Jean Dupont", "patient")
        assert first == second

    def test_register_case_insensitive(self):
        """Two spellings that differ only by case share one pseudonym."""
        registry = EntityRegistry()
        upper_variant = registry.register("Jean DUPONT", "patient")
        lower_variant = registry.register("jean dupont", "patient")
        assert upper_variant == lower_variant

    def test_register_different_categories(self):
        """Each category is numbered from 1 under its own label."""
        registry = EntityRegistry()
        patient_tag = registry.register("Dupont", "patient")
        doctor_tag = registry.register("Martin", "medecin")
        assert patient_tag == "[PATIENT_1]"
        assert doctor_tag == "[MEDECIN_1]"

    def test_get_replacement(self):
        """Lookup ignores case for known entities and returns None otherwise."""
        registry = EntityRegistry()
        registry.register("Jean Dupont", "patient")
        known = registry.get_replacement("jean dupont")
        unknown = registry.get_replacement("inconnu")
        assert known == "[PATIENT_1]"
        assert unknown is None
|
||||
|
||||
|
||||
class TestAnonymizer:
    """Checks ``Anonymizer.anonymize`` on short hand-written sentences."""

    @staticmethod
    def _blank_context():
        """Build a fresh parsed-data dict holding no known identities."""
        return {"patient": {}, "medecins": [], "contacts": []}

    def test_anonymize_basic(self):
        """Known patient and doctor names are removed and a placeholder appears."""
        # Imported inside the test, as in every test of this class.
        from src.anonymization.anonymizer import Anonymizer

        known_identities = {
            "patient": {"nom_prenom": "DUPONT Jean", "nom_naissance": "DUPONT"},
            "medecins": ["MARTIN Pierre"],
            "contacts": [],
        }
        sentence = "Le patient DUPONT Jean a été vu par Dr MARTIN Pierre."
        output = Anonymizer(parsed_data=known_identities).anonymize(sentence)

        assert "DUPONT" not in output
        assert "MARTIN" not in output
        assert any(tag in output for tag in ("[PATIENT", "[MEDECIN"))

    def test_preserves_medical_content(self):
        """Medical vocabulary is still present after anonymisation."""
        from src.anonymization.anonymizer import Anonymizer

        sentence = "Pancréatite aiguë biliaire. Cholécystectomie par cœlioscopie. IMC 34.37."
        output = Anonymizer(parsed_data=self._blank_context()).anonymize(sentence)

        for term in ("Pancréatite", "Cholécystectomie", "IMC"):
            assert term in output

    def test_anonymize_phone(self):
        """A phone number is replaced by a ``[TEL`` placeholder."""
        from src.anonymization.anonymizer import Anonymizer

        sentence = "Appeler le 06.25.39.26.82 pour le rendez-vous."
        output = Anonymizer(parsed_data=self._blank_context()).anonymize(sentence)

        assert "06.25.39.26.82" not in output
        assert "[TEL" in output

    def test_anonymize_email(self):
        """An e-mail address is replaced by an ``[EMAIL`` placeholder."""
        from src.anonymization.anonymizer import Anonymizer

        sentence = "Contact: faudemar@ch-cotebasque.fr"
        output = Anonymizer(parsed_data=self._blank_context()).anonymize(sentence)

        assert "faudemar@ch-cotebasque.fr" not in output
        assert "[EMAIL" in output
|
||||
126
tests/test_extraction.py
Normal file
126
tests/test_extraction.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Tests pour le module d'extraction."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.extraction.document_classifier import classify
|
||||
from src.extraction.crh_parser import parse_crh
|
||||
from src.extraction.trackare_parser import parse_trackare, _clean_person_name
|
||||
|
||||
|
||||
class TestDocumentClassifier:
    """Checks the label returned by ``classify`` for sample document headers."""

    def test_classify_trackare(self):
        """A header with the Trackare section titles is labelled ``trackare``."""
        sample = """CENTRE HOSPITALIER COTE BASQUE
Dossier Patient
Détails des patients
Nom de naissance: CLIER IPP: 01306172
Détails épisode
Episode No: 23042753
Signes Vitaux"""
        label = classify(sample)
        assert label == "trackare"

    def test_classify_crh(self):
        """A discharge-letter opening is labelled ``crh``."""
        sample = """N° Finess CENTRE HOSPITALIER COTE BASQUE
Pôle Spécialités Médicales
Service de Gastro-Entérologie
Mon cher confrère,
Votre patiente a été hospitalisée"""
        label = classify(sample)
        assert label == "crh"

    def test_classify_trackare_by_ipp(self):
        """IPP and episode identifiers alone are labelled ``trackare``."""
        label = classify("IPP: 12345678 Episode No: 87654321")
        assert label == "trackare"
|
||||
|
||||
|
||||
class TestCRHParser:
    """Checks the fields ``parse_crh`` extracts from discharge-letter text."""

    def test_parse_patient_info(self):
        """Full name, sex and birth date are read from the letter."""
        letter = """MME NARBAIS AUDREY
MAISON IRREXELAIA
64430 ST ETIENNE DE BAIGORRY

Mon cher confrère,
Votre patiente NARBAIS Audrey née le 23/02/1980 a été hospitalisée
du 25/02/2023 au 03/03/2023 pour le motif suivant:
Pancréatite aiguë lithiasique"""
        patient = parse_crh(letter)["patient"]

        assert patient["nom_complet"] == "NARBAIS AUDREY"
        assert patient["sexe"] == "F"
        assert patient["date_naissance"] == "23/02/1980"

    def test_parse_sejour(self):
        """Admission and discharge dates are read from the ``du … au …`` phrase."""
        letter = """Votre patiente née le 23/02/1980 a été hospitalisée
du 25/02/2023 au 03/03/2023 pour le motif suivant:
Pancréatite aiguë"""
        stay = parse_crh(letter)["sejour"]

        assert stay["date_entree"] == "25/02/2023"
        assert stay["date_sortie"] == "03/03/2023"

    def test_parse_medecins(self):
        """Doctor surnames introduced by ``Dr`` are listed under ``medecins``."""
        parsed = parse_crh("Dr PUJOS. Dr F. AUDEMAR. Docteur DUTREY Sarah.")
        doctors = parsed["medecins"]

        for surname in ("PUJOS", "AUDEMAR"):
            assert any(surname in doctor for doctor in doctors)
|
||||
|
||||
|
||||
class TestTrackareParser:
    """Checks the fields ``parse_trackare`` extracts from Trackare text."""

    def test_parse_patient_info(self):
        """Identity, BMI and stay fields are read from the header block."""
        header = """Nom de naissance: CLIER IPP: 01306172
Nom et Prénom: NARBAIS AUDREY Date de naissance: 23/02/1980
Sexe: Féminin Lieu de naissance: CHAMPIGNY SUR MARNE
Adresse: MAISON IRREXELAIA QUARTIER AUZO TTIPI Ville de résidence: ST ETIENNE DE BAIGORRY
Code Postal: 64430
Episode No: 23042753
Date d'admission: 25/02/2023 Heure d'admission: 03:07
Date de sortie: 03/03/2023
Taille: 162 cm - Poids: 90.2 kg - IMC: 34.370"""
        parsed = parse_trackare(header)
        patient = parsed["patient"]
        stay = parsed["sejour"]

        assert patient["nom_naissance"] == "CLIER"
        assert patient["nom_prenom"] == "NARBAIS AUDREY"
        assert patient["ipp"] == "01306172"
        assert patient["sexe"] == "F"
        assert patient["date_naissance"] == "23/02/1980"
        assert patient["imc"] == 34.370
        assert stay["episode"] == "23042753"
        assert stay["date_entree"] == "25/02/2023"

    def test_parse_diagnostics(self):
        """The first diagnosis row yields its ICD-10 code and its type."""
        table = """Diagnostic aux urgences
Type Etat Code Date
Principal actif K80.5 Calcul des canaux biliaires (sans angiocholite ni cholécystite) [CMA2] 25/02/2023 05:27"""
        diagnoses = parse_trackare(table)["diagnostics"]

        assert len(diagnoses) >= 1
        first = diagnoses[0]
        assert first["code_cim10"] == "K80.5"
        assert first["type"] == "Principal"

    def test_parse_vitals(self):
        """Height, weight and BMI are read from the vitals table."""
        table = """Poids/Taille
Taille [cm] 162,00
Poids [kg] 90,20
Indice
de masse 34.370"""
        vitals = parse_trackare(table)["signes_vitaux"]

        assert vitals["taille_cm"] == 162.0
        assert vitals["poids_kg"] >= 90.0
        assert vitals["imc"] == 34.370
|
||||
|
||||
|
||||
class TestCleanPersonName:
    """Checks the output of ``_clean_person_name`` for a few raw inputs."""

    def test_clean_simple(self):
        """A clean first name plus surname comes back unchanged."""
        cleaned = _clean_person_name("Sarah DUTREY")
        assert cleaned == "Sarah DUTREY"

    def test_clean_with_noise(self):
        """Trailing lowercase words after the name are dropped."""
        cleaned = _clean_person_name("Sarah DUTREY une complication")
        assert cleaned == "Sarah DUTREY"

    def test_clean_multiline(self):
        """A newline between the name parts becomes a single space."""
        cleaned = _clean_person_name("Sarah\nDUTREY")
        assert cleaned == "Sarah DUTREY"

    def test_clean_medical_term(self):
        """The medical term ``Bilirubine`` alone yields an empty string."""
        cleaned = _clean_person_name("Bilirubine")
        assert cleaned == ""

    def test_clean_empty(self):
        """An empty input yields an empty string."""
        cleaned = _clean_person_name("")
        assert cleaned == ""
|
||||
124
tests/test_integration.py
Normal file
124
tests/test_integration.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""Tests d'intégration end-to-end sur les PDFs réels."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import INPUT_DIR, ANONYMIZED_DIR, STRUCTURED_DIR
|
||||
from src.main import process_pdf
|
||||
|
||||
|
||||
# Sample documents; the integration classes are skipped when either is missing.
CRH_PDF = INPUT_DIR / "CRH 23042753.pdf"
TRACKARE_PDF = INPUT_DIR / "trackare-01306172-23042753_01306172_23042753.pdf"

needs_pdfs = pytest.mark.skipif(
    not (CRH_PDF.exists() and TRACKARE_PDF.exists()),
    reason="PDFs de test non disponibles dans input/",
)

# Known personal data that must not survive anonymisation.
# NOTE(review): these look like real patient identifiers — confirm they are
# synthetic before this file is shared outside the project.
PATIENT_PII = [
    "NARBAIS",
    "CLIER",
    "AUDREY",
    "01306172",
    "23042753",
    "23/02/1980",
    "IRREXELAIA",
    "BAIGORRY",
    "06.25.39.26.82",
]

# Caregiver surnames that must not survive anonymisation.
SOIGNANT_NAMES = [
    "DUTREY",
    "MENDIBOURE",
    "PUJOS",
    "AUDEMAR",
    "MELLIN",
    "GUIRESSE",
    "GOYTINO",
    "SERRE",
    "NOVION",
]
|
||||
|
||||
|
||||
@needs_pdfs
class TestCRHIntegration:
    """End-to-end checks on the output of ``process_pdf`` for the CRH sample."""

    @pytest.fixture(autouse=True, scope="class")
    def setup(self, request):
        """Run ``process_pdf`` once for the class and share its three results.

        The fixture is class-scoped so the pipeline is not re-run for every
        test method. Results are stored on the class itself (``request.cls``)
        so each test instance can read them through ``self``. The tests below
        only read these objects.
        """
        anonymized, dossier, report = process_pdf(CRH_PDF)
        request.cls.anonymized = anonymized
        request.cls.dossier = dossier
        request.cls.report = report

    def test_no_patient_pii(self):
        """No known patient identifier remains, compared case-insensitively."""
        text_upper = self.anonymized.upper()
        for pii in PATIENT_PII:
            assert pii.upper() not in text_upper, f"PII trouvé : {pii}"

    def test_medical_content_preserved(self):
        """Key medical terms are still present in the anonymised text."""
        text_lower = self.anonymized.lower()
        for term in ["pancréatite", "cholécystectomie", "cholangiographie", "lithiase"]:
            assert term in text_lower, f"Terme médical manquant : {term}"

    def test_diagnostic_principal(self):
        """The main diagnosis is suggested as K85.1."""
        dp = self.dossier.diagnostic_principal
        assert dp is not None
        assert dp.cim10_suggestion == "K85.1"

    def test_diagnostics_associes(self):
        """At least one of K80.5 / K80.2 is among the associated diagnoses."""
        codes = {d.cim10_suggestion for d in self.dossier.diagnostics_associes}
        assert "K80.5" in codes or "K80.2" in codes

    def test_sejour(self):
        """Sex, age, dates and length of stay match the sample document."""
        s = self.dossier.sejour
        assert s.sexe == "F"
        assert s.age == 43
        assert s.date_entree == "25/02/2023"
        assert s.date_sortie == "03/03/2023"
        assert s.duree_sejour == 6

    def test_traitements_have_optional_atc(self):
        """Every discharge treatment exposes a ``code_atc`` attribute (may be None)."""
        for t in self.dossier.traitements_sortie:
            assert hasattr(t, "code_atc")
|
||||
|
||||
|
||||
@needs_pdfs
class TestTrackareIntegration:
    """End-to-end checks on the output of ``process_pdf`` for the Trackare sample."""

    @pytest.fixture(autouse=True, scope="class")
    def setup(self, request):
        """Run ``process_pdf`` once for the class and share its three results.

        The fixture is class-scoped so the pipeline is not re-run for every
        test method. Results are stored on the class itself (``request.cls``)
        so each test instance can read them through ``self``. The tests below
        only read these objects.
        """
        anonymized, dossier, report = process_pdf(TRACKARE_PDF)
        request.cls.anonymized = anonymized
        request.cls.dossier = dossier
        request.cls.report = report

    def test_no_patient_pii(self):
        """No known patient identifier remains, compared case-insensitively."""
        text_upper = self.anonymized.upper()
        for pii in PATIENT_PII:
            assert pii.upper() not in text_upper, f"PII trouvé : {pii}"

    def test_no_soignant_names(self):
        """No caregiver surname remains as a whole word, ignoring case."""
        # Imported once here rather than on every loop iteration.
        import re

        text = self.anonymized
        for name in SOIGNANT_NAMES:
            # Whole-word match, so a surname embedded in a longer word does
            # not count as a leak.
            pattern = re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE)
            matches = pattern.findall(text)
            assert len(matches) == 0, f"Nom soignant trouvé : {name} ({len(matches)} occurrences)"

    def test_medical_content_preserved(self):
        """Key medical terms are still present in the anonymised text."""
        text_lower = self.anonymized.lower()
        for term in ["pancréatite", "cholécystectomie", "morphine", "paracétamol"]:
            assert term in text_lower, f"Terme médical manquant : {term}"

    def test_diagnostic_principal(self):
        """The main diagnosis is suggested as K80.5 or K85.1."""
        dp = self.dossier.diagnostic_principal
        assert dp is not None
        assert dp.cim10_suggestion in ("K80.5", "K85.1")

    def test_sejour_with_vitals(self):
        """Sex and age match; BMI, weight and height are populated."""
        s = self.dossier.sejour
        assert s.sexe == "F"
        assert s.age == 43
        assert s.imc is not None
        assert s.imc > 30
        assert s.poids is not None
        assert s.taille is not None

    def test_biologie(self):
        """Lipase and CRP are reported, and every lipase entry is flagged abnormal."""
        tests = {b.test for b in self.dossier.biologie_cle}
        assert "Lipasémie" in tests
        assert "CRP" in tests
        # The lipase result must be flagged as abnormal.
        for b in self.dossier.biologie_cle:
            if b.test == "Lipasémie":
                assert b.anomalie is True

    def test_report_counts(self):
        """The report records more than 100 replacements, over 50 from regexes."""
        assert self.report.total_replacements > 100
        assert self.report.regex_replacements > 50
|
||||
238
tests/test_medical.py
Normal file
238
tests/test_medical.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""Tests pour le module d'extraction médicale CIM-10."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import DossierMedical, Diagnostic
|
||||
from src.medical.cim10_extractor import (
|
||||
extract_medical_info,
|
||||
_lookup_cim10,
|
||||
_is_abnormal,
|
||||
)
|
||||
|
||||
|
||||
class TestCIM10Lookup:
    """Checks the ICD-10 code ``_lookup_cim10`` returns for sample labels."""

    def test_pancréatite_lithiasique(self):
        """Acute gallstone pancreatitis is looked up as K85.1."""
        code = _lookup_cim10("pancréatite aiguë lithiasique")
        assert code == "K85.1"

    def test_lithiase_choledoque(self):
        """Common bile duct stone is looked up as K80.5."""
        code = _lookup_cim10("lithiase du cholédoque")
        assert code == "K80.5"

    def test_eruption_medicamenteuse(self):
        """Drug eruption is looked up as L27.0."""
        code = _lookup_cim10("éruption médicamenteuse")
        assert code == "L27.0"

    def test_obesite(self):
        """Obesity is looked up as E66.0."""
        code = _lookup_cim10("obésité")
        assert code == "E66.0"

    def test_unknown(self):
        """The label ``grippe`` is expected to have no code."""
        code = _lookup_cim10("grippe")
        assert code is None
|
||||
|
||||
|
||||
class TestIsAbnormal:
    """Checks the flag ``_is_abnormal`` returns for sample lab results."""

    def test_lipasemie_high(self):
        """A lipase value of 6000 is flagged abnormal."""
        flag = _is_abnormal("Lipasémie", "6000")
        assert flag is True

    def test_crp_normal(self):
        """A CRP value of 3 is not flagged."""
        flag = _is_abnormal("CRP", "3")
        assert flag is False

    def test_crp_high(self):
        """A CRP value of 12 is flagged abnormal."""
        flag = _is_abnormal("CRP", "12")
        assert flag is True

    def test_troponine_negative(self):
        """A troponin reported as ``négative`` is not flagged."""
        flag = _is_abnormal("Troponine", "négative")
        assert flag is False

    def test_unknown_test(self):
        """A test name the function does not know yields None."""
        flag = _is_abnormal("TestInconnu", "42")
        assert flag is None
|
||||
|
||||
|
||||
class TestExtractMedicalInfo:
|
||||
def test_extract_from_trackare(self):
|
||||
parsed = {
|
||||
"type": "trackare",
|
||||
"patient": {
|
||||
"sexe": "F",
|
||||
"date_naissance": "23/02/1980",
|
||||
"imc": 34.37,
|
||||
"poids_kg": 90.2,
|
||||
"taille_cm": 162,
|
||||
},
|
||||
"sejour": {
|
||||
"date_entree": "25/02/2023",
|
||||
"date_sortie": "03/03/2023",
|
||||
},
|
||||
"urgences": {"mode_entree": "Urgences"},
|
||||
"diagnostics": [
|
||||
{
|
||||
"type": "Principal",
|
||||
"statut": "actif",
|
||||
"code_cim10": "K80.5",
|
||||
"libelle": "Calcul des canaux biliaires",
|
||||
}
|
||||
],
|
||||
"signes_vitaux": {"imc": 34.37, "poids_kg": 90.2, "taille_cm": 162},
|
||||
}
|
||||
text = """Pancréatite aiguë lithiasique.
|
||||
Cholécystectomie par cœlioscopie le 01/03.
|
||||
Cholangiographie retrouvant une lithiase du bas cholédoque.
|
||||
TDM à J3 retrouve : Absence de signe de gravité. Score de Balthazar à 0.
|
||||
Éruption cutanée érythémateuse. Réaction au tramadol.
|
||||
IMC: 34.370
|
||||
TTT de sortie :
|
||||
Paracétamol et Acupan si besoin
|
||||
Cétirizine
|
||||
|
||||
Devenir : sortie le 03/03."""
|
||||
|
||||
dossier = extract_medical_info(parsed, text)
|
||||
|
||||
# Séjour
|
||||
assert dossier.sejour.sexe == "F"
|
||||
assert dossier.sejour.age == 43
|
||||
assert dossier.sejour.duree_sejour == 6
|
||||
assert dossier.sejour.imc == 34.37
|
||||
|
||||
# DP
|
||||
assert dossier.diagnostic_principal is not None
|
||||
assert dossier.diagnostic_principal.cim10_suggestion == "K80.5"
|
||||
|
||||
# DAS
|
||||
codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
|
||||
assert "L27.0" in codes # Éruption médicamenteuse
|
||||
assert "E66.0" in codes # Obésité
|
||||
|
||||
# Actes
|
||||
acte_codes = {a.code_ccam_suggestion for a in dossier.actes_ccam}
|
||||
assert "HMFC004" in acte_codes # Cholécystectomie
|
||||
assert "ZCQK002" in acte_codes # TDM
|
||||
|
||||
# Traitements
|
||||
meds = [t.medicament for t in dossier.traitements_sortie]
|
||||
assert any("Paracétamol" in m for m in meds)
|
||||
assert any("Cétirizine" in m for m in meds)
|
||||
|
||||
# Bio
|
||||
tests = {b.test for b in dossier.biologie_cle}
|
||||
assert "Troponine" not in tests # pas dans ce texte minimal
|
||||
|
||||
# Imagerie
|
||||
assert len(dossier.imagerie) >= 1
|
||||
assert any("Balthazar" in (i.score or "") for i in dossier.imagerie)
|
||||
|
||||
# Complications
|
||||
assert any("cutanée" in c.lower() for c in dossier.complications)
|
||||
|
||||
def test_extract_without_edsnlp(self):
|
||||
"""Vérifie que l'extraction fonctionne sans résultat edsnlp."""
|
||||
parsed = {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": [],
|
||||
}
|
||||
text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol 1g matin et soir\n\nDevenir : retour."
|
||||
|
||||
dossier = extract_medical_info(parsed, text, edsnlp_result=None)
|
||||
assert dossier.diagnostic_principal is not None
|
||||
assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
|
||||
assert len(dossier.traitements_sortie) >= 1
|
||||
|
||||
def test_extract_with_edsnlp_result(self):
|
||||
"""Vérifie que les résultats edsnlp enrichissent les diagnostics."""
|
||||
from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity, DrugEntity
|
||||
|
||||
parsed = {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": [],
|
||||
}
|
||||
text = "Patient admis pour douleur abdominale."
|
||||
|
||||
edsnlp_result = EdsnlpResult(
|
||||
cim10_entities=[
|
||||
CIM10Entity(texte="douleur abdominale", code="R10.4", negation=False),
|
||||
],
|
||||
drug_entities=[],
|
||||
)
|
||||
|
||||
dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
|
||||
# Le DP devrait être trouvé via edsnlp
|
||||
assert dossier.diagnostic_principal is not None
|
||||
assert dossier.diagnostic_principal.cim10_suggestion == "R10.4"
|
||||
|
||||
def test_negated_edsnlp_entities_ignored(self):
|
||||
"""Vérifie que les entités niées par edsnlp ne sont pas retenues."""
|
||||
from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity
|
||||
|
||||
parsed = {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": [],
|
||||
}
|
||||
text = "Pas de fièvre. Patient en bon état."
|
||||
|
||||
edsnlp_result = EdsnlpResult(
|
||||
cim10_entities=[
|
||||
CIM10Entity(texte="fièvre", code="R50.9", negation=True),
|
||||
],
|
||||
)
|
||||
|
||||
dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
|
||||
# L'entité niée ne doit pas apparaître comme diagnostic
|
||||
all_codes = set()
|
||||
if dossier.diagnostic_principal:
|
||||
all_codes.add(dossier.diagnostic_principal.cim10_suggestion)
|
||||
for d in dossier.diagnostics_associes:
|
||||
all_codes.add(d.cim10_suggestion)
|
||||
assert "R50.9" not in all_codes
|
||||
|
||||
def test_drug_atc_enrichment(self):
|
||||
"""Vérifie que les codes ATC edsnlp sont ajoutés aux traitements."""
|
||||
from src.medical.edsnlp_pipeline import EdsnlpResult, DrugEntity
|
||||
|
||||
parsed = {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": [],
|
||||
}
|
||||
text = "TTT de sortie :\nParacétamol 1g matin\n\nDevenir : retour."
|
||||
|
||||
edsnlp_result = EdsnlpResult(
|
||||
drug_entities=[
|
||||
DrugEntity(texte="Paracétamol", code_atc="N02BE01", negation=False),
|
||||
],
|
||||
)
|
||||
|
||||
dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
|
||||
assert len(dossier.traitements_sortie) >= 1
|
||||
paracetamol = next(
|
||||
(t for t in dossier.traitements_sortie if "Paracétamol" in t.medicament), None
|
||||
)
|
||||
assert paracetamol is not None
|
||||
assert paracetamol.code_atc == "N02BE01"
|
||||
|
||||
def test_edsnlp_negation_for_complications(self):
|
||||
"""Vérifie que la négation edsnlp filtre les complications."""
|
||||
from src.medical.edsnlp_pipeline import EdsnlpResult, CIM10Entity
|
||||
|
||||
parsed = {
|
||||
"type": "crh",
|
||||
"patient": {"sexe": "M"},
|
||||
"sejour": {},
|
||||
"diagnostics": [],
|
||||
}
|
||||
text = "Pas de fièvre ni d'infection. Bonne évolution."
|
||||
|
||||
edsnlp_result = EdsnlpResult(
|
||||
cim10_entities=[
|
||||
CIM10Entity(texte="fièvre", code="R50.9", negation=True),
|
||||
CIM10Entity(texte="infection", code="A49.9", negation=True),
|
||||
],
|
||||
)
|
||||
|
||||
dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
|
||||
# Fièvre et infection sont niées, ne doivent pas apparaître dans complications
|
||||
complication_terms = [c.lower() for c in dossier.complications]
|
||||
assert "fièvre" not in complication_terms
|
||||
assert "infection" not in complication_terms
|
||||
Reference in New Issue
Block a user