"""Tests d'intégration end-to-end sur les PDFs réels.""" import json from pathlib import Path import pytest from src.config import INPUT_DIR, ANONYMIZED_DIR, STRUCTURED_DIR from src.main import process_pdf # Skip si les PDFs ne sont pas disponibles CRH_PDF = INPUT_DIR / "CRH 23042753.pdf" TRACKARE_PDF = INPUT_DIR / "trackare-01306172-23042753_01306172_23042753.pdf" needs_pdfs = pytest.mark.skipif( not CRH_PDF.exists() or not TRACKARE_PDF.exists(), reason="PDFs de test non disponibles dans input/", ) # Données personnelles connues à vérifier PATIENT_PII = [ "NARBAIS", "CLIER", "AUDREY", "01306172", "23042753", "23/02/1980", "IRREXELAIA", "BAIGORRY", "06.25.39.26.82", ] SOIGNANT_NAMES = [ "DUTREY", "MENDIBOURE", "PUJOS", "AUDEMAR", "MELLIN", "GUIRESSE", "GOYTINO", "SERRE", "NOVION", ] @needs_pdfs class TestCRHIntegration: @pytest.fixture(autouse=True) def setup(self): self.anonymized, self.dossier, self.report = process_pdf(CRH_PDF) def test_no_patient_pii(self): text_upper = self.anonymized.upper() for pii in PATIENT_PII: assert pii.upper() not in text_upper, f"PII trouvé : {pii}" def test_medical_content_preserved(self): text_lower = self.anonymized.lower() for term in ["pancréatite", "cholécystectomie", "cholangiographie", "lithiase"]: assert term in text_lower, f"Terme médical manquant : {term}" def test_diagnostic_principal(self): dp = self.dossier.diagnostic_principal assert dp is not None assert dp.cim10_suggestion == "K85.1" def test_diagnostics_associes(self): codes = {d.cim10_suggestion for d in self.dossier.diagnostics_associes} assert "K80.5" in codes or "K80.2" in codes def test_sejour(self): s = self.dossier.sejour assert s.sexe == "F" assert s.age == 43 assert s.date_entree == "25/02/2023" assert s.date_sortie == "03/03/2023" assert s.duree_sejour == 6 def test_traitements_have_optional_atc(self): """Vérifie que les traitements ont le champ code_atc (peut être None).""" for t in self.dossier.traitements_sortie: assert hasattr(t, "code_atc") @needs_pdfs 
class TestTrackareIntegration:
    """Checks on the result of running ``process_pdf`` on ``TRACKARE_PDF``."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Process the Trackare PDF before each test and keep the three results on the instance."""
        self.anonymized, self.dossier, self.report = process_pdf(TRACKARE_PDF)

    def test_no_patient_pii(self):
        """None of the known patient identifiers survives, compared case-insensitively."""
        text_upper = self.anonymized.upper()
        for pii in PATIENT_PII:
            assert pii.upper() not in text_upper, f"PII trouvé : {pii}"

    def test_no_soignant_names(self):
        """No caregiver surname appears as a standalone word, ignoring case."""
        # Imported once at function scope instead of on every loop iteration.
        import re

        text = self.anonymized
        for name in SOIGNANT_NAMES:
            # The \b anchors restrict the search to whole words, so a name that
            # is merely embedded in a longer word is not reported (the original
            # note gave "SELLE" inside "selles" as the motivating example).
            pattern = re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE)
            matches = pattern.findall(text)
            assert not matches, f"Nom soignant trouvé : {name} ({len(matches)} occurrences)"

    def test_medical_content_preserved(self):
        """Selected medical terms are still present in the anonymized text."""
        text_lower = self.anonymized.lower()
        for term in ("pancréatite", "cholécystectomie", "morphine", "paracétamol"):
            assert term in text_lower, f"Terme médical manquant : {term}"

    def test_diagnostic_principal(self):
        """A principal diagnosis exists and its suggested CIM-10 code is K80.5 or K85.1."""
        dp = self.dossier.diagnostic_principal
        assert dp is not None
        assert dp.cim10_suggestion in ("K80.5", "K85.1")

    def test_sejour_with_vitals(self):
        """Stay has the expected sex and age, an ``imc`` above 30, and weight/height set."""
        s = self.dossier.sejour
        assert s.sexe == "F"
        assert s.age == 43
        assert s.imc is not None
        assert s.imc > 30
        assert s.poids is not None
        assert s.taille is not None

    def test_biologie(self):
        """Key lab results include Lipasémie and CRP; every Lipasémie entry is flagged abnormal."""
        tests = {b.test for b in self.dossier.biologie_cle}
        assert "Lipasémie" in tests
        assert "CRP" in tests
        # Lipasémie must be flagged as abnormal.
        for b in self.dossier.biologie_cle:
            if b.test == "Lipasémie":
                assert b.anomalie is True

    def test_report_counts(self):
        """The report shows more than 100 replacements in total and more than 50 from regexes."""
        assert self.report.total_replacements > 100
        assert self.report.regex_replacements > 50