"""Tests pour le module d'anonymisation.""" import pytest from src.anonymization.entity_registry import EntityRegistry from src.anonymization.regex_patterns import ( CRH_FOOTER_IPP_EPISODE, CRH_FOOTER_PATIENT_PATTERN, DATE_NAISSANCE_PATTERN, DR_NAME_PATTERN, EMAIL_PATTERN, EPISODE_PATTERN, FOOTER_PATIENT_PATTERN, IPP_PATTERN, NOTE_AUTHOR_PATTERN, PHONE_PATTERN, RPPS_PATTERN, ) class TestRegexPatterns: def test_ipp_with_colon(self): m = IPP_PATTERN.search("IPP: 01306172") assert m is not None assert m.group(1) == "01306172" def test_ipp_without_colon(self): m = IPP_PATTERN.search("IPP 01306172") assert m is not None assert m.group(1) == "01306172" def test_ipp_in_parentheses(self): m = IPP_PATTERN.search("(01306172 )") assert m is not None assert m.group(2) == "01306172" def test_episode_no(self): m = EPISODE_PATTERN.search("Episode No: 23042753") assert m is not None assert m.group(1) == "23042753" def test_episode_n_degree(self): m = EPISODE_PATTERN.search("N° Episode 23042753") assert m is not None assert m.group(2) == "23042753" def test_phone_dots(self): m = PHONE_PATTERN.search("06.25.39.26.82") assert m is not None assert m.group(0) == "06.25.39.26.82" def test_phone_spaces(self): m = PHONE_PATTERN.search("05 59 44 35 35") assert m is not None def test_email(self): m = EMAIL_PATTERN.search("faudemar@ch-cotebasque.fr") assert m is not None assert m.group(0) == "faudemar@ch-cotebasque.fr" def test_rpps(self): m = RPPS_PATTERN.search("RPPS : 10100532760") assert m is not None assert m.group(1) == "10100532760" def test_date_naissance_nee_le(self): m = DATE_NAISSANCE_PATTERN.search("née le 23/02/1980") assert m is not None assert m.group(1) == "23/02/1980" def test_date_naissance_ne_e_le(self): m = DATE_NAISSANCE_PATTERN.search("Né(e) le 23/02/1980") assert m is not None assert m.group(1) == "23/02/1980" def test_date_naissance_field(self): m = DATE_NAISSANCE_PATTERN.search("Date de naissance: 23/02/1980") assert m is not None assert m.group(1) == "23/02/1980" def test_dr_name(self): m = DR_NAME_PATTERN.search("Dr F. AUDEMAR") assert m is not None assert "AUDEMAR" in m.group(1) def test_dr_name_docteur(self): m = DR_NAME_PATTERN.search("Docteur AUDEMAR Franck") assert m is not None assert "AUDEMAR" in m.group(1) def test_note_author(self): m = NOTE_AUTHOR_PATTERN.search("Note IDE Annie GUIRESSE Non algique") assert m is not None assert m.group(1) == "Annie GUIRESSE" def test_footer_patient_trackare(self): m = FOOTER_PATIENT_PATTERN.search( "Patient: CLIER NARBAIS AUDREY - Date de naissance: 23/02/1980" ) assert m is not None assert "CLIER" in m.group(1) def test_crh_footer_patient(self): m = CRH_FOOTER_PATIENT_PATTERN.search( "Patient(e) : CLIER AUDREY NARBAIS Né(e) le 23/02/1980" ) assert m is not None assert "CLIER" in m.group(1) def test_crh_footer_ipp_episode(self): m = CRH_FOOTER_IPP_EPISODE.search( "IPP 01306172 / N° Episode 23042753 (MEDECINE GASTRO B2 HC)" ) assert m is not None assert m.group(1) == "01306172" assert m.group(2) == "23042753" class TestEntityRegistry: def test_register_returns_pseudo(self): reg = EntityRegistry() pseudo = reg.register("Jean Dupont", "patient") assert pseudo == "[PATIENT_1]" def test_register_same_entity_returns_same(self): reg = EntityRegistry() p1 = reg.register("Jean Dupont", "patient") p2 = reg.register("Jean Dupont", "patient") assert p1 == p2 def test_register_case_insensitive(self): reg = EntityRegistry() p1 = reg.register("Jean DUPONT", "patient") p2 = reg.register("jean dupont", "patient") assert p1 == p2 def test_register_different_categories(self): reg = EntityRegistry() p1 = reg.register("Dupont", "patient") p2 = reg.register("Martin", "medecin") assert p1 == "[PATIENT_1]" assert p2 == "[MEDECIN_1]" def test_get_replacement(self): reg = EntityRegistry() reg.register("Jean Dupont", "patient") assert reg.get_replacement("jean dupont") == "[PATIENT_1]" assert reg.get_replacement("inconnu") is None class TestAnonymizer: def test_anonymize_basic(self): from src.anonymization.anonymizer import Anonymizer parsed = { "patient": {"nom_prenom": "DUPONT Jean", "nom_naissance": "DUPONT"}, "medecins": ["MARTIN Pierre"], "contacts": [], } anonymizer = Anonymizer(parsed_data=parsed) text = "Le patient DUPONT Jean a été vu par Dr MARTIN Pierre." result = anonymizer.anonymize(text) assert "DUPONT" not in result assert "MARTIN" not in result assert "[PATIENT" in result or "[MEDECIN" in result def test_preserves_medical_content(self): from src.anonymization.anonymizer import Anonymizer anonymizer = Anonymizer(parsed_data={"patient": {}, "medecins": [], "contacts": []}) text = "Pancréatite aiguë biliaire. Cholécystectomie par cœlioscopie. IMC 34.37." result = anonymizer.anonymize(text) assert "Pancréatite" in result assert "Cholécystectomie" in result assert "IMC" in result def test_anonymize_phone(self): from src.anonymization.anonymizer import Anonymizer anonymizer = Anonymizer(parsed_data={"patient": {}, "medecins": [], "contacts": []}) text = "Appeler le 06.25.39.26.82 pour le rendez-vous." result = anonymizer.anonymize(text) assert "06.25.39.26.82" not in result assert "[TEL" in result def test_anonymize_email(self): from src.anonymization.anonymizer import Anonymizer anonymizer = Anonymizer(parsed_data={"patient": {}, "medecins": [], "contacts": []}) text = "Contact: faudemar@ch-cotebasque.fr" result = anonymizer.anonymize(text) assert "faudemar@ch-cotebasque.fr" not in result assert "[EMAIL" in result