#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test Phase 1 Corrections - automatic validation of the 3 critical corrections.

Runs the corrections on a sample of documents in order to verify:
1. [DATE] = 0 (only birth dates are masked)
2. Medications are preserved (not masked)
3. Structural medical terms are preserved ("Chef de service", etc.)
"""

import sys
from pathlib import Path
import json
import re
from typing import Dict, Iterable, List, Optional, Union

# Add the root directory to the import path.
sys.path.insert(0, str(Path(__file__).parent.parent))

from anonymizer_core_refactored_onnx import process_pdf

# Default sample of documents exercised by the test.
DEFAULT_TEST_DOCS = (
    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/008_23001234/CRH 23001234.pdf",
    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/021_23012345/CRO 23012345.pdf",
    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/033_23023456/trackare-23023456-12345678.pdf",
)

# Default configuration file handed to process_pdf.
# NOTE(review): this path is relative to the current working directory, not
# to this file - confirm that is what the callers expect.
DEFAULT_CONFIG_PATH = Path("config/dictionnaires.yml")

# Medication names that must still be readable in the anonymized text.
MEDICATIONS_TO_CHECK = (
    "IDACIO",
    "SALAZOPYRINE",
    "INFLIXIMAB",
    "APRANAX",
    "KETOPROFENE",
    "PREVENAR",
    "PNEUMOVAX",
)

# Structural medical terms that must still be readable in the anonymized text.
MEDICAL_TERMS_TO_CHECK = (
    "Chef de service",
    "Chef de Clinique",
    "Praticien hospitalier",
    "service de",
)

Results = Dict[str, Dict[str, int]]


def _new_results() -> Results:
    """Return zeroed total/passed/failed counters for the three checks."""
    names = ("date_masking", "medication_preservation", "medical_terms_preservation")
    return {name: {"total": 0, "passed": 0, "failed": 0} for name in names}


def _find_present(text: str, terms: Iterable[str]) -> List[str]:
    """Return the terms that occur in *text*, compared case-insensitively."""
    lowered = text.lower()  # lower-case the document once, not once per term
    return [term for term in terms if term.lower() in lowered]


def _check_document(text: str, results: Results) -> None:
    """Run the three checks on one anonymized text and update *results*."""
    # Check 1: the generic [DATE] placeholder must not appear at all.
    date_count = text.count("[DATE]")
    date_naissance_count = text.count("[DATE_NAISSANCE]")
    date_ok = date_count == 0
    icon = "✅" if date_ok else "❌"
    print(f"{icon} Correction 1: [DATE] = {date_count} (attendu: 0)")
    print(f" [DATE_NAISSANCE] = {date_naissance_count}")
    results["date_masking"]["total"] += 1
    results["date_masking"]["passed" if date_ok else "failed"] += 1

    # Check 2: known medication names still present in the anonymized text.
    # The previous version additionally required "[NOM]" to be absent from the
    # whole document; that condition did not depend on the medication being
    # checked and disabled this check for any output containing a "[NOM]"
    # token, so it was dropped.
    medications_found = _find_present(text, MEDICATIONS_TO_CHECK)
    if medications_found:
        print(f"✅ Correction 2: Médicaments préservés: {', '.join(medications_found)}")
        results["medication_preservation"]["total"] += 1
        results["medication_preservation"]["passed"] += 1
    else:
        # Nothing to look at in this document: the check is not counted.
        print("⚪ Correction 2: Aucun médicament testé dans ce document")

    # Check 3: structural medical terms still present in the anonymized text.
    medical_terms_found = _find_present(text, MEDICAL_TERMS_TO_CHECK)
    if medical_terms_found:
        print(f"✅ Correction 3: Termes médicaux préservés: {', '.join(medical_terms_found)}")
        results["medical_terms_preservation"]["total"] += 1
        results["medical_terms_preservation"]["passed"] += 1
    else:
        # Nothing to look at in this document: the check is not counted.
        print("⚪ Correction 3: Aucun terme médical testé dans ce document")


def _print_summary(results: Results) -> None:
    """Print one summary line per check."""
    print("\n" + "=" * 80)
    print("RÉSUMÉ DES TESTS")
    print("=" * 80)
    for test_name, test_results in results.items():
        total = test_results["total"]
        passed = test_results["passed"]
        failed = test_results["failed"]
        if total > 0:
            success_rate = (passed / total) * 100
            status = "✅" if failed == 0 else "❌"
            print(f"{status} {test_name}: {passed}/{total} ({success_rate:.1f}%)")
        else:
            print(f"⚪ {test_name}: Aucun test applicable")
    print()


def test_phase1_corrections(
    test_docs: Optional[Iterable[Union[str, Path]]] = None,
    config_path: Optional[Path] = None,
) -> int:
    """Run the 3 Phase 1 correction checks on a sample of documents.

    Each existing document is passed to ``process_pdf`` and the
    ``"text_anonymized"`` entry of its result is inspected.

    Args:
        test_docs: Paths of the PDF documents to check. Defaults to
            ``DEFAULT_TEST_DOCS``. Paths that do not exist are reported and
            skipped.
        config_path: Configuration file given to ``process_pdf``. Defaults to
            ``DEFAULT_CONFIG_PATH``.

    Returns:
        0 when at least one check ran, no check failed and no document raised
        an error during processing; 1 otherwise.
    """
    docs = DEFAULT_TEST_DOCS if test_docs is None else test_docs
    cfg = DEFAULT_CONFIG_PATH if config_path is None else config_path

    print("=" * 80)
    print("TEST PHASE 1 CORRECTIONS")
    print("=" * 80)
    print()

    results = _new_results()
    errors = 0

    for doc_path in docs:
        pdf_path = Path(doc_path)
        if not pdf_path.exists():
            print(f"⚠️ Document non trouvé: {pdf_path.name}")
            continue

        print(f"\n📄 Test: {pdf_path.name}")
        print("-" * 80)

        try:
            # Anonymize the document.
            result = process_pdf(
                pdf_path=pdf_path,
                config_path=cfg,
                ner_manager=None,
                eds_pseudo_manager=None,
                vlm_manager=None,
                output_dir=None,
                redaction_mode="none",
            )
            _check_document(result["text_anonymized"], results)
        except Exception as exc:
            # Per-document boundary: keep going with the other documents, but
            # remember the error so that the final verdict cannot be positive.
            errors += 1
            print(f"❌ Erreur: {exc}")

    _print_summary(results)

    # Final verdict. all() is True for an empty iterable, so "no check ran"
    # has to be detected explicitly instead of being reported as a success.
    ran_any = any(stats["total"] > 0 for stats in results.values())
    no_failure = all(stats["failed"] == 0 for stats in results.values())

    if errors:
        print(f"❌ {errors} document(s) en erreur pendant le traitement")
    if not ran_any:
        print("❌ AUCUN TEST EXÉCUTÉ - Aucun document n'a pu être vérifié")
        return 1
    if no_failure and errors == 0:
        print("✅ TOUS LES TESTS PASSÉS - Phase 1 corrections validées")
        return 0
    print("❌ CERTAINS TESTS ONT ÉCHOUÉ - Vérifier les corrections")
    return 1


if __name__ == "__main__":
    sys.exit(test_phase1_corrections())