#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Quick check of the DATE fix (Phase 1).

Runs ``process_pdf`` on a few PDFs from the ground-truth test set, then reads
the ``<stem>.pseudonymise.txt`` file this script expects to find in the output
directory and reports how many ``[DATE]`` and ``[DATE_NAISSANCE]`` placeholders
it contains.  A document is reported as OK when it contains no ``[DATE]``
placeholder.

All input/output paths are relative, so the script must be launched from the
directory that contains ``tests/`` and ``config/``.
"""

import sys
from pathlib import Path
from typing import List, Tuple

# Make the parent of this script's directory importable so that the
# ``anonymizer_core_refactored_onnx`` module can be found from main().
sys.path.insert(0, str(Path(__file__).parent.parent))

# Three documents taken from the test dataset.
SAMPLE_DOCS = (
    "tests/ground_truth/pdfs/001_simple_compte_rendu_460_23153652_CR_COLOSCOPIE.pdf",
    "tests/ground_truth/pdfs/008_moyen_compte_rendu_195_23144210_ANAPATH.pdf",
    "tests/ground_truth/pdfs/013_moyen_compte_rendu_363_23085243_CRO.pdf",
)
OUT_DIR = Path("tests/phase1_test_output")
CONFIG_PATH = Path("config/dictionnaires.yml")


def count_date_tags(text: str) -> Tuple[int, int]:
    """Return ``(date_count, date_naissance_count)`` for *text*.

    The two placeholders are counted independently: ``[DATE_NAISSANCE]`` does
    not contain the literal ``[DATE]``, so it never inflates the first count.
    """
    return text.count("[DATE]"), text.count("[DATE_NAISSANCE]")


def format_report(pdf_name: str, text: str) -> List[str]:
    """Build the report lines printed for one processed document.

    Args:
        pdf_name: File name of the PDF, used as the label of the report.
        text: Content of the anonymised text output for that PDF.

    Returns:
        Three lines: a status line (OK only when no ``[DATE]`` placeholder is
        present), the ``[DATE]`` count and the ``[DATE_NAISSANCE]`` count.
    """
    date_count, date_naissance_count = count_date_tags(text)
    status = "✅" if date_count == 0 else "❌"
    return [
        f"{status} {pdf_name}",
        f" [DATE]: {date_count} (attendu: 0)",
        f" [DATE_NAISSANCE]: {date_naissance_count}",
    ]


def main() -> None:
    """Process every sample document and print a per-document report."""
    # Imported here rather than at module level: the import needs the
    # sys.path entry added above, and keeping it local means that merely
    # importing this file does not load the processing pipeline.
    from anonymizer_core_refactored_onnx import process_pdf

    print("Test correction DATE (Phase 1)")
    print("=" * 80)

    # parents=True so a missing intermediate directory does not abort the run.
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    for doc in SAMPLE_DOCS:
        pdf_path = Path(doc)
        if not pdf_path.exists():
            print(f"⚠️ {pdf_path.name}: non trouvé")
            continue

        # Top-level boundary of a diagnostic script: any failure on one
        # document is reported and the remaining documents are still checked.
        try:
            process_pdf(
                pdf_path=pdf_path,
                out_dir=OUT_DIR,
                make_vector_redaction=False,
                also_make_raster_burn=False,
                config_path=CONFIG_PATH,
                use_hf=False,
                ner_manager=None,
                vlm_manager=None,
            )

            # Read the anonymised text file expected next to the other outputs.
            text_file = OUT_DIR / f"{pdf_path.stem}.pseudonymise.txt"
            if text_file.exists():
                text = text_file.read_text(encoding="utf-8")
                for line in format_report(pdf_path.name, text):
                    print(line)
            else:
                print(f"⚠️ {pdf_path.name}: fichier texte non trouvé")
        except Exception as e:
            print(f"❌ {pdf_path.name}: Erreur - {e}")

    print("\n✅ Test terminé")


if __name__ == "__main__":
    main()