#!/usr/bin/env python3
"""Display details of false positives from evaluation results.

Reads the baseline quality-evaluation JSON, gathers every detection recorded
in the per-document ``.audit.jsonl`` files, and prints, for each problematic
PII type, the most frequent detected texts plus a few per-file examples.
"""
import json
from pathlib import Path
from collections import defaultdict, Counter

# Evaluation summary produced by a prior run.
EVAL_FILE = Path("tests/ground_truth/quality_evaluation/baseline_quality_evaluation.json")
# Directory holding one "<pdf>.audit.jsonl" file per anonymized document.
AUDIT_DIR = Path("tests/ground_truth/pdfs/baseline_anonymized")

# PII types to inspect, mapped to their expected false-positive counts.
# NOTE(review): these counts are hard-coded from a previous evaluation run —
# confirm they still match the current baseline before trusting the report.
PROBLEMATIC_TYPES = {
    'EPISODE': 106,
    'VILLE': 20,
    'CODE_POSTAL': 10,
    'ADRESSE': 10,
    'TEL': 8
}


def collect_detections(eval_data, audit_dir=AUDIT_DIR):
    """Group all audit detections by PII kind.

    Args:
        eval_data: Parsed evaluation JSON; only ``per_document[*].pdf`` is read.
        audit_dir: Directory containing ``<pdf>.audit.jsonl`` files
            (one JSON object per line). Documents without an audit file
            are silently skipped.

    Returns:
        dict mapping PII kind -> list of ``{'text', 'page', 'file'}`` dicts.
    """
    all_detections = defaultdict(list)
    for doc in eval_data['per_document']:
        pdf_name = doc['pdf']
        audit_file = audit_dir / f"{pdf_name}.audit.jsonl"
        if not audit_file.exists():
            continue
        with open(audit_file, 'r', encoding='utf-8') as f:
            for line in f:
                det = json.loads(line)
                all_detections[det.get('kind', 'UNKNOWN')].append({
                    'text': det.get('original', ''),
                    'page': det.get('page', -1),
                    'file': pdf_name,
                })
    return all_detections


def _report_type(pii_type, expected_fp, detections):
    """Print frequency statistics and sample occurrences for one PII type."""
    print(f"\n{'=' * 80}")
    print(f"Type: {pii_type}")
    print(f"Faux positifs attendus: {expected_fp}")
    print(f"Détections totales: {len(detections)}")
    print("=" * 80)

    # Most frequent detected texts for this type.
    text_counter = Counter(d['text'] for d in detections)
    print("\nTextes les plus fréquents:")
    for text, count in text_counter.most_common(30):
        print(f"  {count:3d}x '{text}'")

    # A few examples with their source file, de-duplicated by (text, file)
    # within the first 20 detections only — mirrors the original sampling.
    print("\nExemples avec fichier:")
    seen = set()
    for d in detections[:20]:
        key = (d['text'], d['file'])
        if key not in seen:
            seen.add(key)
            print(f"  '{d['text']}' (page {d['page']}) - {d['file']}")


def main():
    """Load the evaluation file and print the per-type false-positive report."""
    with open(EVAL_FILE, 'r', encoding='utf-8') as f:
        eval_data = json.load(f)

    print("=" * 80)
    print("ANALYSE DES FAUX POSITIFS PAR TYPE")
    print("=" * 80)

    all_detections = collect_detections(eval_data)
    for pii_type, expected_fp in PROBLEMATIC_TYPES.items():
        _report_type(pii_type, expected_fp, all_detections.get(pii_type, []))

    print("\n" + "=" * 80)


if __name__ == "__main__":
    main()