#!/usr/bin/env python3
"""
Display a summary of the anonymization batch results.
"""

import json
import statistics
import sys
from collections import Counter
from pathlib import Path

# Report read when show_summary() is called without an explicit path
# (relative to the current working directory).
DEFAULT_RESULTS_FILE = (
    Path("tests/ground_truth/pdfs/baseline_anonymized") / "batch_results.json"
)


def _whole_or_exact(value):
    """Return *value* as an int when it is a whole number, unchanged otherwise."""
    return int(value) if float(value).is_integer() else value


def _print_overview(data):
    """Print the banner and the batch-wide totals read from the top level of *data*."""
    separator = "=" * 80
    print(separator)
    print("RÉSUMÉ DU BATCH D'ANONYMISATION")
    print(separator)

    print(f"\n📅 Date: {data['date']}")
    print(f"📄 Documents traités: {data['success_count']}/{data['total_documents']}")
    print(f"🔍 PII détectés: {data['total_pii']:,}")
    print(f"⏱️ Temps total: {data['total_time_s']:.2f}s")
    print(f"⏱️ Temps moyen: {data['avg_time_s']:.2f}s par document")


def _print_distributions(successful):
    """Print min / max / median (and mean for PII) over the successful results."""
    times = [r['time_s'] for r in successful]
    pii_counts = [r['pii_count'] for r in successful]

    print("\n📊 Statistiques de temps:")
    print(f" - Min: {min(times):.2f}s")
    print(f" - Max: {max(times):.2f}s")
    # statistics.median averages the two middle values for even-sized samples;
    # picking sorted(xs)[len(xs) // 2] would report the upper-middle element.
    print(f" - Médiane: {statistics.median(times):.2f}s")

    print("\n📊 Statistiques de PII:")
    print(f" - Min: {min(pii_counts)}")
    print(f" - Max: {max(pii_counts):,}")
    print(f" - Médiane: {_whole_or_exact(statistics.median(pii_counts))}")
    print(f" - Moyenne: {sum(pii_counts) / len(pii_counts):.1f}")


def _print_rankings(successful):
    """Print the five results with the most PII and the five fastest results."""
    print("\n🏆 Top 5 documents les plus complexes (par PII):")
    most_pii = sorted(successful, key=lambda r: r['pii_count'], reverse=True)[:5]
    for rank, r in enumerate(most_pii, 1):
        print(f" {rank}. {r['pdf']}")
        print(f" → {r['pii_count']:,} PII en {r['time_s']:.2f}s")

    print("\n⚡ Top 5 documents les plus rapides:")
    fastest = sorted(successful, key=lambda r: r['time_s'])[:5]
    for rank, r in enumerate(fastest, 1):
        print(f" {rank}. {r['pdf']}")
        print(f" → {r['time_s']:.2f}s ({r['pii_count']} PII)")


def _print_failures(failed):
    """Print one entry per failed result together with its error message."""
    print(f"\n⚠️ Échecs ({len(failed)}):")
    for r in failed:
        # A present-but-empty error is reported as a password-protected PDF;
        # a missing 'error' key falls back to 'Unknown error'.
        error = r.get('error', 'Unknown error') or "PDF protégé par mot de passe"
        print(f" - {r['pdf']}")
        print(f" → {error}")


def show_summary(results_file=None):
    """Print a human-readable summary of an anonymization batch report.

    Args:
        results_file: Path (``str`` or ``Path``) of the JSON report to read.
            When ``None``, ``DEFAULT_RESULTS_FILE`` is used.

    Returns:
        ``0`` when the summary was printed, ``1`` when the report file does
        not exist or cannot be read / parsed as JSON.

    Raises:
        KeyError: If the report lacks one of the keys read here (``date``,
            ``success_count``, ``total_documents``, ``total_pii``,
            ``total_time_s``, ``avg_time_s``, ``results``, or the per-result
            ``pdf`` / ``time_s`` / ``pii_count`` keys).
    """
    results_file = (
        DEFAULT_RESULTS_FILE if results_file is None else Path(results_file)
    )

    if not results_file.exists():
        print(f"✗ Fichier de résultats non trouvé: {results_file}")
        return 1

    try:
        with open(results_file, encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; report
        # through the same return-code channel as a missing file.
        print(f"✗ Fichier de résultats illisible: {results_file} ({exc})")
        return 1

    _print_overview(data)

    # Split per-document results on their 'success' flag (missing => failure).
    successful = [r for r in data['results'] if r.get('success')]
    failed = [r for r in data['results'] if not r.get('success')]

    if successful:
        _print_distributions(successful)
        _print_rankings(successful)

    if failed:
        _print_failures(failed)

    print("\n" + "=" * 80)
    return 0


if __name__ == "__main__":
    sys.exit(show_summary())