86 lines
2.9 KiB
Python
Executable File
86 lines
2.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Affiche un résumé des résultats du batch d'anonymisation.
|
|
"""
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from collections import Counter
|
|
|
|
def show_summary(results_file=None):
    """Print a human-readable summary of an anonymization batch run.

    The summary contains the global counters stored in the results file,
    min/max/median statistics over the successful documents, the five
    documents with the most PII, the five fastest documents and the list
    of failed documents.

    Args:
        results_file: Optional path (``str`` or ``Path``) of the JSON results
            file. When omitted, the historical default
            ``tests/ground_truth/pdfs/baseline_anonymized/batch_results.json``
            (relative to the current working directory) is used.

    Returns:
        int: ``0`` when the summary was printed, ``1`` when the results file
        is missing, unreadable or not valid JSON. The value is meant to be
        used as the process exit code.

    Raises:
        KeyError: If the JSON document lacks one of the keys read below
            (``date``, ``success_count``, ``total_documents``, ``total_pii``,
            ``total_time_s``, ``avg_time_s``, ``results``).
    """
    # Imported locally so this function does not depend on a change to the
    # file-level import block.
    from statistics import median

    if results_file is None:
        results_file = (
            Path("tests/ground_truth/pdfs/baseline_anonymized")
            / "batch_results.json"
        )
    else:
        results_file = Path(results_file)

    # EAFP: open directly instead of checking existence first, and turn
    # unreadable / corrupt files into an error code rather than a traceback.
    try:
        with open(results_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"✗ Fichier de résultats non trouvé: {results_file}")
        return 1
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        print(f"✗ Fichier de résultats illisible: {results_file} ({exc})")
        return 1

    separator = "=" * 80

    # Global statistics, taken as-is from the results file.
    print(separator)
    print("RÉSUMÉ DU BATCH D'ANONYMISATION")
    print(separator)

    print(f"\n📅 Date: {data['date']}")
    print(f"📄 Documents traités: {data['success_count']}/{data['total_documents']}")
    print(f"🔍 PII détectés: {data['total_pii']:,}")
    print(f"⏱️ Temps total: {data['total_time_s']:.2f}s")
    print(f"⏱️ Temps moyen: {data['avg_time_s']:.2f}s par document")

    # Split the per-document entries by outcome; a missing 'success' key
    # counts as a failure.
    successful = [r for r in data['results'] if r.get('success')]
    failed = [r for r in data['results'] if not r.get('success')]

    if successful:
        times = [r['time_s'] for r in successful]
        pii_counts = [r['pii_count'] for r in successful]

        print("\n📊 Statistiques de temps:")
        print(f" - Min: {min(times):.2f}s")
        print(f" - Max: {max(times):.2f}s")
        # statistics.median averages the two middle values for even-length
        # input instead of silently picking the upper one.
        print(f" - Médiane: {median(times):.2f}s")

        print("\n📊 Statistiques de PII:")
        print(f" - Min: {min(pii_counts)}")
        print(f" - Max: {max(pii_counts):,}")
        print(f" - Médiane: {median(pii_counts)}")
        print(f" - Moyenne: {sum(pii_counts)/len(pii_counts):.1f}")

        # Top 5 documents with the highest PII count.
        print("\n🏆 Top 5 documents les plus complexes (par PII):")
        top5 = sorted(successful, key=lambda x: x['pii_count'], reverse=True)[:5]
        for i, r in enumerate(top5, 1):
            print(f" {i}. {r['pdf']}")
            print(f" → {r['pii_count']:,} PII en {r['time_s']:.2f}s")

        # Top 5 documents with the shortest processing time.
        print("\n⚡ Top 5 documents les plus rapides:")
        fastest = sorted(successful, key=lambda x: x['time_s'])[:5]
        for i, r in enumerate(fastest, 1):
            print(f" {i}. {r['pdf']}")
            print(f" → {r['time_s']:.2f}s ({r['pii_count']} PII)")

    # Failures
    if failed:
        print(f"\n⚠️ Échecs ({len(failed)}):")
        for r in failed:
            # An entry with an empty/None error is reported as a
            # password-protected PDF; an entry with no 'error' key at all
            # falls back to 'Unknown error'.
            error = r.get('error', 'Unknown error') or "PDF protégé par mot de passe"
            print(f" - {r['pdf']}")
            print(f" → {error}")

    print("\n" + separator)

    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: propagate show_summary()'s return value as the
    # process exit status.
    raise SystemExit(show_summary())
|