feat: Benchmark de performance baseline - 2.62s/doc moyen, 92% dans objectif
This commit is contained in:
85
tools/show_batch_summary.py
Executable file
85
tools/show_batch_summary.py
Executable file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Affiche un résumé des résultats du batch d'anonymisation.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
|
||||
def show_summary(results_file=None):
    """Print a human-readable summary of an anonymization batch run.

    The summary is read from a JSON results file and written to stdout:
    global counters, timing and PII statistics over the successful
    documents, the five documents with the most PII, the five fastest
    documents, and the list of failures.

    Args:
        results_file: Optional path (``str`` or ``Path``) to the JSON results
            file. When omitted, the baseline location
            ``tests/ground_truth/pdfs/baseline_anonymized/batch_results.json``
            is used, so existing zero-argument callers are unaffected.

    Returns:
        ``0`` when the summary was printed, ``1`` when the results file does
        not exist. The value is intended to be used as a process exit code.

    Raises:
        KeyError: If the JSON payload lacks one of the keys read below
            (``date``, ``success_count``, ``total_documents``, ``total_pii``,
            ``total_time_s``, ``avg_time_s``, ``results``, or the per-result
            ``pdf`` / ``time_s`` / ``pii_count`` keys).
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Imported here so the function does not depend on an edit to the
    # module-level import block.
    from statistics import median

    if results_file is None:
        results_file = (
            Path("tests/ground_truth/pdfs/baseline_anonymized")
            / "batch_results.json"
        )
    else:
        results_file = Path(results_file)

    if not results_file.exists():
        print(f"✗ Fichier de résultats non trouvé: {results_file}")
        return 1

    with open(results_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    separator = "=" * 80

    # Global statistics
    print(separator)
    print("RÉSUMÉ DU BATCH D'ANONYMISATION")
    print(separator)

    print(f"\n📅 Date: {data['date']}")
    print(f"📄 Documents traités: {data['success_count']}/{data['total_documents']}")
    print(f"🔍 PII détectés: {data['total_pii']:,}")
    print(f"⏱️ Temps total: {data['total_time_s']:.2f}s")
    print(f"⏱️ Temps moyen: {data['avg_time_s']:.2f}s par document")

    # Split the per-document results in a single pass; a missing or falsy
    # 'success' flag counts as a failure.
    successful, failed = [], []
    for result in data['results']:
        (successful if result.get('success') else failed).append(result)

    if successful:
        times = [r['time_s'] for r in successful]
        pii_counts = [r['pii_count'] for r in successful]

        # statistics.median averages the two middle values for even-sized
        # samples; indexing sorted(xs)[len(xs) // 2] would report the
        # upper-middle element instead of the median.
        print("\n📊 Statistiques de temps:")
        print(f" - Min: {min(times):.2f}s")
        print(f" - Max: {max(times):.2f}s")
        print(f" - Médiane: {median(times):.2f}s")

        print("\n📊 Statistiques de PII:")
        print(f" - Min: {min(pii_counts)}")
        print(f" - Max: {max(pii_counts):,}")
        print(f" - Médiane: {median(pii_counts)}")
        print(f" - Moyenne: {sum(pii_counts)/len(pii_counts):.1f}")

        # Top 5 most complex documents (highest PII count first)
        print("\n🏆 Top 5 documents les plus complexes (par PII):")
        top5 = sorted(successful, key=lambda x: x['pii_count'], reverse=True)[:5]
        for i, r in enumerate(top5, 1):
            print(f" {i}. {r['pdf']}")
            print(f" → {r['pii_count']:,} PII en {r['time_s']:.2f}s")

        # Top 5 fastest documents (lowest processing time first)
        print("\n⚡ Top 5 documents les plus rapides:")
        fastest = sorted(successful, key=lambda x: x['time_s'])[:5]
        for i, r in enumerate(fastest, 1):
            print(f" {i}. {r['pdf']}")
            print(f" → {r['time_s']:.2f}s ({r['pii_count']} PII)")

    # Failures
    if failed:
        print(f"\n⚠️ Échecs ({len(failed)}):")
        for r in failed:
            error = r.get('error', 'Unknown error')
            if not error:
                # An 'error' key that is present but empty/None is reported
                # as a password-protected PDF.
                error = "PDF protégé par mot de passe"
            print(f" - {r['pdf']}")
            print(f" → {error}")

    print("\n" + separator)

    return 0
|
||||
|
||||
|
||||
# Script entry point: the summary's return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(show_summary())
|
||||
Reference in New Issue
Block a user