#!/usr/bin/env python3
"""
Performance benchmark of the anonymization system on the test dataset.

Analyzes the batch results to generate a performance report.
Run tools/batch_anonymize_test_dataset.py first to produce those results.
"""
import sys
import json
from pathlib import Path
from datetime import datetime
import statistics

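# Expected shape of batch_results.json, inferred from the fields this script
# reads (presumably written by tools/batch_anonymize_test_dataset.py; the
# values below are illustrative, not real data):
#   {
#     "date": "...",
#     "total_documents": 50,
#     "total_pii": 1200,
#     "use_ner": true,     # optional, defaults to true here
#     "use_vlm": false,    # optional, defaults to false here
#     "results": [
#       {"pdf": "doc.pdf", "success": true, "time_s": 1.23, "pii_count": 42}
#     ]
#   }
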
def run_baseline_benchmark():
    """Generate the benchmark report from the batch results."""

    # Directories
    baseline_dir = Path("tests/ground_truth/pdfs/baseline_anonymized")
    results_file = baseline_dir / "batch_results.json"

    if not results_file.exists():
        print(f"✗ Results file not found: {results_file}")
        print("  Run first: python3 tools/batch_anonymize_test_dataset.py")
        return 1

    # Load the batch results
    with open(results_file, 'r', encoding='utf-8') as f:
        batch_data = json.load(f)

    successful = [r for r in batch_data['results'] if r.get('success')]

    if not successful:
        print("✗ No document was processed successfully")
        return 1

print("="*80)
|
||
print("BENCHMARK DE PERFORMANCE - BASELINE")
|
||
print("="*80)
|
||
print(f"\n📅 Date du batch: {batch_data['date']}")
|
||
print(f"📄 Documents: {len(successful)}/{batch_data['total_documents']}")
|
||
print(f"🔍 PII détectés: {batch_data['total_pii']:,}")
|
||
|
||
    # Extract the metrics
    times = [r['time_s'] for r in successful]
    pii_counts = [r['pii_count'] for r in successful]

    # Compute the statistics
    stats = {
        "total_documents": len(successful),
        "total_time_s": sum(times),
        "avg_time_s": statistics.mean(times),
        "median_time_s": statistics.median(times),
        "min_time_s": min(times),
        "max_time_s": max(times),
        "stdev_time_s": statistics.stdev(times) if len(times) > 1 else 0.0,
        "total_pii": sum(pii_counts),
        "avg_pii": statistics.mean(pii_counts),
        "median_pii": statistics.median(pii_counts),
        "min_pii": min(pii_counts),
        "max_pii": max(pii_counts),
        "docs_per_second": len(successful) / sum(times),
        "pii_per_second": sum(pii_counts) / sum(times)
    }

    # Display the statistics
    print("\n" + "="*80)
    print("PERFORMANCE STATISTICS")
    print("="*80)

    print("\n⏱️ Processing time:")
    print(f"  - Total: {stats['total_time_s']:.2f}s")
    print(f"  - Mean: {stats['avg_time_s']:.2f}s per document")
    print(f"  - Median: {stats['median_time_s']:.2f}s")
    print(f"  - Min: {stats['min_time_s']:.2f}s")
    print(f"  - Max: {stats['max_time_s']:.2f}s")
    print(f"  - Std dev: {stats['stdev_time_s']:.2f}s")

    print("\n🔍 PII detected:")
    print(f"  - Total: {stats['total_pii']:,}")
    print(f"  - Mean: {stats['avg_pii']:.1f} per document")
    print(f"  - Median: {stats['median_pii']:.0f}")
    print(f"  - Min: {stats['min_pii']}")
    print(f"  - Max: {stats['max_pii']:,}")

    print("\n📊 Throughput:")
    print(f"  - Documents/second: {stats['docs_per_second']:.2f}")
    print(f"  - PII/second: {stats['pii_per_second']:.1f}")

    # Identify slow documents (> 2× the mean)
    slow_threshold = stats['avg_time_s'] * 2
    slow_docs = [r for r in successful if r['time_s'] > slow_threshold]
    if slow_docs:
        print(f"\n⚠️ Slow documents (> {slow_threshold:.2f}s):")
        for doc in sorted(slow_docs, key=lambda x: x['time_s'], reverse=True)[:5]:
            print(f"  - {doc['pdf']}: {doc['time_s']:.2f}s ({doc['pii_count']} PII)")

    # Identify fast documents (< 0.5× the mean)
    fast_threshold = stats['avg_time_s'] * 0.5
    fast_docs = [r for r in successful if r['time_s'] < fast_threshold]
    if fast_docs:
        print(f"\n⚡ Fast documents (< {fast_threshold:.2f}s):")
        for doc in sorted(fast_docs, key=lambda x: x['time_s'])[:5]:
            print(f"  - {doc['pdf']}: {doc['time_s']:.2f}s ({doc['pii_count']} PII)")

    # Analyze the relationship between PII count and processing time
    print("\n📈 Correlation analysis:")
    # Documents with many PII
    high_pii_docs = [r for r in successful if r['pii_count'] > stats['avg_pii'] * 2]
    if high_pii_docs:
        avg_time_high_pii = statistics.mean([r['time_s'] for r in high_pii_docs])
        print(f"  - Documents with many PII (>{stats['avg_pii']*2:.0f}): {len(high_pii_docs)}")
        print(f"    Average time: {avg_time_high_pii:.2f}s")

    # Documents with few PII
    low_pii_docs = [r for r in successful if r['pii_count'] < stats['avg_pii'] * 0.5]
    if low_pii_docs:
        avg_time_low_pii = statistics.mean([r['time_s'] for r in low_pii_docs])
        print(f"  - Documents with few PII (<{stats['avg_pii']*0.5:.0f}): {len(low_pii_docs)}")
        print(f"    Average time: {avg_time_low_pii:.2f}s")

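    # Note: the "correlation analysis" above is a coarse comparison of group
    # means, not a formal coefficient. On Python 3.10+ a Pearson correlation
    # could be computed directly, e.g.:
    #     statistics.correlation(pii_counts, times)
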
    # Save the results
    output_dir = Path("tests/ground_truth/benchmarks")
    output_dir.mkdir(exist_ok=True)

    benchmark_data = {
        "date": datetime.now().isoformat(),
        "batch_date": batch_data['date'],
        "configuration": {
            "use_ner": batch_data.get('use_ner', True),
            "use_vlm": batch_data.get('use_vlm', False)
        },
        "statistics": stats,
        "documents": [
            {
                "pdf": r['pdf'],
                "time_s": r['time_s'],
                "pii_count": r['pii_count']
            }
            for r in successful
        ]
    }

    json_file = output_dir / "baseline_benchmark.json"
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(benchmark_data, f, indent=2, ensure_ascii=False)
    print(f"\n📊 JSON results: {json_file}")

    # CSV export
    csv_file = output_dir / "baseline_benchmark.csv"
    with open(csv_file, 'w', encoding='utf-8') as f:
        f.write("pdf,time_s,pii_count\n")
        for r in successful:
            f.write(f"{r['pdf']},{r['time_s']},{r['pii_count']}\n")
    print(f"📊 CSV results: {csv_file}")

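    # Note: pdf names are written unquoted, so a comma in a file name would
    # break the CSV; csv.writer from the standard library would handle quoting
    # if that ever becomes an issue.
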
    # Check the performance targets
    print("\n" + "="*80)
    print("TARGET VALIDATION")
    print("="*80)

    target_time_no_vlm = 10.0  # < 10s per PDF (without VLM)
    target_time_with_vlm = 30.0  # < 30s per PDF (with VLM)

    # Default to the no-VLM target; switch if the batch used the VLM
    target = target_time_no_vlm
    use_vlm = batch_data.get('use_vlm', False)

    if use_vlm:
        target = target_time_with_vlm

print(f"\n🎯 Objectif: < {target}s par document (VLM: {'✓' if use_vlm else '✗'})")
|
||
|
||
if stats['avg_time_s'] <= target:
|
||
print(f"✅ Temps moyen atteint: {stats['avg_time_s']:.2f}s ≤ {target}s")
|
||
else:
|
||
print(f"⚠️ Temps moyen non atteint: {stats['avg_time_s']:.2f}s > {target}s")
|
||
print(f" Écart: +{stats['avg_time_s'] - target:.2f}s ({(stats['avg_time_s']/target - 1)*100:.1f}%)")
|
||
|
||
if stats['max_time_s'] <= target * 3:
|
||
print(f"✅ Temps max acceptable: {stats['max_time_s']:.2f}s ≤ {target * 3}s")
|
||
else:
|
||
print(f"⚠️ Temps max trop élevé: {stats['max_time_s']:.2f}s > {target * 3}s")
|
||
|
||
# Pourcentage de documents dans l'objectif
|
||
docs_in_target = sum(1 for r in successful if r['time_s'] <= target)
|
||
pct_in_target = (docs_in_target / len(successful)) * 100
|
||
print(f"\n📊 Documents dans l'objectif: {docs_in_target}/{len(successful)} ({pct_in_target:.1f}%)")
|
||
|
||
if pct_in_target >= 80:
|
||
print(f"✅ Objectif de couverture atteint (≥80%)")
|
||
else:
|
||
print(f"⚠️ Objectif de couverture non atteint (<80%)")
|
||
|
||
print("\n" + "="*80)
|
||
|
||
return 0
|
||
|
||
|
||
if __name__ == "__main__":
|
||
sys.exit(run_baseline_benchmark())
|
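
# Typical invocation (the script's file name/location is assumed here, not
# specified in the source):
#   python3 tools/run_baseline_benchmark.py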