feat: Benchmark de performance baseline - 2.62s/doc moyen, 92% dans objectif
This commit is contained in:
199
tools/run_baseline_benchmark.py
Executable file
199
tools/run_baseline_benchmark.py
Executable file
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Benchmark de performance du système d'anonymisation sur le dataset de test.
|
||||
|
||||
Analyse les résultats du batch pour générer un rapport de performance.
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import statistics
|
||||
|
||||
# Per-document processing-time targets, in seconds.
_TARGET_TIME_NO_VLM_S = 10.0    # batch run without the VLM
_TARGET_TIME_WITH_VLM_S = 30.0  # batch run with the VLM
# Minimum share of documents (in %) that must individually meet the target.
_TARGET_COVERAGE_PCT = 80
# How many slow/fast outlier documents are listed at most.
_MAX_OUTLIERS_SHOWN = 5
_SEPARATOR = "=" * 80


def _compute_stats(successful):
    """Return aggregate timing, PII and throughput statistics.

    ``successful`` must be a non-empty list of result dicts that each
    carry ``time_s`` and ``pii_count``.
    """
    times = [r['time_s'] for r in successful]
    pii_counts = [r['pii_count'] for r in successful]
    total_time = sum(times)
    total_pii = sum(pii_counts)
    # A batch whose recorded times are all zero must not crash the report:
    # throughput is reported as 0.0 instead of dividing by zero.
    has_time = total_time > 0
    return {
        "total_documents": len(successful),
        "total_time_s": total_time,
        "avg_time_s": statistics.mean(times),
        "median_time_s": statistics.median(times),
        "min_time_s": min(times),
        "max_time_s": max(times),
        # statistics.stdev() requires at least two samples.
        "stdev_time_s": statistics.stdev(times) if len(times) > 1 else 0.0,
        "total_pii": total_pii,
        "avg_pii": statistics.mean(pii_counts),
        "median_pii": statistics.median(pii_counts),
        "min_pii": min(pii_counts),
        "max_pii": max(pii_counts),
        "docs_per_second": len(successful) / total_time if has_time else 0.0,
        "pii_per_second": total_pii / total_time if has_time else 0.0,
    }


def _print_stats(stats):
    """Print the timing, PII and throughput sections of the report."""
    print("\n" + _SEPARATOR)
    print("STATISTIQUES DE PERFORMANCE")
    print(_SEPARATOR)

    print("\n⏱️ Temps de traitement:")
    print(f" - Total: {stats['total_time_s']:.2f}s")
    print(f" - Moyen: {stats['avg_time_s']:.2f}s par document")
    print(f" - Médiane: {stats['median_time_s']:.2f}s")
    print(f" - Min: {stats['min_time_s']:.2f}s")
    print(f" - Max: {stats['max_time_s']:.2f}s")
    print(f" - Écart-type: {stats['stdev_time_s']:.2f}s")

    print("\n🔍 PII détectés:")
    print(f" - Total: {stats['total_pii']:,}")
    print(f" - Moyen: {stats['avg_pii']:.1f} par document")
    print(f" - Médiane: {stats['median_pii']:.0f}")
    print(f" - Min: {stats['min_pii']}")
    print(f" - Max: {stats['max_pii']:,}")

    print("\n📊 Débit:")
    print(f" - Documents/seconde: {stats['docs_per_second']:.2f}")
    print(f" - PII/seconde: {stats['pii_per_second']:.1f}")


def _print_outliers(successful, stats):
    """Print the slowest/fastest documents and the PII-vs-time breakdown."""
    # Slow documents: more than twice the mean processing time.
    slow_threshold = stats['avg_time_s'] * 2
    slow_docs = [r for r in successful if r['time_s'] > slow_threshold]
    if slow_docs:
        print(f"\n⚠️ Documents lents (> {slow_threshold:.2f}s):")
        slowest_first = sorted(slow_docs, key=lambda r: r['time_s'], reverse=True)
        for doc in slowest_first[:_MAX_OUTLIERS_SHOWN]:
            print(f" - {doc['pdf']}: {doc['time_s']:.2f}s ({doc['pii_count']} PII)")

    # Fast documents: less than half the mean processing time.
    fast_threshold = stats['avg_time_s'] * 0.5
    fast_docs = [r for r in successful if r['time_s'] < fast_threshold]
    if fast_docs:
        print(f"\n⚡ Documents rapides (< {fast_threshold:.2f}s):")
        fastest_first = sorted(fast_docs, key=lambda r: r['time_s'])
        for doc in fastest_first[:_MAX_OUTLIERS_SHOWN]:
            print(f" - {doc['pdf']}: {doc['time_s']:.2f}s ({doc['pii_count']} PII)")

    # Rough PII/time relationship: mean time of the PII-heavy and PII-light
    # groups (not a statistical correlation coefficient).
    print("\n📈 Analyse de corrélation:")
    high_pii_limit = stats['avg_pii'] * 2
    high_pii_docs = [r for r in successful if r['pii_count'] > high_pii_limit]
    if high_pii_docs:
        avg_time_high_pii = statistics.mean(r['time_s'] for r in high_pii_docs)
        print(f" - Documents avec beaucoup de PII (>{high_pii_limit:.0f}): {len(high_pii_docs)}")
        print(f" Temps moyen: {avg_time_high_pii:.2f}s")

    low_pii_limit = stats['avg_pii'] * 0.5
    low_pii_docs = [r for r in successful if r['pii_count'] < low_pii_limit]
    if low_pii_docs:
        avg_time_low_pii = statistics.mean(r['time_s'] for r in low_pii_docs)
        print(f" - Documents avec peu de PII (<{low_pii_limit:.0f}): {len(low_pii_docs)}")
        print(f" Temps moyen: {avg_time_low_pii:.2f}s")


def _save_reports(output_dir, batch_data, stats, successful):
    """Write baseline_benchmark.json and baseline_benchmark.csv into output_dir."""
    import csv  # local import: only this helper needs it

    # parents=True so a fresh checkout without the parent folders still works.
    output_dir.mkdir(parents=True, exist_ok=True)

    documents = [
        {"pdf": r['pdf'], "time_s": r['time_s'], "pii_count": r['pii_count']}
        for r in successful
    ]
    benchmark_data = {
        "date": datetime.now().isoformat(),
        "batch_date": batch_data.get('date'),
        "configuration": {
            "use_ner": batch_data.get('use_ner', True),
            "use_vlm": batch_data.get('use_vlm', False),
        },
        "statistics": stats,
        "documents": documents,
    }

    json_file = output_dir / "baseline_benchmark.json"
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(benchmark_data, f, indent=2, ensure_ascii=False)
    print(f"\n📊 Résultats JSON: {json_file}")

    # csv.writer quotes fields containing commas, quotes or newlines, which
    # hand-built lines would silently corrupt (e.g. a PDF named "a,b.pdf").
    csv_file = output_dir / "baseline_benchmark.csv"
    with open(csv_file, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["pdf", "time_s", "pii_count"])
        writer.writerows(
            (d['pdf'], d['time_s'], d['pii_count']) for d in documents
        )
    print(f"📊 Résultats CSV: {csv_file}")


def _check_targets(successful, stats, use_vlm):
    """Print whether the mean, max and per-document times meet the target."""
    print("\n" + _SEPARATOR)
    print("VALIDATION DES OBJECTIFS")
    print(_SEPARATOR)

    # The applicable target depends on whether the batch was run with the VLM.
    target = _TARGET_TIME_WITH_VLM_S if use_vlm else _TARGET_TIME_NO_VLM_S
    print(f"\n🎯 Objectif: < {target}s par document (VLM: {'✓' if use_vlm else '✗'})")

    avg_time = stats['avg_time_s']
    if avg_time <= target:
        print(f"✅ Temps moyen atteint: {avg_time:.2f}s ≤ {target}s")
    else:
        print(f"⚠️ Temps moyen non atteint: {avg_time:.2f}s > {target}s")
        print(f" Écart: +{avg_time - target:.2f}s ({(avg_time / target - 1) * 100:.1f}%)")

    # The slowest document is tolerated up to three times the target.
    max_allowed = target * 3
    if stats['max_time_s'] <= max_allowed:
        print(f"✅ Temps max acceptable: {stats['max_time_s']:.2f}s ≤ {max_allowed}s")
    else:
        print(f"⚠️ Temps max trop élevé: {stats['max_time_s']:.2f}s > {max_allowed}s")

    # Share of documents that individually meet the target.
    docs_in_target = sum(1 for r in successful if r['time_s'] <= target)
    pct_in_target = (docs_in_target / len(successful)) * 100
    print(f"\n📊 Documents dans l'objectif: {docs_in_target}/{len(successful)} ({pct_in_target:.1f}%)")

    if pct_in_target >= _TARGET_COVERAGE_PCT:
        print("✅ Objectif de couverture atteint (≥80%)")
    else:
        print("⚠️ Objectif de couverture non atteint (<80%)")


def run_baseline_benchmark(
    baseline_dir="tests/ground_truth/pdfs/baseline_anonymized",
    output_dir="tests/ground_truth/benchmarks",
) -> int:
    """Generate the benchmark report from the batch results.

    Reads ``batch_results.json`` from ``baseline_dir``, keeps the entries
    whose ``success`` field is truthy, prints a performance report to
    stdout and writes ``baseline_benchmark.json`` and
    ``baseline_benchmark.csv`` into ``output_dir``.

    Args:
        baseline_dir: Directory (str or Path) holding ``batch_results.json``.
        output_dir: Directory (str or Path) receiving the report files;
            created, including missing parents, if needed.

    Returns:
        0 on success; 1 if the results file is missing, is not valid
        JSON, is not a JSON object, or lists no successful document.

    Raises:
        KeyError: If a successful result lacks ``time_s``, ``pii_count``
            or ``pdf``.
    """
    results_file = Path(baseline_dir) / "batch_results.json"

    if not results_file.exists():
        print(f"✗ Fichier de résultats non trouvé: {results_file}")
        print(" Exécutez d'abord: python3 tools/batch_anonymize_test_dataset.py")
        return 1

    # Load the batch results; a corrupt file is reported, not raised.
    try:
        with open(results_file, 'r', encoding='utf-8') as f:
            batch_data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f"✗ Fichier de résultats illisible: {results_file} ({exc})")
        return 1

    if not isinstance(batch_data, dict):
        print(f"✗ Format de résultats invalide: {results_file}")
        return 1

    results = batch_data.get('results', [])
    successful = [r for r in results if r.get('success')]

    if not successful:
        print("✗ Aucun document traité avec succès")
        return 1

    stats = _compute_stats(successful)

    print(_SEPARATOR)
    print("BENCHMARK DE PERFORMANCE - BASELINE")
    print(_SEPARATOR)
    # Header values come from the batch file; fall back to values derived
    # from the results when the batch file omits them.
    total_documents = batch_data.get('total_documents', len(results))
    total_pii = batch_data.get('total_pii', stats['total_pii'])
    print(f"\n📅 Date du batch: {batch_data.get('date', 'inconnue')}")
    print(f"📄 Documents: {len(successful)}/{total_documents}")
    print(f"🔍 PII détectés: {total_pii:,}")

    _print_stats(stats)
    _print_outliers(successful, stats)
    _save_reports(Path(output_dir), batch_data, stats, successful)
    _check_targets(successful, stats, batch_data.get('use_vlm', False))

    print("\n" + _SEPARATOR)

    return 0
|
||||
|
||||
|
||||
# Script entry point: the benchmark's return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(run_baseline_benchmark())
|
||||
Reference in New Issue
Block a user