feat: Phase 1 - Quality evaluation system
- Selection and copy of 27 representative documents (10 simple, 12 medium, 5 complex)
- Complete CLI annotation tool (tools/annotation_tool.py)
- Detailed annotation guide (docs/annotation_guide.md)
- Quality evaluator (evaluation/quality_evaluator.py)
  * Precision, Recall, F1-score computation (see the sketch below)
  * False positive/negative identification
  * Per-PII-type metrics
  * JSON export and text reports
- Leak scanner (evaluation/leak_scanner.py)
  * Residual PII detection (CRITICAL)
  * New PII detection (HIGH)
  * PDF metadata scan (MEDIUM)
- Performance benchmark (evaluation/benchmark.py)
  * Processing time measurement
  * CPU/RAM measurement
  * JSON/CSV export
- Complete unit tests for all components
- Complete documentation for the evaluation module

Completed tasks:
- 1.1.1 Selection of 27 documents (instead of 30)
- 1.1.2 CLI annotation tool
- 1.2.1 Quality evaluator
- 1.2.2 Leak scanner
- 1.2.3 Performance benchmark

Next steps:
- 1.1.3 Annotation of the 27 documents (manual)
- 1.1.4 Medical stopword enrichment
- 1.3 Baseline measurement
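For reference, the detection metrics listed for the quality evaluator follow the standard precision/recall/F1 definitions. A minimal sketch, assuming set-based matching of detected vs. annotated PII spans; the function below is illustrative only, not the quality_evaluator.py implementation:

from typing import Set, Tuple

def detection_metrics(predicted: Set[str], expected: Set[str]) -> Tuple[float, float, float]:
    """Precision, recall and F1 over annotated PII spans (illustrative only)."""
    tp = len(predicted & expected)   # detected and annotated (true positives)
    fp = len(predicted - expected)   # detected but not annotated (false positives)
    fn = len(expected - predicted)   # annotated but missed (false negatives)
    precision = tp / (tp + fp) if predicted else 0.0
    recall = tp / (tp + fn) if expected else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, f1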
evaluation/benchmark.py | 339 (new file)
@@ -0,0 +1,339 @@
#!/usr/bin/env python3
"""
Performance benchmark for the anonymization system.

Measures processing times, CPU/RAM usage, and quality metrics.
"""
import json
import time
import psutil
import platform
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Optional
from datetime import datetime


@dataclass
class BenchmarkResult:
    """Benchmark result for a single document."""

    pdf_path: str
    processing_time_s: float = 0.0
    time_per_page_s: float = 0.0
    cpu_usage_percent: float = 0.0
    ram_usage_mb: float = 0.0
    pii_detected: int = 0
    quality_metrics: Dict = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Convert to a dictionary."""
        return {
            "pdf_path": self.pdf_path,
            "processing_time_s": round(self.processing_time_s, 2),
            "time_per_page_s": round(self.time_per_page_s, 2),
            "cpu_usage_percent": round(self.cpu_usage_percent, 2),
            "ram_usage_mb": round(self.ram_usage_mb, 2),
            "pii_detected": self.pii_detected,
            "quality_metrics": self.quality_metrics
        }

class Benchmark:
    """Performance benchmark."""

    def __init__(self, test_data_dir: Path):
        """
        Initialize the benchmark.

        Args:
            test_data_dir: Directory containing the test data
        """
        self.test_data_dir = Path(test_data_dir)
        self.process = psutil.Process()

    def get_system_info(self) -> Dict:
        """
        Collect system information.

        Returns:
            Dictionary of system information
        """
        return {
            "os": platform.system(),
            "os_version": platform.version(),
            "cpu": platform.processor(),
            "cpu_count": psutil.cpu_count(logical=False),
            "cpu_count_logical": psutil.cpu_count(logical=True),
            "ram_gb": round(psutil.virtual_memory().total / (1024**3), 2),
            "python_version": platform.python_version()
        }

    def measure_cpu_ram(self, duration_s: float = 1.0) -> tuple:
        """
        Measure CPU and RAM usage over a given duration.

        Args:
            duration_s: Measurement duration in seconds

        Returns:
            Tuple (cpu_percent, ram_mb)
        """
        # Measure CPU over the sampling interval
        cpu_percent = self.process.cpu_percent(interval=duration_s)

        # Measure RAM (resident set size)
        ram_mb = self.process.memory_info().rss / (1024 * 1024)

        return cpu_percent, ram_mb

    def benchmark_document(
        self,
        pdf_path: Path,
        anonymize_func,
        page_count: Optional[int] = None
    ) -> BenchmarkResult:
        """
        Benchmark a single document.

        Args:
            pdf_path: Path to the PDF
            anonymize_func: Anonymization function to benchmark
            page_count: Number of pages (optional)

        Returns:
            Benchmark result
        """
        # Measure processing time.
        # Note: cpu_percent() without an interval reports usage since the
        # previous call (0.0 on the first call), so the averaged value below
        # is only an approximation.
        start_time = time.time()
        start_cpu = self.process.cpu_percent()
        start_ram = self.process.memory_info().rss / (1024 * 1024)

        # Run the anonymization
        try:
            audit_path = anonymize_func(pdf_path)
        except Exception as e:
            print(f"✗ Error while anonymizing {pdf_path.name}: {e}")
            return BenchmarkResult(pdf_path=str(pdf_path))

        # Measure after processing
        end_time = time.time()
        end_cpu = self.process.cpu_percent()
        end_ram = self.process.memory_info().rss / (1024 * 1024)

        processing_time = end_time - start_time
        cpu_usage = (start_cpu + end_cpu) / 2
        ram_usage = end_ram - start_ram  # RSS delta over the run

        # Count detected PII (non-empty lines in the audit file)
        pii_count = 0
        if audit_path and audit_path.exists():
            try:
                with open(audit_path, 'r', encoding='utf-8') as f:
                    pii_count = sum(1 for line in f if line.strip())
            except Exception:
                pass

        # Compute time per page
        time_per_page = processing_time / page_count if page_count and page_count > 0 else 0.0

        # Build the result
        result = BenchmarkResult(
            pdf_path=str(pdf_path),
            processing_time_s=processing_time,
            time_per_page_s=time_per_page,
            cpu_usage_percent=cpu_usage,
            ram_usage_mb=ram_usage,
            pii_detected=pii_count
        )

        return result

    def run(
        self,
        pdf_list: List[Path],
        anonymize_func,
        page_counts: Optional[List[int]] = None
    ) -> List[BenchmarkResult]:
        """
        Run the benchmark over a list of documents.

        Args:
            pdf_list: List of PDFs to benchmark
            anonymize_func: Anonymization function
            page_counts: List of page counts (optional)

        Returns:
            List of results
        """
        results = []

        if page_counts is None:
            page_counts = [None] * len(pdf_list)

        for i, (pdf_path, page_count) in enumerate(zip(pdf_list, page_counts), 1):
            print(f"[{i}/{len(pdf_list)}] Benchmark: {pdf_path.name}")

            result = self.benchmark_document(pdf_path, anonymize_func, page_count)
            results.append(result)

            print(f"  Time: {result.processing_time_s:.2f}s "
                  f"CPU: {result.cpu_usage_percent:.1f}% "
                  f"RAM: {result.ram_usage_mb:.1f}MB "
                  f"PII: {result.pii_detected}")

        return results

    def calculate_summary(self, results: List[BenchmarkResult]) -> Dict:
        """
        Compute summary statistics.

        Args:
            results: List of results

        Returns:
            Dictionary of statistics
        """
        if not results:
            return {}

        processing_times = [r.processing_time_s for r in results]
        cpu_usages = [r.cpu_usage_percent for r in results]
        ram_usages = [r.ram_usage_mb for r in results]
        pii_counts = [r.pii_detected for r in results]

        return {
            "documents_count": len(results),
            "avg_time_per_doc": round(sum(processing_times) / len(processing_times), 2),
            "min_time": round(min(processing_times), 2),
            "max_time": round(max(processing_times), 2),
            "avg_cpu_percent": round(sum(cpu_usages) / len(cpu_usages), 2),
            "avg_ram_mb": round(sum(ram_usages) / len(ram_usages), 2),
            "total_pii_detected": sum(pii_counts),
            "avg_pii_per_doc": round(sum(pii_counts) / len(pii_counts), 2)
        }

    def generate_report(self, results: List[BenchmarkResult]) -> str:
        """
        Generate a plain-text report.

        Args:
            results: List of results

        Returns:
            Text report
        """
        if not results:
            return "No results to display."

        summary = self.calculate_summary(results)
        system_info = self.get_system_info()

        lines = []
        lines.append("=" * 80)
        lines.append("BENCHMARK REPORT - ANONYMIZATION PERFORMANCE")
        lines.append("=" * 80)
        lines.append("")

        # System information
        lines.append("SYSTEM:")
        lines.append(f"  OS: {system_info['os']} {system_info['os_version']}")
        lines.append(f"  CPU: {system_info['cpu']}")
        lines.append(f"  Cores: {system_info['cpu_count']} physical / {system_info['cpu_count_logical']} logical")
        lines.append(f"  RAM: {system_info['ram_gb']} GB")
        lines.append(f"  Python: {system_info['python_version']}")
        lines.append("")

        # Summary
        lines.append("SUMMARY:")
        lines.append(f"  Documents: {summary['documents_count']}")
        lines.append(f"  Average time: {summary['avg_time_per_doc']}s")
        lines.append(f"  Min/max time: {summary['min_time']}s / {summary['max_time']}s")
        lines.append(f"  Average CPU: {summary['avg_cpu_percent']}%")
        lines.append(f"  Average RAM: {summary['avg_ram_mb']} MB")
        lines.append(f"  PII detected: {summary['total_pii_detected']} (avg: {summary['avg_pii_per_doc']})")
        lines.append("")

        # Per-document details
        lines.append("PER-DOCUMENT DETAILS:")
        lines.append("")

        for result in results:
            pdf_name = Path(result.pdf_path).name
            lines.append(f"  {pdf_name}")
            lines.append(f"    Time: {result.processing_time_s:.2f}s "
                         f"CPU: {result.cpu_usage_percent:.1f}% "
                         f"RAM: {result.ram_usage_mb:.1f}MB "
                         f"PII: {result.pii_detected}")

        lines.append("")
        lines.append("=" * 80)

        return "\n".join(lines)

    def export_json(self, results: List[BenchmarkResult], output_path: Path):
        """
        Export results as JSON.

        Args:
            results: List of results
            output_path: Output file path
        """
        data = {
            "benchmark_date": datetime.now().isoformat(),
            "system_info": self.get_system_info(),
            "results": [r.to_dict() for r in results],
            "summary": self.calculate_summary(results)
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"✓ Results exported: {output_path}")

    def export_csv(self, results: List[BenchmarkResult], output_path: Path):
        """
        Export results as CSV.

        Args:
            results: List of results
            output_path: Output file path
        """
        import csv

        with open(output_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)

            # Header
            writer.writerow([
                "pdf_path",
                "processing_time_s",
                "time_per_page_s",
                "cpu_usage_percent",
                "ram_usage_mb",
                "pii_detected"
            ])

            # Data rows
            for result in results:
                writer.writerow([
                    result.pdf_path,
                    result.processing_time_s,
                    result.time_per_page_s,
                    result.cpu_usage_percent,
                    result.ram_usage_mb,
                    result.pii_detected
                ])

        print(f"✓ Results exported: {output_path}")


if __name__ == "__main__":
    # Basic smoke test
    benchmark = Benchmark(Path("tests/ground_truth/pdfs"))

    # Print system information
    system_info = benchmark.get_system_info()
    print("System information:")
    for key, value in system_info.items():
        print(f"  {key}: {value}")
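A minimal usage sketch for the new module, assuming an anonymization entry point that takes a PDF path and returns the path of the audit file it produced; the `anonymize_pdf` import and the output file names below are placeholders, only the Benchmark API added in this diff is taken as given:

from pathlib import Path

from evaluation.benchmark import Benchmark
# Placeholder import: any callable that takes a PDF path and returns the
# Path of the generated audit file can be passed to Benchmark.run().
from anonymizer import anonymize_pdf  # hypothetical module/function name

test_dir = Path("tests/ground_truth/pdfs")
benchmark = Benchmark(test_dir)

pdf_list = sorted(test_dir.glob("*.pdf"))
results = benchmark.run(pdf_list, anonymize_pdf)

print(benchmark.generate_report(results))
benchmark.export_json(results, Path("benchmark_results.json"))
benchmark.export_csv(results, Path("benchmark_results.csv"))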