feat: Phase 1 - Système d'évaluation de la qualité
- Sélection et copie de 27 documents représentatifs (10 simples, 12 moyens, 5 complexes)
- Outil d'annotation CLI complet (tools/annotation_tool.py)
- Guide d'annotation détaillé (docs/annotation_guide.md)
- Évaluateur de qualité (evaluation/quality_evaluator.py)
  * Calcul Précision, Rappel, F1-Score
  * Identification faux positifs/négatifs
  * Métriques par type de PII
  * Export JSON et rapports texte
- Scanner de fuite (evaluation/leak_scanner.py)
  * Détection PII résiduels (CRITIQUE)
  * Détection nouveaux PII (HAUTE)
  * Scan métadonnées PDF (MOYENNE)
- Benchmark de performance (evaluation/benchmark.py)
  * Mesure temps de traitement
  * Mesure CPU/RAM
  * Export JSON/CSV
- Tests unitaires complets pour tous les composants
- Documentation complète du module d'évaluation

Tâches complétées :
- 1.1.1 Sélection de 27 documents (au lieu de 30)
- 1.1.2 Outil d'annotation CLI
- 1.2.1 Évaluateur de qualité
- 1.2.2 Scanner de fuite
- 1.2.3 Benchmark de performance

Prochaines étapes :
- 1.1.3 Annotation des 27 documents (manuel)
- 1.1.4 Enrichissement stopwords médicaux
- 1.3 Mesure de la baseline
This commit is contained in:
145
tests/unit/test_quality_evaluator.py
Normal file
145
tests/unit/test_quality_evaluator.py
Normal file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests unitaires pour l'évaluateur de qualité.
|
||||
"""
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from evaluation.quality_evaluator import QualityEvaluator, EvaluationResult
|
||||
|
||||
|
||||
class TestQualityEvaluator:
    """Unit tests for ``QualityEvaluator`` and ``EvaluationResult``.

    Every test builds its own evaluator pointed at ``tests/ground_truth``
    so the tests stay independent of each other.
    """

    def test_normalize_text(self):
        """Check the text normalisation applied before comparing PII."""
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        # Upper-case input is expected to come back lower-cased.
        assert evaluator.normalize_text("DUPONT") == "dupont"
        # Surrounding whitespace is expected to be stripped.
        assert evaluator.normalize_text(" DUPONT ") == "dupont"
        # A run of newlines is expected to collapse to a single space.
        assert evaluator.normalize_text("DUPONT\n\nMARTIN") == "dupont martin"
        # Hyphens inside compound names are expected to be preserved.
        assert evaluator.normalize_text("Jean-Pierre") == "jean-pierre"

    def test_types_match(self):
        """Check which annotation/detection type pairs count as a match."""
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        # Direct match, including the "_GLOBAL" variants of a type.
        assert evaluator.types_match("NOM", "NOM")
        assert evaluator.types_match("NOM", "NOM_GLOBAL")
        assert evaluator.types_match("TEL", "TEL_GLOBAL")

        # Cross match: last name and first name are accepted for each
        # other, in both directions.
        assert evaluator.types_match("NOM", "PRENOM")
        assert evaluator.types_match("PRENOM", "NOM")

        # Unrelated types must not match.
        assert not evaluator.types_match("NOM", "TEL")
        assert not evaluator.types_match("EMAIL", "ADRESSE")

    def test_calculate_metrics(self):
        """Check precision / recall / F1 on perfect, mixed and empty counts.

        Computed floats are compared with ``pytest.approx`` rather than
        ``==``: depending on the formula used, a correct F1 for
        precision = recall = 0.8 can come out as 0.8000000000000002, which
        an exact comparison would reject.
        """
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        # Perfect case: only true positives.
        precision, recall, f1 = evaluator.calculate_metrics(10, 0, 0)
        assert precision == pytest.approx(1.0)
        assert recall == pytest.approx(1.0)
        assert f1 == pytest.approx(1.0)

        # Case with errors. The two error counts are equal, so the expected
        # values do not depend on their argument order.
        precision, recall, f1 = evaluator.calculate_metrics(8, 2, 2)
        assert precision == pytest.approx(0.8)  # 8 / (8 + 2)
        assert recall == pytest.approx(0.8)  # 8 / (8 + 2)
        assert f1 == pytest.approx(0.8)

        # All-zero case: the metrics are expected to be 0.0 (and the call
        # must not raise on the zero denominators).
        precision, recall, f1 = evaluator.calculate_metrics(0, 0, 0)
        assert precision == pytest.approx(0.0)
        assert recall == pytest.approx(0.0)
        assert f1 == pytest.approx(0.0)

    def test_compare_simple(self):
        """Every annotation has a matching detection: all true positives."""
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        annotations = [
            {"page": 0, "type": "NOM", "text": "DUPONT", "context": "Dr. DUPONT"},
            {"page": 0, "type": "TEL", "text": "01 23 45 67 89", "context": "Tel: 01 23 45 67 89"},
        ]

        detections = [
            {"page": 0, "kind": "NOM", "original": "DUPONT"},
            {"page": 0, "kind": "TEL", "original": "01 23 45 67 89"},
        ]

        tp, fn, fp = evaluator.compare(annotations, detections)

        assert len(tp) == 2
        assert len(fn) == 0
        assert len(fp) == 0

    def test_compare_with_false_negative(self):
        """An annotation with no detection is reported as a false negative."""
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        annotations = [
            {"page": 0, "type": "NOM", "text": "DUPONT", "context": "Dr. DUPONT"},
            {"page": 0, "type": "TEL", "text": "01 23 45 67 89", "context": "Tel: 01 23 45 67 89"},
        ]

        detections = [
            {"page": 0, "kind": "NOM", "original": "DUPONT"},
            # The TEL detection is deliberately missing.
        ]

        tp, fn, fp = evaluator.compare(annotations, detections)

        assert len(tp) == 1
        assert len(fn) == 1
        assert len(fp) == 0
        # The missed annotation is reported with its type and a reason code.
        assert fn[0]["type"] == "TEL"
        assert fn[0]["reason"] == "not_detected"

    def test_compare_with_false_positive(self):
        """A detection with no annotation is reported as a false positive."""
        evaluator = QualityEvaluator(Path("tests/ground_truth"))

        annotations = [
            {"page": 0, "type": "NOM", "text": "DUPONT", "context": "Dr. DUPONT"},
        ]

        detections = [
            {"page": 0, "kind": "NOM", "original": "DUPONT"},
            {"page": 0, "kind": "NOM", "original": "MARTIN"},  # false positive
        ]

        tp, fn, fp = evaluator.compare(annotations, detections)

        assert len(tp) == 1
        assert len(fn) == 0
        assert len(fp) == 1
        assert fp[0]["text"] == "MARTIN"

    def test_evaluation_result_to_dict(self):
        """``EvaluationResult.to_dict`` exposes the fields it was built with."""
        result = EvaluationResult(
            pdf_path="test.pdf",
            true_positives=10,
            false_positives=2,
            false_negatives=1,
            precision=0.8333,
            recall=0.9091,
            f1_score=0.8696,
        )

        data = result.to_dict()

        assert data["pdf_path"] == "test.pdf"
        assert data["true_positives"] == 10
        # These floats are passed straight through the constructor, but
        # approx keeps the test valid if to_dict ever rounds or reformats.
        assert data["precision"] == pytest.approx(0.8333)
        assert data["recall"] == pytest.approx(0.9091)
        assert data["f1_score"] == pytest.approx(0.8696)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this file directly. pytest.main returns the session's
    # exit status; propagate it so a failing run exits non-zero instead of
    # always reporting success to the shell / CI.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user