feat(extraction): assess_quality — statut qualité dossier (4 niveaux)
complete / partial / needs_review / failed (priorité décroissante), matching rôle requis insensible casse+espaces, seuil min_confidence paramétrable (0.6). 16 tests ajoutés (31 au total, verts). Brique TDD via sous-agent, code révisé. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -189,6 +189,48 @@ def parse_vlm_json(text: str) -> dict:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_label(label: str) -> str:
|
||||||
|
"""Normalise un label pour comparaison : minuscules + strip espaces."""
|
||||||
|
return label.strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def assess_quality(
    fields: Sequence[MappedField],
    required_roles: Optional[Sequence[str]] = None,
    min_confidence: float = 0.6,
) -> str:
    """Rate the extraction quality of a dossier from its reconstructed fields.

    The statuses are checked in the order below and the first match wins:

    - ``"failed"``: there is no field at all, or no field is anchored.
    - ``"needs_review"``: at least one required role has no anchored field.
    - ``"partial"``: an anchored field has a confidence below
      ``min_confidence``, or at least one field is not anchored.
    - ``"complete"``: every field is anchored and every confidence is
      ``>= min_confidence``.

    Args:
        fields: Reconstructed fields. ``anchored`` and ``confidence`` are read
            on each of them; ``label`` is read only when ``required_roles``
            is given. Any iterable is accepted, including a one-shot one.
        required_roles: Role names that must each match the label of an
            anchored field. Both sides are compared through ``_norm_label``.
            A bare string is treated as a single role. ``None`` or an empty
            sequence disables the check.
        min_confidence: Inclusive lower bound on the confidence of anchored
            fields for the dossier to be rated ``"complete"``.

    Returns:
        One of ``"failed"``, ``"needs_review"``, ``"partial"``, ``"complete"``.
    """
    # Materialise once: the fields are walked several times below, and a
    # one-shot iterable would otherwise look empty on the later passes and
    # hide its unanchored fields.
    fields = list(fields)

    # --- failed: nothing extracted, or nothing anchored ---
    # An empty input also produces an empty `anchored`, so one check is enough.
    anchored = [f for f in fields if f.anchored]
    if not anchored:
        return "failed"

    # --- needs_review: a required role is missing or not anchored ---
    # A lone string would otherwise be iterated character by character.
    if isinstance(required_roles, str):
        required_roles = [required_roles]
    if required_roles:
        anchored_labels = {_norm_label(f.label) for f in anchored}
        if any(_norm_label(role) not in anchored_labels for role in required_roles):
            return "needs_review"

    # --- partial: low confidence on an anchored field, or unanchored fields ---
    has_low_confidence = any(f.confidence < min_confidence for f in anchored)
    has_unanchored = len(anchored) != len(fields)
    if has_low_confidence or has_unanchored:
        return "partial"

    # --- complete ---
    return "complete"
|
||||||
|
|
||||||
|
|
||||||
def map_roles(
|
def map_roles(
|
||||||
image_path: str,
|
image_path: str,
|
||||||
tokens: Sequence[OcrToken],
|
tokens: Sequence[OcrToken],
|
||||||
|
|||||||
@@ -7,7 +7,9 @@ par le VLM ne doit pouvoir entrer dans une valeur -> 0 hallucination par constru
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from core.extraction.role_mapper import (
|
from core.extraction.role_mapper import (
|
||||||
|
MappedField,
|
||||||
OcrToken,
|
OcrToken,
|
||||||
|
assess_quality,
|
||||||
build_role_prompt,
|
build_role_prompt,
|
||||||
map_roles,
|
map_roles,
|
||||||
reconstruct_fields,
|
reconstruct_fields,
|
||||||
@@ -150,3 +152,145 @@ def test_map_roles_passe_les_roles_au_prompt():
|
|||||||
client = _fake_client('{"champs":[]}', capture=cap)
|
client = _fake_client('{"champs":[]}', capture=cap)
|
||||||
map_roles("img.png", tokens, client, roles=["Diagnostic", "GEMSA"])
|
map_roles("img.png", tokens, client, roles=["Diagnostic", "GEMSA"])
|
||||||
assert "Diagnostic" in cap["prompt"] and "GEMSA" in cap["prompt"]
|
assert "Diagnostic" in cap["prompt"] and "GEMSA" in cap["prompt"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# assess_quality — évaluation de la qualité d'extraction d'un dossier
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _field(label, value="val", anchored=True, confidence=0.9, value_ids=None, invalid_ids=None):
    """Build a MappedField directly for tests, without going through OCR/VLM.

    An anchored field keeps ``value``, defaults to ``value_ids=[0]`` and gets
    a dummy bounding box. An unanchored field gets an empty value, no bounding
    box, and defaults to empty ``value_ids``.
    """
    if anchored:
        field_value, default_ids, box = value, [0], (0, 0, 10, 10)
    else:
        field_value, default_ids, box = "", [], None
    return MappedField(
        label=label,
        value=field_value,
        # A falsy value_ids (None or an empty list) falls back to the default.
        value_ids=value_ids if value_ids else default_ids,
        confidence=confidence,
        bbox=box,
        anchored=anchored,
        invalid_ids=invalid_ids if invalid_ids else [],
    )
|
||||||
|
|
||||||
|
|
||||||
|
# --- failed ---
|
||||||
|
|
||||||
|
def test_assess_quality_failed_aucun_champ():
    """An empty field list is rated failed."""
    status = assess_quality([])
    assert status == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_failed_aucun_champ_ancre():
    """Fields exist but none is anchored: the status is failed."""
    unanchored = [_field(name, anchored=False) for name in ("Nom", "IPP")]
    assert assess_quality(unanchored) == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_failed_un_champ_value_vide():
    """A single unanchored field with an empty value is rated failed."""
    lone = _field("Nom", anchored=False, value_ids=[])
    assert assess_quality([lone]) == "failed"
|
||||||
|
|
||||||
|
|
||||||
|
# --- needs_review ---
|
||||||
|
|
||||||
|
def test_assess_quality_needs_review_role_requis_absent():
    """A required role with no matching field is rated needs_review."""
    status = assess_quality(
        [_field("Nom", anchored=True)],
        required_roles=["Nom", "IPP"],
    )
    assert status == "needs_review"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_needs_review_role_requis_non_ancre():
    """A required role that is present but unanchored is rated needs_review."""
    nom = _field("Nom", anchored=True)
    ipp = _field("IPP", anchored=False)
    status = assess_quality([nom, ipp], required_roles=["Nom", "IPP"])
    assert status == "needs_review"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_needs_review_matching_insensible_casse():
    """Label/required-role matching ignores case.

    Both fields are anchored with the helper's default confidence, so once the
    differently-cased roles match the dossier must be rated complete. Pinning
    the exact status also catches a wrong "failed" or "partial" result, which
    a mere ``!= "needs_review"`` check would let through.
    """
    fields = [_field("nom complet", anchored=True), _field("ipp", anchored=True)]
    # The required roles use different casing and must still match.
    assert assess_quality(fields, required_roles=["Nom Complet", "IPP"]) == "complete"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_needs_review_matching_insensible_espaces():
    """Label/required-role matching ignores surrounding whitespace.

    The only field is anchored with the helper's default confidence, so once
    the padded label matches the dossier must be rated complete. Pinning the
    exact status also catches a wrong "failed" or "partial" result, which a
    mere ``!= "needs_review"`` check would let through.
    """
    fields = [_field(" Nom ", anchored=True)]
    assert assess_quality(fields, required_roles=["Nom"]) == "complete"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_needs_review_priorite_sur_partial():
    """needs_review outranks partial when a role is missing and confidence is low."""
    # "Nom" has a low confidence and "IPP" is absent altogether.
    low_nom = _field("Nom", anchored=True, confidence=0.4)
    status = assess_quality([low_nom], required_roles=["Nom", "IPP"])
    assert status == "needs_review"
|
||||||
|
|
||||||
|
|
||||||
|
# --- partial ---
|
||||||
|
|
||||||
|
def test_assess_quality_partial_confidence_basse():
    """All required roles anchored, but one confidence is below the threshold: partial."""
    nom = _field("Nom", anchored=True, confidence=0.9)
    ipp = _field("IPP", anchored=True, confidence=0.4)  # below the default 0.6
    status = assess_quality([nom, ipp], required_roles=["Nom", "IPP"])
    assert status == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_partial_champs_non_ancres_en_surplus():
    """Required roles anchored with good confidence, plus an extra unanchored field: partial."""
    nom = _field("Nom", anchored=True, confidence=0.9)
    extra = _field("Inconnu", anchored=False)  # unanchored and not required
    status = assess_quality([nom, extra], required_roles=["Nom"])
    assert status == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_partial_sans_required_roles_confidence_basse():
    """Without required roles, a low-confidence anchored field gives partial."""
    nom = _field("Nom", anchored=True, confidence=0.9)
    ipp = _field("IPP", anchored=True, confidence=0.3)
    status = assess_quality([nom, ipp])
    assert status == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_partial_sans_required_roles_champ_non_ancre():
    """Without required roles, a single unanchored field gives partial."""
    nom = _field("Nom", anchored=True, confidence=0.9)
    ipp = _field("IPP", anchored=False)
    status = assess_quality([nom, ipp])
    assert status == "partial"
|
||||||
|
|
||||||
|
|
||||||
|
# --- complete ---
|
||||||
|
|
||||||
|
def test_assess_quality_complete_tous_requis_ancres_confidence_ok():
    """All required roles anchored, every confidence >= 0.6, nothing unanchored: complete."""
    nom = _field("Nom", anchored=True, confidence=0.9)
    ipp = _field("IPP", anchored=True, confidence=0.7)
    status = assess_quality([nom, ipp], required_roles=["Nom", "IPP"])
    assert status == "complete"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_complete_sans_required_roles():
    """Without required roles, all fields anchored and confident enough: complete."""
    nom = _field("Nom", anchored=True, confidence=0.8)
    ipp = _field("IPP", anchored=True, confidence=0.95)
    status = assess_quality([nom, ipp])
    assert status == "complete"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_complete_seuil_exactement_min_confidence():
    """A confidence exactly equal to the default threshold (0.6) is complete: the bound is inclusive."""
    at_threshold = _field("Nom", anchored=True, confidence=0.6)
    status = assess_quality([at_threshold], required_roles=["Nom"])
    assert status == "complete"
|
||||||
|
|
||||||
|
|
||||||
|
def test_assess_quality_complete_min_confidence_personnalise():
    """With a custom threshold, confidence 0.7 meets min_confidence=0.7: complete."""
    at_custom_threshold = _field("Nom", anchored=True, confidence=0.7)
    status = assess_quality([at_custom_threshold], min_confidence=0.7)
    assert status == "complete"
|
||||||
|
|||||||
Reference in New Issue
Block a user