Files
rpa_vision_v3/tests/unit/test_role_mapper.py
Dom 509a026cfc feat(extraction): assess_quality — statut qualité dossier (4 niveaux)
complete / partial / needs_review / failed (priorité décroissante), matching
rôle requis insensible casse+espaces, seuil min_confidence paramétrable (0.6).
16 tests ajoutés (31 au total, verts). Brique TDD via sous-agent, code révisé.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-30 11:42:14 +02:00

297 lines
11 KiB
Python

"""Tests du role_mapper : reconstruction de champs ANCRÉS sur l'OCR.
Principe cardinal (cf. gate vert 30/06) : le VLM ne fournit QUE des ids de tokens OCR
(value_ids) ; la valeur est reconstruite côté Python depuis l'OCR. Aucun texte produit
par le VLM ne doit pouvoir entrer dans une valeur -> 0 hallucination par construction.
"""
import pytest
from core.extraction.role_mapper import (
MappedField,
OcrToken,
assess_quality,
build_role_prompt,
map_roles,
reconstruct_fields,
tokens_from_grid,
)
def _tok(tid, text, conf=0.9, bbox=(0, 0, 10, 10)):
    """Build an OcrToken for tests, with a default confidence and bounding box."""
    token = OcrToken(id=tid, text=text, confidence=conf, bbox=bbox)
    return token
def test_reconstruit_value_concatene_tokens_dans_lordre():
    """Two referenced token ids give one anchored field whose value joins the texts in order."""
    ocr_tokens = [_tok(0, "DUPONT"), _tok(1, "Jean")]
    vlm_entries = [{"label": "Nom complet", "value_ids": [0, 1]}]

    result = reconstruct_fields(ocr_tokens, vlm_entries)

    assert len(result) == 1
    field = result[0]
    assert field.label == "Nom complet"
    assert field.value == "DUPONT Jean"
    assert field.anchored is True
def test_ignore_les_ids_hors_plage_et_les_liste():
    """An id with no matching token is kept out of the value and listed in invalid_ids."""
    ocr_tokens = [_tok(0, "DUPONT")]
    vlm_entries = [{"label": "Nom", "value_ids": [0, 99]}]

    field = reconstruct_fields(ocr_tokens, vlm_entries)[0]

    assert field.value == "DUPONT"
    assert field.invalid_ids == [99]
    assert field.anchored is True
def test_value_ids_vide_donne_champ_non_ancre():
    """An empty value_ids list gives a field with an empty value that is not anchored."""
    vlm_entries = [{"label": "Poids", "value_ids": []}]

    field = reconstruct_fields([_tok(0, "DUPONT")], vlm_entries)[0]

    assert field.value == ""
    assert field.anchored is False
def test_aucun_id_valide_donne_champ_non_ancre():
    """When no requested id matches a token, the field is unanchored, empty, and lists every id as invalid."""
    ocr_tokens = [_tok(0, "DUPONT")]
    vlm_entries = [{"label": "Poids", "value_ids": [7, 8]}]

    field = reconstruct_fields(ocr_tokens, vlm_entries)[0]

    assert field.anchored is False
    assert field.value == ""
    assert field.invalid_ids == [7, 8]
def test_dedup_ids_en_preservant_lordre():
    """A repeated id is used once, and the first-seen order of ids is kept."""
    ocr_tokens = [_tok(0, "DUPONT"), _tok(1, "Jean")]
    vlm_entries = [{"label": "X", "value_ids": [1, 1, 0]}]

    field = reconstruct_fields(ocr_tokens, vlm_entries)[0]

    assert field.value == "Jean DUPONT"
    assert field.value_ids == [1, 0]
def test_confidence_est_le_min_des_tokens_ancres():
    """The field confidence equals the lowest confidence among its anchored tokens."""
    strong = _tok(0, "A", conf=0.95)
    weak = _tok(1, "B", conf=0.70)

    result = reconstruct_fields([strong, weak], [{"label": "X", "value_ids": [0, 1]}])

    assert result[0].confidence == pytest.approx(0.70)
def test_bbox_englobante_des_tokens_ancres():
    """The field bbox is the box enclosing the boxes of all anchored tokens."""
    left = _tok(0, "A", bbox=(0, 0, 10, 10))
    right = _tok(1, "B", bbox=(20, 5, 40, 15))

    result = reconstruct_fields([left, right], [{"label": "X", "value_ids": [0, 1]}])

    assert result[0].bbox == (0, 0, 40, 15)
def test_invariant_aucun_texte_hors_ocr():
    """A 'value' string supplied by the VLM is ignored; only value_ids drives the result."""
    ocr_tokens = [_tok(0, "DUPONT")]
    # The entry carries a made-up "value" that must not leak into the field.
    vlm_entry = {"label": "Nom", "value_ids": [0], "value": "HALLUCINATION"}

    result = reconstruct_fields(ocr_tokens, [vlm_entry])

    assert result[0].value == "DUPONT"
def test_tokens_from_grid_indexe_et_normalise_bbox():
    """tokens_from_grid numbers the cells from 0 and converts 4-point boxes to 4-tuples."""
    # Cells shaped like the extract_grid_from_image grid: bbox is 4 EasyOCR points.
    label_cell = {
        "text": "Nom",
        "bbox": [[0, 0], [10, 0], [10, 8], [0, 8]],
        "confidence": 0.9,
        "row": 0,
        "col": 0,
    }
    value_cell = {
        "text": "DUPONT",
        "bbox": [[20, 0], [60, 0], [60, 8], [20, 8]],
        "confidence": 0.95,
        "row": 0,
        "col": 1,
    }
    single_row_grid = [[label_cell, value_cell]]

    tokens = tokens_from_grid(single_row_grid)

    token_ids = [token.id for token in tokens]
    assert token_ids == [0, 1]
    assert tokens[0].text == "Nom"
    assert tokens[1].bbox == (20, 0, 60, 8)
# --- map_roles : orchestrateur (client VLM injectable, donc testable hors-ligne) ---
def _fake_client(response, capture=None):
    """Return a stand-in VLM client that always answers with ``response``.

    When ``capture`` is provided, each call stores the received prompt and
    image path in it under the "prompt" and "image_path" keys.
    """

    def client(image_path, prompt):
        # Nothing to record: just hand back the canned answer.
        if capture is None:
            return response
        capture["prompt"] = prompt
        capture["image_path"] = image_path
        return response

    return client
def test_map_roles_reconstruit_via_client_injecte():
    """map_roles rebuilds fields from the JSON answer of the injected client."""
    ocr_tokens = [_tok(0, "DUPONT"), _tok(1, "Jean")]
    vlm_answer = '{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}'

    result = map_roles("img.png", ocr_tokens, _fake_client(vlm_answer))

    assert len(result) == 1
    field = result[0]
    assert field.label == "Nom complet"
    assert field.value == "DUPONT Jean"
def test_map_roles_tolere_les_fences_json():
    """A JSON answer wrapped in a Markdown json code fence is still parsed."""
    ocr_tokens = [_tok(0, "DUPONT")]
    fenced_answer = '```json\n{"champs":[{"label":"Nom","value_ids":[0]}]}\n```'

    result = map_roles("img.png", ocr_tokens, _fake_client(fenced_answer))

    assert result[0].value == "DUPONT"
def test_map_roles_json_invalide_retourne_liste_vide():
    """A non-JSON VLM answer yields an empty list rather than an exception."""
    # Batch robustness: one unparseable answer must not crash the run.
    ocr_tokens = [_tok(0, "DUPONT")]
    garbage_client = _fake_client("désolé, je n'ai pas compris")

    result = map_roles("img.png", ocr_tokens, garbage_client)

    assert result == []
def test_build_role_prompt_inclut_les_tokens_avec_ids():
    """The prompt contains every token text and mentions value_ids."""
    ocr_tokens = [_tok(0, "Poids"), _tok(1, "72")]

    prompt = build_role_prompt(ocr_tokens)

    assert "Poids" in prompt
    assert "72" in prompt
    # The prompt must ask for ids, not for copied text.
    assert "value_ids" in prompt
def test_build_role_prompt_guide_liste_les_roles_attendus():
    """When roles are given, each role name appears in the prompt."""
    expected_roles = ["Nom", "IPP", "Poids"]

    prompt = build_role_prompt([_tok(0, "X")], roles=expected_roles)

    assert "Nom" in prompt
    assert "IPP" in prompt
    assert "Poids" in prompt
def test_map_roles_passe_les_roles_au_prompt():
    """The roles passed to map_roles show up in the prompt the client receives."""
    ocr_tokens = [_tok(0, "X")]
    recorded = {}
    spy_client = _fake_client('{"champs":[]}', capture=recorded)

    map_roles("img.png", ocr_tokens, spy_client, roles=["Diagnostic", "GEMSA"])

    sent_prompt = recorded["prompt"]
    assert "Diagnostic" in sent_prompt
    assert "GEMSA" in sent_prompt
# ---------------------------------------------------------------------------
# assess_quality — évaluation de la qualité d'extraction d'un dossier
# ---------------------------------------------------------------------------
def _field(label, value="val", anchored=True, confidence=0.9, value_ids=None, invalid_ids=None):
    """Build a MappedField directly, without going through OCR or the VLM.

    A non-anchored field gets an empty value, no bbox, and an empty default
    for value_ids; an anchored one defaults to value_ids [0] and a fixed bbox.
    """
    if anchored:
        field_value = value
        default_ids = [0]
        box = (0, 0, 10, 10)
    else:
        field_value = ""
        default_ids = []
        box = None

    return MappedField(
        label=label,
        value=field_value,
        value_ids=value_ids or default_ids,
        confidence=confidence,
        bbox=box,
        anchored=anchored,
        invalid_ids=invalid_ids or [],
    )
# --- failed ---
def test_assess_quality_failed_aucun_champ():
    """An empty field list is rated "failed"."""
    status = assess_quality([])
    assert status == "failed"
def test_assess_quality_failed_aucun_champ_ancre():
    """When every field is unanchored, the status is "failed"."""
    unanchored = [_field(name, anchored=False) for name in ("Nom", "IPP")]

    status = assess_quality(unanchored)

    assert status == "failed"
def test_assess_quality_failed_un_champ_value_vide():
    """A single unanchored field with an empty value is rated "failed"."""
    lone_field = _field("Nom", anchored=False, value_ids=[])

    status = assess_quality([lone_field])

    assert status == "failed"
# --- needs_review ---
def test_assess_quality_needs_review_role_requis_absent():
    """A required role missing from the fields gives "needs_review"."""
    present = [_field("Nom", anchored=True)]

    status = assess_quality(present, required_roles=["Nom", "IPP"])

    assert status == "needs_review"
def test_assess_quality_needs_review_role_requis_non_ancre():
    """A required role that is present but unanchored gives "needs_review"."""
    anchored_name = _field("Nom", anchored=True)
    unanchored_ipp = _field("IPP", anchored=False)

    status = assess_quality([anchored_name, unanchored_ipp], required_roles=["Nom", "IPP"])

    assert status == "needs_review"
def test_assess_quality_needs_review_matching_insensible_casse():
    """Label-to-required-role matching ignores letter case."""
    lowercase_fields = [
        _field("nom complet", anchored=True),
        _field("ipp", anchored=True),
    ]

    # Required roles use different casing and must still match.
    status = assess_quality(lowercase_fields, required_roles=["Nom Complet", "IPP"])

    assert status != "needs_review"
def test_assess_quality_needs_review_matching_insensible_espaces():
    """Label-to-required-role matching ignores surrounding whitespace."""
    padded_field = _field(" Nom ", anchored=True)

    status = assess_quality([padded_field], required_roles=["Nom"])

    assert status != "needs_review"
def test_assess_quality_needs_review_priorite_sur_partial():
    """"needs_review" wins over "partial": missing role plus low confidence gives "needs_review"."""
    # Only "Nom" is present, with a low confidence; "IPP" is absent.
    low_confidence_name = _field("Nom", anchored=True, confidence=0.4)

    status = assess_quality([low_confidence_name], required_roles=["Nom", "IPP"])

    assert status == "needs_review"
# --- partial ---
def test_assess_quality_partial_confidence_basse():
    """All required roles anchored, but one anchored field below min_confidence, gives "partial"."""
    confident_name = _field("Nom", anchored=True, confidence=0.9)
    shaky_ipp = _field("IPP", anchored=True, confidence=0.4)  # below 0.6

    status = assess_quality([confident_name, shaky_ipp], required_roles=["Nom", "IPP"])

    assert status == "partial"
def test_assess_quality_partial_champs_non_ancres_en_surplus():
    """Required roles anchored with good confidence, plus an extra unanchored field, gives "partial"."""
    required_name = _field("Nom", anchored=True, confidence=0.9)
    extra_unanchored = _field("Inconnu", anchored=False)  # not a required role

    status = assess_quality([required_name, extra_unanchored], required_roles=["Nom"])

    assert status == "partial"
def test_assess_quality_partial_sans_required_roles_confidence_basse():
    """Without required_roles, one anchored field with low confidence gives "partial"."""
    confident_name = _field("Nom", anchored=True, confidence=0.9)
    shaky_ipp = _field("IPP", anchored=True, confidence=0.3)

    status = assess_quality([confident_name, shaky_ipp])

    assert status == "partial"
def test_assess_quality_partial_sans_required_roles_champ_non_ancre():
    """Without required_roles, an unanchored field next to an anchored one gives "partial"."""
    anchored_name = _field("Nom", anchored=True, confidence=0.9)
    unanchored_ipp = _field("IPP", anchored=False)

    status = assess_quality([anchored_name, unanchored_ipp])

    assert status == "partial"
# --- complete ---
def test_assess_quality_complete_tous_requis_ancres_confidence_ok():
    """All required roles anchored, confidences at or above 0.6, nothing unanchored: "complete"."""
    anchored_fields = [
        _field(name, anchored=True, confidence=score)
        for name, score in (("Nom", 0.9), ("IPP", 0.7))
    ]

    status = assess_quality(anchored_fields, required_roles=["Nom", "IPP"])

    assert status == "complete"
def test_assess_quality_complete_sans_required_roles():
    """Without required_roles, all fields anchored with sufficient confidence gives "complete"."""
    anchored_fields = [
        _field(name, anchored=True, confidence=score)
        for name, score in (("Nom", 0.8), ("IPP", 0.95))
    ]

    status = assess_quality(anchored_fields)

    assert status == "complete"
def test_assess_quality_complete_seuil_exactement_min_confidence():
    """A confidence exactly equal to the 0.6 threshold still counts as "complete" (inclusive bound)."""
    borderline = _field("Nom", anchored=True, confidence=0.6)

    status = assess_quality([borderline], required_roles=["Nom"])

    assert status == "complete"
def test_assess_quality_complete_min_confidence_personnalise():
    """With a custom threshold, confidence 0.7 against min_confidence 0.7 gives "complete"."""
    at_threshold = _field("Nom", anchored=True, confidence=0.7)

    status = assess_quality([at_threshold], min_confidence=0.7)

    assert status == "complete"