feat(extraction): map_roles — orchestrateur VLM ancrage strict (client injectable)
build_role_prompt (modes libre / guidé par rôles), parse_vlm_json (robuste :
tolère les fences, {} si invalide), map_roles (prompt -> VLM -> parse -> reconstruct).
Client VLM injecté => testable hors-ligne. 6 tests unitaires ajoutés (15 au total).
Non branché au runtime (brique validée isolément).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,8 @@ import pytest
|
||||
|
||||
from core.extraction.role_mapper import (
|
||||
OcrToken,
|
||||
build_role_prompt,
|
||||
map_roles,
|
||||
reconstruct_fields,
|
||||
tokens_from_grid,
|
||||
)
|
||||
@@ -91,3 +93,60 @@ def test_tokens_from_grid_indexe_et_normalise_bbox():
|
||||
assert [t.id for t in tokens] == [0, 1]
|
||||
assert tokens[0].text == "Nom"
|
||||
assert tokens[1].bbox == (20, 0, 60, 8)
|
||||
|
||||
|
||||
# --- map_roles : orchestrateur (client VLM injectable, donc testable hors-ligne) ---
|
||||
|
||||
def _fake_client(response, capture=None):
|
||||
"""Faux client VLM : enregistre éventuellement le prompt reçu, renvoie une réponse fixe."""
|
||||
def client(image_path, prompt):
|
||||
if capture is not None:
|
||||
capture["prompt"] = prompt
|
||||
capture["image_path"] = image_path
|
||||
return response
|
||||
return client
|
||||
|
||||
|
||||
def test_map_roles_reconstruit_via_client_injecte():
    """map_roles turns the injected client's JSON reply into a single field."""
    vlm_reply = '{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}'
    ocr_tokens = [_tok(0, "DUPONT"), _tok(1, "Jean")]

    result = map_roles("img.png", ocr_tokens, _fake_client(vlm_reply))

    assert len(result) == 1
    only_field = result[0]
    assert only_field.label == "Nom complet"
    assert only_field.value == "DUPONT Jean"
|
||||
|
||||
|
||||
def test_map_roles_tolere_les_fences_json():
    """A VLM reply wrapped in a Markdown ``json`` code fence still yields the field."""
    tokens = [_tok(0, "DUPONT")]
    client = _fake_client('```json\n{"champs":[{"label":"Nom","value_ids":[0]}]}\n```')
    fields = map_roles("img.png", tokens, client)
    # Check the count before indexing so that a failed parse shows up as a
    # clear assertion failure instead of an IndexError on fields[0].
    assert len(fields) == 1
    assert fields[0].value == "DUPONT"
|
||||
|
||||
|
||||
def test_map_roles_json_invalide_retourne_liste_vide():
    """A non-JSON client reply makes map_roles return an empty list."""
    # Batch robustness: a non-JSON VLM reply must not crash.
    garbage_client = _fake_client("désolé, je n'ai pas compris")

    result = map_roles("img.png", [_tok(0, "DUPONT")], garbage_client)

    assert result == []
|
||||
|
||||
|
||||
def test_build_role_prompt_inclut_les_tokens_avec_ids():
    """The prompt contains each token's text and mentions ``value_ids``."""
    prompt = build_role_prompt([_tok(0, "Poids"), _tok(1, "72")])

    for token_text in ("Poids", "72"):
        assert token_text in prompt
    # We ask the model for ids, not for copied text.
    assert "value_ids" in prompt
|
||||
|
||||
|
||||
def test_build_role_prompt_guide_liste_les_roles_attendus():
    """When roles are supplied, every role name appears in the prompt."""
    expected_roles = ("Nom", "IPP", "Poids")

    prompt = build_role_prompt([_tok(0, "X")], roles=list(expected_roles))

    assert all(role in prompt for role in expected_roles)
|
||||
|
||||
|
||||
def test_map_roles_passe_les_roles_au_prompt():
    """Roles given to map_roles show up in the prompt the client receives."""
    captured = {}
    recording_client = _fake_client('{"champs":[]}', capture=captured)

    map_roles("img.png", [_tok(0, "X")], recording_client, roles=["Diagnostic", "GEMSA"])

    sent_prompt = captured["prompt"]
    assert "Diagnostic" in sent_prompt
    assert "GEMSA" in sent_prompt
|
||||
|
||||
Reference in New Issue
Block a user