Enchaine ocr_fn -> tokens_from_grid -> map_roles -> assess_quality. OCR et client VLM injectables (testable hors-ligne, import OCR lazy = module reste pur). C'est la brique que le handler runtime extract_dossier appellera. 4 tests (35 au total role_mapper). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
69 lines
2.2 KiB
Python
69 lines
2.2 KiB
Python
"""Tests for the extract_dossier_from_image orchestrator.

Chains OCR → tokens_from_grid → map_roles → assess_quality. The OCR (`ocr_fn`)
and the VLM client (`vlm_client`) are INJECTABLE → testable without network or
real OCR. This is the function that the runtime handler
`_handle_extract_dossier_action` will call (with the real OCR and the real vLLM
client).
"""
|
|
from core.extraction.role_mapper import extract_dossier_from_image
|
|
|
|
|
|
def _cell(text, x0, conf=0.9, row=0, col=0):
|
|
return {"text": text, "bbox": [[x0, 0], [x0 + 10, 0], [x0 + 10, 8], [x0, 8]],
|
|
"confidence": conf, "row": row, "col": col}
|
|
|
|
|
|
def _fake_vlm(response):
|
|
def client(image_path, prompt):
|
|
return response
|
|
return client
|
|
|
|
|
|
def test_orchestre_ocr_vlm_qualite():
    """Check the full chain on a one-row grid of two cells.

    The fake VLM maps both token ids (0 and 1) to a single "Nom complet"
    field; the test expects one anchored field whose value joins the two cell
    texts, a status among the known values, and a token count of 2.
    """
    grid = [[_cell("DUPONT", 0, conf=0.95, col=0), _cell("Jean", 20, conf=0.9, col=1)]]
    res = extract_dossier_from_image(
        "img.png",
        _fake_vlm('{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}'),
        ocr_fn=lambda path: grid,
    )
    assert len(res["fields"]) == 1
    assert res["fields"][0].value == "DUPONT Jean"
    assert res["fields"][0].anchored is True
    assert res["status"] in ("complete", "partial", "needs_review", "failed")
    assert res["n_tokens"] == 2
|
|
|
|
|
|
def test_ocr_vide_donne_failed():
    """An empty OCR grid must yield status "failed" and no fields."""
    res = extract_dossier_from_image(
        "img.png",
        _fake_vlm('{"champs":[]}'),
        ocr_fn=lambda path: [],
    )
    assert res["status"] == "failed"
    assert res["fields"] == []
|
|
|
|
|
|
def test_status_needs_review_si_role_requis_absent():
    """A missing required role must yield status "needs_review".

    The fake VLM only returns a field labelled "Autre" while "Nom" is the
    required role.
    """
    grid = [[_cell("X", 0)]]
    res = extract_dossier_from_image(
        "img.png",
        _fake_vlm('{"champs":[{"label":"Autre","value_ids":[0]}]}'),
        ocr_fn=lambda path: grid,
        required_roles=["Nom"],
    )
    assert res["status"] == "needs_review"
|
|
|
|
|
|
def test_roles_transmis_au_vlm():
    """The requested roles must appear in the prompt handed to the VLM client."""
    grid = [[_cell("X", 0)]]
    captured = {}

    def client(image_path, prompt):
        # Record the prompt so it can be inspected once the call returns.
        captured["prompt"] = prompt
        return '{"champs":[]}'

    extract_dossier_from_image(
        "img.png", client, ocr_fn=lambda path: grid, roles=["Diagnostic", "GEMSA"],
    )
    # Fail with a clear message (rather than a KeyError) if the client was
    # never invoked, then check each role separately so a failure names it.
    assert "prompt" in captured, "VLM client was never called"
    assert "Diagnostic" in captured["prompt"]
    assert "GEMSA" in captured["prompt"]
|