"""Tests de l'orchestrateur extract_dossier_from_image. Enchaîne OCR → tokens_from_grid → map_roles → assess_quality. L'OCR (`ocr_fn`) et le client VLM (`vlm_client`) sont INJECTABLES → testable sans réseau ni OCR réel. C'est cette fonction que le handler runtime `_handle_extract_dossier_action` appellera (avec le vrai OCR et le vrai client vLLM). """ from core.extraction.role_mapper import extract_dossier_from_image def _cell(text, x0, conf=0.9, row=0, col=0): return {"text": text, "bbox": [[x0, 0], [x0 + 10, 0], [x0 + 10, 8], [x0, 8]], "confidence": conf, "row": row, "col": col} def _fake_vlm(response): def client(image_path, prompt): return response return client def test_orchestre_ocr_vlm_qualite(): grid = [[_cell("DUPONT", 0, conf=0.95, col=0), _cell("Jean", 20, conf=0.9, col=1)]] res = extract_dossier_from_image( "img.png", _fake_vlm('{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}'), ocr_fn=lambda path: grid, ) assert len(res["fields"]) == 1 assert res["fields"][0].value == "DUPONT Jean" assert res["fields"][0].anchored is True assert res["status"] in ("complete", "partial", "needs_review", "failed") assert res["n_tokens"] == 2 def test_ocr_vide_donne_failed(): res = extract_dossier_from_image( "img.png", _fake_vlm('{"champs":[]}'), ocr_fn=lambda path: [], ) assert res["status"] == "failed" assert res["fields"] == [] def test_status_needs_review_si_role_requis_absent(): grid = [[_cell("X", 0)]] res = extract_dossier_from_image( "img.png", _fake_vlm('{"champs":[{"label":"Autre","value_ids":[0]}]}'), ocr_fn=lambda path: grid, required_roles=["Nom"], ) assert res["status"] == "needs_review" def test_roles_transmis_au_vlm(): grid = [[_cell("X", 0)]] captured = {} def client(image_path, prompt): captured["prompt"] = prompt return '{"champs":[]}' extract_dossier_from_image( "img.png", client, ocr_fn=lambda path: grid, roles=["Diagnostic", "GEMSA"], ) assert "Diagnostic" in captured["prompt"] and "GEMSA" in captured["prompt"]