def extract_dossier_from_image(
    image_path: str,
    vlm_client: VlmClient,
    roles: Optional[Sequence[str]] = None,
    ocr_fn: Optional[Callable[[str], Sequence[Sequence[dict]]]] = None,
    min_confidence: float = 0.6,
    required_roles: Optional[Sequence[str]] = None,
) -> dict:
    """Extract a dossier from a screenshot: OCR -> role mapping -> quality.

    Runs, in order: ``ocr_fn`` (OCR grid) -> ``tokens_from_grid`` ->
    ``map_roles`` (VLM-based) -> ``assess_quality``. This is the building
    block intended to be called by the runtime handler
    ``_handle_extract_dossier_action`` with the real OCR and the real vLLM
    client. Both ``ocr_fn`` and ``vlm_client`` are injectable so the pipeline
    can be exercised offline.

    Args:
        image_path: Path of the capture; passed to ``ocr_fn`` and ``map_roles``.
        vlm_client: VLM client forwarded unchanged to ``map_roles``.
        roles: Optional role names forwarded to ``map_roles``.
        ocr_fn: Callable taking the image path and returning the OCR grid.
            When ``None``, ``core.llm.ocr_extractor.extract_grid_from_image``
            is used.
        min_confidence: Forwarded to ``assess_quality``.
        required_roles: Forwarded to ``assess_quality``.

    Returns:
        A dict with keys ``fields`` (the result of ``map_roles``), ``status``
        (the result of ``assess_quality``) and ``n_tokens`` (number of OCR
        tokens).
    """
    if ocr_fn is not None:
        run_ocr = ocr_fn
    else:
        # Deferred import: the OCR backend is only loaded when no OCR callable
        # was injected, so this module stays free of it in offline tests.
        from core.llm.ocr_extractor import extract_grid_from_image as run_ocr

    ocr_tokens = tokens_from_grid(run_ocr(image_path))
    mapped_fields = map_roles(image_path, ocr_tokens, vlm_client, roles)
    return {
        "fields": mapped_fields,
        "status": assess_quality(
            mapped_fields,
            required_roles=required_roles,
            min_confidence=min_confidence,
        ),
        "n_tokens": len(ocr_tokens),
    }
"""Tests for the ``extract_dossier_from_image`` orchestrator.

The orchestrator chains OCR -> tokens_from_grid -> map_roles -> assess_quality.
The OCR step (``ocr_fn``) and the VLM client (``vlm_client``) are both
injectable, so these tests need neither network access nor a real OCR engine.
This is the function the runtime handler ``_handle_extract_dossier_action``
is meant to call (with the real OCR and the real vLLM client).
"""
from core.extraction.role_mapper import extract_dossier_from_image


def _cell(text, x0, conf=0.9, row=0, col=0):
    """Build a fake OCR grid cell: a 10x8 box whose left edge sits at ``x0``."""
    x1 = x0 + 10
    return {
        "text": text,
        "bbox": [[x0, 0], [x1, 0], [x1, 8], [x0, 8]],
        "confidence": conf,
        "row": row,
        "col": col,
    }


def _fake_vlm(response):
    """Return a stub VLM client that ignores its inputs and yields ``response``."""
    return lambda image_path, prompt: response


def _static_ocr(grid):
    """Return a stub OCR callable that yields ``grid`` for any image path."""
    return lambda path: grid


def test_orchestre_ocr_vlm_qualite():
    """Two OCR tokens referenced by the VLM end up as one anchored field."""
    ocr_grid = [
        [
            _cell("DUPONT", 0, conf=0.95, col=0),
            _cell("Jean", 20, conf=0.9, col=1),
        ]
    ]
    vlm = _fake_vlm('{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}')

    result = extract_dossier_from_image("img.png", vlm, ocr_fn=_static_ocr(ocr_grid))

    extracted = result["fields"]
    assert len(extracted) == 1
    only_field = extracted[0]
    assert only_field.value == "DUPONT Jean"
    assert only_field.anchored is True
    assert result["status"] in ("complete", "partial", "needs_review", "failed")
    assert result["n_tokens"] == 2


def test_ocr_vide_donne_failed():
    """An empty OCR grid gives a ``failed`` status and no fields."""
    result = extract_dossier_from_image(
        "img.png",
        _fake_vlm('{"champs":[]}'),
        ocr_fn=_static_ocr([]),
    )

    assert result["status"] == "failed"
    assert result["fields"] == []


def test_status_needs_review_si_role_requis_absent():
    """A missing required role downgrades the status to ``needs_review``."""
    ocr_grid = [[_cell("X", 0)]]
    vlm = _fake_vlm('{"champs":[{"label":"Autre","value_ids":[0]}]}')

    result = extract_dossier_from_image(
        "img.png",
        vlm,
        ocr_fn=_static_ocr(ocr_grid),
        required_roles=["Nom"],
    )

    assert result["status"] == "needs_review"


def test_roles_transmis_au_vlm():
    """The requested role names appear in the prompt sent to the VLM."""
    ocr_grid = [[_cell("X", 0)]]
    seen = {}

    def recording_client(image_path, prompt):
        # Keep the prompt so the assertions below can inspect it.
        seen["prompt"] = prompt
        return '{"champs":[]}'

    extract_dossier_from_image(
        "img.png",
        recording_client,
        ocr_fn=_static_ocr(ocr_grid),
        roles=["Diagnostic", "GEMSA"],
    )

    assert "Diagnostic" in seen["prompt"]
    assert "GEMSA" in seen["prompt"]