# rpa_vision_v3/tests/unit/test_extract_dossier_from_image.py

"""Tests de l'orchestrateur extract_dossier_from_image.

Enchaîne OCR → tokens_from_grid → map_roles → assess_quality. L'OCR (`ocr_fn`)
et le client VLM (`vlm_client`) sont INJECTABLES → testable sans réseau ni OCR
réel. C'est cette fonction que le handler runtime `_handle_extract_dossier_action`
appellera (avec le vrai OCR et le vrai client vLLM).
"""
from core.extraction.role_mapper import extract_dossier_from_image


def _cell(text, x0, conf=0.9, row=0, col=0):
    """Build a dict describing one fake OCR cell for use in a test grid.

    The bounding box is a four-corner polygon, 10 units wide and 8 units
    tall, whose first corner sits at ``(x0, 0)``.

    Args:
        text: Value stored under the ``"text"`` key.
        x0: Horizontal coordinate of the box's first corner.
        conf: Value stored under the ``"confidence"`` key.
        row: Value stored under the ``"row"`` key.
        col: Value stored under the ``"col"`` key.

    Returns:
        A dict with keys ``text``, ``bbox``, ``confidence``, ``row``, ``col``.
    """
    x1 = x0 + 10
    corners = [[x0, 0], [x1, 0], [x1, 8], [x0, 8]]
    return dict(text=text, bbox=corners, confidence=conf, row=row, col=col)


def _fake_vlm(response):
    """Return a stub VLM client that ignores its inputs and yields ``response``.

    The returned callable takes ``(image_path, prompt)`` and always returns
    the ``response`` object given here, unchanged.
    """
    return lambda image_path, prompt: response


def test_orchestre_ocr_vlm_qualite():
    """Check the nominal OCR -> VLM -> result flow with injected fakes.

    A one-row grid holding the cells "DUPONT" and "Jean" is served by the
    OCR stub, and the VLM stub replies with a single "Nom complet" entry
    referencing value ids 0 and 1. The result must expose exactly one field
    whose value is "DUPONT Jean" and which is anchored, a status among the
    four listed values, and a token count of 2.
    """
    first_row = [
        _cell("DUPONT", 0, conf=0.95, col=0),
        _cell("Jean", 20, conf=0.9, col=1),
    ]
    grid = [first_row]
    vlm_client = _fake_vlm('{"champs":[{"label":"Nom complet","value_ids":[0,1]}]}')

    def ocr_stub(path):
        return grid

    res = extract_dossier_from_image("img.png", vlm_client, ocr_fn=ocr_stub)

    fields = res["fields"]
    assert len(fields) == 1
    only_field = fields[0]
    assert only_field.value == "DUPONT Jean"
    assert only_field.anchored is True
    allowed_statuses = ("complete", "partial", "needs_review", "failed")
    assert res["status"] in allowed_statuses
    assert res["n_tokens"] == 2


def test_ocr_vide_donne_failed():
    """An OCR stub returning an empty grid must give status "failed" and no fields."""
    vlm_client = _fake_vlm('{"champs":[]}')

    def empty_ocr(path):
        return []

    res = extract_dossier_from_image("img.png", vlm_client, ocr_fn=empty_ocr)

    assert res["status"] == "failed"
    assert res["fields"] == []


def test_status_needs_review_si_role_requis_absent():
    """Status must be "needs_review" when a required role is not among the VLM labels.

    The VLM stub only returns a field labelled "Autre" while "Nom" is passed
    as the sole required role.
    """
    single_cell_grid = [[_cell("X", 0)]]
    vlm_reply = '{"champs":[{"label":"Autre","value_ids":[0]}]}'

    outcome = extract_dossier_from_image(
        "img.png",
        _fake_vlm(vlm_reply),
        ocr_fn=lambda path: single_cell_grid,
        required_roles=["Nom"],
    )

    assert outcome["status"] == "needs_review"


def test_roles_transmis_au_vlm():
    """The role names passed via ``roles`` must appear in the prompt given to the VLM client.

    A recording client stores the prompt it receives; the test then checks
    that both "Diagnostic" and "GEMSA" occur in that prompt.
    """
    expected_roles = ("Diagnostic", "GEMSA")
    captured = {}

    def recording_client(image_path, prompt):
        # Keep the prompt so it can be inspected after the call returns.
        captured["prompt"] = prompt
        return '{"champs":[]}'

    one_cell_grid = [[_cell("X", 0)]]
    extract_dossier_from_image(
        "img.png",
        recording_client,
        ocr_fn=lambda path: one_cell_grid,
        roles=list(expected_roles),
    )

    sent_prompt = captured["prompt"]
    assert all(role in sent_prompt for role in expected_roles)