- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens), grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue), visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime) - api_stream/replay_engine : dispatch action navigate server-side, never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti) - 131 tests verts (wiring boot, e2e handler, unit modules) Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
491 lines
18 KiB
Python
491 lines
18 KiB
Python
"""Tests for core/navigation/visual_verifier.py — OCR-anchored architecture.
|
|
|
|
Tests pure functions (normalize_text, fuzzy_match, ocr_presence_check,
|
|
build_role_confirm_prompt, parse_role_confirm_response) offline,
|
|
then verifies verify_screen_match with mock OcrClient + VlmClient.
|
|
"""
|
|
|
|
import json
|
|
import pytest
|
|
from core.navigation.visual_verifier import (
|
|
normalize_text,
|
|
fuzzy_match,
|
|
ocr_presence_check,
|
|
build_role_confirm_prompt,
|
|
parse_role_confirm_response,
|
|
verify_screen_match,
|
|
verify_before,
|
|
verify_after,
|
|
ScreenMatchResult,
|
|
OcrPresenceResult,
|
|
)
|
|
|
|
|
|
# ── Mock factories ─────────────────────────────────────────────────────
|
|
|
|
|
|
def mock_ocr_client_factory(tokens: list):
    """Build a stand-in OcrClient for tests.

    The returned callable accepts an image path, ignores it, and always
    hands back the very same ``tokens`` object that was given to the
    factory (no copy is made).
    """

    def fake_ocr(image_path: str) -> list:
        # The path is irrelevant: the canned tokens are returned as-is.
        return tokens

    return fake_ocr
|
|
|
|
|
|
def mock_vlm_client_factory(response_json: dict):
    """Build a stand-in VlmClient for tests.

    The returned callable takes an image path and a prompt, ignores both,
    and answers with ``response_json`` serialised as a JSON string.
    """

    def fake_vlm(image_path: str, prompt: str) -> str:
        # Serialisation happens on each call, not once in the factory.
        serialized = json.dumps(response_json)
        return serialized

    return fake_vlm
|
|
|
|
|
|
# ── normalize_text tests ──────────────────────────────────────────────
|
|
|
|
|
|
class TestNormalizeText:
    """Unit tests for ``normalize_text``: casing, accents and whitespace."""

    def test_lowercase(self):
        """Upper-case input comes back lower-cased."""
        assert normalize_text("RECHERCHER") == "rechercher"

    def test_strip_accents(self):
        """Accented characters are reduced to their unaccented form."""
        assert normalize_text("Recherché") == "recherche"

    def test_collapse_whitespace(self):
        """Outer blanks are dropped and inner runs shrink to one space."""
        # The input deliberately contains runs of several spaces (outside
        # and between the words) so that collapsing is actually exercised,
        # not just stripping.
        assert normalize_text("  hello    world  ") == "hello world"

    def test_combined(self):
        """Casing, accents and whitespace are all normalised together."""
        # Same idea as above: the run of spaces between the two words must
        # come back as a single space.
        assert normalize_text("  Nom   Prénom  ") == "nom prenom"

    def test_empty(self):
        """The empty string normalises to the empty string."""
        assert normalize_text("") == ""

    def test_numbers_preserved(self):
        """Digits pass through untouched; only the letters are lowered."""
        assert normalize_text("IPP 12345") == "ipp 12345"
|
|
|
|
|
|
# ── fuzzy_match tests ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestFuzzyMatch:
    """Unit tests for ``fuzzy_match``.

    Results are asserted through plain truthiness (``assert x`` /
    ``assert not x``) rather than ``== True`` / ``== False``, as PEP 8
    recommends for boolean values.
    """

    def test_exact_match(self):
        """Identical strings match."""
        assert fuzzy_match("Rechercher", "Rechercher")

    def test_case_insensitive(self):
        """Strings differing only by letter case match."""
        assert fuzzy_match("rechercher", "RECHERCHER")

    def test_accent_match(self):
        """Strings differing only by an accent match."""
        assert fuzzy_match("Recherché", "Recherche")

    def test_substring_containment(self):
        """A short text contained in a longer OCR token matches."""
        assert fuzzy_match("Rechercher", "Bouton Rechercher")

    def test_reverse_containment(self):
        """An OCR token contained in the longer expected text matches."""
        assert fuzzy_match("Nom Prénom Patient", "Nom")

    def test_fuzzy_ratio(self):
        """Near-identical strings match at a 0.8 threshold."""
        # Neither string equals nor contains the other, so only the
        # similarity score can make them match. The score is expected to
        # lie between 0.8 and 0.95 (about 0.84 if the implementation uses
        # a difflib-style ratio -- TODO confirm against the module).
        assert fuzzy_match("Connexion", "Connection", threshold=0.8)

    def test_no_match(self):
        """Unrelated strings do not match."""
        assert not fuzzy_match("Dashboard", "Login", threshold=0.8)

    def test_custom_threshold(self):
        """The same pair that passes at 0.8 is rejected at 0.95."""
        assert not fuzzy_match("Connexion", "Connection", threshold=0.95)
|
|
|
|
|
|
# ── ocr_presence_check tests ──────────────────────────────────────────
|
|
|
|
|
|
class TestOcrPresenceCheck:
    """Unit tests for ``ocr_presence_check(tokens, elements)``.

    Boolean fields are asserted through truthiness instead of
    ``== True`` / ``== False`` (PEP 8 programming recommendations).
    """

    def test_all_found(self):
        """Every expected text has a matching OCR token."""
        tokens = ["Rechercher", "Connexion", "Nom Patient"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
        assert result.presence_ratio == 1.0
        assert len(result.missing) == 0
        assert result.found_texts["Rechercher"] == "Rechercher"

    def test_partial_found(self):
        """One of two expected texts is absent from the OCR tokens."""
        tokens = ["Rechercher"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.5
        # Missing entries are reported as "<role>: <text>".
        assert "bouton: Connexion" in result.missing

    def test_none_found(self):
        """No expected text appears among the OCR tokens."""
        tokens = ["Accueil", "Paramètres"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.0
        assert "bouton: Rechercher" in result.missing

    def test_fuzzy_match_in_presence(self):
        """An OCR token with an accent variation still counts as found."""
        tokens = ["Rechércher"]  # OCR with accent variation
        elements = [{"role": "bouton", "text": "Rechercher"}]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found

    def test_empty_tokens(self):
        """With no OCR tokens at all, an expected text cannot be found."""
        result = ocr_presence_check([], [{"role": "bouton", "text": "Login"}])
        assert not result.all_found
        assert result.presence_ratio == 0.0

    def test_empty_elements(self):
        """With nothing expected, the check succeeds with ratio 1.0."""
        result = ocr_presence_check(["Login", "Password"], [])
        assert result.all_found
        assert result.presence_ratio == 1.0

    def test_no_text_key(self):
        """An element without a ``text`` key has nothing to look for."""
        elements = [{"role": "page"}]  # no text key
        result = ocr_presence_check(["Dashboard"], elements)
        assert result.all_found  # no text to check -> trivially found

    def test_multiple_elements_same_text(self):
        """Two elements sharing one text are both satisfied by one token."""
        tokens = ["Connexion"]
        elements = [
            {"role": "bouton", "text": "Connexion"},
            {"role": "label", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
|
|
|
|
|
|
# ── build_role_confirm_prompt tests ───────────────────────────────────
|
|
|
|
|
|
class TestBuildRoleConfirmPrompt:
    """Unit tests for the text produced by ``build_role_confirm_prompt``."""

    def test_basic_prompt(self):
        """The prompt quotes the text, names the role and the answer key."""
        found_entry = {
            "text": "Rechercher",
            "expected_role": "bouton",
            "matched_ocr": "Rechercher",
        }
        expected_entry = {"role": "bouton", "text": "Rechercher"}
        prompt = build_role_confirm_prompt([found_entry], [expected_entry])
        for fragment in (
            'Text "Rechercher"',
            "expected role: bouton",
            "role_confirmed",
        ):
            assert fragment in prompt

    def test_with_context(self):
        """A ``context`` argument is echoed on a ``Context:`` line."""
        found_entry = {
            "text": "Connexion",
            "expected_role": "bouton",
            "matched_ocr": "Connexion",
        }
        expected_entry = {"role": "bouton", "text": "Connexion"}
        prompt = build_role_confirm_prompt(
            [found_entry], [expected_entry], context="page login DPI"
        )
        assert "Context: page login DPI" in prompt

    def test_multiple_elements(self):
        """Three found elements yield numbered items 1., 2. and 3."""
        pairs = [
            ("Login", "champ"),
            ("Password", "champ"),
            ("Connexion", "bouton"),
        ]
        found = [
            {"text": text, "expected_role": role, "matched_ocr": text}
            for text, role in pairs
        ]
        expected = [{"role": role, "text": text} for text, role in pairs]
        prompt = build_role_confirm_prompt(found, expected)
        for marker in ("1.", "2.", "3."):
            assert marker in prompt

    def test_no_self_declaration(self):
        """Prompt must NOT ask VLM to declare presence — only role."""
        found_entry = {
            "text": "Login",
            "expected_role": "champ",
            "matched_ocr": "Login",
        }
        expected_entry = {"role": "champ", "text": "Login"}
        prompt = build_role_confirm_prompt([found_entry], [expected_entry])
        lowered = prompt.lower()
        # NOTE(review): test_basic_prompt requires "role_confirmed" to be in
        # the prompt, so the second operand below looks always true and this
        # assertion can probably never fail. Tighten it once the exact
        # wording the prompt must avoid is known.
        assert "present" not in lowered or "confirmed" in lowered
|
|
|
|
|
|
# ── parse_role_confirm_response tests ─────────────────────────────────
|
|
|
|
|
|
class TestParseRoleConfirmResponse:
    """Unit tests for ``parse_role_confirm_response`` on raw VLM text."""

    def test_valid_json(self):
        """A plain JSON answer is returned with its fields intact."""
        payload = {
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": True,
                    "actual_role": "bouton",
                    "confidence": 0.92,
                },
            ],
            "overall_confidence": 0.92,
        }
        parsed = parse_role_confirm_response(json.dumps(payload))
        assert len(parsed["confirmed"]) == 1
        assert parsed["overall_confidence"] == 0.92

    def test_json_in_markdown(self):
        """JSON wrapped in a markdown ``json`` code fence is still parsed."""
        fenced = '```json\n{"confirmed": [], "overall_confidence": 0.0}\n```'
        parsed = parse_role_confirm_response(fenced)
        assert parsed["overall_confidence"] == 0.0

    def test_garbled_response(self):
        """Free text without JSON gives zero confidence and no entries."""
        parsed = parse_role_confirm_response("I cannot determine the roles")
        assert parsed["overall_confidence"] == 0.0
        assert len(parsed["confirmed"]) == 0

    def test_confidence_as_string(self):
        """A confidence sent as the string "0.85" comes back as 0.85."""
        raw = json.dumps({"confirmed": [], "overall_confidence": "0.85"})
        parsed = parse_role_confirm_response(raw)
        assert parsed["overall_confidence"] == 0.85
|
|
|
|
|
|
# ── verify_screen_match (OCR-anchored) tests ─────────────────────────
|
|
|
|
|
|
class TestVerifyScreenMatchOcrAnchored:
    """Tests for ``verify_screen_match`` driven by mock OCR/VLM clients.

    The OCR mock supplies the tokens "seen" on screen and the VLM mock
    supplies the role-confirmation answer. ``match`` is asserted through
    truthiness instead of ``== True`` / ``== False`` (PEP 8).
    """

    def test_full_match(self):
        """Text found by OCR and role confirmed by the VLM -> match."""
        ocr = mock_ocr_client_factory(["Rechercher", "Connexion", "Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        })
        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
        assert result.confidence >= 0.7

    def test_ocr_presence_fail(self):
        """OCR doesn't find expected text → mismatch (deterministic, no VLM needed)."""
        ocr = mock_ocr_client_factory(["Accueil", "Paramètres"])
        vlm = mock_vlm_client_factory({})
        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR presence" in result.reason
        assert len(result.mismatches) > 0

    def test_role_not_confirmed(self):
        """OCR finds text, VLM says it's a label not a button → mismatch."""
        ocr = mock_ocr_client_factory(["Rechercher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": False, "actual_role": "label", "confidence": 0.6},
            ],
            "overall_confidence": 0.6,
        })
        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match

    def test_ocr_error(self):
        """OCR engine fails → fail-safe mismatch."""

        def failing_ocr(image_path):
            raise RuntimeError("OCR engine down")

        vlm = mock_vlm_client_factory({})
        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=failing_ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR error" in result.reason

    def test_vlm_error_partial_match(self):
        """OCR finds texts, VLM fails → partial match (presence OK, role unknown)."""
        ocr = mock_ocr_client_factory(["Rechercher"])

        def failing_vlm(image_path, prompt):
            raise RuntimeError("VLM service down")

        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=failing_vlm,
        )
        # Presence confirmed by OCR → partial match, confidence=0.5
        assert result.match
        assert result.confidence == 0.5
        assert "VLM role confirm failed" in result.reason

    def test_no_expected_elements(self):
        """Nothing expected -> match with full confidence."""
        ocr = mock_ocr_client_factory(["Login"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match("/tmp/test.png", [], ocr_client=ocr, vlm_client=vlm)
        assert result.match
        assert result.confidence == 1.0

    def test_describe_match(self):
        """``describe()`` of a matching result mentions "OK"."""
        result = ScreenMatchResult(match=True, confidence=0.92)
        assert "OK" in result.describe()

    def test_describe_mismatch(self):
        """``describe()`` of a failing result mentions "mismatch"."""
        result = ScreenMatchResult(
            match=False,
            confidence=0.3,
            mismatches=["bouton: Rechercher"],
        )
        assert "mismatch" in result.describe()

    def test_multiple_elements_mixed(self):
        """2 elements: 1 found+role OK, 1 not found in OCR → mismatch."""
        ocr = mock_ocr_client_factory(["Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        expected = [
            {"role": "bouton", "text": "Connexion"},
            {"role": "champ", "text": "Nom Patient"},
        ]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match  # "Nom Patient" not found by OCR

    def test_fuzzy_ocr_match(self):
        """OCR reads 'Rechércher' (accent), expected 'Rechercher' → still found."""
        ocr = mock_ocr_client_factory(["Rechércher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        expected = [{"role": "bouton", "text": "Rechercher"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_no_text_elements_trivially_match(self):
        """Elements without text key → no presence check needed → trivially OK."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({})
        expected = [{"role": "page"}]
        result = verify_screen_match(
            "/tmp/test.png",
            expected,
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
|
|
|
|
|
|
# ── verify_before / verify_after tests ────────────────────────────────
|
|
|
|
|
|
class TestVerifyBeforeAfter:
    """Tests for the ``verify_before`` / ``verify_after`` entry points.

    Both are driven with mock OCR/VLM clients. ``match`` is asserted
    through truthiness instead of ``== True`` / ``== False`` (PEP 8).
    """

    def test_verify_before_match(self):
        """Pre-action check passes when text is found and role confirmed."""
        ocr = mock_ocr_client_factory(["Login", "Password", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_before(
            "/tmp/login.png",
            [{"role": "champ", "text": "Login"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="page login",
        )
        assert result.match

    def test_verify_after_higher_threshold(self):
        """verify_after uses min_confidence=0.8. VLM returns 0.75 → mismatch."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.75},
            ],
            "overall_confidence": 0.75,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # 0.75 < 0.8 threshold → role mismatch
        assert not result.match

    def test_verify_after_passes_at_0_8(self):
        """Post-action check passes when the VLM confidence is 0.85."""
        # NOTE(review): despite the test name, the mocked confidence is 0.85,
        # not 0.8 -- the exact boundary value is not exercised here.
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_verify_before_ocr_missing(self):
        """Pre-action: expected text not on screen → mismatch (can't proceed)."""
        ocr = mock_ocr_client_factory(["Accueil"])
        vlm = mock_vlm_client_factory({})
        result = verify_before(
            "/tmp/page.png",
            [{"role": "bouton", "text": "Connexion"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="pre-login",
        )
        assert not result.match
        assert "OCR presence" in result.reason
|
|
|
|
|
|
# ── OcrPresenceResult dataclass tests ─────────────────────────────────
|
|
|
|
|
|
class TestOcrPresenceResult:
    """Tests for ``OcrPresenceResult.presence_ratio`` on hand-built results."""

    def test_presence_ratio_all_found(self):
        """Two entries, both with a matched token -> ratio 1.0."""
        matched = {"Login": "Login", "Password": "Password"}
        outcome = OcrPresenceResult(
            found_texts=matched,
            missing=[],
            all_found=True,
        )
        assert outcome.presence_ratio == 1.0

    def test_presence_ratio_half_found(self):
        """Two entries, one mapped to an empty string -> ratio 0.5."""
        matched = {"Login": "Login", "Password": ""}
        outcome = OcrPresenceResult(
            found_texts=matched,
            missing=["champ: Password"],
            all_found=False,
        )
        assert outcome.presence_ratio == 0.5

    def test_presence_ratio_empty(self):
        """No entries at all -> ratio 1.0."""
        outcome = OcrPresenceResult(
            found_texts={},
            missing=[],
            all_found=True,
        )
        assert outcome.presence_ratio == 1.0
|