feat(navigation): brique login visuel OCR-ancre + action navigate au replay
- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens), grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue), visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime)
- api_stream/replay_engine : dispatch action navigate server-side, never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti)
- 131 tests verts (wiring boot, e2e handler, unit modules)

Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
490
tests/unit/test_visual_verifier.py
Normal file
490
tests/unit/test_visual_verifier.py
Normal file
@@ -0,0 +1,490 @@
|
||||
"""Tests for core/navigation/visual_verifier.py — OCR-anchored architecture.
|
||||
|
||||
Tests pure functions (normalize_text, fuzzy_match, ocr_presence_check,
|
||||
build_role_confirm_prompt, parse_role_confirm_response) offline,
|
||||
then verifies verify_screen_match with mock OcrClient + VlmClient.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from core.navigation.visual_verifier import (
|
||||
normalize_text,
|
||||
fuzzy_match,
|
||||
ocr_presence_check,
|
||||
build_role_confirm_prompt,
|
||||
parse_role_confirm_response,
|
||||
verify_screen_match,
|
||||
verify_before,
|
||||
verify_after,
|
||||
ScreenMatchResult,
|
||||
OcrPresenceResult,
|
||||
)
|
||||
|
||||
|
||||
# ── Mock factories ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def mock_ocr_client_factory(tokens: list):
    """Build a stand-in OCR client for tests.

    The returned callable accepts an image path, ignores it, and hands back
    the very same ``tokens`` object that was given to this factory.
    """
    def _fake_ocr(image_path: str) -> list:
        # The path is deliberately unused: no image is ever read.
        return tokens

    return _fake_ocr
|
||||
|
||||
|
||||
def mock_vlm_client_factory(response_json: dict):
    """Build a stand-in VLM client for tests.

    The returned callable accepts an image path and a prompt, ignores both,
    and returns ``response_json`` serialized to a JSON string. Serialization
    happens on every call, not once at factory time.
    """
    def _fake_vlm(image_path: str, prompt: str) -> str:
        serialized = json.dumps(response_json)
        return serialized

    return _fake_vlm
|
||||
|
||||
|
||||
# ── normalize_text tests ──────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestNormalizeText:
    """Unit tests for ``normalize_text``: case, accents and whitespace."""

    def test_lowercase(self):
        """Upper-case input is expected back in lower case."""
        assert normalize_text("RECHERCHER") == "rechercher"

    def test_strip_accents(self):
        """An accented letter is expected back as its unaccented base letter."""
        assert normalize_text("Recherché") == "recherche"

    def test_collapse_whitespace(self):
        """Outer blanks are dropped and inner runs shrink to a single space."""
        # The input must contain *runs* of whitespace: with single spaces only,
        # a plain strip() would satisfy this test and collapsing would never
        # be exercised.
        assert normalize_text("  hello   world  ") == "hello world"

    def test_combined(self):
        """Case folding, accent removal and trimming applied together."""
        assert normalize_text(" Nom Prénom ") == "nom prenom"

    def test_empty(self):
        """The empty string maps to the empty string."""
        assert normalize_text("") == ""

    def test_numbers_preserved(self):
        """Digits pass through unchanged."""
        assert normalize_text("IPP 12345") == "ipp 12345"
|
||||
|
||||
|
||||
# ── fuzzy_match tests ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFuzzyMatch:
    """Unit tests for ``fuzzy_match``.

    Outcomes are asserted through plain truthiness (``assert x`` /
    ``assert not x``) instead of ``== True`` / ``== False`` comparisons.
    """

    def test_exact_match(self):
        """Identical strings match."""
        assert fuzzy_match("Rechercher", "Rechercher")

    def test_case_insensitive(self):
        """Strings differing only by case match."""
        assert fuzzy_match("rechercher", "RECHERCHER")

    def test_accent_match(self):
        """Strings differing only by an accent match."""
        assert fuzzy_match("Recherché", "Recherche")

    def test_substring_containment(self):
        """The first argument is contained in the longer second argument."""
        assert fuzzy_match("Rechercher", "Bouton Rechercher")

    def test_reverse_containment(self):
        """The second argument is contained in the longer first argument."""
        assert fuzzy_match("Nom Prénom Patient", "Nom")

    def test_fuzzy_ratio(self):
        """Near-identical spellings match at a 0.8 threshold."""
        # Neither equal nor a substring of one another, so only a
        # similarity score can make this pair match.
        assert fuzzy_match("Connexion", "Connection", threshold=0.8)

    def test_no_match(self):
        """Unrelated words do not match."""
        assert not fuzzy_match("Dashboard", "Login", threshold=0.8)

    def test_custom_threshold(self):
        """The pair accepted at 0.8 is rejected at a stricter 0.95."""
        assert not fuzzy_match("Connexion", "Connection", threshold=0.95)
|
||||
|
||||
|
||||
# ── ocr_presence_check tests ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestOcrPresenceCheck:
    """Unit tests for ``ocr_presence_check(tokens, elements)``.

    ``tokens`` are the strings read by OCR; ``elements`` are the expected
    UI elements, given as dicts with a ``role`` and (usually) a ``text``.
    Boolean results are asserted through truthiness rather than
    ``== True`` / ``== False``.
    """

    def test_all_found(self):
        """Every expected text has a matching OCR token."""
        tokens = ["Rechercher", "Connexion", "Nom Patient"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
        assert result.presence_ratio == 1.0
        assert not result.missing
        assert result.found_texts["Rechercher"] == "Rechercher"

    def test_partial_found(self):
        """One of two expected texts is absent from the OCR tokens."""
        tokens = ["Rechercher"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.5
        # Missing entries are reported as "<role>: <text>".
        assert "bouton: Connexion" in result.missing

    def test_none_found(self):
        """No expected text appears among the OCR tokens."""
        tokens = ["Accueil", "Paramètres"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.0
        assert "bouton: Rechercher" in result.missing

    def test_fuzzy_match_in_presence(self):
        """An OCR token with a stray accent still counts as found."""
        tokens = ["Rechércher"]  # OCR with accent variation
        elements = [{"role": "bouton", "text": "Rechercher"}]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found

    def test_empty_tokens(self):
        """With no OCR tokens at all, an expected text cannot be found."""
        result = ocr_presence_check([], [{"role": "bouton", "text": "Login"}])
        assert not result.all_found
        assert result.presence_ratio == 0.0

    def test_empty_elements(self):
        """With nothing expected, the check succeeds with a full ratio."""
        result = ocr_presence_check(["Login", "Password"], [])
        assert result.all_found
        assert result.presence_ratio == 1.0

    def test_no_text_key(self):
        """An element without a ``text`` key has nothing to look for."""
        elements = [{"role": "page"}]  # no text key
        result = ocr_presence_check(["Dashboard"], elements)
        assert result.all_found  # no text to check, so trivially found

    def test_multiple_elements_same_text(self):
        """Two elements sharing one text are both satisfied by one token."""
        tokens = ["Connexion"]
        elements = [
            {"role": "bouton", "text": "Connexion"},
            {"role": "label", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
|
||||
|
||||
|
||||
# ── build_role_confirm_prompt tests ───────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildRoleConfirmPrompt:
    """Checks on the prompt text produced by ``build_role_confirm_prompt``."""

    def test_basic_prompt(self):
        """One found element: its text, its expected role and the reply key appear."""
        ocr_hits = [
            {"text": "Rechercher", "expected_role": "bouton", "matched_ocr": "Rechercher"},
        ]
        wanted = [{"role": "bouton", "text": "Rechercher"}]
        rendered = build_role_confirm_prompt(ocr_hits, wanted)
        for fragment in ('Text "Rechercher"', "expected role: bouton", "role_confirmed"):
            assert fragment in rendered

    def test_with_context(self):
        """A ``context`` keyword argument is echoed on a ``Context:`` line."""
        ocr_hits = [
            {"text": "Connexion", "expected_role": "bouton", "matched_ocr": "Connexion"},
        ]
        wanted = [{"role": "bouton", "text": "Connexion"}]
        rendered = build_role_confirm_prompt(ocr_hits, wanted, context="page login DPI")
        assert "Context: page login DPI" in rendered

    def test_multiple_elements(self):
        """Three found elements produce the numbering markers 1., 2. and 3."""
        pairs = [("Login", "champ"), ("Password", "champ"), ("Connexion", "bouton")]
        ocr_hits = [
            {"text": label, "expected_role": role, "matched_ocr": label}
            for label, role in pairs
        ]
        wanted = [{"role": role, "text": label} for label, role in pairs]
        rendered = build_role_confirm_prompt(ocr_hits, wanted)
        for marker in ("1.", "2.", "3."):
            assert marker in rendered

    def test_no_self_declaration(self):
        """Prompt must NOT ask VLM to declare presence — only role."""
        ocr_hits = [
            {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"},
        ]
        wanted = [{"role": "champ", "text": "Login"}]
        rendered = build_role_confirm_prompt(ocr_hits, wanted)
        lowered = rendered.lower()
        # NOTE(review): test_basic_prompt expects "role_confirmed" in the
        # prompt, so the "confirmed" branch below is presumably always true
        # and this check is close to vacuous — consider tightening it once
        # the exact prompt wording is known.
        assert "confirmed" in lowered or "present" not in lowered
|
||||
|
||||
|
||||
# ── parse_role_confirm_response tests ─────────────────────────────────
|
||||
|
||||
|
||||
class TestParseRoleConfirmResponse:
    """Checks on how ``parse_role_confirm_response`` reads raw VLM output."""

    def test_valid_json(self):
        """A well-formed JSON reply is returned with its fields intact."""
        payload = {
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        }
        parsed = parse_role_confirm_response(json.dumps(payload))
        assert len(parsed["confirmed"]) == 1
        assert parsed["overall_confidence"] == 0.92

    def test_json_in_markdown(self):
        """JSON wrapped in a Markdown code fence is still parsed."""
        fenced = '```json\n{"confirmed": [], "overall_confidence": 0.0}\n```'
        parsed = parse_role_confirm_response(fenced)
        assert parsed["overall_confidence"] == 0.0

    def test_garbled_response(self):
        """Free text without JSON yields zero confidence and no confirmations."""
        parsed = parse_role_confirm_response("I cannot determine the roles")
        assert parsed["overall_confidence"] == 0.0
        assert len(parsed["confirmed"]) == 0

    def test_confidence_as_string(self):
        """A confidence sent as the string "0.85" compares equal to 0.85."""
        raw = json.dumps({"confirmed": [], "overall_confidence": "0.85"})
        parsed = parse_role_confirm_response(raw)
        assert parsed["overall_confidence"] == 0.85
|
||||
|
||||
|
||||
# ── verify_screen_match (OCR-anchored) tests ─────────────────────────
|
||||
|
||||
|
||||
class TestVerifyScreenMatchOcrAnchored:
    """Tests for ``verify_screen_match`` driven by mock OCR and VLM clients.

    The mock clients ignore the image path they receive, so the
    ``/tmp/*.png`` paths are placeholders. Boolean results are asserted
    through truthiness rather than ``== True`` / ``== False``.
    """

    def test_full_match(self):
        """OCR finds the text and the VLM confirms the role: match."""
        ocr = mock_ocr_client_factory(["Rechercher", "Connexion", "Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
        assert result.confidence >= 0.7

    def test_ocr_presence_fail(self):
        """OCR does not find the expected text: mismatch, reason cites OCR presence."""
        ocr = mock_ocr_client_factory(["Accueil", "Paramètres"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR presence" in result.reason
        assert len(result.mismatches) > 0

    def test_role_not_confirmed(self):
        """OCR finds the text but the VLM reports a label, not a button: mismatch."""
        ocr = mock_ocr_client_factory(["Rechercher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": False, "actual_role": "label", "confidence": 0.6},
            ],
            "overall_confidence": 0.6,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match

    def test_ocr_error(self):
        """The OCR client raises: fail-safe mismatch with an OCR error reason."""
        def failing_ocr(image_path):
            raise RuntimeError("OCR engine down")

        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=failing_ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR error" in result.reason

    def test_vlm_error_partial_match(self):
        """OCR finds the text, the VLM raises: partial match (role unknown)."""
        ocr = mock_ocr_client_factory(["Rechercher"])

        def failing_vlm(image_path, prompt):
            raise RuntimeError("VLM service down")

        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=failing_vlm,
        )
        # Presence is confirmed by OCR alone, so the result is still a match,
        # with confidence 0.5 and a reason naming the VLM failure.
        assert result.match
        assert result.confidence == 0.5
        assert "VLM role confirm failed" in result.reason

    def test_no_expected_elements(self):
        """Nothing expected: match with full confidence."""
        ocr = mock_ocr_client_factory(["Login"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match("/tmp/test.png", [], ocr_client=ocr, vlm_client=vlm)
        assert result.match
        assert result.confidence == 1.0

    def test_describe_match(self):
        """``describe()`` of a matching result mentions "OK"."""
        result = ScreenMatchResult(match=True, confidence=0.92)
        assert "OK" in result.describe()

    def test_describe_mismatch(self):
        """``describe()`` of a failing result mentions "mismatch"."""
        result = ScreenMatchResult(
            match=False, confidence=0.3,
            mismatches=["bouton: Rechercher"],
        )
        assert "mismatch" in result.describe()

    def test_multiple_elements_mixed(self):
        """Two elements, one found with role OK, one absent from OCR: mismatch."""
        ocr = mock_ocr_client_factory(["Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [
                {"role": "bouton", "text": "Connexion"},
                {"role": "champ", "text": "Nom Patient"},
            ],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match  # "Nom Patient" not found by OCR

    def test_fuzzy_ocr_match(self):
        """OCR reads 'Rechércher' (stray accent) for 'Rechercher': still found."""
        ocr = mock_ocr_client_factory(["Rechércher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_no_text_elements_trivially_match(self):
        """An element without a ``text`` key needs no presence check: match."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "page"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
|
||||
|
||||
|
||||
# ── verify_before / verify_after tests ────────────────────────────────
|
||||
|
||||
|
||||
class TestVerifyBeforeAfter:
    """Tests for the ``verify_before`` / ``verify_after`` entry points.

    Both are exercised with mock OCR and VLM clients. Boolean results are
    asserted through truthiness rather than ``== True`` / ``== False``.
    """

    def test_verify_before_match(self):
        """Pre-action check: text found by OCR, role confirmed at 0.85: match."""
        ocr = mock_ocr_client_factory(["Login", "Password", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_before(
            "/tmp/login.png",
            [{"role": "champ", "text": "Login"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="page login",
        )
        assert result.match

    def test_verify_after_higher_threshold(self):
        """verify_after uses min_confidence=0.8; a VLM confidence of 0.75 is a mismatch."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.75},
            ],
            "overall_confidence": 0.75,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # 0.75 is below the 0.8 threshold, so the role is not accepted.
        assert not result.match

    def test_verify_after_passes_at_0_8(self):
        """A VLM confidence of 0.85 is accepted by verify_after."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_verify_before_ocr_missing(self):
        """Pre-action check: expected text not on screen: mismatch (cannot proceed)."""
        ocr = mock_ocr_client_factory(["Accueil"])
        vlm = mock_vlm_client_factory({})
        result = verify_before(
            "/tmp/page.png",
            [{"role": "bouton", "text": "Connexion"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="pre-login",
        )
        assert not result.match
        assert "OCR presence" in result.reason
|
||||
assert "OCR presence" in result.reason
|
||||
|
||||
|
||||
# ── OcrPresenceResult dataclass tests ─────────────────────────────────
|
||||
|
||||
|
||||
class TestOcrPresenceResult:
    """Checks on the ``presence_ratio`` value exposed by ``OcrPresenceResult``."""

    def test_presence_ratio_all_found(self):
        """Two texts found, nothing missing: ratio is 1.0."""
        everything = OcrPresenceResult(
            found_texts={"Login": "Login", "Password": "Password"},
            missing=[],
            all_found=True,
        )
        assert everything.presence_ratio == 1.0

    def test_presence_ratio_half_found(self):
        """One text matched, one with an empty match and listed missing: ratio is 0.5."""
        half = OcrPresenceResult(
            found_texts={"Login": "Login", "Password": ""},
            missing=["champ: Password"],
            all_found=False,
        )
        assert half.presence_ratio == 0.5

    def test_presence_ratio_empty(self):
        """No texts tracked at all: ratio is 1.0."""
        nothing = OcrPresenceResult(
            found_texts={},
            missing=[],
            all_found=True,
        )
        assert nothing.presence_ratio == 1.0
|
||||
Reference in New Issue
Block a user