Files
rpa_vision_v3/tests/unit/test_visual_verifier.py
Dom f9a0531325
Some checks failed
tests / Lint (ruff + black) (push) Failing after 1m52s
tests / Tests unitaires (sans GPU) (push) Failing after 1m58s
tests / Tests sécurité (critique) (push) Has been skipped
feat(navigation): brique login visuel OCR-ancre + action navigate au replay
- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens),
  grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue),
  visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime)
- api_stream/replay_engine : dispatch action navigate server-side,
  never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti)
- 131 tests verts (wiring boot, e2e handler, unit modules)

Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 10:31:44 +02:00

491 lines
18 KiB
Python

"""Tests for core/navigation/visual_verifier.py — OCR-anchored architecture.
Tests pure functions (normalize_text, fuzzy_match, ocr_presence_check,
build_role_confirm_prompt, parse_role_confirm_response) offline,
then verifies verify_screen_match with mock OcrClient + VlmClient.
"""
import json
import pytest
from core.navigation.visual_verifier import (
normalize_text,
fuzzy_match,
ocr_presence_check,
build_role_confirm_prompt,
parse_role_confirm_response,
verify_screen_match,
verify_before,
verify_after,
ScreenMatchResult,
OcrPresenceResult,
)
# ── Mock factories ─────────────────────────────────────────────────────
def mock_ocr_client_factory(tokens: list):
    """Build a stand-in OCR client that always reports the given tokens.

    Args:
        tokens: Tokens the fake OCR engine should report for any image.

    Returns:
        A callable ``client(image_path)`` that ignores the path and returns
        a fresh list holding the tokens captured when the factory was called.
    """
    # Snapshot at creation time so later edits to the caller's list cannot
    # silently change what the mock reports.
    snapshot = list(tokens)

    def client(image_path: str) -> list:
        # Hand out a copy: the code under test may mutate its OCR result
        # without contaminating subsequent calls to the same mock.
        return list(snapshot)

    return client
def mock_vlm_client_factory(response_json: dict):
    """Build a stand-in VLM client that answers with a fixed JSON payload.

    Args:
        response_json: Dictionary to serialise as the VLM's textual reply.

    Returns:
        A callable ``client(image_path, prompt)`` that ignores both arguments
        and returns ``response_json`` encoded with ``json.dumps``.
    """

    def client(image_path: str, prompt: str) -> str:
        # Encoding happens on every call, not once when the factory runs.
        encoded = json.dumps(response_json)
        return encoded

    return client
# ── normalize_text tests ──────────────────────────────────────────────
class TestNormalizeText:
    """Offline checks of normalize_text: case, accents, whitespace, digits."""

    def test_lowercase(self):
        """Upper-case input comes back lower-cased."""
        assert normalize_text("RECHERCHER") == "rechercher"

    def test_strip_accents(self):
        """Accented characters are reduced to their base letter."""
        assert normalize_text("Recherché") == "recherche"

    def test_collapse_whitespace(self):
        """Leading, trailing and repeated inner spaces reduce to single spaces."""
        # The input must contain runs of spaces, otherwise only stripping is
        # exercised and the collapsing named by this test is never checked.
        assert normalize_text("  hello   world  ") == "hello world"

    def test_combined(self):
        """Case folding, accent stripping and whitespace cleanup apply together."""
        assert normalize_text("  Nom   Prénom  ") == "nom prenom"

    def test_empty(self):
        """The empty string is returned unchanged."""
        assert normalize_text("") == ""

    def test_numbers_preserved(self):
        """Digits survive normalisation."""
        assert normalize_text("IPP 12345") == "ipp 12345"
# ── fuzzy_match tests ─────────────────────────────────────────────────
class TestFuzzyMatch:
    """Offline checks of fuzzy_match: exact, containment and ratio matching."""

    def test_exact_match(self):
        """Identical strings match."""
        assert fuzzy_match("Rechercher", "Rechercher")

    def test_case_insensitive(self):
        """Case differences do not prevent a match."""
        assert fuzzy_match("rechercher", "RECHERCHER")

    def test_accent_match(self):
        """Accent differences do not prevent a match."""
        assert fuzzy_match("Recherché", "Recherche")

    def test_substring_containment(self):
        """Expected text contained in a longer OCR token matches."""
        assert fuzzy_match("Rechercher", "Bouton Rechercher")

    def test_reverse_containment(self):
        """OCR token contained in the longer expected text matches."""
        assert fuzzy_match("Nom Prénom Patient", "Nom")

    def test_fuzzy_ratio(self):
        """Near-identical spellings match at a 0.8 threshold."""
        # Neither string contains the other, so only the similarity ratio can
        # produce the match; this test requires it to reach 0.8.
        assert fuzzy_match("Connexion", "Connection", threshold=0.8)

    def test_no_match(self):
        """Unrelated words do not match."""
        assert not fuzzy_match("Dashboard", "Login", threshold=0.8)

    def test_custom_threshold(self):
        """The same pair that passes at 0.8 is rejected at 0.95."""
        # Together with test_fuzzy_ratio this pins the pair's similarity to
        # the interval between the two thresholds.
        assert not fuzzy_match("Connexion", "Connection", threshold=0.95)
# ── ocr_presence_check tests ──────────────────────────────────────────
class TestOcrPresenceCheck:
    """Offline checks of ocr_presence_check against hand-written token lists."""

    def test_all_found(self):
        """Every expected text appears among the OCR tokens."""
        tokens = ["Rechercher", "Connexion", "Nom Patient"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
        assert result.presence_ratio == 1.0
        assert len(result.missing) == 0
        assert result.found_texts["Rechercher"] == "Rechercher"

    def test_partial_found(self):
        """One of two expected texts is missing from the OCR tokens."""
        tokens = ["Rechercher"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.5
        # Missing entries are reported as "<role>: <text>".
        assert "bouton: Connexion" in result.missing

    def test_none_found(self):
        """No expected text appears among the OCR tokens."""
        tokens = ["Accueil", "Paramètres"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert not result.all_found
        assert result.presence_ratio == 0.0
        assert "bouton: Rechercher" in result.missing

    def test_fuzzy_match_in_presence(self):
        """An OCR token differing only by an accent still counts as found."""
        tokens = ["Rechércher"]  # OCR with accent variation
        elements = [{"role": "bouton", "text": "Rechercher"}]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found

    def test_empty_tokens(self):
        """With no OCR tokens, an expected text cannot be found."""
        result = ocr_presence_check([], [{"role": "bouton", "text": "Login"}])
        assert not result.all_found
        assert result.presence_ratio == 0.0

    def test_empty_elements(self):
        """With nothing expected, the check succeeds with a ratio of 1.0."""
        result = ocr_presence_check(["Login", "Password"], [])
        assert result.all_found
        assert result.presence_ratio == 1.0

    def test_no_text_key(self):
        """An element without a "text" key is treated as trivially found."""
        elements = [{"role": "page"}]  # no text key
        result = ocr_presence_check(["Dashboard"], elements)
        assert result.all_found  # no text to check → trivially found

    def test_multiple_elements_same_text(self):
        """Two elements sharing one text are both satisfied by a single token."""
        tokens = ["Connexion"]
        elements = [
            {"role": "bouton", "text": "Connexion"},
            {"role": "label", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found
# ── build_role_confirm_prompt tests ───────────────────────────────────
class TestBuildRoleConfirmPrompt:
    """Offline checks of the text produced by build_role_confirm_prompt."""

    def test_basic_prompt(self):
        """A single located element is quoted with its expected role."""
        located = [
            {
                "text": "Rechercher",
                "expected_role": "bouton",
                "matched_ocr": "Rechercher",
            },
        ]
        wanted = [{"role": "bouton", "text": "Rechercher"}]
        prompt = build_role_confirm_prompt(located, wanted)
        for fragment in (
            'Text "Rechercher"',
            "expected role: bouton",
            "role_confirmed",
        ):
            assert fragment in prompt

    def test_with_context(self):
        """A supplied context string is echoed on a "Context:" line."""
        located = [
            {
                "text": "Connexion",
                "expected_role": "bouton",
                "matched_ocr": "Connexion",
            },
        ]
        wanted = [{"role": "bouton", "text": "Connexion"}]
        prompt = build_role_confirm_prompt(
            located, wanted, context="page login DPI"
        )
        assert "Context: page login DPI" in prompt

    def test_multiple_elements(self):
        """Three located elements produce entries numbered 1 to 3."""
        pairs = [("Login", "champ"), ("Password", "champ"), ("Connexion", "bouton")]
        located = [
            {"text": text, "expected_role": role, "matched_ocr": text}
            for text, role in pairs
        ]
        wanted = [{"role": role, "text": text} for text, role in pairs]
        prompt = build_role_confirm_prompt(located, wanted)
        for marker in ("1.", "2.", "3."):
            assert marker in prompt

    def test_no_self_declaration(self):
        """Prompt must NOT ask VLM to declare presence — only role."""
        located = [
            {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"},
        ]
        wanted = [{"role": "champ", "text": "Login"}]
        lowered = build_role_confirm_prompt(located, wanted).lower()
        # NOTE(review): test_basic_prompt requires "role_confirmed" in the
        # prompt, so "confirmed" is presumably always present and this
        # condition can never fail — confirm and tighten against the real
        # prompt wording.
        assert not ("present" in lowered and "confirmed" not in lowered)
# ── parse_role_confirm_response tests ─────────────────────────────────
class TestParseRoleConfirmResponse:
    """Offline checks of parse_role_confirm_response on raw VLM reply text."""

    def test_valid_json(self):
        """A well-formed JSON reply is returned with its entries intact."""
        payload = {
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": True,
                    "actual_role": "bouton",
                    "confidence": 0.92,
                },
            ],
            "overall_confidence": 0.92,
        }
        parsed = parse_role_confirm_response(json.dumps(payload))
        assert len(parsed["confirmed"]) == 1
        assert parsed["overall_confidence"] == 0.92

    def test_json_in_markdown(self):
        """JSON wrapped in a Markdown code fence is still parsed."""
        fenced = '```json\n{"confirmed": [], "overall_confidence": 0.0}\n```'
        parsed = parse_role_confirm_response(fenced)
        assert parsed["overall_confidence"] == 0.0

    def test_garbled_response(self):
        """Free text without JSON yields zero confidence and no entries."""
        parsed = parse_role_confirm_response("I cannot determine the roles")
        assert parsed["overall_confidence"] == 0.0
        assert len(parsed["confirmed"]) == 0

    def test_confidence_as_string(self):
        """A confidence given as a numeric string is returned as a number."""
        payload = {"confirmed": [], "overall_confidence": "0.85"}
        parsed = parse_role_confirm_response(json.dumps(payload))
        assert parsed["overall_confidence"] == 0.85
# ── verify_screen_match (OCR-anchored) tests ─────────────────────────
class TestVerifyScreenMatchOcrAnchored:
    """Checks of verify_screen_match driven by mock OCR and VLM clients."""

    # The mock clients ignore the path they are given, so one placeholder
    # serves every test in this class.
    _IMAGE = "/tmp/test.png"

    @staticmethod
    def _vlm_reply(role_confirmed, actual_role, confidence):
        """Return a mock VLM client answering for one element (index 1).

        The same ``confidence`` is used for the element and for
        ``overall_confidence``.
        """
        return mock_vlm_client_factory({
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": role_confirmed,
                    "actual_role": actual_role,
                    "confidence": confidence,
                },
            ],
            "overall_confidence": confidence,
        })

    def test_full_match(self):
        """OCR finds the text and the VLM confirms the role → match."""
        ocr = mock_ocr_client_factory(["Rechercher", "Connexion", "Dashboard"])
        vlm = self._vlm_reply(True, "bouton", 0.92)
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
        assert result.confidence >= 0.7

    def test_ocr_presence_fail(self):
        """OCR doesn't find expected text → mismatch (deterministic, no VLM needed)."""
        ocr = mock_ocr_client_factory(["Accueil", "Paramètres"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR presence" in result.reason
        assert len(result.mismatches) > 0

    def test_role_not_confirmed(self):
        """OCR finds text, VLM says it's a label not a button → mismatch."""
        ocr = mock_ocr_client_factory(["Rechercher"])
        vlm = self._vlm_reply(False, "label", 0.6)
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match

    def test_ocr_error(self):
        """OCR engine fails → fail-safe mismatch."""

        def failing_ocr(image_path):
            raise RuntimeError("OCR engine down")

        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=failing_ocr,
            vlm_client=vlm,
        )
        assert not result.match
        assert "OCR error" in result.reason

    def test_vlm_error_partial_match(self):
        """OCR finds texts, VLM fails → partial match (presence OK, role unknown)."""
        ocr = mock_ocr_client_factory(["Rechercher"])

        def failing_vlm(image_path, prompt):
            raise RuntimeError("VLM service down")

        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=failing_vlm,
        )
        # Presence confirmed by OCR → partial match, confidence=0.5
        assert result.match
        assert result.confidence == 0.5
        assert "VLM role confirm failed" in result.reason

    def test_no_expected_elements(self):
        """Nothing expected → match with full confidence."""
        ocr = mock_ocr_client_factory(["Login"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            self._IMAGE, [], ocr_client=ocr, vlm_client=vlm
        )
        assert result.match
        assert result.confidence == 1.0

    def test_describe_match(self):
        """describe() on a matching result mentions "OK"."""
        result = ScreenMatchResult(match=True, confidence=0.92)
        assert "OK" in result.describe()

    def test_describe_mismatch(self):
        """describe() on a failing result mentions "mismatch"."""
        result = ScreenMatchResult(
            match=False,
            confidence=0.3,
            mismatches=["bouton: Rechercher"],
        )
        assert "mismatch" in result.describe()

    def test_multiple_elements_mixed(self):
        """2 elements: 1 found+role OK, 1 not found in OCR → mismatch."""
        ocr = mock_ocr_client_factory(["Connexion"])
        vlm = self._vlm_reply(True, "bouton", 0.9)
        result = verify_screen_match(
            self._IMAGE,
            [
                {"role": "bouton", "text": "Connexion"},
                {"role": "champ", "text": "Nom Patient"},
            ],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert not result.match  # "Nom Patient" not found by OCR

    def test_fuzzy_ocr_match(self):
        """OCR reads 'Rechércher' (accent), expected 'Rechercher' → still found."""
        ocr = mock_ocr_client_factory(["Rechércher"])
        vlm = self._vlm_reply(True, "bouton", 0.9)
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_no_text_elements_trivially_match(self):
        """Elements without text key → no presence check needed → trivially OK."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            self._IMAGE,
            [{"role": "page"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match
# ── verify_before / verify_after tests ────────────────────────────────
class TestVerifyBeforeAfter:
    """Checks of the verify_before / verify_after wrappers with mock clients."""

    def test_verify_before_match(self):
        """Pre-action: text present and role confirmed at 0.85 → match."""
        ocr = mock_ocr_client_factory(["Login", "Password", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": True,
                    "actual_role": "champ",
                    "confidence": 0.85,
                },
            ],
            "overall_confidence": 0.85,
        })
        result = verify_before(
            "/tmp/login.png",
            [{"role": "champ", "text": "Login"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="page login",
        )
        assert result.match

    def test_verify_after_higher_threshold(self):
        """verify_after uses min_confidence=0.8. VLM returns 0.75 → mismatch."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": True,
                    "actual_role": "page",
                    "confidence": 0.75,
                },
            ],
            "overall_confidence": 0.75,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # 0.75 < 0.8 threshold → role mismatch
        assert not result.match

    def test_verify_after_passes_at_0_8(self):
        """Post-action: a VLM confidence of 0.85 is accepted → match.

        Despite the name, this uses 0.85 and does not probe the exact 0.8
        boundary.
        """
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {
                    "index": 1,
                    "role_confirmed": True,
                    "actual_role": "page",
                    "confidence": 0.85,
                },
            ],
            "overall_confidence": 0.85,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_verify_before_ocr_missing(self):
        """Pre-action: expected text not on screen → mismatch (can't proceed)."""
        ocr = mock_ocr_client_factory(["Accueil"])
        vlm = mock_vlm_client_factory({})
        result = verify_before(
            "/tmp/page.png",
            [{"role": "bouton", "text": "Connexion"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="pre-login",
        )
        assert not result.match
        assert "OCR presence" in result.reason
# ── OcrPresenceResult dataclass tests ─────────────────────────────────
class TestOcrPresenceResult:
    """Checks of OcrPresenceResult.presence_ratio on hand-built instances."""

    def test_presence_ratio_all_found(self):
        """Two matched texts and nothing missing give a ratio of 1.0."""
        matched = {"Login": "Login", "Password": "Password"}
        outcome = OcrPresenceResult(
            found_texts=matched, missing=[], all_found=True
        )
        assert outcome.presence_ratio == 1.0

    def test_presence_ratio_half_found(self):
        """One matched text plus one empty match give a ratio of 0.5."""
        matched = {"Login": "Login", "Password": ""}
        absent = ["champ: Password"]
        outcome = OcrPresenceResult(
            found_texts=matched, missing=absent, all_found=False
        )
        assert outcome.presence_ratio == 0.5

    def test_presence_ratio_empty(self):
        """With no texts at all, the ratio is reported as 1.0."""
        outcome = OcrPresenceResult(found_texts={}, missing=[], all_found=True)
        assert outcome.presence_ratio == 1.0