"""Tests for core/navigation/visual_verifier.py — OCR-anchored architecture. Tests pure functions (normalize_text, fuzzy_match, ocr_presence_check, build_role_confirm_prompt, parse_role_confirm_response) offline, then verifies verify_screen_match with mock OcrClient + VlmClient. """ import json import pytest from core.navigation.visual_verifier import ( normalize_text, fuzzy_match, ocr_presence_check, build_role_confirm_prompt, parse_role_confirm_response, verify_screen_match, verify_before, verify_after, ScreenMatchResult, OcrPresenceResult, ) # ── Mock factories ───────────────────────────────────────────────────── def mock_ocr_client_factory(tokens: list): """Factory that creates a mock OcrClient returning the given tokens.""" def client(image_path: str) -> list: return tokens return client def mock_vlm_client_factory(response_json: dict): """Factory that creates a mock VlmClient returning the given JSON.""" def client(image_path: str, prompt: str) -> str: return json.dumps(response_json) return client # ── normalize_text tests ────────────────────────────────────────────── class TestNormalizeText: def test_lowercase(self): assert normalize_text("RECHERCHER") == "rechercher" def test_strip_accents(self): assert normalize_text("Recherché") == "recherche" def test_collapse_whitespace(self): assert normalize_text(" hello world ") == "hello world" def test_combined(self): assert normalize_text(" Nom Prénom ") == "nom prenom" def test_empty(self): assert normalize_text("") == "" def test_numbers_preserved(self): assert normalize_text("IPP 12345") == "ipp 12345" # ── fuzzy_match tests ───────────────────────────────────────────────── class TestFuzzyMatch: def test_exact_match(self): assert fuzzy_match("Rechercher", "Rechercher") == True def test_case_insensitive(self): assert fuzzy_match("rechercher", "RECHERCHER") == True def test_accent_match(self): assert fuzzy_match("Recherché", "Recherche") == True def test_substring_containment(self): # Short text contained in longer OCR token assert fuzzy_match("Rechercher", "Bouton Rechercher") == True def test_reverse_containment(self): # OCR token contained in expected text assert fuzzy_match("Nom Prénom Patient", "Nom") == True def test_fuzzy_ratio(self): # Similar but not exact/substring — ratio ~0.90 assert fuzzy_match("Connexion", "Connection", threshold=0.8) == True def test_no_match(self): assert fuzzy_match("Dashboard", "Login", threshold=0.8) == False def test_custom_threshold(self): # "Connection" vs "Connexion" ratio ~0.90, passes at 0.8 but fails at 0.95 assert fuzzy_match("Connexion", "Connection", threshold=0.95) == False # ── ocr_presence_check tests ────────────────────────────────────────── class TestOcrPresenceCheck: def test_all_found(self): tokens = ["Rechercher", "Connexion", "Nom Patient"] elements = [ {"role": "bouton", "text": "Rechercher"}, {"role": "bouton", "text": "Connexion"}, ] result = ocr_presence_check(tokens, elements) assert result.all_found == True assert result.presence_ratio == 1.0 assert len(result.missing) == 0 assert result.found_texts["Rechercher"] == "Rechercher" def test_partial_found(self): tokens = ["Rechercher"] elements = [ {"role": "bouton", "text": "Rechercher"}, {"role": "bouton", "text": "Connexion"}, ] result = ocr_presence_check(tokens, elements) assert result.all_found == False assert result.presence_ratio == 0.5 assert "bouton: Connexion" in result.missing def test_none_found(self): tokens = ["Accueil", "Paramètres"] elements = [ {"role": "bouton", "text": "Rechercher"}, ] result = ocr_presence_check(tokens, elements) assert result.all_found == False assert result.presence_ratio == 0.0 assert "bouton: Rechercher" in result.missing def test_fuzzy_match_in_presence(self): tokens = ["Rechércher"] # OCR with accent variation elements = [{"role": "bouton", "text": "Rechercher"}] result = ocr_presence_check(tokens, elements) assert result.all_found == True def test_empty_tokens(self): result = ocr_presence_check([], [{"role": "bouton", "text": "Login"}]) assert result.all_found == False assert result.presence_ratio == 0.0 def test_empty_elements(self): result = ocr_presence_check(["Login", "Password"], []) assert result.all_found == True assert result.presence_ratio == 1.0 def test_no_text_key(self): elements = [{"role": "page"}] # no text key result = ocr_presence_check(["Dashboard"], elements) assert result.all_found == True # no text to check → trivially found def test_multiple_elements_same_text(self): tokens = ["Connexion"] elements = [ {"role": "bouton", "text": "Connexion"}, {"role": "label", "text": "Connexion"}, ] result = ocr_presence_check(tokens, elements) assert result.all_found == True # ── build_role_confirm_prompt tests ─────────────────────────────────── class TestBuildRoleConfirmPrompt: def test_basic_prompt(self): found = [ {"text": "Rechercher", "expected_role": "bouton", "matched_ocr": "Rechercher"}, ] expected = [{"role": "bouton", "text": "Rechercher"}] prompt = build_role_confirm_prompt(found, expected) assert "Text \"Rechercher\"" in prompt assert "expected role: bouton" in prompt assert "role_confirmed" in prompt def test_with_context(self): found = [ {"text": "Connexion", "expected_role": "bouton", "matched_ocr": "Connexion"}, ] expected = [{"role": "bouton", "text": "Connexion"}] prompt = build_role_confirm_prompt(found, expected, context="page login DPI") assert "Context: page login DPI" in prompt def test_multiple_elements(self): found = [ {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"}, {"text": "Password", "expected_role": "champ", "matched_ocr": "Password"}, {"text": "Connexion", "expected_role": "bouton", "matched_ocr": "Connexion"}, ] expected = [ {"role": "champ", "text": "Login"}, {"role": "champ", "text": "Password"}, {"role": "bouton", "text": "Connexion"}, ] prompt = build_role_confirm_prompt(found, expected) assert "1." in prompt assert "2." in prompt assert "3." in prompt def test_no_self_declaration(self): """Prompt must NOT ask VLM to declare presence — only role.""" found = [ {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"}, ] expected = [{"role": "champ", "text": "Login"}] prompt = build_role_confirm_prompt(found, expected) assert "present" not in prompt.lower() or "confirmed" in prompt.lower() # ── parse_role_confirm_response tests ───────────────────────────────── class TestParseRoleConfirmResponse: def test_valid_json(self): data = json.dumps({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92}, ], "overall_confidence": 0.92, }) result = parse_role_confirm_response(data) assert len(result["confirmed"]) == 1 assert result["overall_confidence"] == 0.92 def test_json_in_markdown(self): vlm_text = "```json\n{\"confirmed\": [], \"overall_confidence\": 0.0}\n```" result = parse_role_confirm_response(vlm_text) assert result["overall_confidence"] == 0.0 def test_garbled_response(self): result = parse_role_confirm_response("I cannot determine the roles") assert result["overall_confidence"] == 0.0 assert len(result["confirmed"]) == 0 def test_confidence_as_string(self): data = json.dumps({"confirmed": [], "overall_confidence": "0.85"}) result = parse_role_confirm_response(data) assert result["overall_confidence"] == 0.85 # ── verify_screen_match (OCR-anchored) tests ───────────────────────── class TestVerifyScreenMatchOcrAnchored: def test_full_match(self): ocr = mock_ocr_client_factory(["Rechercher", "Connexion", "Dashboard"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92}, ], "overall_confidence": 0.92, }) result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == True assert result.confidence >= 0.7 def test_ocr_presence_fail(self): """OCR doesn't find expected text → mismatch (deterministic, no VLM needed).""" ocr = mock_ocr_client_factory(["Accueil", "Paramètres"]) vlm = mock_vlm_client_factory({}) result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == False assert "OCR presence" in result.reason assert len(result.mismatches) > 0 def test_role_not_confirmed(self): """OCR finds text, VLM says it's a label not a button → mismatch.""" ocr = mock_ocr_client_factory(["Rechercher"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": False, "actual_role": "label", "confidence": 0.6}, ], "overall_confidence": 0.6, }) result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == False def test_ocr_error(self): """OCR engine fails → fail-safe mismatch.""" def failing_ocr(image_path): raise RuntimeError("OCR engine down") vlm = mock_vlm_client_factory({}) result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=failing_ocr, vlm_client=vlm, ) assert result.match == False assert "OCR error" in result.reason def test_vlm_error_partial_match(self): """OCR finds texts, VLM fails → partial match (presence OK, role unknown).""" ocr = mock_ocr_client_factory(["Rechercher"]) def failing_vlm(image_path, prompt): raise RuntimeError("VLM service down") result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=ocr, vlm_client=failing_vlm, ) # Presence confirmed by OCR → partial match, confidence=0.5 assert result.match == True assert result.confidence == 0.5 assert "VLM role confirm failed" in result.reason def test_no_expected_elements(self): ocr = mock_ocr_client_factory(["Login"]) vlm = mock_vlm_client_factory({}) result = verify_screen_match("/tmp/test.png", [], ocr_client=ocr, vlm_client=vlm) assert result.match == True assert result.confidence == 1.0 def test_describe_match(self): result = ScreenMatchResult(match=True, confidence=0.92) assert "OK" in result.describe() def test_describe_mismatch(self): result = ScreenMatchResult( match=False, confidence=0.3, mismatches=["bouton: Rechercher"], ) assert "mismatch" in result.describe() def test_multiple_elements_mixed(self): """2 elements: 1 found+role OK, 1 not found in OCR → mismatch.""" ocr = mock_ocr_client_factory(["Connexion"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9}, ], "overall_confidence": 0.9, }) result = verify_screen_match( "/tmp/test.png", [ {"role": "bouton", "text": "Connexion"}, {"role": "champ", "text": "Nom Patient"}, ], ocr_client=ocr, vlm_client=vlm, ) assert result.match == False # "Nom Patient" not found by OCR def test_fuzzy_ocr_match(self): """OCR reads 'Rechércher' (accent), expected 'Rechercher' → still found.""" ocr = mock_ocr_client_factory(["Rechércher"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9}, ], "overall_confidence": 0.9, }) result = verify_screen_match( "/tmp/test.png", [{"role": "bouton", "text": "Rechercher"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == True def test_no_text_elements_trivially_match(self): """Elements without text key → no presence check needed → trivially OK.""" ocr = mock_ocr_client_factory(["Dashboard"]) vlm = mock_vlm_client_factory({}) result = verify_screen_match( "/tmp/test.png", [{"role": "page"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == True # ── verify_before / verify_after tests ──────────────────────────────── class TestVerifyBeforeAfter: def test_verify_before_match(self): ocr = mock_ocr_client_factory(["Login", "Password", "Connexion"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.85}, ], "overall_confidence": 0.85, }) result = verify_before( "/tmp/login.png", [{"role": "champ", "text": "Login"}], ocr_client=ocr, vlm_client=vlm, context="page login", ) assert result.match == True def test_verify_after_higher_threshold(self): """verify_after uses min_confidence=0.8. VLM returns 0.75 → mismatch.""" ocr = mock_ocr_client_factory(["Dashboard"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.75}, ], "overall_confidence": 0.75, }) result = verify_after( "/tmp/dashboard.png", [{"role": "page", "text": "Dashboard"}], ocr_client=ocr, vlm_client=vlm, ) # 0.75 < 0.8 threshold → role mismatch assert result.match == False def test_verify_after_passes_at_0_8(self): ocr = mock_ocr_client_factory(["Dashboard"]) vlm = mock_vlm_client_factory({ "confirmed": [ {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.85}, ], "overall_confidence": 0.85, }) result = verify_after( "/tmp/dashboard.png", [{"role": "page", "text": "Dashboard"}], ocr_client=ocr, vlm_client=vlm, ) assert result.match == True def test_verify_before_ocr_missing(self): """Pre-action: expected text not on screen → mismatch (can't proceed).""" ocr = mock_ocr_client_factory(["Accueil"]) vlm = mock_vlm_client_factory({}) result = verify_before( "/tmp/page.png", [{"role": "bouton", "text": "Connexion"}], ocr_client=ocr, vlm_client=vlm, context="pre-login", ) assert result.match == False assert "OCR presence" in result.reason # ── OcrPresenceResult dataclass tests ───────────────────────────────── class TestOcrPresenceResult: def test_presence_ratio_all_found(self): result = OcrPresenceResult( found_texts={"Login": "Login", "Password": "Password"}, missing=[], all_found=True, ) assert result.presence_ratio == 1.0 def test_presence_ratio_half_found(self): result = OcrPresenceResult( found_texts={"Login": "Login", "Password": ""}, missing=["champ: Password"], all_found=False, ) assert result.presence_ratio == 0.5 def test_presence_ratio_empty(self): result = OcrPresenceResult( found_texts={}, missing=[], all_found=True, ) assert result.presence_ratio == 1.0