feat(navigation): brique login visuel OCR-ancre + action navigate au replay

- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens), grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue), visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime) - api_stream/replay_engine : dispatch action navigate server-side, never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti) - 131 tests verts (wiring boot, e2e handler, unit modules) Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 10:31:44 +02:00
parent ab78ae390a
commit f9a0531325
13 changed files with 2998 additions and 0 deletions
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -436,6 +436,9 @@ from .replay_engine import (
    _notify_error_callback as _notify_error_callback_impl,
 )
 # Navigate handler — import direct depuis core/navigation (pas via replay_engine)
 from core.navigation import _handle_navigate_action
 # Wrappers pour les fonctions replay_engine qui accèdent aux variables globales du module.
@@ -4453,6 +4456,15 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
                            ),
                            timeout=180,
                        )
                    elif type_ == "navigate":
                        await asyncio.wait_for(
                            loop.run_in_executor(
                                None,
                                _handle_navigate_action,
                                action, owning_replay, session_id,
                            ),
                            timeout=180,
                        )
                    elif type_ == "t2a_decision":
                        await asyncio.wait_for(
                            loop.run_in_executor(
--- a/agent_v0/server_v1/replay_engine.py
+++ b/agent_v0/server_v1/replay_engine.py
@@ -41,6 +41,7 @@ _ALLOWED_ACTION_TYPES = {
    "extract_text",      # OCR serveur sur dernier heartbeat → variable workflow
    "extract_table",     # OCR serveur + filtre regex → liste structurée (boucle)
    "extract_dossier",   # OCR grille structurée → dossier patient persisté (brique 3)
    "navigate",          # Navigation visuelle → coords login/recherche (brique navigation)
    "extract_text_scroll", # Marker côté graphe — expansé en sous-actions par _edge_to_normalized_actions
    "_concat_text_vars",   # Action serveur interne (générée par expansion extract_text_scroll)
    "t2a_decision",      # Analyse LLM facturation T2A → variable workflow
@@ -55,6 +56,7 @@ _SERVER_SIDE_ACTION_TYPES = {
    "extract_text",
    "extract_table",
    "extract_dossier",
    "navigate",
    "t2a_decision",
    "llm_generate",
    "_concat_text_vars",
--- a/core/navigation/init.py
+++ b/core/navigation/init.py
@@ -0,0 +1,119 @@
 """Navigation brique — login visuel, recherche dossiers, vérification écran.
 Modules :
 - visual_verifier : verify_before / verify_after chaque action (vision = validateur, OCR-ancré)
 - grounding : résolution visuelle d'éléments UI (OCR-anchor first, VLM fallback, coords cache)
 - visual_login : login form resolution + verification (DPI urgences default config)
 - action_resolver : pont navigation → runtime (coords normalisés, OCR/VLM adapters)
 Pattern d'injection : VlmClient + OcrClient + OcrDetailedClient injectables
 """
 from .visual_verifier import verify_screen_match, ScreenMatchResult
 from .action_resolver import navigate_login, NavigateResult
 # Names exported by the package. _handle_navigate_action is listed explicitly
 # because a star-import would otherwise skip an underscore-prefixed name.
 __all__ = [
    "verify_screen_match",
    "ScreenMatchResult",
    "navigate_login",
    "NavigateResult",
    "_handle_navigate_action",
 ]
 # Handler pour replay_engine — importé par api_stream.py
 def _handle_navigate_action(
    action: dict,
    replay_state: dict,
    session_id: str,
 ) -> bool:
    """Server-side handler for the ``navigate`` replay action.

    Resolves the login-form coordinates on the latest screenshot and stores
    them in ``replay_state["variables"]`` for the type/click actions that
    follow. A resolution problem never raises: it is logged, recorded in
    ``variables["navigate_result"]`` and reported by returning ``False``
    (needs_review).

    Args:
        action: replay action. ``action["parameters"]`` may rename the output
            variables (``login_coords_var``, ``password_coords_var``,
            ``submit_coords_var``) and override any login-config field
            (``login_field``, ``password_field``, ``submit_button``,
            ``success_elements``, ``context``).
        replay_state: mutable replay state; read for the screenshot path and
            the screen size, written under ``"variables"``.
        session_id: session identifier, used for logging only.

    Returns:
        ``True`` when login, password and submit were all resolved,
        ``False`` otherwise.
    """
    import logging
    logger = logging.getLogger("navigation._handle_navigate_action")
    def _output_name(raw, default):
        # A blank or non-string override falls back to the default name
        # instead of raising before the never-fail ``try`` is entered.
        if isinstance(raw, str) and raw.strip():
            return raw.strip()
        return default
    params = action.get("parameters")
    if not isinstance(params, dict):
        params = {}
    # NOTE: parameters["action"] is not dispatched on; login-form resolution
    # is the only navigation implemented by this handler.
    login_var = _output_name(params.get("login_coords_var"), "navigate_login_coords")
    password_var = _output_name(params.get("password_coords_var"), "navigate_password_coords")
    submit_var = _output_name(params.get("submit_coords_var"), "navigate_submit_coords")
    variables = replay_state.setdefault("variables", {})
    if not isinstance(variables, dict):
        # The except branch below writes into this mapping, so it must exist.
        variables = replay_state["variables"] = {}
    try:
        # Prefer the explicit screenshot path; fall back to the last
        # heartbeat when that key is missing or empty.
        screenshot_path = replay_state.get("last_screenshot_path") or ""
        if not screenshot_path:
            hb = replay_state.get("last_heartbeat")
            if isinstance(hb, dict):
                screenshot_path = hb.get("screenshot_path") or ""
        if not screenshot_path:
            logger.warning("navigate: no screenshot for session %s", session_id)
            # Error marker under the login variable is kept for backward
            # compatibility; navigate_result is filled like on every other exit.
            variables[login_var] = {"error": "no_screenshot"}
            variables["navigate_result"] = {"all_resolved": False, "error": "no_screenshot"}
            return False
        # Screen dimensions; a missing, None or zero value uses 1920x1080.
        screen_width = replay_state.get("screen_width") or 1920
        screen_height = replay_state.get("screen_height") or 1080
        # OCR/VLM clients: imported lazily to avoid a circular dependency.
        from core.llm import extract_grid_from_image
        from core.extraction.vlm_client import make_vllm_client
        from core.navigation.action_resolver import make_ocr_detailed_from_grid
        ocr_detailed = make_ocr_detailed_from_grid(extract_grid_from_image)
        vlm_client = make_vllm_client()
        # Login config: defaults, with any field overridable from parameters.
        from core.navigation.visual_login import LoginFormConfig, dpi_urgences_login_config
        config = dpi_urgences_login_config()
        override_keys = (
            "login_field", "password_field", "submit_button",
            "success_elements", "context",
        )
        if any(key in params for key in override_keys):
            config = LoginFormConfig(
                login_field=params.get("login_field", config.login_field),
                password_field=params.get("password_field", config.password_field),
                submit_button=params.get("submit_button", config.submit_button),
                success_elements=params.get("success_elements", config.success_elements),
                context=params.get("context", config.context),
            )
        from core.navigation.action_resolver import navigate_login
        result = navigate_login(
            screenshot_path, config=config,
            ocr_client=ocr_detailed, vlm_client=vlm_client,
            screen_width=screen_width, screen_height=screen_height,
        )
        # Store coords as plain dicts so they can be substituted later.
        if result.login_coords:
            variables[login_var] = result.login_coords.to_dict()
        if result.password_coords:
            variables[password_var] = result.password_coords.to_dict()
        if result.submit_coords:
            variables[submit_var] = result.submit_coords.to_dict()
        method = result.login_coords.method if result.login_coords else ""
        variables["navigate_result"] = {
            "all_resolved": result.all_resolved,
            "method": method,
            "error": result.error,
        }
        if not result.all_resolved:
            logger.warning("navigate: incomplete — %s", result.error)
            return False
        logger.info("navigate: login form resolved OK (method=%s)", method or "?")
        return True
    except Exception as e:
        # Never-fail boundary: keep the traceback in the log, flag needs_review.
        logger.warning("navigate: exception (%s) — needs_review", e, exc_info=True)
        variables["navigate_result"] = {"all_resolved": False, "error": str(e)}
        return False
--- a/core/navigation/action_resolver.py
+++ b/core/navigation/action_resolver.py
@@ -0,0 +1,205 @@
 """Action resolver — pont entre modules navigation et runtime replay.
 Orchestre verify → ground → store coords pour le handler replay_engine.
 Convertit coords pixels → normalisé (x_pct/y_pct) pour le client Agent V1.
 Architecture :
 - handler replay_engine = thin wrapper (appelle action_resolver)
 - action_resolver = bridge (adapte OCR/VLM runtime → interfaces navigation)
 - modules navigation = pure functions (ne connaissent pas le runtime)
 """
 from __future__ import annotations
 import logging
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple
 from core.navigation.grounding import (
    BBox,
    CoordsCache,
    GroundedElement,
    OcrDetailedClient,
    OcrTokenInfo,
    ground_element,
 )
 from core.navigation.visual_login import (
    LoginFormConfig,
    LoginResolution,
    dpi_urgences_login_config,
    resolve_login_form,
    verify_login_visible,
    verify_login_success,
 )
 from core.navigation.visual_verifier import (
    OcrClient,
    ScreenMatchResult,
    VlmClient,
 )
 logger = logging.getLogger(__name__)
 # ── Dataclasses ──────────────────────────────────────────────────────
@dataclass
class NavigateCoords:
    """Normalized coordinates of a grounded element (Agent V1 client format)."""
    x_pct: float  # click-target x as a fraction of the screen width
    y_pct: float  # click-target y as a fraction of the screen height
    bbox_pct: Optional[Tuple[float, float, float, float]] = None  # (x1, y1, x2, y2) normalized
    method: str = ""  # grounding method that produced the coords
    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dict; ``bbox_pct`` is included (as a list) only when set."""
        payload: Dict[str, Any] = {
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
        }
        if self.bbox_pct:
            payload["bbox_pct"] = list(self.bbox_pct)
        return payload
@dataclass
class NavigateResult:
    """Result of a navigate action: normalized coords for each resolved field."""
    login_coords: Optional[NavigateCoords] = None
    password_coords: Optional[NavigateCoords] = None
    submit_coords: Optional[NavigateCoords] = None
    all_resolved: bool = False  # True only when all three fields were grounded
    pre_verify: Optional[ScreenMatchResult] = None  # outcome of the pre-check, when it ran
    # Not filled in by navigate_login; per the original note it is meant to be
    # set later by verify_after.
    post_verify: Optional[ScreenMatchResult] = None
    error: str = ""  # human-readable failure reason, empty on success
 # ── Coordinate conversion ────────────────────────────────────────────
 def grounded_to_coords(
    element: GroundedElement,
    screen_width: int,
    screen_height: int,
 ) -> NavigateCoords:
    """Turn a pixel-space GroundedElement into normalized NavigateCoords.

    x values are divided by ``screen_width`` and y values by
    ``screen_height``. A falsy dimension yields 0 for that axis instead of
    dividing by zero. The grounding method is carried over unchanged.
    """
    def _ratio(values, index, extent):
        # Only index into the sequence when the extent is usable.
        return values[index] / extent if extent else 0
    center_x = _ratio(element.center, 0, screen_width)
    center_y = _ratio(element.center, 1, screen_height)
    normalized_box = (
        _ratio(element.bbox, 0, screen_width),
        _ratio(element.bbox, 1, screen_height),
        _ratio(element.bbox, 2, screen_width),
        _ratio(element.bbox, 3, screen_height),
    )
    return NavigateCoords(
        x_pct=center_x,
        y_pct=center_y,
        bbox_pct=normalized_box,
        method=element.method,
    )
 # ── OCR adapter ──────────────────────────────────────────────────────
 def make_ocr_detailed_from_grid(
    grid_fn: Callable[[str], List[List[Dict[str, Any]]]],
 ) -> OcrDetailedClient:
    """Wrap a grid-style OCR function as an OcrDetailedClient.

    The returned callable runs ``grid_fn`` on an image path, flattens the
    grid with ``tokens_from_grid`` and copies each token's text, bbox and
    confidence into an ``OcrTokenInfo``.

    NOTE(review): the original docstring called the bbox "normalized LTRB"
    while OcrTokenInfo documents LTRB pixels; confirm which unit
    ``tokens_from_grid`` actually produces.
    """
    from core.extraction.role_mapper import tokens_from_grid
    def _detailed_ocr(image_path: str) -> List[OcrTokenInfo]:
        infos: List[OcrTokenInfo] = []
        for tok in tokens_from_grid(grid_fn(image_path)):
            infos.append(
                OcrTokenInfo(text=tok.text, bbox=tok.bbox, confidence=tok.confidence)
            )
        return infos
    return _detailed_ocr
 def make_ocr_simple_from_detailed(
    ocr_detailed: OcrDetailedClient,
 ) -> OcrClient:
    """Build a text-only OcrClient on top of a detailed (text + bbox) one."""
    def _texts_only(image_path: str) -> List[str]:
        # Keep the token order, drop everything except the text.
        texts = []
        for token in ocr_detailed(image_path):
            texts.append(token.text)
        return texts
    return _texts_only
 # ── Navigate login orchestration ─────────────────────────────────────
 def navigate_login(
    screenshot_path: str,
    config: Optional[LoginFormConfig] = None,
    ocr_client: Optional[OcrDetailedClient] = None,
    vlm_client: Optional[VlmClient] = None,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
    skip_pre_verify: bool = False,
 ) -> NavigateResult:
    """Orchestrate login navigation: pre-verify, ground, normalize coords.

    Args:
        screenshot_path: image to analyse.
        config: login form description; defaults to the DPI urgences config.
        ocr_client: detailed OCR client (required).
        vlm_client: VLM client (required).
        screen_width: screen width used for pixel/normalized conversion.
        screen_height: screen height used for pixel/normalized conversion.
        coords_cache: optional cache forwarded to the grounding step.
        skip_pre_verify: when True the "login form visible" check is skipped.

    Returns:
        NavigateResult. ``all_resolved`` is True only when login, password
        and submit were all grounded; otherwise ``error`` says which step
        failed and no coords are returned.
    """
    if config is None:
        config = dpi_urgences_login_config()
    if ocr_client is None or vlm_client is None:
        return NavigateResult(
            all_resolved=False,
            error="ocr_client and vlm_client required",
        )
    # The three groundings (and the pre-check, through the text-only adapter)
    # are all handed the same screenshot. Memoize successful OCR results so
    # the image is OCR'd once per call. Failures are not stored, so a raising
    # client is simply retried by the next step.
    ocr_memo: Dict[str, List[OcrTokenInfo]] = {}
    def ocr_once(image_path: str) -> List[OcrTokenInfo]:
        if image_path not in ocr_memo:
            ocr_memo[image_path] = ocr_client(image_path)
        return ocr_memo[image_path]
    ocr_simple = make_ocr_simple_from_detailed(ocr_once)
    # Step 1: pre-verification (optional)
    pre_verify = None
    if not skip_pre_verify:
        pre_verify = verify_login_visible(
            screenshot_path, config, ocr_simple, vlm_client,
        )
        if not pre_verify.match:
            logger.warning("navigate_login: pre-verify failed — %s", pre_verify.describe())
            return NavigateResult(
                all_resolved=False,
                pre_verify=pre_verify,
                error=f"pre-verify failed: {pre_verify.describe()}",
            )
    # Step 2: ground all fields
    resolution = resolve_login_form(
        screenshot_path, config, ocr_once, vlm_client,
        screen_width=screen_width, screen_height=screen_height,
        coords_cache=coords_cache,
    )
    if not resolution.all_resolved:
        logger.warning("navigate_login: incomplete resolution — %s", resolution.describe())
        return NavigateResult(
            all_resolved=False,
            pre_verify=pre_verify,
            error=f"incomplete resolution: {resolution.describe()}",
        )
    # Step 3: convert pixel coords to normalized coords
    login_coords = grounded_to_coords(resolution.login_field, screen_width, screen_height) if resolution.login_field else None
    password_coords = grounded_to_coords(resolution.password_field, screen_width, screen_height) if resolution.password_field else None
    submit_coords = grounded_to_coords(resolution.submit_button, screen_width, screen_height) if resolution.submit_button else None
    return NavigateResult(
        login_coords=login_coords,
        password_coords=password_coords,
        submit_coords=submit_coords,
        all_resolved=True,
        pre_verify=pre_verify,
    )
--- a/core/navigation/grounding.py
+++ b/core/navigation/grounding.py
@@ -0,0 +1,375 @@
 """Grounding — résolution visuelle d'éléments UI → coords (bbox + center).
 Architecture OCR-ancrée (alignée avec visual_verifier) :
 - STRATÉGIE 1 : OCR-anchor — si le texte cible est trouvé par OCR,
  utiliser le bbox du token OCR (déterministe, zero hallucination).
 - STRATÉGIE 2 : VLM grounder — si OCR ne trouve pas le texte,
  le VLM localise l'élément visuellement (fallback, risque contrôlé).
 - CACHE coords : mémorise les coords résolues, validées par vision avant usage.
  Si cached coords fail → re-résolution visuelle.
 Coords = cache local validé par vue (Dom/Claude recadrage 01/07).
 Vision = source de vérité, coords = shortcut validé.
 BBox format interne : LTRB (x1, y1, x2, y2) pixels absolus —
 cohérent avec SomElement, OcrToken, DetectedUIElement.
 """
 from __future__ import annotations
 import json
 import logging
 import re
 from dataclasses import dataclass, field
 from typing import Any, Callable, Dict, List, Optional, Tuple
 from core.navigation.visual_verifier import (
    fuzzy_match,
    normalize_text,
    OcrClient,
    VlmClient,
 )
 logger = logging.getLogger(__name__)
 # BBox format: LTRB pixels (x1, y1, x2, y2)
 BBox = Tuple[int, int, int, int]
 # ── Dataclasses ──────────────────────────────────────────────────────
@dataclass
class OcrTokenInfo:
    """OCR token with an optional bounding box, used for grounding."""
    text: str  # recognized text
    bbox: Optional[BBox] = None  # (x1, y1, x2, y2) LTRB pixels; None when no box is available
    confidence: float = 1.0  # OCR confidence, defaults to 1.0
 # Type alias — injectable OCR client returning tokens with bbox
 # More detailed than visual_verifier's OcrClient (which returns List[str])
 OcrDetailedClient = Callable[[str], List[OcrTokenInfo]]
@dataclass
class GroundedElement:
    """A UI element located on screen, with its coordinates."""
    role: str  # requested role of the element
    text: str  # requested target text
    bbox: BBox  # (x1, y1, x2, y2) LTRB pixels
    center: Tuple[int, int]  # (cx, cy), the click target
    confidence: float
    method: str  # "ocr_anchor", "vlm_grounder" or "cache"
    source_ocr_text: str = ""  # OCR text that actually matched (fuzzy match)
@dataclass
class CoordsCacheEntry:
    """Cached coordinates for one UI element."""
    element_key: str  # "role:text"
    bbox: BBox
    center: Tuple[int, int]
    method: str  # how the coords were originally resolved
    validation_count: int = 0  # bumped by the cache on each store and each hit
 class CoordsCache:
    """In-memory store of grounded coordinates, keyed by element key.

    The cache itself performs no visual validation; callers are expected to
    verify cached coords and call ``invalidate`` when they no longer hold.
    """
    def __init__(self) -> None:
        self._entries: Dict[str, CoordsCacheEntry] = {}
    def get(self, element_key: str) -> Optional[CoordsCacheEntry]:
        """Return the entry stored under ``element_key``, or None."""
        return self._entries.get(element_key)
    def put(
        self,
        element_key: str,
        bbox: BBox,
        center: Tuple[int, int],
        method: str,
    ) -> None:
        """Store coords for ``element_key``.

        A new key gets a fresh entry with ``validation_count`` 1; an existing
        entry is overwritten in place and its counter incremented.
        """
        existing = self._entries.get(element_key)
        if not existing:
            self._entries[element_key] = CoordsCacheEntry(
                element_key=element_key,
                bbox=bbox,
                center=center,
                method=method,
                validation_count=1,
            )
            return
        existing.bbox = bbox
        existing.center = center
        existing.method = method
        existing.validation_count += 1
    def invalidate(self, element_key: str) -> None:
        """Drop the entry for ``element_key`` if there is one."""
        if element_key in self._entries:
            del self._entries[element_key]
    def clear(self) -> None:
        """Remove every entry."""
        self._entries.clear()
    def keys(self) -> List[str]:
        """Return the cached element keys as a new list."""
        return list(self._entries)
 # ── Helper functions ─────────────────────────────────────────────────
 def bbox_center(bbox: BBox) -> Tuple[int, int]:
    """Return the (cx, cy) midpoint of an LTRB box, using floor division."""
    left, top, right, bottom = bbox
    mid_x = (left + right) // 2
    mid_y = (top + bottom) // 2
    return (mid_x, mid_y)
 def make_element_key(role: str, text: str) -> str:
    """Build the cache key ``"<role>:<normalized text>"`` for an element."""
    normalized = normalize_text(text)
    return "{}:{}".format(role, normalized)
 # ── OCR-anchored grounding (deterministic) ───────────────────────────
 def ocr_anchor_ground(
    ocr_tokens: List[OcrTokenInfo],
    target: Dict[str, Any],
    fuzzy_threshold: float = 0.8,
 ) -> Optional[GroundedElement]:
    """Ground an element on the first OCR token that fuzzy-matches its text.

    Tokens are scanned in order. A matching token without a bbox is skipped
    because it cannot provide coordinates. Returns None when the target has
    no text or when no usable token matches.
    """
    wanted = target.get("text", "")
    role = target.get("role", "?")
    if not wanted:
        return None
    hit = next(
        (
            tok for tok in ocr_tokens
            if fuzzy_match(wanted, tok.text, threshold=fuzzy_threshold)
            and tok.bbox is not None
        ),
        None,
    )
    if hit is None:
        return None
    return GroundedElement(
        role=role,
        text=wanted,
        bbox=hit.bbox,
        center=bbox_center(hit.bbox),
        confidence=hit.confidence,
        method="ocr_anchor",
        source_ocr_text=hit.text,
    )
 # ── VLM grounder (fallback) ─────────────────────────────────────────
 def build_grounder_prompt(
    target: Dict[str, Any],
    context: str = "",
 ) -> str:
    """Compose the VLM prompt asking where a UI element is on the screenshot.

    The prompt names the target's role and text (plus the optional ``extra``
    hint and ``context`` line) and requests a JSON answer whose bbox is
    normalized to [0-1].
    """
    role = target.get("role", "?")
    text = target.get("text", "")
    extra = target.get("extra", "")
    segments = [
        "You are a UI element locator. Find the specified element on this "
        "screenshot and return its bounding box.\n",
    ]
    if context:
        segments.append(f"Context: {context}\n")
    segments.append(f"Target element: {role} with text \"{text}\"")
    if extra:
        segments.append(f" ({extra})")
    segments.append(
        "\n\nRespond in JSON format:\n"
        "{\"found\": true/false, "
        "\"bbox\": [x1_norm, y1_norm, x2_norm, y2_norm], "
        "\"confidence\": 0.0-1.0, "
        "\"description\": \"...\"}\n"
        "bbox coordinates are normalized [0.0-1.0] relative to image dimensions "
        "(x1=left, y1=top, x2=right, y2=bottom). "
        "Only return found=true if you can clearly locate the element."
    )
    return "".join(segments)
 def parse_grounder_response(
    vlm_text: str,
    screen_width: int,
    screen_height: int,
    target: Dict[str, Any],
 ) -> Optional[GroundedElement]:
    """Parse a VLM grounder reply into a GroundedElement.

    The reply is expected to be (or to contain) a JSON object with ``found``,
    ``bbox`` (four values normalized to [0-1]) and optionally ``confidence``.
    The bbox is converted to absolute pixels and clamped to the screen.

    Returns None instead of raising when the reply is not text, is not a
    JSON object, reports the element as not found, or carries an unusable
    bbox.
    """
    try:
        data = json.loads(vlm_text)
    except TypeError:
        # The client returned something that is not text (e.g. None).
        logger.warning("grounding: VLM response is not text")
        return None
    except json.JSONDecodeError:
        # Models often wrap the JSON in prose; retry on the outermost braces.
        json_match = re.search(r"\{[\s\S]*\}", vlm_text) if isinstance(vlm_text, str) else None
        if not json_match:
            return None
        try:
            data = json.loads(json_match.group())
        except json.JSONDecodeError:
            logger.warning("grounding: VLM response not parseable as JSON")
            return None
    if not isinstance(data, dict):
        # Valid JSON that is a list, string or number has no fields to read.
        logger.warning("grounding: VLM response is not a JSON object")
        return None
    found = data.get("found", False)
    if isinstance(found, str):
        # A quoted "false" is a non-empty string and would otherwise be truthy.
        found = found.strip().lower() in ("true", "yes", "1")
    if not found:
        return None
    bbox_norm = data.get("bbox", [])
    if not isinstance(bbox_norm, list) or len(bbox_norm) != 4:
        logger.warning("grounding: invalid bbox format from VLM")
        return None
    # Convert normalized [0-1] to absolute pixels. int() raises ValueError on
    # NaN and OverflowError on infinity, both of which json.loads can produce.
    try:
        x1 = int(float(bbox_norm[0]) * screen_width)
        y1 = int(float(bbox_norm[1]) * screen_height)
        x2 = int(float(bbox_norm[2]) * screen_width)
        y2 = int(float(bbox_norm[3]) * screen_height)
    except (ValueError, TypeError, OverflowError):
        logger.warning("grounding: bbox values not numeric")
        return None
    # Clamp to screen bounds, keeping x2 >= x1 and y2 >= y1.
    x1 = max(0, min(x1, screen_width))
    y1 = max(0, min(y1, screen_height))
    x2 = max(x1, min(x2, screen_width))
    y2 = max(y1, min(y2, screen_height))
    # Confidence may arrive as a number, a numeric string, or junk.
    try:
        confidence = float(data.get("confidence", 0.5))
    except (TypeError, ValueError):
        confidence = 0.5
    bbox_abs: BBox = (x1, y1, x2, y2)
    return GroundedElement(
        role=target.get("role", "?"),
        text=target.get("text", ""),
        bbox=bbox_abs,
        center=bbox_center(bbox_abs),
        confidence=confidence,
        method="vlm_grounder",
    )
 # ── Core grounding function (composition) ───────────────────────────
 def ground_element(
    screenshot_path: str,
    target: Dict[str, Any],
    ocr_client: OcrDetailedClient,
    vlm_client: VlmClient,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
    context: str = "",
    fuzzy_threshold: float = 0.8,
 ) -> Optional[GroundedElement]:
    """Ground a UI element on screen: cache, then OCR-anchor, then VLM.

    Resolution strategy:
    1. Cache: a cached entry is returned as-is (it is not re-validated here).
    2. OCR-anchor: bbox of the first OCR token matching the target text.
    3. VLM grounder: fallback when OCR did not find the text.

    OCR, VLM and reply-parsing failures are logged and treated as "not
    found"; they never propagate to the caller.

    Args:
        screenshot_path: path to the screenshot image
        target: {"role": "bouton", "text": "Connexion"}, the element to find
        ocr_client: injectable OCR client returning List[OcrTokenInfo]
        vlm_client: injectable VLM client (image_path, prompt) -> text
        screen_width/height: screen dimensions for pixel conversion
        coords_cache: optional CoordsCache for memoization
        context: optional context (e.g. "page login DPI")
        fuzzy_threshold: fuzzy match threshold for OCR anchoring
    Returns:
        GroundedElement with bbox + center, or None if not found
    """
    target_text = target.get("text", "")
    target_role = target.get("role", "?")
    element_key = make_element_key(target_role, target_text)
    # "is not None" rather than truthiness, so the check keeps working even
    # if the cache class ever defines __len__.
    has_cache = coords_cache is not None
    # Step 0: check cache
    if has_cache:
        cached = coords_cache.get(element_key)
        if cached:
            cached.validation_count += 1
            logger.info("grounding: using cached coords for %s", element_key)
            return GroundedElement(
                role=target_role,
                text=target_text,
                bbox=cached.bbox,
                center=cached.center,
                confidence=1.0,  # cached entries are reported at full confidence
                method="cache",
            )
    # Step 1: OCR-anchor (deterministic)
    try:
        ocr_tokens = ocr_client(screenshot_path)
    except Exception as e:
        logger.warning("grounding: OCR call failed (%s)", e)
        ocr_tokens = []
    ocr_result = ocr_anchor_ground(ocr_tokens, target, fuzzy_threshold)
    if ocr_result:
        if has_cache:
            coords_cache.put(element_key, ocr_result.bbox, ocr_result.center, "ocr_anchor")
        logger.info(
            "grounding: OCR-anchor found '%s' (matched OCR='%s', conf=%.2f)",
            target_text, ocr_result.source_ocr_text, ocr_result.confidence,
        )
        return ocr_result
    # Step 2: VLM grounder (fallback)
    if not target_text:
        logger.warning("grounding: no text for target, VLM grounder needs text")
        return None
    prompt = build_grounder_prompt(target, context)
    try:
        vlm_text = vlm_client(screenshot_path, prompt)
    except Exception as e:
        logger.warning("grounding: VLM grounder call failed (%s)", e)
        return None
    # The reply is model output: a malformed one must degrade to "not found"
    # rather than raise out of the grounding step.
    try:
        vlm_result = parse_grounder_response(vlm_text, screen_width, screen_height, target)
    except Exception as e:
        logger.warning("grounding: VLM response could not be parsed (%s)", e)
        vlm_result = None
    if vlm_result:
        if has_cache:
            coords_cache.put(element_key, vlm_result.bbox, vlm_result.center, "vlm_grounder")
        logger.info(
            "grounding: VLM grounder found '%s' (conf=%.2f)",
            target_text, vlm_result.confidence,
        )
        return vlm_result
    logger.warning("grounding: element '%s' not found by OCR or VLM", target_text)
    return None
--- a/core/navigation/visual_login.py
+++ b/core/navigation/visual_login.py
@@ -0,0 +1,227 @@
 """Visual login — résolution + vérification du formulaire de login par grounding.
 Architecture (alignée visual_verifier + grounding) :
 - verify_before : formulaire login visible (champs + bouton présents)
 - resolve_login_form : ground chaque champ (login, password, bouton) → coords
 - verify_after : dashboard/accueil visible (post-login)
 - Chaque étape encadrée par vision (DETTE-023 couvert)
 Coords = cache local validé par vue (Dom/Claude recadrage).
 Le runtime exécute les actions (type/click) — ce module résout + valide.
 """
 from __future__ import annotations
 import logging
 from dataclasses import dataclass, field
 from typing import Any, Callable, Dict, List, Optional, Tuple
 from core.navigation.grounding import (
    BBox,
    CoordsCache,
    GroundedElement,
    OcrDetailedClient,
    OcrTokenInfo,
    ground_element,
 )
 from core.navigation.visual_verifier import (
    OcrClient,
    ScreenMatchResult,
    VlmClient,
    verify_before,
    verify_after,
 )
 logger = logging.getLogger(__name__)
 # ── Dataclasses ──────────────────────────────────────────────────────
@dataclass
class LoginFormConfig:
    """Description of a login form: which elements to look for."""
    login_field: Dict[str, Any]  # e.g. {"role": "champ", "text": "Login"}
    password_field: Dict[str, Any]  # e.g. {"role": "champ", "text": "Mot de passe"}
    submit_button: Dict[str, Any]  # e.g. {"role": "bouton", "text": "Connexion"}
    # Elements expected once logged in; each instance gets its own list.
    success_elements: List[Dict[str, Any]] = field(default_factory=list)
    context: str = ""  # free-text hint, e.g. "DPI urgences"
@dataclass
class LoginResolution:
    """Result of login form resolution: grounded coords for each field."""
    login_field: Optional[GroundedElement] = None
    password_field: Optional[GroundedElement] = None
    submit_button: Optional[GroundedElement] = None
    all_resolved: bool = False  # True when none of the three fields is missing
    method: str = ""  # "ocr_anchor", "vlm_grounder", "mixed", "cache"
    def describe(self) -> str:
        """Return a one-line summary: status, then each field's center and method."""
        labelled = (
            ("login", self.login_field),
            ("password", self.password_field),
            ("button", self.submit_button),
        )
        parts = [
            f"{label}@{element.center} ({element.method})" if element
            else f"{label}: NOT FOUND"
            for label, element in labelled
        ]
        status = "OK" if self.all_resolved else "INCOMPLETE"
        return f"Login resolution [{status}]: " + ", ".join(parts)
 # ── Default configs ──────────────────────────────────────────────────
 def dpi_urgences_login_config() -> LoginFormConfig:
    """Build the default LoginFormConfig for the DPI urgences login form."""
    login = {"role": "champ", "text": "Login", "extra": "champ identifiant"}
    password = {"role": "champ", "text": "Mot de passe", "extra": "champ password"}
    submit = {"role": "bouton", "text": "Connexion", "extra": "bouton submit"}
    # Either landing page counts as evidence of a successful login.
    landing_pages = [
        {"role": "page", "text": title} for title in ("Accueil", "Dashboard")
    ]
    return LoginFormConfig(
        login_field=login,
        password_field=password,
        submit_button=submit,
        success_elements=landing_pages,
        context="DPI urgences — page login",
    )
 # ── Helper ───────────────────────────────────────────────────────────
 def _ocr_detailed_to_simple(ocr_detailed: OcrDetailedClient) -> OcrClient:
    """Adapt a detailed OCR client (text + bbox) into a text-only OcrClient."""
    def _texts_only(image_path: str) -> List[str]:
        # Same tokens, same order, text attribute only.
        return list(map(lambda token: token.text, ocr_detailed(image_path)))
    return _texts_only
 # ── Core functions ───────────────────────────────────────────────────
 def verify_login_visible(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
 ) -> ScreenMatchResult:
    """Pre-condition check: is the login form on screen?

    Delegates to ``verify_before`` with the login field, password field and
    submit button as expected elements, and the config's context.
    """
    return verify_before(
        screenshot_path,
        [config.login_field, config.password_field, config.submit_button],
        ocr_client,
        vlm_client,
        context=config.context,
    )
 def verify_login_success(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
 ) -> ScreenMatchResult:
    """Post-condition check: is a post-login screen visible?

    Delegates to ``verify_after`` with ``config.success_elements``. When the
    config lists no success elements nothing can be verified, so a non-match
    with confidence 0.0 is returned.

    NOTE(review): the original docstring mentions a higher verify_after
    threshold (0.8); that value is not visible from this module.
    """
    if config.success_elements:
        return verify_after(
            screenshot_path, config.success_elements, ocr_client, vlm_client,
            context=f"POST-LOGIN: {config.context}",
        )
    # No success criteria defined, nothing to check against.
    return ScreenMatchResult(
        match=False,
        confidence=0.0,
        reason="no success_elements defined in config",
    )
 def resolve_login_form(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrDetailedClient,
    vlm_client: VlmClient,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
 ) -> LoginResolution:
    """Ground the login field, password field and submit button.

    Each element goes through ``ground_element`` in that order, with the
    same clients, screen size, cache and context. The result records the
    three grounded elements, whether all were found, and an overall method:
    the shared method name, "mixed" when they differ, "" when none was found.
    """
    targets = (config.login_field, config.password_field, config.submit_button)
    grounded = [
        ground_element(
            screenshot_path, spec,
            ocr_client=ocr_client, vlm_client=vlm_client,
            screen_width=screen_width, screen_height=screen_height,
            coords_cache=coords_cache, context=config.context,
        )
        for spec in targets
    ]
    login_el, password_el, button_el = grounded
    all_resolved = all(found is not None for found in grounded)
    # Overall method, derived from the elements that were actually found.
    methods_seen = {found.method for found in grounded if found}
    if not methods_seen:
        method = ""
    elif len(methods_seen) == 1:
        method = methods_seen.pop()
    else:
        method = "mixed"
    resolution = LoginResolution(
        login_field=login_el,
        password_field=password_el,
        submit_button=button_el,
        all_resolved=all_resolved,
        method=method,
    )
    if not all_resolved:
        logger.warning("resolve_login_form: incomplete — %s", resolution.describe())
    else:
        logger.info("resolve_login_form: %s", resolution.describe())
    return resolution
--- a/core/navigation/visual_verifier.py
+++ b/core/navigation/visual_verifier.py
@@ -0,0 +1,408 @@
 """Visual verifier — verify_before / verify_after avec ancrage OCR.
 Architecture OCR-ancrée (challenge Claude 01/07, gate-vert 30/06) :
 - PRESENCE = tokens OCR (déterministe, pas d'hallucination possible)
 - RÔLE = VLM confirmation (semantic, ancré sur tokens OCR trouvés)
 - VLM ne décide JAMAIS de la présence d'un élément
 - Faux positif impossible par construction ; faux négatif = retry acceptable
 Pattern d'injection : OcrClient + VlmClient injectables (tests sans réseau).
 """
 from __future__ import annotations
 import json
 import logging
 import re
 import unicodedata
 from dataclasses import dataclass, field
 from difflib import SequenceMatcher
 from typing import Any, Callable, Dict, List, Optional
 logger = logging.getLogger(__name__)
 # Type aliases — injectable callables for offline testing
 VlmClient = Callable[[str, str], str]  # (image_path, prompt) -> text
 OcrClient = Callable[[str], List[str]]  # (image_path) -> list of OCR text strings
@dataclass
class ScreenMatchResult:
    """Outcome of one screen verification.

    ``match`` is the verdict and ``confidence`` the score attached to it.
    ``reason`` is a free-text explanation, ``mismatches`` lists what was
    missing or wrong, and the two ``*_elements`` lists carry the per-element
    details (observed) and the original expectations (expected).
    """
    match: bool
    confidence: float = 0.0
    reason: str = ""
    observed_elements: List[Dict[str, Any]] = field(default_factory=list)
    expected_elements: List[Dict[str, Any]] = field(default_factory=list)
    mismatches: List[str] = field(default_factory=list)
    def describe(self) -> str:
        """Return a one-line summary suitable for logging."""
        if self.match:
            return f"Screen match OK (conf={self.confidence:.2f})"
        header = f"Screen mismatch (conf={self.confidence:.2f})"
        missing = "missing: " + ", ".join(self.mismatches) if self.mismatches else ""
        # Empty segments (no mismatches, no reason) are left out of the summary.
        return " | ".join(segment for segment in (header, missing, self.reason) if segment)
 # ── Text normalization (pure functions) ────────────────────────────────
 def normalize_text(text: str) -> str:
    """Normalize text for fuzzy matching.

    Lowercases, strips accents (NFKD decomposition with combining marks
    removed), collapses whitespace runs to a single space and trims the ends.

    Args:
        text: Raw text (expected label or OCR token).

    Returns:
        The normalized string; may be empty.
    """
    text = unicodedata.normalize("NFKD", text.lower())
    # Strip accents: é→e, è→e, ê→e, à→a, etc.
    text = "".join(c for c in text if not unicodedata.combining(c))
    # Trim AFTER decomposition: NFKD can itself produce whitespace
    # (e.g. U+00A8 decomposes to a space plus a combining mark).
    return re.sub(r"\s+", " ", text).strip()
 def fuzzy_match(expected: str, observed: str, threshold: float = 0.8) -> bool:
    """Check if observed text fuzzy-matches expected text.

    Three strategies (any wins):
    1. Exact match after normalization
    2. Substring containment (either direction)
    3. SequenceMatcher ratio >= threshold

    A text that normalizes to the empty string only matches another empty
    text. Without this guard the containment test would accept it against
    anything, because ``"" in s`` is always true, and a blank OCR token
    would then "confirm" every expected element.

    Args:
        expected: Text we are looking for.
        observed: Candidate text (typically one OCR token).
        threshold: Minimum SequenceMatcher ratio for strategy 3.

    Returns:
        True when the two texts are considered the same.
    """
    norm_expected = normalize_text(expected)
    norm_observed = normalize_text(observed)
    if norm_expected == norm_observed:
        return True
    if not norm_expected or not norm_observed:
        # One side is blank and the other is not: no evidence of a match.
        return False
    if norm_expected in norm_observed or norm_observed in norm_expected:
        return True
    ratio = SequenceMatcher(None, norm_expected, norm_observed).ratio()
    return ratio >= threshold
 # ── OCR presence check (deterministic, no VLM) ──────────────────────
@dataclass
class OcrPresenceResult:
    """Result of OCR-based presence check."""
    # expected text -> matched OCR token ("" when no token matched).
    # The "" key is a placeholder recorded for expected elements without text.
    found_texts: Dict[str, str] = field(default_factory=dict)
    # Human-readable "role: text" entries for every expected text not found.
    missing: List[str] = field(default_factory=list)
    all_found: bool = False
    @property
    def presence_ratio(self) -> float:
        """Fraction of expected texts that were matched by an OCR token.

        The ``""`` placeholder key (elements without text) is ignored: such
        elements are not presence-checked, so they must not drag the ratio
        down. Returns 1.0 when there is nothing to check.
        """
        checked = [matched for expected, matched in self.found_texts.items() if expected]
        if not checked:
            return 1.0
        found_count = sum(1 for matched in checked if matched != "")
        return found_count / len(checked)
 def ocr_presence_check(
    ocr_tokens: List[str],
    expected_elements: List[Dict[str, Any]],
    fuzzy_threshold: float = 0.8,
 ) -> OcrPresenceResult:
    """Check presence of expected texts against OCR tokens (deterministic).

    Pure function — no VLM call.

    Args:
        ocr_tokens: Text strings returned by the OCR client. ``None`` is
            treated as an empty list; blank or non-string tokens are ignored.
        expected_elements: Dicts read through their ``"text"`` and ``"role"``
            keys. Elements without text are recorded under the ``""`` key and
            never reported missing.
        fuzzy_threshold: Threshold forwarded to ``fuzzy_match``.

    Returns:
        An ``OcrPresenceResult`` mapping each expected text to the first
        matching token (``""`` when none), plus the list of missing entries.
    """
    # A blank token carries no evidence. Depending on how the matcher treats
    # empty strings it could otherwise stop the scan early and produce either
    # a false hit or a false miss, so such tokens are dropped up front.
    usable_tokens = [
        token for token in (ocr_tokens or [])
        if isinstance(token, str) and token.strip()
    ]
    found_texts: Dict[str, str] = {}
    missing: List[str] = []
    for el in expected_elements:
        expected_text = el.get("text", "")
        if not expected_text:
            found_texts[""] = ""
            continue
        # First matching token wins.
        matched_ocr = next(
            (
                token for token in usable_tokens
                if fuzzy_match(expected_text, token, threshold=fuzzy_threshold)
            ),
            "",
        )
        found_texts[expected_text] = matched_ocr
        if not matched_ocr:
            missing.append(f"{el.get('role', '?')}: {expected_text}")
    return OcrPresenceResult(
        found_texts=found_texts,
        missing=missing,
        all_found=not missing,
    )
 # ── VLM role confirmation (semantic, anchored on found OCR texts) ────
 def build_role_confirm_prompt(
    found_elements: List[Dict[str, Any]],
    expected_elements: List[Dict[str, Any]],
    context: str = "",
 ) -> str:
    """Assemble the VLM prompt asking for ROLE confirmation of OCR-found texts.

    The prompt lists each found text with a 1-based index and its expected
    role, and asks for a JSON answer. It explicitly tells the model not to
    re-assess presence.

    Args:
        found_elements: Dicts read through ``"matched_ocr"`` and
            ``"expected_role"``; list order defines the prompt indices.
        expected_elements: Accepted for interface compatibility; not used
            when building the text.
        context: Optional free text inserted as a ``Context:`` line.

    Returns:
        The prompt string.
    """
    numbered = [
        f"{position}. Text \"{entry.get('matched_ocr', '')}\" — expected role: {entry.get('expected_role', '?')}"
        for position, entry in enumerate(found_elements, start=1)
    ]
    listing = "\n".join(numbered)
    sections = [
        "You are a screen role validator. OCR has confirmed these texts are "
        "present on the screen. Your job is ONLY to confirm their ROLE — "
        "do NOT re-declare whether they are present.\n"
    ]
    if context:
        sections.append(f"Context: {context}\n")
    sections.append(
        f"Found texts with expected roles:\n{listing}\n\n"
        "Respond in JSON format:\n"
        "{\"confirmed\": [{\"index\": 1, \"role_confirmed\": true/false, "
        "\"actual_role\": \"...\", \"confidence\": 0.0-1.0}], "
        "\"overall_confidence\": 0.0-1.0}\n"
        "Only confirm role_confirmed=true if the text clearly plays the "
        "expected role (e.g., a button, not just a label with the same text)."
    )
    return "".join(sections)
 def parse_role_confirm_response(vlm_text: str) -> Dict[str, Any]:
    """Parse the VLM role-confirmation answer into a predictable structure.

    The text is parsed as JSON directly; if that fails, the span from the
    first ``{`` to the last ``}`` is tried (covers answers wrapped in
    markdown fences or prose).

    Args:
        vlm_text: Raw model output. Non-string input is treated as
            unparseable rather than raising.

    Returns:
        ``{"confirmed": [...], "overall_confidence": float}``. ``confirmed``
        only contains dict entries; ``overall_confidence`` is 0.0 when
        missing or not numeric. When nothing parseable is found the result
        is an empty list with confidence 0.0.
    """
    raw = vlm_text if isinstance(vlm_text, str) else ""
    data: Any = None
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        json_match = re.search(r"\{[\s\S]*\}", raw)
        if json_match:
            try:
                data = json.loads(json_match.group())
            except json.JSONDecodeError:
                data = None
    if not isinstance(data, dict):
        # Covers unparseable text as well as valid JSON that is not an
        # object (list, number, string), on which ``.get`` would fail.
        logger.warning("role_confirm: VLM response not parseable as JSON")
        return {"confirmed": [], "overall_confidence": 0.0}
    confirmed = data.get("confirmed", [])
    if isinstance(confirmed, list):
        # Callers read entries with ``.get``; drop anything that is not a dict.
        confirmed = [entry for entry in confirmed if isinstance(entry, dict)]
    else:
        confirmed = []
    try:
        overall_conf = float(data.get("overall_confidence", 0.0))
    except (TypeError, ValueError):
        # null, list, non-numeric string, ...
        overall_conf = 0.0
    return {
        "confirmed": confirmed,
        "overall_confidence": overall_conf,
    }
 # ── Core verification (OCR-anchored composition) ────────────────────
 def _as_confidence(value: Any) -> float:
    """Coerce a VLM-reported confidence to ``float``; non-numeric values become 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0
 def verify_screen_match(
    screenshot_path: str,
    expected_elements: List[Dict[str, Any]],
    ocr_client: OcrClient,
    vlm_client: VlmClient,
    context: str = "",
    min_confidence: float = 0.7,
 ) -> ScreenMatchResult:
    """Verify screen state with OCR-anchored presence + VLM role confirmation.

    Step 1: OCR screenshot → tokens → deterministic presence check.
    Step 2: VLM confirms the role of the found elements (not their presence).

    Args:
        screenshot_path: Image passed to both clients.
        expected_elements: Dicts read through ``"text"`` and ``"role"``.
        ocr_client: Callable returning the OCR text strings of the image.
        vlm_client: Callable returning the model answer for (image, prompt).
        context: Free text forwarded to the role-confirmation prompt.
        min_confidence: Minimum per-element and overall VLM confidence
            required for a match.

    Returns:
        A ``ScreenMatchResult``. OCR failure or any missing text yields
        ``match=False`` without calling the VLM. If the VLM call itself
        raises, the result is ``match=True`` with confidence 0.5 (presence
        was established by OCR, the role simply could not be checked).
    """
    if not expected_elements:
        return ScreenMatchResult(
            match=True,
            confidence=1.0,
            reason="no expected elements to verify",
        )
    # Step 1: OCR presence check (deterministic)
    try:
        ocr_tokens = ocr_client(screenshot_path)
    except Exception as e:
        logger.warning("verify_screen_match: OCR call failed (%s)", e)
        return ScreenMatchResult(
            match=False,
            confidence=0.0,
            reason=f"OCR error: {e}",
            expected_elements=expected_elements,
        )
    presence = ocr_presence_check(ocr_tokens, expected_elements)
    if not presence.all_found:
        observed = []
        for el in expected_elements:
            text = el.get("text", "")
            matched = presence.found_texts.get(text, "")
            observed.append({
                "role": el.get("role", "?"),
                "expected_text": text,
                "matched_ocr": matched,
                "found": matched != "",
            })
        return ScreenMatchResult(
            match=False,
            confidence=presence.presence_ratio,
            reason="OCR presence check: some texts not found",
            observed_elements=observed,
            expected_elements=expected_elements,
            mismatches=presence.missing,
        )
    # Step 2: VLM role confirmation (only for found elements)
    found_elements = []
    # Position in expected_elements -> 1-based index used in the prompt.
    # The prompt numbers found_elements, which skips text-less elements, so
    # the two numberings differ as soon as one expected element has no text.
    prompt_index_by_position: Dict[int, int] = {}
    for position, el in enumerate(expected_elements):
        text = el.get("text", "")
        matched_ocr = presence.found_texts.get(text, "")
        if text and matched_ocr:
            found_elements.append({
                "text": text,
                "expected_role": el.get("role", "?"),
                "matched_ocr": matched_ocr,
            })
            prompt_index_by_position[position] = len(found_elements)
    if not found_elements:
        # All elements had no text → presence trivially OK
        return ScreenMatchResult(
            match=True,
            confidence=1.0,
            reason="no text-based elements to verify",
            expected_elements=expected_elements,
        )
    prompt = build_role_confirm_prompt(found_elements, expected_elements, context)
    try:
        vlm_text = vlm_client(screenshot_path, prompt)
    except Exception as e:
        logger.warning("verify_screen_match: VLM role confirm failed (%s)", e)
        observed = []
        for el in expected_elements:
            text = el.get("text", "")
            observed.append({
                "role": el.get("role", "?"),
                "expected_text": text,
                "matched_ocr": presence.found_texts.get(text, ""),
                "found": True,
                "role_confirmed": False,
                "role_confidence": 0.0,
            })
        # NOTE(review): this passes regardless of min_confidence — confirm
        # that a VLM outage is meant to pass the stricter post-action gate.
        return ScreenMatchResult(
            match=True,
            confidence=0.5,
            reason=f"OCR presence OK, VLM role confirm failed: {e}",
            observed_elements=observed,
            expected_elements=expected_elements,
        )
    parsed = parse_role_confirm_response(vlm_text)
    overall_conf = _as_confidence(parsed.get("overall_confidence", 0.0))
    # Index the VLM entries by prompt index; first entry wins on duplicates.
    entries_by_index: Dict[int, Dict[str, Any]] = {}
    for entry in parsed.get("confirmed", []) or []:
        if not isinstance(entry, dict):
            continue
        try:
            entry_index = int(entry.get("index"))
        except (TypeError, ValueError):
            continue
        entries_by_index.setdefault(entry_index, entry)
    observed = []
    role_mismatches = []
    for position, el in enumerate(expected_elements):
        text = el.get("text", "")
        expected_role = el.get("role", "?")
        matched_ocr = presence.found_texts.get(text, "")
        prompt_index = prompt_index_by_position.get(position)
        role_entry = entries_by_index.get(prompt_index) if prompt_index is not None else None
        role_confirmed = False
        actual_role = ""
        role_confidence = 0.0
        if role_entry:
            # Only a real JSON ``true`` counts; a string such as "false"
            # would otherwise be truthy.
            role_confirmed = role_entry.get("role_confirmed", False) is True
            actual_role = role_entry.get("actual_role", "")
            role_confidence = _as_confidence(role_entry.get("confidence", 0.0))
        observed.append({
            "role": expected_role,
            "expected_text": text,
            "matched_ocr": matched_ocr,
            "found": True,
            "role_confirmed": role_confirmed,
            "actual_role": actual_role,
            "role_confidence": role_confidence,
        })
        if prompt_index is None:
            # Text-less element: it was never submitted to the VLM, so the
            # absence of a confirmation is not a role mismatch.
            continue
        if not role_confirmed or role_confidence < min_confidence:
            role_mismatches.append(
                f"{expected_role}: {text} (actual={actual_role}, conf={role_confidence:.2f})"
            )
    is_match = len(role_mismatches) == 0 and overall_conf >= min_confidence
    return ScreenMatchResult(
        match=is_match,
        confidence=overall_conf,
        reason=f"OCR presence: {presence.presence_ratio:.0%}, VLM role: {overall_conf:.2f}",
        observed_elements=observed,
        expected_elements=expected_elements,
        mismatches=presence.missing + role_mismatches,
    )
 def verify_before(
    screenshot_path: str,
    expected_elements: List[Dict[str, Any]],
    ocr_client: OcrClient,
    vlm_client: VlmClient,
    context: str = "",
 ) -> ScreenMatchResult:
    """Check pre-conditions on the screen BEFORE an action (OCR-anchored).

    Delegates to ``verify_screen_match`` with the context prefixed by
    ``PRE-ACTION:`` and ``min_confidence=0.7``.
    """
    pre_action_context = f"PRE-ACTION: {context}"
    return verify_screen_match(
        screenshot_path,
        expected_elements,
        ocr_client,
        vlm_client,
        context=pre_action_context,
        min_confidence=0.7,
    )
 def verify_after(
    screenshot_path: str,
    expected_elements: List[Dict[str, Any]],
    ocr_client: OcrClient,
    vlm_client: VlmClient,
    context: str = "",
 ) -> ScreenMatchResult:
    """Check post-conditions on the screen AFTER an action (OCR-anchored).

    Delegates to ``verify_screen_match`` with the context prefixed by
    ``POST-ACTION:`` and a stricter ``min_confidence=0.8``.
    """
    post_action_context = f"POST-ACTION: {context}"
    return verify_screen_match(
        screenshot_path,
        expected_elements,
        ocr_client,
        vlm_client,
        context=post_action_context,
        min_confidence=0.8,
    )
--- a/tests/unit/test_action_resolver.py
+++ b/tests/unit/test_action_resolver.py
@@ -0,0 +1,205 @@
 """Tests for core/navigation/action_resolver.py — coordinate conversion + OCR adapters."""
 import json
 import pytest
 from core.navigation.action_resolver import (
    NavigateCoords,
    NavigateResult,
    grounded_to_coords,
    make_ocr_simple_from_detailed,
    navigate_login,
 )
 from core.navigation.grounding import (
    CoordsCache,
    GroundedElement,
    OcrTokenInfo,
    OcrDetailedClient,
 )
 from core.navigation.visual_verifier import VlmClient
 # ── Mock factories ─────────────────────────────────────────────────────
 def mock_ocr_detailed_client_factory(tokens: list):
    """Build a fake detailed-OCR client that returns ``tokens`` for any image path."""
    def _fake_ocr(image_path: str) -> list:
        # The path is ignored; the very same list object is handed back.
        return tokens
    return _fake_ocr
 def mock_vlm_client_factory(response_json: dict):
    """Build a fake VLM client that answers every prompt with ``response_json`` as JSON text."""
    def _fake_vlm(image_path: str, prompt: str) -> str:
        # Serialized on each call; image path and prompt are ignored.
        serialized = json.dumps(response_json)
        return serialized
    return _fake_vlm
 # ── grounded_to_coords tests ───────────────────────────────────────────
 class TestGroundedToCoords:
    """grounded_to_coords conversion and NavigateCoords.to_dict serialization."""
    def test_basic_conversion(self):
        """The pixel centre is expressed as a fraction of the screen size."""
        grounded = GroundedElement(
            role="bouton",
            text="Connexion",
            bbox=(200, 50, 400, 100),
            center=(300, 75),
            confidence=0.9,
            method="ocr_anchor",
        )
        converted = grounded_to_coords(grounded, 1920, 1080)
        expected_x = 300 / 1920
        expected_y = 75 / 1080
        assert converted.x_pct == pytest.approx(expected_x, abs=0.01)
        assert converted.y_pct == pytest.approx(expected_y, abs=0.01)
        assert converted.method == "ocr_anchor"
        assert converted.bbox_pct is not None
    def test_to_dict(self):
        """to_dict exposes x_pct, y_pct and method."""
        as_dict = NavigateCoords(x_pct=0.15, y_pct=0.07, method="ocr_anchor").to_dict()
        assert as_dict["x_pct"] == 0.15
        assert as_dict["y_pct"] == 0.07
        assert as_dict["method"] == "ocr_anchor"
    def test_to_dict_with_bbox(self):
        """A bbox_pct given at construction appears in the dict with 4 values."""
        with_bbox = NavigateCoords(
            x_pct=0.15,
            y_pct=0.07,
            bbox_pct=(0.10, 0.05, 0.20, 0.09),
            method="vlm_grounder",
        )
        as_dict = with_bbox.to_dict()
        assert "bbox_pct" in as_dict
        assert len(as_dict["bbox_pct"]) == 4
 # ── make_ocr_simple_from_detailed tests ────────────────────────────────
 class TestMakeOcrSimpleFromDetailed:
    """make_ocr_simple_from_detailed: detailed OCR client → list-of-strings client."""
    def test_conversion(self):
        """Token texts come back as a plain list, in order."""
        detailed_client = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Password", bbox=(100, 100, 250, 140)),
        ])
        simple_client = make_ocr_simple_from_detailed(detailed_client)
        texts = simple_client("/tmp/test.png")
        assert texts == ["Login", "Password"]
    def test_empty_tokens(self):
        """No tokens in → empty list out."""
        simple_client = make_ocr_simple_from_detailed(
            mock_ocr_detailed_client_factory([])
        )
        texts = simple_client("/tmp/test.png")
        assert texts == []
 # ── navigate_login tests ───────────────────────────────────────────────
 class TestNavigateLogin:
    """navigate_login driven by mocked OCR and VLM clients."""
    def test_full_success(self):
        """All fields grounded → NavigateResult with coords."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90), confidence=0.95),
            OcrTokenInfo(text="Mot de passe", bbox=(100, 100, 250, 140), confidence=0.95),
            OcrTokenInfo(text="Connexion", bbox=(100, 150, 250, 190), confidence=0.95),
        ])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 2, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 3, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = navigate_login(
            "/tmp/login.png",
            ocr_client=ocr, vlm_client=vlm,
            skip_pre_verify=True,
        )
        # Identity checks instead of ``== True`` / ``== False`` (PEP 8, E712).
        assert result.all_resolved is True
        assert result.login_coords is not None
        assert result.password_coords is not None
        assert result.submit_coords is not None
        assert result.submit_coords.x_pct > 0
        assert result.submit_coords.y_pct > 0
    def test_no_clients_error(self):
        """Missing OCR/VLM clients → error."""
        result = navigate_login("/tmp/login.png", ocr_client=None, vlm_client=None)
        assert result.all_resolved is False
        assert "required" in result.error
    def test_pre_verify_fail(self):
        """Pre-verify fails → early abort."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Accueil", bbox=(0, 0, 100, 40)),
        ])
        vlm = mock_vlm_client_factory({})
        result = navigate_login(
            "/tmp/page.png",
            ocr_client=ocr, vlm_client=vlm,
            skip_pre_verify=False,
        )
        assert result.all_resolved is False
        assert result.pre_verify is not None
        assert result.pre_verify.match is False
    def test_skip_pre_verify(self):
        """Skip pre-verify → grounding runs without a pre_verify result."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Mot de passe", bbox=(100, 100, 250, 140)),
            OcrTokenInfo(text="Connexion", bbox=(100, 150, 250, 190)),
        ])
        vlm = mock_vlm_client_factory({})
        result = navigate_login(
            "/tmp/login.png",
            ocr_client=ocr, vlm_client=vlm,
            skip_pre_verify=True,
        )
        assert result.pre_verify is None  # skipped
        assert result.all_resolved is True
 # ── NavigateResult dataclass tests ─────────────────────────────────────
 class TestNavigateResult:
    """Defaults and field access of the NavigateResult dataclass."""
    def test_default(self):
        """A bare NavigateResult is unresolved, has no coords and no error."""
        result = NavigateResult()
        # Identity check instead of ``== False`` (PEP 8, E712).
        assert result.all_resolved is False
        assert result.login_coords is None
        assert result.error == ""
    def test_with_coords(self):
        """Coords passed at construction are readable back."""
        result = NavigateResult(
            login_coords=NavigateCoords(x_pct=0.15, y_pct=0.07, method="ocr_anchor"),
            all_resolved=True,
        )
        assert result.login_coords.x_pct == 0.15
 # ── Import validation ──────────────────────────────────────────────────
 class TestImportValidation:
    """Smoke tests: the navigation modules import and export the expected names."""
    def test_action_resolver_imports(self):
        """Every public name of action_resolver can be imported."""
        # The import itself is the check: a missing name raises ImportError.
        from core.navigation.action_resolver import (
            NavigateCoords,
            NavigateResult,
            grounded_to_coords,
            make_ocr_detailed_from_grid,
            make_ocr_simple_from_detailed,
            navigate_login,
        )
        for imported in (NavigateCoords, NavigateResult):
            assert imported is not None
    def test_navigation_package_handler(self):
        """_handle_navigate_action is importable from the package and callable."""
        from core.navigation import _handle_navigate_action as handler
        assert callable(handler)
    def test_navigation_package_exports(self):
        """The package __all__ lists the navigate exports."""
        import core.navigation as nav
        exported = nav.__all__
        for name in ("navigate_login", "NavigateResult", "_handle_navigate_action"):
            assert name in exported
--- a/tests/unit/test_grounding.py
+++ b/tests/unit/test_grounding.py
@@ -0,0 +1,406 @@
 """Tests for core/navigation/grounding.py — OCR-anchored grounding + VLM fallback + coords cache."""
 import json
 import pytest
 from core.navigation.grounding import (
    OcrTokenInfo,
    GroundedElement,
    CoordsCacheEntry,
    CoordsCache,
    bbox_center,
    make_element_key,
    ocr_anchor_ground,
    build_grounder_prompt,
    parse_grounder_response,
    ground_element,
 )
 from core.navigation.visual_verifier import normalize_text
 # ── Mock factories ─────────────────────────────────────────────────────
 def mock_ocr_detailed_client_factory(tokens: list):
    """Build a fake OcrDetailedClient that returns ``tokens`` for any image path."""
    def _fake_ocr(image_path: str) -> list:
        # The path is ignored; the very same list object is handed back.
        return tokens
    return _fake_ocr
 def mock_vlm_client_factory(response_json: dict):
    """Build a fake VlmClient that answers every prompt with ``response_json`` as JSON text."""
    def _fake_vlm(image_path: str, prompt: str) -> str:
        # Serialized on each call; image path and prompt are ignored.
        serialized = json.dumps(response_json)
        return serialized
    return _fake_vlm
 # ── bbox_center tests ──────────────────────────────────────────────────
 class TestBboxCenter:
    """bbox_center returns the midpoint of an (x1, y1, x2, y2) box."""
    def test_basic(self):
        centre = bbox_center((100, 200, 300, 400))
        assert centre == (200, 300)
    def test_zero_origin(self):
        centre = bbox_center((0, 0, 100, 100))
        assert centre == (50, 50)
    def test_symmetric(self):
        centre = bbox_center((10, 10, 20, 20))
        assert centre == (15, 15)
 # ── make_element_key tests ─────────────────────────────────────────────
 class TestMakeElementKey:
    """make_element_key builds a normalized key from role and text."""
    def test_basic(self):
        assert make_element_key("bouton", "Rechercher") == "bouton:rechercher"
    def test_normalized(self):
        """Accents are removed and case is lowered in the key."""
        element_key = make_element_key("champ", "Nom Prénom")
        assert "nom" in element_key
        assert "prenom" in element_key
    def test_consistent(self):
        """Case differences in the text do not change the key."""
        lower_key = make_element_key("bouton", "Connexion")
        upper_key = make_element_key("bouton", "CONNEXION")
        assert lower_key == upper_key
 # ── ocr_anchor_ground tests ────────────────────────────────────────────
 class TestOcrAnchorGround:
    """ocr_anchor_ground: locate a target from OCR tokens alone."""
    def test_exact_match(self):
        """An identical token grounds the target with the token's bbox and confidence."""
        token_bbox = (100, 50, 250, 90)
        grounded = ocr_anchor_ground(
            [OcrTokenInfo(text="Rechercher", bbox=token_bbox, confidence=0.95)],
            {"role": "bouton", "text": "Rechercher"},
        )
        assert grounded is not None
        assert grounded.method == "ocr_anchor"
        assert grounded.bbox == token_bbox
        assert grounded.center == (175, 70)
        assert grounded.confidence == 0.95
    def test_fuzzy_match(self):
        """An accented variant still matches; the raw OCR text is kept."""
        grounded = ocr_anchor_ground(
            [OcrTokenInfo(text="Rechércher", bbox=(100, 50, 250, 90))],
            {"role": "bouton", "text": "Rechercher"},
        )
        assert grounded is not None
        assert grounded.source_ocr_text == "Rechércher"
    def test_no_match(self):
        unrelated = [OcrTokenInfo(text="Accueil", bbox=(100, 50, 250, 90))]
        assert ocr_anchor_ground(unrelated, {"role": "bouton", "text": "Rechercher"}) is None
    def test_token_without_bbox(self):
        boxless = [OcrTokenInfo(text="Rechercher", bbox=None)]
        grounded = ocr_anchor_ground(boxless, {"role": "bouton", "text": "Rechercher"})
        assert grounded is None  # found text but no bbox → can't ground
    def test_no_text_target(self):
        full_screen = [OcrTokenInfo(text="Dashboard", bbox=(0, 0, 1920, 1080))]
        grounded = ocr_anchor_ground(full_screen, {"role": "page"})  # no text key
        assert grounded is None  # no text to match
    def test_multiple_tokens_first_match(self):
        """With several tokens, the one matching the target text is used."""
        candidates = [
            OcrTokenInfo(text="Accueil", bbox=(0, 0, 100, 40)),
            OcrTokenInfo(text="Connexion", bbox=(200, 50, 350, 90)),
        ]
        grounded = ocr_anchor_ground(candidates, {"role": "bouton", "text": "Connexion"})
        assert grounded is not None
        assert grounded.bbox == (200, 50, 350, 90)
 # ── build_grounder_prompt tests ────────────────────────────────────────
 class TestBuildGrounderPrompt:
    """build_grounder_prompt includes role, text, context and extra hints."""
    def test_basic_prompt(self):
        rendered = build_grounder_prompt({"role": "bouton", "text": "Connexion"})
        for fragment in ("bouton", "Connexion", "bbox"):
            assert fragment in rendered
    def test_with_context(self):
        target = {"role": "champ", "text": "Login"}
        rendered = build_grounder_prompt(target, context="page login DPI")
        assert "page login DPI" in rendered
    def test_with_extra(self):
        target = {"role": "champ", "text": "IPP", "extra": "colonne gauche"}
        rendered = build_grounder_prompt(target)
        assert "colonne gauche" in rendered
 # ── parse_grounder_response tests ──────────────────────────────────────
 class TestParseGrounderResponse:
    """parse_grounder_response on a 1920x1080 screen for a 'Connexion' button."""
    @staticmethod
    def _parse(raw_answer):
        """Run the parser with the screen size and target shared by all cases."""
        return parse_grounder_response(
            raw_answer, 1920, 1080, {"role": "bouton", "text": "Connexion"}
        )
    def test_valid_response(self):
        payload = {
            "found": True,
            "bbox": [0.1, 0.2, 0.3, 0.4],
            "confidence": 0.92,
            "description": "login button",
        }
        grounded = self._parse(json.dumps(payload))
        assert grounded is not None
        assert grounded.method == "vlm_grounder"
        assert grounded.bbox == (192, 216, 576, 432)  # 0.1*1920, 0.2*1080, 0.3*1920, 0.4*1080
        assert grounded.confidence == 0.92
    def test_not_found(self):
        payload = {"found": False, "bbox": [], "confidence": 0.0}
        assert self._parse(json.dumps(payload)) is None
    def test_json_in_markdown(self):
        fenced = '```json\n{"found": true, "bbox": [0.5, 0.5, 0.6, 0.6], "confidence": 0.8}\n```'
        assert self._parse(fenced) is not None
    def test_garbled_response(self):
        assert self._parse("I cannot find the element") is None
    def test_invalid_bbox_format(self):
        payload = {"found": True, "bbox": [0.1, 0.2], "confidence": 0.8}
        grounded = self._parse(json.dumps(payload))
        assert grounded is None  # bbox must have 4 values
    def test_confidence_as_string(self):
        payload = {"found": True, "bbox": [0.1, 0.2, 0.3, 0.4], "confidence": "0.85"}
        grounded = self._parse(json.dumps(payload))
        assert grounded is not None
        assert grounded.confidence == 0.85
    def test_bbox_clamped_to_screen(self):
        payload = {"found": True, "bbox": [-0.1, -0.1, 1.5, 1.5], "confidence": 0.7}
        grounded = self._parse(json.dumps(payload))
        assert grounded is not None
        left, top, right, bottom = grounded.bbox[0], grounded.bbox[1], grounded.bbox[2], grounded.bbox[3]
        assert left >= 0
        assert top >= 0
        assert right <= 1920
        assert bottom <= 1080
 # ── ground_element (composition) tests ─────────────────────────────────
 class TestGroundElement:
    """ground_element composition: cache, OCR anchor, VLM fallback."""
    @staticmethod
    def _connexion_target():
        """Fresh target dict for the 'Connexion' button used by most cases."""
        return {"role": "bouton", "text": "Connexion"}
    @staticmethod
    def _vlm_found(confidence):
        """Mock VLM that reports the element found at a fixed relative bbox."""
        return mock_vlm_client_factory({
            "found": True,
            "bbox": [0.2, 0.3, 0.4, 0.5],
            "confidence": confidence,
        })
    def _ground_connexion(self, ocr, vlm, **extra):
        """Call ground_element on /tmp/login.png for the 'Connexion' target."""
        return ground_element(
            "/tmp/login.png",
            self._connexion_target(),
            ocr_client=ocr,
            vlm_client=vlm,
            **extra,
        )
    def test_ocr_anchor_success(self):
        """OCR finds text with bbox → grounded via OCR (deterministic)."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Connexion", bbox=(200, 50, 350, 90), confidence=0.95),
        ])
        grounded = self._ground_connexion(ocr, mock_vlm_client_factory({}))
        assert grounded is not None
        assert grounded.method == "ocr_anchor"
        assert grounded.bbox == (200, 50, 350, 90)
    def test_vlm_fallback(self):
        """OCR doesn't find text → VLM grounder succeeds."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Accueil", bbox=(0, 0, 100, 40)),
        ])
        grounded = self._ground_connexion(ocr, self._vlm_found(0.85))
        assert grounded is not None
        assert grounded.method == "vlm_grounder"
    def test_not_found_any_method(self):
        """Both OCR and VLM fail → None."""
        ocr = mock_ocr_detailed_client_factory(
            [OcrTokenInfo(text="Accueil", bbox=(0, 0, 100, 40))]
        )
        vlm = mock_vlm_client_factory({"found": False, "bbox": [], "confidence": 0.0})
        assert self._ground_connexion(ocr, vlm) is None
    def test_ocr_error_vlm_fallback(self):
        """OCR engine fails → VLM fallback."""
        def broken_ocr(image_path):
            raise RuntimeError("OCR engine down")
        grounded = self._ground_connexion(broken_ocr, self._vlm_found(0.8))
        assert grounded is not None
        assert grounded.method == "vlm_grounder"
    def test_vlm_error_ocr_success(self):
        """VLM fails but OCR succeeds → OCR anchor used."""
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Connexion", bbox=(200, 50, 350, 90)),
        ])
        def broken_vlm(image_path, prompt):
            raise RuntimeError("VLM down")
        grounded = self._ground_connexion(ocr, broken_vlm)
        assert grounded is not None
        assert grounded.method == "ocr_anchor"
    def test_both_fail(self):
        """OCR + VLM both fail → None."""
        def broken_ocr(image_path):
            raise RuntimeError("OCR down")
        def broken_vlm(image_path, prompt):
            raise RuntimeError("VLM down")
        assert self._ground_connexion(broken_ocr, broken_vlm) is None
    def test_no_text_target(self):
        """Target without text → VLM grounder skipped, None."""
        grounded = ground_element(
            "/tmp/page.png",
            {"role": "page"},
            ocr_client=mock_ocr_detailed_client_factory([]),
            vlm_client=mock_vlm_client_factory({}),
        )
        assert grounded is None
    def test_cache_hit(self):
        """Cached coords exist → returned directly."""
        cache = CoordsCache()
        cache.put("bouton:connexion", (200, 50, 350, 90), (275, 70), "ocr_anchor")
        grounded = self._ground_connexion(
            mock_ocr_detailed_client_factory([]),
            mock_vlm_client_factory({}),
            coords_cache=cache,
        )
        assert grounded is not None
        assert grounded.method == "cache"
        assert grounded.bbox == (200, 50, 350, 90)
    def test_cache_stored_on_ocr_anchor(self):
        """OCR anchor result → stored in cache."""
        cache = CoordsCache()
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Connexion", bbox=(200, 50, 350, 90)),
        ])
        self._ground_connexion(ocr, mock_vlm_client_factory({}), coords_cache=cache)
        stored = cache.get("bouton:connexion")
        assert stored is not None
        assert stored.bbox == (200, 50, 350, 90)
        assert stored.method == "ocr_anchor"
    def test_cache_stored_on_vlm_grounder(self):
        """VLM grounder result → stored in cache."""
        cache = CoordsCache()
        self._ground_connexion(
            mock_ocr_detailed_client_factory([]),
            self._vlm_found(0.85),
            coords_cache=cache,
        )
        stored = cache.get("bouton:connexion")
        assert stored is not None
        assert stored.method == "vlm_grounder"
 # ── CoordsCache tests ──────────────────────────────────────────────────
 class TestCoordsCache:
    """CoordsCache: put / get / invalidate / clear / keys and validation counting."""
    @staticmethod
    def _cache_with_connexion():
        """Return a cache already holding the 'bouton:connexion' OCR-anchor entry."""
        cache = CoordsCache()
        cache.put("bouton:connexion", (200, 50, 350, 90), (275, 70), "ocr_anchor")
        return cache
    @staticmethod
    def _cache_with_a_and_b():
        """Return a cache holding two entries keyed 'a' and 'b'."""
        cache = CoordsCache()
        cache.put("a", (0, 0, 10, 10), (5, 5), "ocr_anchor")
        cache.put("b", (0, 0, 20, 20), (10, 10), "vlm_grounder")
        return cache
    def test_put_and_get(self):
        stored = self._cache_with_connexion().get("bouton:connexion")
        assert stored is not None
        assert stored.bbox == (200, 50, 350, 90)
    def test_get_missing(self):
        empty_cache = CoordsCache()
        assert empty_cache.get("bouton:connexion") is None
    def test_invalidate(self):
        cache = self._cache_with_connexion()
        cache.invalidate("bouton:connexion")
        assert cache.get("bouton:connexion") is None
    def test_clear(self):
        cache = self._cache_with_a_and_b()
        cache.clear()
        assert cache.get("a") is None
        assert cache.get("b") is None
    def test_keys(self):
        cache = self._cache_with_a_and_b()
        assert sorted(cache.keys()) == ["a", "b"]
    def test_update_existing(self):
        """A second put on the same key replaces the bbox and bumps the count."""
        cache = self._cache_with_connexion()
        cache.put("bouton:connexion", (300, 60, 400, 100), (350, 80), "vlm_grounder")
        stored = cache.get("bouton:connexion")
        assert stored is not None
        assert stored.bbox == (300, 60, 400, 100)  # updated
        assert stored.validation_count == 2
    def test_validation_count_increments(self):
        """Each put on the same key adds one to validation_count."""
        cache = CoordsCache()
        for expected_count in (1, 2):
            cache.put("a", (0, 0, 10, 10), (5, 5), "ocr_anchor")
            assert cache.get("a").validation_count == expected_count
--- a/tests/unit/test_navigate_handler_e2e.py
+++ b/tests/unit/test_navigate_handler_e2e.py
@@ -0,0 +1,151 @@
 """End-to-end mocked test for navigate action handler — 3 edge-case scenarios.
 Tests the _handle_navigate_action handler with mocked OCR/VLM, verifying:
 - Nominal: all resolved, coords populated in variables
 - OCR miss + VLM fail: no phantom coords, all_resolved=False
 - No screenshot: error="no_screenshot", False return
 NOTE: The handler uses lazy imports inside its body. Mock targets must be
 at the source module (core.navigation.action_resolver.navigate_login) rather
 than the package-level re-export (core.navigation.navigate_login).
 """
 import pytest
 from unittest.mock import patch, MagicMock
 from core.navigation.action_resolver import NavigateCoords, NavigateResult
 from core.navigation import _handle_navigate_action
 def _patch_all_deps(navigate_login_result=None, navigate_login_side_effect=None):
    """Return stacked patches for handler's lazy imports + navigate_login."""
    nl_mock = MagicMock(return_value=navigate_login_result) if navigate_login_result else None
    if navigate_login_side_effect:
        nl_mock = MagicMock(side_effect=navigate_login_side_effect)
    return (
        patch("core.llm.extract_grid_from_image", return_value=[]),
        patch("core.extraction.vlm_client.make_vllm_client", return_value=MagicMock()),
        patch("core.navigation.action_resolver.make_ocr_detailed_from_grid",
              return_value=MagicMock(return_value=[])),
        patch("core.navigation.action_resolver.navigate_login", nl_mock),
    )
 class TestNominalCase:
    """Happy path: every field is grounded, so coords land in the replay variables."""
    def test_nominal_coords_populated(self):
        """A fully resolved result returns ``True`` and stores all three coord entries."""
        resolved = NavigateResult(
            login_coords=NavigateCoords(x_pct=0.15, y_pct=0.07, method="ocr_anchor"),
            password_coords=NavigateCoords(x_pct=0.15, y_pct=0.25, method="ocr_anchor"),
            submit_coords=NavigateCoords(x_pct=0.50, y_pct=0.35, method="ocr_anchor"),
            all_resolved=True,
        )
        replay_state = {
            "last_screenshot_path": "/tmp/login_screen.png",
            "screen_width": 1920,
            "screen_height": 1080,
        }
        grid_p, vlm_p, ocr_p, login_p = _patch_all_deps(navigate_login_result=resolved)
        with grid_p, vlm_p, ocr_p, login_p:
            outcome = _handle_navigate_action(
                {"parameters": {"action": "login"}}, replay_state, "test-session",
            )
        assert outcome is True
        stored = replay_state["variables"]
        for coords_key in (
            "navigate_login_coords",
            "navigate_password_coords",
            "navigate_submit_coords",
        ):
            assert coords_key in stored
        assert stored["navigate_login_coords"]["x_pct"] == 0.15
        assert stored["navigate_result"]["all_resolved"] is True
 class TestOcrMissVlmFail:
    """Neither OCR nor the VLM grounder locates the form: no coords may be stored."""
    def test_no_phantom_coords_on_failure(self):
        """An unresolved result returns ``False`` and records only the error."""
        unresolved = NavigateResult(
            login_coords=None,
            password_coords=None,
            submit_coords=None,
            all_resolved=False,
            error="grounding failed — no login form elements found",
        )
        replay_state = {
            "last_screenshot_path": "/tmp/no_login_form.png",
            "screen_width": 1920,
            "screen_height": 1080,
        }
        grid_p, vlm_p, ocr_p, login_p = _patch_all_deps(navigate_login_result=unresolved)
        with grid_p, vlm_p, ocr_p, login_p:
            outcome = _handle_navigate_action(
                {"parameters": {"action": "login"}}, replay_state, "test-session",
            )
        assert outcome is False
        stored = replay_state["variables"]
        # Coords that are None must not be written to the variables at all.
        for coords_key in (
            "navigate_login_coords",
            "navigate_password_coords",
            "navigate_submit_coords",
        ):
            assert coords_key not in stored
        # The failure itself is reported through navigate_result.
        summary = stored["navigate_result"]
        assert summary["all_resolved"] is False
        assert "grounding failed" in summary["error"]
 class TestNoScreenshot:
    """Without a usable screenshot path the handler reports ``no_screenshot``."""
    def test_no_screenshot_error(self):
        """An empty replay state (no screenshot key at all) yields the error."""
        state = {}
        outcome = _handle_navigate_action(
            {"parameters": {"action": "login"}}, state, "test-session",
        )
        assert outcome is False
        assert state["variables"]["navigate_login_coords"]["error"] == "no_screenshot"
    def test_empty_screenshot_path(self):
        """An empty-string screenshot path is treated like a missing one."""
        state = {"last_screenshot_path": ""}
        outcome = _handle_navigate_action(
            {"parameters": {"action": "login"}}, state, "test-session",
        )
        assert outcome is False
        assert state["variables"]["navigate_login_coords"]["error"] == "no_screenshot"
 class TestNeverFailReplay:
    """The handler must swallow failures and answer ``False`` instead of raising."""
    def test_missing_parameters(self):
        """An action dict without a "parameters" key still returns ``False``."""
        state = {"last_screenshot_path": "/tmp/x.png"}
        unresolved = NavigateResult(all_resolved=False, error="no params")
        grid_p, vlm_p, ocr_p, login_p = _patch_all_deps(navigate_login_result=unresolved)
        with grid_p, vlm_p, ocr_p, login_p:
            outcome = _handle_navigate_action({}, state, "test-session")
        assert outcome is False
    def test_exception_in_inner_call(self):
        """An exception raised by ``navigate_login`` is reported in the variables."""
        state = {
            "last_screenshot_path": "/tmp/login.png",
            "screen_width": 1920,
            "screen_height": 1080,
        }
        grid_p, vlm_p, ocr_p, login_p = _patch_all_deps(
            navigate_login_side_effect=RuntimeError("boom"),
        )
        with grid_p, vlm_p, ocr_p, login_p:
            outcome = _handle_navigate_action(
                {"parameters": {"action": "login"}}, state, "test-session",
            )
        assert outcome is False
        summary = state["variables"]["navigate_result"]
        assert summary["all_resolved"] is False
        assert "boom" in summary["error"]
--- a/tests/unit/test_navigate_wiring.py
+++ b/tests/unit/test_navigate_wiring.py
@@ -0,0 +1,62 @@
 """Boot non-regression test for navigate wiring — catches import/regression bugs.
 This test would have caught the ImportError where _handle_navigate_action
 was incorrectly imported from replay_engine instead of core/navigation.
 """
 import pytest
 class TestApiStreamImports:
    """(1) Importing api_stream must succeed."""
    def test_import_api_stream(self):
        """The import itself is the check: any ImportError fails the test."""
        import agent_v0.server_v1.api_stream as api_stream
        assert api_stream is not None
 class TestAllowedActionTypes:
    """(2) 'navigate' must be registered in both replay_engine action-type sets."""
    def test_navigate_in_allowed(self):
        """'navigate' is a member of ``_ALLOWED_ACTION_TYPES``."""
        from agent_v0.server_v1 import replay_engine
        assert "navigate" in replay_engine._ALLOWED_ACTION_TYPES
    def test_navigate_in_server_side(self):
        """'navigate' is a member of ``_SERVER_SIDE_ACTION_TYPES``."""
        from agent_v0.server_v1 import replay_engine
        assert "navigate" in replay_engine._SERVER_SIDE_ACTION_TYPES
 class TestNavigateHandlerCallable:
    """(3) ``_handle_navigate_action`` is reachable and has the expected signature."""
    def test_handler_imported_from_core_navigation(self):
        """The handler is exported by the ``core.navigation`` package."""
        from core.navigation import _handle_navigate_action as handler
        assert callable(handler)
    def test_handler_imported_in_api_stream(self):
        """api_stream exposes the handler as a module attribute."""
        from agent_v0.server_v1 import api_stream
        assert callable(api_stream._handle_navigate_action)
    def test_handler_signature(self):
        """Signature: (action: dict, replay_state: dict, session_id: str) -> bool."""
        import inspect
        from core.navigation import _handle_navigate_action
        signature = inspect.signature(_handle_navigate_action)
        param_names = list(signature.parameters)
        assert param_names == ["action", "replay_state", "session_id"]
        assert signature.return_annotation == bool
 class TestDispatchBlockExists:
    """Verify the navigate dispatch block is wired in api_stream."""
    def test_navigate_dispatch_reference(self):
        """Source must contain the navigate dispatch elif block."""
        # Import inspect locally (as test_handler_signature does) so this test
        # does not depend on a module-level import placed after the class.
        import inspect
        import agent_v0.server_v1.api_stream as mod
        source = inspect.getsource(mod)
        assert "type_ == \"navigate\"" in source
 import inspect
--- a/tests/unit/test_visual_login.py
+++ b/tests/unit/test_visual_login.py
@@ -0,0 +1,336 @@
 """Tests for core/navigation/visual_login.py — login form resolution + verification."""
 import json
 import pytest
 from core.navigation.visual_login import (
    LoginFormConfig,
    LoginResolution,
    dpi_urgences_login_config,
    verify_login_visible,
    verify_login_success,
    resolve_login_form,
    _ocr_detailed_to_simple,
 )
 from core.navigation.grounding import (
    CoordsCache,
    GroundedElement,
    OcrTokenInfo,
    OcrDetailedClient,
 )
 from core.navigation.visual_verifier import (
    ScreenMatchResult,
    VlmClient,
    OcrClient,
 )
 # ── Mock factories ─────────────────────────────────────────────────────
 def mock_ocr_detailed_client_factory(tokens: list):
    """Build a stand-in OcrDetailedClient that ignores the image and returns *tokens*."""
    def _fake_detailed_ocr(image_path: str) -> list:
        # The very same list object is handed back on every call.
        return tokens
    return _fake_detailed_ocr
 def mock_ocr_simple_client_factory(tokens: list):
    """Build a stand-in text-only OcrClient that always returns *tokens*."""
    def _fake_simple_ocr(image_path: str) -> list:
        # The image path is accepted for interface compatibility but unused.
        return tokens
    return _fake_simple_ocr
 def mock_vlm_client_factory(response_json: dict):
    """Build a stand-in VlmClient that answers every prompt with *response_json* as JSON text."""
    def _fake_vlm(image_path: str, prompt: str) -> str:
        # Serialised at call time; image and prompt are ignored.
        serialized = json.dumps(response_json)
        return serialized
    return _fake_vlm
 # ── Default config tests ───────────────────────────────────────────────
 class TestDpiUrgencesLoginConfig:
    """Sanity checks on the values returned by ``dpi_urgences_login_config``."""
    def test_default_config(self):
        """Field roles/texts, success elements and context carry the expected defaults."""
        cfg = dpi_urgences_login_config()
        login_spec = cfg.login_field
        assert login_spec["role"] == "champ"
        assert login_spec["text"] == "Login"
        assert cfg.password_field["text"] == "Mot de passe"
        assert cfg.submit_button["text"] == "Connexion"
        assert len(cfg.success_elements) >= 1
        assert cfg.context != ""
    def test_config_fields_are_dicts(self):
        """The three form-field descriptors are plain dicts."""
        cfg = dpi_urgences_login_config()
        for descriptor in (cfg.login_field, cfg.password_field, cfg.submit_button):
            assert isinstance(descriptor, dict)
 # ── _ocr_detailed_to_simple tests ────────────────────────────────────
 class TestOcrDetailedToSimple:
    """``_ocr_detailed_to_simple`` adapts a detailed OCR client into a text-only one."""
    def test_conversion(self):
        """The adapted client returns the token texts, in order."""
        detailed_tokens = [
            OcrTokenInfo(text="Login", bbox=(100, 50, 200, 90)),
            OcrTokenInfo(text="Password", bbox=(100, 100, 200, 140)),
        ]
        adapted = _ocr_detailed_to_simple(
            mock_ocr_detailed_client_factory(detailed_tokens)
        )
        texts = adapted("/tmp/test.png")
        assert texts == ["Login", "Password"]
    def test_empty_tokens(self):
        """No detailed tokens in, empty list out."""
        adapted = _ocr_detailed_to_simple(mock_ocr_detailed_client_factory([]))
        texts = adapted("/tmp/test.png")
        assert texts == []
 # ── verify_login_visible tests ────────────────────────────────────────
 class TestVerifyLoginVisible:
    """``verify_login_visible``: OCR presence plus VLM role confirmation of the form."""
    def test_form_visible(self):
        """All 3 fields found by OCR + roles confirmed → match."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
            context="DPI login",
        )
        ocr = mock_ocr_simple_client_factory(["Login", "Mot de passe", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 2, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 3, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = verify_login_visible("/tmp/login.png", config, ocr, vlm)
        # Identity check instead of `== True`, like the handler tests.
        assert result.match is True
    def test_form_missing_button(self):
        """Connexion button not found by OCR → mismatch."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        # "Connexion" is deliberately absent from the OCR tokens.
        ocr = mock_ocr_simple_client_factory(["Login", "Mot de passe"])
        vlm = mock_vlm_client_factory({})
        result = verify_login_visible("/tmp/login.png", config, ocr, vlm)
        assert result.match is False
    def test_form_wrong_role(self):
        """OCR finds text but VLM says button is a label → mismatch."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        ocr = mock_ocr_simple_client_factory(["Login", "Mot de passe", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 2, "role_confirmed": True, "actual_role": "champ", "confidence": 0.9},
                {"index": 3, "role_confirmed": False, "actual_role": "label", "confidence": 0.5},
            ],
            "overall_confidence": 0.5,
        })
        result = verify_login_visible("/tmp/login.png", config, ocr, vlm)
        assert result.match is False
 # ── verify_login_success tests ────────────────────────────────────────
 class TestVerifyLoginSuccess:
    """``verify_login_success``: post-login check against ``success_elements``."""
    def test_dashboard_visible(self):
        """Dashboard found by OCR + role confirmed → success."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
            success_elements=[{"role": "page", "text": "Dashboard"}],
        )
        ocr = mock_ocr_simple_client_factory(["Dashboard", "Accueil"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        })
        result = verify_login_success("/tmp/dashboard.png", config, ocr, vlm)
        # Identity check instead of `== True`, like the handler tests.
        assert result.match is True
    def test_no_success_elements(self):
        """Config has no success_elements → can't verify."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
            success_elements=[],  # deliberately empty
        )
        ocr = mock_ocr_simple_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({})
        result = verify_login_success("/tmp/page.png", config, ocr, vlm)
        assert result.match is False
        assert "no success_elements" in result.reason
    def test_still_on_login_page(self):
        """After login, still seeing login form → mismatch."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
            success_elements=[{"role": "page", "text": "Dashboard"}],
        )
        # OCR sees login form texts, not Dashboard
        ocr = mock_ocr_simple_client_factory(["Login", "Mot de passe", "Connexion"])
        vlm = mock_vlm_client_factory({})
        result = verify_login_success("/tmp/still_login.png", config, ocr, vlm)
        assert result.match is False
 # ── resolve_login_form tests ──────────────────────────────────────────
 class TestResolveLoginForm:
    """``resolve_login_form``: grounding of the three form elements (OCR, VLM, cache)."""
    def test_all_fields_ocr_anchor(self):
        """All 3 fields found by OCR with bbox → full resolution."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Mot de passe", bbox=(100, 100, 250, 140)),
            OcrTokenInfo(text="Connexion", bbox=(100, 150, 250, 190)),
        ])
        vlm = mock_vlm_client_factory({})
        result = resolve_login_form("/tmp/login.png", config, ocr, vlm)
        # Identity check instead of `== True`, like the handler tests.
        assert result.all_resolved is True
        assert result.login_field is not None
        assert result.login_field.method == "ocr_anchor"
        assert result.password_field is not None
        assert result.submit_button is not None
        assert result.method == "ocr_anchor"
    def test_partial_ocr_vlm_fallback(self):
        """Login + password by OCR, button by VLM → mixed method."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Password"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Password", bbox=(100, 100, 250, 140)),
            # Connexion not in OCR → VLM fallback
        ])
        vlm = mock_vlm_client_factory({
            "found": True,
            "bbox": [0.2, 0.4, 0.4, 0.5],
            "confidence": 0.85,
        })
        result = resolve_login_form("/tmp/login.png", config, ocr, vlm)
        assert result.all_resolved is True
        assert result.login_field.method == "ocr_anchor"
        assert result.submit_button.method == "vlm_grounder"
        assert result.method == "mixed"
    def test_incomplete_resolution(self):
        """Button not found by OCR or VLM → incomplete."""
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Password"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Password", bbox=(100, 100, 250, 140)),
        ])
        vlm = mock_vlm_client_factory({"found": False, "bbox": [], "confidence": 0.0})
        result = resolve_login_form("/tmp/login.png", config, ocr, vlm)
        assert result.all_resolved is False
        assert result.submit_button is None
    def test_cache_hit(self):
        """All fields cached → returned directly."""
        cache = CoordsCache()
        cache.put("champ:login", (100, 50, 250, 90), (175, 70), "ocr_anchor")
        cache.put("champ:mot de passe", (100, 100, 250, 140), (175, 120), "ocr_anchor")
        cache.put("bouton:connexion", (100, 150, 250, 190), (175, 170), "ocr_anchor")
        config = LoginFormConfig(
            login_field={"role": "champ", "text": "Login"},
            password_field={"role": "champ", "text": "Mot de passe"},
            submit_button={"role": "bouton", "text": "Connexion"},
        )
        # OCR returns nothing and the VLM returns an empty object, so the
        # expected coordinates below can only come from the cache.
        ocr = mock_ocr_detailed_client_factory([])
        vlm = mock_vlm_client_factory({})
        result = resolve_login_form(
            "/tmp/login.png", config, ocr, vlm, coords_cache=cache,
        )
        assert result.all_resolved is True
        assert result.method == "cache"
        assert result.login_field.center == (175, 70)
    def test_with_dpi_default_config(self):
        """Full flow with dpi_urgences_login_config."""
        config = dpi_urgences_login_config()
        ocr = mock_ocr_detailed_client_factory([
            OcrTokenInfo(text="Login", bbox=(100, 50, 250, 90)),
            OcrTokenInfo(text="Mot de passe", bbox=(100, 100, 250, 140)),
            OcrTokenInfo(text="Connexion", bbox=(100, 150, 250, 190)),
        ])
        vlm = mock_vlm_client_factory({})
        result = resolve_login_form("/tmp/login.png", config, ocr, vlm)
        assert result.all_resolved is True
 # ── LoginResolution describe tests ────────────────────────────────────
 class TestLoginResolutionDescribe:
    """Checks on the text produced by ``LoginResolution.describe``."""
    def test_all_resolved(self):
        """A fully resolved form is described with "OK" and the login/button markers."""
        login_el = GroundedElement(
            role="champ", text="Login",
            bbox=(100, 50, 250, 90), center=(175, 70),
            confidence=0.9, method="ocr_anchor",
        )
        password_el = GroundedElement(
            role="champ", text="Mot de passe",
            bbox=(100, 100, 250, 140), center=(175, 120),
            confidence=0.9, method="ocr_anchor",
        )
        submit_el = GroundedElement(
            role="bouton", text="Connexion",
            bbox=(100, 150, 250, 190), center=(175, 170),
            confidence=0.9, method="ocr_anchor",
        )
        resolved = LoginResolution(
            login_field=login_el,
            password_field=password_el,
            submit_button=submit_el,
            all_resolved=True,
            method="ocr_anchor",
        )
        summary = resolved.describe()
        for marker in ("OK", "login@", "button@"):
            assert marker in summary
    def test_incomplete(self):
        """With no element resolved the description flags the form as incomplete."""
        unresolved = LoginResolution(
            login_field=None,
            password_field=None,
            submit_button=None,
            all_resolved=False,
            method="",
        )
        summary = unresolved.describe()
        for marker in ("INCOMPLETE", "NOT FOUND"):
            assert marker in summary
--- a/tests/unit/test_visual_verifier.py
+++ b/tests/unit/test_visual_verifier.py
@@ -0,0 +1,490 @@
 """Tests for core/navigation/visual_verifier.py — OCR-anchored architecture.
 Tests pure functions (normalize_text, fuzzy_match, ocr_presence_check,
 build_role_confirm_prompt, parse_role_confirm_response) offline,
 then verifies verify_screen_match with mock OcrClient + VlmClient.
 """
 import json
 import pytest
 from core.navigation.visual_verifier import (
    normalize_text,
    fuzzy_match,
    ocr_presence_check,
    build_role_confirm_prompt,
    parse_role_confirm_response,
    verify_screen_match,
    verify_before,
    verify_after,
    ScreenMatchResult,
    OcrPresenceResult,
 )
 # ── Mock factories ─────────────────────────────────────────────────────
 def mock_ocr_client_factory(tokens: list):
    """Return a fake OcrClient: a callable that yields *tokens* for any image path."""
    def _fake_ocr(image_path: str) -> list:
        # The path is ignored; the same list object is returned every time.
        return tokens
    return _fake_ocr
 def mock_vlm_client_factory(response_json: dict):
    """Return a fake VlmClient whose reply is always *response_json* dumped as JSON."""
    def _fake_vlm(image_path: str, prompt: str) -> str:
        # Both arguments are ignored; serialisation happens on each call.
        payload = json.dumps(response_json)
        return payload
    return _fake_vlm
 # ── normalize_text tests ──────────────────────────────────────────────
 class TestNormalizeText:
    """``normalize_text``: case folding, accent stripping and whitespace collapsing."""
    def test_lowercase(self):
        """Upper-case input comes back lower-cased."""
        cleaned = normalize_text("RECHERCHER")
        assert cleaned == "rechercher"
    def test_strip_accents(self):
        """Accented letters are reduced to their base letter."""
        cleaned = normalize_text("Recherché")
        assert cleaned == "recherche"
    def test_collapse_whitespace(self):
        """Outer whitespace is trimmed and inner runs become a single space."""
        cleaned = normalize_text("  hello   world  ")
        assert cleaned == "hello world"
    def test_combined(self):
        """Case, accents and whitespace are all handled in one pass."""
        cleaned = normalize_text("  Nom Prénom  ")
        assert cleaned == "nom prenom"
    def test_empty(self):
        """The empty string stays empty."""
        cleaned = normalize_text("")
        assert cleaned == ""
    def test_numbers_preserved(self):
        """Digits are left untouched."""
        cleaned = normalize_text("IPP 12345")
        assert cleaned == "ipp 12345"
 # ── fuzzy_match tests ─────────────────────────────────────────────────
 class TestFuzzyMatch:
    """``fuzzy_match``: exact, containment and ratio-based matching of two texts.

    Results are checked with ``is True`` / ``is False`` rather than ``==``,
    which pins the return value to a real bool.
    """
    def test_exact_match(self):
        """Identical strings match."""
        assert fuzzy_match("Rechercher", "Rechercher") is True
    def test_case_insensitive(self):
        """Case differences are ignored."""
        assert fuzzy_match("rechercher", "RECHERCHER") is True
    def test_accent_match(self):
        """Accent differences are ignored."""
        assert fuzzy_match("Recherché", "Recherche") is True
    def test_substring_containment(self):
        """Expected text contained in a longer OCR token matches."""
        assert fuzzy_match("Rechercher", "Bouton Rechercher") is True
    def test_reverse_containment(self):
        """OCR token contained in the expected text matches."""
        assert fuzzy_match("Nom Prénom Patient", "Nom") is True
    def test_fuzzy_ratio(self):
        """Similar but not exact/substring — ratio ~0.90 passes at threshold 0.8."""
        assert fuzzy_match("Connexion", "Connection", threshold=0.8) is True
    def test_no_match(self):
        """Unrelated words do not match."""
        assert fuzzy_match("Dashboard", "Login", threshold=0.8) is False
    def test_custom_threshold(self):
        """"Connection" vs "Connexion" (ratio ~0.90) fails at threshold 0.95."""
        assert fuzzy_match("Connexion", "Connection", threshold=0.95) is False
 # ── ocr_presence_check tests ──────────────────────────────────────────
 class TestOcrPresenceCheck:
    """``ocr_presence_check``: which expected element texts appear among OCR tokens.

    Boolean results are checked with ``is True`` / ``is False`` rather than ``==``.
    """
    def test_all_found(self):
        """Every expected text is present → ratio 1.0, nothing missing."""
        tokens = ["Rechercher", "Connexion", "Nom Patient"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found is True
        assert result.presence_ratio == 1.0
        assert len(result.missing) == 0
        assert result.found_texts["Rechercher"] == "Rechercher"
    def test_partial_found(self):
        """One of two expected texts present → ratio 0.5 and the other reported."""
        tokens = ["Rechercher"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
            {"role": "bouton", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found is False
        assert result.presence_ratio == 0.5
        assert "bouton: Connexion" in result.missing
    def test_none_found(self):
        """No expected text present → ratio 0.0."""
        tokens = ["Accueil", "Paramètres"]
        elements = [
            {"role": "bouton", "text": "Rechercher"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found is False
        assert result.presence_ratio == 0.0
        assert "bouton: Rechercher" in result.missing
    def test_fuzzy_match_in_presence(self):
        """An OCR token with an accent variation still counts as found."""
        tokens = ["Rechércher"]  # OCR with accent variation
        elements = [{"role": "bouton", "text": "Rechercher"}]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found is True
    def test_empty_tokens(self):
        """No OCR tokens at all → nothing can be found."""
        result = ocr_presence_check([], [{"role": "bouton", "text": "Login"}])
        assert result.all_found is False
        assert result.presence_ratio == 0.0
    def test_empty_elements(self):
        """Nothing expected → trivially all found with ratio 1.0."""
        result = ocr_presence_check(["Login", "Password"], [])
        assert result.all_found is True
        assert result.presence_ratio == 1.0
    def test_no_text_key(self):
        """An element without a "text" key has nothing to check → trivially found."""
        elements = [{"role": "page"}]  # no text key
        result = ocr_presence_check(["Dashboard"], elements)
        assert result.all_found is True
    def test_multiple_elements_same_text(self):
        """Two elements sharing one text are both satisfied by a single token."""
        tokens = ["Connexion"]
        elements = [
            {"role": "bouton", "text": "Connexion"},
            {"role": "label", "text": "Connexion"},
        ]
        result = ocr_presence_check(tokens, elements)
        assert result.all_found is True
 # ── build_role_confirm_prompt tests ───────────────────────────────────
 class TestBuildRoleConfirmPrompt:
    """``build_role_confirm_prompt``: content checks on the generated VLM prompt."""
    def test_basic_prompt(self):
        """The prompt quotes the text, names the expected role and asks for role_confirmed."""
        ocr_hits = [
            {"text": "Rechercher", "expected_role": "bouton", "matched_ocr": "Rechercher"},
        ]
        wanted = [{"role": "bouton", "text": "Rechercher"}]
        prompt_text = build_role_confirm_prompt(ocr_hits, wanted)
        for fragment in ("Text \"Rechercher\"", "expected role: bouton", "role_confirmed"):
            assert fragment in prompt_text
    def test_with_context(self):
        """A supplied context string is echoed on a "Context:" line."""
        ocr_hits = [
            {"text": "Connexion", "expected_role": "bouton", "matched_ocr": "Connexion"},
        ]
        wanted = [{"role": "bouton", "text": "Connexion"}]
        prompt_text = build_role_confirm_prompt(ocr_hits, wanted, context="page login DPI")
        assert "Context: page login DPI" in prompt_text
    def test_multiple_elements(self):
        """Three found elements produce the markers "1.", "2." and "3."."""
        ocr_hits = [
            {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"},
            {"text": "Password", "expected_role": "champ", "matched_ocr": "Password"},
            {"text": "Connexion", "expected_role": "bouton", "matched_ocr": "Connexion"},
        ]
        wanted = [
            {"role": "champ", "text": "Login"},
            {"role": "champ", "text": "Password"},
            {"role": "bouton", "text": "Connexion"},
        ]
        prompt_text = build_role_confirm_prompt(ocr_hits, wanted)
        for marker in ("1.", "2.", "3."):
            assert marker in prompt_text
    def test_no_self_declaration(self):
        """Prompt must NOT ask VLM to declare presence — only role."""
        ocr_hits = [
            {"text": "Login", "expected_role": "champ", "matched_ocr": "Login"},
        ]
        wanted = [{"role": "champ", "text": "Login"}]
        lowered = build_role_confirm_prompt(ocr_hits, wanted).lower()
        # NOTE(review): test_basic_prompt requires "role_confirmed" in the
        # prompt, so the right-hand side below looks always true and this
        # assertion may be vacuous — confirm the intended check.
        assert "present" not in lowered or "confirmed" in lowered
 # ── parse_role_confirm_response tests ─────────────────────────────────
 class TestParseRoleConfirmResponse:
    """``parse_role_confirm_response``: parsing of the raw VLM reply text."""
    def test_valid_json(self):
        """A well-formed JSON reply is returned with its entries and confidence."""
        raw_reply = json.dumps({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        })
        parsed = parse_role_confirm_response(raw_reply)
        assert len(parsed["confirmed"]) == 1
        assert parsed["overall_confidence"] == 0.92
    def test_json_in_markdown(self):
        """JSON wrapped in a markdown code fence is still parsed."""
        raw_reply = "```json\n{\"confirmed\": [], \"overall_confidence\": 0.0}\n```"
        parsed = parse_role_confirm_response(raw_reply)
        assert parsed["overall_confidence"] == 0.0
    def test_garbled_response(self):
        """Free text with no JSON falls back to zero confidence and no entries."""
        parsed = parse_role_confirm_response("I cannot determine the roles")
        assert parsed["overall_confidence"] == 0.0
        assert len(parsed["confirmed"]) == 0
    def test_confidence_as_string(self):
        """A confidence given as the string "0.85" compares equal to 0.85 after parsing."""
        raw_reply = json.dumps({"confirmed": [], "overall_confidence": "0.85"})
        parsed = parse_role_confirm_response(raw_reply)
        assert parsed["overall_confidence"] == 0.85
 # ── verify_screen_match (OCR-anchored) tests ─────────────────────────
 class TestVerifyScreenMatchOcrAnchored:
    def test_full_match(self):
        """OCR finds the text and the VLM confirms its role → match with confidence ≥ 0.7."""
        ocr = mock_ocr_client_factory(["Rechercher", "Connexion", "Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.92},
            ],
            "overall_confidence": 0.92,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # Identity check instead of `== True` (PEP 8).
        assert result.match is True
        assert result.confidence >= 0.7
    def test_ocr_presence_fail(self):
        """OCR doesn't find expected text → mismatch (deterministic, no VLM needed)."""
        ocr = mock_ocr_client_factory(["Accueil", "Paramètres"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # Identity check instead of `== False` (PEP 8).
        assert result.match is False
        assert "OCR presence" in result.reason
        assert len(result.mismatches) > 0
    def test_role_not_confirmed(self):
        """OCR finds text, VLM says it's a label not a button → mismatch."""
        ocr = mock_ocr_client_factory(["Rechercher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": False, "actual_role": "label", "confidence": 0.6},
            ],
            "overall_confidence": 0.6,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # Identity check instead of `== False` (PEP 8).
        assert result.match is False
    def test_ocr_error(self):
        """OCR engine fails → fail-safe mismatch."""
        def failing_ocr(image_path):
            raise RuntimeError("OCR engine down")
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=failing_ocr,
            vlm_client=vlm,
        )
        assert result.match == False
        assert "OCR error" in result.reason
    def test_vlm_error_partial_match(self):
        """OCR finds texts, VLM fails → partial match (presence OK, role unknown)."""
        ocr = mock_ocr_client_factory(["Rechercher"])
        def failing_vlm(image_path, prompt):
            raise RuntimeError("VLM service down")
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=failing_vlm,
        )
        # Presence confirmed by OCR → partial match, confidence=0.5
        assert result.match == True
        assert result.confidence == 0.5
        assert "VLM role confirm failed" in result.reason
    def test_no_expected_elements(self):
        ocr = mock_ocr_client_factory(["Login"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match("/tmp/test.png", [], ocr_client=ocr, vlm_client=vlm)
        assert result.match == True
        assert result.confidence == 1.0
    def test_describe_match(self):
        result = ScreenMatchResult(match=True, confidence=0.92)
        assert "OK" in result.describe()
    def test_describe_mismatch(self):
        result = ScreenMatchResult(
            match=False, confidence=0.3,
            mismatches=["bouton: Rechercher"],
        )
        assert "mismatch" in result.describe()
    def test_multiple_elements_mixed(self):
        """2 elements: 1 found+role OK, 1 not found in OCR → mismatch."""
        ocr = mock_ocr_client_factory(["Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [
                {"role": "bouton", "text": "Connexion"},
                {"role": "champ", "text": "Nom Patient"},
            ],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match == False  # "Nom Patient" not found by OCR
    def test_fuzzy_ocr_match(self):
        """OCR reads 'Rechércher' (accent), expected 'Rechercher' → still found."""
        ocr = mock_ocr_client_factory(["Rechércher"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "bouton", "confidence": 0.9},
            ],
            "overall_confidence": 0.9,
        })
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "bouton", "text": "Rechercher"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match == True
    def test_no_text_elements_trivially_match(self):
        """Elements without text key → no presence check needed → trivially OK."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({})
        result = verify_screen_match(
            "/tmp/test.png",
            [{"role": "page"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match == True
 # ── verify_before / verify_after tests ────────────────────────────────
 class TestVerifyBeforeAfter:
    """Tests for the ``verify_before`` / ``verify_after`` wrappers with mock clients."""

    def test_verify_before_match(self):
        """Expected field text is on screen and its role is confirmed → match."""
        ocr = mock_ocr_client_factory(["Login", "Password", "Connexion"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "champ", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_before(
            "/tmp/login.png",
            [{"role": "champ", "text": "Login"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="page login",
        )
        assert result.match

    def test_verify_after_higher_threshold(self):
        """verify_after uses min_confidence=0.8. VLM returns 0.75 → mismatch."""
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.75},
            ],
            "overall_confidence": 0.75,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        # 0.75 < 0.8 threshold → role mismatch
        assert not result.match

    def test_verify_after_passes_at_0_8(self):
        """VLM returns 0.85, above the 0.8 threshold → match.

        NOTE(review): despite the test name, the exact 0.8 boundary is not
        exercised here; only a value above it is.
        """
        ocr = mock_ocr_client_factory(["Dashboard"])
        vlm = mock_vlm_client_factory({
            "confirmed": [
                {"index": 1, "role_confirmed": True, "actual_role": "page", "confidence": 0.85},
            ],
            "overall_confidence": 0.85,
        })
        result = verify_after(
            "/tmp/dashboard.png",
            [{"role": "page", "text": "Dashboard"}],
            ocr_client=ocr,
            vlm_client=vlm,
        )
        assert result.match

    def test_verify_before_ocr_missing(self):
        """Pre-action: expected text not on screen → mismatch (can't proceed)."""
        ocr = mock_ocr_client_factory(["Accueil"])
        vlm = mock_vlm_client_factory({})
        result = verify_before(
            "/tmp/page.png",
            [{"role": "bouton", "text": "Connexion"}],
            ocr_client=ocr,
            vlm_client=vlm,
            context="pre-login",
        )
        assert not result.match
        assert "OCR presence" in result.reason
 # ── OcrPresenceResult dataclass tests ─────────────────────────────────
 class TestOcrPresenceResult:
    """Checks of the ``presence_ratio`` value exposed by ``OcrPresenceResult``."""

    def test_presence_ratio_all_found(self):
        """Two expected texts, both matched and nothing missing → ratio 1.0."""
        matched = {"Login": "Login", "Password": "Password"}
        presence = OcrPresenceResult(found_texts=matched, missing=[], all_found=True)
        assert presence.presence_ratio == 1.0

    def test_presence_ratio_half_found(self):
        """One of two expected texts has an empty match and is listed missing → ratio 0.5."""
        matched = {"Login": "Login", "Password": ""}
        not_found = ["champ: Password"]
        presence = OcrPresenceResult(found_texts=matched, missing=not_found, all_found=False)
        assert presence.presence_ratio == 0.5

    def test_presence_ratio_empty(self):
        """No expected texts at all → ratio 1.0."""
        presence = OcrPresenceResult(found_texts={}, missing=[], all_found=True)
        assert presence.presence_ratio == 1.0