snapshot: WIP 5j replay reliability (B1 watchdog + dialog handlers + grounding drift)

Snapshot avant correction du blocage relance Léa (3 incidents 24h: SSH refusé, polls morts ×2). Point de rollback stable. Contenu: - agent_v1/core/executor.py: 5 patchs dialog handling (saveas drift, close_tab hotkey fallback, confirm_save Unicode apostrophe, foreground dialog recontextualization, runtime_dialog in-loop) + helpers normalize_window_hint, requires_post_verify_window_transition - agent_v1/core/grounding.py: garde drift template fix (fallback_x/y plumbed) - server_v1/replay_watchdog.py (NEW): orphan watchdog B1, scan 10s timeout 30s - server_v1/api_stream.py: dispatched_action plumbing, watchdog lifespan, metrics endpoint - server_v1/replay_engine.py: _schedule_retry préserve original_action + dispatched_action - stream_processor.py: gardes _infer_tab_switch_target (no false switch_tab on save_as dialog open) + _attach_expected_window_before - tests/integration: test_replay_watchdog.py (8 cas), test_stream_processor.py - tests/unit: test_executor_verify_window_guard.py (start_button, close_tab, runtime_dialog, post_verify, transition fallbacks) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 16:48:37 +02:00
parent 5ea4960e65
commit 7df51d2c79
47 changed files with 9811 additions and 451 deletions
--- a/agent_v0/server_v1/resolve_engine.py
+++ b/agent_v0/server_v1/resolve_engine.py
@@ -243,6 +243,168 @@ def _validate_match_context(
    return True


+def _has_meaningful_recorded_coords(
+    fallback_x_pct: float,
+    fallback_y_pct: float,
+) -> bool:
+    """Tell whether the fallback coordinates represent a real source position."""
+    return (
+        fallback_x_pct > 0.001  # x at or below 0.001 (e.g. 0.0) is not a real position
+        and fallback_y_pct > 0.001  # same rule on the y axis
+        and not (  # both axes within 0.001 of 0.5: treated as a centre placeholder
+            abs(fallback_x_pct - 0.5) < 0.001
+            and abs(fallback_y_pct - 0.5) < 0.001
+        )
+    )
+
+
+def _is_close_tab_target(target_spec: Optional[Dict[str, Any]]) -> bool:
+    """Detect a close_tab action emitted by the replay compiler."""
+    if not isinstance(target_spec, dict):  # None or any non-dict spec is never close_tab
+        return False
+    context_hints = target_spec.get("context_hints") or {}  # missing/None hints act as empty
+    return str((context_hints.get("interaction") or "")).strip().lower() == "close_tab"
+
+
+def _get_expected_close_tab_coords(
+    target_spec: Optional[Dict[str, Any]],
+    screen_width: int,
+    screen_height: int,
+    fallback_x_pct: float = 0.0,
+    fallback_y_pct: float = 0.0,
+) -> Optional[tuple[float, float]]:
+    """Find the most reliable expected position for a close_tab.
+
+    Order of preference (first usable source wins; None when there is none):
+    1. Explicit fallback coordinates of the replay action
+    2. SoM centre calibrated at recording time
+    3. click_relative + source window rect
+    """
+    if _has_meaningful_recorded_coords(fallback_x_pct, fallback_y_pct):
+        return float(fallback_x_pct), float(fallback_y_pct)
+
+    if not isinstance(target_spec, dict):
+        return None
+
+    som_center = (target_spec.get("som_element") or {}).get("center_norm")  # NOTE(review): assumes som_element is a dict
+    if isinstance(som_center, (list, tuple)) and len(som_center) >= 2:
+        try:
+            exp_x = float(som_center[0])
+            exp_y = float(som_center[1])
+            if 0.0 <= exp_x <= 1.0 and 0.0 <= exp_y <= 1.0:  # ignore centres outside 0..1
+                return exp_x, exp_y
+        except (TypeError, ValueError):
+            pass  # non-numeric centre: fall through to the window_capture source
+
+    window_capture = target_spec.get("window_capture") or {}  # NOTE(review): assumes a dict when present
+    rect = window_capture.get("rect")
+    click_relative = window_capture.get("click_relative")
+    if (
+        isinstance(rect, (list, tuple))
+        and len(rect) >= 4
+        and isinstance(click_relative, (list, tuple))
+        and len(click_relative) >= 2
+        and screen_width > 0  # a positive screen size is required to normalise below
+        and screen_height > 0
+    ):
+        try:
+            abs_x = float(rect[0]) + float(click_relative[0])  # rect origin + click offset (presumably px — TODO confirm)
+            abs_y = float(rect[1]) + float(click_relative[1])
+            exp_x = abs_x / float(screen_width)  # normalise by the screen size
+            exp_y = abs_y / float(screen_height)
+            if 0.0 <= exp_x <= 1.0 and 0.0 <= exp_y <= 1.0:  # same 0..1 sanity check as the SoM path
+                return exp_x, exp_y
+        except (TypeError, ValueError, ZeroDivisionError):
+            pass  # malformed rect/click values: no expected position from this source
+
+    return None
+
+
+def _is_close_tab_result_plausible(
+    resolved_x: float,
+    resolved_y: float,
+    target_spec: Optional[Dict[str, Any]],
+    screen_width: int,
+    screen_height: int,
+    fallback_x_pct: float = 0.0,
+    fallback_y_pct: float = 0.0,
+) -> bool:
+    """Filter out close_tab false positives that drift towards the close button."""
+    if not _is_close_tab_target(target_spec):  # the guard only applies to close_tab targets
+        return True
+
+    expected = _get_expected_close_tab_coords(
+        target_spec,
+        screen_width,
+        screen_height,
+        fallback_x_pct=fallback_x_pct,
+        fallback_y_pct=fallback_y_pct,
+    )
+    if expected is None:  # no reference position to compare against: accept the result
+        return True
+
+    exp_x, exp_y = expected
+    dx = abs(float(resolved_x) - exp_x)
+    dy = abs(float(resolved_y) - exp_y)  # dy has no threshold of its own; it feeds distance and the log
+    distance = (dx ** 2 + dy ** 2) ** 0.5  # Euclidean distance in the same normalised units
+    is_plausible = dx <= 0.18 and distance <= 0.20  # both limits must hold to accept
+    if not is_plausible:
+        logger.warning(
+            "close_tab guard : résultat rejeté car trop éloigné de la zone "
+            "source (resolved=(%.4f, %.4f), expected=(%.4f, %.4f), "
+            "drift=(%.4f, %.4f), dist=%.4f)",
+            float(resolved_x),
+            float(resolved_y),
+            exp_x,
+            exp_y,
+            dx,
+            dy,
+            distance,
+        )
+    return is_plausible
+
+
+def _is_start_button_vlm_result_plausible(
+    result: Dict[str, Any],
+    fallback_x_pct: float,
+    fallback_y_pct: float,
+    target_spec: Dict[str, Any],
+    max_distance: float = 0.20,
+) -> bool:
+    """Filter out VLM false positives on the Start button.
+
+    The Start button is a system singleton. When a real recorded click is
+    available (`fallback_*`), a VLM location far away from that area is more
+    likely a false positive than a genuine UI move.
+    """
+    by_role = str(target_spec.get("by_role", "") or "").strip().lower()
+    if by_role != "start_button":  # the guard only applies to start_button targets
+        return True
+
+    if not _has_meaningful_recorded_coords(fallback_x_pct, fallback_y_pct):  # nothing reliable to compare with
+        return True
+
+    if _validate_match_context(  # the distance check itself is delegated to this helper
+        result,
+        fallback_x_pct,
+        fallback_y_pct,
+        target_spec,
+        max_distance=max_distance,
+    ):
+        return True
+
+    logger.warning(
+        "Start button guard : résultat VLM rejeté car trop éloigné de la "
+        "position enregistrée (resolved=(%.4f, %.4f), expected=(%.4f, %.4f), max=%.2f)",
+        float(result.get("x_pct", 0) or 0),
+        float(result.get("y_pct", 0) or 0),
+        fallback_x_pct,
+        fallback_y_pct,
+        max_distance,
+    )
+    return False
+
+
 # =========================================================================
 # YOLO/OmniParser — Résolution par détection d'éléments UI
 # =========================================================================
@@ -1109,16 +1271,66 @@ def _resolve_by_som(
                        # Centre du match
                        match_cx = max_loc[0] + anc_w // 2
                        match_cy = max_loc[1] + anc_h // 2
+                        interaction = str(
+                            (target_spec.get("context_hints") or {}).get("interaction", "") or ""
+                        ).strip().lower()
+
+                        if interaction == "close_tab":
+                            elapsed = time.time() - t0
+                            cx_norm = match_cx / screen_width if screen_width > 0 else 0.0
+                            cy_norm = match_cy / screen_height if screen_height > 0 else 0.0
+                            if _is_close_tab_result_plausible(
+                                cx_norm,
+                                cy_norm,
+                                target_spec,
+                                screen_width,
+                                screen_height,
+                            ):
+                                logger.info(
+                                    "SoM resolve ANCHOR exact close_tab : score=%.3f "
+                                    "centre=(%d, %d) → (%.4f, %.4f) en %.1fs",
+                                    max_score, match_cx, match_cy, cx_norm, cy_norm, elapsed,
+                                )
+                                return {
+                                    "resolved": True,
+                                    "method": "som_anchor_match",
+                                    "x_pct": round(cx_norm, 6),
+                                    "y_pct": round(cy_norm, 6),
+                                    "matched_element": {
+                                        "label": "close_tab_button",
+                                        "type": "visual_anchor",
+                                        "role": "som_anchor_exact",
+                                        "confidence": max_score,
+                                    },
+                                    "score": max_score,
+                                    "match_box": {
+                                        "x": int(max_loc[0]),
+                                        "y": int(max_loc[1]),
+                                        "width": int(anc_w),
+                                        "height": int(anc_h),
+                                    },
+                                }
+                            logger.warning(
+                                "SoM resolve ANCHOR exact close_tab rejeté : score=%.3f "
+                                "centre=(%d, %d) → (%.4f, %.4f), passage VLM/fallback",
+                                max_score, match_cx, match_cy, cx_norm, cy_norm,
+                            )
+                            # Ne pas recycler ce faux match vers l'élément SoM le plus
+                            # proche : pour close_tab, cela retombe facilement sur le
+                            # bouton de fermeture de la fenêtre.
+                            best_elem = None
+                        else:
+                            best_elem = None

                        # Trouver l'élément SomEngine le plus proche du centre du match
-                        best_elem = None
                        best_dist = float("inf")
-                        for elem in som_result.elements:
-                            cx, cy = elem.center
-                            dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5
-                            if dist < best_dist:
-                                best_dist = dist
-                                best_elem = elem
+                        if best_elem is None and interaction != "close_tab":
+                            for elem in som_result.elements:
+                                cx, cy = elem.center
+                                dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5
+                                if dist < best_dist:
+                                    best_dist = dist
+                                    best_elem = elem

                        if best_elem and best_dist < 100:  # Max 100px de distance
                            elapsed = time.time() - t0
@@ -1584,6 +1796,49 @@ def _resolve_target_sync(
            "fallback cascade legacy"
        )

+    # ===================================================================
+    # Cas spécial : boutons de dialogue runtime ("Oui", "Non", "OK", ...)
+    # ===================================================================
+    # Ces boutons sont textuels, sans ancre stable, et apparaissent souvent
+    # au milieu d'une action déjà en cours. Si on les laisse partir dans la
+    # cascade générique (VLM -> SoM -> ScreenAnalyzer), on peut bloquer
+    # l'action principale assez longtemps pour déclencher le watchdog.
+    # Contrat voulu : OCR direct rapide, sinon abandon immédiat pour que le
+    # client essaie son fallback local par template texte.
+    dialog_role = str(target_spec.get("by_role", "") or "").strip().lower()
+    dialog_text = str(target_spec.get("by_text", "") or "").strip()
+    if dialog_role == "dialog_button" and dialog_text and not anchor_image_b64:
+        ocr_result = _resolve_by_ocr_text(
+            screenshot_path=screenshot_path,
+            target_text=dialog_text,
+            screen_width=screen_width,
+            screen_height=screen_height,
+        )
+        if ocr_result and ocr_result.get("score", 0) >= 0.80:
+            ocr_result["method"] = "hybrid_text_direct"
+            logger.info(
+                "Resolve dialog_button OCR-DIRECT : OK '%s' → (%.4f, %.4f) score=%.2f",
+                dialog_text[:40],
+                ocr_result.get("x_pct", 0),
+                ocr_result.get("y_pct", 0),
+                ocr_result.get("score", 0),
+            )
+            return ocr_result
+
+        logger.info(
+            "Resolve dialog_button OCR-only : '%s' non trouvé "
+            "(fenêtre='%s') — skip VLM/SoM/ScreenAnalyzer",
+            dialog_text[:40],
+            str(target_spec.get("window_title", "") or "")[:80],
+        )
+        return {
+            "resolved": False,
+            "method": "dialog_button_ocr_only",
+            "reason": "ocr_direct_failed_dialog_button_no_vlm",
+            "x_pct": fallback_x_pct,
+            "y_pct": fallback_y_pct,
+        }
+
    # ===================================================================
    # MODE STRICT (replay sessions) — Stratégie VLM-FIRST
    # ===================================================================
@@ -1656,13 +1911,25 @@ def _resolve_target_sync(
                screen_height=screen_height,
            )
            if grounding_result and grounding_result.get("resolved"):
-                logger.info(
-                    "Strict resolve GROUNDING : OK (%.4f, %.4f) pour '%s'",
-                    grounding_result.get("x_pct", 0),
-                    grounding_result.get("y_pct", 0),
-                    by_text_strict[:50],
+                if _is_close_tab_result_plausible(
+                    float(grounding_result.get("x_pct", 0) or 0),
+                    float(grounding_result.get("y_pct", 0) or 0),
+                    target_spec,
+                    screen_width,
+                    screen_height,
+                    fallback_x_pct=fallback_x_pct,
+                    fallback_y_pct=fallback_y_pct,
+                ):
+                    logger.info(
+                        "Strict resolve GROUNDING : OK (%.4f, %.4f) pour '%s'",
+                        grounding_result.get("x_pct", 0),
+                        grounding_result.get("y_pct", 0),
+                        by_text_strict[:50],
+                    )
+                    return grounding_result
+                logger.warning(
+                    "Strict resolve GROUNDING : résultat close_tab rejeté, passage template/VLM"
                )
-                return grounding_result

        if not by_text_strict or by_text_source not in ("ocr", "vlm"):
            # Template matching pour les éléments sans texte (icônes pures)
@@ -1690,11 +1957,23 @@ def _resolve_target_sync(
                    abs_y = window_rect[1] + y_tm * tm_screen_h
                    result["x_pct"] = round(abs_x / screen_width, 6)
                    result["y_pct"] = round(abs_y / screen_height, 6)
-                logger.info(
-                    "Strict resolve TEMPLATE : icon match (score=%.3f)",
-                    result.get("score", 0),
+                if _is_close_tab_result_plausible(
+                    float(result.get("x_pct", 0) or 0),
+                    float(result.get("y_pct", 0) or 0),
+                    target_spec,
+                    screen_width,
+                    screen_height,
+                    fallback_x_pct=fallback_x_pct,
+                    fallback_y_pct=fallback_y_pct,
+                ):
+                    logger.info(
+                        "Strict resolve TEMPLATE : icon match (score=%.3f)",
+                        result.get("score", 0),
+                    )
+                    return result
+                logger.warning(
+                    "Strict resolve TEMPLATE : résultat close_tab rejeté, passage cascade suivante"
                )
-                return result

        # ---------------------------------------------------------------
        # Étape 0.5 : OCR direct (hybrid_text_direct) — chemin rapide
@@ -1739,6 +2018,27 @@ def _resolve_target_sync(
                    by_text_strict[:40],
                )

+        # Les boutons de dialogues runtime connus ("Oui", "Non", "OK", etc.)
+        # ne doivent pas partir dans la cascade lente VLM -> SoM. Si l'OCR
+        # direct ne les trouve pas immédiatement, on rend la main au client
+        # pour son fallback local par template texte, sinon on bloque l'action
+        # principale assez longtemps pour déclencher le watchdog.
+        dialog_role = str(target_spec.get("by_role", "") or "").strip().lower()
+        if dialog_role == "dialog_button" and by_text_strict and not anchor_image_b64:
+            logger.info(
+                "Strict resolve dialog_button : OCR-direct only pour '%s' "
+                "(fenêtre='%s') — skip VLM/SoM/template",
+                by_text_strict[:40],
+                str(target_spec.get("window_title", "") or "")[:80],
+            )
+            return {
+                "resolved": False,
+                "method": "dialog_button_ocr_only",
+                "reason": "ocr_direct_failed_dialog_button_no_vlm",
+                "x_pct": fallback_x_pct,
+                "y_pct": fallback_y_pct,
+            }
+
        # ---------------------------------------------------------------
        # Étape 1 : VLM Quick Find (fallback, multi-image)
        # ---------------------------------------------------------------
@@ -1750,12 +2050,29 @@ def _resolve_target_sync(
            )
            if vlm_result and vlm_result.get("resolved"):
                if vlm_result.get("score", 0) >= 0.3:
-                    logger.info(
-                        "Strict resolve VLM-first : VLM OK (score=%.2f) pour '%s'",
-                        vlm_result.get("score", 0),
-                        vlm_description[:60] if vlm_description else "(anchor)",
+                    if _is_start_button_vlm_result_plausible(
+                        vlm_result,
+                        fallback_x_pct,
+                        fallback_y_pct,
+                        target_spec,
+                    ) and _is_close_tab_result_plausible(
+                        float(vlm_result.get("x_pct", 0) or 0),
+                        float(vlm_result.get("y_pct", 0) or 0),
+                        target_spec,
+                        screen_width,
+                        screen_height,
+                        fallback_x_pct=fallback_x_pct,
+                        fallback_y_pct=fallback_y_pct,
+                    ):
+                        logger.info(
+                            "Strict resolve VLM-first : VLM OK (score=%.2f) pour '%s'",
+                            vlm_result.get("score", 0),
+                            vlm_description[:60] if vlm_description else "(anchor)",
+                        )
+                        return vlm_result
+                    logger.warning(
+                        "Strict resolve VLM-first : résultat VLM rejeté par un garde-fou, passage SoM/template"
                    )
-                    return vlm_result
                else:
                    logger.info(
                        "Strict resolve VLM-first : VLM score=%.2f trop bas, passage template",
@@ -1782,12 +2099,24 @@ def _resolve_target_sync(
                screen_height=screen_height,
            )
            if som_result and som_result.get("resolved"):
-                logger.info(
-                    "Strict resolve SoM+VLM : OK (score=%.2f, mark=#%s)",
-                    som_result.get("score", 0),
-                    som_result.get("matched_element", {}).get("som_id", "?"),
+                if _is_close_tab_result_plausible(
+                    float(som_result.get("x_pct", 0) or 0),
+                    float(som_result.get("y_pct", 0) or 0),
+                    target_spec,
+                    screen_width,
+                    screen_height,
+                    fallback_x_pct=fallback_x_pct,
+                    fallback_y_pct=fallback_y_pct,
+                ):
+                    logger.info(
+                        "Strict resolve SoM+VLM : OK (score=%.2f, mark=#%s)",
+                        som_result.get("score", 0),
+                        som_result.get("matched_element", {}).get("som_id", "?"),
+                    )
+                    return som_result
+                logger.warning(
+                    "Strict resolve SoM+VLM : résultat close_tab rejeté, passage template matching"
                )
-                return som_result
            else:
                logger.info("Strict resolve SoM+VLM : échoué, passage template matching")

@@ -1805,12 +2134,24 @@ def _resolve_target_sync(
            score = result.get("score", 0)
            # Score >= 0.95 : match quasi-parfait, pas besoin de valider le contexte
            if score >= 0.95:
-                logger.info(
-                    "Strict resolve VLM-first : template matching fallback OK "
-                    "(score=%.3f >= 0.95, contexte skip — match quasi-parfait)",
-                    score,
+                if _is_close_tab_result_plausible(
+                    float(result.get("x_pct", 0) or 0),
+                    float(result.get("y_pct", 0) or 0),
+                    target_spec,
+                    screen_width,
+                    screen_height,
+                    fallback_x_pct=fallback_x_pct,
+                    fallback_y_pct=fallback_y_pct,
+                ):
+                    logger.info(
+                        "Strict resolve VLM-first : template matching fallback OK "
+                        "(score=%.3f >= 0.95, contexte skip — match quasi-parfait)",
+                        score,
+                    )
+                    return result
+                logger.warning(
+                    "Strict resolve TEMPLATE : match close_tab très fort mais hors zone source, rejeté"
                )
-                return result
            elif _validate_match_context(result, fallback_x_pct, fallback_y_pct, target_spec):
                logger.info(
                    "Strict resolve VLM-first : template matching fallback OK "
@@ -2189,6 +2530,37 @@ def _text_match_fuzzy(expected: str, observed: str, min_token_ratio: float = 0.6
    return matched / len(tokens) >= min_token_ratio


+_SOM_BBOX_OCR_PADDING_PX: int = 8  # pixels added on each side of a SoM bbox before the OCR crop
+_SOM_BBOX_MIN_DIM_PX: int = 12  # minimum raw bbox width and height (px) for the SoM bbox to be used as OCR zone
+
+
+def _should_reject_on_text_mismatch(
+    is_valid: bool,
+    observed: Optional[str],
+) -> bool:
+    """Decide whether the OCR pre-check must reject the resolution.
+
+    Patch 2026-05-23: two fuzzy-match failure cases are distinguished:
+
+    - ``observed`` contains text (e.g. ``'9 ?'``, ``'OBS Studio…'``)
+      → confirmed mismatch, the cascade probably clicked elsewhere
+      → reject.
+    - ``observed`` is None, empty or whitespace-only
+      → the OCR read nothing (zone too small, low-contrast text,
+      EasyOCR model below its detection threshold). This is ambiguous:
+      it is NOT proof of a false positive, so the server resolution
+      is accepted. The agent-side ANCHOR-TM drift guard
+      protects downstream against real false positives.
+
+    If ``is_valid=True`` → never reject (nominal case).
+    """
+    if is_valid:
+        return False
+    if observed is None:  # no OCR text at all: ambiguous, so do not reject
+        return False
+    return bool(str(observed).strip())  # reject only when some non-whitespace text was read
+
+
 def _validate_text_at_position(
    screenshot_path: str,
    x_pct: float,
@@ -2197,9 +2569,20 @@ def _validate_text_at_position(
    screen_width: int,
    screen_height: int,
    radius_px: int = 280,
+    som_bbox_norm: Optional[List[float]] = None,
 ) -> tuple:
-    """Pré-check sémantique : OCR sur une zone autour de (x_pct, y_pct) et
-    vérifie que `expected_text` y est présent (substring ou fuzzy 50%).
+    """Pré-check sémantique : OCR sur une zone et vérifie que
+    `expected_text` y est présent (substring ou fuzzy 50%).
+
+    Zone OCR (par priorité) :
+      1. Si ``som_bbox_norm = [x1, y1, x2, y2]`` (normalisé 0..1) est
+         fourni et a une largeur/hauteur > _SOM_BBOX_MIN_DIM_PX en
+         pixels écran : OCR sur cette bbox élargie d'un padding court.
+         Plus précis pour les éléments étroits (onglets Notepad
+         moderne, ~30-40px haut) que le radius générique qui capture
+         le texte voisin (status bar, etc.).
+      2. Sinon : fallback historique → carré de ``radius_px`` autour
+         de (x_pct, y_pct).

    Retourne (is_valid: bool, observed_text: str, elapsed_ms: float).

@@ -2219,16 +2602,52 @@ def _validate_text_at_position(
        t0 = time.time()
        img = Image.open(screenshot_path).convert("RGB")
        img_w, img_h = img.size
-        cx = int(x_pct * screen_width)
-        cy = int(y_pct * screen_height)
-        # Saturer dans les bornes de l'image (le screenshot peut être plus
-        # large que la fenêtre logique — utiliser min(img_*, screen_*) en sécurité).
        max_x = min(img_w, screen_width)
        max_y = min(img_h, screen_height)
-        x1 = max(0, cx - radius_px)
-        y1 = max(0, cy - radius_px)
-        x2 = min(max_x, cx + radius_px)
-        y2 = min(max_y, cy + radius_px)
+
+        # --- Tentative 1 : zone OCR depuis la bbox SoM (préférée) ---
+        x1 = y1 = x2 = y2 = None
+        if (
+            isinstance(som_bbox_norm, (list, tuple))
+            and len(som_bbox_norm) == 4
+        ):
+            try:
+                bx1, by1, bx2, by2 = (float(v) for v in som_bbox_norm)
+                # Tolérer ordre inversé.
+                bx1, bx2 = sorted((bx1, bx2))
+                by1, by2 = sorted((by1, by2))
+                # Refuser les bboxes dégénérées AVANT padding : si
+                # l'élément cible fait < _SOM_BBOX_MIN_DIM_PX en
+                # natif, c'est probablement une bbox d'apparence
+                # (curseur, séparateur 1px) — pas un label OCRable.
+                raw_w = (bx2 - bx1) * screen_width
+                raw_h = (by2 - by1) * screen_height
+                if (
+                    raw_w >= _SOM_BBOX_MIN_DIM_PX
+                    and raw_h >= _SOM_BBOX_MIN_DIM_PX
+                ):
+                    # Conversion en pixels écran + clipping et padding.
+                    px1 = int(bx1 * screen_width) - _SOM_BBOX_OCR_PADDING_PX
+                    py1 = int(by1 * screen_height) - _SOM_BBOX_OCR_PADDING_PX
+                    px2 = int(bx2 * screen_width) + _SOM_BBOX_OCR_PADDING_PX
+                    py2 = int(by2 * screen_height) + _SOM_BBOX_OCR_PADDING_PX
+                    x1 = max(0, px1)
+                    y1 = max(0, py1)
+                    x2 = min(max_x, px2)
+                    y2 = min(max_y, py2)
+            except (TypeError, ValueError):
+                # Bbox malformée : fallback silencieux sur le radius.
+                x1 = y1 = x2 = y2 = None
+
+        # --- Fallback : carré radius_px autour de (x_pct, y_pct) ---
+        if x1 is None:
+            cx = int(x_pct * screen_width)
+            cy = int(y_pct * screen_height)
+            x1 = max(0, cx - radius_px)
+            y1 = max(0, cy - radius_px)
+            x2 = min(max_x, cx + radius_px)
+            y2 = min(max_y, cy + radius_px)
+
        if x2 - x1 < 10 or y2 - y1 < 10:
            return True, "", 0.0
        crop = img.crop((x1, y1, x2, y2))
@@ -2246,6 +2665,7 @@ def _validate_resolution_quality(
    result: Optional[Dict[str, Any]],
    fallback_x_pct: float,
    fallback_y_pct: float,
+    target_spec: Optional[Dict[str, Any]] = None,
 ) -> Optional[Dict[str, Any]]:
    """Valide un résultat de résolution et le rejette s'il est peu fiable.

@@ -2263,6 +2683,16 @@ def _validate_resolution_quality(
    elle n'est PAS appelée par les méthodes internes de la cascade, mais
    uniquement depuis le handler HTTP `/resolve_target` après que la
    cascade a produit son meilleur candidat.
+
+    Argument optionnel `target_spec` : permet d'appliquer des relaxations
+    contextuelles. Cas couvert (2026-05-22) : pour une cible
+    `context_hints.interaction == "switch_tab"` qui dispose d'un
+    `som_element.bbox_norm`, on abaisse le seuil des méthodes ``som_*``
+    de 0.75 → 0.60. Justification : (1) le focus_change pré-clic
+    prouve qu'on est dans la bonne fenêtre, (2) la bbox SoM a été
+    calibrée à l'enregistrement et reste valide, (3) les onglets
+    Notepad moderne sont visuellement quasi-identiques → score VLM
+    inévitablement lower.
    """
    if not result or not isinstance(result, dict):
        return result
@@ -2291,6 +2721,52 @@ def _validate_resolution_quality(
                min_score = threshold
                break

+    # Relaxation contextuelle pour switch_tab + SoM calibré (2026-05-22).
+    # Les onglets Notepad moderne (et apps similaires) sont visuellement
+    # quasi-identiques : le grounding VLM/SoM produit fréquemment un
+    # score 0.65-0.75, juste sous le seuil strict. Comme le contexte
+    # `interaction=switch_tab` + bbox SoM enregistrée + focus_change
+    # pré-clic confirment déjà la fenêtre et la zone, on relâche le
+    # seuil des méthodes som_* à 0.60 dans CE cas précis uniquement.
+    if (
+        min_score is not None
+        and target_spec
+        and method.startswith("som_")
+    ):
+        context_hints = target_spec.get("context_hints") or {}
+        is_tab_switch = (
+            context_hints.get("interaction") == "switch_tab"
+            and target_spec.get("by_role") == "tab"
+        )
+        som_element = target_spec.get("som_element") or {}
+        has_calibrated_som = bool(som_element.get("bbox_norm"))
+        if is_tab_switch and has_calibrated_som:
+            relaxed = 0.60
+            if relaxed < min_score:
+                logger.info(
+                    "[REPLAY] switch_tab + som_element calibré → seuil "
+                    "som_* relâché %.2f → %.2f (cible='%s')",
+                    min_score, relaxed,
+                    target_spec.get("by_text", ""),
+                )
+                min_score = relaxed
+
+        is_close_tab = (
+            method == "som_anchor_match"
+            and str((context_hints.get("interaction") or "")).strip().lower() == "close_tab"
+            and not str(target_spec.get("by_text", "") or "").strip()
+            and bool(target_spec.get("anchor_image_base64"))
+        )
+        if is_close_tab:
+            relaxed = 0.70
+            if relaxed < min_score:
+                logger.info(
+                    "[REPLAY] close_tab + anchor-only → seuil som_anchor_match "
+                    "relâché %.2f → %.2f",
+                    min_score, relaxed,
+                )
+                min_score = relaxed
+
    if min_score is not None and score < min_score:
        logger.warning(
            "[REPLAY] Resolution REJETÉE (score trop bas) : method=%s score=%.3f < %.2f",
@@ -2306,13 +2782,40 @@ def _validate_resolution_quality(
            "y_pct": fallback_y_pct,
        }

+    if _is_close_tab_target(target_spec) and not _is_close_tab_result_plausible(
+        resolved_x,
+        resolved_y,
+        target_spec,
+        0,
+        0,
+        fallback_x_pct=fallback_x_pct,
+        fallback_y_pct=fallback_y_pct,
+    ):
+        logger.warning(
+            "[REPLAY] Resolution REJETÉE (close_tab hors zone source) : "
+            "method=%s resolved=(%.3f, %.3f) expected=(%.3f, %.3f)",
+            method,
+            resolved_x,
+            resolved_y,
+            fallback_x_pct,
+            fallback_y_pct,
+        )
+        return {
+            "resolved": False,
+            "method": f"rejected_close_tab_zone_{method}",
+            "reason": "close_tab_out_of_recorded_zone",
+            "original_method": method,
+            "original_score": score,
+            "x_pct": fallback_x_pct,
+            "y_pct": fallback_y_pct,
+        }
+
    # --- Check 2 : garde de proximité ---
    # On n'applique la garde que si les coordonnées enregistrées ont un
    # sens (pas des placeholders 0.5/0.5 des plans V4 ni des 0.0/0.0).
-    _has_recorded_coords = (
-        fallback_x_pct > 0.001
-        and fallback_y_pct > 0.001
-        and not (abs(fallback_x_pct - 0.5) < 0.001 and abs(fallback_y_pct - 0.5) < 0.001)
+    _has_recorded_coords = _has_meaningful_recorded_coords(
+        fallback_x_pct,
+        fallback_y_pct,
    )
    if _has_recorded_coords:
        dx = abs(resolved_x - fallback_x_pct)