diff --git a/agent_v0/agent_v1/core/executor.py b/agent_v0/agent_v1/core/executor.py index a07edc3dd..d62b20a1a 100644 --- a/agent_v0/agent_v1/core/executor.py +++ b/agent_v0/agent_v1/core/executor.py @@ -2244,6 +2244,7 @@ class ActionExecutorV1: # ---- ÉTAPE 1 : Résolution serveur (SomEngine + VLM) ---- # Le serveur comprend sémantiquement ce qu'on cherche. Pas de faux positifs. + skip_text_fallback_after_server_reject = False if server_url: server_result = self._server_resolve_target( server_url, screenshot_b64, target_spec, @@ -2251,6 +2252,18 @@ class ActionExecutorV1: ) if server_result and server_result.get("resolved"): return _with_metrics(server_result) + if server_result: + reason = str(server_result.get("reason") or "") + method = str(server_result.get("method") or "") + # If the server explicitly rejected a text/position candidate, + # do not replay the same broad by_text search locally. That + # bypasses semantic guards such as close-tab drift checks. + skip_text_fallback_after_server_reject = ( + method.startswith("rejected_") + or reason.startswith("close_tab_") + or reason.startswith("drift_") + or "below_threshold" in reason + ) # ---- ÉTAPE 2 : Template matching local (fallback si serveur down) ---- anchor_b64 = target_spec.get("anchor_image_base64", "") @@ -2265,6 +2278,17 @@ class ActionExecutorV1: # ---- ÉTAPE 3 : VLM local (fallback dev/test Linux) ---- by_text = target_spec.get("by_text", "") vlm_description = target_spec.get("vlm_description", "") + if skip_text_fallback_after_server_reject and by_text: + logger.info( + "[VISUAL] Rejet serveur explicite pour '%s' — " + "skip fallback local hybrid_text_direct", + by_text, + ) + print( + f" [VISUAL] Rejet serveur explicite pour '{by_text}' " + "→ pas de fallback texte local" + ) + return None if vlm_description or by_text: hybrid_result = self._hybrid_vlm_resolve( screenshot_b64, target_spec, screen_width, screen_height