diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py index 33f713d92..83bcf68e5 100644 --- a/agent_v0/server_v1/api_stream.py +++ b/agent_v0/server_v1/api_stream.py @@ -5230,8 +5230,10 @@ def _resolve_target_sync( # --------------------------------------------------------------- by_text_source = target_spec.get("by_text_source", "") - if by_text_strict and by_text_source in ("ocr", "vlm"): - # Texte visible (OCR ou lu par gemma4) → grounding VLM direct + has_window = bool(target_spec.get("window_capture", {}).get("rect")) + + if by_text_strict and by_text_source in ("ocr", "vlm") and has_window: + # Texte visible DANS une fenêtre → grounding VLM sur fenêtre croppée grounding_result = _resolve_by_grounding( screenshot_path=screenshot_path, target_spec=target_spec,