From 8589e87a13bf43286c21006cafc510b47737fc01 Mon Sep 17 00:00:00 2001 From: Dom Date: Sun, 5 Apr 2026 21:19:36 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20grounding=20uniquement=20dans=20les=20fe?= =?UTF-8?q?n=C3=AAtres,=20template=20pour=20la=20taskbar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Les clics taskbar (sans window_capture.rect) ne passent plus par le grounding VLM qui trouve "Rechercher" dans l'explorateur au lieu de la taskbar. Le template matching du crop 80x80 est utilisé à la place. Règle : fenêtre = grounding, taskbar = template matching. Co-Authored-By: Claude Opus 4.6 (1M context) --- agent_v0/server_v1/api_stream.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py index 33f713d92..83bcf68e5 100644 --- a/agent_v0/server_v1/api_stream.py +++ b/agent_v0/server_v1/api_stream.py @@ -5230,8 +5230,10 @@ def _resolve_target_sync( # --------------------------------------------------------------- by_text_source = target_spec.get("by_text_source", "") - if by_text_strict and by_text_source in ("ocr", "vlm"): - # Texte visible (OCR ou lu par gemma4) → grounding VLM direct + has_window = bool(target_spec.get("window_capture", {}).get("rect")) + + if by_text_strict and by_text_source in ("ocr", "vlm") and has_window: + # Texte visible DANS une fenêtre → grounding VLM sur fenêtre croppée grounding_result = _resolve_by_grounding( screenshot_path=screenshot_path, target_spec=target_spec,