From c50adab3a15688405c672e5d103e0ed971669515 Mon Sep 17 00:00:00 2001 From: Dom Date: Tue, 21 Apr 2026 10:52:13 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20aligner=20capture=20monitors[0]=20partou?= =?UTF-8?q?t=20(cause=20de=20la=20r=C3=A9gression)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit La capture VWB utilisait monitors[0] (composite) mais l'exécution utilisait monitors[1] (premier écran). Images incompatibles → CLIP retournait 0.00 sur un écran identique. Tous les fichiers alignés sur monitors[0]. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/execution/input_handler.py | 8 ++++---- .../backend/services/intelligent_executor.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/execution/input_handler.py b/core/execution/input_handler.py index 9eff35955..801dc10ae 100644 --- a/core/execution/input_handler.py +++ b/core/execution/input_handler.py @@ -93,7 +93,7 @@ def check_screen_for_patterns() -> Optional[Dict[str, Any]]: lib = UIPatternLibrary() with mss.mss() as sct: - monitor = sct.monitors[1] + monitor = sct.monitors[0] screenshot = sct.grab(monitor) screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') @@ -160,7 +160,7 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool: return extractor.extract_words_from_image(img) with mss.mss() as sct: - monitor = sct.monitors[1] + monitor = sct.monitors[0] screenshot = sct.grab(monitor) screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') @@ -262,7 +262,7 @@ def vlm_reason_about_screen(objective: str = "", context: str = "") -> Optional[ from PIL import Image with mss.mss() as sct: - monitor = sct.monitors[1] + monitor = sct.monitors[0] screenshot = sct.grab(monitor) screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') @@ -432,7 +432,7 @@ def _capture_screen(): from PIL import Image as PILImage with mss.mss() as sct: - monitor = sct.monitors[1] + monitor = sct.monitors[0] screenshot = sct.grab(monitor) screen = PILImage.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') return screen, monitor['width'], monitor['height'] diff --git a/visual_workflow_builder/backend/services/intelligent_executor.py b/visual_workflow_builder/backend/services/intelligent_executor.py index b35d03d0f..447ffbd58 100644 --- a/visual_workflow_builder/backend/services/intelligent_executor.py +++ b/visual_workflow_builder/backend/services/intelligent_executor.py @@ -700,7 +700,7 @@ def find_and_click( import mss with mss.mss() as sct: - monitor = sct.monitors[1] # Premier écran + monitor = sct.monitors[0] # Écran composite (identique à la capture VWB) screenshot = sct.grab(monitor) screen_image = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')