diff --git a/agent_v0/server_v1/stream_processor.py b/agent_v0/server_v1/stream_processor.py index 832f27da6..ef83ff9a1 100644 --- a/agent_v0/server_v1/stream_processor.py +++ b/agent_v0/server_v1/stream_processor.py @@ -2042,6 +2042,24 @@ class StreamProcessor: self._screen_states[session_id] = [] self._screen_states[session_id].append(screen_state) + # Enrichir avec les patterns UI connus + try: + from core.knowledge.ui_patterns import UIPatternLibrary + detected_text = getattr(screen_state.perception, "detected_text", []) + if detected_text: + ocr_text = " ".join(str(t) for t in detected_text) if isinstance(detected_text, list) else str(detected_text) + lib = UIPatternLibrary() + pattern = lib.find_pattern(ocr_text) + if pattern: + result["ui_pattern"] = pattern["pattern"] + result["ui_pattern_action"] = pattern["action"] + result["ui_pattern_target"] = pattern["target"] + logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['target']}") + except ImportError: + pass + except Exception as e: + logger.debug(f"Pattern check: {e}") + logger.info( f"Screenshot analysé: {shot_id} | " f"{result['ui_elements_count']} UI elements, " diff --git a/visual_workflow_builder/backend/api_v3/execute.py b/visual_workflow_builder/backend/api_v3/execute.py index 6807f90d5..5f09c3340 100644 --- a/visual_workflow_builder/backend/api_v3/execute.py +++ b/visual_workflow_builder/backend/api_v3/execute.py @@ -178,6 +178,82 @@ _execution_state = { } +def _check_screen_for_patterns() -> Optional[Dict[str, Any]]: + """Vérifie si l'écran actuel contient un pattern UI connu (dialogue, popup). + + Capture l'écran, extrait le texte via OCR léger, et cherche + un pattern dans la UIPatternLibrary. + + Returns: + Dict avec le pattern trouvé et l'action à effectuer, ou None. + """ + try: + from core.knowledge.ui_patterns import UIPatternLibrary + import mss + from PIL import Image + import numpy as np + + lib = UIPatternLibrary() + + with mss.mss() as sct: + monitor = sct.monitors[1] + screenshot = sct.grab(monitor) + screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') + + try: + from services.ocr_service import ocr_extract_text + ocr_text = ocr_extract_text(screen) + except ImportError: + return None + + if not ocr_text or len(ocr_text) < 5: + return None + + pattern = lib.find_pattern(ocr_text) + if pattern and pattern['category'] in ('dialog', 'popup'): + print(f"🧠 [Pattern] Détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'") + print(f" Texte OCR: {ocr_text[:100]}...") + return pattern + + return None + + except Exception as e: + logger.debug(f"Pattern check échoué: {e}") + return None + + +def _handle_detected_pattern(pattern: Dict[str, Any]) -> bool: + """Gère automatiquement un pattern UI détecté (clic sur OK, fermer popup, etc.). + + Returns: + True si le pattern a été géré avec succès. + """ + import pyautogui + + action = pattern.get('action') + target = pattern.get('target', '') + bbox = pattern.get('typical_bbox') + alternatives = pattern.get('alternatives', []) + + if action == 'click' and bbox: + screen_w, screen_h = pyautogui.size() + x = int((bbox[0] + bbox[2]) / 2 * screen_w) + y = int((bbox[1] + bbox[3]) / 2 * screen_h) + print(f"🤖 [Pattern] Clic automatique sur '{target}' à ({x}, {y})") + pyautogui.click(x, y) + time.sleep(1.0) + return True + + elif action == 'hotkey': + keys = target.split('+') + print(f"🤖 [Pattern] Raccourci automatique: {target}") + pyautogui.hotkey(*keys) + time.sleep(0.5) + return True + + return False + + def execute_workflow_thread(execution_id: str, workflow_id: str, app): """ Thread d'exécution du workflow. @@ -231,6 +307,12 @@ def execute_workflow_thread(execution_id: str, workflow_id: str, app): db.session.add(step_result) db.session.commit() + # Vérifier si un dialogue/popup bloque l'écran avant l'étape + if index > 0 and _execution_state.get('execution_mode') in ('intelligent', 'debug'): + detected = _check_screen_for_patterns() + if detected: + _handle_detected_pattern(detected) + print(f"\n{'='*60}") print(f"📋 [Execute] Étape {index + 1}/{len(steps)}: {step.action_type}") print(f" step_id={step.id}, label={step.label}")