diff --git a/core/knowledge/ui_patterns.py b/core/knowledge/ui_patterns.py index 740c62e0e..7cc401b5a 100644 --- a/core/knowledge/ui_patterns.py +++ b/core/knowledge/ui_patterns.py @@ -50,6 +50,7 @@ BUILTIN_PATTERNS: List[Dict[str, Any]] = [ "triggers": [ "voulez-vous enregistrer", "do you want to save", "save changes", "enregistrer les modifications", + "enregistrer sous", "save as", "sauvegarder", "unsaved changes", ], "action": "click", @@ -328,7 +329,7 @@ class UIPatternLibrary: score = trigger_score matched_trigger = trigger - if score > best_score and score > 0.05: + if score > best_score and matched_trigger is not None: best_score = score best_match = { "pattern": pattern.name, diff --git a/visual_workflow_builder/backend/api_v3/execute.py b/visual_workflow_builder/backend/api_v3/execute.py index df57d08a2..4e27fc831 100644 --- a/visual_workflow_builder/backend/api_v3/execute.py +++ b/visual_workflow_builder/backend/api_v3/execute.py @@ -195,6 +195,10 @@ def _check_screen_for_patterns() -> Optional[Dict[str, Any]]: import numpy as np lib = UIPatternLibrary() + # Debug: vérifier les triggers du dialog_save + save_patterns = [p for p in lib._patterns if p.name == 'dialog_save'] + if save_patterns: + print(f" 🔎 [Pattern] dialog_save triggers: {save_patterns[0].triggers}") with mss.mss() as sct: monitor = sct.monitors[1] @@ -208,42 +212,107 @@ def _check_screen_for_patterns() -> Optional[Dict[str, Any]]: return None if not ocr_text or len(ocr_text) < 5: + print(f" 🔎 [Pattern] OCR vide ou trop court ({len(ocr_text) if ocr_text else 0} chars)") return None - pattern = lib.find_pattern(ocr_text) - if pattern and pattern['category'] in ('dialog', 'popup'): - print(f"🧠 [Pattern] Détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'") - print(f" Texte OCR: {ocr_text[:100]}...") - return pattern + print(f" 🔎 [Pattern] OCR ({len(ocr_text)} chars): {ocr_text[:500]}") - return None + pattern = lib.find_pattern(ocr_text) + if pattern: + print(f" 🔎 [Pattern] Match: {pattern['pattern']} (category={pattern['category']})") + if pattern['category'] in ('dialog', 'popup'): + print(f"🧠 [Pattern] DÉTECTÉ: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'") + return pattern + else: + print(f" 🔎 [Pattern] Ignoré (catégorie {pattern['category']})") + return None + else: + print(f" 🔎 [Pattern] Aucun match dans le texte OCR") + return None except Exception as e: - logger.debug(f"Pattern check échoué: {e}") + import traceback + print(f" 🔎 [Pattern] EXCEPTION: {e}") + traceback.print_exc() return None def _handle_detected_pattern(pattern: Dict[str, Any]) -> bool: - """Gère automatiquement un pattern UI détecté (clic sur OK, fermer popup, etc.). + """Gère automatiquement un pattern UI détecté. - Returns: - True si le pattern a été géré avec succès. + Cherche le bouton cible via OCR (position réelle sur l'écran), + avec fallback sur les coordonnées typiques si l'OCR ne trouve pas. """ import pyautogui action = pattern.get('action') target = pattern.get('target', '') - bbox = pattern.get('typical_bbox') alternatives = pattern.get('alternatives', []) - if action == 'click' and bbox: - screen_w, screen_h = pyautogui.size() - x = int((bbox[0] + bbox[2]) / 2 * screen_w) - y = int((bbox[1] + bbox[3]) / 2 * screen_h) - print(f"🤖 [Pattern] Clic automatique sur '{target}' à ({x}, {y})") - pyautogui.click(x, y) - time.sleep(1.0) - return True + if action == 'click': + candidates = [target] + alternatives + + # Chercher le bouton via OCR sur l'écran actuel + try: + import mss + from PIL import Image + from services.ocr_service import ocr_extract_words + + with mss.mss() as sct: + monitor = sct.monitors[1] + screenshot = sct.grab(monitor) + screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') + + words = ocr_extract_words(screen) + + # Collecter TOUS les matchs, puis prendre le plus bas (boutons = bas du dialogue) + all_matches = [] + + for candidate in candidates: + candidate_lower = candidate.lower() + for word in words: + word_text = word['text'].lower() + if len(word_text) < 3 or len(candidate_lower) < 3: + continue + if word_text == candidate_lower: + x1, y1, x2, y2 = word['bbox'] + all_matches.append({ + 'text': word['text'], + 'x': int((x1 + x2) / 2), + 'y': int((y1 + y2) / 2), + 'match_type': 'exact', + }) + + # Recherche partielle (ex: "nregistrer" sans le E souligné) + if not all_matches: + for candidate in candidates: + if len(candidate) > 3: + partial = candidate[1:].lower() + for word in words: + if partial in word['text'].lower(): + x1, y1, x2, y2 = word['bbox'] + all_matches.append({ + 'text': word['text'], + 'x': int((x1 + x2) / 2), + 'y': int((y1 + y2) / 2), + 'match_type': 'partial', + }) + + if all_matches: + for m in all_matches: + print(f" 🔎 [Pattern] Candidat: '{m['text']}' à ({m['x']}, {m['y']}) [{m['match_type']}]") + + best = max(all_matches, key=lambda m: m['y']) + print(f"🤖 [Pattern] Clic sur '{best['text']}' à ({best['x']}, {best['y']}) [le plus bas = bouton]") + pyautogui.click(best['x'], best['y']) + time.sleep(1.0) + return True + + except Exception as e: + print(f" 🔎 [Pattern] OCR bouton échoué: {e}") + + print(f" 🔎 [Pattern] Bouton '{target}' introuvable par OCR — pas de clic") + return False elif action == 'hotkey': keys = target.split('+') @@ -309,10 +378,18 @@ def execute_workflow_thread(execution_id: str, workflow_id: str, app): db.session.commit() # Vérifier si un dialogue/popup bloque l'écran avant l'étape - if index > 0 and _execution_state.get('execution_mode') in ('intelligent', 'debug'): - detected = _check_screen_for_patterns() - if detected: - _handle_detected_pattern(detected) + if index > 0: + exec_mode = _execution_state.get('execution_mode', 'basic') + print(f" 🔎 [Pattern] Vérification avant étape {index+1} (mode={exec_mode})") + if exec_mode in ('intelligent', 'debug'): + detected = _check_screen_for_patterns() + if detected: + print(f" 🧠 [Pattern] TROUVÉ: {detected.get('pattern')} → {detected.get('action')} '{detected.get('target')}'") + _handle_detected_pattern(detected) + else: + print(f" 🔎 [Pattern] Aucun dialogue détecté") + else: + print(f" 🔎 [Pattern] Skip (mode {exec_mode})") print(f"\n{'='*60}") print(f"📋 [Execute] Étape {index + 1}/{len(steps)}: {step.action_type}") @@ -1084,9 +1161,35 @@ def execute_action(action_type: str, params: dict) -> dict: elif action_type in ['wait_for_anchor', 'wait']: timeout_ms = params.get('timeout_ms', params.get('timeout', 5000)) - print(f"⏳ [Action] Attente {timeout_ms}ms") - time.sleep(timeout_ms / 1000) - return {'success': True, 'output': {'waited_ms': timeout_ms}} + print(f"⏳ [Action] Attente {timeout_ms}ms (avec surveillance patterns)") + + elapsed = 0 + check_interval = 1000 + pattern_handled = None + + while elapsed < timeout_ms: + wait_chunk = min(check_interval, timeout_ms - elapsed) + time.sleep(wait_chunk / 1000) + elapsed += wait_chunk + + if execution_mode in ('intelligent', 'debug'): + try: + detected = _check_screen_for_patterns() + if detected: + print(f"🧠 [Wait] Dialogue détecté: {detected.get('pattern')} → {detected.get('target')}") + _handle_detected_pattern(detected) + pattern_handled = detected + break + except Exception as e: + print(f" 🔎 [Wait] Erreur check: {e}") + + return { + 'success': True, + 'output': { + 'waited_ms': elapsed, + 'pattern_handled': pattern_handled.get('pattern') if pattern_handled else None + } + } elif action_type == 'keyboard_shortcut': keys = params.get('keys', [])