From f73a2a59a9be61198f05f96f2f023761dbafdbea Mon Sep 17 00:00:00 2001 From: Dom Date: Sun, 26 Apr 2026 04:26:32 +0200 Subject: [PATCH] =?UTF-8?q?feat(r=C3=A9flexes):=20patterns=20overwrite/don?= =?UTF-8?q?t=5Fsave=20+=20handler=20EasyOCR=20+=20prints=20diagnostic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nouveaux patterns : - dialog_overwrite : "voulez-vous remplacer/écraser", "fichier existe déjà" → Oui - dialog_dont_save : "ne pas enregistrer", "quitter sans enregistrer" → Ne pas enregistrer Handler amélioré (handle_detected_pattern) : - EasyOCR au lieu de docTR (meilleure lecture des boutons GUI) - Match par inclusion (pas seulement exact) - Suppression fallback VLM (Ollama n'a plus de VRAM) - Prints visibles pour diagnostic 28 patterns au total, testés sur 6 dialogues types. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/execution/input_handler.py | 80 +++++++++++++-------------------- core/knowledge/ui_patterns.py | 29 ++++++++++++ 2 files changed, 61 insertions(+), 48 deletions(-) diff --git a/core/execution/input_handler.py b/core/execution/input_handler.py index bd0bd9e5b..c53ba7098 100644 --- a/core/execution/input_handler.py +++ b/core/execution/input_handler.py @@ -116,13 +116,13 @@ def check_screen_for_patterns() -> Optional[Dict[str, Any]]: pattern = lib.find_pattern(ocr_text) if pattern and pattern['category'] in ('dialog', 'popup'): - logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'") + print(f"🧠 [PatternCheck] Détecté: '{pattern['pattern']}' → {pattern['action']} '{pattern['target']}'") return pattern return None except Exception as e: - logger.debug(f"Pattern check échoué: {e}") + print(f"⚠️ [PatternCheck] Erreur: {e}") return None @@ -145,26 +145,40 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool: if action == 'click': candidates_labels = [target] + alternatives + print(f"🔧 [Réflexe/handle] Recherche bouton parmi: {candidates_labels}") try: import mss + import numpy as np from PIL import Image - # Importer OCR (essayer les deux chemins) - try: - from services.ocr_service import ocr_extract_words - except ImportError: - from core.extraction.field_extractor import FieldExtractor - extractor = FieldExtractor() - def ocr_extract_words(img): - return extractor.extract_words_from_image(img) - with mss.mss() as sct: monitor = sct.monitors[0] screenshot = sct.grab(monitor) screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') - words = ocr_extract_words(screen) + # EasyOCR (rapide, bonne qualité GUI) avec fallback docTR + words = [] + try: + import easyocr + _reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False) + results = _reader.readtext(np.array(screen)) + for (bbox_pts, text, conf) in results: + if not text or len(text.strip()) < 1: + continue + x1 = int(min(p[0] for p in bbox_pts)) + y1 = int(min(p[1] for p in bbox_pts)) + x2 = int(max(p[0] for p in bbox_pts)) + y2 = int(max(p[1] for p in bbox_pts)) + words.append({'text': text.strip(), 'bbox': [x1, y1, x2, y2]}) + except ImportError: + try: + from services.ocr_service import ocr_extract_words + words = ocr_extract_words(screen) or [] + except ImportError: + pass + + print(f"🔧 [Réflexe/handle] {len(words)} mots OCR détectés") # Collecter tous les matchs, prendre le plus bas (bouton = bas du dialogue) all_matches = [] @@ -175,58 +189,28 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool: word_text = word['text'].lower() if len(word_text) < 2 or len(candidate_lower) < 2: continue - if word_text == candidate_lower: + # Match exact ou inclusion + if word_text == candidate_lower or candidate_lower in word_text or word_text in candidate_lower: x1, y1, x2, y2 = word['bbox'] all_matches.append({ 'text': word['text'], 'x': int((x1 + x2) / 2), 'y': int((y1 + y2) / 2), - 'match_type': 'exact', + 'candidate': candidate, }) - # Recherche partielle (lettre soulignée manquante) - if not all_matches: - for candidate in candidates_labels: - if len(candidate) > 3: - partial = candidate[1:].lower() - for word in words: - if partial in word['text'].lower(): - x1, y1, x2, y2 = word['bbox'] - all_matches.append({ - 'text': word['text'], - 'x': int((x1 + x2) / 2), - 'y': int((y1 + y2) / 2), - 'match_type': 'partial', - }) - if all_matches: best = max(all_matches, key=lambda m: m['y']) - logger.info(f"Clic sur '{best['text']}' à ({best['x']}, {best['y']})") + print(f"✅ [Réflexe/handle] Clic sur '{best['text']}' à ({best['x']}, {best['y']})") pyautogui.click(best['x'], best['y']) time.sleep(1.0) return True - logger.info(f"Bouton '{target}' introuvable par OCR — appel VLM...") - vlm_result = vlm_reason_about_screen( - objective=f"Cliquer sur le bouton '{target}'", - context=f"Un dialogue '{pattern.get('pattern')}' est détecté" - ) - if vlm_result and vlm_result.get('action') == 'click' and vlm_result.get('target'): - vlm_target = vlm_result['target'] - for word in words: - if vlm_target.lower() in word['text'].lower(): - x1, y1, x2, y2 = word['bbox'] - x = int((x1 + x2) / 2) - y = int((y1 + y2) / 2) - logger.info(f"VLM → clic sur '{word['text']}' à ({x}, {y})") - pyautogui.click(x, y) - time.sleep(1.0) - return True - + print(f"⚠️ [Réflexe/handle] Bouton '{target}' introuvable parmi {[w['text'] for w in words[:15]]}") return False except Exception as e: - logger.warning(f"OCR bouton échoué: {e}") + print(f"⚠️ [Réflexe/handle] Erreur: {e}") return False elif action == 'hotkey': diff --git a/core/knowledge/ui_patterns.py b/core/knowledge/ui_patterns.py index 6ccff3ba6..86fa9cba9 100644 --- a/core/knowledge/ui_patterns.py +++ b/core/knowledge/ui_patterns.py @@ -101,6 +101,35 @@ BUILTIN_PATTERNS: List[Dict[str, Any]] = [ "typical_bbox": [0.35, 0.60, 0.45, 0.68], "os": "any", }, + { + "name": "dialog_overwrite", + "category": "dialog", + "triggers": [ + "voulez-vous remplacer", "voulez-vous écraser", + "remplacer le fichier", "replace existing", + "fichier existe déjà", "already exists", + "overwrite", "écraser", + ], + "action": "click", + "target": "Oui", + "alternatives": ["Yes", "Remplacer", "Replace", "Confirmer"], + "typical_zone": "dialog_center", + "os": "any", + }, + { + "name": "dialog_dont_save", + "category": "dialog", + "triggers": [ + "ne pas enregistrer", "don't save", + "ne pas sauvegarder", "quitter sans enregistrer", + "discard changes", + ], + "action": "click", + "target": "Ne pas enregistrer", + "alternatives": ["Don't Save", "Ne pas sauvegarder", "Non"], + "typical_zone": "dialog_center", + "os": "any", + }, # === NAVIGATION FENÊTRE === {