feat(réflexes): patterns overwrite/dont_save + handler EasyOCR + prints diagnostic

Nouveaux patterns :
- dialog_overwrite : "voulez-vous remplacer/écraser", "fichier existe déjà" → Oui
- dialog_dont_save : "ne pas enregistrer", "quitter sans enregistrer" → Ne pas enregistrer

Handler amélioré (handle_detected_pattern) :
- EasyOCR au lieu de docTR (meilleure lecture des boutons GUI)
- Match par inclusion (pas seulement exact)
- Suppression fallback VLM (Ollama n'a plus de VRAM)
- Prints visibles pour diagnostic

28 patterns au total, testés sur 6 dialogues types.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-26 04:26:32 +02:00
parent 77faa03ec9
commit f73a2a59a9
2 changed files with 61 additions and 48 deletions
--- a/core/execution/input_handler.py
+++ b/core/execution/input_handler.py
@@ -116,13 +116,13 @@ def check_screen_for_patterns() -> Optional[Dict[str, Any]]:

        pattern = lib.find_pattern(ocr_text)
        if pattern and pattern['category'] in ('dialog', 'popup'):
-            logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'")
+            print(f"🧠 [PatternCheck] Détecté: '{pattern['pattern']}' → {pattern['action']} '{pattern['target']}'")
            return pattern

        return None

    except Exception as e:
-        logger.debug(f"Pattern check échoué: {e}")
+        print(f"⚠️ [PatternCheck] Erreur: {e}")
        return None


@@ -145,26 +145,40 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:

    if action == 'click':
        candidates_labels = [target] + alternatives
+        print(f"🔧 [Réflexe/handle] Recherche bouton parmi: {candidates_labels}")

        try:
            import mss
+            import numpy as np
            from PIL import Image

-            # Importer OCR (essayer les deux chemins)
-            try:
-                from services.ocr_service import ocr_extract_words
-            except ImportError:
-                from core.extraction.field_extractor import FieldExtractor
-                extractor = FieldExtractor()
-                def ocr_extract_words(img):
-                    return extractor.extract_words_from_image(img)
-
            with mss.mss() as sct:
                monitor = sct.monitors[0]
                screenshot = sct.grab(monitor)
                screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

-            words = ocr_extract_words(screen)
+            # EasyOCR (rapide, bonne qualité GUI) avec fallback docTR
+            words = []
+            try:
+                import easyocr
+                _reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
+                results = _reader.readtext(np.array(screen))
+                for (bbox_pts, text, conf) in results:
+                    if not text or len(text.strip()) < 1:
+                        continue
+                    x1 = int(min(p[0] for p in bbox_pts))
+                    y1 = int(min(p[1] for p in bbox_pts))
+                    x2 = int(max(p[0] for p in bbox_pts))
+                    y2 = int(max(p[1] for p in bbox_pts))
+                    words.append({'text': text.strip(), 'bbox': [x1, y1, x2, y2]})
+            except ImportError:
+                try:
+                    from services.ocr_service import ocr_extract_words
+                    words = ocr_extract_words(screen) or []
+                except ImportError:
+                    pass
+
+            print(f"🔧 [Réflexe/handle] {len(words)} mots OCR détectés")

            # Collecter tous les matchs, prendre le plus bas (bouton = bas du dialogue)
            all_matches = []
@@ -175,58 +189,28 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
                    word_text = word['text'].lower()
                    if len(word_text) < 2 or len(candidate_lower) < 2:
                        continue
-                    if word_text == candidate_lower:
+                    # Match exact ou inclusion
+                    if word_text == candidate_lower or candidate_lower in word_text or word_text in candidate_lower:
                        x1, y1, x2, y2 = word['bbox']
                        all_matches.append({
                            'text': word['text'],
                            'x': int((x1 + x2) / 2),
                            'y': int((y1 + y2) / 2),
-                            'match_type': 'exact',
+                            'candidate': candidate,
                        })

-            # Recherche partielle (lettre soulignée manquante)
-            if not all_matches:
-                for candidate in candidates_labels:
-                    if len(candidate) > 3:
-                        partial = candidate[1:].lower()
-                        for word in words:
-                            if partial in word['text'].lower():
-                                x1, y1, x2, y2 = word['bbox']
-                                all_matches.append({
-                                    'text': word['text'],
-                                    'x': int((x1 + x2) / 2),
-                                    'y': int((y1 + y2) / 2),
-                                    'match_type': 'partial',
-                                })
-
            if all_matches:
                best = max(all_matches, key=lambda m: m['y'])
-                logger.info(f"Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
+                print(f"✅ [Réflexe/handle] Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
                pyautogui.click(best['x'], best['y'])
                time.sleep(1.0)
                return True

-            logger.info(f"Bouton '{target}' introuvable par OCR — appel VLM...")
-            vlm_result = vlm_reason_about_screen(
-                objective=f"Cliquer sur le bouton '{target}'",
-                context=f"Un dialogue '{pattern.get('pattern')}' est détecté"
-            )
-            if vlm_result and vlm_result.get('action') == 'click' and vlm_result.get('target'):
-                vlm_target = vlm_result['target']
-                for word in words:
-                    if vlm_target.lower() in word['text'].lower():
-                        x1, y1, x2, y2 = word['bbox']
-                        x = int((x1 + x2) / 2)
-                        y = int((y1 + y2) / 2)
-                        logger.info(f"VLM → clic sur '{word['text']}' à ({x}, {y})")
-                        pyautogui.click(x, y)
-                        time.sleep(1.0)
-                        return True
-
+            print(f"⚠️ [Réflexe/handle] Bouton '{target}' introuvable parmi {[w['text'] for w in words[:15]]}")
            return False

        except Exception as e:
-            logger.warning(f"OCR bouton échoué: {e}")
+            print(f"⚠️ [Réflexe/handle] Erreur: {e}")
            return False

    elif action == 'hotkey':