feat(réflexes): patterns overwrite/dont_save + handler EasyOCR + prints diagnostic

Nouveaux patterns :
- dialog_overwrite : "voulez-vous remplacer/écraser", "fichier existe déjà" → Oui
- dialog_dont_save : "ne pas enregistrer", "quitter sans enregistrer" → Ne pas enregistrer

Handler amélioré (handle_detected_pattern) :
- EasyOCR au lieu de docTR (meilleure lecture des boutons GUI)
- Match par inclusion (pas seulement exact)
- Suppression fallback VLM (Ollama n'a plus de VRAM)
- Prints visibles pour diagnostic

28 patterns au total, testés sur 6 dialogues types.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-26 04:26:32 +02:00
parent 77faa03ec9
commit f73a2a59a9
2 changed files with 61 additions and 48 deletions

View File

@@ -116,13 +116,13 @@ def check_screen_for_patterns() -> Optional[Dict[str, Any]]:
pattern = lib.find_pattern(ocr_text)
if pattern and pattern['category'] in ('dialog', 'popup'):
logger.info(f"Pattern UI détecté: {pattern['pattern']}{pattern['action']} '{pattern['target']}'")
print(f"🧠 [PatternCheck] Détecté: '{pattern['pattern']}'{pattern['action']} '{pattern['target']}'")
return pattern
return None
except Exception as e:
logger.debug(f"Pattern check échoué: {e}")
print(f"⚠️ [PatternCheck] Erreur: {e}")
return None
@@ -145,26 +145,40 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
if action == 'click':
candidates_labels = [target] + alternatives
print(f"🔧 [Réflexe/handle] Recherche bouton parmi: {candidates_labels}")
try:
import mss
import numpy as np
from PIL import Image
# Importer OCR (essayer les deux chemins)
try:
from services.ocr_service import ocr_extract_words
except ImportError:
from core.extraction.field_extractor import FieldExtractor
extractor = FieldExtractor()
def ocr_extract_words(img):
return extractor.extract_words_from_image(img)
with mss.mss() as sct:
monitor = sct.monitors[0]
screenshot = sct.grab(monitor)
screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')
words = ocr_extract_words(screen)
# EasyOCR (rapide, bonne qualité GUI) avec fallback docTR
words = []
try:
import easyocr
_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
results = _reader.readtext(np.array(screen))
for (bbox_pts, text, conf) in results:
if not text or len(text.strip()) < 1:
continue
x1 = int(min(p[0] for p in bbox_pts))
y1 = int(min(p[1] for p in bbox_pts))
x2 = int(max(p[0] for p in bbox_pts))
y2 = int(max(p[1] for p in bbox_pts))
words.append({'text': text.strip(), 'bbox': [x1, y1, x2, y2]})
except ImportError:
try:
from services.ocr_service import ocr_extract_words
words = ocr_extract_words(screen) or []
except ImportError:
pass
print(f"🔧 [Réflexe/handle] {len(words)} mots OCR détectés")
# Collecter tous les matchs, prendre le plus bas (bouton = bas du dialogue)
all_matches = []
@@ -175,58 +189,28 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
word_text = word['text'].lower()
if len(word_text) < 2 or len(candidate_lower) < 2:
continue
if word_text == candidate_lower:
# Match exact ou inclusion
if word_text == candidate_lower or candidate_lower in word_text or word_text in candidate_lower:
x1, y1, x2, y2 = word['bbox']
all_matches.append({
'text': word['text'],
'x': int((x1 + x2) / 2),
'y': int((y1 + y2) / 2),
'match_type': 'exact',
'candidate': candidate,
})
# Recherche partielle (lettre soulignée manquante)
if not all_matches:
for candidate in candidates_labels:
if len(candidate) > 3:
partial = candidate[1:].lower()
for word in words:
if partial in word['text'].lower():
x1, y1, x2, y2 = word['bbox']
all_matches.append({
'text': word['text'],
'x': int((x1 + x2) / 2),
'y': int((y1 + y2) / 2),
'match_type': 'partial',
})
if all_matches:
best = max(all_matches, key=lambda m: m['y'])
logger.info(f"Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
print(f"✅ [Réflexe/handle] Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
pyautogui.click(best['x'], best['y'])
time.sleep(1.0)
return True
logger.info(f"Bouton '{target}' introuvable par OCR — appel VLM...")
vlm_result = vlm_reason_about_screen(
objective=f"Cliquer sur le bouton '{target}'",
context=f"Un dialogue '{pattern.get('pattern')}' est détecté"
)
if vlm_result and vlm_result.get('action') == 'click' and vlm_result.get('target'):
vlm_target = vlm_result['target']
for word in words:
if vlm_target.lower() in word['text'].lower():
x1, y1, x2, y2 = word['bbox']
x = int((x1 + x2) / 2)
y = int((y1 + y2) / 2)
logger.info(f"VLM → clic sur '{word['text']}' à ({x}, {y})")
pyautogui.click(x, y)
time.sleep(1.0)
return True
print(f"⚠️ [Réflexe/handle] Bouton '{target}' introuvable parmi {[w['text'] for w in words[:15]]}")
return False
except Exception as e:
logger.warning(f"OCR bouton échoué: {e}")
print(f"⚠️ [Réflexe/handle] Erreur: {e}")
return False
elif action == 'hotkey':