feat(réflexes): patterns overwrite/dont_save + handler EasyOCR + prints diagnostic
Nouveaux patterns : - dialog_overwrite : "voulez-vous remplacer/écraser", "fichier existe déjà" → Oui - dialog_dont_save : "ne pas enregistrer", "quitter sans enregistrer" → Ne pas enregistrer Handler amélioré (handle_detected_pattern) : - EasyOCR au lieu de docTR (meilleure lecture des boutons GUI) - Match par inclusion (pas seulement exact) - Suppression fallback VLM (Ollama n'a plus de VRAM) - Prints visibles pour diagnostic 28 patterns au total, testés sur 6 dialogues types. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -116,13 +116,13 @@ def check_screen_for_patterns() -> Optional[Dict[str, Any]]:
|
||||
|
||||
pattern = lib.find_pattern(ocr_text)
|
||||
if pattern and pattern['category'] in ('dialog', 'popup'):
|
||||
logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'")
|
||||
print(f"🧠 [PatternCheck] Détecté: '{pattern['pattern']}' → {pattern['action']} '{pattern['target']}'")
|
||||
return pattern
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Pattern check échoué: {e}")
|
||||
print(f"⚠️ [PatternCheck] Erreur: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -145,26 +145,40 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
|
||||
|
||||
if action == 'click':
|
||||
candidates_labels = [target] + alternatives
|
||||
print(f"🔧 [Réflexe/handle] Recherche bouton parmi: {candidates_labels}")
|
||||
|
||||
try:
|
||||
import mss
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
# Importer OCR (essayer les deux chemins)
|
||||
try:
|
||||
from services.ocr_service import ocr_extract_words
|
||||
except ImportError:
|
||||
from core.extraction.field_extractor import FieldExtractor
|
||||
extractor = FieldExtractor()
|
||||
def ocr_extract_words(img):
|
||||
return extractor.extract_words_from_image(img)
|
||||
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[0]
|
||||
screenshot = sct.grab(monitor)
|
||||
screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')
|
||||
|
||||
words = ocr_extract_words(screen)
|
||||
# EasyOCR (rapide, bonne qualité GUI) avec fallback docTR
|
||||
words = []
|
||||
try:
|
||||
import easyocr
|
||||
_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
|
||||
results = _reader.readtext(np.array(screen))
|
||||
for (bbox_pts, text, conf) in results:
|
||||
if not text or len(text.strip()) < 1:
|
||||
continue
|
||||
x1 = int(min(p[0] for p in bbox_pts))
|
||||
y1 = int(min(p[1] for p in bbox_pts))
|
||||
x2 = int(max(p[0] for p in bbox_pts))
|
||||
y2 = int(max(p[1] for p in bbox_pts))
|
||||
words.append({'text': text.strip(), 'bbox': [x1, y1, x2, y2]})
|
||||
except ImportError:
|
||||
try:
|
||||
from services.ocr_service import ocr_extract_words
|
||||
words = ocr_extract_words(screen) or []
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
print(f"🔧 [Réflexe/handle] {len(words)} mots OCR détectés")
|
||||
|
||||
# Collecter tous les matchs, prendre le plus bas (bouton = bas du dialogue)
|
||||
all_matches = []
|
||||
@@ -175,58 +189,28 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
|
||||
word_text = word['text'].lower()
|
||||
if len(word_text) < 2 or len(candidate_lower) < 2:
|
||||
continue
|
||||
if word_text == candidate_lower:
|
||||
# Match exact ou inclusion
|
||||
if word_text == candidate_lower or candidate_lower in word_text or word_text in candidate_lower:
|
||||
x1, y1, x2, y2 = word['bbox']
|
||||
all_matches.append({
|
||||
'text': word['text'],
|
||||
'x': int((x1 + x2) / 2),
|
||||
'y': int((y1 + y2) / 2),
|
||||
'match_type': 'exact',
|
||||
'candidate': candidate,
|
||||
})
|
||||
|
||||
# Recherche partielle (lettre soulignée manquante)
|
||||
if not all_matches:
|
||||
for candidate in candidates_labels:
|
||||
if len(candidate) > 3:
|
||||
partial = candidate[1:].lower()
|
||||
for word in words:
|
||||
if partial in word['text'].lower():
|
||||
x1, y1, x2, y2 = word['bbox']
|
||||
all_matches.append({
|
||||
'text': word['text'],
|
||||
'x': int((x1 + x2) / 2),
|
||||
'y': int((y1 + y2) / 2),
|
||||
'match_type': 'partial',
|
||||
})
|
||||
|
||||
if all_matches:
|
||||
best = max(all_matches, key=lambda m: m['y'])
|
||||
logger.info(f"Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
|
||||
print(f"✅ [Réflexe/handle] Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
|
||||
pyautogui.click(best['x'], best['y'])
|
||||
time.sleep(1.0)
|
||||
return True
|
||||
|
||||
logger.info(f"Bouton '{target}' introuvable par OCR — appel VLM...")
|
||||
vlm_result = vlm_reason_about_screen(
|
||||
objective=f"Cliquer sur le bouton '{target}'",
|
||||
context=f"Un dialogue '{pattern.get('pattern')}' est détecté"
|
||||
)
|
||||
if vlm_result and vlm_result.get('action') == 'click' and vlm_result.get('target'):
|
||||
vlm_target = vlm_result['target']
|
||||
for word in words:
|
||||
if vlm_target.lower() in word['text'].lower():
|
||||
x1, y1, x2, y2 = word['bbox']
|
||||
x = int((x1 + x2) / 2)
|
||||
y = int((y1 + y2) / 2)
|
||||
logger.info(f"VLM → clic sur '{word['text']}' à ({x}, {y})")
|
||||
pyautogui.click(x, y)
|
||||
time.sleep(1.0)
|
||||
return True
|
||||
|
||||
print(f"⚠️ [Réflexe/handle] Bouton '{target}' introuvable parmi {[w['text'] for w in words[:15]]}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"OCR bouton échoué: {e}")
|
||||
print(f"⚠️ [Réflexe/handle] Erreur: {e}")
|
||||
return False
|
||||
|
||||
elif action == 'hotkey':
|
||||
|
||||
Reference in New Issue
Block a user