feat(knowledge): wire UIPatternLibrary into executor + stream processor
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 12s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 15s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
VWB Executor:
- _check_screen_for_patterns(): screen capture + OCR + pattern matching
- _handle_detected_pattern(): automatic click on known dialogs
- Runs the check between every step in intelligent/debug mode
- If a dialog blocks execution (OK, Save, Cancel), Léa handles it on her own

Stream Processor:
- Enriches each ScreenState with ui_pattern / ui_pattern_action / ui_pattern_target
- Detected patterns are logged and stored in the results
- Lets the GraphBuilder know which screens are dialogs

Phase 2 of the "native knowledge of the environment" plan.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
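The diff below only contains the two call sites; UIPatternLibrary itself ships separately. A minimal Python sketch of the contract both call sites assume — find_pattern(ocr_text) returning a dict with 'pattern', 'category', 'action', 'target' and an optional normalized 'typical_bbox', or None — where the class name, sample patterns, and keywords are hypothetical rather than copied from core.knowledge.ui_patterns:

from typing import Any, Dict, Optional


class UIPatternLibrarySketch:
    """Illustrative stand-in; the real UIPatternLibrary matching logic may differ."""

    _PATTERNS = [
        {
            "pattern": "save_confirmation_dialog",     # hypothetical name
            "category": "dialog",
            "action": "click",
            "target": "Save",
            "typical_bbox": (0.45, 0.55, 0.55, 0.60),  # normalized (x0, y0, x1, y1)
            "keywords": ("do you want to save", "save changes"),
        },
        {
            "pattern": "generic_popup_dismiss",        # hypothetical name
            "category": "popup",
            "action": "hotkey",
            "target": "esc",
            "typical_bbox": None,
            "keywords": ("got it", "dismiss"),
        },
    ]

    def find_pattern(self, ocr_text: str) -> Optional[Dict[str, Any]]:
        """Return the first pattern whose keywords appear in the OCR text, else None."""
        text = ocr_text.lower()
        for entry in self._PATTERNS:
            if any(keyword in text for keyword in entry["keywords"]):
                return entry
        return None

With a record like this, an 'action' of 'click' drives the bbox-centered click in the executor, while 'hotkey' drives pyautogui.hotkey.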
@@ -2042,6 +2042,24 @@ class StreamProcessor:
            self._screen_states[session_id] = []
        self._screen_states[session_id].append(screen_state)

        # Enrich with the known UI patterns
        try:
            from core.knowledge.ui_patterns import UIPatternLibrary

            detected_text = getattr(screen_state.perception, "detected_text", [])
            if detected_text:
                ocr_text = " ".join(str(t) for t in detected_text) if isinstance(detected_text, list) else str(detected_text)
                lib = UIPatternLibrary()
                pattern = lib.find_pattern(ocr_text)
                if pattern:
                    result["ui_pattern"] = pattern["pattern"]
                    result["ui_pattern_action"] = pattern["action"]
                    result["ui_pattern_target"] = pattern["target"]
                    logger.info(f"UI pattern detected: {pattern['pattern']} → {pattern['target']}")
        except ImportError:
            pass
        except Exception as e:
            logger.debug(f"Pattern check: {e}")

        logger.info(
            f"Screenshot analyzed: {shot_id} | "
            f"{result['ui_elements_count']} UI elements, "
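For illustration only (values invented, not taken from a real session), a screenshot whose OCR text matches a known dialog would come out of the block above with a result roughly like:

result = {
    "ui_elements_count": 14,                   # whatever the analyzer already counted
    "ui_pattern": "save_confirmation_dialog",  # hypothetical pattern name
    "ui_pattern_action": "click",
    "ui_pattern_target": "Save",
}
# The GraphBuilder can read the three ui_pattern_* keys to mark this
# ScreenState as a dialog rather than a regular application screen.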
@@ -178,6 +178,82 @@ _execution_state = {
}


def _check_screen_for_patterns() -> Optional[Dict[str, Any]]:
    """Check whether the current screen contains a known UI pattern (dialog, popup).

    Captures the screen, extracts its text with lightweight OCR, and looks the
    text up in the UIPatternLibrary.

    Returns:
        Dict with the matched pattern and the action to perform, or None.
    """
    try:
        from core.knowledge.ui_patterns import UIPatternLibrary
        import mss
        from PIL import Image
        import numpy as np

        lib = UIPatternLibrary()

        with mss.mss() as sct:
            monitor = sct.monitors[1]
            screenshot = sct.grab(monitor)
            screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

        try:
            from services.ocr_service import ocr_extract_text
            ocr_text = ocr_extract_text(screen)
        except ImportError:
            return None

        if not ocr_text or len(ocr_text) < 5:
            return None

        pattern = lib.find_pattern(ocr_text)
        if pattern and pattern['category'] in ('dialog', 'popup'):
            print(f"🧠 [Pattern] Detected: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'")
            print(f"   OCR text: {ocr_text[:100]}...")
            return pattern

        return None

    except Exception as e:
        logger.debug(f"Pattern check failed: {e}")
        return None


def _handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
    """Automatically handle a detected UI pattern (click OK, close a popup, etc.).

    Returns:
        True if the pattern was handled successfully.
    """
    import pyautogui

    action = pattern.get('action')
    target = pattern.get('target', '')
    bbox = pattern.get('typical_bbox')
    alternatives = pattern.get('alternatives', [])

    if action == 'click' and bbox:
        screen_w, screen_h = pyautogui.size()
        x = int((bbox[0] + bbox[2]) / 2 * screen_w)
        y = int((bbox[1] + bbox[3]) / 2 * screen_h)
        print(f"🤖 [Pattern] Automatic click on '{target}' at ({x}, {y})")
        pyautogui.click(x, y)
        time.sleep(1.0)
        return True

    elif action == 'hotkey':
        keys = target.split('+')
        print(f"🤖 [Pattern] Automatic hotkey: {target}")
        pyautogui.hotkey(*keys)
        time.sleep(0.5)
        return True

    return False


def execute_workflow_thread(execution_id: str, workflow_id: str, app):
    """
    Workflow execution thread.
@@ -231,6 +307,12 @@ def execute_workflow_thread(execution_id: str, workflow_id: str, app):
        db.session.add(step_result)
        db.session.commit()

        # Check whether a dialog/popup is blocking the screen before this step
        if index > 0 and _execution_state.get('execution_mode') in ('intelligent', 'debug'):
            detected = _check_screen_for_patterns()
            if detected:
                _handle_detected_pattern(detected)

        print(f"\n{'='*60}")
        print(f"📋 [Execute] Step {index + 1}/{len(steps)}: {step.action_type}")
        print(f"   step_id={step.id}, label={step.label}")
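As a quick sanity check of the coordinate arithmetic in _handle_detected_pattern (the bbox is assumed normalized to 0..1; both the bbox and the 1920×1080 resolution are invented values, the real code asks pyautogui.size()):

bbox = (0.40, 0.55, 0.60, 0.62)      # hypothetical "Save" button region
screen_w, screen_h = 1920, 1080      # assumed resolution

x = int((bbox[0] + bbox[2]) / 2 * screen_w)  # (0.40 + 0.60) / 2 * 1920 = 960
y = int((bbox[1] + bbox[3]) / 2 * screen_h)  # (0.55 + 0.62) / 2 * 1080 = 631
print(x, y)  # 960 631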