feat: boucle ORA (observe→raisonne→agit) avec vérification post-action
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 13s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 15s
tests / Tests sécurité (critique) (push) Has been skipped
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 13s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 15s
tests / Tests sécurité (critique) (push) Has been skipped
Nouveau module core/execution/observe_reason_act.py (794 lignes) : - ORALoop : boucle unifiée pour workflow VWB et instructions - observe() : capture écran + pHash + titre fenêtre - reason_workflow_step() : mappe step VWB → Decision (sans VLM) - act() : template matching → find_element → pyautogui - verify() : Level 1 pHash + Level 2 VLM conditionnel - run_workflow() : boucle complète avec retries et callbacks Nouveau mode execution_mode='verified' dans execute.py : - run_workflow_verified() utilise ORALoop - Modes basic/intelligent/debug inchangés (zéro risque) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ from .error_handler import ErrorHandler, ErrorType, RecoveryStrategy
|
||||
from .workflow_runner import WorkflowRunner, RunResult, RunStatus, RunnerConfig
|
||||
from .dag_executor import DAGExecutor, WorkflowStep, StepType, StepStatus, DAGExecutionResult
|
||||
from .llm_actions import LLMActionHandler
|
||||
from .observe_reason_act import ORALoop, Observation, Decision, VerificationResult, LoopResult
|
||||
|
||||
# Import tardif pour éviter import circulaire avec pipeline
|
||||
def _get_execution_loop():
|
||||
@@ -34,5 +35,11 @@ __all__ = [
|
||||
'StepStatus',
|
||||
'DAGExecutionResult',
|
||||
'LLMActionHandler',
|
||||
# ORA — boucle Observe-Raisonne-Agit avec vérification
|
||||
'ORALoop',
|
||||
'Observation',
|
||||
'Decision',
|
||||
'VerificationResult',
|
||||
'LoopResult',
|
||||
# ExecutionLoop accessible via import direct du module
|
||||
]
|
||||
|
||||
794
core/execution/observe_reason_act.py
Normal file
794
core/execution/observe_reason_act.py
Normal file
@@ -0,0 +1,794 @@
|
||||
"""
|
||||
Boucle Observe → Raisonne → Agit (ORA) pour l'exécution vérifiée de workflows.
|
||||
|
||||
Chaque étape du workflow passe par 3 phases :
|
||||
1. 👁 OBSERVE : capture écran + pHash + titre fenêtre
|
||||
2. 🧠 RAISONNE : traduit le step VWB en Decision exécutable
|
||||
3. 🎯 AGIT : exécute l'action (clic, saisie, raccourci, attente)
|
||||
4. ✅/❌ VÉRIFIE : compare pré/post screenshot pour confirmer le résultat
|
||||
|
||||
Module standalone — importable depuis n'importe où.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import subprocess
|
||||
import base64
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Imports conditionnels ---
|
||||
try:
|
||||
from PIL import Image
|
||||
PIL_AVAILABLE = True
|
||||
except ImportError:
|
||||
Image = None
|
||||
PIL_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import mss as mss_lib
|
||||
MSS_AVAILABLE = True
|
||||
except ImportError:
|
||||
mss_lib = None
|
||||
MSS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import cv2
|
||||
import numpy as np
|
||||
CV2_AVAILABLE = True
|
||||
except ImportError:
|
||||
cv2 = None
|
||||
np = None
|
||||
CV2_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import pyautogui
|
||||
PYAUTOGUI_AVAILABLE = True
|
||||
except ImportError:
|
||||
pyautogui = None
|
||||
PYAUTOGUI_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import imagehash
|
||||
IMAGEHASH_AVAILABLE = True
|
||||
except ImportError:
|
||||
imagehash = None
|
||||
IMAGEHASH_AVAILABLE = False
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# Dataclasses
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass
|
||||
class Observation:
|
||||
"""Capture de l'état courant de l'écran."""
|
||||
screenshot: Any # PIL Image
|
||||
phash: Any # imagehash.ImageHash
|
||||
window_title: str
|
||||
timestamp: float
|
||||
|
||||
|
||||
@dataclass
|
||||
class Decision:
|
||||
"""Décision d'action à exécuter."""
|
||||
action: str # "click" | "type" | "hotkey" | "wait" | "done" | "need_help"
|
||||
target: str
|
||||
value: str # texte à saisir ou touches
|
||||
reasoning: str
|
||||
expected_after: str
|
||||
confidence: float
|
||||
done: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class VerificationResult:
|
||||
"""Résultat de la vérification post-action."""
|
||||
success: bool
|
||||
change_level: str # "same" | "minor" | "major"
|
||||
matches_expected: bool
|
||||
detail: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoopResult:
|
||||
"""Résultat global de l'exécution de la boucle ORA."""
|
||||
success: bool
|
||||
steps_completed: int
|
||||
total_steps: int
|
||||
reason: str = ""
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
# Classe principale
|
||||
# ═══════════════════════════════════════════════════════════════
|
||||
|
||||
class ORALoop:
|
||||
"""
|
||||
Boucle Observe-Raisonne-Agit avec vérification intégrée.
|
||||
|
||||
Args:
|
||||
max_retries: Nombre de réessais par étape en cas d'échec de vérification.
|
||||
max_steps: Nombre maximal d'étapes autorisées (garde-fou).
|
||||
verify_level: 'none' | 'phash' | 'vlm' | 'auto'
|
||||
- none : pas de vérification post-action
|
||||
- phash : comparaison pHash uniquement
|
||||
- vlm : pHash + appel VLM systématique
|
||||
- auto : pHash toujours, VLM seulement si confiance < 0.95
|
||||
"""
|
||||
|
||||
def __init__(self, max_retries: int = 2, max_steps: int = 50, verify_level: str = 'auto'):
|
||||
self.max_retries = max_retries
|
||||
self.max_steps = max_steps
|
||||
self.verify_level = verify_level
|
||||
|
||||
# Variables runtime injectées par le workflow
|
||||
self._variables: Dict[str, Any] = {}
|
||||
|
||||
# ─── Phase 1 : OBSERVE ────────────────────────────────
|
||||
|
||||
def observe(self) -> Observation:
|
||||
"""👁 Capture l'état courant de l'écran.
|
||||
|
||||
Returns:
|
||||
Observation avec screenshot PIL, pHash, titre fenêtre, timestamp.
|
||||
"""
|
||||
screenshot = self._capture_screen()
|
||||
phash = self._compute_phash(screenshot)
|
||||
window_title = self._get_active_window_title()
|
||||
ts = time.time()
|
||||
|
||||
logger.info(f"👁 [ORA/observe] titre='{window_title}' phash={phash}")
|
||||
return Observation(
|
||||
screenshot=screenshot,
|
||||
phash=phash,
|
||||
window_title=window_title,
|
||||
timestamp=ts,
|
||||
)
|
||||
|
||||
# ─── Phase 2 : RAISONNE ───────────────────────────────
|
||||
|
||||
def reason_workflow_step(self, step_params: dict, observation: Observation) -> Decision:
|
||||
"""🧠 Traduit un step VWB en Decision exécutable.
|
||||
|
||||
Le workflow dit déjà quoi faire — pas d'appel VLM ici.
|
||||
On se contente de mapper les paramètres du step vers une Decision.
|
||||
|
||||
Args:
|
||||
step_params: Paramètres du step (action_type, visual_anchor, keys, text, etc.)
|
||||
observation: Observation courante (pour contexte)
|
||||
|
||||
Returns:
|
||||
Decision prête à exécuter.
|
||||
"""
|
||||
action_type = step_params.get('action_type', '')
|
||||
params = step_params.get('parameters', {})
|
||||
anchor = step_params.get('visual_anchor', {})
|
||||
label = step_params.get('label', action_type)
|
||||
|
||||
# --- Mapper action_type vers action Decision ---
|
||||
|
||||
if action_type in ('click_anchor', 'click', 'double_click_anchor', 'right_click_anchor'):
|
||||
target_text = anchor.get('target_text', '') or label
|
||||
action = 'click'
|
||||
value = 'double' if action_type == 'double_click_anchor' else (
|
||||
'right' if action_type == 'right_click_anchor' else 'left')
|
||||
expected = f"L'écran devrait changer après le clic sur '{target_text}'"
|
||||
confidence = 0.9
|
||||
|
||||
elif action_type in ('type_text', 'type'):
|
||||
text = params.get('text', '')
|
||||
# Résoudre les variables {{var}}
|
||||
text = self._resolve_variables(text)
|
||||
action = 'type'
|
||||
target_text = ''
|
||||
value = text
|
||||
expected = f"Le texte '{text[:30]}...' devrait apparaître dans le champ"
|
||||
confidence = 0.95
|
||||
|
||||
elif action_type == 'keyboard_shortcut':
|
||||
keys = params.get('keys', [])
|
||||
action = 'hotkey'
|
||||
target_text = ''
|
||||
value = '+'.join(keys)
|
||||
expected = f"L'écran devrait réagir au raccourci {value}"
|
||||
confidence = 0.9
|
||||
|
||||
elif action_type in ('wait_for_anchor', 'wait'):
|
||||
timeout_ms = params.get('timeout_ms', params.get('timeout', 5000))
|
||||
action = 'wait'
|
||||
target_text = ''
|
||||
value = str(timeout_ms)
|
||||
expected = "Attente passive"
|
||||
confidence = 1.0
|
||||
|
||||
elif action_type in ('hover_anchor', 'hover'):
|
||||
target_text = anchor.get('target_text', '') or label
|
||||
action = 'click' # On réutilise click avec value='hover'
|
||||
value = 'hover'
|
||||
expected = f"Le curseur survole '{target_text}'"
|
||||
confidence = 0.95
|
||||
|
||||
elif action_type in ('scroll_to_anchor', 'scroll'):
|
||||
direction = params.get('scroll_direction', 'down')
|
||||
amount = params.get('scroll_amount', params.get('scroll_step_pixels', 3))
|
||||
action = 'hotkey' # Scroll passe par pyautogui.scroll
|
||||
target_text = ''
|
||||
value = f"scroll_{direction}_{amount}"
|
||||
expected = f"L'écran devrait défiler vers {direction}"
|
||||
confidence = 0.9
|
||||
|
||||
elif action_type in ('focus_anchor', 'focus'):
|
||||
target_text = anchor.get('target_text', '') or label
|
||||
action = 'click'
|
||||
value = 'focus'
|
||||
expected = f"L'élément '{target_text}' a le focus"
|
||||
confidence = 0.95
|
||||
|
||||
elif action_type in ('ai_analyze_text', 'ai_ocr', 'ai_summarize',
|
||||
'ai_extract', 'ai_classify', 'ai_custom',
|
||||
'extract_text', 'screenshot_evidence',
|
||||
'verify_element_exists', 'verify_element'):
|
||||
# Actions passthrough : exécutées tel quel par execute_action existant
|
||||
action = 'passthrough'
|
||||
target_text = ''
|
||||
value = action_type
|
||||
expected = "L'action IA/extraction devrait retourner un résultat"
|
||||
confidence = 0.95
|
||||
|
||||
else:
|
||||
action = 'passthrough'
|
||||
target_text = ''
|
||||
value = action_type
|
||||
expected = f"Action '{action_type}' exécutée"
|
||||
confidence = 0.8
|
||||
|
||||
decision = Decision(
|
||||
action=action,
|
||||
target=target_text,
|
||||
value=value,
|
||||
reasoning=f"Step VWB '{label}' → {action} (confiance={confidence:.2f})",
|
||||
expected_after=expected,
|
||||
confidence=confidence,
|
||||
)
|
||||
logger.info(f"🧠 [ORA/reason] {decision.action} target='{decision.target}' value='{decision.value[:50]}'")
|
||||
return decision
|
||||
|
||||
# ─── Phase 3 : AGIT ───────────────────────────────────
|
||||
|
||||
def act(self, decision: Decision, step_params: dict = None) -> bool:
|
||||
"""🎯 Exécute la Decision.
|
||||
|
||||
Utilise le même pipeline que execute.py :
|
||||
- Template matching → CLIP → OCR/UI-TARS pour les clics
|
||||
- safe_type_text pour la saisie
|
||||
- pyautogui.hotkey pour les raccourcis
|
||||
|
||||
Args:
|
||||
decision: Decision à exécuter.
|
||||
step_params: Paramètres originaux du step (pour l'ancre, etc.)
|
||||
|
||||
Returns:
|
||||
True si l'action a été exécutée (pas encore vérifiée).
|
||||
"""
|
||||
if step_params is None:
|
||||
step_params = {}
|
||||
|
||||
logger.info(f"🎯 [ORA/act] {decision.action} target='{decision.target}' value='{decision.value[:50]}'")
|
||||
|
||||
try:
|
||||
if decision.action == 'click':
|
||||
return self._act_click(decision, step_params)
|
||||
|
||||
elif decision.action == 'type':
|
||||
return self._act_type(decision)
|
||||
|
||||
elif decision.action == 'hotkey':
|
||||
return self._act_hotkey(decision, step_params)
|
||||
|
||||
elif decision.action == 'wait':
|
||||
return self._act_wait(decision)
|
||||
|
||||
elif decision.action == 'passthrough':
|
||||
# Exécuté directement via execute_action dans run_workflow
|
||||
return True
|
||||
|
||||
elif decision.action == 'done':
|
||||
return True
|
||||
|
||||
elif decision.action == 'need_help':
|
||||
logger.warning("🆘 [ORA/act] L'action nécessite une aide humaine")
|
||||
return False
|
||||
|
||||
else:
|
||||
logger.warning(f"🎯 [ORA/act] Action inconnue: {decision.action}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ [ORA/act] Erreur: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
# ─── Phase 4 : VÉRIFIE ────────────────────────────────
|
||||
|
||||
def verify(self, pre: Observation, post: Observation, decision: Decision) -> VerificationResult:
|
||||
"""✅/❌ Vérifie que l'action a produit l'effet attendu.
|
||||
|
||||
Level 1 (toujours) : compare pHash pre vs post.
|
||||
Level 2 (conditionnel) : appel VLM si verify_level='vlm' ou 'auto' avec confiance < 0.95.
|
||||
|
||||
Args:
|
||||
pre: Observation avant l'action.
|
||||
post: Observation après l'action.
|
||||
decision: Decision exécutée.
|
||||
|
||||
Returns:
|
||||
VerificationResult.
|
||||
"""
|
||||
# Pas de vérification demandée
|
||||
if self.verify_level == 'none':
|
||||
return VerificationResult(
|
||||
success=True, change_level='unknown',
|
||||
matches_expected=True, detail="Vérification désactivée"
|
||||
)
|
||||
|
||||
# Actions qui ne changent pas l'écran
|
||||
if decision.action in ('wait', 'done', 'passthrough'):
|
||||
return VerificationResult(
|
||||
success=True, change_level='n/a',
|
||||
matches_expected=True, detail=f"Action '{decision.action}' — pas de changement attendu"
|
||||
)
|
||||
|
||||
# --- Level 1 : pHash ---
|
||||
distance = self._phash_distance(pre.phash, post.phash)
|
||||
change_level = self._classify_change(distance)
|
||||
|
||||
logger.info(f"🔍 [ORA/verify] pHash distance={distance} → {change_level}")
|
||||
|
||||
# Déterminer le succès selon le niveau de changement
|
||||
if change_level == 'same':
|
||||
# Rien n'a changé — probable échec
|
||||
# Exception : certains clics (focus, hover) peuvent ne pas changer l'écran
|
||||
if decision.value in ('hover', 'focus'):
|
||||
success = True
|
||||
detail = f"Pas de changement visible (normal pour {decision.value})"
|
||||
else:
|
||||
success = False
|
||||
detail = f"Aucun changement détecté (distance={distance})"
|
||||
elif change_level == 'minor':
|
||||
# Changement mineur — OK si confiance élevée
|
||||
success = decision.confidence > 0.9
|
||||
detail = f"Changement mineur (distance={distance})"
|
||||
else:
|
||||
# Changement majeur — succès
|
||||
success = True
|
||||
detail = f"Changement majeur détecté (distance={distance})"
|
||||
|
||||
matches_expected = success
|
||||
|
||||
# --- Level 2 : VLM (conditionnel) ---
|
||||
if self.verify_level == 'vlm' or (self.verify_level == 'auto' and decision.confidence < 0.95 and not success):
|
||||
vlm_result = self._vlm_verify(post, decision)
|
||||
if vlm_result is not None:
|
||||
matches_expected = vlm_result.get('matches', success)
|
||||
actual_state = vlm_result.get('actual_state', '')
|
||||
detail += f" | VLM: matches={matches_expected}, état='{actual_state[:80]}'"
|
||||
# Le VLM a le dernier mot
|
||||
success = matches_expected
|
||||
logger.info(f"🔍 [ORA/verify/VLM] matches={matches_expected} état='{actual_state[:60]}'")
|
||||
|
||||
emoji = "✅" if success else "❌"
|
||||
logger.info(f"{emoji} [ORA/verify] success={success} level={change_level} — {detail[:100]}")
|
||||
|
||||
return VerificationResult(
|
||||
success=success,
|
||||
change_level=change_level,
|
||||
matches_expected=matches_expected,
|
||||
detail=detail,
|
||||
)
|
||||
|
||||
# ─── Boucle principale ────────────────────────────────
|
||||
|
||||
def run_workflow(
|
||||
self,
|
||||
steps: list,
|
||||
on_progress: Optional[Callable] = None,
|
||||
execute_action_fn: Optional[Callable] = None,
|
||||
) -> LoopResult:
|
||||
"""Exécute un workflow complet avec la boucle ORA.
|
||||
|
||||
Args:
|
||||
steps: Liste de dicts avec les paramètres de chaque étape.
|
||||
Chaque dict contient : action_type, parameters, visual_anchor, label.
|
||||
on_progress: Callback(step_index, total_steps, verification_result).
|
||||
execute_action_fn: Fonction externe pour les actions passthrough
|
||||
(signature: execute_action(action_type, params) -> dict).
|
||||
|
||||
Returns:
|
||||
LoopResult.
|
||||
"""
|
||||
total = len(steps)
|
||||
if total == 0:
|
||||
return LoopResult(success=True, steps_completed=0, total_steps=0, reason="Aucune étape")
|
||||
|
||||
if total > self.max_steps:
|
||||
return LoopResult(
|
||||
success=False, steps_completed=0, total_steps=total,
|
||||
reason=f"Trop d'étapes ({total} > max {self.max_steps})"
|
||||
)
|
||||
|
||||
logger.info(f"🚀 [ORA] Démarrage workflow: {total} étapes, verify={self.verify_level}, retries={self.max_retries}")
|
||||
|
||||
for i, step in enumerate(steps):
|
||||
logger.info(f"\n{'='*60}")
|
||||
logger.info(f"📋 [ORA] Étape {i+1}/{total}: {step.get('action_type', '?')} — {step.get('label', '')}")
|
||||
|
||||
# --- 1. Observer l'état pré-action ---
|
||||
pre = self.observe()
|
||||
|
||||
# --- 2. Raisonner : construire la Decision ---
|
||||
decision = self.reason_workflow_step(step, pre)
|
||||
|
||||
# --- 3. Agir ---
|
||||
if decision.action == 'passthrough' and execute_action_fn:
|
||||
# Déléguer aux fonctions existantes (IA, extraction, etc.)
|
||||
action_type = step.get('action_type', '')
|
||||
params = step.get('_full_params', step.get('parameters', {}))
|
||||
result = execute_action_fn(action_type, params)
|
||||
act_success = result.get('success', False)
|
||||
if not act_success:
|
||||
logger.warning(f"❌ [ORA] Action passthrough échouée: {result.get('error', '?')}")
|
||||
return LoopResult(
|
||||
success=False, steps_completed=i, total_steps=total,
|
||||
reason=f"Étape {i+1} passthrough échouée: {result.get('error', '?')}"
|
||||
)
|
||||
# Pas de vérification pour les passthrough
|
||||
if on_progress:
|
||||
on_progress(i + 1, total, VerificationResult(
|
||||
success=True, change_level='n/a',
|
||||
matches_expected=True, detail="Passthrough OK"
|
||||
))
|
||||
continue
|
||||
|
||||
act_success = self.act(decision, step)
|
||||
if not act_success and decision.action not in ('wait', 'done'):
|
||||
logger.warning(f"❌ [ORA] Action échouée immédiatement")
|
||||
return LoopResult(
|
||||
success=False, steps_completed=i, total_steps=total,
|
||||
reason=f"Étape {i+1}: action '{decision.action}' échouée"
|
||||
)
|
||||
|
||||
# Petit délai pour laisser l'écran se stabiliser
|
||||
time.sleep(0.3)
|
||||
|
||||
# --- 4. Observer l'état post-action ---
|
||||
post = self.observe()
|
||||
|
||||
# --- 5. Vérifier ---
|
||||
verification = self.verify(pre, post, decision)
|
||||
|
||||
if not verification.success:
|
||||
# Réessayer
|
||||
retried = False
|
||||
for retry in range(self.max_retries):
|
||||
logger.info(f"🔄 [ORA] Retry {retry+1}/{self.max_retries} pour étape {i+1}")
|
||||
pre_retry = self.observe()
|
||||
act_success = self.act(decision, step)
|
||||
time.sleep(0.3)
|
||||
post_retry = self.observe()
|
||||
verification = self.verify(pre_retry, post_retry, decision)
|
||||
if verification.success:
|
||||
retried = True
|
||||
logger.info(f"✅ [ORA] Retry {retry+1} réussi")
|
||||
break
|
||||
if not retried and not verification.success:
|
||||
logger.warning(f"❌ [ORA] Étape {i+1} échouée après {self.max_retries} retries")
|
||||
return LoopResult(
|
||||
success=False, steps_completed=i, total_steps=total,
|
||||
reason=f"Étape {i+1}: vérification échouée — {verification.detail}"
|
||||
)
|
||||
|
||||
# --- Callback de progression ---
|
||||
if on_progress:
|
||||
on_progress(i + 1, total, verification)
|
||||
|
||||
logger.info(f"✅ [ORA] Workflow terminé avec succès: {total}/{total} étapes")
|
||||
return LoopResult(success=True, steps_completed=total, total_steps=total)
|
||||
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
# Méthodes privées — actions
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
|
||||
def _act_click(self, decision: Decision, step_params: dict) -> bool:
|
||||
"""Exécute un clic (simple, double, droit, hover, focus).
|
||||
|
||||
Pipeline : template matching → find_element_on_screen (OCR → UI-TARS → VLM).
|
||||
"""
|
||||
if not PYAUTOGUI_AVAILABLE:
|
||||
logger.error("pyautogui non disponible")
|
||||
return False
|
||||
|
||||
anchor = step_params.get('visual_anchor', {})
|
||||
screenshot_b64 = anchor.get('screenshot')
|
||||
bbox = anchor.get('bounding_box', {})
|
||||
target_text = anchor.get('target_text', '') or decision.target
|
||||
target_desc = anchor.get('description', '')
|
||||
|
||||
x, y = None, None
|
||||
method_used = ''
|
||||
|
||||
# --- Méthode 1 : Template matching direct (~1-10ms) ---
|
||||
if screenshot_b64 and CV2_AVAILABLE and PIL_AVAILABLE and MSS_AVAILABLE:
|
||||
try:
|
||||
import io as _io
|
||||
|
||||
# Capturer l'écran
|
||||
with mss_lib.mss() as sct:
|
||||
mon = sct.monitors[0]
|
||||
grab = sct.grab(mon)
|
||||
screen_img = Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')
|
||||
|
||||
# Décoder l'ancre
|
||||
raw_b64 = screenshot_b64.split(',')[1] if ',' in screenshot_b64 else screenshot_b64
|
||||
anchor_data = base64.b64decode(raw_b64)
|
||||
anchor_img = Image.open(_io.BytesIO(anchor_data))
|
||||
|
||||
screen_cv = cv2.cvtColor(np.array(screen_img), cv2.COLOR_RGB2BGR)
|
||||
anchor_cv = cv2.cvtColor(np.array(anchor_img), cv2.COLOR_RGB2BGR)
|
||||
|
||||
if anchor_cv.shape[0] < screen_cv.shape[0] and anchor_cv.shape[1] < screen_cv.shape[1]:
|
||||
t0 = time.time()
|
||||
result_tm = cv2.matchTemplate(screen_cv, anchor_cv, cv2.TM_CCOEFF_NORMED)
|
||||
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result_tm)
|
||||
elapsed_ms = (time.time() - t0) * 1000
|
||||
logger.info(f"⚡ [ORA/template] score={max_val:.3f} pos={max_loc} ({elapsed_ms:.0f}ms)")
|
||||
|
||||
if max_val > 0.75:
|
||||
x = max_loc[0] + anchor_cv.shape[1] // 2
|
||||
y = max_loc[1] + anchor_cv.shape[0] // 2
|
||||
method_used = 'template'
|
||||
except Exception as e:
|
||||
logger.debug(f"⚠️ [ORA/template] Erreur: {e}")
|
||||
|
||||
# --- Méthode 2 : find_element_on_screen (OCR → UI-TARS → VLM) ---
|
||||
if x is None and (target_text or target_desc):
|
||||
try:
|
||||
from core.execution.input_handler import find_element_on_screen
|
||||
grounding_result = find_element_on_screen(
|
||||
target_text=target_text,
|
||||
target_description=target_desc,
|
||||
anchor_image_base64=screenshot_b64,
|
||||
anchor_bbox=bbox if bbox else None,
|
||||
)
|
||||
if grounding_result:
|
||||
x, y = grounding_result['x'], grounding_result['y']
|
||||
method_used = f"grounding_{grounding_result['method']}"
|
||||
logger.info(f"🔍 [ORA/grounding] Trouvé via {method_used} à ({x}, {y})")
|
||||
except Exception as e:
|
||||
logger.debug(f"⚠️ [ORA/grounding] Erreur: {e}")
|
||||
|
||||
# --- Exécuter le clic ---
|
||||
if x is None:
|
||||
# Dernier recours : coordonnées statiques de l'ancre
|
||||
if bbox and bbox.get('width') and bbox.get('height'):
|
||||
x = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
|
||||
y = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)
|
||||
method_used = 'static_fallback'
|
||||
logger.warning(f"⚠️ [ORA/click] Fallback coordonnées statiques: ({x}, {y})")
|
||||
else:
|
||||
logger.error(f"❌ [ORA/click] Impossible de localiser '{target_text}' — aucune méthode n'a fonctionné")
|
||||
return False
|
||||
|
||||
logger.info(f"🖱️ [ORA/click] {decision.value} à ({x}, {y}) via {method_used}")
|
||||
|
||||
if decision.value == 'double':
|
||||
pyautogui.doubleClick(x, y)
|
||||
elif decision.value == 'right':
|
||||
pyautogui.rightClick(x, y)
|
||||
elif decision.value == 'hover':
|
||||
pyautogui.moveTo(x, y, duration=0.3)
|
||||
time.sleep(step_params.get('parameters', {}).get('hover_duration_ms',
|
||||
step_params.get('parameters', {}).get('duration_ms', 1000)) / 1000)
|
||||
elif decision.value == 'focus':
|
||||
pyautogui.click(x, y)
|
||||
time.sleep(0.3)
|
||||
else:
|
||||
pyautogui.click(x, y)
|
||||
|
||||
return True
|
||||
|
||||
def _act_type(self, decision: Decision) -> bool:
|
||||
"""Saisie de texte via safe_type_text."""
|
||||
if not decision.value:
|
||||
logger.warning("🎯 [ORA/type] Pas de texte à saisir")
|
||||
return True # Vide = rien à faire, pas un échec
|
||||
|
||||
try:
|
||||
from core.execution.input_handler import safe_type_text
|
||||
safe_type_text(decision.value)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"❌ [ORA/type] Erreur saisie: {e}")
|
||||
return False
|
||||
|
||||
def _act_hotkey(self, decision: Decision, step_params: dict) -> bool:
|
||||
"""Raccourci clavier ou scroll."""
|
||||
if not PYAUTOGUI_AVAILABLE:
|
||||
logger.error("pyautogui non disponible")
|
||||
return False
|
||||
|
||||
value = decision.value
|
||||
|
||||
# Gestion du scroll (encodé dans value)
|
||||
if value.startswith('scroll_'):
|
||||
parts = value.split('_')
|
||||
# scroll_down_3 ou scroll_up_5
|
||||
direction = parts[1] if len(parts) > 1 else 'down'
|
||||
amount = int(parts[2]) if len(parts) > 2 else 3
|
||||
|
||||
# Positionner la souris si ancre disponible
|
||||
anchor = step_params.get('visual_anchor', {})
|
||||
bbox = anchor.get('bounding_box', {})
|
||||
if bbox and bbox.get('width'):
|
||||
sx = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
|
||||
sy = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)
|
||||
pyautogui.moveTo(sx, sy, duration=0.1)
|
||||
|
||||
scroll_value = amount if direction in ('up', 'left') else -amount
|
||||
if direction in ('left', 'right'):
|
||||
pyautogui.hscroll(scroll_value)
|
||||
else:
|
||||
pyautogui.scroll(scroll_value)
|
||||
time.sleep(0.5)
|
||||
return True
|
||||
|
||||
# Raccourci clavier normal
|
||||
keys = value.split('+')
|
||||
logger.info(f"⌨️ [ORA/hotkey] {'+'.join(keys)}")
|
||||
pyautogui.hotkey(*keys)
|
||||
return True
|
||||
|
||||
def _act_wait(self, decision: Decision) -> bool:
|
||||
"""Attente passive."""
|
||||
try:
|
||||
timeout_ms = int(decision.value)
|
||||
except (ValueError, TypeError):
|
||||
timeout_ms = 5000
|
||||
|
||||
logger.info(f"⏳ [ORA/wait] {timeout_ms}ms")
|
||||
time.sleep(timeout_ms / 1000)
|
||||
return True
|
||||
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
# Méthodes privées — utilitaires
|
||||
# ═══════════════════════════════════════════════════════════
|
||||
|
||||
def _capture_screen(self) -> Any:
|
||||
"""Capture l'écran principal, retourne une PIL Image."""
|
||||
if not MSS_AVAILABLE or not PIL_AVAILABLE:
|
||||
logger.warning("mss ou PIL non disponible — capture impossible")
|
||||
return None
|
||||
|
||||
try:
|
||||
with mss_lib.mss() as sct:
|
||||
monitor = sct.monitors[0]
|
||||
grab = sct.grab(monitor)
|
||||
return Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur capture écran: {e}")
|
||||
return None
|
||||
|
||||
def _compute_phash(self, image: Any) -> Any:
|
||||
"""Calcule le pHash d'une image PIL."""
|
||||
if image is None or not IMAGEHASH_AVAILABLE:
|
||||
return None
|
||||
try:
|
||||
from core.analytics.screen_change_detector import compute_phash
|
||||
return compute_phash(image)
|
||||
except Exception as e:
|
||||
logger.debug(f"Erreur compute_phash: {e}")
|
||||
return None
|
||||
|
||||
def _get_active_window_title(self) -> str:
|
||||
"""Récupère le titre de la fenêtre active via xdotool."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['xdotool', 'getactivewindow', 'getwindowname'],
|
||||
capture_output=True, text=True, timeout=2
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return ''
|
||||
|
||||
def _phash_distance(self, hash1: Any, hash2: Any) -> int:
|
||||
"""Distance de Hamming entre deux pHash. Retourne 999 si non calculable."""
|
||||
if hash1 is None or hash2 is None:
|
||||
return 999
|
||||
try:
|
||||
return hash1 - hash2
|
||||
except Exception:
|
||||
return 999
|
||||
|
||||
def _classify_change(self, distance: int) -> str:
|
||||
"""Classifie un changement d'écran selon la distance pHash."""
|
||||
if distance < 5:
|
||||
return 'same'
|
||||
elif distance < 15:
|
||||
return 'minor'
|
||||
else:
|
||||
return 'major'
|
||||
|
||||
def _vlm_verify(self, post: Observation, decision: Decision) -> Optional[dict]:
|
||||
"""Appelle le VLM pour vérifier l'écran post-action.
|
||||
|
||||
Returns:
|
||||
Dict {'matches': bool, 'actual_state': str} ou None si échec.
|
||||
"""
|
||||
if post.screenshot is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
import requests
|
||||
import io as _io
|
||||
|
||||
buffer = _io.BytesIO()
|
||||
post.screenshot.save(buffer, format='JPEG', quality=70)
|
||||
image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
||||
|
||||
prompt = (
|
||||
f"L'action '{decision.action} {decision.target}' vient d'être exécutée. "
|
||||
f"Le résultat attendu était: '{decision.expected_after}'. "
|
||||
f"L'écran correspond-il ? Réponds JSON: "
|
||||
f'{{\"matches\": true/false, \"actual_state\": \"...\"}}'
|
||||
)
|
||||
|
||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
|
||||
|
||||
response = requests.post(
|
||||
f"{ollama_url}/api/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"images": [image_b64],
|
||||
"stream": False,
|
||||
"options": {"temperature": 0.1, "num_predict": 200}
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.warning(f"[ORA/vlm_verify] HTTP {response.status_code}")
|
||||
return None
|
||||
|
||||
text = response.json().get('response', '').strip()
|
||||
match = re.search(r'\{[\s\S]*\}', text)
|
||||
if match:
|
||||
parsed = json.loads(match.group())
|
||||
return {
|
||||
'matches': bool(parsed.get('matches', False)),
|
||||
'actual_state': str(parsed.get('actual_state', '')),
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"[ORA/vlm_verify] Erreur: {e}")
|
||||
return None
|
||||
|
||||
def _resolve_variables(self, text: str) -> str:
|
||||
"""Résout les variables {{var}} dans un texte."""
|
||||
if not text or '{{' not in text:
|
||||
return text
|
||||
|
||||
def replace_var(match):
|
||||
var_name = match.group(1)
|
||||
return str(self._variables.get(var_name, match.group(0)))
|
||||
|
||||
return re.sub(r'\{\{(\w+)\}\}', replace_var, text)
|
||||
@@ -1346,6 +1346,182 @@ def execute_action(action_type: str, params: dict) -> dict:
|
||||
return {'success': False, 'error': str(e)}
|
||||
|
||||
|
||||
def run_workflow_verified(execution_id: str, workflow_id: str, app):
|
||||
"""
|
||||
Thread d'exécution en mode 'verified' — boucle ORA.
|
||||
|
||||
Charge les steps du workflow depuis la BDD, construit les params
|
||||
comme execute_workflow_thread, puis délègue à ORALoop.run_workflow().
|
||||
|
||||
NE modifie PAS execute_workflow_thread : les modes basic/intelligent/debug
|
||||
continuent de fonctionner exactement comme avant.
|
||||
"""
|
||||
global _execution_state
|
||||
|
||||
with app.app_context():
|
||||
try:
|
||||
execution = Execution.query.get(execution_id)
|
||||
workflow = Workflow.query.get(workflow_id)
|
||||
|
||||
if not execution or not workflow:
|
||||
logger.error("[ORA] Workflow ou exécution non trouvé")
|
||||
return
|
||||
|
||||
steps_db = workflow.steps.order_by(Step.order).all()
|
||||
execution.total_steps = len(steps_db)
|
||||
execution.status = 'running'
|
||||
execution.started_at = datetime.utcnow()
|
||||
db.session.commit()
|
||||
|
||||
logger.info(f"🚀 [ORA] Démarrage workflow vérifié {workflow_id}: {len(steps_db)} étapes")
|
||||
|
||||
# --- Construire les params pour chaque step (même logique que execute_workflow_thread) ---
|
||||
ora_steps = []
|
||||
for step in steps_db:
|
||||
params = step.parameters or {}
|
||||
|
||||
# Charger l'ancre visuelle si présente
|
||||
if step.anchor_id:
|
||||
anchor = VisualAnchor.query.get(step.anchor_id)
|
||||
if anchor:
|
||||
anchor_image_path = anchor.thumbnail_path or anchor.image_path
|
||||
if anchor_image_path and os.path.exists(anchor_image_path):
|
||||
with open(anchor_image_path, 'rb') as f:
|
||||
image_base64 = base64.b64encode(f.read()).decode('utf-8')
|
||||
else:
|
||||
image_base64 = None
|
||||
|
||||
anchor_data = {
|
||||
'anchor_id': anchor.id,
|
||||
'screenshot': image_base64,
|
||||
'bounding_box': {
|
||||
'x': anchor.bbox_x,
|
||||
'y': anchor.bbox_y,
|
||||
'width': anchor.bbox_width,
|
||||
'height': anchor.bbox_height,
|
||||
},
|
||||
'metadata': {
|
||||
'screen_resolution': {
|
||||
'width': anchor.screen_width,
|
||||
'height': anchor.screen_height,
|
||||
}
|
||||
},
|
||||
}
|
||||
if anchor.target_text:
|
||||
anchor_data['target_text'] = anchor.target_text
|
||||
if anchor.ocr_description:
|
||||
anchor_data['description'] = anchor.ocr_description
|
||||
|
||||
params['visual_anchor'] = anchor_data
|
||||
|
||||
# Injecter le label pour le grounding
|
||||
if step.label:
|
||||
params['_step_label'] = step.label
|
||||
|
||||
ora_steps.append({
|
||||
'action_type': step.action_type,
|
||||
'parameters': params,
|
||||
'visual_anchor': params.get('visual_anchor', {}),
|
||||
'label': step.label or step.action_type,
|
||||
'_full_params': params, # Pour les passthrough
|
||||
'_step_id': step.id,
|
||||
})
|
||||
|
||||
# --- Créer et lancer la boucle ORA ---
|
||||
from core.execution.observe_reason_act import ORALoop
|
||||
|
||||
ora = ORALoop(max_retries=2, max_steps=50, verify_level='auto')
|
||||
ora._variables = _execution_state.get('variables', {})
|
||||
|
||||
# Créer les ExecutionStep en amont pour le suivi
|
||||
step_results_map = {}
|
||||
for idx, step in enumerate(steps_db):
|
||||
step_result = ExecutionStep(
|
||||
execution_id=execution_id,
|
||||
step_id=step.id,
|
||||
status='pending',
|
||||
)
|
||||
db.session.add(step_result)
|
||||
step_results_map[idx] = step_result
|
||||
db.session.commit()
|
||||
|
||||
def on_progress(step_index, total, verification):
|
||||
"""Callback de progression — met à jour la BDD."""
|
||||
try:
|
||||
sr = step_results_map.get(step_index - 1)
|
||||
if sr:
|
||||
sr.status = 'success' if verification.success else 'error'
|
||||
sr.started_at = sr.started_at or datetime.utcnow()
|
||||
sr.ended_at = datetime.utcnow()
|
||||
if sr.started_at:
|
||||
sr.duration_ms = int((sr.ended_at - sr.started_at).total_seconds() * 1000)
|
||||
sr.output = {
|
||||
'change_level': verification.change_level,
|
||||
'matches_expected': verification.matches_expected,
|
||||
'detail': verification.detail,
|
||||
'mode': 'verified',
|
||||
}
|
||||
if verification.success:
|
||||
execution.completed_steps = step_index
|
||||
else:
|
||||
execution.failed_steps = (execution.failed_steps or 0) + 1
|
||||
sr.error_message = verification.detail
|
||||
execution.current_step_index = step_index
|
||||
db.session.commit()
|
||||
except Exception as e:
|
||||
logger.warning(f"[ORA/progress] Erreur BDD: {e}")
|
||||
|
||||
# Exécuter
|
||||
loop_result = ora.run_workflow(
|
||||
steps=ora_steps,
|
||||
on_progress=on_progress,
|
||||
execute_action_fn=execute_action,
|
||||
)
|
||||
|
||||
# Synchroniser les variables runtime
|
||||
_execution_state['variables'] = ora._variables
|
||||
|
||||
# Finaliser
|
||||
if loop_result.success:
|
||||
execution.status = 'completed'
|
||||
else:
|
||||
execution.status = 'error'
|
||||
execution.error_message = loop_result.reason
|
||||
# Marquer le step en échec
|
||||
failed_sr = step_results_map.get(loop_result.steps_completed)
|
||||
if failed_sr and failed_sr.status == 'pending':
|
||||
failed_sr.status = 'error'
|
||||
failed_sr.error_message = loop_result.reason
|
||||
failed_sr.ended_at = datetime.utcnow()
|
||||
|
||||
execution.ended_at = datetime.utcnow()
|
||||
execution.completed_steps = loop_result.steps_completed
|
||||
db.session.commit()
|
||||
|
||||
logger.info(
|
||||
f"{'✅' if loop_result.success else '❌'} [ORA] Workflow terminé: "
|
||||
f"{loop_result.steps_completed}/{loop_result.total_steps} "
|
||||
f"({execution.status}) — {loop_result.reason}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ [ORA] Erreur fatale: {e}", exc_info=True)
|
||||
try:
|
||||
execution = Execution.query.get(execution_id)
|
||||
if execution:
|
||||
execution.status = 'error'
|
||||
execution.error_message = f"Erreur fatale ORA: {str(e)}"
|
||||
execution.ended_at = datetime.utcnow()
|
||||
db.session.commit()
|
||||
except Exception as db_err:
|
||||
logger.warning(f"[ORA] DB cleanup error: {db_err}")
|
||||
|
||||
finally:
|
||||
with _execution_lock:
|
||||
_execution_state['is_running'] = False
|
||||
_execution_state['current_execution_id'] = None
|
||||
|
||||
|
||||
@api_v3_bp.route('/execute/start', methods=['POST'])
|
||||
def start_execution():
|
||||
"""
|
||||
@@ -1371,7 +1547,7 @@ def start_execution():
|
||||
minimize_browser = data.get('minimize_browser', True) # Activé par défaut
|
||||
|
||||
# Valider le mode
|
||||
if execution_mode not in ['basic', 'intelligent', 'debug']:
|
||||
if execution_mode not in ['basic', 'intelligent', 'debug', 'verified']:
|
||||
execution_mode = 'basic'
|
||||
|
||||
# Utiliser le workflow actif si non spécifié
|
||||
@@ -1430,15 +1606,22 @@ def start_execution():
|
||||
from flask import current_app
|
||||
app = current_app._get_current_object()
|
||||
|
||||
thread = threading.Thread(
|
||||
target=execute_workflow_thread,
|
||||
args=(execution.id, workflow_id, app)
|
||||
)
|
||||
if execution_mode == 'verified':
|
||||
# Mode ORA : boucle observe→raisonne→agit avec vérification
|
||||
thread = threading.Thread(
|
||||
target=run_workflow_verified,
|
||||
args=(execution.id, workflow_id, app)
|
||||
)
|
||||
else:
|
||||
thread = threading.Thread(
|
||||
target=execute_workflow_thread,
|
||||
args=(execution.id, workflow_id, app)
|
||||
)
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
_execution_state['thread'] = thread
|
||||
|
||||
print(f"🚀 [API v3] Exécution lancée: {execution.id}")
|
||||
print(f"🚀 [API v3] Exécution lancée: {execution.id} (mode={execution_mode})")
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
|
||||
Reference in New Issue
Block a user