feat: boucle ORA (observe→raisonne→agit) avec vérification post-action

Nouveau module core/execution/observe_reason_act.py (794 lignes) : - ORALoop : boucle unifiée pour workflow VWB et instructions - observe() : capture écran + pHash + titre fenêtre - reason_workflow_step() : mappe step VWB → Decision (sans VLM) - act() : template matching → find_element → pyautogui - verify() : Level 1 pHash + Level 2 VLM conditionnel - run_workflow() : boucle complète avec retries et callbacks Nouveau mode execution_mode='verified' dans execute.py : - run_workflow_verified() utilise ORALoop - Modes basic/intelligent/debug inchangés (zéro risque) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-22 09:02:54 +02:00
parent 5027ed9a23
commit 0c5fffe951
3 changed files with 990 additions and 6 deletions
--- a/core/execution/init.py
+++ b/core/execution/init.py
@@ -10,6 +10,7 @@ from .error_handler import ErrorHandler, ErrorType, RecoveryStrategy
 from .workflow_runner import WorkflowRunner, RunResult, RunStatus, RunnerConfig
 from .dag_executor import DAGExecutor, WorkflowStep, StepType, StepStatus, DAGExecutionResult
 from .llm_actions import LLMActionHandler
+from .observe_reason_act import ORALoop, Observation, Decision, VerificationResult, LoopResult

 # Import tardif pour éviter import circulaire avec pipeline
 def _get_execution_loop():
@@ -34,5 +35,11 @@ __all__ = [
    'StepStatus',
    'DAGExecutionResult',
    'LLMActionHandler',
+    # ORA — boucle Observe-Raisonne-Agit avec vérification
+    'ORALoop',
+    'Observation',
+    'Decision',
+    'VerificationResult',
+    'LoopResult',
    # ExecutionLoop accessible via import direct du module
 ]
--- a/core/execution/observe_reason_act.py
+++ b/core/execution/observe_reason_act.py
@@ -0,0 +1,794 @@
+"""
+Boucle Observe → Raisonne → Agit (ORA) pour l'exécution vérifiée de workflows.
+
+Chaque étape du workflow passe par 3 phases :
+1. 👁 OBSERVE : capture écran + pHash + titre fenêtre
+2. 🧠 RAISONNE : traduit le step VWB en Decision exécutable
+3. 🎯 AGIT : exécute l'action (clic, saisie, raccourci, attente)
+4. ✅/❌ VÉRIFIE : compare pré/post screenshot pour confirmer le résultat
+
+Module standalone — importable depuis n'importe où.
+"""
+
+import logging
+import time
+import subprocess
+import base64
+import os
+import json
+import re
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+# --- Imports conditionnels ---
+try:
+    from PIL import Image
+    PIL_AVAILABLE = True
+except ImportError:
+    Image = None
+    PIL_AVAILABLE = False
+
+try:
+    import mss as mss_lib
+    MSS_AVAILABLE = True
+except ImportError:
+    mss_lib = None
+    MSS_AVAILABLE = False
+
+try:
+    import cv2
+    import numpy as np
+    CV2_AVAILABLE = True
+except ImportError:
+    cv2 = None
+    np = None
+    CV2_AVAILABLE = False
+
+try:
+    import pyautogui
+    PYAUTOGUI_AVAILABLE = True
+except ImportError:
+    pyautogui = None
+    PYAUTOGUI_AVAILABLE = False
+
+try:
+    import imagehash
+    IMAGEHASH_AVAILABLE = True
+except ImportError:
+    imagehash = None
+    IMAGEHASH_AVAILABLE = False
+
+
+# ═══════════════════════════════════════════════════════════════
+# Dataclasses
+# ═══════════════════════════════════════════════════════════════
+
+@dataclass
+class Observation:
+    """Capture de l'état courant de l'écran."""
+    screenshot: Any  # PIL Image
+    phash: Any  # imagehash.ImageHash
+    window_title: str
+    timestamp: float
+
+
+@dataclass
+class Decision:
+    """Décision d'action à exécuter."""
+    action: str  # "click" | "type" | "hotkey" | "wait" | "done" | "need_help"
+    target: str
+    value: str  # texte à saisir ou touches
+    reasoning: str
+    expected_after: str
+    confidence: float
+    done: bool = False
+
+
+@dataclass
+class VerificationResult:
+    """Résultat de la vérification post-action."""
+    success: bool
+    change_level: str  # "same" | "minor" | "major"
+    matches_expected: bool
+    detail: str
+
+
+@dataclass
+class LoopResult:
+    """Résultat global de l'exécution de la boucle ORA."""
+    success: bool
+    steps_completed: int
+    total_steps: int
+    reason: str = ""
+
+
+# ═══════════════════════════════════════════════════════════════
+# Classe principale
+# ═══════════════════════════════════════════════════════════════
+
+class ORALoop:
+    """
+    Boucle Observe-Raisonne-Agit avec vérification intégrée.
+
+    Args:
+        max_retries: Nombre de réessais par étape en cas d'échec de vérification.
+        max_steps: Nombre maximal d'étapes autorisées (garde-fou).
+        verify_level: 'none' | 'phash' | 'vlm' | 'auto'
+            - none  : pas de vérification post-action
+            - phash : comparaison pHash uniquement
+            - vlm   : pHash + appel VLM systématique
+            - auto  : pHash toujours, VLM seulement si confiance < 0.95
+    """
+
+    def __init__(self, max_retries: int = 2, max_steps: int = 50, verify_level: str = 'auto'):
+        self.max_retries = max_retries
+        self.max_steps = max_steps
+        self.verify_level = verify_level
+
+        # Variables runtime injectées par le workflow
+        self._variables: Dict[str, Any] = {}
+
+    # ─── Phase 1 : OBSERVE ────────────────────────────────
+
+    def observe(self) -> Observation:
+        """👁 Capture l'état courant de l'écran.
+
+        Returns:
+            Observation avec screenshot PIL, pHash, titre fenêtre, timestamp.
+        """
+        screenshot = self._capture_screen()
+        phash = self._compute_phash(screenshot)
+        window_title = self._get_active_window_title()
+        ts = time.time()
+
+        logger.info(f"👁 [ORA/observe] titre='{window_title}' phash={phash}")
+        return Observation(
+            screenshot=screenshot,
+            phash=phash,
+            window_title=window_title,
+            timestamp=ts,
+        )
+
+    # ─── Phase 2 : RAISONNE ───────────────────────────────
+
+    def reason_workflow_step(self, step_params: dict, observation: Observation) -> Decision:
+        """🧠 Traduit un step VWB en Decision exécutable.
+
+        Le workflow dit déjà quoi faire — pas d'appel VLM ici.
+        On se contente de mapper les paramètres du step vers une Decision.
+
+        Args:
+            step_params: Paramètres du step (action_type, visual_anchor, keys, text, etc.)
+            observation: Observation courante (pour contexte)
+
+        Returns:
+            Decision prête à exécuter.
+        """
+        action_type = step_params.get('action_type', '')
+        params = step_params.get('parameters', {})
+        anchor = step_params.get('visual_anchor', {})
+        label = step_params.get('label', action_type)
+
+        # --- Mapper action_type vers action Decision ---
+
+        if action_type in ('click_anchor', 'click', 'double_click_anchor', 'right_click_anchor'):
+            target_text = anchor.get('target_text', '') or label
+            action = 'click'
+            value = 'double' if action_type == 'double_click_anchor' else (
+                'right' if action_type == 'right_click_anchor' else 'left')
+            expected = f"L'écran devrait changer après le clic sur '{target_text}'"
+            confidence = 0.9
+
+        elif action_type in ('type_text', 'type'):
+            text = params.get('text', '')
+            # Résoudre les variables {{var}}
+            text = self._resolve_variables(text)
+            action = 'type'
+            target_text = ''
+            value = text
+            expected = f"Le texte '{text[:30]}...' devrait apparaître dans le champ"
+            confidence = 0.95
+
+        elif action_type == 'keyboard_shortcut':
+            keys = params.get('keys', [])
+            action = 'hotkey'
+            target_text = ''
+            value = '+'.join(keys)
+            expected = f"L'écran devrait réagir au raccourci {value}"
+            confidence = 0.9
+
+        elif action_type in ('wait_for_anchor', 'wait'):
+            timeout_ms = params.get('timeout_ms', params.get('timeout', 5000))
+            action = 'wait'
+            target_text = ''
+            value = str(timeout_ms)
+            expected = "Attente passive"
+            confidence = 1.0
+
+        elif action_type in ('hover_anchor', 'hover'):
+            target_text = anchor.get('target_text', '') or label
+            action = 'click'  # On réutilise click avec value='hover'
+            value = 'hover'
+            expected = f"Le curseur survole '{target_text}'"
+            confidence = 0.95
+
+        elif action_type in ('scroll_to_anchor', 'scroll'):
+            direction = params.get('scroll_direction', 'down')
+            amount = params.get('scroll_amount', params.get('scroll_step_pixels', 3))
+            action = 'hotkey'  # Scroll passe par pyautogui.scroll
+            target_text = ''
+            value = f"scroll_{direction}_{amount}"
+            expected = f"L'écran devrait défiler vers {direction}"
+            confidence = 0.9
+
+        elif action_type in ('focus_anchor', 'focus'):
+            target_text = anchor.get('target_text', '') or label
+            action = 'click'
+            value = 'focus'
+            expected = f"L'élément '{target_text}' a le focus"
+            confidence = 0.95
+
+        elif action_type in ('ai_analyze_text', 'ai_ocr', 'ai_summarize',
+                             'ai_extract', 'ai_classify', 'ai_custom',
+                             'extract_text', 'screenshot_evidence',
+                             'verify_element_exists', 'verify_element'):
+            # Actions passthrough : exécutées tel quel par execute_action existant
+            action = 'passthrough'
+            target_text = ''
+            value = action_type
+            expected = "L'action IA/extraction devrait retourner un résultat"
+            confidence = 0.95
+
+        else:
+            action = 'passthrough'
+            target_text = ''
+            value = action_type
+            expected = f"Action '{action_type}' exécutée"
+            confidence = 0.8
+
+        decision = Decision(
+            action=action,
+            target=target_text,
+            value=value,
+            reasoning=f"Step VWB '{label}' → {action} (confiance={confidence:.2f})",
+            expected_after=expected,
+            confidence=confidence,
+        )
+        logger.info(f"🧠 [ORA/reason] {decision.action} target='{decision.target}' value='{decision.value[:50]}'")
+        return decision
+
+    # ─── Phase 3 : AGIT ───────────────────────────────────
+
+    def act(self, decision: Decision, step_params: dict = None) -> bool:
+        """🎯 Exécute la Decision.
+
+        Utilise le même pipeline que execute.py :
+        - Template matching → CLIP → OCR/UI-TARS pour les clics
+        - safe_type_text pour la saisie
+        - pyautogui.hotkey pour les raccourcis
+
+        Args:
+            decision: Decision à exécuter.
+            step_params: Paramètres originaux du step (pour l'ancre, etc.)
+
+        Returns:
+            True si l'action a été exécutée (pas encore vérifiée).
+        """
+        if step_params is None:
+            step_params = {}
+
+        logger.info(f"🎯 [ORA/act] {decision.action} target='{decision.target}' value='{decision.value[:50]}'")
+
+        try:
+            if decision.action == 'click':
+                return self._act_click(decision, step_params)
+
+            elif decision.action == 'type':
+                return self._act_type(decision)
+
+            elif decision.action == 'hotkey':
+                return self._act_hotkey(decision, step_params)
+
+            elif decision.action == 'wait':
+                return self._act_wait(decision)
+
+            elif decision.action == 'passthrough':
+                # Exécuté directement via execute_action dans run_workflow
+                return True
+
+            elif decision.action == 'done':
+                return True
+
+            elif decision.action == 'need_help':
+                logger.warning("🆘 [ORA/act] L'action nécessite une aide humaine")
+                return False
+
+            else:
+                logger.warning(f"🎯 [ORA/act] Action inconnue: {decision.action}")
+                return False
+
+        except Exception as e:
+            logger.error(f"❌ [ORA/act] Erreur: {e}", exc_info=True)
+            return False
+
+    # ─── Phase 4 : VÉRIFIE ────────────────────────────────
+
+    def verify(self, pre: Observation, post: Observation, decision: Decision) -> VerificationResult:
+        """✅/❌ Vérifie que l'action a produit l'effet attendu.
+
+        Level 1 (toujours) : compare pHash pre vs post.
+        Level 2 (conditionnel) : appel VLM si verify_level='vlm' ou 'auto' avec confiance < 0.95.
+
+        Args:
+            pre: Observation avant l'action.
+            post: Observation après l'action.
+            decision: Decision exécutée.
+
+        Returns:
+            VerificationResult.
+        """
+        # Pas de vérification demandée
+        if self.verify_level == 'none':
+            return VerificationResult(
+                success=True, change_level='unknown',
+                matches_expected=True, detail="Vérification désactivée"
+            )
+
+        # Actions qui ne changent pas l'écran
+        if decision.action in ('wait', 'done', 'passthrough'):
+            return VerificationResult(
+                success=True, change_level='n/a',
+                matches_expected=True, detail=f"Action '{decision.action}' — pas de changement attendu"
+            )
+
+        # --- Level 1 : pHash ---
+        distance = self._phash_distance(pre.phash, post.phash)
+        change_level = self._classify_change(distance)
+
+        logger.info(f"🔍 [ORA/verify] pHash distance={distance} → {change_level}")
+
+        # Déterminer le succès selon le niveau de changement
+        if change_level == 'same':
+            # Rien n'a changé — probable échec
+            # Exception : certains clics (focus, hover) peuvent ne pas changer l'écran
+            if decision.value in ('hover', 'focus'):
+                success = True
+                detail = f"Pas de changement visible (normal pour {decision.value})"
+            else:
+                success = False
+                detail = f"Aucun changement détecté (distance={distance})"
+        elif change_level == 'minor':
+            # Changement mineur — OK si confiance élevée
+            success = decision.confidence > 0.9
+            detail = f"Changement mineur (distance={distance})"
+        else:
+            # Changement majeur — succès
+            success = True
+            detail = f"Changement majeur détecté (distance={distance})"
+
+        matches_expected = success
+
+        # --- Level 2 : VLM (conditionnel) ---
+        if self.verify_level == 'vlm' or (self.verify_level == 'auto' and decision.confidence < 0.95 and not success):
+            vlm_result = self._vlm_verify(post, decision)
+            if vlm_result is not None:
+                matches_expected = vlm_result.get('matches', success)
+                actual_state = vlm_result.get('actual_state', '')
+                detail += f" | VLM: matches={matches_expected}, état='{actual_state[:80]}'"
+                # Le VLM a le dernier mot
+                success = matches_expected
+                logger.info(f"🔍 [ORA/verify/VLM] matches={matches_expected} état='{actual_state[:60]}'")
+
+        emoji = "✅" if success else "❌"
+        logger.info(f"{emoji} [ORA/verify] success={success} level={change_level} — {detail[:100]}")
+
+        return VerificationResult(
+            success=success,
+            change_level=change_level,
+            matches_expected=matches_expected,
+            detail=detail,
+        )
+
+    # ─── Boucle principale ────────────────────────────────
+
+    def run_workflow(
+        self,
+        steps: list,
+        on_progress: Optional[Callable] = None,
+        execute_action_fn: Optional[Callable] = None,
+    ) -> LoopResult:
+        """Exécute un workflow complet avec la boucle ORA.
+
+        Args:
+            steps: Liste de dicts avec les paramètres de chaque étape.
+                   Chaque dict contient : action_type, parameters, visual_anchor, label.
+            on_progress: Callback(step_index, total_steps, verification_result).
+            execute_action_fn: Fonction externe pour les actions passthrough
+                               (signature: execute_action(action_type, params) -> dict).
+
+        Returns:
+            LoopResult.
+        """
+        total = len(steps)
+        if total == 0:
+            return LoopResult(success=True, steps_completed=0, total_steps=0, reason="Aucune étape")
+
+        if total > self.max_steps:
+            return LoopResult(
+                success=False, steps_completed=0, total_steps=total,
+                reason=f"Trop d'étapes ({total} > max {self.max_steps})"
+            )
+
+        logger.info(f"🚀 [ORA] Démarrage workflow: {total} étapes, verify={self.verify_level}, retries={self.max_retries}")
+
+        for i, step in enumerate(steps):
+            logger.info(f"\n{'='*60}")
+            logger.info(f"📋 [ORA] Étape {i+1}/{total}: {step.get('action_type', '?')} — {step.get('label', '')}")
+
+            # --- 1. Observer l'état pré-action ---
+            pre = self.observe()
+
+            # --- 2. Raisonner : construire la Decision ---
+            decision = self.reason_workflow_step(step, pre)
+
+            # --- 3. Agir ---
+            if decision.action == 'passthrough' and execute_action_fn:
+                # Déléguer aux fonctions existantes (IA, extraction, etc.)
+                action_type = step.get('action_type', '')
+                params = step.get('_full_params', step.get('parameters', {}))
+                result = execute_action_fn(action_type, params)
+                act_success = result.get('success', False)
+                if not act_success:
+                    logger.warning(f"❌ [ORA] Action passthrough échouée: {result.get('error', '?')}")
+                    return LoopResult(
+                        success=False, steps_completed=i, total_steps=total,
+                        reason=f"Étape {i+1} passthrough échouée: {result.get('error', '?')}"
+                    )
+                # Pas de vérification pour les passthrough
+                if on_progress:
+                    on_progress(i + 1, total, VerificationResult(
+                        success=True, change_level='n/a',
+                        matches_expected=True, detail="Passthrough OK"
+                    ))
+                continue
+
+            act_success = self.act(decision, step)
+            if not act_success and decision.action not in ('wait', 'done'):
+                logger.warning(f"❌ [ORA] Action échouée immédiatement")
+                return LoopResult(
+                    success=False, steps_completed=i, total_steps=total,
+                    reason=f"Étape {i+1}: action '{decision.action}' échouée"
+                )
+
+            # Petit délai pour laisser l'écran se stabiliser
+            time.sleep(0.3)
+
+            # --- 4. Observer l'état post-action ---
+            post = self.observe()
+
+            # --- 5. Vérifier ---
+            verification = self.verify(pre, post, decision)
+
+            if not verification.success:
+                # Réessayer
+                retried = False
+                for retry in range(self.max_retries):
+                    logger.info(f"🔄 [ORA] Retry {retry+1}/{self.max_retries} pour étape {i+1}")
+                    pre_retry = self.observe()
+                    act_success = self.act(decision, step)
+                    time.sleep(0.3)
+                    post_retry = self.observe()
+                    verification = self.verify(pre_retry, post_retry, decision)
+                    if verification.success:
+                        retried = True
+                        logger.info(f"✅ [ORA] Retry {retry+1} réussi")
+                        break
+                if not retried and not verification.success:
+                    logger.warning(f"❌ [ORA] Étape {i+1} échouée après {self.max_retries} retries")
+                    return LoopResult(
+                        success=False, steps_completed=i, total_steps=total,
+                        reason=f"Étape {i+1}: vérification échouée — {verification.detail}"
+                    )
+
+            # --- Callback de progression ---
+            if on_progress:
+                on_progress(i + 1, total, verification)
+
+        logger.info(f"✅ [ORA] Workflow terminé avec succès: {total}/{total} étapes")
+        return LoopResult(success=True, steps_completed=total, total_steps=total)
+
+    # ═══════════════════════════════════════════════════════════
+    # Méthodes privées — actions
+    # ═══════════════════════════════════════════════════════════
+
+    def _act_click(self, decision: Decision, step_params: dict) -> bool:
+        """Exécute un clic (simple, double, droit, hover, focus).
+
+        Pipeline : template matching → find_element_on_screen (OCR → UI-TARS → VLM).
+        """
+        if not PYAUTOGUI_AVAILABLE:
+            logger.error("pyautogui non disponible")
+            return False
+
+        anchor = step_params.get('visual_anchor', {})
+        screenshot_b64 = anchor.get('screenshot')
+        bbox = anchor.get('bounding_box', {})
+        target_text = anchor.get('target_text', '') or decision.target
+        target_desc = anchor.get('description', '')
+
+        x, y = None, None
+        method_used = ''
+
+        # --- Méthode 1 : Template matching direct (~1-10ms) ---
+        if screenshot_b64 and CV2_AVAILABLE and PIL_AVAILABLE and MSS_AVAILABLE:
+            try:
+                import io as _io
+
+                # Capturer l'écran
+                with mss_lib.mss() as sct:
+                    mon = sct.monitors[0]
+                    grab = sct.grab(mon)
+                    screen_img = Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')
+
+                # Décoder l'ancre
+                raw_b64 = screenshot_b64.split(',')[1] if ',' in screenshot_b64 else screenshot_b64
+                anchor_data = base64.b64decode(raw_b64)
+                anchor_img = Image.open(_io.BytesIO(anchor_data))
+
+                screen_cv = cv2.cvtColor(np.array(screen_img), cv2.COLOR_RGB2BGR)
+                anchor_cv = cv2.cvtColor(np.array(anchor_img), cv2.COLOR_RGB2BGR)
+
+                if anchor_cv.shape[0] < screen_cv.shape[0] and anchor_cv.shape[1] < screen_cv.shape[1]:
+                    t0 = time.time()
+                    result_tm = cv2.matchTemplate(screen_cv, anchor_cv, cv2.TM_CCOEFF_NORMED)
+                    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result_tm)
+                    elapsed_ms = (time.time() - t0) * 1000
+                    logger.info(f"⚡ [ORA/template] score={max_val:.3f} pos={max_loc} ({elapsed_ms:.0f}ms)")
+
+                    if max_val > 0.75:
+                        x = max_loc[0] + anchor_cv.shape[1] // 2
+                        y = max_loc[1] + anchor_cv.shape[0] // 2
+                        method_used = 'template'
+            except Exception as e:
+                logger.debug(f"⚠️ [ORA/template] Erreur: {e}")
+
+        # --- Méthode 2 : find_element_on_screen (OCR → UI-TARS → VLM) ---
+        if x is None and (target_text or target_desc):
+            try:
+                from core.execution.input_handler import find_element_on_screen
+                grounding_result = find_element_on_screen(
+                    target_text=target_text,
+                    target_description=target_desc,
+                    anchor_image_base64=screenshot_b64,
+                    anchor_bbox=bbox if bbox else None,
+                )
+                if grounding_result:
+                    x, y = grounding_result['x'], grounding_result['y']
+                    method_used = f"grounding_{grounding_result['method']}"
+                    logger.info(f"🔍 [ORA/grounding] Trouvé via {method_used} à ({x}, {y})")
+            except Exception as e:
+                logger.debug(f"⚠️ [ORA/grounding] Erreur: {e}")
+
+        # --- Exécuter le clic ---
+        if x is None:
+            # Dernier recours : coordonnées statiques de l'ancre
+            if bbox and bbox.get('width') and bbox.get('height'):
+                x = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
+                y = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)
+                method_used = 'static_fallback'
+                logger.warning(f"⚠️ [ORA/click] Fallback coordonnées statiques: ({x}, {y})")
+            else:
+                logger.error(f"❌ [ORA/click] Impossible de localiser '{target_text}' — aucune méthode n'a fonctionné")
+                return False
+
+        logger.info(f"🖱️ [ORA/click] {decision.value} à ({x}, {y}) via {method_used}")
+
+        if decision.value == 'double':
+            pyautogui.doubleClick(x, y)
+        elif decision.value == 'right':
+            pyautogui.rightClick(x, y)
+        elif decision.value == 'hover':
+            pyautogui.moveTo(x, y, duration=0.3)
+            time.sleep(step_params.get('parameters', {}).get('hover_duration_ms',
+                       step_params.get('parameters', {}).get('duration_ms', 1000)) / 1000)
+        elif decision.value == 'focus':
+            pyautogui.click(x, y)
+            time.sleep(0.3)
+        else:
+            pyautogui.click(x, y)
+
+        return True
+
+    def _act_type(self, decision: Decision) -> bool:
+        """Saisie de texte via safe_type_text."""
+        if not decision.value:
+            logger.warning("🎯 [ORA/type] Pas de texte à saisir")
+            return True  # Vide = rien à faire, pas un échec
+
+        try:
+            from core.execution.input_handler import safe_type_text
+            safe_type_text(decision.value)
+            return True
+        except Exception as e:
+            logger.error(f"❌ [ORA/type] Erreur saisie: {e}")
+            return False
+
+    def _act_hotkey(self, decision: Decision, step_params: dict) -> bool:
+        """Raccourci clavier ou scroll."""
+        if not PYAUTOGUI_AVAILABLE:
+            logger.error("pyautogui non disponible")
+            return False
+
+        value = decision.value
+
+        # Gestion du scroll (encodé dans value)
+        if value.startswith('scroll_'):
+            parts = value.split('_')
+            # scroll_down_3 ou scroll_up_5
+            direction = parts[1] if len(parts) > 1 else 'down'
+            amount = int(parts[2]) if len(parts) > 2 else 3
+
+            # Positionner la souris si ancre disponible
+            anchor = step_params.get('visual_anchor', {})
+            bbox = anchor.get('bounding_box', {})
+            if bbox and bbox.get('width'):
+                sx = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
+                sy = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)
+                pyautogui.moveTo(sx, sy, duration=0.1)
+
+            scroll_value = amount if direction in ('up', 'left') else -amount
+            if direction in ('left', 'right'):
+                pyautogui.hscroll(scroll_value)
+            else:
+                pyautogui.scroll(scroll_value)
+            time.sleep(0.5)
+            return True
+
+        # Raccourci clavier normal
+        keys = value.split('+')
+        logger.info(f"⌨️ [ORA/hotkey] {'+'.join(keys)}")
+        pyautogui.hotkey(*keys)
+        return True
+
+    def _act_wait(self, decision: Decision) -> bool:
+        """Attente passive."""
+        try:
+            timeout_ms = int(decision.value)
+        except (ValueError, TypeError):
+            timeout_ms = 5000
+
+        logger.info(f"⏳ [ORA/wait] {timeout_ms}ms")
+        time.sleep(timeout_ms / 1000)
+        return True
+
+    # ═══════════════════════════════════════════════════════════
+    # Méthodes privées — utilitaires
+    # ═══════════════════════════════════════════════════════════
+
+    def _capture_screen(self) -> Any:
+        """Capture l'écran principal, retourne une PIL Image."""
+        if not MSS_AVAILABLE or not PIL_AVAILABLE:
+            logger.warning("mss ou PIL non disponible — capture impossible")
+            return None
+
+        try:
+            with mss_lib.mss() as sct:
+                monitor = sct.monitors[0]
+                grab = sct.grab(monitor)
+                return Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')
+        except Exception as e:
+            logger.error(f"Erreur capture écran: {e}")
+            return None
+
+    def _compute_phash(self, image: Any) -> Any:
+        """Calcule le pHash d'une image PIL."""
+        if image is None or not IMAGEHASH_AVAILABLE:
+            return None
+        try:
+            from core.analytics.screen_change_detector import compute_phash
+            return compute_phash(image)
+        except Exception as e:
+            logger.debug(f"Erreur compute_phash: {e}")
+            return None
+
+    def _get_active_window_title(self) -> str:
+        """Récupère le titre de la fenêtre active via xdotool."""
+        try:
+            result = subprocess.run(
+                ['xdotool', 'getactivewindow', 'getwindowname'],
+                capture_output=True, text=True, timeout=2
+            )
+            if result.returncode == 0:
+                return result.stdout.strip()
+        except Exception:
+            pass
+        return ''
+
+    def _phash_distance(self, hash1: Any, hash2: Any) -> int:
+        """Distance de Hamming entre deux pHash. Retourne 999 si non calculable."""
+        if hash1 is None or hash2 is None:
+            return 999
+        try:
+            return hash1 - hash2
+        except Exception:
+            return 999
+
+    def _classify_change(self, distance: int) -> str:
+        """Classifie un changement d'écran selon la distance pHash."""
+        if distance < 5:
+            return 'same'
+        elif distance < 15:
+            return 'minor'
+        else:
+            return 'major'
+
+    def _vlm_verify(self, post: Observation, decision: Decision) -> Optional[dict]:
+        """Appelle le VLM pour vérifier l'écran post-action.
+
+        Returns:
+            Dict {'matches': bool, 'actual_state': str} ou None si échec.
+        """
+        if post.screenshot is None:
+            return None
+
+        try:
+            import requests
+            import io as _io
+
+            buffer = _io.BytesIO()
+            post.screenshot.save(buffer, format='JPEG', quality=70)
+            image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+            prompt = (
+                f"L'action '{decision.action} {decision.target}' vient d'être exécutée. "
+                f"Le résultat attendu était: '{decision.expected_after}'. "
+                f"L'écran correspond-il ? Réponds JSON: "
+                f'{{\"matches\": true/false, \"actual_state\": \"...\"}}'
+            )
+
+            ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
+            model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
+
+            response = requests.post(
+                f"{ollama_url}/api/generate",
+                json={
+                    "model": model,
+                    "prompt": prompt,
+                    "images": [image_b64],
+                    "stream": False,
+                    "options": {"temperature": 0.1, "num_predict": 200}
+                },
+                timeout=30,
+            )
+
+            if response.status_code != 200:
+                logger.warning(f"[ORA/vlm_verify] HTTP {response.status_code}")
+                return None
+
+            text = response.json().get('response', '').strip()
+            match = re.search(r'\{[\s\S]*\}', text)
+            if match:
+                parsed = json.loads(match.group())
+                return {
+                    'matches': bool(parsed.get('matches', False)),
+                    'actual_state': str(parsed.get('actual_state', '')),
+                }
+
+            return None
+
+        except Exception as e:
+            logger.debug(f"[ORA/vlm_verify] Erreur: {e}")
+            return None
+
+    def _resolve_variables(self, text: str) -> str:
+        """Résout les variables {{var}} dans un texte."""
+        if not text or '{{' not in text:
+            return text
+
+        def replace_var(match):
+            var_name = match.group(1)
+            return str(self._variables.get(var_name, match.group(0)))
+
+        return re.sub(r'\{\{(\w+)\}\}', replace_var, text)
--- a/visual_workflow_builder/backend/api_v3/execute.py
+++ b/visual_workflow_builder/backend/api_v3/execute.py
@@ -1346,6 +1346,182 @@ def execute_action(action_type: str, params: dict) -> dict:
        return {'success': False, 'error': str(e)}


+def run_workflow_verified(execution_id: str, workflow_id: str, app):
+    """
+    Thread d'exécution en mode 'verified' — boucle ORA.
+
+    Charge les steps du workflow depuis la BDD, construit les params
+    comme execute_workflow_thread, puis délègue à ORALoop.run_workflow().
+
+    NE modifie PAS execute_workflow_thread : les modes basic/intelligent/debug
+    continuent de fonctionner exactement comme avant.
+    """
+    global _execution_state
+
+    with app.app_context():
+        try:
+            execution = Execution.query.get(execution_id)
+            workflow = Workflow.query.get(workflow_id)
+
+            if not execution or not workflow:
+                logger.error("[ORA] Workflow ou exécution non trouvé")
+                return
+
+            steps_db = workflow.steps.order_by(Step.order).all()
+            execution.total_steps = len(steps_db)
+            execution.status = 'running'
+            execution.started_at = datetime.utcnow()
+            db.session.commit()
+
+            logger.info(f"🚀 [ORA] Démarrage workflow vérifié {workflow_id}: {len(steps_db)} étapes")
+
+            # --- Construire les params pour chaque step (même logique que execute_workflow_thread) ---
+            ora_steps = []
+            for step in steps_db:
+                params = step.parameters or {}
+
+                # Charger l'ancre visuelle si présente
+                if step.anchor_id:
+                    anchor = VisualAnchor.query.get(step.anchor_id)
+                    if anchor:
+                        anchor_image_path = anchor.thumbnail_path or anchor.image_path
+                        if anchor_image_path and os.path.exists(anchor_image_path):
+                            with open(anchor_image_path, 'rb') as f:
+                                image_base64 = base64.b64encode(f.read()).decode('utf-8')
+                        else:
+                            image_base64 = None
+
+                        anchor_data = {
+                            'anchor_id': anchor.id,
+                            'screenshot': image_base64,
+                            'bounding_box': {
+                                'x': anchor.bbox_x,
+                                'y': anchor.bbox_y,
+                                'width': anchor.bbox_width,
+                                'height': anchor.bbox_height,
+                            },
+                            'metadata': {
+                                'screen_resolution': {
+                                    'width': anchor.screen_width,
+                                    'height': anchor.screen_height,
+                                }
+                            },
+                        }
+                        if anchor.target_text:
+                            anchor_data['target_text'] = anchor.target_text
+                        if anchor.ocr_description:
+                            anchor_data['description'] = anchor.ocr_description
+
+                        params['visual_anchor'] = anchor_data
+
+                    # Injecter le label pour le grounding
+                    if step.label:
+                        params['_step_label'] = step.label
+
+                ora_steps.append({
+                    'action_type': step.action_type,
+                    'parameters': params,
+                    'visual_anchor': params.get('visual_anchor', {}),
+                    'label': step.label or step.action_type,
+                    '_full_params': params,  # Pour les passthrough
+                    '_step_id': step.id,
+                })
+
+            # --- Créer et lancer la boucle ORA ---
+            from core.execution.observe_reason_act import ORALoop
+
+            ora = ORALoop(max_retries=2, max_steps=50, verify_level='auto')
+            ora._variables = _execution_state.get('variables', {})
+
+            # Créer les ExecutionStep en amont pour le suivi
+            step_results_map = {}
+            for idx, step in enumerate(steps_db):
+                step_result = ExecutionStep(
+                    execution_id=execution_id,
+                    step_id=step.id,
+                    status='pending',
+                )
+                db.session.add(step_result)
+                step_results_map[idx] = step_result
+            db.session.commit()
+
+            def on_progress(step_index, total, verification):
+                """Callback de progression — met à jour la BDD."""
+                try:
+                    sr = step_results_map.get(step_index - 1)
+                    if sr:
+                        sr.status = 'success' if verification.success else 'error'
+                        sr.started_at = sr.started_at or datetime.utcnow()
+                        sr.ended_at = datetime.utcnow()
+                        if sr.started_at:
+                            sr.duration_ms = int((sr.ended_at - sr.started_at).total_seconds() * 1000)
+                        sr.output = {
+                            'change_level': verification.change_level,
+                            'matches_expected': verification.matches_expected,
+                            'detail': verification.detail,
+                            'mode': 'verified',
+                        }
+                        if verification.success:
+                            execution.completed_steps = step_index
+                        else:
+                            execution.failed_steps = (execution.failed_steps or 0) + 1
+                            sr.error_message = verification.detail
+                    execution.current_step_index = step_index
+                    db.session.commit()
+                except Exception as e:
+                    logger.warning(f"[ORA/progress] Erreur BDD: {e}")
+
+            # Exécuter
+            loop_result = ora.run_workflow(
+                steps=ora_steps,
+                on_progress=on_progress,
+                execute_action_fn=execute_action,
+            )
+
+            # Synchroniser les variables runtime
+            _execution_state['variables'] = ora._variables
+
+            # Finaliser
+            if loop_result.success:
+                execution.status = 'completed'
+            else:
+                execution.status = 'error'
+                execution.error_message = loop_result.reason
+                # Marquer le step en échec
+                failed_sr = step_results_map.get(loop_result.steps_completed)
+                if failed_sr and failed_sr.status == 'pending':
+                    failed_sr.status = 'error'
+                    failed_sr.error_message = loop_result.reason
+                    failed_sr.ended_at = datetime.utcnow()
+
+            execution.ended_at = datetime.utcnow()
+            execution.completed_steps = loop_result.steps_completed
+            db.session.commit()
+
+            logger.info(
+                f"{'✅' if loop_result.success else '❌'} [ORA] Workflow terminé: "
+                f"{loop_result.steps_completed}/{loop_result.total_steps} "
+                f"({execution.status}) — {loop_result.reason}"
+            )
+
+        except Exception as e:
+            logger.error(f"❌ [ORA] Erreur fatale: {e}", exc_info=True)
+            try:
+                execution = Execution.query.get(execution_id)
+                if execution:
+                    execution.status = 'error'
+                    execution.error_message = f"Erreur fatale ORA: {str(e)}"
+                    execution.ended_at = datetime.utcnow()
+                    db.session.commit()
+            except Exception as db_err:
+                logger.warning(f"[ORA] DB cleanup error: {db_err}")
+
+        finally:
+            with _execution_lock:
+                _execution_state['is_running'] = False
+                _execution_state['current_execution_id'] = None
+
+
@api_v3_bp.route('/execute/start', methods=['POST'])
 def start_execution():
    """
@@ -1371,7 +1547,7 @@ def start_execution():
        minimize_browser = data.get('minimize_browser', True)  # Activé par défaut

        # Valider le mode
-        if execution_mode not in ['basic', 'intelligent', 'debug']:
+        if execution_mode not in ['basic', 'intelligent', 'debug', 'verified']:
            execution_mode = 'basic'

        # Utiliser le workflow actif si non spécifié
@@ -1430,15 +1606,22 @@ def start_execution():
        from flask import current_app
        app = current_app._get_current_object()

-        thread = threading.Thread(
-            target=execute_workflow_thread,
-            args=(execution.id, workflow_id, app)
-        )
+        if execution_mode == 'verified':
+            # Mode ORA : boucle observe→raisonne→agit avec vérification
+            thread = threading.Thread(
+                target=run_workflow_verified,
+                args=(execution.id, workflow_id, app)
+            )
+        else:
+            thread = threading.Thread(
+                target=execute_workflow_thread,
+                args=(execution.id, workflow_id, app)
+            )
        thread.daemon = True
        thread.start()
        _execution_state['thread'] = thread

-        print(f"🚀 [API v3] Exécution lancée: {execution.id}")
+        print(f"🚀 [API v3] Exécution lancée: {execution.id} (mode={execution_mode})")

        return jsonify({
            'success': True,