feat(ORA): classification erreurs + recovery intelligent

4 types d'erreurs : ELEMENT_NOT_FOUND, OVERLAY_BLOCKING, WRONG_SCREEN, ACTION_NO_EFFECT.

Recovery spécialisé par type :
- Élément introuvable → attente + scroll + retry UI-TARS élargi
- Overlay bloquant → détection pattern + fermeture auto + retry
- Mauvais écran → description VLM + Alt+Tab + recherche taskbar
- Pas d'effet → double-clic + délai + coordonnées décalées

Intégré dans run_workflow() : classification → recovery → re-vérif.
Échec total → pause supervisée (pas de stop brutal).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-22 16:44:31 +02:00
parent 8903f35433
commit 6829ad8e79
1 changed files with 511 additions and 19 deletions
--- a/core/execution/observe_reason_act.py
+++ b/core/execution/observe_reason_act.py
@@ -104,6 +104,27 @@ class VerificationResult:
    detail: str


+# ═══════════════════════════════════════════════════════════════
+# Classification des erreurs et stratégies de recovery
+# ═══════════════════════════════════════════════════════════════
+
+class ErrorType:
+    """Error categories that can be identified after a failed action.
+
+    The members are plain string constants (not an Enum), so they can be
+    compared with ``==`` and interpolated directly into log messages.
+    """
+    ELEMENT_NOT_FOUND = "element_not_found"    # The element is not visible
+    OVERLAY_BLOCKING = "overlay_blocking"       # A dialog/popup is blocking
+    WRONG_SCREEN = "wrong_screen"              # The screen does not match
+    ACTION_NO_EFFECT = "action_no_effect"       # The action was performed but nothing changed
+
+
+@dataclass
+class RecoveryAttempt:
+    """Record of one recovery attempt and its outcome."""
+    # ErrorType constant the recovery was run for.
+    error_type: str
+    # Label of the strategy that produced this result
+    # (e.g. "wait_1s", "alt_tab", "exhausted").
+    strategy: str
+    # True when the strategy reported success.
+    success: bool
+    # Human-readable explanation, reused in logs and failure reasons.
+    detail: str
+
+
@dataclass
 class LoopResult:
    """Résultat global de l'exécution de la boucle ORA."""
@@ -723,6 +744,439 @@ Règles:
            detail=detail,
        )

+    # ─── Classification d'erreurs ───────────────────────────
+
+    def _classify_error(self, pre: Observation, post: Observation,
+                        decision: Decision, act_success: bool) -> str:
+        """Decide which ErrorType best explains a failed step.
+
+        Checks are made in priority order:
+
+        1. The action itself was not executed → ELEMENT_NOT_FOUND
+        2. A dialog/popup pattern is detected on screen → OVERLAY_BLOCKING
+        3. The perceptual-hash distance between ``pre`` and ``post`` is
+           below 5 → ACTION_NO_EFFECT
+        4. Otherwise the screen changed, but not as expected → WRONG_SCREEN
+
+        Args:
+            pre: Observation captured before the action.
+            post: Observation captured after the action.
+            decision: Decision that was executed (not read here).
+            act_success: Whether the action was reported as executed.
+
+        Returns:
+            One of the ErrorType string constants.
+        """
+        def _verdict(kind: str, why: str) -> str:
+            # Single place that emits the classification trace.
+            print(f"🔎 [ORA/classify] → {kind} ({why})")
+            return kind
+
+        # The action failed before anything was clicked.
+        if not act_success:
+            return _verdict(ErrorType.ELEMENT_NOT_FOUND, "action non exécutée")
+
+        # Overlay detection is best-effort: any failure falls through to
+        # the hash comparison below.
+        try:
+            from core.execution.input_handler import check_screen_for_patterns
+            blocking = check_screen_for_patterns()
+            if blocking:
+                return _verdict(
+                    ErrorType.OVERLAY_BLOCKING,
+                    f"pattern={blocking.get('pattern', '?')}",
+                )
+        except Exception as exc:
+            logger.debug(f"[ORA/classify] check_screen_for_patterns échoué: {exc}")
+
+        # Unchanged screen vs. changed-but-unexpected screen.
+        delta = self._phash_distance(pre.phash, post.phash)
+        kind = ErrorType.ACTION_NO_EFFECT if delta < 5 else ErrorType.WRONG_SCREEN
+        return _verdict(kind, f"distance={delta}")
+
+    # ─── Stratégies de recovery ──────────────────────────────
+
+    def _recover(self, error_type: str, decision: Decision,
+                 step_params: dict, post: Observation) -> RecoveryAttempt:
+        """Route a classified error to its dedicated recovery strategy.
+
+        Args:
+            error_type: ErrorType constant produced by the classifier.
+            decision: Decision whose execution or verification failed.
+            step_params: Parameters of the workflow step being executed.
+            post: Observation taken after the failure (not read here).
+
+        Returns:
+            The RecoveryAttempt produced by the matching strategy, or a
+            failed attempt with strategy "none" for an unknown error type.
+        """
+        print(f"🔧 [ORA/recovery] Stratégie pour: {error_type}")
+
+        # (error type, handler) pairs, scanned in order with ``==``.
+        routes = (
+            (ErrorType.ELEMENT_NOT_FOUND, self._recover_element_not_found),
+            (ErrorType.OVERLAY_BLOCKING, self._recover_overlay_blocking),
+            (ErrorType.WRONG_SCREEN, self._recover_wrong_screen),
+            (ErrorType.ACTION_NO_EFFECT, self._recover_no_effect),
+        )
+        for kind, handler in routes:
+            if error_type == kind:
+                return handler(decision, step_params)
+
+        return RecoveryAttempt(
+            error_type=error_type,
+            strategy="none",
+            success=False,
+            detail="Type d'erreur inconnu",
+        )
+
+    def _recover_element_not_found(self, decision: Decision,
+                                   step_params: dict) -> RecoveryAttempt:
+        """Recover when the target element could not be located.
+
+        Strategies are tried in order; the first retry that succeeds wins:
+
+        1. Wait 1s (the element may still be loading) and retry the action.
+        2. Scroll down up to three times, retrying after each scroll. If
+           none of them works, scroll back up by the same total amount.
+        3. Ask UI-TARS grounding with a broader description and click the
+           coordinates it returns.
+
+        "Success" here only means the retried action was executed; checking
+        that the screen ended up in the expected state is left to the caller.
+
+        Args:
+            decision: Decision to retry. ``decision.target`` is the fallback
+                search text and ``decision.value`` selects the click type
+                used in step 3.
+            step_params: Step parameters; ``visual_anchor.target_text`` is
+                the preferred search text when present.
+
+        Returns:
+            RecoveryAttempt describing the winning strategy, or a failed
+            attempt with strategy "exhausted".
+        """
+        scroll_tries = 3   # number of scroll-and-retry rounds
+        scroll_clicks = 3  # wheel clicks per round
+
+        # `or {}` also covers an explicit `visual_anchor: None` entry.
+        anchor = step_params.get('visual_anchor') or {}
+        target_text = anchor.get('target_text', '') or decision.target
+
+        # --- Step 1: give the element time to load ---
+        print(f"🔧 [ORA/recovery/not_found] Étape 1: attente 1s...")
+        time.sleep(1.0)
+
+        if self.act(decision, step_params):
+            # Let the screen settle; the caller performs the verification,
+            # so no observation is captured here.
+            time.sleep(0.3)
+            return RecoveryAttempt(
+                error_type=ErrorType.ELEMENT_NOT_FOUND,
+                strategy="wait_1s",
+                success=True,
+                detail="Élément trouvé après attente 1s"
+            )
+
+        # --- Step 2: scroll down looking for the element ---
+        if PYAUTOGUI_AVAILABLE:
+            print(f"🔧 [ORA/recovery/not_found] Étape 2: scroll down pour chercher '{target_text}'")
+            for scroll_attempt in range(scroll_tries):
+                pyautogui.scroll(-scroll_clicks)  # negative = scroll down
+                time.sleep(0.5)
+                if self.act(decision, step_params):
+                    return RecoveryAttempt(
+                        error_type=ErrorType.ELEMENT_NOT_FOUND,
+                        strategy=f"scroll_down_{scroll_attempt + 1}",
+                        success=True,
+                        detail=f"Élément trouvé après scroll down x{scroll_attempt + 1}"
+                    )
+
+            # Nothing found: undo every scroll performed above.
+            pyautogui.scroll(scroll_clicks * scroll_tries)
+            time.sleep(0.3)
+
+        # --- Step 3: retry through UI-TARS with a broader description ---
+        if target_text:
+            print(f"🔧 [ORA/recovery/not_found] Étape 3: UI-TARS avec description élargie")
+            try:
+                from core.execution.input_handler import _grounding_ui_tars
+                broader_desc = f"bouton ou élément contenant '{target_text}' ou similaire"
+                result = _grounding_ui_tars(target_text, broader_desc)
+                if result:
+                    x, y = result['x'], result['y']
+                    print(f"✅ [ORA/recovery/not_found] UI-TARS élargi: trouvé à ({x}, {y})")
+                    if PYAUTOGUI_AVAILABLE:
+                        # Honour the click type requested by the decision.
+                        if decision.value == 'double':
+                            pyautogui.doubleClick(x, y)
+                        elif decision.value == 'right':
+                            pyautogui.rightClick(x, y)
+                        else:
+                            pyautogui.click(x, y)
+                        return RecoveryAttempt(
+                            error_type=ErrorType.ELEMENT_NOT_FOUND,
+                            strategy="ui_tars_broad",
+                            success=True,
+                            detail=f"Élément trouvé par UI-TARS élargi à ({x}, {y})"
+                        )
+            except Exception as e:
+                # Best-effort step: a grounding failure ends in "exhausted".
+                logger.debug(f"[ORA/recovery/not_found] UI-TARS élargi échoué: {e}")
+
+        return RecoveryAttempt(
+            error_type=ErrorType.ELEMENT_NOT_FOUND,
+            strategy="exhausted",
+            success=False,
+            detail=f"Élément '{target_text}' introuvable après attente + scroll + UI-TARS élargi"
+        )
+
+    def _recover_overlay_blocking(self, decision: Decision,
+                                  step_params: dict) -> RecoveryAttempt:
+        """Recover when a dialog or popup sits on top of the target.
+
+        Steps:
+
+        1. Re-detect the blocking pattern on screen.
+        2. Let the pattern handler dismiss it (OK/Close click).
+        3. Retry the original action.
+
+        Args:
+            decision: Decision to retry once the overlay is gone.
+            step_params: Parameters of the workflow step being executed.
+
+        Returns:
+            RecoveryAttempt whose ``strategy`` names the point reached:
+            "pattern_vanished", "handle_failed", "close_dialog_retry",
+            "close_dialog_retry_failed", "import_error" or "error".
+        """
+        def _outcome(strategy: str, ok: bool, detail: str) -> RecoveryAttempt:
+            # Every result of this method carries the same error type.
+            return RecoveryAttempt(
+                error_type=ErrorType.OVERLAY_BLOCKING,
+                strategy=strategy,
+                success=ok,
+                detail=detail,
+            )
+
+        try:
+            from core.execution.input_handler import check_screen_for_patterns, handle_detected_pattern
+
+            # --- Step 1: identify the dialog ---
+            overlay = check_screen_for_patterns()
+            if not overlay:
+                # It may have gone away between classification and now.
+                return _outcome(
+                    "pattern_vanished", True,
+                    "Le dialogue a disparu spontanément",
+                )
+
+            print(f"🔧 [ORA/recovery/overlay] Pattern détecté: {overlay.get('pattern', '?')} → action={overlay.get('action', '?')}")
+
+            # --- Step 2: dismiss the dialog ---
+            if not handle_detected_pattern(overlay):
+                print(f"⚠️ [ORA/recovery/overlay] Impossible de gérer le pattern automatiquement")
+                return _outcome(
+                    "handle_failed", False,
+                    f"Pattern '{overlay.get('pattern', '?')}' détecté mais non géré",
+                )
+
+            print(f"✅ [ORA/recovery/overlay] Dialogue fermé")
+            time.sleep(0.5)
+
+            # --- Step 3: retry the original action ---
+            if self.act(decision, step_params):
+                return _outcome(
+                    "close_dialog_retry", True,
+                    f"Dialogue '{overlay.get('pattern', '?')}' fermé, action retentée avec succès",
+                )
+
+            return _outcome(
+                "close_dialog_retry_failed", False,
+                f"Dialogue fermé mais l'action originale a encore échoué",
+            )
+
+        except ImportError as exc:
+            logger.debug(f"[ORA/recovery/overlay] Import manquant: {exc}")
+            return _outcome(
+                "import_error", False,
+                f"Fonctions de pattern non disponibles: {exc}",
+            )
+        except Exception as exc:
+            logger.debug(f"[ORA/recovery/overlay] Erreur: {exc}")
+            return _outcome(
+                "error", False,
+                f"Erreur recovery overlay: {exc}",
+            )
+
+    def _recover_wrong_screen(self, decision: Decision,
+                              step_params: dict) -> RecoveryAttempt:
+        """Recover when the screen is not the one the step expects.
+
+        Steps:
+
+        1. Ask the VLM to describe the current screen. The description and
+           the expected state are only logged and reported; they are not
+           compared programmatically.
+        2. Press Alt+Tab and retry the action.
+        3. Locate the application in the taskbar through UI-TARS grounding,
+           click it and retry the action.
+
+        Args:
+            decision: Decision to retry; ``decision.expected_after`` is
+                logged as the expected state (may be None).
+            step_params: Step parameters; ``visual_anchor.window_title``
+                (or ``target_text``) names the window to look for.
+
+        Returns:
+            RecoveryAttempt with strategy "alt_tab" or "taskbar_click" on
+            success, otherwise a failed attempt ("no_pyautogui" or
+            "exhausted").
+        """
+        if not PYAUTOGUI_AVAILABLE:
+            return RecoveryAttempt(
+                error_type=ErrorType.WRONG_SCREEN,
+                strategy="no_pyautogui",
+                success=False,
+                detail="pyautogui non disponible"
+            )
+
+        # --- Step 1: ask the VLM what is on screen ---
+        current_obs = self.observe()
+        vlm_description = self._vlm_describe_screen(current_obs)
+        # expected_after may be None; slicing None would raise TypeError.
+        expected = str(decision.expected_after or '')
+
+        print(f"🔧 [ORA/recovery/wrong_screen] Écran actuel: '{vlm_description[:80]}'")
+        print(f"🔧 [ORA/recovery/wrong_screen] Attendu: '{expected[:80]}'")
+
+        # --- Step 2: Alt+Tab back to the previous window ---
+        print(f"🔧 [ORA/recovery/wrong_screen] Tentative Alt+Tab...")
+        pyautogui.hotkey('alt', 'tab')
+        time.sleep(0.8)
+
+        # A successful retry is taken as proof that the right window is back.
+        if self.act(decision, step_params):
+            return RecoveryAttempt(
+                error_type=ErrorType.WRONG_SCREEN,
+                strategy="alt_tab",
+                success=True,
+                detail="Alt+Tab a ramené la bonne fenêtre, action retentée"
+            )
+
+        # --- Step 3: look for the window in the taskbar ---
+        # `or {}` also covers an explicit `visual_anchor: None` entry.
+        anchor = step_params.get('visual_anchor') or {}
+        app_name = anchor.get('window_title', '') or anchor.get('target_text', '')
+        if app_name:
+            print(f"🔧 [ORA/recovery/wrong_screen] Recherche '{app_name}' dans la taskbar...")
+            try:
+                from core.execution.input_handler import _grounding_ui_tars
+                result = _grounding_ui_tars(app_name, f"bouton dans la barre des tâches pour '{app_name}'")
+                if result:
+                    pyautogui.click(result['x'], result['y'])
+                    time.sleep(0.8)
+                    if self.act(decision, step_params):
+                        return RecoveryAttempt(
+                            error_type=ErrorType.WRONG_SCREEN,
+                            strategy="taskbar_click",
+                            success=True,
+                            detail=f"Fenêtre '{app_name}' retrouvée via la taskbar"
+                        )
+            except Exception as e:
+                # Best-effort step: a grounding failure ends in "exhausted".
+                logger.debug(f"[ORA/recovery/wrong_screen] Taskbar search échoué: {e}")
+
+        return RecoveryAttempt(
+            error_type=ErrorType.WRONG_SCREEN,
+            strategy="exhausted",
+            success=False,
+            detail=f"Impossible de retrouver le bon écran. Actuel: '{vlm_description[:60]}'"
+        )
+
+    def _recover_no_effect(self, decision: Decision,
+                           step_params: dict) -> RecoveryAttempt:
+        """Recover when the action ran but produced no visible change.
+
+        Non-click actions get a single retry after a 1s delay. For clicks,
+        the following strategies are tried in order; each one is accepted
+        only if the perceptual-hash distance between a capture taken
+        *before* the attempt and one taken *after* it exceeds the threshold:
+
+        1. Double-click instead of a single left click.
+        2. Wait 1.5s, then retry the original action.
+        3. Click at the centre of the anchor's bounding box shifted by
+           +/- 5px, using the click type requested by the decision.
+
+        Args:
+            decision: Decision to retry; ``action`` and ``value`` select
+                the strategies that apply.
+            step_params: Step parameters; ``visual_anchor.bounding_box``
+                (x, y, width, height) is needed for step 3.
+
+        Returns:
+            RecoveryAttempt describing the strategy that changed the screen,
+            or a failed attempt ("no_pyautogui" or "exhausted").
+        """
+        if not PYAUTOGUI_AVAILABLE:
+            return RecoveryAttempt(
+                error_type=ErrorType.ACTION_NO_EFFECT,
+                strategy="no_pyautogui",
+                success=False,
+                detail="pyautogui non disponible"
+            )
+
+        # A distance strictly above this value counts as "the screen changed".
+        # NOTE(review): _classify_error treats `distance < 5` as unchanged, so
+        # a distance of exactly 5 is judged differently there — confirm which
+        # boundary is intended.
+        min_change = 5
+
+        # The click-specific strategies below do not apply to other actions.
+        if decision.action != 'click':
+            print(f"🔧 [ORA/recovery/no_effect] Retry après 1s (action={decision.action})")
+            time.sleep(1.0)
+            act_ok = self.act(decision, step_params)
+            return RecoveryAttempt(
+                error_type=ErrorType.ACTION_NO_EFFECT,
+                strategy="delayed_retry",
+                success=act_ok,
+                detail=f"Retry après délai 1s: {'OK' if act_ok else 'échoué'}"
+            )
+
+        # --- Step 1: double-click instead of a single click ---
+        if decision.value == 'left':
+            print(f"🔧 [ORA/recovery/no_effect] Étape 1: double-clic au lieu de simple clic")
+            double_decision = Decision(
+                action='click', target=decision.target, value='double',
+                reasoning='Recovery: double-clic',
+                expected_after=decision.expected_after,
+                confidence=decision.confidence,
+            )
+            # The baseline must be captured BEFORE acting; comparing two
+            # captures taken after the click cannot reveal its effect.
+            pre_double = self.observe()
+            if self.act(double_decision, step_params):
+                time.sleep(0.3)
+                post_double = self.observe()
+                dist = self._phash_distance(pre_double.phash, post_double.phash)
+                if dist > min_change:
+                    return RecoveryAttempt(
+                        error_type=ErrorType.ACTION_NO_EFFECT,
+                        strategy="double_click",
+                        success=True,
+                        detail="Double-clic a produit un changement"
+                    )
+
+        # --- Step 2: retry after a longer delay ---
+        print(f"🔧 [ORA/recovery/no_effect] Étape 2: attente 1.5s puis retry")
+        time.sleep(1.5)
+        pre_delayed = self.observe()
+        if self.act(decision, step_params):
+            time.sleep(0.5)
+            post_delayed = self.observe()
+            dist = self._phash_distance(pre_delayed.phash, post_delayed.phash)
+            if dist > min_change:
+                return RecoveryAttempt(
+                    error_type=ErrorType.ACTION_NO_EFFECT,
+                    strategy="delayed_retry",
+                    success=True,
+                    detail=f"Retry après délai: changement détecté (distance={dist})"
+                )
+
+        # --- Step 3: slightly shifted coordinates (+/- 5px) ---
+        # `or {}` also covers explicit None entries in the step parameters.
+        anchor = step_params.get('visual_anchor') or {}
+        bbox = anchor.get('bounding_box') or {}
+        if bbox and bbox.get('width') and bbox.get('height'):
+            cx = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
+            cy = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)
+
+            # Keep the click type of the original decision, as the UI-TARS
+            # fallback of _recover_element_not_found does.
+            if decision.value == 'double':
+                click_at = pyautogui.doubleClick
+            elif decision.value == 'right':
+                click_at = pyautogui.rightClick
+            else:
+                click_at = pyautogui.click
+
+            offsets = [(5, 0), (-5, 0), (0, 5), (0, -5)]
+            for dx, dy in offsets:
+                nx, ny = cx + dx, cy + dy
+                print(f"🔧 [ORA/recovery/no_effect] Étape 3: clic décalé ({nx}, {ny})")
+                pre_offset = self.observe()
+                click_at(nx, ny)
+                time.sleep(0.3)
+                post_offset = self.observe()
+                dist = self._phash_distance(pre_offset.phash, post_offset.phash)
+                if dist > min_change:
+                    return RecoveryAttempt(
+                        error_type=ErrorType.ACTION_NO_EFFECT,
+                        strategy=f"offset_{dx}_{dy}",
+                        success=True,
+                        detail=f"Clic décalé ({dx},{dy}) a produit un changement (distance={dist})"
+                    )
+
+        return RecoveryAttempt(
+            error_type=ErrorType.ACTION_NO_EFFECT,
+            strategy="exhausted",
+            success=False,
+            detail="Aucune stratégie n'a produit de changement visible"
+        )
+
+    def _vlm_describe_screen(self, observation: Observation) -> str:
+        """Ask the VLM for a one-sentence description of the current screen.
+
+        The screenshot is JPEG-encoded, base64-wrapped and posted to the
+        Ollama ``/api/generate`` endpoint. The server URL and model come
+        from the ``OLLAMA_URL`` and ``RPA_REASONING_MODEL`` environment
+        variables.
+
+        Args:
+            observation: Observation whose ``screenshot`` is described.
+
+        Returns:
+            The model's non-empty answer. Falls back to a placeholder with
+            the window title when the request fails, the status is not 200
+            or the answer is empty, and to "(pas de screenshot)" when the
+            observation has no screenshot. Never raises.
+        """
+        if observation.screenshot is None:
+            return "(pas de screenshot)"
+
+        try:
+            import requests
+            import io as _io
+
+            image = observation.screenshot
+            # JPEG cannot store alpha/palette modes; convert instead of
+            # letting save() fail and silently losing the description.
+            # assumes screenshot is a PIL image — TODO confirm
+            if getattr(image, 'mode', 'RGB') not in ('RGB', 'L', 'CMYK'):
+                image = image.convert('RGB')
+
+            buffer = _io.BytesIO()
+            image.save(buffer, format='JPEG', quality=60)
+            image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+            ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
+            model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
+
+            resp = requests.post(f"{ollama_url}/api/generate", json={
+                "model": model,
+                "prompt": "Décris en une phrase courte ce que montre cet écran d'ordinateur.",
+                "images": [image_b64],
+                "stream": False,
+                "options": {"temperature": 0.1, "num_predict": 60}
+            }, timeout=15)
+
+            if resp.status_code == 200:
+                description = (resp.json().get("response") or "").strip()
+                # An empty answer is useless to callers: use the fallback.
+                if description:
+                    return description
+
+        except Exception as e:
+            # Best-effort helper: any failure degrades to the title fallback.
+            logger.debug(f"[ORA/vlm_describe] Erreur: {e}")
+
+        return f"(titre fenêtre: {observation.window_title})"
+
    # ─── Boucle principale ────────────────────────────────

    def run_workflow(
@@ -792,10 +1246,25 @@ Règles:

            act_success = self.act(decision, step)
            if not act_success and decision.action not in ('wait', 'done'):
-                logger.warning(f"❌ [ORA] Action échouée immédiatement")
+                # L'action a échoué — classifier et tenter un recovery
+                print(f"⚠️ [ORA] Action échouée, lancement du recovery intelligent...")
+                error_type = self._classify_error(pre, pre, decision, act_success=False)
+                recovery = self._recover(error_type, decision, step, pre)
+                if recovery.success:
+                    print(f"✅ [ORA] Recovery réussi: {recovery.strategy} — {recovery.detail}")
+                else:
+                    print(f"❌ [ORA] Recovery échoué: {recovery.detail}")
+                    # Pause supervisée au lieu d'un arrêt brutal
+                    logger.warning(
+                        f"🆘 [ORA] Étape {i+1} en pause supervisée: "
+                        f"{error_type} → {recovery.detail}"
+                    )
                    return LoopResult(
                        success=False, steps_completed=i, total_steps=total,
-                    reason=f"Étape {i+1}: action '{decision.action}' échouée"
+                        reason=(
+                            f"Étape {i+1}: {error_type} — recovery '{recovery.strategy}' échoué. "
+                            f"{recovery.detail}"
+                        )
                    )

            # Petit délai pour laisser l'écran se stabiliser
@@ -808,24 +1277,47 @@ Règles:
            verification = self.verify(pre, post, decision)

            if not verification.success:
-                # Réessayer
-                retried = False
-                for retry in range(self.max_retries):
-                    print(f"🔄 [ORA] Retry {retry+1}/{self.max_retries} pour étape {i+1}")
+                # --- 5b. Recovery intelligent ---
+                error_type = self._classify_error(pre, post, decision, act_success=True)
+                print(f"🔎 [ORA] Étape {i+1}: erreur classifiée → {error_type}")
+
+                recovery = self._recover(error_type, decision, step, post)
+
+                if recovery.success:
+                    print(f"✅ [ORA] Recovery réussi: {recovery.strategy} — {recovery.detail}")
+                    # Re-vérifier après recovery
+                    time.sleep(0.3)
+                    post_recovery = self.observe()
+                    verification = self.verify(pre, post_recovery, decision)
+                    if verification.success:
+                        print(f"✅ [ORA] Vérification post-recovery OK")
+                    else:
+                        # Le recovery a marché mais la vérification reste KO
+                        # On essaie un simple retry en dernier recours
+                        print(f"⚠️ [ORA] Vérification post-recovery KO, retry simple...")
                        pre_retry = self.observe()
-                    act_success = self.act(decision, step)
+                        self.act(decision, step)
                        time.sleep(0.3)
                        post_retry = self.observe()
                        verification = self.verify(pre_retry, post_retry, decision)
-                    if verification.success:
-                        retried = True
-                        print(f"✅ [ORA] Retry {retry+1} réussi")
-                        break
-                if not retried and not verification.success:
-                    logger.warning(f"❌ [ORA] Étape {i+1} échouée après {self.max_retries} retries")
+
+                if not verification.success:
+                    # Tout a échoué — pause supervisée
+                    print(
+                        f"❌ [ORA] Étape {i+1} échouée après recovery "
+                        f"({error_type} → {recovery.strategy})"
+                    )
+                    logger.warning(
+                        f"🆘 [ORA] Étape {i+1} en pause supervisée: "
+                        f"{error_type} → {recovery.detail}"
+                    )
                    return LoopResult(
                        success=False, steps_completed=i, total_steps=total,
-                        reason=f"Étape {i+1}: vérification échouée — {verification.detail}"
+                        reason=(
+                            f"Étape {i+1}: {error_type} — recovery '{recovery.strategy}' "
+                            f"{'partiel' if recovery.success else 'échoué'}. "
+                            f"{verification.detail}"
+                        )
                    )

            # --- Callback de progression ---