feat(ORA): classification erreurs + recovery intelligent

4 types d'erreurs : ELEMENT_NOT_FOUND, OVERLAY_BLOCKING, WRONG_SCREEN, ACTION_NO_EFFECT. Recovery spécialisé par type : - Élément introuvable → attente + scroll + retry UI-TARS élargi - Overlay bloquant → détection pattern + fermeture auto + retry - Mauvais écran → description VLM + Alt+Tab + recherche taskbar - Pas d'effet → double-clic + délai + coordonnées décalées Intégré dans run_workflow() : classification → recovery → re-vérif. Échec total → pause supervisée (pas de stop brutal). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-22 16:44:31 +02:00
parent 8903f35433
commit 6829ad8e79
1 changed files with 511 additions and 19 deletions
--- a/core/execution/observe_reason_act.py
+++ b/core/execution/observe_reason_act.py
@@ -104,6 +104,27 @@ class VerificationResult:
    detail: str
 # ═══════════════════════════════════════════════════════════════
 # Classification des erreurs et stratégies de recovery
 # ═══════════════════════════════════════════════════════════════
 class ErrorType:
    """Error types that can be identified after a failed action."""
    ELEMENT_NOT_FOUND = "element_not_found"    # The element is not visible
    OVERLAY_BLOCKING = "overlay_blocking"       # A dialog/popup is blocking
    WRONG_SCREEN = "wrong_screen"              # The screen does not match
    ACTION_NO_EFFECT = "action_no_effect"       # The action was performed but nothing changed
@dataclass
 class RecoveryAttempt:
    """Record of a single recovery attempt."""
    # One of the ErrorType constants this attempt was addressing.
    error_type: str
    # Short label of the strategy that produced this result (e.g. "wait_1s", "exhausted").
    strategy: str
    # True when the strategy reported that it succeeded.
    success: bool
    # Human-readable explanation of the outcome.
    detail: str
@dataclass
 class LoopResult:
    """Résultat global de l'exécution de la boucle ORA."""
@@ -723,6 +744,439 @@ Règles:
            detail=detail,
        )
    # ─── Classification d'erreurs ───────────────────────────
    def _classify_error(self, pre: Observation, post: Observation,
                        decision: Decision, act_success: bool) -> str:
        """Classify the failure observed after a step did not verify.

        Checks are applied in priority order; the first match wins:
        1. The action itself was not executed → ELEMENT_NOT_FOUND
        2. A dialog/popup pattern is detected on screen → OVERLAY_BLOCKING
        3. ``_phash_distance(pre.phash, post.phash)`` is below 5
           → ACTION_NO_EFFECT
        4. Anything else → WRONG_SCREEN

        Returns:
            One of the ErrorType constants.
        """
        if act_success:
            # Overlay detection is best effort: any failure is logged and the
            # classification falls through to the screen-hash comparison.
            try:
                from core.execution.input_handler import check_screen_for_patterns
                overlay = check_screen_for_patterns()
                if overlay:
                    print(f"🔎 [ORA/classify] → {ErrorType.OVERLAY_BLOCKING} (pattern={overlay.get('pattern', '?')})")
                    return ErrorType.OVERLAY_BLOCKING
            except Exception as exc:
                logger.debug(f"[ORA/classify] check_screen_for_patterns échoué: {exc}")

            # A small hash gap means the screen did not react at all; a larger
            # one means it changed, just not in the expected way.
            hash_gap = self._phash_distance(pre.phash, post.phash)
            verdict = (
                ErrorType.ACTION_NO_EFFECT if hash_gap < 5
                else ErrorType.WRONG_SCREEN
            )
            print(f"🔎 [ORA/classify] → {verdict} (distance={hash_gap})")
            return verdict

        # The action failed before anything was clicked.
        print(f"🔎 [ORA/classify] → {ErrorType.ELEMENT_NOT_FOUND} (action non exécutée)")
        return ErrorType.ELEMENT_NOT_FOUND
    # ─── Stratégies de recovery ──────────────────────────────
    def _recover(self, error_type: str, decision: Decision,
                 step_params: dict, post: Observation) -> RecoveryAttempt:
        """Dispatch to the recovery strategy matching ``error_type``.

        Returns:
            The RecoveryAttempt produced by the selected strategy, or a failed
            attempt with strategy ``"none"`` when the error type is unknown.
        """
        print(f"🔧 [ORA/recovery] Stratégie pour: {error_type}")

        # One specialised handler per known error type.
        strategies = {
            ErrorType.ELEMENT_NOT_FOUND: self._recover_element_not_found,
            ErrorType.OVERLAY_BLOCKING: self._recover_overlay_blocking,
            ErrorType.WRONG_SCREEN: self._recover_wrong_screen,
            ErrorType.ACTION_NO_EFFECT: self._recover_no_effect,
        }
        handler = strategies.get(error_type)
        if handler is not None:
            return handler(decision, step_params)

        return RecoveryAttempt(
            error_type=error_type, strategy="none",
            success=False, detail="Type d'erreur inconnu"
        )
    def _recover_element_not_found(self, decision: Decision,
                                   step_params: dict) -> RecoveryAttempt:
        """Recover when the target element could not be located.

        Strategies, tried in order until one succeeds:
        1. Wait 1 s (the element may still be loading) and retry the action.
        2. Scroll down up to three times, retrying the action after each
           scroll; scroll back up by the same total amount if none helps.
        3. Ask UI-TARS to ground a deliberately broader description of the
           target and click the coordinates it returns.

        Args:
            decision: The decision whose action failed. ``decision.target`` is
                the fallback target text; ``decision.value`` selects the click
                kind in step 3 ('double', 'right', anything else = plain click).
            step_params: Step parameters; ``visual_anchor.target_text`` is used
                as the target text when present.

        Returns:
            A RecoveryAttempt for the first strategy that succeeded, or a
            failed attempt with strategy ``"exhausted"``.
        """
        # `visual_anchor` may be missing or explicitly None in the step params.
        anchor = step_params.get('visual_anchor') or {}
        target_text = anchor.get('target_text', '') or decision.target

        # --- Step 1: give the element time to load, then retry ---
        print("🔧 [ORA/recovery/not_found] Étape 1: attente 1s...")
        time.sleep(1.0)
        if self.act(decision, step_params):
            # Let the UI settle. No screenshot is taken here: a successful
            # act() is reported as-is and the caller is expected to re-verify.
            time.sleep(0.3)
            return RecoveryAttempt(
                error_type=ErrorType.ELEMENT_NOT_FOUND,
                strategy="wait_1s",
                success=True,
                detail="Élément trouvé après attente 1s"
            )

        # --- Step 2: scroll down looking for the element ---
        if PYAUTOGUI_AVAILABLE:
            scroll_step = 3    # amount passed to pyautogui.scroll per attempt
            max_scrolls = 3    # number of scroll-and-retry attempts
            print(f"🔧 [ORA/recovery/not_found] Étape 2: scroll down pour chercher '{target_text}'")
            for scroll_attempt in range(1, max_scrolls + 1):
                pyautogui.scroll(-scroll_step)  # negative = scroll down
                time.sleep(0.5)
                if self.act(decision, step_params):
                    return RecoveryAttempt(
                        error_type=ErrorType.ELEMENT_NOT_FOUND,
                        strategy=f"scroll_down_{scroll_attempt}",
                        success=True,
                        detail=f"Élément trouvé après scroll down x{scroll_attempt}"
                    )
            # Nothing found: undo exactly what was scrolled above.
            pyautogui.scroll(scroll_step * max_scrolls)
            time.sleep(0.3)

        # --- Step 3: retry through UI-TARS with a broader description ---
        # Skipped without pyautogui: grounding would succeed but nothing could
        # click the result, so the model call would be wasted.
        if target_text and PYAUTOGUI_AVAILABLE:
            print("🔧 [ORA/recovery/not_found] Étape 3: UI-TARS avec description élargie")
            try:
                from core.execution.input_handler import _grounding_ui_tars
                broader_desc = f"bouton ou élément contenant '{target_text}' ou similaire"
                result = _grounding_ui_tars(target_text, broader_desc)
                if result:
                    x, y = result['x'], result['y']
                    print(f"✅ [ORA/recovery/not_found] UI-TARS élargi: trouvé à ({x}, {y})")
                    # NOTE(review): this only clicks; for non-click decisions
                    # the original action is not replayed here — confirm that
                    # the caller's re-verification covers that case.
                    if decision.value == 'double':
                        pyautogui.doubleClick(x, y)
                    elif decision.value == 'right':
                        pyautogui.rightClick(x, y)
                    else:
                        pyautogui.click(x, y)
                    return RecoveryAttempt(
                        error_type=ErrorType.ELEMENT_NOT_FOUND,
                        strategy="ui_tars_broad",
                        success=True,
                        detail=f"Élément trouvé par UI-TARS élargi à ({x}, {y})"
                    )
            except Exception as e:
                # Best effort: a grounding failure just means this strategy
                # did not help.
                logger.debug(f"[ORA/recovery/not_found] UI-TARS élargi échoué: {e}")

        return RecoveryAttempt(
            error_type=ErrorType.ELEMENT_NOT_FOUND,
            strategy="exhausted",
            success=False,
            detail=f"Élément '{target_text}' introuvable après attente + scroll + UI-TARS élargi"
        )
    def _recover_overlay_blocking(self, decision: Decision,
                                  step_params: dict) -> RecoveryAttempt:
        """Recover when a dialog/popup is blocking the target.

        Steps:
        1. Re-detect the blocking pattern on screen.
        2. Let ``handle_detected_pattern`` dismiss it.
        3. Retry the original action.

        Returns:
            A RecoveryAttempt whose ``strategy`` names the point at which the
            procedure stopped (pattern vanished, handling failed, retry
            succeeded/failed, import error, or unexpected error).
        """
        def outcome(strategy, success, detail):
            # Every result of this method carries the same error type.
            return RecoveryAttempt(
                error_type=ErrorType.OVERLAY_BLOCKING,
                strategy=strategy,
                success=success,
                detail=detail
            )

        try:
            from core.execution.input_handler import check_screen_for_patterns, handle_detected_pattern

            # --- Step 1: detect and identify the dialog ---
            overlay = check_screen_for_patterns()
            if not overlay:
                # It may have gone away between classification and now.
                return outcome(
                    "pattern_vanished", True,
                    "Le dialogue a disparu spontanément"
                )

            print(f"🔧 [ORA/recovery/overlay] Pattern détecté: {overlay.get('pattern', '?')} → action={overlay.get('action', '?')}")

            # --- Step 2: dismiss the dialog ---
            if not handle_detected_pattern(overlay):
                print("⚠️ [ORA/recovery/overlay] Impossible de gérer le pattern automatiquement")
                return outcome(
                    "handle_failed", False,
                    f"Pattern '{overlay.get('pattern', '?')}' détecté mais non géré"
                )

            print("✅ [ORA/recovery/overlay] Dialogue fermé")
            time.sleep(0.5)

            # --- Step 3: retry the original action ---
            if self.act(decision, step_params):
                return outcome(
                    "close_dialog_retry", True,
                    f"Dialogue '{overlay.get('pattern', '?')}' fermé, action retentée avec succès"
                )
            return outcome(
                "close_dialog_retry_failed", False,
                "Dialogue fermé mais l'action originale a encore échoué"
            )

        except ImportError as exc:
            logger.debug(f"[ORA/recovery/overlay] Import manquant: {exc}")
            return outcome(
                "import_error", False,
                f"Fonctions de pattern non disponibles: {exc}"
            )
        except Exception as exc:
            logger.debug(f"[ORA/recovery/overlay] Erreur: {exc}")
            return outcome(
                "error", False,
                f"Erreur recovery overlay: {exc}"
            )
    def _recover_wrong_screen(self, decision: Decision,
                              step_params: dict) -> RecoveryAttempt:
        """Recover when the screen is not the one the step expects.

        Steps:
        1. Ask the VLM to describe the current screen and print it next to
           ``decision.expected_after`` (diagnostic output only; no automatic
           comparison is made).
        2. Press Alt+Tab and retry the action.
        3. Locate the application in the taskbar through UI-TARS, click it and
           retry the action.

        Args:
            decision: The decision to retry once a window switch was attempted.
            step_params: Step parameters; ``visual_anchor.window_title`` (or
                ``target_text`` as a fallback) names the window to look for.

        Returns:
            A RecoveryAttempt with strategy ``"alt_tab"`` or ``"taskbar_click"``
            on success, ``"no_pyautogui"`` or ``"exhausted"`` on failure.
        """
        if not PYAUTOGUI_AVAILABLE:
            return RecoveryAttempt(
                error_type=ErrorType.WRONG_SCREEN,
                strategy="no_pyautogui",
                success=False,
                detail="pyautogui non disponible"
            )

        # --- Step 1: ask the VLM what it sees ---
        current_obs = self.observe()
        vlm_description = self._vlm_describe_screen(current_obs) or ''
        # expected_after may be unset; slicing None would raise and abort the
        # whole recovery for a purely diagnostic message.
        expected = str(decision.expected_after or '')
        print(f"🔧 [ORA/recovery/wrong_screen] Écran actuel: '{vlm_description[:80]}'")
        print(f"🔧 [ORA/recovery/wrong_screen] Attendu: '{expected[:80]}'")

        # --- Step 2: try Alt+Tab to get back to the right window ---
        print("🔧 [ORA/recovery/wrong_screen] Tentative Alt+Tab...")
        pyautogui.hotkey('alt', 'tab')
        time.sleep(0.8)

        # Whether the right window came back is judged solely by act().
        if self.act(decision, step_params):
            return RecoveryAttempt(
                error_type=ErrorType.WRONG_SCREEN,
                strategy="alt_tab",
                success=True,
                detail="Alt+Tab a ramené la bonne fenêtre, action retentée"
            )

        # --- Step 3: try clicking the application in the taskbar ---
        # `visual_anchor` may be missing or explicitly None.
        anchor = step_params.get('visual_anchor') or {}
        app_name = anchor.get('window_title', '') or anchor.get('target_text', '')
        if app_name:
            print(f"🔧 [ORA/recovery/wrong_screen] Recherche '{app_name}' dans la taskbar...")
            try:
                from core.execution.input_handler import _grounding_ui_tars
                result = _grounding_ui_tars(app_name, f"bouton dans la barre des tâches pour '{app_name}'")
                if result:
                    pyautogui.click(result['x'], result['y'])
                    time.sleep(0.8)
                    if self.act(decision, step_params):
                        return RecoveryAttempt(
                            error_type=ErrorType.WRONG_SCREEN,
                            strategy="taskbar_click",
                            success=True,
                            detail=f"Fenêtre '{app_name}' retrouvée via la taskbar"
                        )
            except Exception as e:
                # Best effort: a failed taskbar search falls through to
                # the "exhausted" result below.
                logger.debug(f"[ORA/recovery/wrong_screen] Taskbar search échoué: {e}")

        return RecoveryAttempt(
            error_type=ErrorType.WRONG_SCREEN,
            strategy="exhausted",
            success=False,
            detail=f"Impossible de retrouver le bon écran. Actuel: '{vlm_description[:60]}'"
        )
    def _recover_no_effect(self, decision: Decision,
                           step_params: dict) -> RecoveryAttempt:
        """Recover when the action produced no visible change.

        Non-click actions are simply retried once after a 1 s delay. For
        clicks, the following strategies are tried in order:
        1. Double-click instead of a single left click.
        2. Wait 1.5 s and retry the original action.
        3. Click at the centre of ``visual_anchor.bounding_box`` shifted by
           ±5 px horizontally, then vertically.

        A click strategy counts as successful only when the distance returned
        by ``_phash_distance`` between a screenshot taken just before the
        attempt and one taken just after it exceeds 5.

        Args:
            decision: The decision whose action had no effect.
            step_params: Step parameters; ``visual_anchor.bounding_box``
                (x, y, width, height) is read for step 3.

        Returns:
            A RecoveryAttempt naming the strategy that produced a change, or a
            failed attempt (``"no_pyautogui"`` / ``"exhausted"``).
        """
        if not PYAUTOGUI_AVAILABLE:
            return RecoveryAttempt(
                error_type=ErrorType.ACTION_NO_EFFECT,
                strategy="no_pyautogui",
                success=False,
                detail="pyautogui non disponible"
            )

        # The click-specific strategies below only make sense for clicks.
        if decision.action != 'click':
            print(f"🔧 [ORA/recovery/no_effect] Retry après 1s (action={decision.action})")
            time.sleep(1.0)
            act_ok = self.act(decision, step_params)
            return RecoveryAttempt(
                error_type=ErrorType.ACTION_NO_EFFECT,
                strategy="delayed_retry",
                success=act_ok,
                detail=f"Retry après délai 1s: {'OK' if act_ok else 'échoué'}"
            )

        # NOTE(review): _classify_error treats distance < 5 as "no effect",
        # so a distance of exactly 5 is neither "no effect" there nor a
        # "change" here — confirm which side of the boundary is intended.
        change_threshold = 5

        # --- Step 1: double-click instead of single click ---
        if decision.value == 'left':
            print("🔧 [ORA/recovery/no_effect] Étape 1: double-clic au lieu de simple clic")
            double_decision = Decision(
                action='click', target=decision.target, value='double',
                reasoning='Recovery: double-clic',
                expected_after=decision.expected_after,
                confidence=decision.confidence,
            )
            # The reference screenshot must be taken BEFORE acting; comparing
            # two post-action captures can never show the double-click's effect.
            pre_double = self.observe()
            if self.act(double_decision, step_params):
                time.sleep(0.3)
                post_double = self.observe()
                dist = self._phash_distance(pre_double.phash, post_double.phash)
                if dist > change_threshold:
                    return RecoveryAttempt(
                        error_type=ErrorType.ACTION_NO_EFFECT,
                        strategy="double_click",
                        success=True,
                        detail="Double-clic a produit un changement"
                    )

        # --- Step 2: retry after a longer delay ---
        print("🔧 [ORA/recovery/no_effect] Étape 2: attente 1.5s puis retry")
        time.sleep(1.5)
        pre_delayed = self.observe()
        if self.act(decision, step_params):
            time.sleep(0.5)
            post_delayed = self.observe()
            dist = self._phash_distance(pre_delayed.phash, post_delayed.phash)
            if dist > change_threshold:
                return RecoveryAttempt(
                    error_type=ErrorType.ACTION_NO_EFFECT,
                    strategy="delayed_retry",
                    success=True,
                    detail=f"Retry après délai: changement détecté (distance={dist})"
                )

        # --- Step 3: slightly shifted coordinates (+/- 5 px) ---
        # `visual_anchor` / `bounding_box` may be missing or explicitly None.
        anchor = step_params.get('visual_anchor') or {}
        bbox = anchor.get('bounding_box') or {}
        if bbox and bbox.get('width') and bbox.get('height'):
            cx = int(bbox.get('x', 0) + bbox.get('width', 0) / 2)
            cy = int(bbox.get('y', 0) + bbox.get('height', 0) / 2)

            # Keep the click kind requested by the decision instead of always
            # left-clicking (same mapping as _recover_element_not_found).
            if decision.value == 'double':
                offset_click = pyautogui.doubleClick
            elif decision.value == 'right':
                offset_click = pyautogui.rightClick
            else:
                offset_click = pyautogui.click

            for dx, dy in ((5, 0), (-5, 0), (0, 5), (0, -5)):
                nx, ny = cx + dx, cy + dy
                print(f"🔧 [ORA/recovery/no_effect] Étape 3: clic décalé ({nx}, {ny})")
                pre_offset = self.observe()
                offset_click(nx, ny)
                time.sleep(0.3)
                post_offset = self.observe()
                dist = self._phash_distance(pre_offset.phash, post_offset.phash)
                if dist > change_threshold:
                    return RecoveryAttempt(
                        error_type=ErrorType.ACTION_NO_EFFECT,
                        strategy=f"offset_{dx}_{dy}",
                        success=True,
                        detail=f"Clic décalé ({dx},{dy}) a produit un changement (distance={dist})"
                    )

        return RecoveryAttempt(
            error_type=ErrorType.ACTION_NO_EFFECT,
            strategy="exhausted",
            success=False,
            detail="Aucune stratégie n'a produit de changement visible"
        )
    def _vlm_describe_screen(self, observation: Observation) -> str:
        """Ask the VLM for a one-sentence description of the current screen.

        The screenshot is JPEG-encoded (quality 60), base64-encoded and posted
        to ``{OLLAMA_URL}/api/generate`` (default ``http://localhost:11434``)
        using the model named by ``RPA_REASONING_MODEL`` (default
        ``qwen2.5vl:7b``), with a 15 s timeout.

        Args:
            observation: Observation providing ``screenshot`` and
                ``window_title``.

        Returns:
            The model's description; ``"(pas de screenshot)"`` when there is
            no screenshot; otherwise a window-title fallback when the request
            fails, returns a non-200 status, or yields an empty description.
        """
        if observation.screenshot is None:
            return "(pas de screenshot)"

        try:
            import requests
            import io as _io

            image = observation.screenshot
            # assumes screenshot is a PIL image — TODO confirm. JPEG cannot
            # store alpha/palette modes, so convert instead of letting save()
            # raise and losing the description.
            if getattr(image, "mode", "RGB") not in ("RGB", "L", "CMYK"):
                image = image.convert("RGB")

            buffer = _io.BytesIO()
            image.save(buffer, format='JPEG', quality=60)
            image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

            ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
            model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")

            resp = requests.post(f"{ollama_url}/api/generate", json={
                "model": model,
                "prompt": "Décris en une phrase courte ce que montre cet écran d'ordinateur.",
                "images": [image_b64],
                "stream": False,
                "options": {"temperature": 0.1, "num_predict": 60}
            }, timeout=15)

            if resp.status_code == 200:
                description = (resp.json().get("response") or "").strip()
                if description:
                    return description
                # An empty answer carries no information; use the fallback.
                logger.debug("[ORA/vlm_describe] Réponse vide du VLM")
            else:
                logger.debug(f"[ORA/vlm_describe] Statut HTTP inattendu: {resp.status_code}")
        except Exception as e:
            # Best effort: the description is diagnostic only, so any failure
            # degrades to the window-title fallback below.
            logger.debug(f"[ORA/vlm_describe] Erreur: {e}")

        return f"(titre fenêtre: {observation.window_title})"
    # ─── Boucle principale ────────────────────────────────
    def run_workflow(
@@ -792,11 +1246,26 @@ Règles:
            act_success = self.act(decision, step)
            if not act_success and decision.action not in ('wait', 'done'):
-                logger.warning(f"❌ [ORA] Action échouée immédiatement")
+                # L'action a échoué — classifier et tenter un recovery
-                return LoopResult(
+                print(f"⚠️ [ORA] Action échouée, lancement du recovery intelligent...")
-                    success=False, steps_completed=i, total_steps=total,
+                error_type = self._classify_error(pre, pre, decision, act_success=False)
-                    reason=f"Étape {i+1}: action '{decision.action}' échouée"
+                recovery = self._recover(error_type, decision, step, pre)
-                )
+                if recovery.success:
                    print(f"✅ [ORA] Recovery réussi: {recovery.strategy} — {recovery.detail}")
                else:
                    print(f"❌ [ORA] Recovery échoué: {recovery.detail}")
                    # Pause supervisée au lieu d'un arrêt brutal
                    logger.warning(
                        f"🆘 [ORA] Étape {i+1} en pause supervisée: "
                        f"{error_type} → {recovery.detail}"
                    )
                    return LoopResult(
                        success=False, steps_completed=i, total_steps=total,
                        reason=(
                            f"Étape {i+1}: {error_type} — recovery '{recovery.strategy}' échoué. "
                            f"{recovery.detail}"
                        )
                    )
            # Petit délai pour laisser l'écran se stabiliser
            time.sleep(0.3)
@@ -808,24 +1277,47 @@ Règles:
            verification = self.verify(pre, post, decision)
            if not verification.success:
-                # Réessayer
+                # --- 5b. Recovery intelligent ---
-                retried = False
+                error_type = self._classify_error(pre, post, decision, act_success=True)
-                for retry in range(self.max_retries):
+                print(f"🔎 [ORA] Étape {i+1}: erreur classifiée → {error_type}")
-                    print(f"🔄 [ORA] Retry {retry+1}/{self.max_retries} pour étape {i+1}")
+
-                    pre_retry = self.observe()
+                recovery = self._recover(error_type, decision, step, post)
-                    act_success = self.act(decision, step)
+
                if recovery.success:
                    print(f"✅ [ORA] Recovery réussi: {recovery.strategy} — {recovery.detail}")
                    # Re-vérifier après recovery
                    time.sleep(0.3)
-                    post_retry = self.observe()
+                    post_recovery = self.observe()
-                    verification = self.verify(pre_retry, post_retry, decision)
+                    verification = self.verify(pre, post_recovery, decision)
                    if verification.success:
-                        retried = True
+                        print(f"✅ [ORA] Vérification post-recovery OK")
-                        print(f"✅ [ORA] Retry {retry+1} réussi")
+                    else:
-                        break
+                        # Le recovery a marché mais la vérification reste KO
-                if not retried and not verification.success:
+                        # On essaie un simple retry en dernier recours
-                    logger.warning(f"❌ [ORA] Étape {i+1} échouée après {self.max_retries} retries")
+                        print(f"⚠️ [ORA] Vérification post-recovery KO, retry simple...")
                        pre_retry = self.observe()
                        self.act(decision, step)
                        time.sleep(0.3)
                        post_retry = self.observe()
                        verification = self.verify(pre_retry, post_retry, decision)
                if not verification.success:
                    # Tout a échoué — pause supervisée
                    print(
                        f"❌ [ORA] Étape {i+1} échouée après recovery "
                        f"({error_type} → {recovery.strategy})"
                    )
                    logger.warning(
                        f"🆘 [ORA] Étape {i+1} en pause supervisée: "
                        f"{error_type} → {recovery.detail}"
                    )
                    return LoopResult(
                        success=False, steps_completed=i, total_steps=total,
-                        reason=f"Étape {i+1}: vérification échouée — {verification.detail}"
+                        reason=(
                            f"Étape {i+1}: {error_type} — recovery '{recovery.strategy}' "
                            f"{'partiel' if recovery.success else 'échoué'}. "
                            f"{verification.detail}"
                        )
                    )
            # --- Callback de progression ---