diff --git a/agent_v0/agent_v1/core/executor.py b/agent_v0/agent_v1/core/executor.py index 93c14b31f..f0f7ebbfa 100644 --- a/agent_v0/agent_v1/core/executor.py +++ b/agent_v0/agent_v1/core/executor.py @@ -760,16 +760,48 @@ class ActionExecutorV1: result["visual_resolved"] = True print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}") else: - result["success"] = False - result["error"] = "target_not_found" - result["target_description"] = target_desc - result["target_spec"] = target_spec - result["screenshot"] = self._capture_screenshot_b64() - result["warning"] = "visual_resolve_failed" - self.notifier.replay_target_not_found( - target_desc, - target_spec.get("window_title", ""), + # Retry échoué → mode apprentissage + # Léa a tout essayé (UIA, template, VLM, retry) + # et ne trouve toujours pas. L'humain doit montrer. + print(f" [POLICY] Retry échoué → mode apprentissage") + try: + self.notifier.replay_target_not_found( + target_desc, + target_spec.get("window_title", ""), + ) + except Exception: + pass + + human_actions = self._capture_human_correction( + timeout_s=120, ) + if human_actions: + result["success"] = True + result["resolution_method"] = "human_supervised" + result["warning"] = "human_supervised_after_retry_failed" + last_click = None + for ha in reversed(human_actions): + if ha.get("type") == "click": + last_click = ha + break + if last_click: + result["actual_position"] = { + "x_pct": last_click["x_pct"], + "y_pct": last_click["y_pct"], + } + result["correction"] = { + "actions": human_actions, + "action_count": len(human_actions), + "last_click": last_click, + "trigger": "retry_failed", + } + else: + result["success"] = False + result["error"] = "target_not_found" + result["target_description"] = target_desc + result["target_spec"] = target_spec + result["screenshot"] = self._capture_screenshot_b64() + result["warning"] = "visual_resolve_failed" return result elif policy_decision.decision == Decision.SKIP: @@ -1004,33 +1036,66 @@ class ActionExecutorV1: hash_before, timeout_ms=3000 ) if not screen_changed: - # ── Recovery : tenter un rollback si l'action n'a pas eu d'effet ── - from .recovery import RecoveryEngine - recovery = RecoveryEngine(self) - recovery_result = recovery.attempt( - failed_action=action, - critic_detail="L'écran n'a pas changé après l'action", - ) - if recovery_result.success: - print(f" [RECOVERY] {recovery_result.detail}") - result["recovery"] = recovery_result.to_dict() - - result["success"] = False - result["warning"] = "no_screen_change" - result["error"] = "Ecran inchange apres l'action" - print( - f" [ECHEC] Ecran inchange apres {action_type} — " - f"l'action n'a pas eu d'effet visible" - ) logger.warning( f"[LEA] Écran inchangé après {action_type} " f"(action_id={action_id}) — pas d'effet visible" ) - # Notifier l'utilisateur en français naturel (niveau ATTENTION) - try: - self.notifier.replay_no_screen_change(action_type) - except Exception: - pass + + # ── Mode apprentissage : clic sans effet = mauvais clic ── + # Si l'action était un clic visuel, l'écran inchangé prouve + # que le grounding a cliqué au mauvais endroit. Au lieu de + # passer silencieusement à la suite, Léa demande à l'humain. + if action_type == "click" and visual_mode: + print( + f" [ECHEC] Clic sans effet — " + f"je demande de l'aide" + ) + try: + self.notifier.replay_no_screen_change(action_type) + except Exception: + pass + + human_actions = self._capture_human_correction( + timeout_s=120, + ) + if human_actions: + result["success"] = True + result["resolution_method"] = "human_supervised" + result["warning"] = "human_supervised_after_no_change" + last_click = None + for ha in reversed(human_actions): + if ha.get("type") == "click": + last_click = ha + break + if last_click: + result["actual_position"] = { + "x_pct": last_click["x_pct"], + "y_pct": last_click["y_pct"], + } + result["correction"] = { + "actions": human_actions, + "action_count": len(human_actions), + "last_click": last_click, + "trigger": "no_screen_change", + } + else: + # Timeout — l'humain n'a pas répondu + result["success"] = False + result["warning"] = "no_screen_change" + result["error"] = "Ecran inchange apres l'action" + else: + # Actions non-visuelles : comportement existant + result["success"] = False + result["warning"] = "no_screen_change" + result["error"] = "Ecran inchange apres l'action" + print( + f" [ECHEC] Ecran inchange apres {action_type} — " + f"l'action n'a pas eu d'effet visible" + ) + try: + self.notifier.replay_no_screen_change(action_type) + except Exception: + pass else: print(f" [OK] Changement d'ecran detecte apres {action_type}") else: