feat: mode apprentissage — retry échoué + écran inchangé déclenchent la capture humaine
Trois chemins mènent au mode apprentissage supervisé :

1. Le grounding échoue → Policy RETRY → le retry échoue → capture humaine.
2. Clic visuel sans effet (écran inchangé pendant 3 s) → capture humaine.
3. Policy SUPERVISE direct → capture humaine.

La capture enregistre un mini-workflow complet (clics + frappes + combos) jusqu'à Ctrl+Shift+L ou 10 s d'inactivité. La correction est envoyée au serveur.

Testé E2E : workflow Chrome avec résultats Google dynamiques + bandeau cookies — Léa demande de l'aide, capture, puis reprend.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -759,6 +759,42 @@ class ActionExecutorV1:
|
||||
y_pct = resolved2["y_pct"]
|
||||
result["visual_resolved"] = True
|
||||
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
||||
else:
|
||||
# Retry échoué → mode apprentissage
|
||||
# Léa a tout essayé (UIA, template, VLM, retry)
|
||||
# et ne trouve toujours pas. L'humain doit montrer.
|
||||
print(f" [POLICY] Retry échoué → mode apprentissage")
|
||||
try:
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised_after_retry_failed"
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
"trigger": "retry_failed",
|
||||
}
|
||||
else:
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
@@ -766,10 +802,6 @@ class ActionExecutorV1:
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
return result
|
||||
|
||||
elif policy_decision.decision == Decision.SKIP:
|
||||
@@ -1004,17 +1036,55 @@ class ActionExecutorV1:
|
||||
hash_before, timeout_ms=3000
|
||||
)
|
||||
if not screen_changed:
|
||||
# ── Recovery : tenter un rollback si l'action n'a pas eu d'effet ──
|
||||
from .recovery import RecoveryEngine
|
||||
recovery = RecoveryEngine(self)
|
||||
recovery_result = recovery.attempt(
|
||||
failed_action=action,
|
||||
critic_detail="L'écran n'a pas changé après l'action",
|
||||
logger.warning(
|
||||
f"[LEA] Écran inchangé après {action_type} "
|
||||
f"(action_id={action_id}) — pas d'effet visible"
|
||||
)
|
||||
if recovery_result.success:
|
||||
print(f" [RECOVERY] {recovery_result.detail}")
|
||||
result["recovery"] = recovery_result.to_dict()
|
||||
|
||||
# ── Mode apprentissage : clic sans effet = mauvais clic ──
|
||||
# Si l'action était un clic visuel, l'écran inchangé prouve
|
||||
# que le grounding a cliqué au mauvais endroit. Au lieu de
|
||||
# passer silencieusement à la suite, Léa demande à l'humain.
|
||||
if action_type == "click" and visual_mode:
|
||||
print(
|
||||
f" [ECHEC] Clic sans effet — "
|
||||
f"je demande de l'aide"
|
||||
)
|
||||
try:
|
||||
self.notifier.replay_no_screen_change(action_type)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised_after_no_change"
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
"trigger": "no_screen_change",
|
||||
}
|
||||
else:
|
||||
# Timeout — l'humain n'a pas répondu
|
||||
result["success"] = False
|
||||
result["warning"] = "no_screen_change"
|
||||
result["error"] = "Ecran inchange apres l'action"
|
||||
else:
|
||||
# Actions non-visuelles : comportement existant
|
||||
result["success"] = False
|
||||
result["warning"] = "no_screen_change"
|
||||
result["error"] = "Ecran inchange apres l'action"
|
||||
@@ -1022,11 +1092,6 @@ class ActionExecutorV1:
|
||||
f" [ECHEC] Ecran inchange apres {action_type} — "
|
||||
f"l'action n'a pas eu d'effet visible"
|
||||
)
|
||||
logger.warning(
|
||||
f"[LEA] Écran inchangé après {action_type} "
|
||||
f"(action_id={action_id}) — pas d'effet visible"
|
||||
)
|
||||
# Notifier l'utilisateur en français naturel (niveau ATTENTION)
|
||||
try:
|
||||
self.notifier.replay_no_screen_change(action_type)
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user