feat: mode apprentissage — retry échoué + écran inchangé déclenchent la capture humaine
Trois chemins vers le mode apprentissage supervisé :
1. Grounding échoue → Policy RETRY → retry échoue → capture humaine
2. Clic visuel sans effet (écran inchangé pendant 3 s) → capture humaine
3. Policy SUPERVISE direct → capture humaine

La capture enregistre un mini-workflow complet (clics + frappes + combos) jusqu'à Ctrl+Shift+L ou 10 s d'inactivité. Correction envoyée au serveur.

Testé E2E : workflow Chrome avec résultats Google dynamiques + bandeau cookies — Léa demande de l'aide, capture, reprend.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -760,16 +760,48 @@ class ActionExecutorV1:
|
|||||||
result["visual_resolved"] = True
|
result["visual_resolved"] = True
|
||||||
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
||||||
else:
|
else:
|
||||||
result["success"] = False
|
# Retry échoué → mode apprentissage
|
||||||
result["error"] = "target_not_found"
|
# Léa a tout essayé (UIA, template, VLM, retry)
|
||||||
result["target_description"] = target_desc
|
# et ne trouve toujours pas. L'humain doit montrer.
|
||||||
result["target_spec"] = target_spec
|
print(f" [POLICY] Retry échoué → mode apprentissage")
|
||||||
result["screenshot"] = self._capture_screenshot_b64()
|
try:
|
||||||
result["warning"] = "visual_resolve_failed"
|
self.notifier.replay_target_not_found(
|
||||||
self.notifier.replay_target_not_found(
|
target_desc,
|
||||||
target_desc,
|
target_spec.get("window_title", ""),
|
||||||
target_spec.get("window_title", ""),
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
human_actions = self._capture_human_correction(
|
||||||
|
timeout_s=120,
|
||||||
)
|
)
|
||||||
|
if human_actions:
|
||||||
|
result["success"] = True
|
||||||
|
result["resolution_method"] = "human_supervised"
|
||||||
|
result["warning"] = "human_supervised_after_retry_failed"
|
||||||
|
last_click = None
|
||||||
|
for ha in reversed(human_actions):
|
||||||
|
if ha.get("type") == "click":
|
||||||
|
last_click = ha
|
||||||
|
break
|
||||||
|
if last_click:
|
||||||
|
result["actual_position"] = {
|
||||||
|
"x_pct": last_click["x_pct"],
|
||||||
|
"y_pct": last_click["y_pct"],
|
||||||
|
}
|
||||||
|
result["correction"] = {
|
||||||
|
"actions": human_actions,
|
||||||
|
"action_count": len(human_actions),
|
||||||
|
"last_click": last_click,
|
||||||
|
"trigger": "retry_failed",
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
result["success"] = False
|
||||||
|
result["error"] = "target_not_found"
|
||||||
|
result["target_description"] = target_desc
|
||||||
|
result["target_spec"] = target_spec
|
||||||
|
result["screenshot"] = self._capture_screenshot_b64()
|
||||||
|
result["warning"] = "visual_resolve_failed"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
elif policy_decision.decision == Decision.SKIP:
|
elif policy_decision.decision == Decision.SKIP:
|
||||||
@@ -1004,33 +1036,66 @@ class ActionExecutorV1:
|
|||||||
hash_before, timeout_ms=3000
|
hash_before, timeout_ms=3000
|
||||||
)
|
)
|
||||||
if not screen_changed:
|
if not screen_changed:
|
||||||
# ── Recovery : tenter un rollback si l'action n'a pas eu d'effet ──
|
|
||||||
from .recovery import RecoveryEngine
|
|
||||||
recovery = RecoveryEngine(self)
|
|
||||||
recovery_result = recovery.attempt(
|
|
||||||
failed_action=action,
|
|
||||||
critic_detail="L'écran n'a pas changé après l'action",
|
|
||||||
)
|
|
||||||
if recovery_result.success:
|
|
||||||
print(f" [RECOVERY] {recovery_result.detail}")
|
|
||||||
result["recovery"] = recovery_result.to_dict()
|
|
||||||
|
|
||||||
result["success"] = False
|
|
||||||
result["warning"] = "no_screen_change"
|
|
||||||
result["error"] = "Ecran inchange apres l'action"
|
|
||||||
print(
|
|
||||||
f" [ECHEC] Ecran inchange apres {action_type} — "
|
|
||||||
f"l'action n'a pas eu d'effet visible"
|
|
||||||
)
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"[LEA] Écran inchangé après {action_type} "
|
f"[LEA] Écran inchangé après {action_type} "
|
||||||
f"(action_id={action_id}) — pas d'effet visible"
|
f"(action_id={action_id}) — pas d'effet visible"
|
||||||
)
|
)
|
||||||
# Notifier l'utilisateur en français naturel (niveau ATTENTION)
|
|
||||||
try:
|
# ── Mode apprentissage : clic sans effet = mauvais clic ──
|
||||||
self.notifier.replay_no_screen_change(action_type)
|
# Si l'action était un clic visuel, l'écran inchangé prouve
|
||||||
except Exception:
|
# que le grounding a cliqué au mauvais endroit. Au lieu de
|
||||||
pass
|
# passer silencieusement à la suite, Léa demande à l'humain.
|
||||||
|
if action_type == "click" and visual_mode:
|
||||||
|
print(
|
||||||
|
f" [ECHEC] Clic sans effet — "
|
||||||
|
f"je demande de l'aide"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
self.notifier.replay_no_screen_change(action_type)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
human_actions = self._capture_human_correction(
|
||||||
|
timeout_s=120,
|
||||||
|
)
|
||||||
|
if human_actions:
|
||||||
|
result["success"] = True
|
||||||
|
result["resolution_method"] = "human_supervised"
|
||||||
|
result["warning"] = "human_supervised_after_no_change"
|
||||||
|
last_click = None
|
||||||
|
for ha in reversed(human_actions):
|
||||||
|
if ha.get("type") == "click":
|
||||||
|
last_click = ha
|
||||||
|
break
|
||||||
|
if last_click:
|
||||||
|
result["actual_position"] = {
|
||||||
|
"x_pct": last_click["x_pct"],
|
||||||
|
"y_pct": last_click["y_pct"],
|
||||||
|
}
|
||||||
|
result["correction"] = {
|
||||||
|
"actions": human_actions,
|
||||||
|
"action_count": len(human_actions),
|
||||||
|
"last_click": last_click,
|
||||||
|
"trigger": "no_screen_change",
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Timeout — l'humain n'a pas répondu
|
||||||
|
result["success"] = False
|
||||||
|
result["warning"] = "no_screen_change"
|
||||||
|
result["error"] = "Ecran inchange apres l'action"
|
||||||
|
else:
|
||||||
|
# Actions non-visuelles : comportement existant
|
||||||
|
result["success"] = False
|
||||||
|
result["warning"] = "no_screen_change"
|
||||||
|
result["error"] = "Ecran inchange apres l'action"
|
||||||
|
print(
|
||||||
|
f" [ECHEC] Ecran inchange apres {action_type} — "
|
||||||
|
f"l'action n'a pas eu d'effet visible"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
self.notifier.replay_no_screen_change(action_type)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
print(f" [OK] Changement d'ecran detecte apres {action_type}")
|
print(f" [OK] Changement d'ecran detecte apres {action_type}")
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user