fix(agent): P0.6 guard human corrections

2026-05-24 21:07:12 +02:00
parent ad24d16d83
commit b1b32187ba
4 changed files with 150 additions and 8 deletions
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -3785,9 +3785,15 @@ async def report_action_result(report: ReplayResultReport):
        try:
            corr = report.correction
            target_spec = original_action.get("target_spec", {})
+            log_x = corr.get("x_pct")
+            log_y = corr.get("y_pct")
+            last_click = corr.get("last_click")
+            if (log_x is None or log_y is None) and isinstance(last_click, dict):
+                log_x = last_click.get("x_pct")
+                log_y = last_click.get("y_pct")
            logger.info(
                f"[APPRENTISSAGE] Correction humaine reçue : "
-                f"({corr.get('x_pct', 0):.4f}, {corr.get('y_pct', 0):.4f}) "
+                f"({float(log_x or 0):.4f}, {float(log_y or 0):.4f}) "
                f"pour '{target_spec.get('by_text', '?')}'"
            )
            _replay_learner.record_human_correction(
--- a/agent_v0/server_v1/replay_learner.py
+++ b/agent_v0/server_v1/replay_learner.py
@@ -194,8 +194,33 @@ class ReplayLearner:
            or target_spec.get("expected_window_before", "")
            or (target_spec.get("context_hints") or {}).get("window_title", "")
        )
-        x_pct = correction.get("x_pct", 0.0)
-        y_pct = correction.get("y_pct", 0.0)
+        x_pct = correction.get("x_pct")
+        y_pct = correction.get("y_pct")
+        last_click = correction.get("last_click")
+        if (x_pct is None or y_pct is None) and isinstance(last_click, dict):
+            x_pct = last_click.get("x_pct")
+            y_pct = last_click.get("y_pct")
+
+        try:
+            x_pct_f = float(x_pct)
+            y_pct_f = float(y_pct)
+        except (TypeError, ValueError):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "aucune coordonnée clic exploitable pour '%s'",
+                by_text,
+            )
+            return
+
+        if not (0.0 < x_pct_f <= 1.0 and 0.0 < y_pct_f <= 1.0):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "coordonnées hors bornes pour '%s' (%.4f, %.4f)",
+                by_text,
+                x_pct_f,
+                y_pct_f,
+            )
+            return

        # Enregistrer dans le JSONL d'apprentissage
        outcome = ActionOutcome(
@@ -218,8 +243,8 @@ class ReplayLearner:
                stored = memory_record_success(
                    window_title=window_title,
                    target_spec=target_spec,
-                    x_pct=float(x_pct),
-                    y_pct=float(y_pct),
+                    x_pct=x_pct_f,
+                    y_pct=y_pct_f,
                    method="human_supervised",
                    confidence=1.0,
                )