fix(agent): P0.6 guard human corrections

2026-05-24 21:07:12 +02:00
parent ad24d16d83
commit b1b32187ba
4 changed files with 150 additions and 8 deletions
--- a/agent_v0/agent_v1/core/executor.py
+++ b/agent_v0/agent_v1/core/executor.py
@@ -3565,15 +3565,62 @@ Example: x_pct=0.50, y_pct=0.30"""

        monitor = self.sct.monitors[1]
        screen_w, screen_h = monitor["width"], monitor["height"]
+        screen_left = monitor.get("left", 0)
+        screen_top = monitor.get("top", 0)
+
+        if not (
+            isinstance(screen_w, int)
+            and isinstance(screen_h, int)
+            and screen_w >= 200
+            and screen_h >= 200
+        ):
+            logger.warning(
+                "[APPRENTISSAGE] Monitor aberrant (%sx%s) — capture refusée",
+                screen_w,
+                screen_h,
+            )
+            return []
+
+        listener_start_ts = time.time()
+        drain_guard_s = 1.0

        def _on_click(x, y, button, pressed):
            if done_event.is_set():
                return False
            if pressed and button.name in ("left", "right"):
+                # Ignore residual low-level mouse events delivered just after
+                # the listener is attached. These are often synthetic agent or
+                # remote-desktop events, not a deliberate human correction.
+                if time.time() - listener_start_ts < drain_guard_s:
+                    logger.debug(
+                        "[APPRENTISSAGE] Clic ignoré (drain %.1fs) : (%s, %s)",
+                        drain_guard_s,
+                        x,
+                        y,
+                    )
+                    return
+
+                rel_x = x - screen_left
+                rel_y = y - screen_top
+                if not (0 <= rel_x < screen_w and 0 <= rel_y < screen_h):
+                    logger.warning(
+                        "[APPRENTISSAGE] Clic ignoré hors moniteur "
+                        "(monitor=%sx%s left=%s top=%s, raw=(%s,%s), rel=(%s,%s))",
+                        screen_w,
+                        screen_h,
+                        screen_left,
+                        screen_top,
+                        x,
+                        y,
+                        rel_x,
+                        rel_y,
+                    )
+                    return
+
                action = {
                    "type": "click",
-                    "x_pct": round(x / screen_w, 6),
-                    "y_pct": round(y / screen_h, 6),
+                    "x_pct": round(rel_x / screen_w, 6),
+                    "y_pct": round(rel_y / screen_h, 6),
                    "button": button.name,
                    "timestamp": time.time(),
                }
@@ -3589,7 +3636,14 @@ Example: x_pct=0.50, y_pct=0.30"""
                    pass
                actions.append(action)
                last_action_time[0] = time.time()
-                logger.info(f"[APPRENTISSAGE] Clic ({x}, {y}) bouton={button.name}")
+                logger.info(
+                    "[APPRENTISSAGE] Clic raw=(%s,%s) rel=(%s,%s) bouton=%s",
+                    x,
+                    y,
+                    rel_x,
+                    rel_y,
+                    button.name,
+                )

        def _on_key_press(key):
            if done_event.is_set():
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -3785,9 +3785,15 @@ async def report_action_result(report: ReplayResultReport):
        try:
            corr = report.correction
            target_spec = original_action.get("target_spec", {})
+            log_x = corr.get("x_pct")
+            log_y = corr.get("y_pct")
+            last_click = corr.get("last_click")
+            if (log_x is None or log_y is None) and isinstance(last_click, dict):
+                log_x = last_click.get("x_pct")
+                log_y = last_click.get("y_pct")
            logger.info(
                f"[APPRENTISSAGE] Correction humaine reçue : "
-                f"({corr.get('x_pct', 0):.4f}, {corr.get('y_pct', 0):.4f}) "
+                f"({float(log_x or 0):.4f}, {float(log_y or 0):.4f}) "
                f"pour '{target_spec.get('by_text', '?')}'"
            )
            _replay_learner.record_human_correction(
--- a/agent_v0/server_v1/replay_learner.py
+++ b/agent_v0/server_v1/replay_learner.py
@@ -194,8 +194,33 @@ class ReplayLearner:
            or target_spec.get("expected_window_before", "")
            or (target_spec.get("context_hints") or {}).get("window_title", "")
        )
-        x_pct = correction.get("x_pct", 0.0)
-        y_pct = correction.get("y_pct", 0.0)
+        x_pct = correction.get("x_pct")
+        y_pct = correction.get("y_pct")
+        last_click = correction.get("last_click")
+        if (x_pct is None or y_pct is None) and isinstance(last_click, dict):
+            x_pct = last_click.get("x_pct")
+            y_pct = last_click.get("y_pct")
+
+        try:
+            x_pct_f = float(x_pct)
+            y_pct_f = float(y_pct)
+        except (TypeError, ValueError):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "aucune coordonnée clic exploitable pour '%s'",
+                by_text,
+            )
+            return
+
+        if not (0.0 < x_pct_f <= 1.0 and 0.0 < y_pct_f <= 1.0):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "coordonnées hors bornes pour '%s' (%.4f, %.4f)",
+                by_text,
+                x_pct_f,
+                y_pct_f,
+            )
+            return

        # Enregistrer dans le JSONL d'apprentissage
        outcome = ActionOutcome(
@@ -218,8 +243,8 @@ class ReplayLearner:
                stored = memory_record_success(
                    window_title=window_title,
                    target_spec=target_spec,
-                    x_pct=float(x_pct),
-                    y_pct=float(y_pct),
+                    x_pct=x_pct_f,
+                    y_pct=y_pct_f,
                    method="human_supervised",
                    confidence=1.0,
                )
--- a/tests/unit/test_policy_grounding_recovery_learning.py
+++ b/tests/unit/test_policy_grounding_recovery_learning.py
@@ -770,6 +770,63 @@ class TestReplayLearner:

        assert captured["window_title"] == "Fenêtre fallback"

+    def test_record_human_correction_uses_last_click_contract(self, learner, monkeypatch):
+        """The agent contract sends the coordinates in correction.last_click."""
+        captured = {}  # collects the kwargs passed to the patched memory_record_success
+
+        def fake_memory_record_success(**kwargs):
+            captured.update(kwargs)
+            return True
+
+        monkeypatch.setattr(
+            "agent_v0.server_v1.replay_memory.memory_record_success",
+            fake_memory_record_success,
+        )
+
+        learner.record_human_correction(
+            session_id="s_corr_last_click",
+            action={
+                "action_id": "a_corr_last_click",
+                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
+            },
+            correction={  # no top-level x_pct / y_pct: coordinates appear only in nested entries
+                "actions": [{"type": "click", "x_pct": 0.33, "y_pct": 0.66}],
+                "last_click": {"type": "click", "x_pct": 0.33, "y_pct": 0.66},
+            },
+        )
+
+        assert captured["x_pct"] == 0.33  # value taken from correction["last_click"]
+        assert captured["y_pct"] == 0.66
+        assert len(learner.load_session("s_corr_last_click")) == 1  # exactly one entry expected for this session
+
+    def test_record_human_correction_rejects_out_of_bounds_coords(self, learner, monkeypatch):
+        """An off-screen correction must not feed the memory."""
+        captured = {}  # stays empty unless the patched memory_record_success is called
+
+        def fake_memory_record_success(**kwargs):
+            captured.update(kwargs)
+            return True
+
+        monkeypatch.setattr(
+            "agent_v0.server_v1.replay_memory.memory_record_success",
+            fake_memory_record_success,
+        )
+
+        learner.record_human_correction(
+            session_id="s_corr_oob",
+            action={
+                "action_id": "a_corr_oob",
+                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
+            },
+            correction={  # x_pct = 1.748 is above 1.0, i.e. outside the normalized screen range
+                "actions": [{"type": "click", "x_pct": 1.748, "y_pct": 0.135}],
+                "last_click": {"type": "click", "x_pct": 1.748, "y_pct": 0.135},
+            },
+        )
+
+        assert captured == {}  # the patched memory_record_success was never invoked
+        assert learner.load_session("s_corr_oob") == []  # nothing recorded for this session
+

 # =========================================================================
 # Boucle d'apprentissage : consolidation cross-workflow