From b1b32187bada6b29da7a2a3e1bc01de2ee177d24 Mon Sep 17 00:00:00 2001
From: Dom
Date: Sun, 24 May 2026 21:07:12 +0200
Subject: [PATCH] fix(agent): P0.6 guard human corrections

---
 agent_v0/agent_v1/core/executor.py            | 60 ++++++++++++++++++-
 agent_v0/server_v1/api_stream.py              |  8 ++-
 agent_v0/server_v1/replay_learner.py          | 33 ++++++++--
 ...test_policy_grounding_recovery_learning.py | 57 ++++++++++++++++++
 4 files changed, 150 insertions(+), 8 deletions(-)

diff --git a/agent_v0/agent_v1/core/executor.py b/agent_v0/agent_v1/core/executor.py
index 8d12e16d8..a07edc3dd 100644
--- a/agent_v0/agent_v1/core/executor.py
+++ b/agent_v0/agent_v1/core/executor.py
@@ -3565,15 +3565,62 @@ Example: x_pct=0.50, y_pct=0.30"""
 
         monitor = self.sct.monitors[1]
         screen_w, screen_h = monitor["width"], monitor["height"]
+        screen_left = monitor.get("left", 0)
+        screen_top = monitor.get("top", 0)
+
+        if not (
+            isinstance(screen_w, int)
+            and isinstance(screen_h, int)
+            and screen_w >= 200
+            and screen_h >= 200
+        ):
+            logger.warning(
+                "[APPRENTISSAGE] Monitor aberrant (%sx%s) — capture refusée",
+                screen_w,
+                screen_h,
+            )
+            return []
+
+        listener_start_ts = time.monotonic()
+        drain_guard_s = 1.0
 
         def _on_click(x, y, button, pressed):
             if done_event.is_set():
                 return False
             if pressed and button.name in ("left", "right"):
+                # Ignore residual low-level mouse events delivered just after
+                # the listener is attached. These are often synthetic agent or
+                # remote-desktop events, not a deliberate human correction.
+                if time.monotonic() - listener_start_ts < drain_guard_s:
+                    logger.debug(
+                        "[APPRENTISSAGE] Clic ignoré (drain %.1fs) : (%s, %s)",
+                        drain_guard_s,
+                        x,
+                        y,
+                    )
+                    return
+
+                rel_x = x - screen_left
+                rel_y = y - screen_top
+                if not (0 <= rel_x < screen_w and 0 <= rel_y < screen_h):
+                    logger.warning(
+                        "[APPRENTISSAGE] Clic ignoré hors moniteur "
+                        "(monitor=%sx%s left=%s top=%s, raw=(%s,%s), rel=(%s,%s))",
+                        screen_w,
+                        screen_h,
+                        screen_left,
+                        screen_top,
+                        x,
+                        y,
+                        rel_x,
+                        rel_y,
+                    )
+                    return
+
                 action = {
                     "type": "click",
-                    "x_pct": round(x / screen_w, 6),
-                    "y_pct": round(y / screen_h, 6),
+                    "x_pct": round(rel_x / screen_w, 6),
+                    "y_pct": round(rel_y / screen_h, 6),
                     "button": button.name,
                     "timestamp": time.time(),
                 }
@@ -3589,7 +3636,14 @@ Example: x_pct=0.50, y_pct=0.30"""
                     pass
                 actions.append(action)
                 last_action_time[0] = time.time()
-                logger.info(f"[APPRENTISSAGE] Clic ({x}, {y}) bouton={button.name}")
+                logger.info(
+                    "[APPRENTISSAGE] Clic raw=(%s,%s) rel=(%s,%s) bouton=%s",
+                    x,
+                    y,
+                    rel_x,
+                    rel_y,
+                    button.name,
+                )
 
         def _on_key_press(key):
             if done_event.is_set():
diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py
index 860e438a4..989af4467 100644
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -3785,9 +3785,15 @@ async def report_action_result(report: ReplayResultReport):
         try:
             corr = report.correction
             target_spec = original_action.get("target_spec", {})
+            log_x = corr.get("x_pct")
+            log_y = corr.get("y_pct")
+            last_click = corr.get("last_click")
+            if (log_x is None or log_y is None) and isinstance(last_click, dict):
+                log_x = last_click.get("x_pct")
+                log_y = last_click.get("y_pct")
             logger.info(
                 f"[APPRENTISSAGE] Correction humaine reçue : "
-                f"({corr.get('x_pct', 0):.4f}, {corr.get('y_pct', 0):.4f}) "
+                f"({float(log_x or 0):.4f}, {float(log_y or 0):.4f}) "
                 f"pour '{target_spec.get('by_text', '?')}'"
             )
             _replay_learner.record_human_correction(
diff --git a/agent_v0/server_v1/replay_learner.py b/agent_v0/server_v1/replay_learner.py
index f9f16d265..35949a421 100644
--- a/agent_v0/server_v1/replay_learner.py
+++ b/agent_v0/server_v1/replay_learner.py
@@ -194,8 +194,33 @@ class ReplayLearner:
             or target_spec.get("expected_window_before", "")
             or (target_spec.get("context_hints") or {}).get("window_title", "")
         )
-        x_pct = correction.get("x_pct", 0.0)
-        y_pct = correction.get("y_pct", 0.0)
+        x_pct = correction.get("x_pct")
+        y_pct = correction.get("y_pct")
+        last_click = correction.get("last_click")
+        if (x_pct is None or y_pct is None) and isinstance(last_click, dict):
+            x_pct = last_click.get("x_pct")
+            y_pct = last_click.get("y_pct")
+
+        try:
+            x_pct_f = float(x_pct)
+            y_pct_f = float(y_pct)
+        except (TypeError, ValueError):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "aucune coordonnée clic exploitable pour '%s'",
+                by_text,
+            )
+            return
+
+        if not (0.0 < x_pct_f <= 1.0 and 0.0 < y_pct_f <= 1.0):
+            logger.warning(
+                "[APPRENTISSAGE] Correction humaine non persistée : "
+                "coordonnées hors bornes pour '%s' (%.4f, %.4f)",
+                by_text,
+                x_pct_f,
+                y_pct_f,
+            )
+            return
 
         # Enregistrer dans le JSONL d'apprentissage
         outcome = ActionOutcome(
@@ -218,8 +243,8 @@ class ReplayLearner:
             stored = memory_record_success(
                 window_title=window_title,
                 target_spec=target_spec,
-                x_pct=float(x_pct),
-                y_pct=float(y_pct),
+                x_pct=x_pct_f,
+                y_pct=y_pct_f,
                 method="human_supervised",
                 confidence=1.0,
             )
diff --git a/tests/unit/test_policy_grounding_recovery_learning.py b/tests/unit/test_policy_grounding_recovery_learning.py
index c0c00bbc6..25d11a587 100644
--- a/tests/unit/test_policy_grounding_recovery_learning.py
+++ b/tests/unit/test_policy_grounding_recovery_learning.py
@@ -770,6 +770,63 @@ class TestReplayLearner:
 
         assert captured["window_title"] == "Fenêtre fallback"
 
+    def test_record_human_correction_uses_last_click_contract(self, learner, monkeypatch):
+        """The agent contract sends the coordinates in correction.last_click."""
+        captured = {}
+
+        def fake_memory_record_success(**kwargs):
+            captured.update(kwargs)
+            return True
+
+        monkeypatch.setattr(
+            "agent_v0.server_v1.replay_memory.memory_record_success",
+            fake_memory_record_success,
+        )
+
+        learner.record_human_correction(
+            session_id="s_corr_last_click",
+            action={
+                "action_id": "a_corr_last_click",
+                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
+            },
+            correction={
+                "actions": [{"type": "click", "x_pct": 0.33, "y_pct": 0.66}],
+                "last_click": {"type": "click", "x_pct": 0.33, "y_pct": 0.66},
+            },
+        )
+
+        assert captured["x_pct"] == 0.33
+        assert captured["y_pct"] == 0.66
+        assert len(learner.load_session("s_corr_last_click")) == 1
+
+    def test_record_human_correction_rejects_out_of_bounds_coords(self, learner, monkeypatch):
+        """An off-screen correction must not be fed into the memory."""
+        captured = {}
+
+        def fake_memory_record_success(**kwargs):
+            captured.update(kwargs)
+            return True
+
+        monkeypatch.setattr(
+            "agent_v0.server_v1.replay_memory.memory_record_success",
+            fake_memory_record_success,
+        )
+
+        learner.record_human_correction(
+            session_id="s_corr_oob",
+            action={
+                "action_id": "a_corr_oob",
+                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
+            },
+            correction={
+                "actions": [{"type": "click", "x_pct": 1.748, "y_pct": 0.135}],
+                "last_click": {"type": "click", "x_pct": 1.748, "y_pct": 0.135},
+            },
+        )
+
+        assert captured == {}
+        assert learner.load_session("s_corr_oob") == []
+
     # =========================================================================
     # Boucle d'apprentissage : consolidation cross-workflow