snapshot: WIP 5j replay reliability (B1 watchdog + dialog handlers + grounding drift)

Snapshot avant correction du blocage relance Léa (3 incidents 24h: SSH refusé,
polls morts ×2). Point de rollback stable.

Contenu:
- agent_v1/core/executor.py: 5 patchs dialog handling (saveas drift, close_tab
  hotkey fallback, confirm_save Unicode apostrophe, foreground dialog
  recontextualization, runtime_dialog in-loop) + helpers normalize_window_hint,
  requires_post_verify_window_transition
- agent_v1/core/grounding.py: garde drift template fix (fallback_x/y plumbed)
- server_v1/replay_watchdog.py (NEW): orphan watchdog B1, scan 10s timeout 30s
- server_v1/api_stream.py: dispatched_action plumbing, watchdog lifespan,
  metrics endpoint
- server_v1/replay_engine.py: _schedule_retry préserve original_action +
  dispatched_action
- stream_processor.py: gardes _infer_tab_switch_target (no false switch_tab
  on save_as dialog open) + _attach_expected_window_before
- tests/integration: test_replay_watchdog.py (8 cas), test_stream_processor.py
- tests/unit: test_executor_verify_window_guard.py (start_button, close_tab,
  runtime_dialog, post_verify, transition fallbacks)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-05-24 16:48:37 +02:00
parent 5ea4960e65
commit 7df51d2c79
47 changed files with 9811 additions and 451 deletions

View File

@@ -112,6 +112,58 @@ class TestLiveSessionManager:
assert len(raw["screenshots"]) == 1
assert raw["screenshots"][0]["screenshot_id"] == "shot_full_001"
def test_discovers_bg_session_machine_id_from_root_folder(self, tmp_path):
    """A `bg_<machine>` folder on disk is exposed as a session carrying that machine id.

    Only a `live_events.jsonl` file is created under the live-sessions root;
    nothing is registered explicitly before the lookup.
    """
    from agent_v0.server_v1.live_session_manager import LiveSessionManager

    bg_session_id = "bg_DESKTOP-58D5CAC_windows"
    live_root = tmp_path / "live_sessions"
    bg_folder = live_root / bg_session_id
    bg_folder.mkdir(parents=True)
    events_file = bg_folder / "live_events.jsonl"
    events_file.write_text("{}", encoding="utf-8")

    manager = LiveSessionManager(
        persist_dir=str(tmp_path / "persist"),
        live_sessions_dir=str(live_root),
    )

    found = manager.get_session(bg_session_id)
    assert found is not None
    assert found.machine_id == "DESKTOP-58D5CAC_windows"
def test_loads_persisted_bg_session_with_machine_id_inferred(self, tmp_path):
    """A persisted `bg_*` session stored with machine_id "default" is expected
    to come back with the machine id taken from its session id.
    """
    from agent_v0.server_v1.live_session_manager import LiveSessionManager

    bg_session_id = "bg_DESKTOP-58D5CAC_windows"
    store = tmp_path / "persist"
    store.mkdir()

    # Persisted snapshot as it would sit on disk; note machine_id is "default".
    persisted_payload = (
        '{"session_id":"bg_DESKTOP-58D5CAC_windows","machine_id":"default",'
        '"events":[],"shot_paths":{},"last_window_info":{"title":"Unknown","app_name":"unknown"},'
        '"created_at":"2026-05-20T14:00:00","last_activity":"2026-05-20T14:00:00",'
        '"finalized":false,"window_titles_seen":{},"app_names_seen":{}}'
    )
    snapshot_file = store / f"{bg_session_id}.json"
    snapshot_file.write_text(persisted_payload, encoding="utf-8")

    manager = LiveSessionManager(persist_dir=str(store))

    restored = manager.get_session(bg_session_id)
    assert restored is not None
    assert restored.machine_id == "DESKTOP-58D5CAC_windows"
def test_find_active_agent_session_falls_back_to_bg_machine_session(self, tmp_path):
    """With the machine's regular session finalized, the lookup is expected to
    return the `bg_<machine>` session instead.
    """
    from agent_v0.server_v1.live_session_manager import LiveSessionManager
    from agent_v0.server_v1.replay_engine import _find_active_agent_session

    machine = "DESKTOP-58D5CAC_windows"
    finished_session_id = "sess_20260520T102916_066851"
    bg_session_id = "bg_DESKTOP-58D5CAC_windows"

    manager = LiveSessionManager(persist_dir=str(tmp_path / "persist"))

    # Regular session for the machine: registered, then finalized.
    manager.register_session(finished_session_id, machine_id=machine)
    manager.finalize(finished_session_id)

    # Background session registered without an explicit machine_id.
    manager.register_session(bg_session_id)

    assert _find_active_agent_session(manager, machine_id=machine) == bg_session_id
# =========================================================================
# StreamProcessor
@@ -195,6 +247,238 @@ class TestStreamProcessor:
assert stats["total_workflows"] == 0
assert stats["initialized"] is False
def test_build_replay_does_not_compile_save_dialog_open_as_switch_tab(
    self, tmp_path, monkeypatch,
):
    """A same-app `Enregistrer sous` (Save As) dialog is not a tab.

    Live regression 2026-05-23: a menu click in Notepad was recompiled into
    a bogus `switch_tab`, which injected a stray click before the dialog
    actually opened.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def fake_enrich(*args, **kwargs):
        # Fresh dict on every call, like the enrichment it stands in for.
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    def noop(*args, **kwargs):
        return None

    # Stub out everything that would touch screenshots or models.
    monkeypatch.setattr(sp, "_load_crop_for_event", noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, noop)

    app = "Notepad.exe"
    editor_title = "*test Bloc-notes"
    dialog_title = "Enregistrer sous"

    def click(timestamp, pos, shot_id, title, click_relative=None):
        # Key order mirrors the recorded event layout.
        payload = {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": {"title": title, "app_name": app},
        }
        if click_relative is not None:
            payload["window_capture"] = {
                "rect": [320, 520, 2240, 1636],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            }
        return {"event": payload}

    def focus_change(timestamp, old_title, new_title):
        return {"event": {
            "type": "window_focus_change",
            "timestamp": timestamp,
            "from": {"title": old_title, "app_name": app},
            "to": {"title": new_title, "app_name": app},
        }}

    events = [
        click(1.0, [820, 630], "shot_001", editor_title, [500, 110]),
        click(1.2, [860, 562], "shot_002", editor_title, [540, 40]),
        focus_change(1.35, editor_title, dialog_title),
        # Click inside the dialog: recorded without window_capture.
        click(1.6, [997, 743], "shot_003", dialog_title),
    ]

    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_save_dialog", session_dir=str(session_dir),
    )

    clicks = [action for action in actions if action.get("type") == "click"]
    assert len(clicks) == 3

    interactions = [
        (entry.get("target_spec", {}).get("context_hints") or {}).get("interaction")
        for entry in clicks
    ]
    assert "switch_tab" not in interactions

    assert clicks[1].get("expected_window_title") == "Enregistrer sous"
    assert clicks[2].get("expected_window_before") == "Enregistrer sous"
def test_build_replay_tab_switch_focus_belongs_to_latest_click_only(
    self, tmp_path, monkeypatch,
):
    """The tab focus change must be attached to the latest causal click."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def fake_enrich(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    def noop(*args, **kwargs):
        return None

    # Stub out everything that would touch screenshots or models.
    monkeypatch.setattr(sp, "_load_crop_for_event", noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, noop)

    def window(title):
        # New dict per use so no two events share a window object.
        return {"title": title, "app_name": "Notepad.exe"}

    def tab_strip_click(timestamp, pos, shot_id, click_relative):
        return {"event": {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": window("http192.168.1.408765dossier.htmlid=.txt Bloc-notes"),
            "window_capture": {
                "rect": [323, 522, 2243, 1638],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            },
        }}

    events = [
        tab_strip_click(1.0, [1410, 562], "shot_001", [1087, 40]),
        tab_strip_click(1.1, [1514, 562], "shot_002", [1191, 40]),
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.2,
            "from": window("http192.168.1.408765dossier.htmlid=.txt Bloc-notes"),
            "to": window("Sans titre Bloc-notes"),
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_intervening_click",
        session_dir=str(session_dir),
    )

    assert len(actions) == 2
    earlier_action, latest_action = actions[0], actions[1]
    first_hints = earlier_action.get("target_spec", {}).get("context_hints") or {}
    second_hints = latest_action.get("target_spec", {}).get("context_hints") or {}

    # Only the click immediately preceding the focus change becomes a tab switch.
    assert first_hints.get("interaction") != "switch_tab"
    assert latest_action["target_spec"]["by_text"] == "Sans titre"
    assert latest_action["target_spec"]["by_role"] == "tab"
    assert second_hints.get("interaction") == "switch_tab"
def test_build_replay_infers_close_tab_before_save_dialog(
    self, tmp_path, monkeypatch,
):
    """A click on the active tab's x must be given close_tab semantics."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def fake_enrich(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    def noop(*args, **kwargs):
        return None

    # Stub out everything that would touch screenshots or models.
    monkeypatch.setattr(sp, "_load_crop_for_event", noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, noop)

    def notepad_window(title):
        return {"title": title, "app_name": "Notepad.exe"}

    def editor_click(timestamp, pos, shot_id, click_relative):
        return {"event": {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": notepad_window("*test Bloc-notes"),
            "window_capture": {
                "rect": [323, 522, 2243, 1638],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            },
        }}

    events = [
        editor_click(1.0, [1814, 560], "shot_001", [1491, 38]),
        editor_click(1.3, [1183, 1156], "shot_002", [860, 634]),
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.5,
            "from": notepad_window("*test Bloc-notes"),
            "to": notepad_window("Enregistrer sous"),
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_close_tab",
        session_dir=str(session_dir),
    )

    clicks = [action for action in actions if action.get("type") == "click"]
    assert len(clicks) == 2

    first_spec = clicks[0].get("target_spec", {})
    first_hints = first_spec.get("context_hints") or {}

    # Target description of the first click.
    assert first_spec.get("by_role") == "tab_close_button"
    assert first_spec.get("by_text", "") == ""

    # Semantic hints attached to it.
    assert first_hints.get("interaction") == "close_tab"
    assert first_hints.get("active_tab_label") == "test"
    assert "fermer l'onglet actif 'test'" in first_spec.get("vlm_description", "")
# =========================================================================
# StreamWorker