snapshot: WIP 5j replay reliability (B1 watchdog + dialog handlers + grounding drift)

Snapshot avant correction du blocage relance Léa (3 incidents 24h: SSH refusé,
polls morts ×2). Point de rollback stable.

Contenu:
- agent_v1/core/executor.py: 5 patchs dialog handling (saveas drift, close_tab
  hotkey fallback, confirm_save Unicode apostrophe, foreground dialog
  recontextualization, runtime_dialog in-loop) + helpers normalize_window_hint,
  requires_post_verify_window_transition
- agent_v1/core/grounding.py: garde drift template fix (fallback_x/y plumbed)
- server_v1/replay_watchdog.py (NEW): orphan watchdog B1, scan 10s timeout 30s
- server_v1/api_stream.py: dispatched_action plumbing, watchdog lifespan,
  metrics endpoint
- server_v1/replay_engine.py: _schedule_retry préserve original_action +
  dispatched_action
- stream_processor.py: gardes _infer_tab_switch_target (no false switch_tab
  on save_as dialog open) + _attach_expected_window_before
- tests/integration: test_replay_watchdog.py (8 cas), test_stream_processor.py
- tests/unit: test_executor_verify_window_guard.py (start_button, close_tab,
  runtime_dialog, post_verify, transition fallbacks)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-05-24 16:48:37 +02:00
parent 5ea4960e65
commit 7df51d2c79
47 changed files with 9811 additions and 451 deletions

View File

@@ -0,0 +1,184 @@
"""Tests ciblés sur l'intégration agent du contrat finalize enrichi."""
from __future__ import annotations
import sys
import types
from pathlib import Path
from unittest.mock import MagicMock, patch
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
class _ImmediateThread:
    """Synchronous stand-in for ``threading.Thread``.

    ``start()`` calls the target inline on the calling thread, so code that
    would normally spawn a worker has finished by the time ``start()``
    returns. ``daemon`` is accepted for signature compatibility and ignored.
    """

    def __init__(self, target=None, args=(), kwargs=None, daemon=None):
        self._target = target
        self._args = args
        # A missing (or empty) kwargs mapping is replaced by a fresh dict.
        self._kwargs = {} if not kwargs else kwargs

    def start(self):
        """Invoke the target right away; a missing target is a no-op."""
        if self._target is None:
            return
        self._target(*self._args, **self._kwargs)
class _DummyServerClient:
    """Minimal server-client double exposing only what these tests touch."""

    _stream_base = "http://server.test:5005"

    def __init__(self):
        self.on_connection_change = None

    def set_on_connection_change(self, callback):
        """Store the connection-change callback on the instance."""
        self.on_connection_change = callback

    def _auth_headers(self):
        """Return a fixed bearer-token header mapping."""
        headers = {}
        headers["Authorization"] = "Bearer test-token"
        return headers
def _install_pystray_stub():
    """Register a fake ``pystray`` module in ``sys.modules``.

    The stub exposes ``MenuItem`` (returns its arguments as a tuple),
    ``Menu`` (records its arguments, provides ``SEPARATOR``) and ``Icon``
    (records its arguments; ``run``/``stop``/``update_menu`` do nothing).
    Any existing ``pystray`` entry is overwritten.
    """

    class _DummyMenu:
        SEPARATOR = object()

        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

    class _DummyIcon:
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

        def run(self):
            return None

        def stop(self):
            return None

        def update_menu(self):
            return None

    def _menu_item(*args, **kwargs):
        # Echo the call so callers get a plain, inspectable value.
        return (args, kwargs)

    stub_module = types.ModuleType("pystray")
    stub_module.MenuItem = _menu_item
    stub_module.Menu = _DummyMenu
    stub_module.Icon = _DummyIcon
    sys.modules["pystray"] = stub_module
def _build_tray():
    """Create a ``SmartTrayV1`` wired to no-op callbacks and test doubles.

    The pystray stub is installed before the tray module is imported, and the
    tray's ``_notifier`` is replaced by a ``MagicMock``.
    """
    _install_pystray_stub()
    from agent_v0.agent_v1.ui.smart_tray import SmartTrayV1

    def _on_start(_name):
        return None

    def _on_stop():
        return None

    client = _DummyServerClient()
    tray = SmartTrayV1(
        on_start_callback=_on_start,
        on_stop_callback=_on_stop,
        server_client=client,
    )
    tray._notifier = MagicMock()
    return tray
def test_offer_finalize_replay_requires_user_consent():
    """A declined consent prompt must not trigger the replay request.

    ``threading.Thread`` is swapped for ``_ImmediateThread`` so the offer
    flow runs inline, and ``_ask_consent`` is forced to return ``False``.
    """
    _install_pystray_stub()
    from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod

    tray = _build_tray()
    tray._launch_replay_request = MagicMock()
    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_001",
        "machine_id": "pc-offer",
    }

    with patch.object(smart_tray_mod.threading, "Thread", _ImmediateThread):
        with patch.object(smart_tray_mod, "_ask_consent", return_value=False):
            tray.offer_finalize_replay(replay_request, "Bloc-notes")

    # One notification is expected, but nothing may be launched.
    tray._notifier.notify.assert_called_once()
    tray._launch_replay_request.assert_not_called()
def test_launch_replay_request_calls_replay_session_endpoint():
    """``_launch_replay_request`` must issue exactly one ``requests.post``.

    Verifies the query parameters, the bearer header returned by the dummy
    server client, and that redirects are disabled.
    """
    _install_pystray_stub()
    from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod

    tray = _build_tray()
    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_002",
        "machine_id": "pc-replay",
    }

    with patch.object(smart_tray_mod.threading, "Thread", _ImmediateThread):
        with patch("requests.post") as mock_post:
            mock_post.return_value = MagicMock(ok=True)
            tray._launch_replay_request(replay_request, "Bloc-notes")

    mock_post.assert_called_once()
    sent_kwargs = mock_post.call_args[1]
    expected_params = {
        "session_id": "sess_offer_002",
        "machine_id": "pc-replay",
    }
    assert sent_kwargs["params"] == expected_params
    assert sent_kwargs["headers"] == {"Authorization": "Bearer test-token"}
    assert sent_kwargs["allow_redirects"] is False
def test_agent_finalize_result_delegates_to_tray_offer():
    """A ready replay must be forwarded to ``ui.offer_finalize_replay``.

    The UI is expected to receive the ``replay_request`` payload and the
    workflow label unchanged.
    """
    from agent_v0.agent_v1.finalize_contract import dispatch_finalize_result

    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_003",
        "machine_id": "pc-main",
    }
    # Independent copy taken before the call, so an in-place mutation of the
    # payload by the code under test would still be detected.
    expected_request = dict(replay_request)
    finalize_result = {"replay_ready": True, "replay_request": replay_request}

    ui = MagicMock()
    dispatch_finalize_result(ui, finalize_result, "Saisie dossier")

    ui.offer_finalize_replay.assert_called_once_with(
        expected_request,
        "Saisie dossier",
    )
def test_agent_finalize_result_ignores_already_started_replay():
    """No offer is made when the result reports a replay launch as started."""
    from agent_v0.agent_v1.finalize_contract import dispatch_finalize_result

    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_004",
        "machine_id": "pc-main",
    }
    replay_launch = {
        "status": "started",
        "replay": {"replay_id": "replay_sess_1234"},
    }
    finalize_result = {
        "replay_ready": True,
        "replay_request": replay_request,
        "replay_launch": replay_launch,
    }

    ui = MagicMock()
    dispatch_finalize_result(ui, finalize_result, "Saisie dossier")

    ui.offer_finalize_replay.assert_not_called()

View File

@@ -0,0 +1,78 @@
"""Tests ciblés sur l'état replay côté AgentV1 pendant pause supervisée."""
import sys
import threading
from types import SimpleNamespace
from unittest.mock import MagicMock
def _make_agent():
    """Build an ``AgentV1`` without running ``__init__``.

    Input/tray libraries are replaced by ``MagicMock`` modules when they are
    not already present in ``sys.modules``. ``_state`` and ``ui`` are
    namespaces that record every ``set_replay_active`` argument in ``calls``.
    """
    for module_name in ("pynput", "pynput.mouse", "pynput.keyboard", "pystray"):
        sys.modules.setdefault(module_name, MagicMock())
    from agent_v0.agent_v1.main import AgentV1

    def _recording_namespace():
        recorder = SimpleNamespace(calls=[])
        recorder.set_replay_active = lambda active: recorder.calls.append(active)
        return recorder

    agent = AgentV1.__new__(AgentV1)
    agent.user_id = "demo_user"
    agent.machine_id = "machine_test"
    agent.running = True
    agent._replay_active = True
    agent._state = _recording_namespace()
    agent.ui = _recording_namespace()
    return agent
def test_replay_pause_does_not_mark_replay_finished(monkeypatch):
    """When the executor reports ``_replay_paused``, AgentV1 stays in replay mode.

    The poll loop runs in a worker thread; the patched ``sleep`` clears
    ``agent.running`` so the loop is asked to stop after its first wait.
    """
    agent = _make_agent()

    class _Executor:
        _poll_backoff = 1.0
        _replay_paused = True

        def poll_and_execute(self, session_id: str, server_url: str, machine_id: str = "default") -> bool:
            return False

    agent._executor = _Executor()

    def _fake_sleep(_delay):
        agent.running = False

    monkeypatch.setattr("agent_v0.agent_v1.main.time.sleep", _fake_sleep)
    # daemon=True: a hung loop must not keep the interpreter alive at exit.
    t = threading.Thread(target=agent._replay_poll_loop, daemon=True)
    t.start()
    t.join(timeout=1)
    # Without this check the assertions below would pass vacuously if the
    # loop were still running: they only describe the untouched initial state.
    assert not t.is_alive(), "_replay_poll_loop did not terminate within 1s"
    assert agent._replay_active is True
    assert agent.ui.calls == []
    assert agent._state.calls == []
def test_replay_without_action_and_without_pause_marks_replay_finished(monkeypatch):
    """With no action and no pause, AgentV1 must leave replay mode.

    The poll loop runs in a worker thread; the patched ``sleep`` clears
    ``agent.running`` so the loop is asked to stop after its first wait.
    """
    agent = _make_agent()

    class _Executor:
        _poll_backoff = 1.0
        _replay_paused = False

        def poll_and_execute(self, session_id: str, server_url: str, machine_id: str = "default") -> bool:
            return False

    agent._executor = _Executor()

    def _fake_sleep(_delay):
        agent.running = False

    monkeypatch.setattr("agent_v0.agent_v1.main.time.sleep", _fake_sleep)
    # daemon=True: a hung loop must not keep the interpreter alive at exit.
    t = threading.Thread(target=agent._replay_poll_loop, daemon=True)
    t.start()
    t.join(timeout=1)
    # Report a hang explicitly instead of as a confusing state mismatch.
    assert not t.is_alive(), "_replay_poll_loop did not terminate within 1s"
    assert agent._replay_active is False
    assert agent.ui.calls == [False]
    assert agent._state.calls == [False]

View File

@@ -0,0 +1,485 @@
"""Garde dimensions monitor — agent_v0/agent_v1/vision/capturer.py
Contexte (démo GHT 19 mai 2026) : `mss.monitors[1]` peut retourner
intermittemment des dimensions tronquées (cas observé : 2560×60 au lieu
de 2560×1600). Toute capture utilisant ces dims pour normaliser des
coordonnées empoisonne ensuite la mémoire persistante (`TargetMemoryStore`).
Ce module teste la garde qui doit :
- détecter une dimension aberrante avant capture
- retenter (mss peut avoir un cache stale)
- tomber en fallback sur un autre monitor physique si dispo
- abandonner explicitement (logs WARNING/ERROR) sans empoisonner
Périmètre : capturer.py uniquement (pas executor, pas replay).
"""
from __future__ import annotations
import logging
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from PIL import Image
def _make_mock_mss(monitors_sequence):
    """Build a fake ``mss.mss`` factory that replays successive monitor lists.

    Each call to the returned factory yields a context manager whose
    ``__enter__`` returns an instance exposing ``monitors`` and ``grab``.
    This lets a test simulate a retry or a change of dimensions between two
    ``mss.mss()`` calls.

    Args:
        monitors_sequence: non-empty list of monitor lists. Entry ``i`` is
            the ``monitors`` value seen by the ``i``-th ``mss.mss()`` call;
            the last entry is reused for any further call.

    Returns:
        A callable usable as ``patch(..., side_effect=factory)`` on
        ``mss.mss``. Its ``instances`` attribute lists the
        ``(context_manager, instance)`` pairs created so far.

    Raises:
        ValueError: if ``monitors_sequence`` is empty.
    """
    if not monitors_sequence:
        # Fail at construction rather than with an opaque IndexError later.
        raise ValueError("monitors_sequence must contain at least one monitor list")

    call_counter = {"n": 0}
    instances = []

    def factory():
        idx = min(call_counter["n"], len(monitors_sequence) - 1)
        call_counter["n"] += 1
        monitors = monitors_sequence[idx]
        instance = MagicMock(name=f"mss_instance_{idx}")
        instance.monitors = monitors
        # Dimensions used when grab() is not given a usable region mapping.
        default_monitor = monitors[1] if len(monitors) > 1 else {}

        def _grab(region=None, *_args, **_kwargs):
            # Size the frame from the region actually requested, so a grab of
            # a secondary monitor does not return the primary's dimensions.
            source = region if isinstance(region, dict) else default_monitor
            w = source.get("width", default_monitor.get("width", 100))
            h = source.get("height", default_monitor.get("height", 100))
            shot = MagicMock()
            shot.size = (w, h)
            # A healthy frame must not be fully black, otherwise the
            # fail-closed black-frame check would reject it.
            shot.bgra = b"\x80\x80\x80\x00" * (w * h)
            return shot

        instance.grab = MagicMock(side_effect=_grab)
        # Context-manager protocol, as in ``with mss.mss() as sct``.
        cm = MagicMock(name=f"mss_cm_{idx}")
        cm.__enter__ = MagicMock(return_value=instance)
        cm.__exit__ = MagicMock(return_value=False)
        instances.append((cm, instance))
        return cm

    factory.instances = instances
    return factory
def _vision_capturer(tmp_path):
    """Import ``VisionCapturer`` lazily and build one rooted at ``tmp_path``.

    The import happens at call time so that patches installed by the caller
    are already active.
    """
    from agent_v0.agent_v1.vision.capturer import VisionCapturer

    output_dir = str(tmp_path)
    return VisionCapturer(output_dir)
def _solid_img(color: tuple[int, int, int], size=(320, 240)) -> Image.Image:
    """Return a single-colour RGB image, used to drive the black-frame tests."""
    solid = Image.new("RGB", size, color)
    return solid
# ============================================================================
# Test 1 — Dim aberrante (height=60) refusée : capture_full_context renvoie ""
# ============================================================================
def test_capture_full_context_returns_empty_when_monitor_height_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """Aberrant primary monitor (2560x60 instead of 2560x1600) yields ``""``.

    The capture must refuse to produce a PNG from such dimensions, since any
    coordinate normalised against them would be wrong. The expected return
    value is the empty string, and no ``grab()`` may have been issued.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    truncated_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    factory = _make_mock_mss([[composite, truncated_primary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
            cap = _vision_capturer(tmp_path)
            result = cap.capture_full_context("test_aberrant")

    assert result == "", (
        f"Capture devrait retourner '' sur dim aberrante, got {result!r}"
    )
    # Sanity: none of the created mss instances may ever have grabbed.
    for pair in factory.instances:
        pair[1].grab.assert_not_called()
# ============================================================================
# Test 2 — Le log WARNING doit citer la dim observée (debuggabilité)
# ============================================================================
def test_aberrant_monitor_logs_warning_with_observed_dimensions(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """The WARNING for an aberrant monitor must quote the observed size.

    Operators need to diagnose the cause from the logs without replaying the
    session, so both the width (2560) and the truncated height (60) must
    appear in the WARNING-level messages.
    """
    import re  # local import: this module's header does not import ``re``

    aberrant_monitors = [
        {"left": 0, "top": 0, "width": 2560, "height": 1660},
        {"left": 0, "top": 0, "width": 2560, "height": 60},
    ]
    factory = _make_mock_mss([aberrant_monitors])
    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory), \
            patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        cap = _vision_capturer(tmp_path)
        cap.capture_full_context("test")

    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
    assert warnings, "Au moins un WARNING attendu sur dim aberrante"
    msg = " ".join(r.getMessage() for r in warnings)
    assert "2560" in msg, f"Largeur observée doit apparaître dans le WARNING : {msg!r}"
    # "60" is a substring of "2560", so a plain substring test would pass on
    # the width alone. Require 60 as a standalone number instead.
    assert re.search(r"(?<!\d)60(?!\d)", msg), (
        f"Hauteur observée doit apparaître dans le WARNING : {msg!r}"
    )
# ============================================================================
# Test 3 — Retry : un 1er appel aberrant suivi d'un appel sain produit la capture
# ============================================================================
def test_capture_retries_when_first_monitor_query_is_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """An aberrant first query followed by a sane one must still capture.

    The observed bug is intermittent, so the capture is expected to query
    ``mss.mss()`` again and succeed once the dimensions are sane.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    first_query = [dict(composite), {"left": 0, "top": 0, "width": 2560, "height": 60}]
    second_query = [dict(composite), {"left": 0, "top": 0, "width": 2560, "height": 1600}]
    factory = _make_mock_mss([first_query, second_query])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
            cap = _vision_capturer(tmp_path)
            result = cap.capture_full_context("test_retry", force=True)

    assert result, (
        f"Capture doit réussir après retry sur dims saines, got {result!r}"
    )
    assert Path(result).exists(), "Le PNG doit être physiquement créé"
    # At least two mss.mss() calls: the aberrant one plus the retry.
    created = len(factory.instances)
    assert created >= 2, (
        f"Au moins 2 appels mss.mss() attendus (retry), vu {len(factory.instances)}"
    )
# ============================================================================
# Test 4 — Fallback : monitors[1] aberrant mais monitors[2] sain → capture OK
# ============================================================================
def test_capture_falls_back_to_secondary_monitor_when_primary_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """Multi-screen case: ``monitors[1]`` is broken, ``monitors[2]`` is sane.

    The capture must succeed and some log record must mention the fallback.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    broken_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    sane_secondary = {"left": 2560, "top": 0, "width": 1920, "height": 1080}
    # The same state is returned on every call (stationary, not intermittent).
    factory = _make_mock_mss([[composite, broken_primary, sane_secondary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
            cap = _vision_capturer(tmp_path)
            result = cap.capture_full_context("test_fallback", force=True)

    assert result, f"Capture doit réussir via monitor[2], got {result!r}"
    msg = " ".join(record.getMessage() for record in caplog.records)
    assert "fallback" in msg.lower(), (
        f"Un log doit signaler le fallback monitor : {msg!r}"
    )
# ============================================================================
# Test 5 — capture_dual bénéficie aussi de la garde
# ============================================================================
def test_capture_dual_returns_empty_dict_when_monitor_aberrant(tmp_path: Path):
    """``capture_dual`` must return ``{}`` when the monitor is aberrant.

    It is the same poisoning source as the full-context capture, so no PNG
    may be produced from aberrant dimensions.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    truncated_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    factory = _make_mock_mss([[composite, truncated_primary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            cap = _vision_capturer(tmp_path)
            result = cap.capture_dual(x=100, y=200, screenshot_id="shot_dual")

    assert result == {}, (
        f"capture_dual doit retourner {{}} sur dim aberrante, got {result!r}"
    )
# ============================================================================
# Test 6 — capture_active_window bénéficie aussi de la garde
# ============================================================================
def test_capture_active_window_returns_none_when_monitor_aberrant(tmp_path: Path):
    """Standalone ``capture_active_window`` must return ``None`` on an aberrant monitor."""
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    truncated_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    factory = _make_mock_mss([[composite, truncated_primary]])
    # A valid window rect is supplied so the call is not cut short before it
    # reaches the grab stage.
    fake_rect = {
        "rect": [100, 100, 800, 600],
        "size": [700, 500],
        "title": "Test Window",
        "app_name": "test_app",
    }
    rect_patch = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=fake_rect,
    )

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            with rect_patch:
                cap = _vision_capturer(tmp_path)
                result = cap.capture_active_window(x=200, y=300, screenshot_id="shot_win")

    assert result is None, (
        f"capture_active_window doit retourner None sur dim aberrante, got {result!r}"
    )
# ============================================================================
# Test 7 — Non-régression : dim normale produit toujours un PNG
# ============================================================================
def test_capture_full_context_succeeds_on_normal_dimensions(tmp_path: Path):
    """Sanity check: the guard leaves the nominal path intact."""
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    healthy_primary = {"left": 0, "top": 0, "width": 2560, "height": 1600}
    factory = _make_mock_mss([[composite, healthy_primary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            cap = _vision_capturer(tmp_path)
            result = cap.capture_full_context("test_normal", force=True)

    assert result, f"Capture nominale doit produire un PNG, got {result!r}"
    assert Path(result).exists(), "PNG doit exister sur disque"
    # Exactly one mss.mss() call is expected on the nominal path (no retry).
    created = len(factory.instances)
    assert created == 1, (
        f"Un seul appel mss.mss() attendu sur dims saines, vu {len(factory.instances)}"
    )
# ============================================================================
# Test 8 — fail-closed : capture_dual refuse le fallback monitor secondaire
# ============================================================================
def test_capture_dual_fails_closed_when_only_secondary_monitor_sane(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """``capture_dual`` must refuse the secondary-monitor fallback.

    Its ``(x, y)`` arguments are composite-screen coordinates. Cropping an
    image of ``monitors[2]`` (offset 2560, 0) with untranslated coordinates
    would target the wrong area, so the method is expected to return ``{}``
    and log the refusal rather than produce a silently shifted image.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    broken_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    sane_secondary = {"left": 2560, "top": 0, "width": 1920, "height": 1080}
    factory = _make_mock_mss([[composite, broken_primary, sane_secondary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
            cap = _vision_capturer(tmp_path)
            result = cap.capture_dual(x=300, y=400, screenshot_id="shot_dual_fb")

    assert result == {}, (
        f"capture_dual doit fail-closed sur fallback secondaire, got {result!r}"
    )
    msg = " ".join(record.getMessage() for record in caplog.records).lower()
    assert any(token in msg for token in ("fallback", "secondaire", "refus")), (
        f"Un log doit expliquer le refus du fallback pour coords : {msg!r}"
    )
# ============================================================================
# Test 9 — fail-closed : capture_active_window refuse le fallback secondaire
# ============================================================================
def test_capture_active_window_fails_closed_when_only_secondary_monitor_sane(
    tmp_path: Path,
):
    """``capture_active_window`` must refuse the secondary-monitor fallback.

    Cropping an image of ``monitors[2]`` with a window rect expressed in
    global coordinates would select the wrong area, so ``None`` is expected.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    broken_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    sane_secondary = {"left": 2560, "top": 0, "width": 1920, "height": 1080}
    factory = _make_mock_mss([[composite, broken_primary, sane_secondary]])
    fake_rect = {
        "rect": [100, 100, 800, 600],  # global coordinates inside monitors[1]
        "size": [700, 500],
        "title": "Test Window",
        "app_name": "test_app",
    }
    rect_patch = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=fake_rect,
    )

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            with rect_patch:
                cap = _vision_capturer(tmp_path)
                result = cap.capture_active_window(x=200, y=300, screenshot_id="shot_win_fb")

    assert result is None, (
        f"capture_active_window doit fail-closed sur fallback secondaire, got {result!r}"
    )
# ============================================================================
# Test 10 — mss noir : fallback ImageGrab
# ============================================================================
def test_capture_screen_image_falls_back_to_imagegrab_when_mss_is_black():
    """A black mss frame must be replaced by a usable ImageGrab frame.

    ``_acquire_safe_grab`` is patched to return a black image and
    ``_capture_via_imagegrab`` a coloured one; ``capture_screen_image`` is
    expected to return the latter with the ``imagegrab`` backend tag.
    """
    from agent_v0.agent_v1.vision import capturer

    black_img = _solid_img((0, 0, 0))
    fallback_img = _solid_img((210, 180, 90))
    monitor = {"left": 0, "top": 0, "width": 320, "height": 240}
    imagegrab_meta = {
        "backend": "imagegrab",
        "luma": {"mean": 180.0, "stddev": 0.0, "min": 180, "max": 180},
    }

    with patch.object(capturer, "_acquire_safe_grab", return_value=(monitor, black_img)):
        with patch.object(
            capturer,
            "_capture_via_imagegrab",
            return_value=(monitor, fallback_img, imagegrab_meta),
        ):
            out_monitor, out_img, meta = capturer.capture_screen_image()

    assert out_monitor == monitor
    assert out_img is fallback_img
    assert meta["backend"] == "imagegrab"
# ============================================================================
# Test 11 — capture_dual dégradé : conserver window_capture
# ============================================================================
def test_capture_dual_keeps_window_capture_when_fullscreen_is_unavailable(
    tmp_path: Path,
):
    """The window capture must survive even when full/crop are unavailable.

    ``capture_screen_image`` is patched to return no image; the result must
    then omit ``full`` and ``crop`` but still carry ``window_capture``.
    """
    fake_window = {
        "window_image": str(tmp_path / "window_only.png"),
        "window_title": "Bloc-notes",
        "app_name": "notepad.exe",
        "window_rect": [100, 100, 800, 600],
        "window_size": [700, 500],
        "click_in_window": [42, 24],
        "click_inside_window": True,
    }
    cap = _vision_capturer(tmp_path)
    no_screen = (None, None, {"backend": "mss_black"})

    with patch(
        "agent_v0.agent_v1.vision.capturer.capture_screen_image",
        return_value=no_screen,
    ):
        with patch.object(cap, "capture_active_window", return_value=fake_window):
            result = cap.capture_dual(x=200, y=300, screenshot_id="shot_dual")

    assert "full" not in result
    assert "crop" not in result
    assert result["window_capture"] == fake_window
# ============================================================================
# Test 12 — non-régression : capture_full_context PEUT utiliser le fallback
# ============================================================================
def test_capture_full_context_still_uses_secondary_fallback(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """``capture_full_context`` may still fall back to a secondary monitor.

    This call carries no client coordinates, so any sane screen is
    acceptable: with a broken primary and a sane secondary monitor it must
    still produce a file on disk.
    """
    composite = {"left": 0, "top": 0, "width": 2560, "height": 1660}
    broken_primary = {"left": 0, "top": 0, "width": 2560, "height": 60}
    sane_secondary = {"left": 2560, "top": 0, "width": 1920, "height": 1080}
    factory = _make_mock_mss([[composite, broken_primary, sane_secondary]])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory):
        with patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
            caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
            cap = _vision_capturer(tmp_path)
            result = cap.capture_full_context("test_heartbeat_fb", force=True)

    assert result, (
        f"capture_full_context doit accepter fallback (heartbeat sans coords), got {result!r}"
    )
    assert Path(result).exists()

View File

@@ -0,0 +1,165 @@
"""Tests pour ChatWindow._dispatch_paused_action.
Couvre le routage bus SocketIO → fallback HTTP de la bulle paused.
Le bug d'origine ``paused_bubble: bus déconnecté, resume non émis``
était causé par l'absence de ce fallback (cf.
``docs/CR_AUDIT_PAUSED_RESUME_BUS_2026-05-22.md``).
Les tests appellent ``ChatWindow._dispatch_paused_action`` en tant
que fonction unbound avec un faux ``self`` (``SimpleNamespace``) pour
éviter de démarrer Tkinter pendant les tests unitaires.
"""
from __future__ import annotations
import sys
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.agent_v1.ui.chat_window import ChatWindow # noqa: E402
def _make_self(bus=None, server_client=None):
    """Return a fake ``self`` carrying only ``_bus`` and ``_server_client``."""
    fake_self = SimpleNamespace()
    fake_self._bus = bus
    fake_self._server_client = server_client
    return fake_self
def _call(mock_self, replay_id="replay_xyz",
          bus_method="resume_replay", client_method="resume_replay"):
    """Invoke ``ChatWindow._dispatch_paused_action`` unbound on a fake ``self``."""
    dispatch = ChatWindow._dispatch_paused_action
    return dispatch(
        mock_self,
        replay_id,
        bus_method=bus_method,
        client_method=client_method,
    )
class TestDispatchPausedAction:
    """Routing checks for ``_dispatch_paused_action``: bus first, then HTTP."""

    def test_bus_connected_and_emits_uses_bus(self):
        """A connected bus whose emit succeeds is used; HTTP stays untouched."""
        bus = MagicMock()
        bus.connected = True
        bus.resume_replay.return_value = True
        client = MagicMock()
        client.resume_replay.return_value = True

        ok, via = _call(_make_self(bus=bus, server_client=client))

        assert ok is True
        assert via == "bus"
        bus.resume_replay.assert_called_once_with("replay_xyz")
        client.resume_replay.assert_not_called()

    def test_bus_disconnected_falls_back_to_http(self):
        """A disconnected bus is skipped and the HTTP client is called."""
        bus = MagicMock()
        bus.connected = False
        client = MagicMock()
        client.resume_replay.return_value = True

        ok, via = _call(_make_self(bus=bus, server_client=client))

        assert ok is True
        assert via == "http"
        bus.resume_replay.assert_not_called()
        client.resume_replay.assert_called_once_with("replay_xyz")

    def test_bus_emit_returns_false_falls_back_to_http(self):
        """Bus flagged connected but the emit returns False: switch to HTTP."""
        bus = MagicMock()
        bus.connected = True
        bus.resume_replay.return_value = False
        client = MagicMock()
        client.resume_replay.return_value = True

        ok, via = _call(_make_self(bus=bus, server_client=client))

        assert ok is True
        assert via == "http"

    def test_bus_emit_raises_falls_back_to_http(self):
        """An exception raised by the bus emit also leads to the HTTP path."""
        bus = MagicMock()
        bus.connected = True
        bus.resume_replay.side_effect = RuntimeError("socket broken")
        client = MagicMock()
        client.resume_replay.return_value = True

        ok, via = _call(_make_self(bus=bus, server_client=client))

        assert ok is True
        assert via == "http"

    def test_no_bus_uses_http_directly(self):
        """Without any bus, the HTTP client is the only channel."""
        client = MagicMock()
        client.resume_replay.return_value = True

        ok, via = _call(_make_self(bus=None, server_client=client))

        assert ok is True
        assert via == "http"

    def test_all_channels_fail_returns_false(self):
        """Bus disconnected and HTTP failing: dispatch returns ``(False, '')``.

        Re-enabling the buttons is the caller's job; only the return value is
        checked here.
        """
        bus = MagicMock()
        bus.connected = False
        client = MagicMock()
        client.resume_replay.return_value = False

        ok, via = _call(_make_self(bus=bus, server_client=client))

        assert ok is False
        assert via == ""

    def test_neither_bus_nor_client_returns_false(self):
        """With neither bus nor client, dispatch returns ``(False, '')``."""
        ok, via = _call(_make_self(bus=None, server_client=None))

        assert ok is False
        assert via == ""

    def test_client_method_missing_falls_through(self):
        """A legacy client lacking ``resume_replay`` must not crash dispatch."""
        bus = MagicMock()
        bus.connected = False
        legacy_client = SimpleNamespace()  # no resume_replay attribute

        ok, via = _call(_make_self(bus=bus, server_client=legacy_client))

        assert ok is False
        assert via == ""

    def test_abort_routing_symmetric(self):
        """Abort goes through the same mechanism, using the requested method names."""
        bus = MagicMock()
        bus.connected = False
        client = MagicMock()
        client.abort_replay.return_value = True

        ok, via = _call(
            _make_self(bus=bus, server_client=client),
            bus_method="abort_replay",
            client_method="abort_replay",
        )

        assert ok is True
        assert via == "http"
        client.abort_replay.assert_called_once_with("replay_xyz")
class TestPausedBubbleHeight:
    """Checks for ``ChatWindow._compute_paused_bubble_height`` (truncation patch)."""

    def test_empty_message_uses_minimum_height(self):
        """An empty message gets height 2 and no scrollbar."""
        height, needs_scroll = ChatWindow._compute_paused_bubble_height("")
        assert height == 2
        assert needs_scroll is False

    def test_short_message_no_scrollbar(self):
        """A short single-line message gets height 2 and no scrollbar."""
        height, needs_scroll = ChatWindow._compute_paused_bubble_height("Court message.")
        assert height == 2
        assert needs_scroll is False

    def test_long_single_line_triggers_scrollbar(self):
        """600 characters without a newline: height 11 with a scrollbar."""
        # The original note derives the expected 11 as 600 // 60 + 1.
        long_line = "x" * 600
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(long_line)
        assert height == 11
        assert needs_scroll is True

    def test_message_with_many_newlines_uses_explicit_count(self):
        """Six short explicit lines: height 6 and no scrollbar.

        This is the shape of the original bug: a server reason listing six
        candidates on six short lines.
        """
        six_lines = "\n".join(f"option {i}" for i in range(6))
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(six_lines)
        assert height == 6
        # Below the cap and short content, so no scrollbar is expected.
        assert needs_scroll is False

    def test_cap_reached_triggers_scrollbar_even_if_short(self):
        """Twenty short lines: height is capped at 12 and the scrollbar shows."""
        twenty_lines = "\n".join(f"l{i}" for i in range(20))
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(twenty_lines)
        assert height == 12  # cap
        assert needs_scroll is True

    def test_long_content_triggers_scrollbar_at_200_chars(self):
        """A 220-character message must get a scrollbar (anti-truncation net)."""
        long_text = "x" * 220
        _height, needs_scroll = ChatWindow._compute_paused_bubble_height(long_text)
        assert needs_scroll is True

View File

@@ -16,6 +16,7 @@ sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.api_stream import (
_extract_required_apps_from_events,
_extract_required_apps_from_workflow,
_trim_redundant_setup_events,
_resolve_launch_command,
_infer_app_from_window_titles,
_generate_setup_actions,
@@ -220,6 +221,139 @@ class TestExtractRequiredAppsFromEvents:
# Le premier app hors ignorées est Notepad
assert result["first_window_title"] == "Bloc-notes"
def test_extracts_searchhost_launch_result_target(self):
    """Recover the real SearchHost click that launches the app."""

    def win(app_name, title):
        # Fresh dict on every call so that no two events share a window object.
        return {"app_name": app_name, "title": title}

    def focus_change(before, after, **extra):
        payload = {"type": "window_focus_change", "from": before, "to": after}
        payload.update(extra)
        return {"event": payload}

    screen_w, screen_h = 2560, 1600
    click_x, click_y = 1449, 641

    search_result_click = {
        "type": "mouse_click",
        "button": "left",
        "pos": [click_x, click_y],
        "timestamp": 10.0,
        "screen_metadata": {"screen_resolution": [screen_w, screen_h]},
        "window": win("SearchHost.exe", "Rechercher"),
        "window_capture": {
            "click_relative": [681, 448],
            "window_size": [1287, 1407],
        },
    }
    events = [
        focus_change(None, win("explorer.exe", "Explorateur")),
        focus_change(
            win("explorer.exe", "Explorateur"),
            win("SearchHost.exe", "Rechercher"),
        ),
        {"event": {
            "type": "text_input",
            "text": "bloc",
            "window": win("SearchHost.exe", "Rechercher"),
        }},
        {"event": search_result_click},
        focus_change(
            win("SearchHost.exe", "Rechercher"),
            win("explorer.exe", "unknown_window"),
            timestamp=10.4,
        ),
        focus_change(
            win("explorer.exe", "unknown_window"),
            win("Notepad.exe", "Sans titre Bloc-notes"),
            timestamp=11.1,
        ),
    ]

    result = _extract_required_apps_from_events(events)
    target = result["launch_result_target"]

    assert result["primary_app"] == "Notepad.exe"
    assert target["window_title"] == "Rechercher"
    assert target["expected_window_before"] == "Rechercher"
    # Percentages are the absolute click position over the screen resolution.
    assert target["x_pct"] == pytest.approx(click_x / screen_w, rel=0, abs=1e-6)
    assert target["y_pct"] == pytest.approx(click_y / screen_h, rel=0, abs=1e-6)
    assert target["original_position"]["x_relative"] == "au centre"
    assert target["original_position"]["y_relative"] == "au milieu"
    assert target["window_capture"]["click_relative"] == [681, 448]
def test_extracts_start_menu_target(self):
    """Recover the real Start click that opens SearchHost."""
    screen_w, screen_h = 2560, 1600

    def win(app_name, title):
        # Fresh dict on every call so that no two events share a window object.
        return {"app_name": app_name, "title": title}

    def focus_change(before, after, **extra):
        payload = {"type": "window_focus_change", "from": before, "to": after}
        payload.update(extra)
        return {"event": payload}

    def left_click(pos, timestamp, window):
        return {"event": {
            "type": "mouse_click",
            "button": "left",
            "pos": pos,
            "timestamp": timestamp,
            "screen_metadata": {"screen_resolution": [screen_w, screen_h]},
            "window": window,
        }}

    events = [
        focus_change(None, win("explorer.exe", "Explorateur")),
        left_click([993, 1559], 1.0, win("explorer.exe", "Explorateur")),
        focus_change(
            win("explorer.exe", "Explorateur"),
            win("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        left_click([1449, 641], 4.0, win("SearchHost.exe", "Rechercher")),
        focus_change(
            win("SearchHost.exe", "Rechercher"),
            win("Notepad.exe", "Sans titre Bloc-notes"),
            timestamp=4.4,
        ),
    ]

    result = _extract_required_apps_from_events(events)
    target = result["start_menu_target"]

    # The expected target is the first click (in explorer), not the second.
    assert target["x_pct"] == pytest.approx(993 / screen_w, rel=0, abs=1e-6)
    assert target["y_pct"] == pytest.approx(1559 / screen_h, rel=0, abs=1e-6)
    assert target["original_position"]["x_relative"] == "au centre"
    assert target["original_position"]["y_relative"] == "en bas"
    assert "en bas" in target["position_desc"]
def test_extracts_start_menu_target_anchor_from_session_shot(self, tmp_path):
    """The Start click also gets an image anchor taken from the source shot."""
    from PIL import Image

    def win(app_name, title):
        # Fresh dict on every call so that no two events share a window object.
        return {"app_name": app_name, "title": title}

    def focus_change(before, after, **extra):
        payload = {"type": "window_focus_change", "from": before, "to": after}
        payload.update(extra)
        return {"event": payload}

    # Session layout: <session>/shots/<screenshot_id>_full.png
    session_dir = tmp_path / "sess"
    shots_dir = session_dir / "shots"
    shots_dir.mkdir(parents=True)
    blank_screen = Image.new("RGB", (2560, 1600), color="white")
    blank_screen.save(shots_dir / "shot_start_full.png")

    start_click = {
        "type": "mouse_click",
        "button": "left",
        "pos": [993, 1559],
        "timestamp": 1.0,
        "screenshot_id": "shot_start",
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": win("explorer.exe", "Explorateur"),
    }
    events = [
        focus_change(None, win("explorer.exe", "Explorateur")),
        {"event": start_click},
        focus_change(
            win("explorer.exe", "Explorateur"),
            win("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        focus_change(
            win("SearchHost.exe", "Rechercher"),
            win("Notepad.exe", "Sans titre Bloc-notes"),
            timestamp=2.0,
        ),
    ]

    result = _extract_required_apps_from_events(
        events,
        session_dir=str(session_dir),
    )
    target = result["start_menu_target"]
    assert target["anchor_image_base64"]
def test_extracts_direct_typing_search_interaction(self):
    """Detect that no SearchHost click is required before typing."""

    def win(app_name, title):
        # Fresh dict on every call so that no two events share a window object.
        return {"app_name": app_name, "title": title}

    def focus_change(before, after, **extra):
        payload = {"type": "window_focus_change", "from": before, "to": after}
        payload.update(extra)
        return {"event": payload}

    start_click = {
        "type": "mouse_click",
        "button": "left",
        "pos": [993, 1559],
        "timestamp": 1.0,
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": win("explorer.exe", "Explorateur"),
    }
    # The text is typed right after SearchHost takes focus, with no click
    # inside SearchHost in between.
    typed_query = {
        "type": "text_input",
        "text": "bloc",
        "window": win("SearchHost.exe", "Rechercher"),
        "timestamp": 2.0,
    }
    events = [
        focus_change(None, win("explorer.exe", "Explorateur")),
        {"event": start_click},
        focus_change(
            win("explorer.exe", "Explorateur"),
            win("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        {"event": typed_query},
        focus_change(
            win("SearchHost.exe", "Rechercher"),
            win("Notepad.exe", "Sans titre Bloc-notes"),
            timestamp=2.4,
        ),
    ]

    result = _extract_required_apps_from_events(events)
    assert result["search_box_interaction"]["mode"] == "direct_typing"
    assert result["search_box_interaction"]["window_title"] == "Rechercher"
def test_empty_events(self):
    """No events → empty dict."""
    no_events = []
    extracted = _extract_required_apps_from_events(no_events)
    assert extracted == {}
@@ -245,6 +379,187 @@ class TestExtractRequiredAppsFromEvents:
assert result["primary_launch_cmd"] == "calc"
class TestTrimRedundantSetupEvents:
    """Tests for trimming the preamble that the setup already covers."""

    # ------------------------------------------------------------------
    # Event builders (each call returns fresh dicts, nothing is shared)
    # ------------------------------------------------------------------

    @staticmethod
    def _focus(app_name, title):
        """Build a window_focus_change event towards the given window."""
        return {"event": {
            "type": "window_focus_change",
            "to": {"app_name": app_name, "title": title},
        }}

    @staticmethod
    def _click(pos, app_name, title):
        """Build a mouse_click event at ``pos`` inside the given window."""
        return {"event": {
            "type": "mouse_click",
            "pos": pos,
            "window": {"app_name": app_name, "title": title},
        }}

    @staticmethod
    def _typed(text, app_name, title):
        """Build a text_input event typed inside the given window."""
        return {"event": {
            "type": "text_input",
            "text": text,
            "window": {"app_name": app_name, "title": title},
        }}

    @staticmethod
    def _app_info(primary_app, first_window_title):
        """Build the minimal app_info dict used by these tests."""
        return {
            "primary_app": primary_app,
            "first_window_title": first_window_title,
        }

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_trims_until_first_primary_app_focus(self):
        """Everything up to the first primary-app focus is expected to go."""
        saved_file_title = "http192.168.1.408765dossier.htmlid=.txt Bloc-notes"
        raw_events = [
            self._focus("explorer.exe", "Explorateur"),
            self._click([993, 1559], "explorer.exe", "Explorateur"),
            self._focus("SearchHost.exe", "Rechercher"),
            self._typed("bloc", "SearchHost.exe", "Rechercher"),
            self._click([1449, 641], "SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", saved_file_title),
            self._click([1514, 562], "Notepad.exe", "*test Bloc-notes"),
            self._typed("test", "Notepad.exe", "*test Bloc-notes"),
        ]
        app_info = self._app_info("Notepad.exe", "Bloc-notes")

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert len(trimmed) == 2
        first_kept, second_kept = trimmed[0]["event"], trimmed[1]["event"]
        assert first_kept["type"] == "mouse_click"
        assert first_kept["pos"] == [1514, 562]
        assert second_kept["type"] == "text_input"

    def test_keeps_events_when_no_matching_focus_found(self):
        """Without any focus on the primary app, the events come back as-is."""
        raw_events = [
            self._click([10, 10], "explorer.exe", "Explorateur"),
            self._typed("abc", "explorer.exe", "Explorateur"),
        ]
        app_info = self._app_info("Notepad.exe", "Bloc-notes")

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert trimmed == raw_events

    def test_prefers_neutral_title_focus_after_non_neutral_first_focus(self):
        """Case observed in sess_20260520T102916_066851: the first Notepad
        focus has a non-neutral title (http...txt), followed by a click
        inside Notepad and a focus on 'Sans titre' (= the neutral initial
        state that the auto setup produces). The trim must cut up to the
        neutral focus to drop the redundant intra-Notepad click.
        """
        saved_file_title = "http192.168.1.408765dossier.htmlid=.txt Bloc-notes"
        raw_events = [
            self._focus("SearchHost.exe", "Rechercher"),
            self._click([681, 448], "SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", saved_file_title),
            self._click([1191, 40], "Notepad.exe", saved_file_title),
            self._focus("Notepad.exe", "Sans titre Bloc-notes"),
            self._typed("test", "Notepad.exe", "*test Bloc-notes"),
        ]
        app_info = self._app_info("Notepad.exe", saved_file_title)

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        # The intra-Notepad click (event idx 3) must be removed: it switches
        # to 'Sans titre', which is already the setup state, so replaying it
        # has no visual effect and triggers retry_threshold.
        assert len(trimmed) == 1
        only_kept = trimmed[0]["event"]
        assert only_kept["type"] == "text_input"
        assert only_kept["text"] == "test"

    def test_neutral_focus_outside_lookahead_window_is_ignored(self):
        """Safety net: a 'Sans titre' focus arriving too long after the first
        primary_app focus is not treated as the bootstrap state. Avoids
        cutting a workflow that revisits 'Sans titre' well after startup."""
        report_title = "rapport_final.txt Bloc-notes"
        neutral_title = "Sans titre Bloc-notes"

        # 30 events separate the first focus from the neutral focus.
        raw_events = [self._focus("Notepad.exe", report_title)]
        # Pad with useful intra-Notepad events.
        raw_events.extend(
            self._click([100 + offset, 200], "Notepad.exe", report_title)
            for offset in range(30)
        )
        raw_events.append(self._focus("Notepad.exe", neutral_title))
        raw_events.append(self._typed("x", "Notepad.exe", neutral_title))
        app_info = self._app_info("Notepad.exe", report_title)

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        # Must keep the 30 clicks + late focus + text_input = 32 events
        # (cut only at the first primary_app focus, legacy behaviour).
        assert len(trimmed) == 32
        first_kept = trimmed[0]["event"]
        assert first_kept["type"] == "mouse_click"
        assert first_kept["pos"] == [100, 200]

    def test_keeps_legacy_behavior_when_first_focus_already_neutral(self):
        """Non-regression: when the first primary_app focus already has a
        neutral title (normal case), the cut happens at that first focus as
        before — no needless hunt for a neutral_idx."""
        neutral_title = "Sans titre Bloc-notes"
        raw_events = [
            self._focus("SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", neutral_title),
            self._typed("hello", "Notepad.exe", neutral_title),
        ]
        app_info = self._app_info("Notepad.exe", neutral_title)

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert len(trimmed) == 1
        assert trimmed[0]["event"]["type"] == "text_input"

    def test_neutral_detection_recognizes_office_default_titles(self):
        """Word, Excel and PowerPoint use their own default titles
        (Document1, Classeur1, etc.), which the auto setup also leads to."""
        opened_doc_title = "rapport.docx - Word"
        default_doc_title = "Document1 - Word"
        raw_events = [
            self._focus("winword.exe", opened_doc_title),
            self._click([100, 40], "winword.exe", opened_doc_title),
            self._focus("winword.exe", default_doc_title),
            self._typed("abc", "winword.exe", default_doc_title),
        ]
        app_info = self._app_info("winword.exe", opened_doc_title)

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert len(trimmed) == 1
        assert trimmed[0]["event"]["type"] == "text_input"
# =========================================================================
# Tests pour _extract_required_apps_from_workflow
# =========================================================================
@@ -304,10 +619,10 @@ class TestExtractRequiredAppsFromWorkflow:
# =========================================================================
class TestGenerateSetupActions:
"""Tests pour la génération des actions de setup 100% visuelles."""
"""Tests pour la génération des actions de setup."""
def test_notepad_setup_visual(self):
"""Génère les bonnes actions visuelles pour lancer Notepad."""
def test_notepad_setup_uses_run_dialog(self):
"""Bloc-notes utilise désormais le setup sémantique Win+R."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
@@ -315,74 +630,52 @@ class TestGenerateSetupActions:
}
actions = _generate_setup_actions(app_info)
# 9 actions : click_start, wait, click_search, wait, type, wait, click_result, wait, verify
assert len(actions) == 9
assert len(actions) == 7
# Étape 1 : clic visuel sur le bouton Démarrer
assert actions[0]["type"] == "click"
assert actions[0]["visual_mode"] is True
assert actions[0]["target_spec"]["by_role"] == "start_button"
assert actions[0]["target_spec"]["by_text"] == "Démarrer"
assert actions[0]["type"] == "key_combo"
assert actions[0]["keys"] == ["win", "r"]
assert actions[0]["_setup_step"] == "open_run_dialog"
# Étape 2 : attente menu Démarrer
assert actions[1]["type"] == "wait"
assert actions[1]["duration_ms"] == 1000
assert actions[1]["duration_ms"] == 500
# Étape 3 : clic visuel sur la barre de recherche
assert actions[2]["type"] == "click"
assert actions[2]["visual_mode"] is True
assert actions[2]["target_spec"]["by_role"] == "search_box"
assert actions[2]["type"] == "type"
assert actions[2]["text"] == "notepad"
# Étape 4 : attente barre de recherche active
assert actions[3]["type"] == "wait"
assert actions[3]["duration_ms"] == 500
assert actions[3]["duration_ms"] == 300
# Étape 5 : taper le nom visuel français
assert actions[4]["type"] == "type"
assert actions[4]["text"] == "Bloc-notes"
assert actions[4]["type"] == "key_combo"
assert actions[4]["keys"] == ["enter"]
# Étape 6 : attente résultats
assert actions[5]["type"] == "wait"
assert actions[5]["duration_ms"] == 1200
assert actions[5]["duration_ms"] == 2000
# Étape 7 : clic visuel sur le résultat
assert actions[6]["type"] == "click"
assert actions[6]["visual_mode"] is True
assert actions[6]["target_spec"]["by_text"] == "Bloc-notes"
assert actions[6]["target_spec"]["by_role"] == "app_icon"
# Étape 8 : attente lancement (app légère = 2000ms)
assert actions[7]["type"] == "wait"
assert actions[7]["duration_ms"] == 2000
# Étape 9 : vérification visuelle
assert actions[8]["type"] == "verify_screen"
assert actions[8]["_expected_title"] == "Sans titre Bloc-notes"
assert actions[6]["type"] == "verify_screen"
assert actions[6]["expected_window_title_contains"] == ["Bloc-notes", "notepad"]
# Toutes les actions sont marquées comme phase setup
for action in actions:
assert action.get("_setup_phase") is True
assert action.get("_setup_strategy") == "run_dialog"
def test_no_key_combo_in_setup(self):
"""AUCUNE action key_combo ne doit être générée dans le setup."""
def test_visual_setup_keeps_no_key_combo_for_word(self):
"""Le setup visuel classique ne doit pas introduire de key_combo."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
"first_window_title": "Bloc-notes",
"primary_app": "winword.exe",
"primary_launch_cmd": "winword",
"first_window_title": "Document1 - Word",
}
actions = _generate_setup_actions(app_info)
key_combos = [a for a in actions if a["type"] == "key_combo"]
assert key_combos == [], (
"Le setup 100% visuel ne doit JAMAIS contenir de key_combo. "
f"Trouvé : {key_combos}"
)
assert key_combos == []
def test_all_clicks_are_visual(self):
"""Tous les clics du setup doivent avoir visual_mode=True et un target_spec."""
def test_all_clicks_are_visual_for_visual_setup(self):
"""Tous les clics du setup visuel doivent avoir visual_mode=True."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
"first_window_title": "Bloc-notes",
"primary_app": "winword.exe",
"primary_launch_cmd": "winword",
"first_window_title": "Document1 - Word",
}
actions = _generate_setup_actions(app_info)
clicks = [a for a in actions if a["type"] == "click"]
@@ -402,11 +695,11 @@ class TestGenerateSetupActions:
assert "vlm_description" in spec, f"target_spec sans vlm_description : {spec}"
def test_clicks_have_fallback_coordinates(self):
"""Tous les clics visuels ont des coordonnées de fallback (x_pct, y_pct)."""
"""Tous les clics visuels ont des coordonnées de fallback."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
"first_window_title": "Bloc-notes",
"primary_app": "winword.exe",
"primary_launch_cmd": "winword",
"first_window_title": "Document1 - Word",
}
actions = _generate_setup_actions(app_info)
clicks = [a for a in actions if a["type"] == "click"]
@@ -456,28 +749,130 @@ class TestGenerateSetupActions:
click_result = [a for a in actions if a.get("_setup_step") == "click_app_result"][0]
assert click_result["target_spec"]["by_text"] == "Microsoft Word"
def test_verify_screen_present_with_title(self):
"""Un verify_screen est ajouté quand un titre de fenêtre est connu."""
def test_prefers_recorded_searchhost_click_target(self):
    """The setup reuses the real recorded SearchHost target when it exists."""
    recorded_target = {
        "x_pct": 0.566016,
        "y_pct": 0.400625,
        "window_title": "Rechercher",
        "expected_window_before": "Rechercher",
        "original_position": {
            "x_relative": "au centre",
            "y_relative": "au milieu",
        },
        "window_capture": {
            "click_relative": [681, 448],
            "window_size": [1287, 1407],
        },
        "position_desc": "au milieu au centre",
    }
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "launch_result_target": recorded_target,
    }

    actions = _generate_setup_actions(app_info)
    result_clicks = [
        action for action in actions
        if action.get("_setup_step") == "click_app_result"
    ]
    click_result = result_clicks[0]

    # Recorded coordinates and window guard are carried onto the action.
    assert click_result["x_pct"] == pytest.approx(0.566016)
    assert click_result["y_pct"] == pytest.approx(0.400625)
    assert click_result["expected_window_before"] == "Rechercher"

    # The target_spec combines the app label with the recorded context.
    spec = click_result["target_spec"]
    assert spec["by_text"] == "Microsoft Word"
    assert spec["by_role"] == "search_result"
    assert spec["allow_position_fallback"] is True
    assert spec["window_title"] == "Rechercher"
    assert spec["original_position"]["x_relative"] == "au centre"
    assert spec["window_capture"]["window_size"] == [1287, 1407]
    assert "résultat de recherche" in spec["vlm_description"]
def test_prefers_recorded_start_button_target(self):
    """The visual setup reuses the real recorded Start click when it exists."""
    recorded_start = {
        "x_pct": 0.387891,
        "y_pct": 0.974375,
        "anchor_image_base64": "abc123",
        "original_position": {
            "x_relative": "au centre",
            "y_relative": "en bas",
        },
        "position_desc": "en bas au centre",
    }
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "start_menu_target": recorded_start,
    }

    actions = _generate_setup_actions(app_info)
    start_clicks = [
        action for action in actions
        if action.get("_setup_step") == "click_start_menu"
    ]
    click_start = start_clicks[0]

    # Recorded coordinates are carried onto the action.
    assert click_start["x_pct"] == pytest.approx(0.387891)
    assert click_start["y_pct"] == pytest.approx(0.974375)

    spec = click_start["target_spec"]
    assert spec["by_text"] == ""
    assert spec["by_role"] == "start_button"
    assert spec["screen_scope"] == "full_screen"
    assert spec["allow_position_fallback"] is True
    assert spec["anchor_image_base64"] == "abc123"
    assert spec["original_position"]["y_relative"] == "en bas"
    assert "icône Windows" in spec["vlm_description"]
def test_skips_search_click_for_direct_typing(self):
    """When the session types straight into SearchHost, click_search and its
    dedicated wait/verify are skipped. The verify_start_menu_open guard
    stays mandatory and is meant to precede the typing."""
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "search_box_interaction": {
            "mode": "direct_typing",
            "window_title": "Rechercher",
        },
    }

    actions = _generate_setup_actions(app_info)
    setup_steps = [action.get("_setup_step") for action in actions]

    assert "click_search_box" not in setup_steps
    assert "wait_search_ready" not in setup_steps
    assert "verify_search_box_active" not in setup_steps
    # Generic guard kept — it is the one that secures the typing.
    assert "verify_start_menu_open" in setup_steps

    type_action = actions[setup_steps.index("type_app_name")]
    assert type_action["type"] == "type"
    assert type_action["text"] == "Word"
def test_verify_screen_final_present_with_title(self):
"""Le setup run_dialog termine par une vérification souple sur le titre app."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
"first_window_title": "Sans titre Bloc-notes",
}
actions = _generate_setup_actions(app_info)
verify = [a for a in actions if a.get("type") == "verify_screen"]
assert len(verify) == 1
assert verify[0]["_expected_title"] == "Sans titre Bloc-notes"
final_verifies = [
a for a in actions
if a.get("type") == "verify_screen"
and a.get("_setup_step") == "verify_app_ready"
]
assert len(final_verifies) == 1
assert "Bloc-notes" in final_verifies[0]["expected_window_title_contains"]
def test_no_verify_without_title(self):
"""Pas de verify_screen si aucun titre de fenêtre n'est connu."""
def test_run_dialog_keeps_final_verify_even_without_exact_title(self):
"""Le setup run_dialog garde une vérification finale générique."""
app_info = {
"primary_app": "Notepad.exe",
"primary_launch_cmd": "notepad",
"first_window_title": "",
}
actions = _generate_setup_actions(app_info)
verify = [a for a in actions if a.get("type") == "verify_screen"]
assert len(verify) == 0
# Aucun verify_screen ne doit porter _expected_title.
final_verifies = [
a for a in actions
if a.get("type") == "verify_screen"
and a.get("_setup_step") == "verify_app_ready"
]
assert len(final_verifies) == 1
assert "notepad" in [p.lower() for p in final_verifies[0]["expected_window_title_contains"]]
def test_empty_app_info(self):
"""Dict vide → pas d'actions."""
@@ -537,12 +932,184 @@ class TestGenerateSetupActions:
assert type_action["text"] == "MonAppMedical"
# =========================================================================
# Tests des gardes visuelles du setup (verify_screen titre fenêtre)
# =========================================================================
class TestSetupVisualGuards:
    """Covers the visual guards inserted between the steps of the Windows
    auto setup (after the live `position_fallback` blockage of 22 May 2026).

    Without these guards, a Start click that actually hit the systray
    overflow popup let the setup type "bloc" into the wrong window, and only
    the final `click_result` surfaced the error — too late. The window-title
    `verify_screen` checks stop the run right after each critical step.
    """

    @staticmethod
    def _word_app_info(search_box_interaction=None):
        """Build the Word app_info used throughout, optionally with a
        ``search_box_interaction`` entry appended last."""
        info = {
            "primary_app": "winword.exe",
            "primary_launch_cmd": "winword",
            "first_window_title": "Document1 - Word",
        }
        if search_box_interaction is not None:
            info["search_box_interaction"] = search_box_interaction
        return info

    @staticmethod
    def _setup_steps(actions):
        """Return the ``_setup_step`` label of every action, in order."""
        return [action.get("_setup_step") for action in actions]

    def test_verify_start_menu_open_inserted_after_wait_start(self):
        """A verify_screen guard is inserted right after wait_start_menu."""
        actions = _generate_setup_actions(self._word_app_info())
        steps = self._setup_steps(actions)

        # Order: click_start_menu → wait_start_menu → verify_start_menu_open
        assert "verify_start_menu_open" in steps
        idx_wait = steps.index("wait_start_menu")
        idx_verify = steps.index("verify_start_menu_open")
        assert idx_verify == idx_wait + 1

        guard = actions[idx_verify]
        assert guard["type"] == "verify_screen"
        assert guard.get("_setup_phase") is True
        patterns = guard.get("expected_window_title_contains") or []
        assert isinstance(patterns, list) and patterns
        lowered = [pattern.lower() for pattern in patterns]
        # Must cover at least FR + EN + the SearchHost / StartMenu app.
        assert any("recherch" in pattern for pattern in lowered), patterns
        assert any("search" in pattern for pattern in lowered), patterns

    def test_verify_search_box_active_inserted_when_click_then_type(self):
        """When the setup clicks the Search bar and then waits, a
        verify_screen guard follows the wait to block typing if the focus is
        not really in the bar."""
        app_info = self._word_app_info({
            "mode": "click_then_type",
            "window_title": "Rechercher",
            "x_pct": 0.10, "y_pct": 0.95,
        })
        actions = _generate_setup_actions(app_info)
        steps = self._setup_steps(actions)

        assert "verify_search_box_active" in steps
        idx_wait_ready = steps.index("wait_search_ready")
        idx_verify = steps.index("verify_search_box_active")
        idx_type = steps.index("type_app_name")
        # Order: wait_search_ready → verify_search_box_active → type_app_name
        assert idx_verify == idx_wait_ready + 1
        assert idx_type == idx_verify + 1

        guard = actions[idx_verify]
        assert guard["type"] == "verify_screen"
        patterns = guard.get("expected_window_title_contains") or []
        assert "Rechercher" in patterns or any(
            pattern.lower() == "rechercher" for pattern in patterns
        )

    def test_no_verify_search_box_when_direct_typing(self):
        """In direct_typing mode there is no click on the bar — hence no
        dedicated verify_search_box_active (the verify_start_menu_open guard
        is enough, typing follows directly)."""
        app_info = self._word_app_info({
            "mode": "direct_typing",
            "window_title": "Rechercher",
        })
        actions = _generate_setup_actions(app_info)
        steps = self._setup_steps(actions)

        assert "verify_search_box_active" not in steps
        # The verify_start_menu_open guard stays present (covers the typing).
        assert "verify_start_menu_open" in steps
        idx_verify = steps.index("verify_start_menu_open")
        idx_type = steps.index("type_app_name")
        assert idx_type > idx_verify, (
            "type_app_name doit suivre verify_start_menu_open en direct_typing"
        )

    def test_verify_search_results_visible_inserted_before_click_result(self):
        """Last safety net: the Search bar (and its results) must still be
        active right before `click_app_result`. Without this final guard, a
        focus lost during `wait_search_results` can make `click_app_result`
        click on the wrong surface (observed live 2026-05-22 — window seen:
        ``Fenêtre de dépassement de capacité de la barre d'état
        système.``)."""
        actions = _generate_setup_actions(self._word_app_info())
        steps = self._setup_steps(actions)

        assert "verify_search_results_visible" in steps
        idx_wait_results = steps.index("wait_search_results")
        idx_verify = steps.index("verify_search_results_visible")
        idx_click_result = steps.index("click_app_result")
        # Order: wait_search_results → verify_search_results_visible
        #        → click_app_result
        assert idx_verify == idx_wait_results + 1
        assert idx_click_result == idx_verify + 1

        guard = actions[idx_verify]
        assert guard["type"] == "verify_screen"
        patterns = guard.get("expected_window_title_contains") or []
        assert isinstance(patterns, list) and patterns
        lowered = [pattern.lower() for pattern in patterns]
        assert any("recherch" in pattern for pattern in lowered), patterns
        assert any("search" in pattern for pattern in lowered), patterns

    def test_verify_search_results_visible_present_in_direct_typing(self):
        """The final guard before click_app_result stays mandatory whatever
        the Search bar interaction mode."""
        app_info = self._word_app_info({
            "mode": "direct_typing",
            "window_title": "Rechercher",
        })
        actions = _generate_setup_actions(app_info)
        steps = self._setup_steps(actions)

        assert "verify_search_results_visible" in steps

    def test_setup_guards_have_short_timeout(self):
        """The verify_screen guards have a short timeout (<= 2 s) — it is a
        title check, not a long wait."""
        app_info = self._word_app_info({
            "mode": "click_then_type",
            "window_title": "Rechercher",
        })
        actions = _generate_setup_actions(app_info)

        guard_steps = (
            "verify_start_menu_open",
            "verify_search_box_active",
            "verify_search_results_visible",
        )
        guards = [
            action for action in actions
            if action.get("_setup_step") in guard_steps
        ]
        assert guards, "il doit exister au moins une garde verify_screen"
        for guard in guards:
            # A guard without an explicit timeout counts as 5000 ms and fails.
            assert guard.get("timeout_ms", 5000) <= 2000
# =========================================================================
# Tests d'intégration : pipeline complet events → setup visuel
# =========================================================================
class TestSetupPipeline:
"""Tests du pipeline complet : extraction + génération visuelle."""
"""Tests du pipeline complet : extraction + génération du setup."""
def test_full_pipeline_from_events(self):
"""Pipeline complet depuis des événements bruts de type Notepad."""
@@ -561,24 +1128,25 @@ class TestSetupPipeline:
assert app_info["primary_app"] == "Notepad.exe"
actions = _generate_setup_actions(app_info)
assert len(actions) >= 8 # Au minimum 8 actions visuelles (sans verify si pas de titre)
assert len(actions) == 7
# Vérifier l'ordre logique 100% visuel
types = [a["type"] for a in actions]
assert types[0] == "click" # Clic Démarrer
assert types[1] == "wait" # Attente menu
assert types[2] == "click" # Clic barre de recherche
assert types[3] == "wait" # Attente barre active
assert types[4] == "type" # Taper le nom
assert types[5] == "wait" # Attente résultats
assert types[6] == "click" # Clic sur le résultat
assert types[7] == "wait" # Attente lancement
steps = [a.get("_setup_step") for a in actions]
expected_step_order = [
"open_run_dialog",
"wait_run_dialog",
"type_launch_command",
"wait_launch_command",
"submit_run_dialog",
"wait_app_launch",
"verify_app_ready",
]
assert steps == expected_step_order, steps
# AUCUN key_combo dans le pipeline
assert "key_combo" not in types, "Le pipeline ne doit contenir aucun key_combo"
assert types.count("key_combo") == 2
# Le texte tapé est le nom visuel français
assert actions[4]["text"] == "Bloc-notes"
idx_type = steps.index("type_launch_command")
assert actions[idx_type]["text"] == "notepad"
def test_full_pipeline_from_workflow(self):
"""Pipeline complet depuis un workflow structuré."""
@@ -599,12 +1167,12 @@ class TestSetupPipeline:
assert app_info["primary_app"] == "Notepad.exe"
actions = _generate_setup_actions(app_info)
assert len(actions) >= 8
assert len(actions) == 7
# Le texte tapé doit être le nom visuel, pas la commande shell
# Le texte tapé doit être la commande shell pour le setup Win+R.
type_action = [a for a in actions if a["type"] == "type"][0]
assert type_action["text"] == "Bloc-notes"
assert type_action["text"] == "notepad"
# Aucun key_combo
# Le setup Notepad s'appuie maintenant sur deux key_combo.
key_combos = [a for a in actions if a["type"] == "key_combo"]
assert key_combos == []
assert len(key_combos) == 2

View File

@@ -0,0 +1,79 @@
"""Tests pour la garde drift de `_template_match_anchor`.
Brief Codex 2026-05-23 07:56 : faux succès live `act_raw_77db702f` où
ANCHOR-TM matche un crop dans OBS Studio à (0.205, 0.170) score 0.842
alors que la position enregistrée est ~(0.706, 0.348) dans Bloc-notes.
La cascade serveur avait rejeté (`rejected_text_mismatch`) mais l'agent
fallback ANCHOR-TM côté client sans aucune garde de position acceptait
n'importe quel match au-dessus du seuil score.
Le helper statique `_anchor_match_within_drift` rejette les matchs
loin de la position fallback enregistrée.
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402
class TestAnchorMatchDriftGuard:
    """Checks for ``ActionExecutorV1._anchor_match_within_drift``.

    Every case passes a matched position and a recorded fallback position
    (normalized coordinates) and asserts whether the match is accepted.
    """

    def test_match_close_to_fallback_accepted(self):
        """A match 0.004 / 0.002 away from the recorded position is accepted."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.71,
            matched_y_pct=0.35,
            fallback_x_pct=0.706,
            fallback_y_pct=0.348,
        )
        assert accepted

    def test_match_far_from_fallback_rejected(self):
        """Exact coordinates of the live false positive are rejected."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.205,
            matched_y_pct=0.170,
            fallback_x_pct=0.706,
            fallback_y_pct=0.348,
        )
        assert not accepted

    def test_drift_at_threshold_accepted(self):
        """A drift of exactly 0.25 on x (the boundary) is still accepted."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.5,
            matched_y_pct=0.5,
            fallback_x_pct=0.25,
            fallback_y_pct=0.5,
        )
        assert accepted

    def test_drift_just_above_threshold_rejected(self):
        """A drift of 0.26 on x is rejected."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.5,
            matched_y_pct=0.5,
            fallback_x_pct=0.24,
            fallback_y_pct=0.5,
        )
        assert not accepted

    def test_no_recorded_fallback_keeps_legacy_behavior(self):
        """With a (0, 0) fallback there is nothing to guard against."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.5,
            matched_y_pct=0.5,
            fallback_x_pct=0.0,
            fallback_y_pct=0.0,
        )
        assert accepted

    def test_custom_max_drift(self):
        """The caller can tighten the threshold through ``max_drift``."""
        positions = {
            "matched_x_pct": 0.65,
            "matched_y_pct": 0.50,
            "fallback_x_pct": 0.50,
            "fallback_y_pct": 0.50,
        }
        # A 0.15 drift is rejected once max_drift is lowered to 0.10 ...
        strict = ActionExecutorV1._anchor_match_within_drift(
            max_drift=0.10, **positions
        )
        assert not strict
        # ... but the same drift passes with the default threshold.
        default = ActionExecutorV1._anchor_match_within_drift(**positions)
        assert default

    def test_drift_y_axis(self):
        """Too much drift on y is rejected even when x is spot on."""
        accepted = ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=0.50,
            matched_y_pct=0.95,
            fallback_x_pct=0.50,
            fallback_y_pct=0.50,
        )
        assert not accepted

View File

@@ -0,0 +1,744 @@
"""Tests pour la garde verify_screen.expected_window_title_contains.
Cette garde protège les étapes du setup auto Windows contre les
configurations où ``click_start_menu`` se trompe de cible (systray
overflow popup, par exemple) et laisse la frappe partir dans la
mauvaise fenêtre. Ajoutée le 22 mai 2026 — cf.
``docs/CR_AUDIT_SETUP_VISUAL_GUARDS_2026-05-22.md``.
On teste deux choses :
1. Le helper statique ``_window_title_matches_any`` (substring + case).
2. Le routage de la garde dans ``verify_screen`` : succès si titre
matche, bascule en mode apprentissage / pause sinon.
"""
from __future__ import annotations
import sys
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402
# =========================================================================
# Helper substring matching
# =========================================================================
class TestWindowTitleMatchesAny:
    """Checks for the static helper ``_window_title_matches_any``."""

    def test_substring_match(self):
        """An identical title and pattern match."""
        matched = ActionExecutorV1._window_title_matches_any(
            "Rechercher", ["Rechercher"]
        )
        assert matched

    def test_case_insensitive(self):
        """Letter case is ignored when comparing."""
        matched = ActionExecutorV1._window_title_matches_any(
            "RECHERCHER - Cortana", ["rechercher"]
        )
        assert matched

    def test_partial_match_first_pattern(self):
        """One matching pattern is enough, even if the first one misses."""
        candidates = ["search", "rechercher", "cortana"]
        matched = ActionExecutorV1._window_title_matches_any(
            "Cortana - Rechercher", candidates
        )
        assert matched

    def test_no_match_returns_false(self):
        """A title containing none of the patterns does not match."""
        matched = ActionExecutorV1._window_title_matches_any(
            "Fenêtre de dépassement de capacité de la barre d'état système",
            ["Rechercher", "Search", "Cortana"],
        )
        assert not matched

    def test_empty_patterns_returns_true(self):
        """No requested pattern means the guard is neutral."""
        for no_patterns in ([], None):
            assert ActionExecutorV1._window_title_matches_any("X", no_patterns)

    def test_empty_title_with_patterns_returns_false(self):
        """An empty title cannot satisfy a real pattern."""
        matched = ActionExecutorV1._window_title_matches_any("", ["X"])
        assert not matched

    def test_ignore_empty_pattern_entries(self):
        """Empty entries in the list must not match every title."""
        blank_entries = ["", None, ""]
        matched = ActionExecutorV1._window_title_matches_any(
            "rien à voir", blank_entries
        )
        assert not matched
class TestKnownRuntimeDialogs:
    """Checks for ``ActionExecutorV1._match_known_runtime_dialog``."""

    def test_match_confirm_save_overwrite_dialog(self):
        """The ASCII-apostrophe title resolves to the overwrite dialog spec."""
        spec = ActionExecutorV1._match_known_runtime_dialog(
            "Confirmer l'enregistrement"
        )
        assert spec is not None
        assert spec["id"] == "confirm_save_overwrite"
        assert spec["button_texts"][0] == "Oui"

    def test_match_confirm_save_overwrite_dialog_with_typographic_apostrophe(self):
        """The same title written with U+2019 must resolve to the same spec."""
        # The apostrophe is spelled as an escape so that it cannot be lost or
        # re-encoded silently by an editor or a copy/paste.
        spec = ActionExecutorV1._match_known_runtime_dialog(
            "Confirmer l\u2019enregistrement"
        )
        assert spec is not None
        assert spec["id"] == "confirm_save_overwrite"

    def test_unknown_title_returns_none(self):
        """A regular application title is not a known runtime dialog."""
        spec = ActionExecutorV1._match_known_runtime_dialog("Bloc-notes")
        assert spec is None
class TestContextualRuntimeDialogs:
    """Checks for ``_maybe_contextualize_action_to_foreground_dialog``."""

    @staticmethod
    def _only_dont_save_is_visible(_shot, text):
        """OCR stub: only the "Ne pas enregistrer" label is found on screen."""
        if text == "Ne pas enregistrer":
            return (100, 100)
        return None

    def test_contextual_notepad_unsaved_dialog_is_detected_via_visual_evidence(self):
        """A click on "Enregistrer" is retargeted to the foreground dialog."""
        exe = _make_executor_skeleton()
        exe._capture_screenshot_b64 = MagicMock(return_value="shot")
        exe._find_text_on_screen = MagicMock(
            side_effect=self._only_dont_save_is_visible
        )
        recorded_spec = {
            "window_title": "*test Bloc-notes",
            "by_text": "Enregistrer",
        }
        action = {
            "action_id": "act_save_from_dialog",
            "type": "click",
            "visual_mode": True,
            "target_spec": recorded_spec,
            "expected_window_before": "*test Bloc-notes",
        }
        target_spec = dict(action["target_spec"])
        window_info = patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info",
            return_value={"title": "Bloc-notes", "app_name": "Notepad.exe"},
        )
        window_rect = patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
            return_value={
                "title": "Bloc-notes",
                "app_name": "Notepad.exe",
                "rect": [500, 300, 1400, 900],
            },
        )
        with window_info, window_rect:
            adapted = exe._maybe_contextualize_action_to_foreground_dialog(
                action,
                target_spec,
            )

        assert adapted is not None
        assert adapted["dialog_spec"]["id"] == "notepad_unsaved_changes"
        assert adapted["action"]["expected_window_before"] == "Bloc-notes"
        adapted_spec = adapted["target_spec"]
        assert adapted_spec["window_title"] == "Bloc-notes"
        assert adapted_spec["context_hints"]["foreground_dialog_id"] == (
            "notepad_unsaved_changes"
        )
        assert adapted_spec["window_capture"]["rect"] == [500, 300, 1400, 900]

    def test_contextual_notepad_dialog_is_ignored_without_matching_action(self):
        """A click on an unrelated button ("Annuler") is left untouched."""
        exe = _make_executor_skeleton()
        exe._capture_screenshot_b64 = MagicMock(return_value="shot")
        exe._find_text_on_screen = MagicMock(
            side_effect=self._only_dont_save_is_visible
        )
        action = {
            "action_id": "act_other_button",
            "type": "click",
            "visual_mode": True,
            "target_spec": {
                "window_title": "*test Bloc-notes",
                "by_text": "Annuler",
            },
        }
        window_info = patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info",
            return_value={"title": "Bloc-notes", "app_name": "Notepad.exe"},
        )
        with window_info:
            adapted = exe._maybe_contextualize_action_to_foreground_dialog(
                action,
                dict(action["target_spec"]),
            )

        assert adapted is None
class TestPostVerifyWindowTransition:
    """Checks for ``_requires_post_verify_window_transition``."""

    def test_requires_transition_when_expected_after_differs_from_source_window(self):
        """Going from the editor window to "Enregistrer sous" is a transition."""
        source_action = {"expected_window_before": "*test Bloc-notes"}
        needs_transition = ActionExecutorV1._requires_post_verify_window_transition(
            action=source_action,
            target_spec=None,
            expected_after="Enregistrer sous",
        )
        assert needs_transition

    def test_same_window_title_does_not_require_transition(self):
        """The same title without the leading ``*`` is not a transition."""
        source_action = {"expected_window_before": "*test Bloc-notes"}
        needs_transition = ActionExecutorV1._requires_post_verify_window_transition(
            action=source_action,
            target_spec=None,
            expected_after="test Bloc-notes",
        )
        assert not needs_transition
# =========================================================================
# Routage de la garde dans verify_screen
# =========================================================================
def _make_executor_skeleton():
    """Return an ``ActionExecutorV1`` created without running ``__init__``.

    The instance is allocated with ``__new__`` and only the attributes
    needed by the branches under test are wired by hand, so no mouse,
    keyboard or screen-capture backend is created.
    """
    exe = ActionExecutorV1.__new__(ActionExecutorV1)

    plain_attributes = {
        "_notification_manager": None,
        "_system_dialog_pause": None,
        "_chat_window_ref": None,
        "_api_token": "",
        "_poll_backoff": 1.0,
        "_poll_backoff_min": 1.0,
        "_poll_backoff_max": 30.0,
        "_poll_backoff_factor": 1.5,
    }
    for attr_name, attr_value in plain_attributes.items():
        setattr(exe, attr_name, attr_value)

    # Fake mss handle exposing a single 1920x1080 monitor at index 1.
    fake_sct = MagicMock()
    fake_sct.monitors = [None, {"width": 1920, "height": 1080}]
    exe._sct = fake_sct

    # Stub the agent-side I/O helpers.
    exe._check_and_pause_on_system_dialog = MagicMock(return_value=False)
    exe._capture_screenshot_b64 = MagicMock(return_value=None)
    return exe
def _verify_action(patterns, timeout_ms=200):
return {
"action_id": "act_test_verify",
"type": "verify_screen",
"expected_node": "",
"timeout_ms": timeout_ms,
"expected_window_title_contains": patterns,
}
class TestVerifyScreenWindowGuard:
    """Routing of the window-title guard inside ``execute_replay_action``."""

    _ACTIVE_WINDOW_INFO = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info"
    )

    def test_matching_title_returns_success(self):
        """A matching active title passes without the mismatch warning."""
        exe = _make_executor_skeleton()
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={"title": "Rechercher"},
        )
        with active_window:
            res = exe.execute_replay_action(
                _verify_action(["Rechercher", "Search"])
            )

        assert res["success"] is True
        assert res.get("warning") != "setup_guard_window_mismatch"

    def test_mismatch_with_human_correction_returns_success_supervised(self):
        """A mismatch followed by a human corrective click is a success."""
        exe = _make_executor_skeleton()
        # The user performs one corrective click when learning mode starts;
        # the captured sequence is handed back in the result.
        human_click = {"type": "click", "x_pct": 0.10, "y_pct": 0.95}
        exe._capture_human_correction = MagicMock(return_value=[human_click])
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={
                "title": "Fenêtre de dépassement de capacité de la barre d'état système",
            },
        )
        with active_window:
            res = exe.execute_replay_action(
                _verify_action(["Rechercher", "Search"])
            )

        assert res["success"] is True
        assert res["warning"] == "setup_guard_window_mismatch"
        assert res["resolution_method"] == "human_supervised"
        correction = res["correction"]
        assert correction["trigger"] == "setup_guard_window_mismatch"
        assert correction["expected_patterns"] == ["Rechercher", "Search"]

    def test_mismatch_without_human_pauses_replay(self):
        """A mismatch with no human correction fails and asks for a human."""
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock(return_value=[])
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={"title": "Notepad - Sans titre"},
        )
        with active_window:
            res = exe.execute_replay_action(_verify_action(["Rechercher"]))

        assert res["success"] is False
        assert res["warning"] == "setup_guard_window_mismatch"
        assert res.get("needs_human") is True
        assert "Rechercher" in res["error"]

    def test_verify_without_patterns_is_neutral_wait(self):
        """Without title patterns, verify_screen never enters learning mode."""
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock()
        neutral_action = {
            "action_id": "act_test_verify_neutral",
            "type": "verify_screen",
            "expected_node": "node_x",
            "timeout_ms": 200,
        }

        res = exe.execute_replay_action(neutral_action)

        assert res["success"] is True
        exe._capture_human_correction.assert_not_called()

    def test_known_runtime_dialog_is_auto_handled_before_pause(self):
        """A known dialog in the foreground is handled instead of pausing."""
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock(return_value=[])
        skip_result = {
            "action_id": "act_test_click",
            "success": True,
            "warning": "runtime_dialog_handled_skip",
            "resolution_method": "runtime_dialog:confirm_save_overwrite",
            "screenshot": None,
            "visual_resolved": False,
        }
        exe._maybe_handle_runtime_dialog_before_pause = MagicMock(
            return_value=skip_result
        )
        click_action = {
            "action_id": "act_test_click",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.5,
            "y_pct": 0.5,
            "target_spec": {
                "window_title": "http192.168.1.408765dossier.htmlid=.txt Bloc-notes",
                "by_text": "",
            },
        }
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={"title": "Confirmer l'enregistrement"},
        )
        with active_window:
            res = exe.execute_replay_action(click_action)

        assert res["success"] is True
        assert res["warning"] == "runtime_dialog_handled_skip"
        exe._maybe_handle_runtime_dialog_before_pause.assert_called_once()
        exe._capture_human_correction.assert_not_called()
# =========================================================================
# Skip pixel-change validation pour les actions _setup_phase
# =========================================================================
def _make_executor_with_mouse_skeleton():
    """Extend ``_make_executor_skeleton`` with mouse and keyboard mocks.

    This lets a test go through the click branch of
    ``execute_replay_action`` without touching the real desktop.
    """
    exe = _make_executor_skeleton()

    fake_mouse = MagicMock()
    fake_mouse.position = (0, 0)
    exe.mouse = fake_mouse
    exe.keyboard = MagicMock()

    # A non-empty hash string keeps the pixel-change check active.
    exe._quick_screenshot_hash = MagicMock(return_value="hash_before")
    return exe
class TestSetupActionsSkipPixelChange:
    """Pixel-change validation is skipped for ``_setup_phase`` actions.

    For automatic setup actions the following verify_screen guard decides
    the outcome; otherwise a start click that opens the systray overflow
    popup would be validated merely because the screen changed.
    """

    def test_setup_click_skips_screen_change_check(self):
        """A setup click succeeds without the screen-change check."""
        exe = _make_executor_with_mouse_skeleton()
        exe._wait_for_screen_change = MagicMock(return_value=False)
        exe._capture_human_correction = MagicMock()
        # No visual_mode, so no real visual resolution is attempted.
        setup_click = {
            "action_id": "act_setup_click_start",
            "type": "click",
            "x_pct": 0.02,
            "y_pct": 0.98,
            "_setup_phase": True,
            "_setup_step": "click_start_menu",
        }

        res = exe.execute_replay_action(setup_click)

        assert res["success"] is True
        # Neither the screen-change wait nor learning mode may be triggered
        # for a setup action.
        exe._wait_for_screen_change.assert_not_called()
        exe._capture_human_correction.assert_not_called()

    def test_non_setup_click_still_runs_screen_change_check(self):
        """Non-regression: a regular click keeps the pixel-change check."""
        exe = _make_executor_with_mouse_skeleton()
        exe._wait_for_screen_change = MagicMock(return_value=False)
        exe._capture_human_correction = MagicMock(return_value=[])
        # Deliberately no _setup_phase key.
        user_click = {
            "action_id": "act_user_click",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
        }

        res = exe.execute_replay_action(user_click)

        exe._wait_for_screen_change.assert_called_once()
        # Without visual_mode this is the plain failure branch.
        assert res.get("warning") == "no_screen_change"
        assert res["success"] is False
class TestRuntimeDialogHandling:
    """Handling of known runtime dialogs before and after a replayed click."""

    _SLEEP = "agent_v0.agent_v1.core.executor.time.sleep"
    _ACTIVE_WINDOW_INFO = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info"
    )
    _SAVED_FILE_TITLE = "http192.168.1.408765dossier.htmlid=.txt Bloc-notes"
    # NOTE(review): the reviewed text read "Confirmer lenregistrement" with no
    # apostrophe at all. It is restored as U+2019, the variant covered by the
    # typographic-apostrophe matcher test — confirm against the live title.
    _CONFIRM_SAVE_TITLE_TYPOGRAPHIC = "Confirmer l\u2019enregistrement"

    @staticmethod
    def _handled_with_oui():
        """Return a fresh fake result for a dialog dismissed through "Oui"."""
        return {
            "handled": True,
            "button_text": "Oui",
            "x_pct": 0.33,
            "y_pct": 0.66,
            "resolution_score": 0.9,
        }

    @staticmethod
    def _executor_ready_to_click():
        """Return a skeleton executor whose click path is fully stubbed."""
        exe = _make_executor_skeleton()
        exe._click = MagicMock()
        exe._quick_screenshot_hash = MagicMock(return_value="hash_before")
        exe._wait_for_screen_change = MagicMock(return_value=True)
        return exe

    def _no_sleep(self):
        """Return a patcher that makes the executor's ``time.sleep`` a no-op."""
        return patch(self._SLEEP, lambda *_a, **_k: None)

    def test_handle_confirm_save_dialog_clicks_oui_via_server(self):
        """The server-resolved "Oui" position is clicked in pixel coordinates."""
        exe = _make_executor_skeleton()
        exe._capture_screenshot_b64 = MagicMock(return_value="abc")
        exe._server_resolve_target = MagicMock(
            return_value={
                "resolved": True,
                "x_pct": 0.25,
                "y_pct": 0.75,
                "method": "hybrid_text_direct",
                "score": 0.91,
            }
        )
        exe._find_text_on_screen = MagicMock(return_value=None)
        exe._click = MagicMock()
        spec = ActionExecutorV1._match_known_runtime_dialog(
            "Confirmer l'enregistrement"
        )

        with patch("agent_v0.agent_v1.config.SERVER_URL", "http://srv"):
            handled = exe._handle_known_runtime_dialog(
                spec, "Confirmer l'enregistrement", 1920, 1080
            )

        assert handled["handled"] is True
        assert handled["button_text"] == "Oui"
        exe._server_resolve_target.assert_called_once()
        # (0.25, 0.75) on a 1920x1080 screen.
        exe._click.assert_called_once_with((480, 810), "left")

    def test_runtime_dialog_before_pause_returns_skip_result(self):
        """A handled dialog yields a successful "skip" result."""
        exe = _make_executor_skeleton()
        exe._check_and_pause_on_system_dialog = MagicMock(return_value=False)
        exe._handle_known_runtime_dialog = MagicMock(
            return_value=self._handled_with_oui()
        )
        exe._capture_screenshot_b64 = MagicMock(return_value="after")

        res = exe._maybe_handle_runtime_dialog_before_pause(
            action={"action_id": "act_final_click", "type": "click"},
            target_spec={},
            expected_title=self._SAVED_FILE_TITLE,
            current_title="Confirmer l'enregistrement",
            screen_width=1920,
            screen_height=1080,
        )

        assert res["success"] is True
        assert res["warning"] == "runtime_dialog_handled_skip"
        assert res["correction"]["button_text"] == "Oui"
        assert res["actual_position"] == {"x_pct": 0.33, "y_pct": 0.66}

    def test_post_verify_handles_runtime_dialog_and_recovers_expected_window(self):
        """Post-verify dismisses the dialog once, then sees the expected title."""
        exe = self._executor_ready_to_click()
        dialog_dismissed = False

        def _fake_handle(dialog_spec, current_title, screen_width, screen_height):
            nonlocal dialog_dismissed
            dialog_dismissed = True
            return self._handled_with_oui()

        def _window_info():
            # The dialog stays in the foreground until it has been handled.
            if dialog_dismissed:
                return {"title": self._SAVED_FILE_TITLE}
            return {"title": self._CONFIRM_SAVE_TITLE_TYPOGRAPHIC}

        exe._handle_known_runtime_dialog = MagicMock(side_effect=_fake_handle)
        action = {
            "action_id": "act_save_dialog",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_title": self._SAVED_FILE_TITLE,
        }
        active_window = patch(self._ACTIVE_WINDOW_INFO, side_effect=_window_info)

        with self._no_sleep(), active_window:
            res = exe.execute_replay_action(action)

        assert res["success"] is True
        assert res["warning"] == "runtime_dialog_handled_post_verify"
        assert res["actual_position"] == {"x_pct": 0.5, "y_pct": 0.5}
        exe._handle_known_runtime_dialog.assert_called_once()

    def test_post_verify_can_retry_same_runtime_dialog_before_recovery(self):
        """Post-verify handles the same dialog twice when it reappears."""
        exe = self._executor_ready_to_click()
        handle_calls = 0

        def _fake_handle(dialog_spec, current_title, screen_width, screen_height):
            nonlocal handle_calls
            handle_calls += 1
            return self._handled_with_oui()

        def _window_info():
            # The expected window only comes back after the second handling.
            if handle_calls >= 2:
                return {"title": self._SAVED_FILE_TITLE}
            return {"title": self._CONFIRM_SAVE_TITLE_TYPOGRAPHIC}

        exe._handle_known_runtime_dialog = MagicMock(side_effect=_fake_handle)
        action = {
            "action_id": "act_save_dialog_retry",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_title": self._SAVED_FILE_TITLE,
        }
        active_window = patch(self._ACTIVE_WINDOW_INFO, side_effect=_window_info)

        with self._no_sleep(), active_window:
            res = exe.execute_replay_action(action)

        assert res["success"] is True
        assert res["warning"] == "runtime_dialog_handled_post_verify"
        assert handle_calls == 2
        assert res["runtime_dialog"]["dialog_id"] == "confirm_save_overwrite"

    def test_post_verify_wrong_window_fails_when_dialog_transition_was_expected(self):
        """Landing in an unrelated window after an expected transition fails."""
        exe = self._executor_ready_to_click()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        exe._notification_manager = MagicMock()
        action = {
            "action_id": "act_open_save_dialog",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_before": "*test Bloc-notes",
            "expected_window_title": "Enregistrer sous",
        }
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={"title": "rpa_vision : Explorateur de fichiers"},
        )

        with self._no_sleep(), active_window:
            res = exe.execute_replay_action(action)

        assert res["success"] is False
        assert res["warning"] == "wrong_window"
        assert "Enregistrer sous" in res["error"]
        assert "rpa_vision : Explorateur de fichiers" in res["error"]
        assert res["needs_human"] is True
        exe._notification_manager.replay_wrong_window.assert_called_once()

    def test_post_verify_same_window_mismatch_stays_legacy_warning(self):
        """With no transition expected, a mismatch is only a timeout warning."""
        exe = self._executor_ready_to_click()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        action = {
            "action_id": "act_same_window_click",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_before": "*test Bloc-notes",
            "expected_window_title": "test Bloc-notes",
        }
        active_window = patch(
            self._ACTIVE_WINDOW_INFO,
            return_value={"title": "rpa_vision : Explorateur de fichiers"},
        )

        with self._no_sleep(), active_window:
            res = exe.execute_replay_action(action)

        assert res["success"] is True
        assert res["warning"] == (
            "post_verif_timeout:rpa_vision : Explorateur de fichiers"
        )
class TestCloseTabHotkeyFallback:
    """Semantic Ctrl+W fallback for a visual "close tab" click."""

    def test_visual_close_tab_uses_ctrl_w_when_tab_x_is_hidden(self):
        """The tab is closed through the hotkey, with no mouse click."""
        exe = _make_executor_with_mouse_skeleton()
        exe._observe_screen = MagicMock(return_value=None)
        exe._capture_human_correction = MagicMock(return_value=[])
        exe._execute_key_combo = MagicMock()
        exe._click = MagicMock()
        exe._wait_for_screen_change = MagicMock(return_value=True)
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        close_tab_spec = {
            "window_title": "*test Bloc-notes",
            "by_role": "tab_close_button",
            "context_hints": {
                "interaction": "close_tab",
                "active_tab_label": "test",
            },
        }
        action = {
            "action_id": "act_close_tab",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.88,
            "y_pct": 0.04,
            "target_spec": close_tab_spec,
        }
        no_sleep = patch(
            "agent_v0.agent_v1.core.executor.time.sleep",
            lambda *_a, **_k: None,
        )
        active_window = patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info",
            return_value={"title": "*test Bloc-notes"},
        )

        with no_sleep, active_window:
            res = exe.execute_replay_action(action)

        assert res["success"] is True
        assert res["warning"] == "close_tab_hotkey_fallback"
        assert res["resolution_method"] == "semantic_close_tab_hotkey"
        exe._execute_key_combo.assert_called_once_with(["ctrl", "w"])
        exe._click.assert_not_called()
        exe._capture_human_correction.assert_not_called()
class TestStartButtonHotkeyFallback:
    """Windows-key fallback for the setup "click start menu" step."""

    @staticmethod
    def _stubbed_executor():
        """Return an executor whose input and capture helpers are all mocked."""
        exe = _make_executor_with_mouse_skeleton()
        exe._observe_screen = MagicMock(return_value=None)
        exe._capture_human_correction = MagicMock(return_value=[])
        exe._execute_key_combo = MagicMock()
        exe._click = MagicMock()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        exe._wait_for_screen_change = MagicMock(return_value=True)
        return exe

    @staticmethod
    def _start_click_action():
        """Return a fresh visual setup click targeting the start button."""
        return {
            "action_id": "act_setup_click_start",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.387891,
            "y_pct": 0.974375,
            "_setup_phase": True,
            "_setup_step": "click_start_menu",
            "target_spec": {
                "by_role": "start_button",
                "by_text": "",
                "anchor_image_base64": "abc123",
                "allow_position_fallback": True,
                "screen_scope": "full_screen",
            },
        }

    @staticmethod
    def _run_with_grounding(exe, action, grounding_result):
        """Replay ``action`` with ``GroundingEngine.locate`` and sleep patched."""
        locate = patch(
            "agent_v0.agent_v1.core.grounding.GroundingEngine.locate",
            return_value=grounding_result,
        )
        no_sleep = patch(
            "agent_v0.agent_v1.core.executor.time.sleep",
            lambda *_a, **_k: None,
        )
        with locate, no_sleep:
            return exe.execute_replay_action(action, server_url="http://srv")

    def test_setup_start_button_position_fallback_uses_windows_key(self):
        """A position-only grounding result is replaced by the Windows key."""
        exe = self._stubbed_executor()
        grounding_result = SimpleNamespace(
            found=True,
            x_pct=0.387891,
            y_pct=0.974375,
            method="position_fallback",
            score=0.2,
            detail="fallback positionnel explicite",
            elapsed_ms=12.0,
        )

        res = self._run_with_grounding(
            exe, self._start_click_action(), grounding_result
        )

        assert res["success"] is True
        assert res["warning"] == "start_button_hotkey_fallback"
        assert res["resolution_method"] == "semantic_start_button_hotkey"
        exe._execute_key_combo.assert_called_once_with(["win"])
        exe._click.assert_not_called()
        exe._wait_for_screen_change.assert_not_called()
        exe._capture_human_correction.assert_not_called()

    def test_real_visual_start_button_match_keeps_mouse_click(self):
        """A genuine visual match keeps the regular mouse click."""
        exe = self._stubbed_executor()
        grounding_result = SimpleNamespace(
            found=True,
            x_pct=0.389,
            y_pct=0.973,
            method="vlm_quick_find",
            score=0.93,
            detail="match VLM plausible",
            elapsed_ms=35.0,
        )

        res = self._run_with_grounding(
            exe, self._start_click_action(), grounding_result
        )

        assert res["success"] is True
        assert res["resolution_method"] == "vlm_quick_find"
        exe._execute_key_combo.assert_not_called()
        exe._click.assert_called_once()
        exe._wait_for_screen_change.assert_not_called()
        exe._capture_human_correction.assert_not_called()

View File

@@ -0,0 +1,58 @@
"""Tests pour le flag RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE.
Brief Codex 2026-05-23 09:02 : le chemin produit cible est le workflow
compilé (post worker VLM), pas le replay direct depuis raw events.
Le flag env désactive la proposition automatique de replay direct par
défaut. Le chemin direct reste accessible (smoke/debug) via RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE=true.
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.replay_engine import ( # noqa: E402
_auto_launch_replay_after_finalize,
)
class TestAutoLaunchReplayFlag:
    """Parsing of the ``RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE`` env flag."""

    _ENV_VAR = "RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE"

    def test_default_is_false(self, monkeypatch):
        """With the variable unset, the flag is off."""
        monkeypatch.delenv(self._ENV_VAR, raising=False)
        flag = _auto_launch_replay_after_finalize()
        assert flag is False

    def test_true_value_activates(self, monkeypatch):
        """``true`` turns the flag on."""
        monkeypatch.setenv(self._ENV_VAR, "true")
        flag = _auto_launch_replay_after_finalize()
        assert flag is True

    def test_1_value_activates(self, monkeypatch):
        """``1`` turns the flag on."""
        monkeypatch.setenv(self._ENV_VAR, "1")
        flag = _auto_launch_replay_after_finalize()
        assert flag is True

    def test_yes_value_activates(self, monkeypatch):
        """``yes`` turns the flag on."""
        monkeypatch.setenv(self._ENV_VAR, "yes")
        flag = _auto_launch_replay_after_finalize()
        assert flag is True

    def test_false_value_deactivates(self, monkeypatch):
        """``false`` leaves the flag off."""
        monkeypatch.setenv(self._ENV_VAR, "false")
        flag = _auto_launch_replay_after_finalize()
        assert flag is False

    def test_empty_value_deactivates(self, monkeypatch):
        """An empty value leaves the flag off."""
        monkeypatch.setenv(self._ENV_VAR, "")
        flag = _auto_launch_replay_after_finalize()
        assert flag is False

    def test_arbitrary_value_deactivates(self, monkeypatch):
        """Any value that is not truthy returns False (default-deny)."""
        monkeypatch.setenv(self._ENV_VAR, "maybe")
        flag = _auto_launch_replay_after_finalize()
        assert flag is False

    def test_case_insensitive(self, monkeypatch):
        """Truthy values are recognized regardless of letter case."""
        for raw_value in ("TRUE", "Yes"):
            monkeypatch.setenv(self._ENV_VAR, raw_value)
            assert _auto_launch_replay_after_finalize() is True

View File

@@ -0,0 +1,46 @@
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import MagicMock
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.agent_v1.core.grounding import GroundingEngine # noqa: E402
def test_template_strategy_passes_fallback_coords_to_anchor_drift_guard():
    """The template strategy forwards the fallback position to the executor.

    ``_template_match_anchor`` must receive ``fallback_x`` / ``fallback_y``
    as ``fallback_x_pct`` / ``fallback_y_pct`` keyword arguments.
    """
    anchor_match = {
        "resolved": True,
        "x_pct": 0.7,
        "y_pct": 0.35,
        "score": 0.95,
    }
    executor = MagicMock()
    executor._template_match_anchor = MagicMock(return_value=anchor_match)
    engine = GroundingEngine(executor)
    target_spec = {"anchor_image_base64": "abc123"}

    result = engine._try_strategy(
        "template",
        server_url="",
        screenshot_b64="shot",
        target_spec=target_spec,
        fallback_x=0.708594,
        fallback_y=0.35,
        screen_width=2560,
        screen_height=1600,
    )

    assert result.found is True
    expected_positional = ("shot", "abc123", 2560, 1600)
    executor._template_match_anchor.assert_called_once_with(
        *expected_positional,
        fallback_x_pct=0.708594,
        fallback_y_pct=0.35,
    )

View File

@@ -111,6 +111,310 @@ class TestGroundingEngine:
assert d["x_pct"] == 0.5
assert d["method"] == "som"
def test_start_button_uses_full_screen_instead_of_active_window(self):
    """The start button must be resolved against the whole screen."""
    engine, executor = self._make_engine()
    server_answer = {
        "resolved": True,
        "x_pct": 0.02,
        "y_pct": 0.98,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Démarrer"},
    }
    executor._server_resolve_target.return_value = server_answer
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")
    active_rect = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value={"rect": [100, 100, 1100, 900]},
    )

    with active_rect:
        result = engine.locate(
            "http://server",
            {"by_text": "Démarrer", "by_role": "start_button"},
            0.02, 0.98, 1920, 1080,
        )

    assert result.found is True
    # A single capture with no region: the active-window rect is ignored.
    engine._capture_window_or_screen.assert_called_once_with(None)
def test_regular_targets_stay_scoped_to_active_window(self):
    """Ordinary application targets stay bounded to the active window."""
    engine, executor = self._make_engine()
    server_answer = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_answer
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")
    active_rect = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value={"rect": [100, 200, 1100, 1000]},
    )

    with active_rect:
        result = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5, 0.3, 1920, 1080,
        )

    assert result.found is True
    # The [100, 200, 1100, 1000] rect becomes a left/top/width/height region.
    window_region = {"left": 100, "top": 200, "width": 1000, "height": 800}
    engine._capture_window_or_screen.assert_called_once_with(window_region)
def test_unknown_window_rect_falls_back_to_full_screen_on_visual_mismatch(self):
    """An unknown title is only trusted if the crop is validated visually."""
    engine, executor = self._make_engine()
    server_answer = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_answer
    # The text lookup finds nothing, so the crop cannot be validated.
    executor._find_text_on_screen.return_value = None
    engine._capture_window_or_screen = MagicMock(
        side_effect=["fake_window_b64", "fake_screen_b64"]
    )
    active_rect = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value={
            "title": "unknown_window",
            "rect": [100, 200, 1100, 1000],
        },
    )

    with active_rect:
        result = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5, 0.3, 1920, 1080,
        )

    assert result.found is True
    # First the window crop, then the full-screen capture.
    captured_regions = [
        call.args[0] for call in engine._capture_window_or_screen.call_args_list
    ]
    assert captured_regions == [
        {"left": 100, "top": 200, "width": 1000, "height": 800},
        None,
    ]
def test_taskbar_like_rect_falls_back_to_full_screen(self):
    """A taskbar/systray window must never be used as the active window."""
    engine, executor = self._make_engine()
    capture = MagicMock(return_value="fake_b64_data")
    engine._capture_window_or_screen = capture
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    # Thin strip at the bottom of a 2560x1600 screen, reported with the
    # system-tray overflow window title.
    systray_window = {
        "title": "Fenêtre de dépassement de capacité de la barre d'état système",
        "rect": [0, 1492, 2560, 1600],
    }

    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=systray_window,
    ):
        located = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5, 0.3, 2560, 1600,
        )

    assert located.found is True
    # None means the capture was not scoped to a window region.
    capture.assert_called_once_with(None)
def test_visually_mismatched_window_crop_falls_back_to_full_screen(self):
    """A plausible window crop that is visually wrong gets rejected."""
    engine, executor = self._make_engine()
    # The expected text is not found in the window crop.
    executor._find_text_on_screen.return_value = None
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    capture = MagicMock(side_effect=["fake_window_b64", "fake_screen_b64"])
    engine._capture_window_or_screen = capture
    target_spec = {
        "by_text": "Enregistrer",
        "by_role": "button",
        "window_title": "Enregistrer sous",
    }
    # Independent copy taken before the call, so the final assertion still
    # compares against the spec as it was originally written.
    expected_spec = dict(target_spec)

    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value={
            "title": "Enregistrer sous",
            "rect": [100, 200, 1100, 1000],
        },
    ):
        located = engine.locate("http://server", target_spec, 0.5, 0.3, 1920, 1080)

    assert located.found is True
    captured_regions = [recorded.args[0] for recorded in capture.call_args_list]
    assert captured_regions == [
        {"left": 100, "top": 200, "width": 1000, "height": 800},
        None,
    ]
    # The server resolution must have received the full-screen capture.
    executor._server_resolve_target.assert_called_once_with(
        "http://server",
        "fake_screen_b64",
        expected_spec,
        0.5,
        0.3,
        1920,
        1080,
    )
def test_visually_validated_window_crop_stays_scoped(self):
    """A plausible window crop that is validated visually remains allowed."""
    engine, executor = self._make_engine()
    # The expected text is found in the crop, so the crop is validated.
    executor._find_text_on_screen.return_value = (321, 222)
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    capture = MagicMock(return_value="fake_window_b64")
    engine._capture_window_or_screen = capture
    save_as_window = {
        "title": "Enregistrer sous",
        "rect": [100, 200, 1100, 1000],
    }

    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=save_as_window,
    ):
        located = engine.locate(
            "http://server",
            {
                "by_text": "Enregistrer",
                "by_role": "button",
                "window_title": "Enregistrer sous",
            },
            0.5, 0.3, 1920, 1080,
        )

    assert located.found is True
    # Only one capture, and it is the window region (no full-screen retry).
    capture.assert_called_once_with(
        {"left": 100, "top": 200, "width": 1000, "height": 800}
    )
def test_lea_active_window_does_not_scope_external_target(self):
    """A foreground Léa window must never constrain an external target."""
    engine, executor = self._make_engine()
    capture = MagicMock(return_value="fake_b64_data")
    engine._capture_window_or_screen = capture
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Bloc-notes"},
    }
    lea_window = {
        "title": "Léa — Assistante",
        "app_name": "pythonw.exe",
        "rect": [1948, 750, 2570, 1606],
    }
    target_spec = {"by_text": "Bloc-notes", "by_role": "search_result"}
    # Independent copy taken before the call, used as the expected argument.
    expected_spec = dict(target_spec)

    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=lea_window,
    ):
        located = engine.locate("http://server", target_spec, 0.2, 0.5, 2560, 1600)

    assert located.found is True
    # The target does not mention Léa: capture is not scoped (None).
    capture.assert_called_once_with(None)
    executor._server_resolve_target.assert_called_once_with(
        "http://server",
        "fake_b64_data",
        expected_spec,
        0.2,
        0.5,
        2560,
        1600,
    )
def test_lea_active_window_stays_scoped_for_explicit_lea_target(self):
    """If the target explicitly names Léa, scoping to its window remains allowed."""
    engine, executor = self._make_engine()
    capture = MagicMock(return_value="fake_b64_data")
    engine._capture_window_or_screen = capture
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Continuer"},
    }
    lea_title = "Léa — Assistante"
    lea_window = {
        "title": lea_title,
        "app_name": "pythonw.exe",
        "rect": [1948, 750, 2570, 1606],
    }

    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=lea_window,
    ):
        located = engine.locate(
            "http://server",
            {
                "by_text": "Continuer",
                "by_role": "button",
                "window_title": lea_title,
            },
            0.5, 0.3, 3000, 2000,
        )

    assert located.found is True
    # Rect [1948, 750, 2570, 1606] expressed as left/top/width/height.
    capture.assert_called_once_with(
        {"left": 1948, "top": 750, "width": 622, "height": 856}
    )
def test_allow_position_fallback_returns_recorded_coords(self):
    """When allowed, grounding may fall back to the recorded position."""
    engine, executor = self._make_engine()
    # Every resolver comes back empty.
    for resolver in (
        executor._server_resolve_target,
        executor._template_match_anchor,
        executor._hybrid_vlm_resolve,
    ):
        resolver.return_value = None
    recorded_x, recorded_y = 0.387891, 0.974375
    target_spec = {
        "by_role": "start_button",
        "vlm_description": "icône Windows",
        "screen_scope": "full_screen",
        "allow_position_fallback": True,
    }

    located = engine.locate(
        "http://server", target_spec, recorded_x, recorded_y, 1920, 1080,
    )

    assert located.found is True
    assert located.method == "position_fallback"
    assert located.x_pct == pytest.approx(recorded_x)
    assert located.y_pct == pytest.approx(recorded_y)
# =========================================================================
# P2 : Policy — décisions quand grounding échoue
@@ -407,6 +711,65 @@ class TestReplayLearner:
assert "action_id" in data
assert "success" in data
def test_record_human_correction_persists_to_memory_helper(self, learner, monkeypatch):
    """A human correction must feed persistent memory through replay_memory."""
    recorded_kwargs = {}

    def fake_memory_record_success(**kwargs):
        # Keep what the learner passes so it can be inspected below.
        recorded_kwargs.update(kwargs)
        return True

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_record_success",
        fake_memory_record_success,
    )
    corrected_action = {
        "action_id": "a_corr",
        "target_spec": {"by_text": "Valider", "window_title": "Bloc-notes"},
    }

    learner.record_human_correction(
        session_id="s_corr",
        action=corrected_action,
        correction={"x_pct": 0.42, "y_pct": 0.84},
    )

    entries = learner.load_session("s_corr")
    assert len(entries) == 1
    entry = entries[0]
    assert entry.resolution_method == "human_supervised"
    assert entry.window_title == "Bloc-notes"

    assert recorded_kwargs["window_title"] == "Bloc-notes"
    assert recorded_kwargs["target_spec"]["by_text"] == "Valider"
    expected_scalars = {
        "x_pct": 0.42,
        "y_pct": 0.84,
        "method": "human_supervised",
        "confidence": 1.0,
    }
    for key, expected in expected_scalars.items():
        assert recorded_kwargs[key] == expected
def test_record_human_correction_fallback_window_title_from_action(self, learner, monkeypatch):
    """Without target_spec.window_title, action.window_title is used instead."""
    recorded_kwargs = {}

    def fake_memory_record_success(**kwargs):
        recorded_kwargs.update(kwargs)
        return True

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_record_success",
        fake_memory_record_success,
    )
    # The window title lives on the action only, not in its target_spec.
    action_without_spec_title = {
        "action_id": "a_corr2",
        "window_title": "Fenêtre fallback",
        "target_spec": {"by_text": "Enregistrer"},
    }

    learner.record_human_correction(
        session_id="s_corr2",
        action=action_without_spec_title,
        correction={"x_pct": 0.1, "y_pct": 0.2},
    )

    assert recorded_kwargs["window_title"] == "Fenêtre fallback"
# =========================================================================
# Boucle d'apprentissage : consolidation cross-workflow

View File

@@ -145,6 +145,20 @@ class TestVerifyWithCritic:
assert result.suggestion == "retry"
assert result.semantic_verified is None # VLM non appelé
def test_verify_screen_identique_ne_declenche_pas_retry(
    self, verifier, screenshot_gray,
):
    """verify_screen is a stabilization step, not an action that must change the screen again."""
    stabilization_action = {"type": "verify_screen", "action_id": "verify_setup"}

    # Same screenshot before and after: the screen did not change.
    outcome = verifier.verify_action(
        action=stabilization_action,
        result={"success": True},
        screenshot_before=screenshot_gray,
        screenshot_after=screenshot_gray,
    )

    assert outcome.verified is True
    assert outcome.suggestion == "continue"
    assert outcome.changes_detected is False
@patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic")
def test_pixel_ok_semantic_ok(
self, mock_semantic, verifier, screenshot_gray, screenshot_white,

View File

@@ -0,0 +1,118 @@
from types import SimpleNamespace
from agent_v0.server_v1 import replay_memory
from core.learning.target_memory_store import TargetMemoryStore
class _DummyStore:
def __init__(self, fp):
self._fp = fp
def lookup(self, screen_sig, spec_shim):
return self._fp
def test_memory_lookup_uses_window_relative_coords_when_available(monkeypatch):
    """A position_fallback hit is re-expressed from the recorded window capture."""
    stored = SimpleNamespace(
        bbox=(0.566016, 0.400625, 0.0, 0.0),
        etype="position_fallback",
        confidence=0.2,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored))
    click_x, click_y = 681, 448
    window_w, window_h = 1287, 1407

    hit = replay_memory.memory_lookup(
        window_title="Rechercher",
        target_spec={
            "by_text": "Bloc-notes",
            "window_capture": {
                "click_relative": [click_x, click_y],
                "window_size": [window_w, window_h],
            },
        },
    )

    assert hit is not None
    assert hit["method"] == "memory_position_fallback"
    # Coordinates come from click_relative / window_size, not from the bbox.
    assert hit["x_pct"] == click_x / window_w
    assert hit["y_pct"] == click_y / window_h
def test_memory_lookup_keeps_bbox_coords_without_window_capture(monkeypatch):
    """Without a window_capture in the spec, the stored bbox coordinates are returned."""
    bbox_x, bbox_y = 0.566016, 0.400625
    stored = SimpleNamespace(
        bbox=(bbox_x, bbox_y, 0.0, 0.0),
        etype="position_fallback",
        confidence=0.2,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored))

    hit = replay_memory.memory_lookup(
        window_title="Rechercher",
        target_spec={"by_text": "Bloc-notes"},
    )

    assert hit is not None
    assert hit["x_pct"] == bbox_x
    assert hit["y_pct"] == bbox_y
def test_memory_lookup_keeps_learned_visual_coords_with_window_capture(monkeypatch):
    """An anchor_template hit keeps its learned coordinates even with a window_capture."""
    learned_x, learned_y = 0.402734375, 0.578125
    stored = SimpleNamespace(
        bbox=(learned_x, learned_y, 0.0, 0.0),
        etype="anchor_template",
        confidence=0.99,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored))
    target_spec = {
        "by_text": "Enregistrer",
        "by_role": "yolo",
        "window_capture": {
            "click_relative": [860, 634],
            "window_size": [1920, 1116],
        },
    }

    hit = replay_memory.memory_lookup(
        window_title="*test Bloc-notes",
        target_spec=target_spec,
    )

    assert hit is not None
    assert hit["method"] == "memory_anchor_template"
    assert hit["x_pct"] == learned_x
    assert hit["y_pct"] == learned_y
def test_target_spec_hash_distinguishes_same_text_with_different_spatial_hints(tmp_path):
    """Two specs with the same text but different spatial hints must hash differently."""
    store = TargetMemoryStore(base_path=str(tmp_path / "learning"))

    def build_spec(click_relative, bbox_norm, center_norm):
        # Text, role, description and window size are identical across
        # specs; only the spatial hints passed in vary.
        return replay_memory._TargetSpecLike(
            {
                "by_text": "Enregistrer",
                "by_role": "yolo",
                "vlm_description": "Dans la fenêtre '*test Bloc-notes', l'élément cliqué se trouve au milieu au centre de l'écran",
                "window_capture": {
                    "click_relative": click_relative,
                    "window_size": [1920, 1116],
                },
                "som_element": {
                    "bbox_norm": bbox_norm,
                    "center_norm": center_norm,
                },
            }
        )

    spec_left = build_spec(
        [860, 634],
        [0.40234375, 0.701875, 0.46640625, 0.74125],
        [0.434375, 0.72125],
    )
    spec_right = build_spec(
        [1491, 38],
        [0.697265625, 0.335625, 0.715625, 0.3625],
        [0.70625, 0.34875],
    )

    hash_left = store._hash_target_spec(spec_left)
    hash_right = store._hash_target_spec(spec_right)
    assert hash_left != hash_right

View File

@@ -0,0 +1,152 @@
from __future__ import annotations
import base64
import io
import sys
from pathlib import Path
from PIL import Image, ImageDraw
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1 import resolve_engine # noqa: E402
class _FakeElem:
def __init__(self, elem_id, label, source, center, center_norm, confidence=0.9):
self.id = elem_id
self.label = label
self.source = source
self.center = center
self.center_norm = center_norm
self.confidence = confidence
class _FakeSomResult:
def __init__(self, elements):
self.elements = elements
self.som_image = None
class _FakeSomEngine:
def __init__(self, elements):
self._elements = elements
def analyze(self, _img):
return _FakeSomResult(self._elements)
def _make_close_button_image(tmp_path: Path) -> tuple[str, str]:
    """Draw a synthetic screen with an "X" button and return it with its anchor.

    Returns ``(screenshot_path, anchor_b64)``: the path of the 200x100 PNG
    saved under ``tmp_path`` and the base64-encoded PNG crop of the button
    area ``(130, 4, 170, 36)``.
    """
    button_box = (130, 4, 170, 36)
    canvas = Image.new("RGB", (200, 100), "white")
    pen = ImageDraw.Draw(canvas)
    pen.rounded_rectangle(button_box, radius=8, fill=(242, 244, 247))
    # Two crossing strokes form the "X" glyph.
    for stroke in ((144, 12, 156, 24), (156, 12, 144, 24)):
        pen.line(stroke, fill="black", width=2)

    target = tmp_path / "screen.png"
    canvas.save(target)

    png_bytes = io.BytesIO()
    canvas.crop(button_box).save(png_bytes, format="PNG")
    encoded = base64.b64encode(png_bytes.getvalue()).decode("utf-8")
    return str(target), encoded
def test_close_tab_uses_exact_anchor_coords(tmp_path, monkeypatch):
    """For close_tab, the exact anchor position is used, not the nearby SoM element."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    # The only SoM element sits at x=0.60, away from the drawn button.
    nearby_element = _FakeElem(
        elem_id=47,
        label="test",
        source="yolo",
        center=(120, 20),
        center_norm=(0.60, 0.20),
    )
    som_engine = _FakeSomEngine([nearby_element])
    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: som_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())
    target_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "fermer l'onglet actif",
        "context_hints": {"interaction": "close_tab"},
        "window_capture": {
            "rect": [0, 0, 200, 100],
            "click_relative": [150, 20],
            "window_size": [200, 100],
        },
    }

    resolved = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=target_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolved is not None
    assert resolved["method"] == "som_anchor_match"
    assert resolved["matched_element"]["role"] == "som_anchor_exact"
    assert resolved["x_pct"] == 0.75
    assert resolved["y_pct"] == 0.20
def test_close_tab_rejects_exact_anchor_far_from_recorded_click(tmp_path, monkeypatch):
    """A close_tab anchor match far from the recorded click yields no result."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    empty_engine = _FakeSomEngine([])
    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: empty_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())
    target_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "fermer l'onglet actif",
        "context_hints": {"interaction": "close_tab"},
        "window_capture": {
            "rect": [0, 0, 200, 100],
            # Recorded click at x=50, while the button is drawn around x=150.
            "click_relative": [50, 20],
            "window_size": [200, 100],
        },
    }

    resolved = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=target_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolved is None
def test_non_close_tab_keeps_nearest_som_center(tmp_path, monkeypatch):
    """Outside close_tab, the anchor match resolves to the SoM element's center."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    som_element = _FakeElem(
        elem_id=47,
        label="test",
        source="yolo",
        center=(120, 20),
        center_norm=(0.60, 0.20),
    )
    som_engine = _FakeSomEngine([som_element])
    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: som_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())
    # No context_hints and no window_capture in this spec.
    target_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "icône en haut",
    }

    resolved = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=target_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolved is not None
    assert resolved["method"] == "som_anchor_match"
    assert resolved["matched_element"]["role"] == "som_anchor_match"
    assert resolved["x_pct"] == 0.60
    assert resolved["y_pct"] == 0.20

View File

@@ -0,0 +1,51 @@
import pytest
from agent_v0.server_v1 import resolve_engine
@pytest.fixture(autouse=True)
def _disable_memory_lookup(monkeypatch):
    """Replace ``replay_memory.memory_lookup`` with a stub that always returns None."""

    def _no_memory_hit(**kwargs):
        return None

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_lookup",
        _no_memory_hit,
    )
def test_dialog_button_skips_vlm_cascade_when_ocr_misses(tmp_path, monkeypatch):
    """When OCR misses a dialog_button, resolution stops without calling VLM or SoM."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")

    def _forbidden(resolver_name):
        # Build a replacement that fails the test as soon as it is called.
        def _raise(*args, **kwargs):
            raise AssertionError(
                f"{resolver_name} ne doit pas être appelé pour dialog_button"
            )

        return _raise

    monkeypatch.setattr(
        resolve_engine,
        "_resolve_by_ocr_text",
        lambda *args, **kwargs: None,
    )
    monkeypatch.setattr(resolve_engine, "_vlm_quick_find", _forbidden("VLM"))
    monkeypatch.setattr(resolve_engine, "_resolve_by_som", _forbidden("SoM"))
    target_spec = {
        "by_role": "dialog_button",
        "by_text": "Oui",
        "window_title": "Confirmer lenregistrement",
        "vlm_description": "Dans la fenêtre 'Confirmer lenregistrement', le bouton 'Oui'",
    }

    outcome = resolve_engine._resolve_target_sync(
        str(screenshot),
        target_spec,
        2560,
        1600,
        0.5,
        0.5,
        True,
        processor=None,
    )

    assert outcome["resolved"] is False
    assert outcome["method"] == "dialog_button_ocr_only"
    assert outcome["reason"] == "ocr_direct_failed_dialog_button_no_vlm"

View File

@@ -0,0 +1,139 @@
import pytest
from agent_v0.server_v1 import resolve_engine
@pytest.fixture(autouse=True)
def _disable_memory_lookup(monkeypatch):
    """Replace ``replay_memory.memory_lookup`` with a stub that always returns None."""

    def _no_memory_hit(**kwargs):
        return None

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_lookup",
        _no_memory_hit,
    )
@pytest.fixture
def _patched_resolvers(monkeypatch):
    """Make the template-matching and SoM resolvers return None."""
    for resolver_name in ("_resolve_by_template_matching", "_resolve_by_som"):
        monkeypatch.setattr(
            resolve_engine,
            resolver_name,
            lambda *args, **kwargs: None,
        )
def _start_button_spec():
return {
"by_role": "start_button",
"by_text": "",
"anchor_image_base64": "abc123",
"vlm_description": "Le bouton Démarrer (icône Windows) dans la barre des tâches, en bas",
"screen_scope": "full_screen",
}
def _generic_button_spec():
return {
"by_role": "button",
"by_text": "",
"anchor_image_base64": "abc123",
"vlm_description": "Le bouton principal",
}
def _vlm_result(x_pct: float, y_pct: float, score: float = 0.95):
return {
"resolved": True,
"method": "vlm_quick_find",
"x_pct": x_pct,
"y_pct": y_pct,
"score": score,
"matched_element": {
"label": "target",
"type": "vlm_located",
"role": "vlm_quick_find",
"confidence": score,
},
}
def test_start_button_rejects_far_vlm_false_positive(tmp_path, monkeypatch, _patched_resolvers):
    """A VLM hit far from the recorded start-button position is rejected."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")
    # VLM answers near the left edge; the recorded click is at x ~ 0.39.
    far_hit = _vlm_result(0.01, 0.95)
    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: far_hit,
    )

    outcome = resolve_engine._resolve_target_sync(
        str(screenshot),
        _start_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert outcome["resolved"] is False
    assert outcome["method"] == "strict_vlm_template_failed"
def test_start_button_accepts_plausible_vlm_result(tmp_path, monkeypatch, _patched_resolvers):
    """A VLM hit close to the recorded start-button position is kept as is."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")
    vlm_x, vlm_y = 0.395, 0.972
    near_hit = _vlm_result(vlm_x, vlm_y)
    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: near_hit,
    )

    outcome = resolve_engine._resolve_target_sync(
        str(screenshot),
        _start_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert outcome["resolved"] is True
    assert outcome["method"] == "vlm_quick_find"
    assert outcome["x_pct"] == pytest.approx(vlm_x)
    assert outcome["y_pct"] == pytest.approx(vlm_y)
def test_non_start_button_keeps_vlm_result_even_if_far(tmp_path, monkeypatch, _patched_resolvers):
    """For a generic button, a far-away VLM hit is still accepted."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")
    vlm_x, vlm_y = 0.01, 0.95
    far_hit = _vlm_result(vlm_x, vlm_y)
    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: far_hit,
    )

    outcome = resolve_engine._resolve_target_sync(
        str(screenshot),
        _generic_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert outcome["resolved"] is True
    assert outcome["method"] == "vlm_quick_find"
    assert outcome["x_pct"] == pytest.approx(vlm_x)
    assert outcome["y_pct"] == pytest.approx(vlm_y)

View File

@@ -0,0 +1,103 @@
"""Tests pour les contrôles HTTP de replay paused (resume/abort).
Ces appels sont le fallback du chemin SocketIO `lea:replay_resume`
/ `lea:replay_abort` quand le bus feedback est déconnecté au moment
où l'utilisateur clique dans la bulle paused (cf.
`docs/CR_AUDIT_PAUSED_RESUME_BUS_2026-05-22.md`).
"""
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.lea_ui.server_client import LeaServerClient # noqa: E402
# Shared path suffixes, so URLs can be compared without depending on the
# RPA_STREAMING_URL value of the environment the tests run in.
RESUME_PATH = "/traces/stream/replay/replay_xyz/resume"
CANCEL_PATH = "/traces/stream/replay/replay_xyz/cancel"
@pytest.fixture
def client(monkeypatch):
    """Provide a ``LeaServerClient`` created with a known ``RPA_API_TOKEN``."""
    # The token must be in the environment before the client is constructed.
    monkeypatch.setenv("RPA_API_TOKEN", "tok-test-1234")
    return LeaServerClient()
# =========================================================================
# resume_replay
# =========================================================================
class TestResumeReplay:
    """Behaviour of ``LeaServerClient.resume_replay`` with ``requests.post`` mocked."""

    def test_returns_true_when_server_accepts(self, client):
        with patch("requests.post", return_value=MagicMock(ok=True)) as post:
            accepted = client.resume_replay("replay_xyz")
        assert accepted is True
        assert post.call_count == 1

    def test_returns_false_when_server_rejects(self, client):
        with patch("requests.post", return_value=MagicMock(ok=False)):
            accepted = client.resume_replay("replay_xyz")
        assert accepted is False

    def test_returns_false_on_empty_replay_id(self, client):
        with patch("requests.post") as post:
            accepted = client.resume_replay("")
        assert accepted is False
        # No HTTP request may be sent for an empty replay id.
        post.assert_not_called()

    def test_returns_false_on_exception(self, client):
        with patch("requests.post", side_effect=ConnectionError("network down")):
            accepted = client.resume_replay("replay_xyz")
        assert accepted is False

    def test_posts_to_resume_endpoint_with_auth_header(self, client):
        with patch("requests.post", return_value=MagicMock(ok=True)) as post:
            client.resume_replay("replay_xyz")
        sent = post.call_args
        # The URL may be passed positionally or as the ``url`` keyword.
        if sent.args:
            url = sent.args[0]
        else:
            url = sent.kwargs.get("url", "")
        assert url.endswith(RESUME_PATH)
        headers = sent.kwargs.get("headers", {})
        assert headers.get("Authorization") == "Bearer tok-test-1234"
# =========================================================================
# abort_replay
# =========================================================================
class TestAbortReplay:
    """Behaviour of ``LeaServerClient.abort_replay`` with ``requests.post`` mocked."""

    def test_returns_true_when_server_accepts(self, client):
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp):
            assert client.abort_replay("replay_xyz") is True

    def test_returns_false_when_server_rejects(self, client):
        resp = MagicMock(ok=False)
        with patch("requests.post", return_value=resp):
            assert client.abort_replay("replay_xyz") is False

    def test_returns_false_on_empty_replay_id(self, client):
        with patch("requests.post") as post:
            assert client.abort_replay("") is False
            # No HTTP request may be sent for an empty replay id.
            post.assert_not_called()

    def test_returns_false_on_exception(self, client):
        with patch("requests.post", side_effect=TimeoutError("timeout")):
            assert client.abort_replay("replay_xyz") is False

    def test_posts_to_cancel_endpoint(self, client):
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp) as post:
            client.abort_replay("replay_xyz")
            call = post.call_args
            # Accept the URL whether it is passed positionally or as the
            # ``url`` keyword, exactly like the resume test does; indexing
            # ``call.args[0]`` directly would raise IndexError (instead of
            # a readable assertion failure) for a keyword-only call.
            url = call.args[0] if call.args else call.kwargs.get("url", "")
            assert url.endswith(CANCEL_PATH)

View File

@@ -0,0 +1,83 @@
"""Tests pour `_should_reject_on_text_mismatch` — patch 2026-05-23 :
distinguer `observed=''` (OCR n'a rien lu, ambigu) de `observed='X'`
(autre texte lu = mismatch confirmé) dans le pré-check OCR.
Brief Codex 2026-05-23 08:55 : le crop bbox SoM précis (50 × 48 px)
sur un onglet Notepad moderne donne `observed=''` car EasyOCR n'a pas
suffisamment de signal (texte peu contrasté, zone trop petite). Le
patch précédent rejetait ce cas comme mismatch — alors qu'aucune
preuve d'un mauvais clic n'existe. On ne rejette plus que quand l'OCR
a effectivement lu autre chose que la cible attendue.
Le faux succès OBS Studio reste bloqué : (1) son OCR retournait
`'ue audio disponible GUI OBS Studio…'` = non-vide → rejet conservé ;
(2) la garde drift agent posée sur ANCHOR-TM bloque déjà ce match.
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.resolve_engine import ( # noqa: E402
_should_reject_on_text_mismatch,
)
class TestShouldRejectOnTextMismatch:
    """Rejection happens only when OCR read some other, non-blank text."""

    def test_valid_passes(self):
        """Nominal case: OCR saw the target, so nothing is rejected."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=True, observed="Enregistrer sous",
        )
        assert not rejected

    def test_invalid_with_text_rejects(self):
        """Historical 0745 case: OCR reads '9 ?', which does not match
        'Enregistrer sous', so the rejection is confirmed."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed="9 ?",
        )
        assert rejected

    def test_invalid_with_obs_studio_rejects(self):
        """0756 case: OCR reads OBS Studio text, so the rejection is confirmed."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed="ue audio disponible GUI OBS Studio",
        )
        assert rejected

    def test_invalid_with_empty_observed_does_not_reject(self):
        """0855 case: OCR read nothing (area too small / low contrast).

        That is ambiguous, not a confirmed mismatch, so the server
        resolution is preserved; the agent-side drift guard protects
        downstream.
        """
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed="",
        )
        assert not rejected

    def test_invalid_with_whitespace_only_does_not_reject(self):
        """A lone space counts as empty for this logic."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed=" ",
        )
        assert not rejected

    def test_invalid_with_newline_only_does_not_reject(self):
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed="\n\t",
        )
        assert not rejected

    def test_invalid_with_none_observed_does_not_reject(self):
        """Robustness: ``observed=None`` (degenerate case, OCR library
        missing) must not crash."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False, observed=None,
        )
        assert not rejected

    def test_valid_with_empty_passes(self):
        """``is_valid=True`` with an empty ``observed``.

        This normally cannot come out of ``_text_match_fuzzy`` (which
        returns False on empty input), but the logic stays coherent:
        when ``is_valid`` is True nothing is rejected, whatever
        ``observed`` is.
        """
        rejected = _should_reject_on_text_mismatch(
            is_valid=True, observed="",
        )
        assert not rejected

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.resolve_engine import _validate_resolution_quality # noqa: E402
def _result(score: float) -> dict:
return {
"resolved": True,
"method": "som_anchor_match",
"score": score,
"x_pct": 0.75,
"y_pct": 0.20,
}
def _close_tab_spec() -> dict:
return {
"by_text": "",
"by_role": "tab_close_button",
"anchor_image_base64": "abc123",
"context_hints": {"interaction": "close_tab", "active_tab_label": "test"},
}
def test_close_tab_relaxes_threshold_for_near_match():
    """A close_tab result scoring 0.744 is accepted with its score unchanged."""
    near_match_score = 0.744
    candidate = _result(near_match_score)

    validated = _validate_resolution_quality(
        candidate,
        0.708594,
        0.35,
        target_spec=_close_tab_spec(),
    )

    assert validated["resolved"] is True
    assert validated["score"] == near_match_score
def test_close_tab_still_rejects_low_score():
    """A close_tab result scoring 0.65 is still rejected as below threshold."""
    candidate = _result(0.65)

    validated = _validate_resolution_quality(
        candidate,
        0.708594,
        0.35,
        target_spec=_close_tab_spec(),
    )

    assert validated["resolved"] is False
    assert "below_threshold" in validated["reason"]
def test_close_tab_rejects_far_zone_even_with_good_score():
    """A well-scored close_tab result far from the recorded position is rejected."""
    # The result sits at (0.75, 0.20); the recorded position is (0.30, 0.20).
    candidate = _result(0.80)

    validated = _validate_resolution_quality(
        candidate,
        0.30,
        0.20,
        target_spec=_close_tab_spec(),
    )

    assert validated["resolved"] is False
    assert validated["reason"] == "close_tab_out_of_recorded_zone"
    assert validated["method"] == "rejected_close_tab_zone_som_anchor_match"

View File

@@ -0,0 +1,134 @@
"""Tests pour `_validate_resolution_quality` — relâchement contextuel
du seuil de score pour les cibles `interaction = switch_tab` avec un
`som_element` calibré.
Cas live 2026-05-22 (act_raw_2f7e316c) :
- Onglet Notepad moderne `Enregistrer sous`
- Score som_text_match = 0.745 (juste sous seuil 0.75)
- Cible bien localisée par SoM (bbox_norm) + focus_change pré-clic
confirmant déjà la bonne fenêtre
- Rejeté à tort → pause supervisée
Le patch abaisse le seuil à 0.60 UNIQUEMENT pour
`context_hints.interaction == "switch_tab"` + `som_element` présent
+ méthode `som_*`. Pas de baisse globale.
"""
from __future__ import annotations
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.resolve_engine import ( # noqa: E402
_validate_resolution_quality,
)
def _result(method: str, score: float, x: float = 0.5, y: float = 0.5) -> dict:
return {
"resolved": True,
"method": method,
"score": score,
"x_pct": x,
"y_pct": y,
}
def _switch_tab_spec(with_som: bool = True) -> dict:
spec = {
"by_text": "Enregistrer sous",
"by_role": "tab",
"window_title": "*test Bloc-notes",
"context_hints": {
"interaction": "switch_tab",
"switch_to_window_title": "Enregistrer sous",
},
}
if with_som:
spec["som_element"] = {
"bbox_norm": [0.697, 0.335, 0.715, 0.362],
"center_norm": [0.706, 0.348],
}
return spec
class TestSwitchTabThresholdRelaxation:
    """Score-threshold behaviour of ``_validate_resolution_quality`` for switch_tab targets."""

    def test_baseline_no_target_spec_keeps_strict_threshold(self):
        """Without a target_spec, legacy behaviour applies: 0.745 < 0.75 is rejected."""
        candidate = _result("som_text_match", score=0.745)
        validated = _validate_resolution_quality(candidate, 0.5, 0.5)
        assert validated is not None
        assert validated["resolved"] is False
        assert "below_threshold" in validated["reason"]

    def test_switch_tab_with_som_accepts_score_above_relaxed_threshold(self):
        """switch_tab + som_element + som_* method + score 0.745 is accepted."""
        candidate = _result("som_text_match", score=0.745, x=0.706, y=0.348)
        spec = _switch_tab_spec(with_som=True)
        validated = _validate_resolution_quality(
            candidate, 0.706, 0.348, target_spec=spec,
        )
        assert validated is not None
        assert validated["resolved"] is True
        assert validated["method"] == "som_text_match"
        assert validated["score"] == 0.745

    def test_switch_tab_with_som_still_rejects_very_low_score(self):
        """Final safety net: even for switch_tab, a 0.50 score stays rejected
        (relaxed threshold 0.60). Not just anything gets validated."""
        candidate = _result("som_text_match", score=0.50)
        spec = _switch_tab_spec(with_som=True)
        validated = _validate_resolution_quality(
            candidate, 0.5, 0.5, target_spec=spec,
        )
        assert validated["resolved"] is False
        assert "below_threshold" in validated["reason"]

    def test_switch_tab_without_som_keeps_strict_threshold(self):
        """Without a calibrated som_element the strict threshold is kept:
        a lower score cannot be trusted without a spatial anchor."""
        candidate = _result("som_text_match", score=0.745)
        spec = _switch_tab_spec(with_som=False)
        validated = _validate_resolution_quality(
            candidate, 0.5, 0.5, target_spec=spec,
        )
        assert validated["resolved"] is False

    def test_non_switch_tab_keeps_strict_threshold(self):
        """Non-tab target: no relaxation, 0.745 stays rejected."""
        button_spec = {
            "by_text": "Submit",
            "by_role": "button",
            "som_element": {"bbox_norm": [0.4, 0.4, 0.5, 0.5]},
        }
        candidate = _result("som_text_match", score=0.745)
        validated = _validate_resolution_quality(
            candidate, 0.5, 0.5, target_spec=button_spec,
        )
        assert validated["resolved"] is False

    def test_switch_tab_with_non_som_method_keeps_strict_threshold(self):
        """The relaxation only applies to som_* methods (which use the
        calibrated bbox). A vlm_quick_find at 0.745 on a switch_tab target
        stays governed by its own legacy threshold (0.60, so accepted)."""
        # vlm_quick_find already has a 0.60 threshold (see
        # _RESOLUTION_MIN_SCORES), so 0.745 is well above it. This only
        # checks that this case does not regress.
        candidate = _result("vlm_quick_find", score=0.745)
        spec = _switch_tab_spec(with_som=True)
        validated = _validate_resolution_quality(
            candidate, 0.5, 0.5, target_spec=spec,
        )
        assert validated["resolved"] is True

    def test_unresolved_result_passes_through(self):
        """Non-regression: a resolved=False result goes through unmodified."""
        unresolved = {"resolved": False, "method": "no_target_criteria"}
        validated = _validate_resolution_quality(
            unresolved, 0.5, 0.5, target_spec=_switch_tab_spec(),
        )
        # Same object, not merely an equal one.
        assert validated is unresolved

    def test_target_spec_parameter_is_optional_for_legacy_callers(self):
        """Backward compatibility: a call without target_spec does not crash
        and applies the legacy threshold."""
        candidate = _result("som_anchor_match", score=0.80)
        validated = _validate_resolution_quality(candidate, 0.5, 0.5)
        assert validated["resolved"] is True

View File

@@ -0,0 +1,158 @@
"""Tests pour `_validate_text_at_position` — patch 2026-05-23 :
utilisation prioritaire de la bbox SoM enregistrée quand disponible.
Cas live (brief Codex 2026-05-23 07:45) : le pré-check OCR rejette à tort
`expected='Enregistrer sous' observed='9 ?'` car le crop de
``radius_px=280`` autour de la coord résolue capture du texte voisin
(numéro de ligne « 9 » de la status bar Notepad) au lieu du label
étroit de l'onglet. La bbox SoM ``[0.697, 0.335, 0.715, 0.362]``
localise précisément l'onglet — l'utiliser comme zone OCR donne
l'OCR exact du label.
"""
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
@pytest.fixture
def fake_screenshot(tmp_path):
    """Write a solid black 1920x1200 RGB image as ``shot.png`` and return its path.

    The file is created under pytest's ``tmp_path`` and the path is returned
    as a plain string.
    """
    from PIL import Image

    target = tmp_path / "shot.png"
    Image.new("RGB", (1920, 1200), (0, 0, 0)).save(target)
    return str(target)
@pytest.fixture
def patched_reader():
    """Yield a stub OCR reader whose answer depends on the crop size it receives.

    Tests fill ``reader._observed_by_size`` with ``(width, height) -> text``
    entries; any other crop size yields ``"fallback text"``. This makes it
    possible to simulate "sees 'Enregistrer sous'" versus "sees '9 ?'"
    depending on which crop the code under test builds. While the fixture is
    active, ``resolve_engine._get_validation_ocr_reader`` returns this stub.
    """
    ocr_stub = MagicMock()
    # Approximate crop size (width, height) -> OCR text to report.
    ocr_stub._observed_by_size = {}

    def _readtext_by_crop_size(arr):
        # arr.shape starts with (height, width); the lookup key is (width, height).
        height, width = arr.shape[:2]
        observed = ocr_stub._observed_by_size.get((width, height), "fallback text")
        # One detection per call; presumably (bbox, text, confidence) — confirm
        # against the reader's real return format.
        return [(None, observed, 0.95)]

    ocr_stub.readtext.side_effect = _readtext_by_crop_size

    reader_patch = patch(
        "agent_v0.server_v1.resolve_engine._get_validation_ocr_reader",
        return_value=ocr_stub,
    )
    with reader_patch:
        yield ocr_stub
def _spec_with_som_bbox():
return {
"by_text": "Enregistrer sous",
"som_element": {
"bbox_norm": [0.697, 0.335, 0.715, 0.362],
},
}
class TestValidateTextWithSomBbox:
    """Checks which OCR crop ``_validate_text_at_position`` uses, with or without a SoM bbox.

    The stubbed reader answers according to the size of the crop it receives,
    so each test registers the text for the crop size it expects to be used.
    """

    @staticmethod
    def _validate(screenshot, expected_text, x_pct=0.5, y_pct=0.5, **extra):
        """Call ``_validate_text_at_position`` for a 1920x1200 screen.

        ``extra`` is forwarded verbatim (used for ``som_bbox_norm``).
        """
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        return _validate_text_at_position(
            screenshot,
            x_pct=x_pct, y_pct=y_pct,
            expected_text=expected_text,
            screen_width=1920, screen_height=1200,
            **extra,
        )

    def test_uses_som_bbox_when_present(self, fake_screenshot, patched_reader):
        """When som_bbox_norm is given, the OCR zone is derived from that bbox
        rather than from the radius around x_pct/y_pct."""
        som_bbox = _spec_with_som_bbox()["som_element"]["bbox_norm"]
        # Expected crop per the original author's note:
        # width  = (0.715-0.697)*1920 = 34 + 2*padding,
        # height = (0.362-0.335)*1200 = 32 + 2*padding (padding=8),
        # i.e. roughly (50, 48) px.
        patched_reader._observed_by_size[(50, 48)] = "Enregistrer sous"

        is_valid, observed, _ms = self._validate(
            fake_screenshot,
            "Enregistrer sous",
            x_pct=0.706, y_pct=0.348,
            som_bbox_norm=som_bbox,
        )

        assert observed == "Enregistrer sous"
        assert is_valid is True

    def test_falls_back_to_radius_when_no_bbox(self, fake_screenshot, patched_reader):
        """Without som_bbox_norm the legacy behaviour applies: a radius_px=280
        crop around (x_pct, y_pct)."""
        # No bbox -> crop of about 560x560.
        patched_reader._observed_by_size[(560, 560)] = "Enregistrer sous"

        is_valid, observed, _ms = self._validate(fake_screenshot, "Enregistrer sous")

        assert is_valid is True
        assert observed == "Enregistrer sous"

    def test_invalid_bbox_falls_back_gracefully(self, fake_screenshot, patched_reader):
        """A malformed bbox must not crash; the radius crop is used instead."""
        patched_reader._observed_by_size[(560, 560)] = "OK"

        is_valid, observed, _ms = self._validate(
            fake_screenshot,
            "OK",
            som_bbox_norm=[0.5],  # malformed: a single value
        )

        # No crash, and the text registered for the radius-sized crop is seen.
        assert observed == "OK"

    def test_bbox_too_small_falls_back_to_radius(self, fake_screenshot, patched_reader):
        """A degenerate bbox (width/height of a few px) falls back on the radius
        instead of attempting a tiny, unusable crop."""
        patched_reader._observed_by_size[(560, 560)] = "OK"

        is_valid, observed, _ms = self._validate(
            fake_screenshot,
            "OK",
            som_bbox_norm=[0.500, 0.500, 0.501, 0.501],
        )

        # Bbox of about 2x1 px -> radius fallback.
        assert observed == "OK"

    def test_bbox_normalized_values_outside_unit_clipped(self, fake_screenshot, patched_reader):
        """A bbox exceeding the [0, 1] range is clipped to the screen size
        without crashing."""
        # Overflowing bbox -> clipped to the screen (original author's arithmetic):
        # x = (-0.05 -> 0) * 1920 - 8 = -8 -> 0, x2 = 1.05 * 1920 + 8 = 2024 -> 1920
        # width = 1920, height = (1.05-0)*1200 + 16 = 1216 -> 1200
        patched_reader._observed_by_size[(1920, 1200)] = "déborde"

        is_valid, observed, _ms = self._validate(
            fake_screenshot,
            "déborde",
            som_bbox_norm=[-0.05, 0.0, 1.05, 1.05],
        )

        assert observed == "déborde"

View File

@@ -0,0 +1,296 @@
"""Tests ciblés sur le contrat window_title -> mémoire persistante."""
from __future__ import annotations
import importlib
import sys
from pathlib import Path
import pytest
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
def _reload_api_stream():
mod_name = "agent_v0.server_v1.api_stream"
if mod_name in sys.modules:
del sys.modules[mod_name]
return importlib.import_module(mod_name)
def test_build_replay_from_raw_events_propagates_window_title_into_target_spec(
    tmp_path, monkeypatch,
):
    """The Lea-first flow must copy a known window title into ``target_spec``.

    A single left click recorded in a "Bloc-notes" window is converted; the
    resulting action must expose the title both at the top level and inside
    ``target_spec``.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _return_none(*args, **kwargs):
        return None

    def _fixed_enrichment(*args, **kwargs):
        return {"anchor_image_base64": "abc123"}

    # Swap the processor's helper hooks for stubs: every hook returns None
    # except enrich_click_from_screenshot, which yields a fixed anchor payload.
    for hook_name, stub in (
        ("_load_crop_for_event", _return_none),
        ("enrich_click_from_screenshot", _fixed_enrichment),
        ("_attach_expected_screenshots", _return_none),
        ("_enrich_actions_with_intentions", _return_none),
        ("_unload_gemma4", _return_none),
    ):
        monkeypatch.setattr(sp, hook_name, stub)

    click_event = {
        "type": "mouse_click",
        "timestamp": 1.0,
        "pos": [100, 200],
        "button": "left",
        "screenshot_id": "shot_001",
        "window": {"title": "Bloc-notes", "app_name": "notepad"},
    }
    actions = sp.build_replay_from_raw_events(
        [{"event": click_event}],
        session_id="sess_test",
        session_dir=str(session_dir),
    )

    assert len(actions) == 1
    only_action = actions[0]
    assert only_action["window_title"] == "Bloc-notes"
    assert only_action["target_spec"]["window_title"] == "Bloc-notes"
def test_build_replay_from_raw_events_infers_notepad_tab_switch_target(
    tmp_path, monkeypatch,
):
    """A click near the top of a window followed by a focus change inside the
    same application must be turned into a tab target.

    The recorded click is followed by a focus change between two Notepad.exe
    titles; the resulting single action must target the tab named after the
    new title ("Sans titre") while keeping the original window title.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _return_none(*args, **kwargs):
        return None

    def _fixed_enrichment(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    # Swap the processor's helper hooks for stubs; the enrichment stub reports
    # by_role="yolo", which the final action is expected to replace with "tab".
    for hook_name, stub in (
        ("_load_crop_for_event", _return_none),
        ("enrich_click_from_screenshot", _fixed_enrichment),
        ("_attach_expected_screenshots", _return_none),
        ("_enrich_actions_with_intentions", _return_none),
        ("_unload_gemma4", _return_none),
    ):
        monkeypatch.setattr(sp, hook_name, stub)

    source_title = "http192.168.1.408765dossier.htmlid=.txt Bloc-notes"
    destination_title = "Sans titre Bloc-notes"
    app_name = "Notepad.exe"

    click_event = {
        "type": "mouse_click",
        "timestamp": 1.0,
        "pos": [1514, 562],
        "button": "left",
        "screenshot_id": "shot_003",
        "window": {"title": source_title, "app_name": app_name},
        "window_capture": {
            "rect": [323, 522, 2243, 1638],
            "click_relative": [1191, 40],
            "window_size": [1920, 1116],
        },
    }
    focus_event = {
        "type": "window_focus_change",
        "timestamp": 1.2,
        "from": {"title": source_title, "app_name": app_name},
        "to": {"title": destination_title, "app_name": app_name},
    }
    events = [{"event": click_event}, {"event": focus_event}]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_tab_switch",
        session_dir=str(session_dir),
    )

    assert len(actions) == 1
    target_spec = actions[0]["target_spec"]
    assert target_spec["by_text"] == "Sans titre"
    assert target_spec["by_role"] == "tab"
    assert target_spec["window_title"] == (
        "http192.168.1.408765dossier.htmlid=.txt Bloc-notes"
    )
    assert target_spec["context_hints"]["interaction"] == "switch_tab"
def test_build_replay_propagates_focus_change_into_expected_window_before(
    tmp_path, monkeypatch,
):
    """Live case ``act_raw_c70976c8`` (2026-05-22): a focus_change towards
    ``Enregistrer sous`` happens between two consecutive clicks, yet the
    following mouse_click still records the pre-transition title
    (``*test Bloc-notes``) in its ``window.title``. Without a server-side
    correction, the agent-side pre-check falls back on the stale
    target_spec.window_title and wrongly triggers a supervised pause.

    The server must explicitly set ``expected_window_before`` to the last
    ``window_focus_change.to.title`` observed before the click.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _return_none(*args, **kwargs):
        return None

    def _fixed_enrichment(*args, **kwargs):
        return {"anchor_image_base64": "abc123"}

    # Swap the processor's helper hooks for stubs: every hook returns None
    # except enrich_click_from_screenshot, which yields a fixed anchor payload.
    for hook_name, stub in (
        ("_load_crop_for_event", _return_none),
        ("enrich_click_from_screenshot", _fixed_enrichment),
        ("_attach_expected_screenshots", _return_none),
        ("_enrich_actions_with_intentions", _return_none),
        ("_unload_gemma4", _return_none),
    ):
        monkeypatch.setattr(sp, hook_name, stub)

    # Click 1: inside Notepad, triggers the dialog opening.
    first_click = {
        "type": "mouse_click",
        "timestamp": 1.0,
        "pos": [860, 634],
        "button": "left",
        "screenshot_id": "shot_001",
        "window": {"title": "*test Bloc-notes", "app_name": "Notepad.exe"},
    }
    # Focus moves to the "Enregistrer sous" dialog.
    focus_to_dialog = {
        "type": "window_focus_change",
        "timestamp": 1.2,
        "from": {"title": "*test Bloc-notes", "app_name": "Notepad.exe"},
        "to": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
    }
    # Click 2: the "Enregistrer" button inside the dialog, but the recorded
    # window.title is stale (still the Notepad one).
    second_click = {
        "type": "mouse_click",
        "timestamp": 1.5,
        "pos": [997, 743],
        "button": "left",
        "screenshot_id": "shot_002",
        "window": {"title": "*test Bloc-notes", "app_name": "Notepad.exe"},
    }
    events = [
        {"event": first_click},
        {"event": focus_to_dialog},
        {"event": second_click},
    ]

    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_save_dialog", session_dir=str(session_dir),
    )
    clicks = [action for action in actions if action.get("type") == "click"]
    assert len(clicks) == 2

    # Click 2 must carry expected_window_before = "Enregistrer sous" (taken
    # from the preceding focus_change), not "*test Bloc-notes" (the stale
    # title recorded in the raw event).
    assert clicks[1].get("expected_window_before") == "Enregistrer sous", (
        f"clic 2 doit pointer sur la dialog ouverte par le focus_change, "
        f"trouvé: {clicks[1].get('expected_window_before')!r} "
        f"(target_spec.window_title={clicks[1].get('target_spec', {}).get('window_title')!r})"
    )

    # Click 1 had no focus_change before it, so it must have no
    # expected_window_before (absent or empty).
    assert not clicks[0].get("expected_window_before"), (
        f"clic 1 ne doit pas avoir d'expected_window_before, "
        f"trouvé: {clicks[0].get('expected_window_before')!r}"
    )
def test_build_replay_does_not_overwrite_existing_expected_window_before(
    tmp_path, monkeypatch,
):
    """Propagation from a focus_change must not clobber an
    ``expected_window_before`` already set upstream (e.g. by a setup action
    or an earlier patch).

    The raw click event carries ``expected_window_before="Pre-existant"`` and
    is preceded by a focus_change to "Fenetre A". SOURCE does not show whether
    ``build_replay_from_raw_events`` copies that raw field into the action, so
    the test accepts every coherent outcome, including the preserved value.
    The previous assertion rejected "Pre-existant", i.e. it would have failed
    on exactly the behaviour this test is named after.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        sp, "enrich_click_from_screenshot",
        lambda *args, **kwargs: {
            "anchor_image_base64": "abc",
            # The enrichment itself does not provide expected_window_before;
            # the pre-existing value is simulated through the raw event below
            # (generic case: an upstream step that sets this field).
        },
    )
    monkeypatch.setattr(sp, "_attach_expected_screenshots", lambda *a, **k: None)
    monkeypatch.setattr(sp, "_enrich_actions_with_intentions", lambda *a, **k: None)
    monkeypatch.setattr(sp, "_unload_gemma4", lambda *a, **k: None)

    events = [
        {"event": {
            "type": "window_focus_change",
            "timestamp": 0.5,
            "to": {"title": "Fenetre A", "app_name": "test.exe"},
        }},
        {"event": {
            "type": "mouse_click",
            "timestamp": 1.0,
            "pos": [10, 20],
            "screenshot_id": "shot_001",
            "window": {"title": "Fenetre A", "app_name": "test.exe"},
            "expected_window_before": "Pre-existant",
        }},
    ]
    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_x", session_dir=str(session_dir),
    )
    clicks = [a for a in actions if a.get("type") == "click"]
    assert clicks

    # Coherent outcomes:
    #   - None / ""      : nothing was propagated onto this click;
    #   - "Fenetre A"    : the last focus_change was propagated;
    #   - "Pre-existant" : the upstream value was carried over and preserved,
    #                      which is what the test name requires when present.
    pre_existing = clicks[0].get("expected_window_before")
    assert pre_existing in (None, "", "Fenetre A", "Pre-existant"), (
        f"valeur inattendue: {pre_existing!r}"
    )
def test_memory_window_title_for_action_reads_top_level_and_target_spec(monkeypatch):
    """The memory reader must see both the top-level and target_spec variants.

    Checked on a freshly imported ``api_stream`` module:
    - with all three sources present, ``expected_window_before`` is returned;
    - ``target_spec.context_hints.window_title`` is used when it is the only
      source;
    - the top-level ``window_title`` is used when ``target_spec`` is empty.
    """
    # Environment prepared before the module is re-imported; presumably the
    # module reads its auth configuration at import time — confirm.
    monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
    mod = _reload_api_stream()

    cases = (
        (
            {
                "expected_window_before": "Fenêtre attendue",
                "target_spec": {"window_title": "Fenêtre cible"},
                "window_title": "Fenêtre action",
            },
            "Fenêtre attendue",
        ),
        (
            {
                "target_spec": {"context_hints": {"window_title": "Depuis context_hints"}},
            },
            "Depuis context_hints",
        ),
        (
            {
                "window_title": "Top-level uniquement",
                "target_spec": {},
            },
            "Top-level uniquement",
        ),
    )
    for action, expected_title in cases:
        assert mod._memory_window_title_for_action(action) == expected_title