replay_state enrichi de _screenshot_history (5 dernières images PIL) et _action_history (5 dernières signatures d'action).

report_action_result :
- met à jour les deux anneaux après chaque action
- évalue le LoopDetector (singleton lazy avec le _clip_embedder du serveur)
- si detected → bascule en paused_need_help avec pause_reason="loop_detected" et émet l'événement bus lea:loop_detected (signal + evidence)

Tous les chemins d'erreur (embedder absent, OOM, exception) loggent et laissent le replay continuer — aucun blocage par la couche de détection.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
62 lines
2.5 KiB
Python
62 lines
2.5 KiB
Python
# tests/integration/test_loop_detector_replay.py
|
|
"""Integration tests: a simulated replay that loops switches to paused_need_help."""
|
|
import pytest
|
|
from unittest.mock import MagicMock
|
|
|
|
from agent_v0.server_v1.loop_detector import LoopDetector
|
|
|
|
|
|
def test_replay_state_transitions_to_paused_on_screen_static():
    """Case: 4 screenshots with identical embeddings -> paused_need_help.

    The mocked embedder returns the same vector for every image, so the
    detector is expected to report a loop with the ``screen_static`` signal.
    The test then applies the state transition the caller would perform and
    checks the resulting replay state.
    """
    embedder = MagicMock()
    # Constant embedding: every screenshot looks the same to the detector.
    embedder.embed_image.return_value = [1.0, 0.0, 0.0]
    detector = LoopDetector(clip_embedder=embedder)

    state = {
        "replay_id": "r_test",
        "status": "running",
        "retried_actions": 0,
        "_screenshot_history": ["img1", "img2", "img3", "img4"],  # 4 dummy images
        "_action_history": [
            {"type": "click", "x_pct": 0.1, "y_pct": 0.1},
            {"type": "type", "x_pct": 0.2, "y_pct": 0.2},
        ],
    }
    verdict = detector.evaluate(
        state, state["_screenshot_history"], state["_action_history"]
    )

    # Assert the verdict itself instead of guarding the transition with an
    # `if`: a detector that does not fire now fails here, on the root cause,
    # rather than on a downstream "status is still running" mismatch.
    assert verdict.detected, "loop detector did not fire on a static screen"

    # Simulate what api_stream would do after the verdict.
    state["status"] = "paused_need_help"
    state["pause_reason"] = verdict.reason
    state["pause_message"] = f"signal={verdict.signal}"

    assert state["status"] == "paused_need_help"
    assert state["pause_reason"] == "loop_detected"
    assert "screen_static" in state["pause_message"]
|
|
|
|
|
|
def test_replay_state_transitions_on_action_repeat():
    """Case: 3 identical actions -> verdict detected with signal action_repeat.

    No embedder is supplied and the screenshot history is empty, so only the
    action history can drive the verdict.
    """
    repeated_click = {"type": "click", "x_pct": 0.5, "y_pct": 0.5}
    # The same dict object three times, exactly as the history is built here.
    actions = [repeated_click] * 3
    state = {
        "replay_id": "r2",
        "status": "running",
        "retried_actions": 0,
        "_screenshot_history": [],
        "_action_history": actions,
    }
    detector = LoopDetector(clip_embedder=None)

    verdict = detector.evaluate(state, [], actions)

    assert verdict.detected
    assert verdict.signal == "action_repeat"
|
|
|
|
|
|
def test_kill_switch_keeps_replay_running(monkeypatch):
    """With RPA_LOOP_DETECTOR_ENABLED=0 nothing is detected, even on a loop.

    The histories hold 10 identical screenshots and 10 identical actions;
    the verdict must still come back with ``detected`` set to False.
    """
    # Set the switch before building the detector, as in the original order.
    monkeypatch.setenv("RPA_LOOP_DETECTOR_ENABLED", "0")

    constant_embedder = MagicMock()
    constant_embedder.embed_image.return_value = [1.0, 0.0, 0.0]
    detector = LoopDetector(clip_embedder=constant_embedder)

    screenshots = ["img1"] * 10
    actions = [{"type": "click", "x_pct": 0.5, "y_pct": 0.5}] * 10
    state = {
        "retried_actions": 10,
        "_screenshot_history": screenshots,
        "_action_history": actions,
    }

    verdict = detector.evaluate(state, screenshots, actions)

    assert verdict.detected is False
|