Module isolé, 3 signaux indépendants :
- screen_static : similarité CLIP > 0.99 sur N captures consécutives
- action_repeat : N actions identiques (type + coords)
- retry_threshold : retried_actions >= seuil

Premier signal positif → LoopVerdict.detected=True (le caller est responsable de la bascule en paused_need_help).

Configurable par variables d'environnement : RPA_LOOP_DETECTOR_ENABLED (kill-switch), RPA_LOOP_SCREEN_STATIC_N/THRESHOLD, RPA_LOOP_ACTION_REPEAT_N, RPA_LOOP_RETRY_THRESHOLD.

Tests : 8 cas (chaque signal isolé, kill-switch, embedder absent, exception).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
97 lines
3.8 KiB
Python
97 lines
3.8 KiB
Python
# tests/unit/test_loop_detector.py
|
|
"""Tests unitaires pour LoopDetector composite (QW2)."""
|
|
import os
|
|
import pytest
|
|
from unittest.mock import MagicMock
|
|
|
|
from agent_v0.server_v1.loop_detector import LoopDetector, LoopVerdict
|
|
|
|
|
|
@pytest.fixture
def detector():
    """Provide a LoopDetector wired to a mocked CLIP embedder.

    The mock's ``embed_image`` hands back the same 3-component vector on
    every call, so the detector always has an embedder available for the
    screen-static signal (signal A).
    """
    # Configure the nested return value directly through the constructor;
    # every embed_image() call yields this constant vector.
    fake_embedder = MagicMock(**{"embed_image.return_value": [1.0, 0.0, 0.0]})
    return LoopDetector(clip_embedder=fake_embedder)
|
|
|
|
|
|
def _state(retried=0, n_screenshots=0, n_actions=0):
    """Build a minimal state dict to feed ``LoopDetector.evaluate`` in tests.

    Args:
        retried: Value stored under ``"retried_actions"``.
        n_screenshots: Number of ``[1.0, 0.0, 0.0]`` vectors placed in
            ``"_screenshot_history"``.
        n_actions: Number of identical click actions (centered at 0.5/0.5)
            placed in ``"_action_history"``.

    Returns:
        A dict with the keys ``"retried_actions"``, ``"_screenshot_history"``
        and ``"_action_history"``.
    """
    return {
        "retried_actions": retried,
        # Build each entry separately: ``[[...]] * n`` would repeat a single
        # shared list, so mutating one history entry would silently change
        # all of them.
        "_screenshot_history": [[1.0, 0.0, 0.0] for _ in range(n_screenshots)],
        "_action_history": [
            {"type": "click", "x_pct": 0.5, "y_pct": 0.5}
            for _ in range(n_actions)
        ],
    }
|
|
|
|
|
|
def test_screen_static_triggers_when_n_identical_embeddings(detector):
    """Signal A: four identical captures (similarity > 0.99) are flagged."""
    loop_state = _state(n_screenshots=4)
    captures = loop_state["_screenshot_history"]

    result = detector.evaluate(loop_state, screenshots=captures, actions=[])

    assert result.detected is True
    assert result.signal == "screen_static"
|
|
|
|
|
|
def test_screen_static_skipped_when_history_too_short(detector):
    """Signal A: fewer than N captures must not produce a detection."""
    loop_state = _state(n_screenshots=2)
    captures = loop_state["_screenshot_history"]

    result = detector.evaluate(loop_state, screenshots=captures, actions=[])

    # Only signal A had any input here and it is expected to skip; signals
    # B and C receive nothing, so the overall verdict is negative.
    assert result.detected is False
|
|
|
|
|
|
def test_action_repeat_triggers_when_n_identical_actions(detector):
    """Signal B: three identical consecutive actions are flagged."""
    loop_state = _state(n_actions=3)
    repeated = loop_state["_action_history"]

    result = detector.evaluate(loop_state, screenshots=[], actions=repeated)

    assert result.detected is True
    assert result.signal == "action_repeat"
|
|
|
|
|
|
def test_action_repeat_skipped_when_actions_differ(detector):
    """Signal B: clicks at different coordinates must not be flagged."""
    # Three clicks, each at its own (x_pct, y_pct) position.
    distinct_clicks = [
        {"type": "click", "x_pct": coord, "y_pct": coord}
        for coord in (0.1, 0.2, 0.3)
    ]

    result = detector.evaluate(_state(), screenshots=[], actions=distinct_clicks)

    assert result.detected is False
|
|
|
|
|
|
def test_retry_threshold_triggers_at_3(detector):
    """Signal C: ``retried_actions`` of 3 is enough to flag a loop."""
    result = detector.evaluate(_state(retried=3), screenshots=[], actions=[])

    assert result.detected is True
    assert result.signal == "retry_threshold"
|
|
|
|
|
|
def test_kill_switch_disables_all_signals(monkeypatch, detector):
    """With RPA_LOOP_DETECTOR_ENABLED=0 the verdict is always negative."""
    # NOTE(review): the ``detector`` fixture is built before this setenv
    # call runs, so the test presumably relies on the flag being read at
    # evaluate() time rather than in the constructor - confirm.
    monkeypatch.setenv("RPA_LOOP_DETECTOR_ENABLED", "0")

    # Inputs sized so that every signal would have material to work with.
    saturated = _state(retried=10, n_screenshots=10, n_actions=10)
    result = detector.evaluate(
        saturated,
        screenshots=saturated["_screenshot_history"],
        actions=saturated["_action_history"],
    )

    assert result.detected is False
|
|
|
|
|
|
def test_embedder_unavailable_skips_signal_A_continues_others():
    """Without a CLIP embedder, the remaining signals still work."""
    embedderless = LoopDetector(clip_embedder=None)

    # Drive signal C (retry threshold) only; no screenshots or actions.
    result = embedderless.evaluate(_state(retried=3), screenshots=[], actions=[])

    assert result.detected is True
    assert result.signal == "retry_threshold"
|
|
|
|
|
|
def test_embedder_exception_does_not_crash(detector):
    """An exception from ``embed_image`` must not escape ``evaluate``."""
    # Make every embed_image() call on the mocked embedder raise.
    detector.clip_embedder.embed_image.side_effect = RuntimeError("CUDA OOM")
    loop_state = _state(n_screenshots=4)
    captures = loop_state["_screenshot_history"]

    # Must NOT raise: signal A is expected to go inert instead.
    result = detector.evaluate(loop_state, screenshots=captures, actions=[])

    # Signal A is inert and B/C have no input, so nothing is detected.
    assert result.detected is False
|