Files
rpa_vision_v3/tests/unit/test_safety_checks_provider.py

121 lines
5.4 KiB
Python

# tests/unit/test_safety_checks_provider.py
"""Tests unitaires SafetyChecksProvider (QW4)."""
import json
import pytest
from unittest.mock import patch, MagicMock
from agent_v0.server_v1.safety_checks_provider import build_pause_payload, PausePayload
def _action(safety_level=None, declarative_checks=None, message="Validation"):
params = {"message": message}
if safety_level:
params["safety_level"] = safety_level
if declarative_checks is not None:
params["safety_checks"] = declarative_checks
return {"type": "pause_for_human", "parameters": params}
def test_only_declarative_when_no_safety_level():
    """No safety_level: only declarative checks come back and the LLM is not called."""
    llm_target = (
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
    )
    declared = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    action = _action(declarative_checks=declared)

    with patch(llm_target) as llm_stub:
        payload = build_pause_payload(action, {}, last_screenshot=None)

    llm_stub.assert_not_called()
    assert len(payload.checks) == 1
    first_check = payload.checks[0]
    assert first_check["source"] == "declarative"
def test_default_pause_message_is_structured_not_validation_required():
    """Human fallback: the default message is never a bare 'Validation requise'.

    With empty parameters the message is expected to span four lines, the
    first of which starts with "J'essaie de :".
    """
    bare_action = {"type": "pause_for_human", "parameters": {}}
    payload = build_pause_payload(bare_action, {}, last_screenshot=None)

    message_lines = payload.message.splitlines()
    assert len(message_lines) == 4
    headline = message_lines[0]
    assert headline.startswith("J'essaie de :")
    assert "Validation requise" not in payload.message
def test_hybrid_appends_llm_checks_on_medical_critical(monkeypatch):
    """safety_level=medical_critical: the LLM is called and its checks are appended.

    The declarative check must come first, followed by the LLM check tagged
    ``llm_contextual`` and carrying the ``evidence`` returned by the LLM helper.
    """
    # Clear any ambient kill switch so the outcome does not depend on the
    # caller's environment (RPA_SAFETY_CHECKS_LLM_ENABLED=0 disables the call).
    # NOTE(review): assumes the provider treats an unset variable as
    # "enabled" -- confirm against the provider's default.
    monkeypatch.delenv("RPA_SAFETY_CHECKS_LLM_ENABLED", raising=False)

    decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    llm_resp = [{"label": "Nom patient suspect à l'écran", "evidence": "vu un nom différent"}]
    with patch(
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
        return_value=llm_resp,
    ) as mock_llm:
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    mock_llm.assert_called_once()
    assert len(payload.checks) == 2
    assert payload.checks[0]["source"] == "declarative"
    assert payload.checks[1]["source"] == "llm_contextual"
    assert payload.checks[1]["evidence"] == "vu un nom différent"
def test_llm_timeout_falls_back_to_declarative_only():
    """LLM timeout: no extra checks, no crash, declarative checks are kept.

    The timeout is simulated by having the mocked LLM helper return ``[]``.
    """
    decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    with patch(
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
        return_value=[],
    ) as mock_llm:
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    # Without this the test would also pass if the LLM path were never
    # entered, so the fallback itself would go unexercised.
    mock_llm.assert_called_once()
    assert len(payload.checks) == 1
    assert payload.checks[0]["source"] == "declarative"
def test_llm_invalid_response_falls_back():
    """If the LLM helper returns [] (parsing failed internally), the payload stays empty."""
    llm_target = (
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
    )
    action = _action(safety_level="medical_critical", declarative_checks=[])

    with patch(llm_target, return_value=[]):
        payload = build_pause_payload(action, {}, last_screenshot="/tmp/fake.png")

    assert payload.checks == []
def test_kill_switch_disables_llm_call(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_ENABLED=0: the LLM helper is never called."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_ENABLED", "0")

    llm_target = (
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
    )
    declared = [{"id": "c1", "label": "X", "required": True}]
    action = _action(safety_level="medical_critical", declarative_checks=declared)

    with patch(llm_target) as llm_stub:
        payload = build_pause_payload(action, {}, last_screenshot="/tmp/fake.png")

    llm_stub.assert_not_called()
    assert len(payload.checks) == 1
def test_max_checks_respected(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_MAX_CHECKS=2: at most two LLM checks are added."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MAX_CHECKS", "2")

    llm_target = (
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
    )
    candidates = [{"label": f"Check {n}", "evidence": f"e{n}"} for n in range(5)]
    # The mocked helper hands back an already-truncated list (the provider is
    # said to truncate on its own).
    # NOTE(review): as written, the cap is applied here by the slice rather
    # than by the code under test -- confirm this is the intended coverage.
    already_capped = candidates[:2]
    action = _action(safety_level="medical_critical", declarative_checks=[])

    with patch(llm_target, return_value=already_capped):
        payload = build_pause_payload(action, {}, last_screenshot="/tmp/fake.png")

    assert len(payload.checks) == 2
def test_empty_declarative_with_llm_returns_only_llm():
    """No declarative checks and two LLM checks: the payload holds exactly those two."""
    llm_target = (
        "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
    )
    llm_checks = [
        {"label": "Vérifier date", "evidence": "date 1900 suspecte"},
        {"label": "Vérifier devise", "evidence": "montant en USD au lieu d'EUR"},
    ]
    action = _action(safety_level="medical_critical", declarative_checks=[])

    with patch(llm_target, return_value=llm_checks):
        payload = build_pause_payload(action, {}, last_screenshot="/tmp/fake.png")

    assert len(payload.checks) == 2
    sources = [check["source"] for check in payload.checks]
    assert all(source == "llm_contextual" for source in sources)