# tests/unit/test_safety_checks_provider.py
"""Unit tests for SafetyChecksProvider (QW4)."""

import json
from unittest.mock import MagicMock, patch

import pytest

from agent_v0.server_v1.safety_checks_provider import build_pause_payload, PausePayload

# Dotted path of the LLM helper that most tests below replace with a mock.
_LLM_CALL_TARGET = (
    "agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks"
)


def _action(safety_level=None, declarative_checks=None, message="Validation"):
    """Build a ``pause_for_human`` action dict for the tests.

    ``safety_level`` is only added to the parameters when it is truthy;
    ``declarative_checks`` is added under the ``safety_checks`` key whenever
    it is not ``None`` (so an empty list is passed through explicitly).
    """
    params = {"message": message}
    if safety_level:
        params["safety_level"] = safety_level
    if declarative_checks is not None:
        params["safety_checks"] = declarative_checks
    return {"type": "pause_for_human", "parameters": params}


def test_only_declarative_when_no_safety_level():
    """No safety_level → only the declarative checks, no LLM call."""
    decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    with patch(_LLM_CALL_TARGET) as mock_llm:
        payload = build_pause_payload(
            _action(declarative_checks=decl), {}, last_screenshot=None
        )
    mock_llm.assert_not_called()
    assert len(payload.checks) == 1
    assert payload.checks[0]["source"] == "declarative"


def test_default_pause_message_is_structured_not_validation_required():
    """Human fallback: never the bare 'Validation requise' message."""
    payload = build_pause_payload(
        {"type": "pause_for_human", "parameters": {}}, {}, last_screenshot=None
    )
    lines = payload.message.splitlines()
    assert len(lines) == 4
    assert lines[0].startswith("J'essaie de :")
    assert "Validation requise" not in payload.message


def test_hybrid_appends_llm_checks_on_medical_critical():
    """safety_level=medical_critical → LLM is called, checks are concatenated."""
    decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    llm_resp = [
        {"label": "Nom patient suspect à l'écran", "evidence": "vu un nom différent"}
    ]
    with patch(_LLM_CALL_TARGET, return_value=llm_resp) as mock_llm:
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    mock_llm.assert_called_once()
    assert len(payload.checks) == 2
    # Declarative checks come first, LLM-provided ones are appended after.
    assert payload.checks[0]["source"] == "declarative"
    assert payload.checks[1]["source"] == "llm_contextual"
    assert payload.checks[1]["evidence"] == "vu un nom différent"


def test_llm_timeout_falls_back_to_declarative_only():
    """LLM timeout → additional_checks=[], no crash, declarative checks kept."""
    decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
    # The helper is mocked to return [] — the value this test uses to stand in
    # for a timeout.
    with patch(_LLM_CALL_TARGET, return_value=[]) as mock_llm:
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    # Prove the fallback went through the LLM path rather than skipping it.
    mock_llm.assert_called_once()
    assert len(payload.checks) == 1
    assert payload.checks[0]["source"] == "declarative"


def test_llm_invalid_response_falls_back():
    """If the LLM helper returns [] (internal parse failure) → safe fallback."""
    with patch(_LLM_CALL_TARGET, return_value=[]):
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=[]),
            {},
            last_screenshot="/tmp/fake.png",
        )
    assert payload.checks == []


def test_kill_switch_disables_llm_call(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_ENABLED=0 → the LLM is never called."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_ENABLED", "0")
    decl = [{"id": "c1", "label": "X", "required": True}]
    with patch(_LLM_CALL_TARGET) as mock_llm:
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    mock_llm.assert_not_called()
    assert len(payload.checks) == 1


def test_max_checks_respected(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_MAX_CHECKS=2 → at most 2 LLM checks are added."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MAX_CHECKS", "2")
    decl = []
    llm_resp = [{"label": f"Check {i}", "evidence": f"e{i}"} for i in range(5)]
    # The provider already truncates, so the mock hands back the first two.
    # NOTE(review): because the helper is mocked with a pre-truncated list,
    # this test does not itself exercise the truncation logic — confirm that
    # it is covered elsewhere.
    with patch(_LLM_CALL_TARGET, return_value=llm_resp[:2]):
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=decl),
            {},
            last_screenshot="/tmp/fake.png",
        )
    assert len(payload.checks) == 2


def test_empty_declarative_with_llm_returns_only_llm():
    """No declarative checks + LLM adds 2 checks → payload holds those 2."""
    llm_resp = [
        {"label": "Vérifier date", "evidence": "date 1900 suspecte"},
        {"label": "Vérifier devise", "evidence": "montant en USD au lieu d'EUR"},
    ]
    with patch(_LLM_CALL_TARGET, return_value=llm_resp):
        payload = build_pause_payload(
            _action(safety_level="medical_critical", declarative_checks=[]),
            {},
            last_screenshot="/tmp/fake.png",
        )
    assert len(payload.checks) == 2
    assert all(c["source"] == "llm_contextual" for c in payload.checks)


# ============================================================================
# VLM de-hardcoding (P1.x): safety-checks model resolved via vlm_config
# ============================================================================


def _capture_generate(captured):
    """Return a stand-in for ``requests.post`` that records its arguments.

    The returned callable stores the request URL and the ``model`` field of
    the JSON body into ``captured`` and answers with a mock response whose
    status code is 200 and whose ``.json()`` yields an empty
    ``additional_checks`` list.
    """

    # The keyword is named ``json`` to match how the fake is invoked; it
    # shadows the ``json`` module inside this function only.
    def fake_post(url, json=None, timeout=None):
        captured["url"] = url
        captured["model"] = (json or {}).get("model")
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = {"response": '{"additional_checks": []}'}
        return resp

    return fake_post


def test_safety_checks_model_default_via_vlm_config(monkeypatch):
    """Without RPA_SAFETY_CHECKS_LLM_MODEL, the model comes from vlm_config (no hard-coded gemma4)."""
    monkeypatch.delenv("RPA_SAFETY_CHECKS_LLM_MODEL", raising=False)
    captured = {}
    import requests

    monkeypatch.setattr(requests, "post", _capture_generate(captured))
    from agent_v0.server_v1 import safety_checks_provider as scp

    with patch.object(scp.vlm_config, "get_vlm_model", return_value="modele-resolu:test"):
        scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])
    assert captured["model"] == "modele-resolu:test"
    assert ":11434" in captured["url"]


def test_safety_checks_model_env_override_preserved(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_MODEL keeps priority (the override is not broken)."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MODEL", "mon-modele-pin")
    captured = {}
    import requests

    monkeypatch.setattr(requests, "post", _capture_generate(captured))
    from agent_v0.server_v1 import safety_checks_provider as scp

    # get_vlm_model must not overwrite the override: make it raise to prove it.
    with patch.object(
        scp.vlm_config,
        "get_vlm_model",
        side_effect=AssertionError("ne doit pas être appelé"),
    ):
        scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])
    assert captured["model"] == "mon-modele-pin"