Migre les call-sites VLM serveur vers la configuration centrale pour fonctionner sur DGX (tunnel Ollama 11434), où gemma4:* est absent et le port Docker 11435 est mort. - task_planner, replay_verifier, domain_context, ir_builder, resolve_engine (popup): modele -> vlm_config.get_vlm_model(), defaut 11435 -> 11434 (override GEMMA4_PORT legacy conserve) - resolve_engine (grounding bbox x2): nouvel helper vlm_config.get_bbox_grounding_model() (var dediee RPA_BBOX_GROUNDING_MODEL, fallback RPA_GROUNDING_MODEL puis qwen2.5vl:7b-rpa) -> desambiguise le conflit D5-v3b, bbox_2d + num_ctx 4096 preserves - safety_checks_provider: defaut -> get_vlm_model(), override RPA_SAFETY_CHECKS_LLM_MODEL preserve - ui_detector: default_factory + resolution lazy (corrige aussi un gel a l'import), pas d'appel reseau a l'import - field_extractor: property lazy via vlm_config TDD strict (RED->GREEN), 305 tests verts, tests mockes HTTP (zero dependance DGX reel), aucun alias Ollama. Hors perimetre (arbitrage Dom): client Lea agent_v1/executor.py (gele), chemin V4 observe_reason_act (RPA_REASONING_MODEL), core/config.py defaults. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
168 lines
7.3 KiB
Python
168 lines
7.3 KiB
Python
# tests/unit/test_safety_checks_provider.py
|
|
"""Tests unitaires SafetyChecksProvider (QW4)."""
|
|
import json
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from agent_v0.server_v1.safety_checks_provider import build_pause_payload, PausePayload
|
|
|
|
|
|
def _action(safety_level=None, declarative_checks=None, message="Validation"):
|
|
params = {"message": message}
|
|
if safety_level:
|
|
params["safety_level"] = safety_level
|
|
if declarative_checks is not None:
|
|
params["safety_checks"] = declarative_checks
|
|
return {"type": "pause_for_human", "parameters": params}
|
|
|
|
|
|
def test_only_declarative_when_no_safety_level():
|
|
"""Pas de safety_level → uniquement les checks déclaratifs, pas d'appel LLM."""
|
|
decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks") as mock_llm:
|
|
payload = build_pause_payload(_action(declarative_checks=decl), {}, last_screenshot=None)
|
|
mock_llm.assert_not_called()
|
|
assert len(payload.checks) == 1
|
|
assert payload.checks[0]["source"] == "declarative"
|
|
|
|
|
|
def test_default_pause_message_is_structured_not_validation_required():
|
|
"""Fallback humain: jamais 'Validation requise' seul."""
|
|
payload = build_pause_payload({"type": "pause_for_human", "parameters": {}}, {}, last_screenshot=None)
|
|
lines = payload.message.splitlines()
|
|
assert len(lines) == 4
|
|
assert lines[0].startswith("J'essaie de :")
|
|
assert "Validation requise" not in payload.message
|
|
|
|
|
|
def test_hybrid_appends_llm_checks_on_medical_critical(monkeypatch):
|
|
"""safety_level=medical_critical → LLM appelé, checks concaténés."""
|
|
decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
|
|
llm_resp = [{"label": "Nom patient suspect à l'écran", "evidence": "vu un nom différent"}]
|
|
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
|
|
return_value=llm_resp) as mock_llm:
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=decl),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
mock_llm.assert_called_once()
|
|
assert len(payload.checks) == 2
|
|
assert payload.checks[0]["source"] == "declarative"
|
|
assert payload.checks[1]["source"] == "llm_contextual"
|
|
assert payload.checks[1]["evidence"] == "vu un nom différent"
|
|
|
|
|
|
def test_llm_timeout_falls_back_to_declarative_only():
|
|
"""LLM timeout → additional_checks=[], pas de crash, déclaratifs gardés."""
|
|
decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
|
|
return_value=[]) as mock_llm:
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=decl),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
assert len(payload.checks) == 1
|
|
assert payload.checks[0]["source"] == "declarative"
|
|
|
|
|
|
def test_llm_invalid_response_falls_back():
|
|
"""Si _call_llm retourne [] (parse échoué en interne) → fallback safe."""
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
|
|
return_value=[]):
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=[]),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
assert payload.checks == []
|
|
|
|
|
|
def test_kill_switch_disables_llm_call(monkeypatch):
|
|
"""RPA_SAFETY_CHECKS_LLM_ENABLED=0 → LLM jamais appelé."""
|
|
monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_ENABLED", "0")
|
|
decl = [{"id": "c1", "label": "X", "required": True}]
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks") as mock_llm:
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=decl),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
mock_llm.assert_not_called()
|
|
assert len(payload.checks) == 1
|
|
|
|
|
|
def test_max_checks_respected(monkeypatch):
|
|
"""RPA_SAFETY_CHECKS_LLM_MAX_CHECKS=2 → max 2 checks LLM ajoutés."""
|
|
monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MAX_CHECKS", "2")
|
|
decl = []
|
|
llm_resp = [
|
|
{"label": f"Check {i}", "evidence": f"e{i}"} for i in range(5)
|
|
]
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
|
|
return_value=llm_resp[:2]): # provider tronque déjà
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=decl),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
assert len(payload.checks) == 2
|
|
|
|
|
|
def test_empty_declarative_with_llm_returns_only_llm():
|
|
"""Pas de déclaratif + LLM ajoute 2 checks → payload contient les 2."""
|
|
llm_resp = [{"label": "Vérifier date", "evidence": "date 1900 suspecte"},
|
|
{"label": "Vérifier devise", "evidence": "montant en USD au lieu d'EUR"}]
|
|
with patch("agent_v0.server_v1.safety_checks_provider._call_llm_for_contextual_checks",
|
|
return_value=llm_resp):
|
|
payload = build_pause_payload(
|
|
_action(safety_level="medical_critical", declarative_checks=[]),
|
|
{}, last_screenshot="/tmp/fake.png",
|
|
)
|
|
assert len(payload.checks) == 2
|
|
assert all(c["source"] == "llm_contextual" for c in payload.checks)
|
|
|
|
|
|
# ============================================================================
|
|
# Dé-hardcodage VLM (P1.x) : modèle safety-checks résolu via vlm_config
|
|
# ============================================================================
|
|
|
|
def _capture_generate(captured):
|
|
def fake_post(url, json=None, timeout=None):
|
|
captured["url"] = url
|
|
captured["model"] = (json or {}).get("model")
|
|
resp = MagicMock()
|
|
resp.status_code = 200
|
|
resp.json.return_value = {"response": '{"additional_checks": []}'}
|
|
return resp
|
|
return fake_post
|
|
|
|
|
|
def test_safety_checks_model_default_via_vlm_config(monkeypatch):
|
|
"""Sans RPA_SAFETY_CHECKS_LLM_MODEL, le modèle vient de vlm_config (pas gemma4 en dur)."""
|
|
monkeypatch.delenv("RPA_SAFETY_CHECKS_LLM_MODEL", raising=False)
|
|
captured = {}
|
|
|
|
import requests
|
|
monkeypatch.setattr(requests, "post", _capture_generate(captured))
|
|
|
|
from agent_v0.server_v1 import safety_checks_provider as scp
|
|
with patch.object(scp.vlm_config, "get_vlm_model", return_value="modele-resolu:test"):
|
|
scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])
|
|
|
|
assert captured["model"] == "modele-resolu:test"
|
|
assert ":11434" in captured["url"]
|
|
|
|
|
|
def test_safety_checks_model_env_override_preserved(monkeypatch):
|
|
"""RPA_SAFETY_CHECKS_LLM_MODEL reste prioritaire (override non cassé)."""
|
|
monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MODEL", "mon-modele-pin")
|
|
captured = {}
|
|
|
|
import requests
|
|
monkeypatch.setattr(requests, "post", _capture_generate(captured))
|
|
|
|
from agent_v0.server_v1 import safety_checks_provider as scp
|
|
# get_vlm_model ne doit pas écraser l'override : on le fait lever pour le prouver
|
|
with patch.object(scp.vlm_config, "get_vlm_model", side_effect=AssertionError("ne doit pas être appelé")):
|
|
scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])
|
|
|
|
assert captured["model"] == "mon-modele-pin"
|