Files
rpa_vision_v3/tests/unit/test_vlm_grounding_profile.py
Dom 4dc7d840d6 feat(p1x): de-hardcode VLM models/endpoints to vlm_config (DGX-ready)
Migre les call-sites VLM serveur vers la configuration centrale pour
fonctionner sur DGX (tunnel Ollama 11434), où gemma4:* est absent et le
port Docker 11435 est mort.

- task_planner, replay_verifier, domain_context, ir_builder, resolve_engine
  (popup): modele -> vlm_config.get_vlm_model(), defaut 11435 -> 11434
  (override GEMMA4_PORT legacy conserve)
- resolve_engine (grounding bbox x2): nouvel helper
  vlm_config.get_bbox_grounding_model() (var dediee RPA_BBOX_GROUNDING_MODEL,
  fallback RPA_GROUNDING_MODEL puis qwen2.5vl:7b-rpa) -> desambiguise le
  conflit D5-v3b, bbox_2d + num_ctx 4096 preserves
- safety_checks_provider: defaut -> get_vlm_model(), override
  RPA_SAFETY_CHECKS_LLM_MODEL preserve
- ui_detector: default_factory + resolution lazy (corrige aussi un gel a
  l'import), pas d'appel reseau a l'import
- field_extractor: property lazy via vlm_config

TDD strict (RED->GREEN), 305 tests verts, tests mockes HTTP (zero dependance
DGX reel), aucun alias Ollama.

Hors perimetre (arbitrage Dom): client Lea agent_v1/executor.py (gele),
chemin V4 observe_reason_act (RPA_REASONING_MODEL), core/config.py defaults.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:06:03 +02:00

346 lines
14 KiB
Python

"""Tests pour D5-v2 : profil grounding VLM centralisé + generate_grounding().
Couvre :
- vlm_config.get_grounding_profile() avec valeurs par défaut et overrides env
- ollama_client.OllamaClient.generate_grounding() avec mocks requests.post
- Parsing JSON prefill-aware (reconstitution {"x_pct": ...} → dict)
- Pas d'appel Ollama live (tous les requests.post sont mockés)
Référence : inbox_claude/2026-05-25_1620_codex-to-claude_GO-revue-strategique-D5v2-C2d.md
Fix : core/detection/vlm_config.py (get_grounding_profile) +
core/detection/ollama_client.py (generate_grounding, _extract_first_json_object)
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
# ROOT is the third ancestor of this file (the directory two levels above the
# folder containing it). It is pushed to the front of sys.path unless already
# listed — presumably so the `core.*` imports used by the tests resolve when
# the suite is launched from another working directory.
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
# ────────────────────────────────────────────────────────────────────────────
# vlm_config.get_grounding_profile
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_grounding_profile_defaults(monkeypatch):
    """With no env overrides, the profile exposes the D5-v2 default values."""
    profile_env_vars = (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    )
    for env_name in profile_env_vars:
        monkeypatch.delenv(env_name, raising=False)

    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    assert profile["model"] == "qwen3.5:9b"
    assert profile["num_ctx"] == 4096
    assert profile["prefill"] == '{"x_pct":'
    assert profile["temperature"] == 0.0
    assert profile["num_predict"] == 96
    assert profile["fallback_model"] == "qwen2.5vl:7b-rpa"
    assert profile["keep_alive"] == "30m"
    # qwen3.5 is a thinking model, so thinking must be switched off in the
    # payload. The profile exposes `think` as a bool; False means
    # "send think:false".
    assert profile["think"] is False
@pytest.mark.unit
def test_grounding_profile_env_override(monkeypatch):
    """Environment variables override the model, context size and fallback."""
    overrides = {
        "RPA_GROUNDING_MODEL": "qwen2.5vl:7b-rpa",
        "RPA_GROUNDING_CTX": "8192",
        "RPA_GROUNDING_FALLBACK": "gemma4:latest",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)

    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    assert profile["model"] == "qwen2.5vl:7b-rpa"
    assert profile["num_ctx"] == 8192
    assert profile["fallback_model"] == "gemma4:latest"
    # qwen2.5vl is neither a thinking model nor gemma4, so think stays True
    # (nothing extra to send in the payload).
    assert profile["think"] is True
@pytest.mark.unit
def test_grounding_profile_ctx_invalid_falls_back_to_default(monkeypatch):
    """A non-numeric RPA_GROUNDING_CTX yields the default num_ctx of 4096."""
    monkeypatch.setenv("RPA_GROUNDING_CTX", "not_a_number")

    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    assert profile["num_ctx"] == 4096
@pytest.mark.unit
def test_grounding_profile_prefill_disabled(monkeypatch):
    """RPA_VLM_PREFILL=false turns the prefill off (profile value is None)."""
    monkeypatch.setenv("RPA_VLM_PREFILL", "false")

    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    assert profile["prefill"] is None
@pytest.mark.unit
def test_grounding_profile_gemma4_triggers_think_false(monkeypatch):
    """Selecting a gemma4 model makes the profile request think=false."""
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "gemma4:latest")

    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    # gemma4 needs think=false
    assert profile["think"] is False
# ────────────────────────────────────────────────────────────────────────────
# get_bbox_grounding_model (D5-v3b : modèle bbox_2d dédié, désambiguïsé)
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_bbox_grounding_model_default(monkeypatch):
    """With neither env var set, the bbox model is DEFAULT_GROUNDING_FALLBACK.

    The constant itself is also pinned to "qwen2.5vl:7b-rpa".
    """
    for env_name in ("RPA_BBOX_GROUNDING_MODEL", "RPA_GROUNDING_MODEL"):
        monkeypatch.delenv(env_name, raising=False)

    from core.detection.vlm_config import (
        DEFAULT_GROUNDING_FALLBACK,
        get_bbox_grounding_model,
    )

    resolved = get_bbox_grounding_model()
    assert resolved == DEFAULT_GROUNDING_FALLBACK
    assert DEFAULT_GROUNDING_FALLBACK == "qwen2.5vl:7b-rpa"
@pytest.mark.unit
def test_bbox_grounding_model_dedicated_env(monkeypatch):
    """RPA_BBOX_GROUNDING_MODEL wins over RPA_GROUNDING_MODEL when both are set."""
    monkeypatch.setenv("RPA_BBOX_GROUNDING_MODEL", "qwen2.5vl:32b")
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen3.5:9b")

    from core.detection.vlm_config import get_bbox_grounding_model

    resolved = get_bbox_grounding_model()
    assert resolved == "qwen2.5vl:32b"
@pytest.mark.unit
def test_bbox_grounding_model_legacy_compat(monkeypatch):
    """Without the dedicated var, RPA_GROUNDING_MODEL is still honoured.

    The legacy value is deliberately different from DEFAULT_GROUNDING_FALLBACK:
    if both were equal, the test would also pass when RPA_GROUNDING_MODEL is
    silently ignored and the built-in default is returned.
    """
    legacy_model = "qwen2.5vl:32b"
    monkeypatch.delenv("RPA_BBOX_GROUNDING_MODEL", raising=False)
    monkeypatch.setenv("RPA_GROUNDING_MODEL", legacy_model)

    from core.detection.vlm_config import (
        DEFAULT_GROUNDING_FALLBACK,
        get_bbox_grounding_model,
    )

    # Guard the premise of the test: the legacy value must not be the default.
    assert legacy_model != DEFAULT_GROUNDING_FALLBACK
    assert get_bbox_grounding_model() == legacy_model
# ────────────────────────────────────────────────────────────────────────────
# _extract_first_json_object
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_extract_first_json_object_clean():
    """A string that is exactly one JSON object parses to the matching dict."""
    from core.detection.ollama_client import _extract_first_json_object

    raw = '{"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}'
    expected = {"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}
    assert _extract_first_json_object(raw) == expected
@pytest.mark.unit
def test_extract_first_json_object_with_trailing_text():
    """A JSON object followed by stray prose (typical VLM output) still parses."""
    from core.detection.ollama_client import _extract_first_json_object

    noisy = (
        '{"x_pct": 0.4, "y_pct": 0.6, "confidence": 0.88}'
        "\n\nThe button is located in the bottom-right area."
    )
    parsed = _extract_first_json_object(noisy)
    assert parsed["x_pct"] == 0.4
    assert parsed["confidence"] == 0.88
@pytest.mark.unit
def test_extract_first_json_object_with_nested():
    """A nested object inside the JSON is preserved in the parsed result."""
    from core.detection.ollama_client import _extract_first_json_object

    parsed = _extract_first_json_object(
        '{"x_pct": 0.5, "meta": {"source": "qwen", "score": 0.9}}'
    )
    assert parsed["meta"]["source"] == "qwen"
@pytest.mark.unit
def test_extract_first_json_object_with_braces_in_strings():
    """Braces inside a string value must not confuse the extraction."""
    from core.detection.ollama_client import _extract_first_json_object

    parsed = _extract_first_json_object('{"x_pct": 0.5, "label": "Click {here}"}')
    assert parsed["label"] == "Click {here}"
@pytest.mark.unit
def test_extract_first_json_object_invalid():
    """Text containing no JSON object yields None."""
    from core.detection.ollama_client import _extract_first_json_object

    outcome = _extract_first_json_object("no json here at all")
    assert outcome is None
@pytest.mark.unit
def test_extract_first_json_object_empty():
    """Both an empty string and None yield None."""
    from core.detection.ollama_client import _extract_first_json_object

    for blank_input in ("", None):
        assert _extract_first_json_object(blank_input) is None
# ────────────────────────────────────────────────────────────────────────────
# OllamaClient.generate_grounding (mocks requests.post)
# ────────────────────────────────────────────────────────────────────────────
@pytest.fixture
def mock_client(monkeypatch):
    """Provide an OllamaClient built without a real connection to Ollama."""
    from core.detection import ollama_client as oc_module

    def _always_connected(self):
        # Stand-in for OllamaClient._check_connection: report success.
        return True

    monkeypatch.setattr(oc_module.OllamaClient, "_check_connection", _always_connected)
    # The model is passed explicitly to avoid get_vlm_model(), which calls Ollama.
    return oc_module.OllamaClient(model="qwen2.5vl:7b-rpa")
@pytest.mark.unit
def test_generate_grounding_payload_uses_profile(mock_client, monkeypatch):
    """The payload posted to Ollama is built from the default grounding profile.

    Checks the endpoint, model, num_ctx, temperature, num_predict, the
    think=false flag and the assistant prefill message.
    """
    # The assertions below pin the *default* profile, so every variable that
    # feeds the profile is cleared; otherwise a host exporting e.g.
    # RPA_GROUNDING_MODEL would make this test fail for environmental reasons.
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    captured = {}

    def fake_post(url, json=None, timeout=None):
        """Record the outgoing request and return a canned HTTP 200 reply."""
        captured["url"] = url
        captured["payload"] = json
        # Simulated reply: Ollama returns the content that comes AFTER the prefill.
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = {
            "message": {
                "content": ' 0.5, "y_pct": 0.3, "confidence": 0.95}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="Find the Save button")

    assert captured["url"].endswith("/api/chat")
    payload = captured["payload"]
    assert payload["model"] == "qwen3.5:9b"  # default grounding model
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["temperature"] == 0.0
    assert payload["options"]["num_predict"] == 96
    # qwen3.5 is a thinking model, so think=false must be in the payload.
    assert payload.get("think") is False
    # The prefill must be carried by the last message, with the assistant role.
    last_msg = payload["messages"][-1]
    assert last_msg["role"] == "assistant"
    assert last_msg["content"] == '{"x_pct":'
@pytest.mark.unit
def test_generate_grounding_parses_prefilled_json(mock_client, monkeypatch):
    """The JSON is rebuilt from prefill + model output and parsed into a dict."""
    # `profile_used` is compared against the default model and num_ctx below,
    # so every variable that feeds the profile is cleared to keep the test
    # independent of the host environment.
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    def fake_post(url, json=None, timeout=None):
        """Return a canned HTTP 200 reply whose content lacks the prefill."""
        resp = MagicMock()
        resp.status_code = 200
        # Ollama answers WITHOUT the prefill (the client adds it back).
        resp.json.return_value = {
            "message": {
                "content": ' 0.42, "y_pct": 0.68, "confidence": 0.91}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="Find OK button")

    assert result["success"] is True
    # `response` holds the complete, rebuilt JSON text.
    assert result["response"].startswith('{"x_pct":')
    # `parsed_json` is the dict parsed from that text.
    parsed = result["parsed_json"]
    assert parsed is not None
    assert parsed["x_pct"] == 0.42
    assert parsed["y_pct"] == 0.68
    assert parsed["confidence"] == 0.91
    # The profile that was applied is exposed to the caller.
    assert result["profile_used"]["model"] == "qwen3.5:9b"
    assert result["profile_used"]["num_ctx"] == 4096
@pytest.mark.unit
def test_generate_grounding_restores_original_model(mock_client, monkeypatch):
    """generate_grounding leaves self.model as it was before the call."""
    model_before = mock_client.model

    def fake_post(url, json=None, timeout=None):
        """Return a canned HTTP 200 reply with a parsable JSON tail."""
        reply = MagicMock(status_code=200)
        reply.json.return_value = {
            "message": {"content": ' 0.5, "y_pct": 0.5, "confidence": 0.5}'}
        }
        return reply

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="test")

    assert mock_client.model == model_before, (
        f"self.model doit être restauré ({model_before}), trouvé : {mock_client.model}"
    )
@pytest.mark.unit
def test_generate_grounding_handles_ollama_error(mock_client, monkeypatch):
    """On an HTTP 500 from Ollama: success=False, parsed_json=None, model restored."""
    original_model = mock_client.model

    def fake_post(url, json=None, timeout=None):
        """Return a canned HTTP 500 reply."""
        resp = MagicMock()
        resp.status_code = 500
        resp.text = "Internal error"
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="test")

    assert result["success"] is False
    assert result["parsed_json"] is None
    # The docstring promises restoration on the error path as well; assert it
    # so a regression there cannot go unnoticed.
    assert mock_client.model == original_model, (
        f"self.model must be restored after an error ({original_model}), "
        f"found: {mock_client.model}"
    )
@pytest.mark.unit
def test_generate_grounding_profile_override(mock_client, monkeypatch):
    """An explicit `profile` argument drives the payload (tests / special contexts)."""
    captured = {}

    def fake_post(url, json=None, timeout=None):
        """Record the outgoing payload and return a canned HTTP 200 reply."""
        captured["payload"] = json
        reply = MagicMock(status_code=200)
        reply.json.return_value = {
            "message": {"content": ' 0.1, "y_pct": 0.1, "confidence": 0.5}'}
        }
        return reply

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    custom_profile = {
        "model": "gemma4:latest",
        "num_ctx": 2048,
        "prefill": None,
        "temperature": 0.2,
        "num_predict": 50,
        "think": False,
        "keep_alive": "5m",
        "fallback_model": "qwen2.5vl:7b-rpa",
    }
    mock_client.generate_grounding(prompt="test", profile=custom_profile)

    sent = captured["payload"]
    assert sent["model"] == "gemma4:latest"
    assert sent["options"]["num_ctx"] == 2048
    assert sent["options"]["temperature"] == 0.2
    # No prefill, so no assistant message: the last message is the user's.
    assert sent["messages"][-1]["role"] == "user"
    # gemma4 needs think=false, so it is injected into the payload.
    assert sent.get("think") is False