Migre les call-sites VLM serveur vers la configuration centrale pour fonctionner sur DGX (tunnel Ollama 11434), où gemma4:* est absent et le port Docker 11435 est mort. - task_planner, replay_verifier, domain_context, ir_builder, resolve_engine (popup): modele -> vlm_config.get_vlm_model(), defaut 11435 -> 11434 (override GEMMA4_PORT legacy conserve) - resolve_engine (grounding bbox x2): nouvel helper vlm_config.get_bbox_grounding_model() (var dediee RPA_BBOX_GROUNDING_MODEL, fallback RPA_GROUNDING_MODEL puis qwen2.5vl:7b-rpa) -> desambiguise le conflit D5-v3b, bbox_2d + num_ctx 4096 preserves - safety_checks_provider: defaut -> get_vlm_model(), override RPA_SAFETY_CHECKS_LLM_MODEL preserve - ui_detector: default_factory + resolution lazy (corrige aussi un gel a l'import), pas d'appel reseau a l'import - field_extractor: property lazy via vlm_config TDD strict (RED->GREEN), 305 tests verts, tests mockes HTTP (zero dependance DGX reel), aucun alias Ollama. Hors perimetre (arbitrage Dom): client Lea agent_v1/executor.py (gele), chemin V4 observe_reason_act (RPA_REASONING_MODEL), core/config.py defaults. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
346 lines
14 KiB
Python
346 lines
14 KiB
Python
"""Tests pour D5-v2 : profil grounding VLM centralisé + generate_grounding().
|
|
|
|
Couvre :
|
|
- vlm_config.get_grounding_profile() avec valeurs par défaut et overrides env
|
|
- ollama_client.OllamaClient.generate_grounding() avec mocks requests.post
|
|
- Parsing JSON prefill-aware (reconstitution {"x_pct": ...} → dict)
|
|
- Pas d'appel Ollama live (tous les requests.post sont mockés)
|
|
|
|
Référence : inbox_claude/2026-05-25_1620_codex-to-claude_GO-revue-strategique-D5v2-C2d.md
|
|
Fix : core/detection/vlm_config.py (get_grounding_profile) +
|
|
core/detection/ollama_client.py (generate_grounding, _extract_first_json_object)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
# Directory two levels above this file's directory; prepended to sys.path
# (once) so the ``core`` package imports used by the tests below resolve.
ROOT = Path(__file__).resolve().parents[2]
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# vlm_config.get_grounding_profile
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
def test_grounding_profile_defaults(monkeypatch):
    """With no env overrides, the profile exposes the D5-v2 default values."""
    env_names = (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    )
    for env_name in env_names:
        monkeypatch.delenv(env_name, raising=False)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    expected_defaults = {
        "model": "qwen3.5:9b",
        "num_ctx": 4096,
        "prefill": '{"x_pct":',
        "temperature": 0.0,
        "num_predict": 96,
        "fallback_model": "qwen2.5vl:7b-rpa",
        "keep_alive": "30m",
    }
    for field, wanted in expected_defaults.items():
        assert profile[field] == wanted
    # qwen3.5 is a thinking model, so think must be False on the payload side.
    # The profile exposes think as a bool; False means "send think:false".
    assert profile["think"] is False
|
|
|
|
|
|
@pytest.mark.unit
def test_grounding_profile_env_override(monkeypatch):
    """Environment variables override the model, context size and fallback."""
    overrides = {
        "RPA_GROUNDING_MODEL": "qwen2.5vl:7b-rpa",
        "RPA_GROUNDING_CTX": "8192",
        "RPA_GROUNDING_FALLBACK": "gemma4:latest",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    assert profile["model"] == "qwen2.5vl:7b-rpa"
    # The context size is exposed as an int even though the env value is a string.
    assert profile["num_ctx"] == 8192
    assert profile["fallback_model"] == "gemma4:latest"
    # qwen2.5vl is NOT a thinking model and NOT gemma4 -> think=True
    # (nothing to send in the payload).
    assert profile["think"] is True
|
|
|
|
|
|
@pytest.mark.unit
def test_grounding_profile_ctx_invalid_falls_back_to_default(monkeypatch):
    """A non-numeric RPA_GROUNDING_CTX falls back to 4096."""
    monkeypatch.setenv("RPA_GROUNDING_CTX", "not_a_number")
    from core.detection.vlm_config import get_grounding_profile

    num_ctx = get_grounding_profile()["num_ctx"]
    assert num_ctx == 4096
|
|
|
|
|
|
@pytest.mark.unit
def test_grounding_profile_prefill_disabled(monkeypatch):
    """RPA_VLM_PREFILL=false turns the prefill off (profile value is None)."""
    monkeypatch.setenv("RPA_VLM_PREFILL", "false")
    from core.detection.vlm_config import get_grounding_profile

    prefill = get_grounding_profile()["prefill"]
    assert prefill is None
|
|
|
|
|
|
@pytest.mark.unit
def test_grounding_profile_gemma4_triggers_think_false(monkeypatch):
    """Switching the grounding model to gemma4 must yield think=False."""
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "gemma4:latest")
    from core.detection.vlm_config import get_grounding_profile

    # gemma4 needs think=false
    think_flag = get_grounding_profile()["think"]
    assert think_flag is False
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# get_bbox_grounding_model (D5-v3b : modèle bbox_2d dédié, désambiguïsé)
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
def test_bbox_grounding_model_default(monkeypatch):
    """With no env set, the bbox model is DEFAULT_GROUNDING_FALLBACK (qwen2.5vl:7b-rpa)."""
    for env_name in ("RPA_BBOX_GROUNDING_MODEL", "RPA_GROUNDING_MODEL"):
        monkeypatch.delenv(env_name, raising=False)
    from core.detection.vlm_config import get_bbox_grounding_model, DEFAULT_GROUNDING_FALLBACK

    resolved = get_bbox_grounding_model()
    # Two checks: the helper returns the shared constant, and that constant
    # still carries the expected model tag.
    assert resolved == DEFAULT_GROUNDING_FALLBACK
    assert DEFAULT_GROUNDING_FALLBACK == "qwen2.5vl:7b-rpa"
|
|
|
|
|
|
@pytest.mark.unit
def test_bbox_grounding_model_dedicated_env(monkeypatch):
    """RPA_BBOX_GROUNDING_MODEL wins over RPA_GROUNDING_MODEL when both are set."""
    dedicated_model = "qwen2.5vl:32b"
    monkeypatch.setenv("RPA_BBOX_GROUNDING_MODEL", dedicated_model)
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen3.5:9b")
    from core.detection.vlm_config import get_bbox_grounding_model

    resolved = get_bbox_grounding_model()
    assert resolved == dedicated_model
|
|
|
|
|
|
@pytest.mark.unit
def test_bbox_grounding_model_legacy_compat(monkeypatch):
    """Without the dedicated var, RPA_GROUNDING_MODEL is still honoured (backward compat).

    The legacy value used here is deliberately different from the built-in
    default ("qwen2.5vl:7b-rpa"). With the default value the assertion would
    also pass if RPA_GROUNDING_MODEL were silently ignored, so the test would
    prove nothing about the legacy fallback.
    """
    legacy_model = "qwen2.5vl:32b"
    monkeypatch.delenv("RPA_BBOX_GROUNDING_MODEL", raising=False)
    monkeypatch.setenv("RPA_GROUNDING_MODEL", legacy_model)
    from core.detection.vlm_config import get_bbox_grounding_model

    assert get_bbox_grounding_model() == legacy_model
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# _extract_first_json_object
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_clean():
    """A bare, well-formed JSON object is returned as the matching dict."""
    from core.detection.ollama_client import _extract_first_json_object

    raw_text = '{"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}'
    expected = {"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}
    assert _extract_first_json_object(raw_text) == expected
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_with_trailing_text():
    """JSON followed by stray prose (typical VLM output) is still extracted."""
    from core.detection.ollama_client import _extract_first_json_object

    json_part = '{"x_pct": 0.4, "y_pct": 0.6, "confidence": 0.88}'
    trailing_prose = '\n\nThe button is located in the bottom-right area.'
    extracted = _extract_first_json_object(json_part + trailing_prose)
    assert extracted["x_pct"] == 0.4
    assert extracted["confidence"] == 0.88
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_with_nested():
    """A JSON object containing a nested object is parsed in full."""
    from core.detection.ollama_client import _extract_first_json_object

    extracted = _extract_first_json_object(
        '{"x_pct": 0.5, "meta": {"source": "qwen", "score": 0.9}}'
    )
    inner = extracted["meta"]
    assert inner["source"] == "qwen"
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_with_braces_in_strings():
    """Braces inside string values must not disturb the brace counter."""
    from core.detection.ollama_client import _extract_first_json_object

    extracted = _extract_first_json_object('{"x_pct": 0.5, "label": "Click {here}"}')
    label = extracted["label"]
    assert label == "Click {here}"
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_invalid():
    """Text that contains no JSON object yields None."""
    from core.detection.ollama_client import _extract_first_json_object

    extracted = _extract_first_json_object("no json here at all")
    assert extracted is None
|
|
|
|
|
|
@pytest.mark.unit
def test_extract_first_json_object_empty():
    """Empty input (empty string, then None) yields None."""
    from core.detection.ollama_client import _extract_first_json_object

    for blank_input in ("", None):
        assert _extract_first_json_object(blank_input) is None
|
|
|
|
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
# OllamaClient.generate_grounding (mocks requests.post)
|
|
# ────────────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture
def mock_client(monkeypatch):
    """Build an OllamaClient without any real connection to Ollama."""
    from core.detection import ollama_client as client_module

    def _always_connected(self):
        # Stand-in for OllamaClient._check_connection: report success.
        return True

    # Bypass _check_connection
    monkeypatch.setattr(client_module.OllamaClient, "_check_connection", _always_connected)
    # Pin the model explicitly to avoid get_vlm_model(), which calls Ollama.
    return client_module.OllamaClient(model="qwen2.5vl:7b-rpa")
|
|
|
|
|
|
@pytest.mark.unit
def test_generate_grounding_payload_uses_profile(mock_client, monkeypatch):
    """The payload sent to Ollama is built from the grounding profile.

    Checks model, num_ctx, temperature, num_predict, the think flag and the
    prefill message. The assertions pin the *default* profile values, so every
    env var that can override the profile is cleared first (the same set as
    test_grounding_profile_defaults). Otherwise an ambient RPA_GROUNDING_MODEL
    or RPA_GROUNDING_CTX in the developer/CI environment would fail the test.
    """
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    captured = {}

    # The parameter must be named ``json`` because it stands in for the
    # ``json=`` keyword of requests.post.
    def fake_post(url, json=None, timeout=None):
        captured["url"] = url
        captured["payload"] = json
        # Simulated reply: Ollama returns the content that comes AFTER the prefill.
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = {
            "message": {
                "content": ' 0.5, "y_pct": 0.3, "confidence": 0.95}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="Find the Save button")

    # Inspect the captured request.
    assert captured["url"].endswith("/api/chat")
    payload = captured["payload"]
    assert payload["model"] == "qwen3.5:9b"  # grounding default
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["temperature"] == 0.0
    assert payload["options"]["num_predict"] == 96
    # qwen3.5 is a thinking model -> think=false in the payload
    assert payload.get("think") is False
    # The prefill must be carried by the last message, with the assistant role.
    last_msg = payload["messages"][-1]
    assert last_msg["role"] == "assistant"
    assert last_msg["content"] == '{"x_pct":'
|
|
|
|
|
|
@pytest.mark.unit
def test_generate_grounding_parses_prefilled_json(mock_client, monkeypatch):
    """The JSON is rebuilt from prefill + model output, then parsed.

    ``profile_used`` is asserted against the default model and context size,
    so all grounding env overrides are cleared first (the same set as
    test_grounding_profile_defaults) to keep the test independent of the
    ambient environment.
    """
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    # The parameter must be named ``json`` because it stands in for the
    # ``json=`` keyword of requests.post.
    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 200
        # Ollama answers WITHOUT the prefill (the client adds it back).
        resp.json.return_value = {
            "message": {
                "content": ' 0.42, "y_pct": 0.68, "confidence": 0.91}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="Find OK button")

    assert result["success"] is True
    # "response" holds the full, reconstructed JSON text.
    assert result["response"].startswith('{"x_pct":')
    # "parsed_json" is the dict rebuilt from that text.
    parsed = result["parsed_json"]
    assert parsed is not None
    assert parsed["x_pct"] == 0.42
    assert parsed["y_pct"] == 0.68
    assert parsed["confidence"] == 0.91
    # The profile that was applied is exposed to the caller.
    assert result["profile_used"]["model"] == "qwen3.5:9b"
    assert result["profile_used"]["num_ctx"] == 4096
|
|
|
|
|
|
@pytest.mark.unit
def test_generate_grounding_restores_original_model(mock_client, monkeypatch):
    """After generate_grounding returns, self.model is back to its prior value."""
    original = mock_client.model  # qwen2.5vl:7b-rpa

    # The parameter must be named ``json`` because it stands in for the
    # ``json=`` keyword of requests.post.
    def fake_post(url, json=None, timeout=None):
        canned = MagicMock(status_code=200)
        canned.json.return_value = {
            "message": {"content": ' 0.5, "y_pct": 0.5, "confidence": 0.5}'},
        }
        return canned

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)
    mock_client.generate_grounding(prompt="test")

    model_after = mock_client.model
    assert model_after == original, (
        f"self.model doit être restauré ({original}), trouvé : {mock_client.model}"
    )
|
|
|
|
|
|
@pytest.mark.unit
def test_generate_grounding_handles_ollama_error(mock_client, monkeypatch):
    """On an HTTP 500 from Ollama: success=False, parsed_json=None, model restored.

    The model-restoration part of this contract was stated in the original
    docstring but never asserted; it is now checked explicitly so the error
    path is held to the same standard as the success path.
    """
    original = mock_client.model

    # The parameter must be named ``json`` because it stands in for the
    # ``json=`` keyword of requests.post.
    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 500
        resp.text = "Internal error"
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)
    result = mock_client.generate_grounding(prompt="test")

    assert result["success"] is False
    assert result["parsed_json"] is None
    assert mock_client.model == original, (
        f"self.model must be restored after an error ({original}), "
        f"found: {mock_client.model}"
    )
|
|
|
|
|
|
@pytest.mark.unit
def test_generate_grounding_profile_override(mock_client, monkeypatch):
    """An explicit profile passed to the call is what ends up in the payload."""
    # Profile handed straight to generate_grounding (for tests / special contexts).
    custom_profile = dict(
        model="gemma4:latest",
        num_ctx=2048,
        prefill=None,
        temperature=0.2,
        num_predict=50,
        think=False,
        keep_alive="5m",
        fallback_model="qwen2.5vl:7b-rpa",
    )
    captured = {}

    # The parameter must be named ``json`` because it stands in for the
    # ``json=`` keyword of requests.post.
    def fake_post(url, json=None, timeout=None):
        captured["payload"] = json
        canned = MagicMock(status_code=200)
        canned.json.return_value = {
            "message": {"content": ' 0.1, "y_pct": 0.1, "confidence": 0.5}'},
        }
        return canned

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="test", profile=custom_profile)

    sent = captured["payload"]
    options = sent["options"]
    assert sent["model"] == "gemma4:latest"
    assert options["num_ctx"] == 2048
    assert options["temperature"] == 0.2
    # No prefill -> no trailing assistant message.
    assert sent["messages"][-1]["role"] == "user"
    # gemma4 needs think=false -> injected into the payload.
    assert sent.get("think") is False
|