# rpa_vision_v3/tests/unit/test_vlm_grounding_profile.py

"""Tests pour D5-v2 : profil grounding VLM centralisé + generate_grounding().

Couvre :
- vlm_config.get_grounding_profile() avec valeurs par défaut et overrides env
- ollama_client.OllamaClient.generate_grounding() avec mocks requests.post
- Parsing JSON prefill-aware (reconstitution {"x_pct": ...} → dict)
- Pas d'appel Ollama live (tous les requests.post sont mockés)

Référence : inbox_claude/2026-05-25_1620_codex-to-claude_GO-revue-strategique-D5v2-C2d.md
Fix : core/detection/vlm_config.py (get_grounding_profile) +
      core/detection/ollama_client.py (generate_grounding, _extract_first_json_object)
"""
from __future__ import annotations

import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest


# ROOT is the third ancestor directory of this file (parents[2]). It is pushed
# to the front of sys.path unless an identical entry is already present —
# presumably so the in-test `core.detection` imports resolve; confirm against
# the repository layout.
ROOT = Path(__file__).resolve().parents[2]
if all(entry != str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))


# ────────────────────────────────────────────────────────────────────────────
# vlm_config.get_grounding_profile
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_grounding_profile_defaults(monkeypatch):
    """With no env vars set, the profile exposes the D5-v2 default values."""
    env_names = (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    )
    for env_name in env_names:
        monkeypatch.delenv(env_name, raising=False)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    expected_defaults = {
        "model": "qwen3.5:9b",
        "num_ctx": 4096,
        "prefill": '{"x_pct":',
        "temperature": 0.0,
        "num_predict": 96,
        "fallback_model": "qwen2.5vl:7b-rpa",
        "keep_alive": "30m",
    }
    for key, expected in expected_defaults.items():
        assert profile[key] == expected
    # qwen3.5 is treated as a thinking model, so the profile must expose
    # think as the boolean False (meaning "send think:false" in the payload).
    assert profile["think"] is False


@pytest.mark.unit
def test_grounding_profile_env_override(monkeypatch):
    """Env vars override the model, the context size and the fallback model."""
    overrides = {
        "RPA_GROUNDING_MODEL": "qwen2.5vl:7b-rpa",
        "RPA_GROUNDING_CTX": "8192",
        "RPA_GROUNDING_FALLBACK": "gemma4:latest",
    }
    for env_name, env_value in overrides.items():
        monkeypatch.setenv(env_name, env_value)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()
    assert profile["model"] == "qwen2.5vl:7b-rpa"
    assert profile["num_ctx"] == 8192
    assert profile["fallback_model"] == "gemma4:latest"
    # qwen2.5vl is neither a thinking model nor gemma4, so think is expected
    # to be True (nothing to send).
    assert profile["think"] is True


@pytest.mark.unit
def test_grounding_profile_ctx_invalid_falls_back_to_default(monkeypatch):
    """A non-numeric RPA_GROUNDING_CTX must yield the default num_ctx of 4096."""
    monkeypatch.setenv("RPA_GROUNDING_CTX", "not_a_number")
    from core.detection.vlm_config import get_grounding_profile

    context_size = get_grounding_profile()["num_ctx"]
    assert context_size == 4096


@pytest.mark.unit
def test_grounding_profile_prefill_disabled(monkeypatch):
    """RPA_VLM_PREFILL=false must turn the profile's prefill into None."""
    monkeypatch.setenv("RPA_VLM_PREFILL", "false")
    from core.detection.vlm_config import get_grounding_profile

    prefill = get_grounding_profile()["prefill"]
    assert prefill is None


@pytest.mark.unit
def test_grounding_profile_gemma4_triggers_think_false(monkeypatch):
    """Selecting gemma4 as grounding model must produce think=False."""
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "gemma4:latest")
    from core.detection.vlm_config import get_grounding_profile

    think_flag = get_grounding_profile()["think"]
    # gemma4 needs think=false
    assert think_flag is False


# ────────────────────────────────────────────────────────────────────────────
# get_bbox_grounding_model (D5-v3b : modèle bbox_2d dédié, désambiguïsé)
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_bbox_grounding_model_default(monkeypatch):
    """Without env vars, the bbox model equals DEFAULT_GROUNDING_FALLBACK (qwen2.5vl:7b-rpa)."""
    for env_name in ("RPA_BBOX_GROUNDING_MODEL", "RPA_GROUNDING_MODEL"):
        monkeypatch.delenv(env_name, raising=False)
    from core.detection.vlm_config import get_bbox_grounding_model, DEFAULT_GROUNDING_FALLBACK

    # Chained comparison: the resolved model must equal the constant, and the
    # constant itself must be the expected literal.
    assert get_bbox_grounding_model() == DEFAULT_GROUNDING_FALLBACK == "qwen2.5vl:7b-rpa"


@pytest.mark.unit
def test_bbox_grounding_model_dedicated_env(monkeypatch):
    """RPA_BBOX_GROUNDING_MODEL wins when both env vars are set."""
    monkeypatch.setenv("RPA_BBOX_GROUNDING_MODEL", "qwen2.5vl:32b")
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen3.5:9b")
    from core.detection.vlm_config import get_bbox_grounding_model

    resolved_model = get_bbox_grounding_model()
    assert resolved_model == "qwen2.5vl:32b"


@pytest.mark.unit
def test_bbox_grounding_model_legacy_compat(monkeypatch):
    """Without the dedicated var, RPA_GROUNDING_MODEL is still honoured (backward compat)."""
    monkeypatch.delenv("RPA_BBOX_GROUNDING_MODEL", raising=False)
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen2.5vl:7b-rpa")
    from core.detection.vlm_config import get_bbox_grounding_model

    resolved_model = get_bbox_grounding_model()
    assert resolved_model == "qwen2.5vl:7b-rpa"


# ────────────────────────────────────────────────────────────────────────────
# _extract_first_json_object
# ────────────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_extract_first_json_object_clean():
    """A clean JSON string is returned as the equivalent dict."""
    from core.detection.ollama_client import _extract_first_json_object

    raw_text = '{"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}'
    expected = {"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}
    assert _extract_first_json_object(raw_text) == expected


@pytest.mark.unit
def test_extract_first_json_object_with_trailing_text():
    """JSON followed by stray prose (typical VLM output) is still extracted."""
    from core.detection.ollama_client import _extract_first_json_object

    noisy_reply = '{"x_pct": 0.4, "y_pct": 0.6, "confidence": 0.88}\n\nThe button is located in the bottom-right area.'
    extracted = _extract_first_json_object(noisy_reply)
    for key, expected in (("x_pct", 0.4), ("confidence", 0.88)):
        assert extracted[key] == expected


@pytest.mark.unit
def test_extract_first_json_object_with_nested():
    """A JSON object containing a nested object is parsed as a whole."""
    from core.detection.ollama_client import _extract_first_json_object

    nested_text = '{"x_pct": 0.5, "meta": {"source": "qwen", "score": 0.9}}'
    inner = _extract_first_json_object(nested_text)["meta"]
    assert inner["source"] == "qwen"


@pytest.mark.unit
def test_extract_first_json_object_with_braces_in_strings():
    """Braces inside string values must not confuse the object delimiting."""
    from core.detection.ollama_client import _extract_first_json_object

    tricky_text = '{"x_pct": 0.5, "label": "Click {here}"}'
    label = _extract_first_json_object(tricky_text)["label"]
    assert label == "Click {here}"


@pytest.mark.unit
def test_extract_first_json_object_invalid():
    """Text containing no JSON object yields None."""
    from core.detection.ollama_client import _extract_first_json_object

    outcome = _extract_first_json_object("no json here at all")
    assert outcome is None


@pytest.mark.unit
def test_extract_first_json_object_empty():
    """Both the empty string and None yield None."""
    from core.detection.ollama_client import _extract_first_json_object

    for blank_input in ("", None):
        assert _extract_first_json_object(blank_input) is None


# ────────────────────────────────────────────────────────────────────────────
# OllamaClient.generate_grounding (mocks requests.post)
# ────────────────────────────────────────────────────────────────────────────


@pytest.fixture
def mock_client(monkeypatch):
    """Provide an OllamaClient whose connection check is stubbed out."""
    from core.detection.ollama_client import OllamaClient

    def _always_connected(self):
        # Stand-in for _check_connection: report success unconditionally.
        return True

    monkeypatch.setattr(OllamaClient, "_check_connection", _always_connected)
    # The model is passed explicitly; per the original author's note this
    # avoids get_vlm_model(), which would call Ollama.
    return OllamaClient(model="qwen2.5vl:7b-rpa")


@pytest.mark.unit
def test_generate_grounding_payload_uses_profile(mock_client, monkeypatch):
    """The payload posted to Ollama reflects the default grounding profile.

    Checks the endpoint, model, ``num_ctx``, ``temperature``, ``num_predict``,
    the ``think`` flag and the trailing assistant prefill message.
    """
    # The assertions below pin the *default* profile, so every env var that can
    # override it is cleared (same set as test_grounding_profile_defaults).
    # Otherwise an ambient RPA_GROUNDING_MODEL / RPA_GROUNDING_CTX would make
    # this test fail for reasons unrelated to the code under test.
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    captured = {}

    def fake_post(url, json=None, timeout=None):
        # Parameter names mirror the keyword arguments accepted from the
        # client call, hence the `json` name shadowing the module here.
        captured["url"] = url
        captured["payload"] = json
        # Simulated reply: Ollama returns the content that comes AFTER the prefill.
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = {
            "message": {
                "content": ' 0.5, "y_pct": 0.3, "confidence": 0.95}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="Find the Save button")

    # Fail with a readable message rather than a KeyError if no request was made.
    assert "payload" in captured, "generate_grounding() never called requests.post"

    assert captured["url"].endswith("/api/chat")
    payload = captured["payload"]
    assert payload["model"] == "qwen3.5:9b"  # default grounding model
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["temperature"] == 0.0
    assert payload["options"]["num_predict"] == 96
    # qwen3.5 is treated as a thinking model → think=false in the payload
    assert payload.get("think") is False
    # The prefill must be the content of the last (assistant) message
    last_msg = payload["messages"][-1]
    assert last_msg["role"] == "assistant"
    assert last_msg["content"] == '{"x_pct":'


@pytest.mark.unit
def test_generate_grounding_parses_prefilled_json(mock_client, monkeypatch):
    """The prefill plus the model continuation is rebuilt into JSON and parsed.

    Also checks that the result exposes the profile that was used.
    """
    # ``profile_used`` is compared against the default model / ctx below, so
    # all overriding env vars are cleared, not only RPA_VLM_PREFILL.
    for env_name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(env_name, raising=False)

    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 200
        # Ollama replies WITHOUT the prefill (the client is expected to re-add it)
        resp.json.return_value = {
            "message": {
                "content": ' 0.42, "y_pct": 0.68, "confidence": 0.91}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="Find OK button")

    assert result["success"] is True
    # response holds the complete, reconstituted JSON text
    assert result["response"].startswith('{"x_pct":')
    # parsed_json is the dict rebuilt from that text
    parsed = result["parsed_json"]
    assert parsed is not None
    assert parsed["x_pct"] == 0.42
    assert parsed["y_pct"] == 0.68
    assert parsed["confidence"] == 0.91
    # profile_used is exposed to the caller
    assert result["profile_used"]["model"] == "qwen3.5:9b"
    assert result["profile_used"]["num_ctx"] == 4096


@pytest.mark.unit
def test_generate_grounding_restores_original_model(mock_client, monkeypatch):
    """generate_grounding() must leave ``self.model`` as it was before the call."""
    model_before = mock_client.model  # qwen2.5vl:7b-rpa

    def fake_post(url, json=None, timeout=None):
        reply = MagicMock(status_code=200)
        reply.json.return_value = {"message": {"content": ' 0.5, "y_pct": 0.5, "confidence": 0.5}'}}
        return reply

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)
    mock_client.generate_grounding(prompt="test")

    model_after = mock_client.model
    assert model_after == model_before, (
        f"self.model doit être restauré ({model_before}), trouvé : {model_after}"
    )


@pytest.mark.unit
def test_generate_grounding_handles_ollama_error(mock_client, monkeypatch):
    """On an HTTP 500 from Ollama: success=False, parsed_json=None, model restored."""
    # Snapshot the model so the "model restored" part of the contract stated
    # above is actually verified on the error path too.
    original = mock_client.model

    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 500
        resp.text = "Internal error"
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)
    result = mock_client.generate_grounding(prompt="test")
    assert result["success"] is False
    assert result["parsed_json"] is None
    assert mock_client.model == original, (
        f"self.model must be restored after an Ollama error "
        f"(expected {original}, found {mock_client.model})"
    )


@pytest.mark.unit
def test_generate_grounding_profile_override(mock_client, monkeypatch):
    """An explicit ``profile`` argument drives the payload sent to Ollama."""
    sent = {}

    def fake_post(url, json=None, timeout=None):
        sent["payload"] = json
        reply = MagicMock(status_code=200)
        reply.json.return_value = {"message": {"content": ' 0.1, "y_pct": 0.1, "confidence": 0.5}'}}
        return reply

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    custom_profile = {
        "model": "gemma4:latest",
        "num_ctx": 2048,
        "prefill": None,
        "temperature": 0.2,
        "num_predict": 50,
        "think": False,
        "keep_alive": "5m",
        "fallback_model": "qwen2.5vl:7b-rpa",
    }
    mock_client.generate_grounding(prompt="test", profile=custom_profile)

    request_body = sent["payload"]
    request_options = request_body["options"]
    assert request_body["model"] == "gemma4:latest"
    assert request_options["num_ctx"] == 2048
    assert request_options["temperature"] == 0.2
    # No prefill → the last message is the user's, not an assistant one
    final_message = request_body["messages"][-1]
    assert final_message["role"] == "user"
    # gemma4 needs think=false → it must be injected into the payload
    assert request_body.get("think") is False