"""Tests for the server-side vLLM client (image + prompt -> text).

The network POST is injectable (`post_fn`), so the client can be tested
without a running vLLM. The client serves as the `vlm_client` passed to
`extract_dossier_from_image` in the runtime handler.
"""

import pytest

from core.extraction.vlm_client import build_chat_body, img_data_url, make_vllm_client


def _png(tmp_path, w=2000, h=1000):
    """Save a plain white RGB PNG of size ``w`` x ``h`` and return its path.

    The file is written as ``x.png`` under ``tmp_path``; the path is returned
    as a ``str``.
    """
    # Imported lazily, as in the original, so Pillow is only needed when a
    # test actually builds an image.
    from PIL import Image

    target = tmp_path / "x.png"
    white = (255, 255, 255)
    Image.new("RGB", (w, h), white).save(target)
    return str(target)


class _Resp:
    """Minimal fake HTTP response exposing ``status_code``, ``text`` and ``json()``."""

    def __init__(self, code, payload=None, text=""):
        self.status_code = code
        # Any falsy payload (None, {}, ...) is replaced by a fresh empty dict.
        self._payload = payload if payload else {}
        self.text = text

    def json(self):
        """Return the payload supplied at construction time."""
        return self._payload


def test_img_data_url_downscale(tmp_path):
    """`img_data_url` returns a base64 PNG data URL for a 2000px-wide image."""
    image_path = _png(tmp_path)
    data_url = img_data_url(image_path, max_w=1280)
    assert data_url.startswith("data:image/png;base64,")


def test_build_chat_body_structure(tmp_path):
    """The chat body carries model, token limit, thinking flag, image and prompt."""
    request = build_chat_body(
        _png(tmp_path),
        "PROMPT",
        model="M",
        max_tokens=1500,
        max_w=1280,
    )

    assert request["model"] == "M"
    assert request["max_tokens"] == 1500
    # Thinking disabled (checked yesterday: think=on -> empty/slow output).
    assert request["chat_template_kwargs"]["enable_thinking"] is False

    parts = request["messages"][0]["content"]
    assert any(part["type"] == "image_url" for part in parts)
    assert any(part["type"] == "text" and part["text"] == "PROMPT" for part in parts)


def test_client_retourne_content(tmp_path):
    """The client returns the message content and posts to the chat endpoint."""
    captured = {}

    def fake_post(url, json=None, headers=None, timeout=None):
        # Record what the client sent so it can be inspected after the call.
        captured.update(url=url, body=json)
        reply = {"choices": [{"message": {"content": "REPONSE"}}]}
        return _Resp(200, reply)

    client = make_vllm_client(model="M", post_fn=fake_post)
    answer = client(_png(tmp_path), "PROMPT")

    assert answer == "REPONSE"
    assert "/v1/chat/completions" in captured["url"]
    sent_content = captured["body"]["messages"][0]["content"]
    assert sent_content[1]["text"] == "PROMPT"


def test_client_erreur_status_leve(tmp_path):
    """A 500 response from the POST makes the client raise `RuntimeError`."""

    def fake_post(url, json=None, headers=None, timeout=None):
        return _Resp(500, text="boom")

    client = make_vllm_client(post_fn=fake_post)
    image_path = _png(tmp_path)
    with pytest.raises(RuntimeError):
        client(image_path, "PROMPT")