feat(p1x): de-hardcode VLM models/endpoints to vlm_config (DGX-ready)
Migre les call-sites VLM serveur vers la configuration centrale pour fonctionner sur DGX (tunnel Ollama 11434), où gemma4:* est absent et le port Docker 11435 est mort. - task_planner, replay_verifier, domain_context, ir_builder, resolve_engine (popup): modèle -> vlm_config.get_vlm_model(), défaut 11435 -> 11434 (override GEMMA4_PORT legacy conservé) - resolve_engine (grounding bbox x2): nouvel helper vlm_config.get_bbox_grounding_model() (var dédiée RPA_BBOX_GROUNDING_MODEL, fallback RPA_GROUNDING_MODEL puis qwen2.5vl:7b-rpa) -> désambiguïse le conflit D5-v3b, bbox_2d + num_ctx 4096 préservés - safety_checks_provider: défaut -> get_vlm_model(), override RPA_SAFETY_CHECKS_LLM_MODEL préservé - ui_detector: default_factory + résolution lazy (corrige aussi un gel à l'import), pas d'appel réseau à l'import - field_extractor: property lazy via vlm_config. TDD strict (RED->GREEN), 305 tests verts, tests mockés HTTP (zéro dépendance DGX réel), aucun alias Ollama. Hors périmètre (arbitrage Dom): client Léa agent_v1/executor.py (gelé), chemin V4 observe_reason_act (RPA_REASONING_MODEL), core/config.py defaults. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -51,6 +51,8 @@ import unicodedata
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -399,7 +401,10 @@ class DomainContext:
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
port = os.environ.get("GEMMA4_PORT", "11435")
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
port = os.environ.get("GEMMA4_PORT", _default_port)
|
||||
url = f"http://localhost:{port}/api/chat"
|
||||
|
||||
base = ""
|
||||
@@ -427,7 +432,7 @@ class DomainContext:
|
||||
resp = _requests.post(
|
||||
url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"options": {"temperature": 0.3, "num_predict": 200},
|
||||
|
||||
@@ -20,6 +20,8 @@ import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Seuils de détection configurables
|
||||
@@ -434,7 +436,7 @@ class ReplayVerifier:
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Appeler le VLM pour évaluer sémantiquement le résultat de l'action.
|
||||
|
||||
Utilise gemma4 en mode texte+images (Docker port 11435) pour analyser
|
||||
Utilise le VLM (résolu via vlm_config) en mode texte+images pour analyser
|
||||
les screenshots avant/après et dire si le résultat attendu est atteint.
|
||||
|
||||
Sur Citrix (image plate), c'est la SEULE façon de vérifier intelligemment
|
||||
@@ -449,7 +451,10 @@ class ReplayVerifier:
|
||||
if not screenshot_after:
|
||||
return None
|
||||
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", _default_port)
|
||||
gemma4_url = f"http://localhost:{gemma4_port}/api/chat"
|
||||
|
||||
# Construire le prompt Critic
|
||||
@@ -497,7 +502,7 @@ class ReplayVerifier:
|
||||
resp = _requests.post(
|
||||
gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": True,
|
||||
|
||||
@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.grounding.bbox_parser import parse_bbox_to_norm, parse_bbox_to_norm_validated
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger("api_stream")
|
||||
|
||||
@@ -878,8 +879,8 @@ def _resolve_by_grounding(
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Résoudre une cible via grounding VLM direct.
|
||||
|
||||
Le modèle VLM (gemma4:e4b par défaut, configurable via RPA_VLM_MODEL)
|
||||
reçoit le screenshot + une description textuelle et retourne
|
||||
Le modèle de grounding bbox (résolu via vlm_config.get_bbox_grounding_model,
|
||||
défaut qwen2.5vl:7b-rpa) reçoit le screenshot + une description et retourne
|
||||
directement les coordonnées de l'élément. Pas de SomEngine,
|
||||
pas de numérotation — le VLM fait du grounding UI natif.
|
||||
|
||||
@@ -944,7 +945,9 @@ def _resolve_by_grounding(
|
||||
# Le grounding nécessite un modèle entraîné pour les coordonnées (bbox_2d).
|
||||
# Qwen2.5-VL est le seul qui retourne des positions précises.
|
||||
# gemma4 comprend les images mais ne sait pas localiser en coordonnées.
|
||||
_grounding_model = os.environ.get("RPA_GROUNDING_MODEL", "qwen2.5vl:7b")
|
||||
# D5-v3b : résolution via helper dédié (var RPA_BBOX_GROUNDING_MODEL,
|
||||
# défaut qwen2.5vl:7b-rpa présent sur DGX) — désambiguïse RPA_GROUNDING_MODEL.
|
||||
_grounding_model = vlm_config.get_bbox_grounding_model()
|
||||
|
||||
# Appel VLM — vLLM (GPU, rapide) en priorité, Ollama en fallback
|
||||
import requests as _requests
|
||||
@@ -2909,7 +2912,7 @@ def _pre_analyze_screen_sync(
|
||||
) -> Dict[str, Any]:
|
||||
"""Pré-analyse synchrone de l'écran via VLM.
|
||||
|
||||
Utilise gemma4 (Docker port 11435) pour détecter :
|
||||
Utilise le VLM (résolu via vlm_config, endpoint Ollama) pour détecter :
|
||||
1. Popups/dialogues modaux (avec coordonnées du bouton à cliquer)
|
||||
2. États incohérents avec l'attendu
|
||||
|
||||
@@ -2917,7 +2920,10 @@ def _pre_analyze_screen_sync(
|
||||
"""
|
||||
import requests as _requests
|
||||
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", _default_port)
|
||||
gemma4_url = f"http://localhost:{gemma4_port}/api/chat"
|
||||
|
||||
# Charger le contexte métier pour l'Observer
|
||||
@@ -2945,7 +2951,7 @@ def _pre_analyze_screen_sync(
|
||||
resp = _requests.post(
|
||||
gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": True,
|
||||
@@ -3030,7 +3036,7 @@ def _locate_popup_button(
|
||||
resp = _requests.post(
|
||||
ollama_url,
|
||||
json={
|
||||
"model": "qwen2.5vl:7b",
|
||||
"model": vlm_config.get_bbox_grounding_model(),
|
||||
"messages": [{"role": "user", "content": prompt, "images": [screenshot_b64]}],
|
||||
"stream": False,
|
||||
# D5-v3a (2026-05-25) num_ctx=4096 explicite : éviter fuite 8192
|
||||
|
||||
@@ -18,6 +18,8 @@ import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
@@ -184,10 +186,11 @@ def _call_llm_for_contextual_checks(
|
||||
"""
|
||||
import requests
|
||||
|
||||
# Défaut gemma4:latest : meilleur compromis détection/latence sur bench
|
||||
# 2026-05-06 (cf. docs/BENCH_SAFETY_CHECKS_2026-05-06.md). medgemma:4b
|
||||
# retournait systématiquement [] (refus de signaler).
|
||||
model = _env("RPA_SAFETY_CHECKS_LLM_MODEL", "gemma4:latest")
|
||||
# Modèle : override explicite RPA_SAFETY_CHECKS_LLM_MODEL prioritaire ; sinon
|
||||
# résolution centralisée vlm_config (gemma4:latest si dispo — meilleur bench
|
||||
# 2026-05-06 cf. docs/BENCH_SAFETY_CHECKS_2026-05-06.md — sinon fallback DGX).
|
||||
# Pas de fallback silencieux vers un modèle absent : get_vlm_model vérifie /api/tags.
|
||||
model = _env("RPA_SAFETY_CHECKS_LLM_MODEL", "") or vlm_config.get_vlm_model()
|
||||
# Timeout 7s : warm avg gemma4 = 2.9s + marge 4s. Cold start ~10s couvert
|
||||
# si le modèle reste résident (OLLAMA_KEEP_ALIVE=24h recommandé prod).
|
||||
timeout_s = _env_int("RPA_SAFETY_CHECKS_LLM_TIMEOUT_S", 7)
|
||||
|
||||
@@ -26,6 +26,8 @@ import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -94,7 +96,10 @@ class TaskPlanner:
|
||||
"""
|
||||
|
||||
def __init__(self, gemma4_port: str = "", domain_id: str = ""):
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", _default_port)
|
||||
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||
self._domain_id = domain_id or os.environ.get("RPA_DOMAIN", "generic")
|
||||
|
||||
@@ -176,7 +181,7 @@ class TaskPlanner:
|
||||
resp = _requests.post(
|
||||
self._gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"think": True,
|
||||
@@ -499,7 +504,7 @@ class TaskPlanner:
|
||||
resp = _requests.post(
|
||||
self._gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"think": True,
|
||||
|
||||
@@ -11,7 +11,7 @@ Basée sur l'architecture éprouvée de la V2.
|
||||
|
||||
from typing import List, Dict, Optional, Any, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
@@ -25,6 +25,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
from ..models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures
|
||||
from .ollama_client import OllamaClient, check_ollama_available
|
||||
from . import vlm_config
|
||||
|
||||
# Import OWL-v2 (optionnel)
|
||||
try:
|
||||
@@ -71,10 +72,13 @@ class BoundingBox:
|
||||
@dataclass
|
||||
class DetectionConfig:
|
||||
"""Configuration de la détection UI hybride"""
|
||||
# VLM — modèle configurable via variable d'environnement RPA_VLM_MODEL
|
||||
# Par défaut : gemma4:e4b (meilleur grounding + contextualisation)
|
||||
# Fallback : qwen3-vl:8b si gemma4 non disponible
|
||||
vlm_model: str = os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))
|
||||
# VLM — modèle configurable via RPA_VLM_MODEL / VLM_MODEL.
|
||||
# default_factory : lu à l'instanciation (pas figé à l'import) ; None si non
|
||||
# défini → résolution lazy via vlm_config.get_vlm_model() dans _initialize_vlm
|
||||
# (pas de hardcode, pas d'appel réseau à l'import).
|
||||
vlm_model: Optional[str] = field(
|
||||
default_factory=lambda: os.environ.get("RPA_VLM_MODEL") or os.environ.get("VLM_MODEL")
|
||||
)
|
||||
vlm_endpoint: str = "http://localhost:11434"
|
||||
use_vlm_classification: bool = True # Utiliser VLM pour classifier
|
||||
|
||||
@@ -136,11 +140,16 @@ class UIDetector:
|
||||
"""Initialiser le client VLM"""
|
||||
try:
|
||||
if check_ollama_available(self.config.vlm_endpoint):
|
||||
# Résolution lazy : si aucun modèle explicite, vlm_config résout
|
||||
# (avec fallback) en interrogeant /api/tags. On normalise la config
|
||||
# pour que les métadonnées de sortie reflètent le modèle réel.
|
||||
model = self.config.vlm_model or vlm_config.get_vlm_model(self.config.vlm_endpoint)
|
||||
self.config.vlm_model = model
|
||||
self.vlm_client = OllamaClient(
|
||||
endpoint=self.config.vlm_endpoint,
|
||||
model=self.config.vlm_model
|
||||
model=model
|
||||
)
|
||||
logger.info(f"✓ VLM initialized: {self.config.vlm_model}")
|
||||
logger.info(f"✓ VLM initialized: {model}")
|
||||
else:
|
||||
logger.warning("Ollama not available, VLM classification disabled")
|
||||
self.vlm_client = None
|
||||
|
||||
@@ -234,6 +234,33 @@ def get_grounding_profile(endpoint: str = DEFAULT_OLLAMA_ENDPOINT) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def get_bbox_grounding_model() -> str:
    """Return the model name used for native ``bbox_2d`` grounding.

    This helper is distinct from ``get_grounding_profile()``, which is
    documented as targeting a JSON ``{x_pct, y_pct}`` output format. The
    bbox_2d call sites in resolve_engine (``parse_bbox_to_norm`` /
    ``parse_bbox_to_norm_validated``) expect a qwen2.5vl-family model that
    emits pixel coordinates.

    D5-v3b (2026-06-03): the bbox call site historically read
    ``RPA_GROUNDING_MODEL``, a variable shared with ``get_grounding_profile()``.
    A dedicated variable is introduced here to remove that ambiguity.

    Resolution order (the first non-blank value wins):
        1. ``RPA_BBOX_GROUNDING_MODEL`` (dedicated, highest priority)
        2. ``RPA_GROUNDING_MODEL`` (backward compatibility with the old site)
        3. ``DEFAULT_GROUNDING_FALLBACK`` (module-level default)

    Environment values are stripped of surrounding whitespace, and a value
    that is empty after stripping is treated as unset. Without this, a stray
    space or newline (common in ``.env`` files) would be forwarded verbatim
    as the model name and rejected by the backend.

    Returns:
        The bbox_2d grounding model name (e.g. ``"qwen2.5vl:7b-rpa"``).
    """
    for var_name in ("RPA_BBOX_GROUNDING_MODEL", "RPA_GROUNDING_MODEL"):
        candidate = os.environ.get(var_name, "").strip()
        if candidate:
            return candidate
    return DEFAULT_GROUNDING_FALLBACK
|
||||
|
||||
|
||||
def needs_think_false(model_name: str) -> bool:
|
||||
"""Détermine si un modèle nécessite think=false dans le payload.
|
||||
|
||||
|
||||
@@ -16,13 +16,13 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from core.detection import vlm_config
|
||||
from .schema import ExtractionField, ExtractionSchema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration Ollama (coherente avec le reste du projet)
|
||||
OLLAMA_DEFAULT_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
OLLAMA_DEFAULT_MODEL = os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))
|
||||
|
||||
|
||||
class FieldExtractor:
|
||||
@@ -38,19 +38,34 @@ class FieldExtractor:
|
||||
def __init__(
|
||||
self,
|
||||
ollama_url: str = OLLAMA_DEFAULT_URL,
|
||||
ollama_model: str = OLLAMA_DEFAULT_MODEL,
|
||||
ollama_model: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
ollama_url: URL du serveur Ollama
|
||||
ollama_model: Modele VLM a utiliser
|
||||
ollama_model: Modele VLM a utiliser (None = resolution lazy via vlm_config)
|
||||
timeout: Timeout en secondes pour les appels VLM
|
||||
"""
|
||||
self.ollama_url = ollama_url.rstrip("/")
|
||||
self.ollama_model = ollama_model
|
||||
self._ollama_model = ollama_model # None → resolu paresseusement
|
||||
self.timeout = timeout
|
||||
|
||||
@property
|
||||
def ollama_model(self) -> str:
|
||||
"""Modele VLM, resolu paresseusement via vlm_config si non fourni.
|
||||
|
||||
Resolution differee au premier acces (pas a l'import ni a la
|
||||
construction) : evite tout hardcode gemma4 et tout appel reseau a froid.
|
||||
"""
|
||||
if not self._ollama_model:
|
||||
self._ollama_model = vlm_config.get_vlm_model(self.ollama_url)
|
||||
return self._ollama_model
|
||||
|
||||
@ollama_model.setter
|
||||
def ollama_model(self, value: Optional[str]) -> None:
|
||||
self._ollama_model = value
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API publique
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -23,6 +23,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .workflow_ir import WorkflowIR, Step, Action, Variable
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -41,7 +42,10 @@ class IRBuilder:
|
||||
"""
|
||||
|
||||
def __init__(self, gemma4_port: str = ""):
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", _default_port)
|
||||
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||
|
||||
def build(
|
||||
@@ -563,7 +567,7 @@ class IRBuilder:
|
||||
resp = _requests.post(
|
||||
self._gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"think": True,
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# Point Claude → Codex — dé-hardcodage VLM (P1.x) en cours
|
||||
|
||||
- `Auteur`: Claude
|
||||
- `Date`: 2026-06-03 12:05 Europe/Paris
|
||||
- `Branche`: `backup/post-demo-2026-05-19`
|
||||
- `Statut`: en cours, TDD, GO Dom reçu in-session. Pas encore commité.
|
||||
|
||||
## Ce qui est fait (TDD strict, RED→GREEN, non-régression vérifiée)
|
||||
|
||||
| Fichier | Modèle | Endpoint | Tests |
|
||||
|---------|--------|----------|-------|
|
||||
| `agent_v0/server_v1/task_planner.py` | `gemma4:e4b` ×2 → `vlm_config.get_vlm_model()` | défaut `11435` → `DEFAULT_OLLAMA_ENDPOINT` (11434) | 42 ✅ |
|
||||
| `agent_v0/server_v1/replay_verifier.py` | idem (`_verify_semantic`) | idem | 24 ✅ |
|
||||
| `agent_v0/server_v1/domain_context.py` | idem (`_llm_refine_summary`) | idem | 49 ✅ |
|
||||
| `core/workflow/ir_builder.py` | idem (`_analyze_intent`) | idem | 20 ✅ |
|
||||
|
||||
Pattern commun : `GEMMA4_PORT` **conservé comme override legacy** (rétrocompat), seul le
|
||||
défaut mort `11435` devient `11434`. Tests mockés HTTP, zéro dépendance DGX. Fixture
|
||||
autouse côté task_planner pour neutraliser la résolution VLM réseau.
|
||||
|
||||
## Écart vs la feuille initiale (à acter)
|
||||
|
||||
1. **`ir_builder.py` ajouté au lot** : absent de la liste des 6, mais **même bug exact**
   (`11435` + `gemma4:e4b`), wiré dans `api_stream.py:3648`. L'écarter au seul motif qu'il
   n'était pas dans la liste aurait laissé ce bug en place. Dom a validé l'élargissement.
|
||||
2. **`stream_processor.py` déjà OK** (vérifié) : `_GEMMA4_PORT` défaut `11434`,
|
||||
`_CRITIC_MODEL=qwen2.5vl:7b-rpa` (présent DGX), override `RPA_CRITIC_MODEL`. Fait
|
||||
le 2026-05-24. Pas de 404. Seul reproche : court-circuite le fallback `vlm_config`.
|
||||
|
||||
## Reste à faire
|
||||
|
||||
- **`resolve_engine.py` popup** (l.2920/2948) : `11435` + `gemma4:e4b` — Groupe 1, en cours.
|
||||
- **Groupe 2 grounding** : `resolve_engine.py:947/3033` utilise `qwen2.5vl:7b` **en clair**
|
||||
→ DGX n'a que `qwen2.5vl:7b-rpa` → **404 aussi**. Chemin bbox délicat, conflit
|
||||
`RPA_GROUNDING_MODEL` documenté dans `vlm_config.get_grounding_profile()`. Traité à part,
|
||||
avec soin (ne pas casser le parsing bbox_2d natif).
|
||||
- Défauts env-overridables `gemma4:e4b` : `ui_detector.py:77`, `field_extractor.py:25`
|
||||
(moins critiques, runtime ui_detector passe déjà par `get_vlm_model()` à l'appel).
|
||||
|
||||
## Deux points stratégiques soulevés par Dom (pour P1.y)
|
||||
|
||||
1. **128 Go unifiés DGX invalident le dimensionnement 12 Go.** Les verdicts « écarté car
|
||||
déborde la VRAM » de `vlm_config` sont périmés. Viser la **qualité** (Qwen2.5-VL-32B,
|
||||
pleine précision, num_ctx large). Caveat : mémoire unifiée LPDDR5X (~273 Go/s) → la
|
||||
**bande passante** reste le facteur limitant, à mesurer (pas « plus gros = gratuit »).
|
||||
2. **vLLM devient pertinent** (débit/batching, gros modèles, API OpenAI). ⚠ Le
|
||||
dé-hardcodage centralise modèle+endpoint **mais pas le protocole** : runtime parle
|
||||
Ollama `/api/chat`, vLLM parle `/v1/chat/completions`. Passer le hot path à vLLM
|
||||
exigera une **couche client abstraite**, pas un simple switch de config. À acter dans
|
||||
le plan P1.y. Réserve ARM64 : maturité vLLM sur aarch64 à valider.
|
||||
|
||||
— Claude
|
||||
@@ -0,0 +1,88 @@
|
||||
# ACK Claude → Codex — P1.x dé-hardcodage VLM livré (serveur)
|
||||
|
||||
- `De`: Claude
|
||||
- `A`: Codex
|
||||
- `Copie`: Dom, Qwen
|
||||
- `Date`: 2026-06-03 12:40 Europe/Paris
|
||||
- `Répond à`: `inbox_claude/2026-06-03_1010_codex-to-claude_FICHE-ACTION-P1X-dehardcode-VLM-DGX.md`
|
||||
- `Statut`: **ACK — livré côté serveur**, TDD strict, GO Dom in-session.
|
||||
|
||||
## Verdict
|
||||
|
||||
Lot serveur livré. **305 tests verts** sur le périmètre touché + connexe. Aucun
|
||||
alias Ollama, aucun nouveau hardcode de modèle, tests mockés HTTP (zéro dépendance
|
||||
DGX réel), bbox_2d préservé. Reste 2 zones hors périmètre serveur à arbitrer par Dom
|
||||
(client Léa gelé, chemin V4) — détaillées plus bas.
|
||||
|
||||
## Fichiers modifiés (production)
|
||||
|
||||
| Fichier | Modèle | Endpoint |
|
||||
|---------|--------|----------|
|
||||
| `agent_v0/server_v1/task_planner.py` | `gemma4:e4b` ×2 → `vlm_config.get_vlm_model()` | défaut `11435` → `DEFAULT_OLLAMA_ENDPOINT` (11434) |
|
||||
| `agent_v0/server_v1/replay_verifier.py` | idem (`_verify_semantic`) | idem |
|
||||
| `agent_v0/server_v1/domain_context.py` | idem (`_llm_refine_summary`) | idem |
|
||||
| `core/workflow/ir_builder.py` | idem (`_analyze_intent`) | idem |
|
||||
| `agent_v0/server_v1/resolve_engine.py` (Observer popup) | `gemma4:e4b` → `get_vlm_model()` | `11435` → 11434 |
|
||||
| `agent_v0/server_v1/resolve_engine.py` (grounding bbox ×2) | `qwen2.5vl:7b` → `vlm_config.get_bbox_grounding_model()` | déjà 11434 (inchangé) |
|
||||
| `agent_v0/server_v1/safety_checks_provider.py` | défaut `gemma4:latest` → `_env(...) or get_vlm_model()` | déjà 11434 |
|
||||
| `core/detection/ui_detector.py` | défaut dataclass `gemma4:e4b` → `default_factory` + résolution **lazy** dans `_initialize_vlm` | inchangé |
|
||||
| `core/extraction/field_extractor.py` | constante `gemma4:e4b` → property **lazy** via `vlm_config` | inchangé |
|
||||
| `core/detection/vlm_config.py` | **nouvel** `get_bbox_grounding_model()` (D5-v3b) | — |
|
||||
|
||||
## Décision clé : grounding bbox (option B, validée par Dom)
|
||||
|
||||
Nouvel helper `vlm_config.get_bbox_grounding_model()` :
|
||||
`RPA_BBOX_GROUNDING_MODEL` → `RPA_GROUNDING_MODEL` (rétrocompat) → `DEFAULT_GROUNDING_FALLBACK`
|
||||
(`qwen2.5vl:7b-rpa`, présent DGX). **Désambiguïse** le conflit documenté : le chemin
|
||||
bbox_2d ne partage plus aveuglément `RPA_GROUNDING_MODEL` avec `get_grounding_profile()`
|
||||
(qui attend du JSON qwen3.5). `num_ctx=4096` et le parsing bbox_2d natif sont préservés.
|
||||
|
||||
## Tests exécutés
|
||||
|
||||
```bash
|
||||
RPA_AUTH_DISABLED=true .venv/bin/python -m pytest \
|
||||
tests/unit/test_task_planner.py tests/unit/test_replay_critic.py \
|
||||
tests/unit/test_domain_personality.py tests/unit/test_workflow_ir.py \
|
||||
tests/unit/test_resolve_engine_observer_vlm.py tests/unit/test_resolve_engine_bbox_num_ctx.py \
|
||||
tests/unit/test_resolve_engine_dialog_button_guard.py tests/unit/test_resolve_engine_start_button_guard.py \
|
||||
tests/unit/test_dialog_resolver.py tests/unit/test_vlm_grounding_profile.py \
|
||||
tests/unit/test_v4_resolve_order.py tests/unit/test_chat_interface.py tests/unit/test_v4_wiring.py \
|
||||
tests/unit/test_safety_checks_provider.py tests/unit/test_ui_detector.py tests/unit/test_extraction_engine.py
|
||||
# → 305 passed
|
||||
```
|
||||
|
||||
Chaque site migré a un test RED→GREEN (payload résolu via config, endpoint 11434,
|
||||
override env préservé). Un fixture autouse neutralise la résolution VLM réseau côté
|
||||
task_planner. Le test legacy `test_locate_popup_button_payload_num_ctx` a été mis à
|
||||
jour (assertion `qwen2.5vl:7b` → `qwen2.5vl:7b-rpa`, comportement intentionnellement changé).
|
||||
|
||||
## Écarts vs fiche (assumés, validés Dom)
|
||||
|
||||
- **`ir_builder.py` ajouté** (hors liste des 6) : même bug exact (`11435`+`gemma4:e4b`),
|
||||
wiré `api_stream.py:3648`.
|
||||
- **`field_extractor.py` ajouté** : même bug, couvert par la preuve grep sur `core`.
|
||||
- **`stream_processor.py` non modifié** : déjà OK (port 11434, `_CRITIC_MODEL=qwen2.5vl:7b-rpa`
|
||||
présent DGX, override `RPA_CRITIC_MODEL`). Vérifié, pas de 404.
|
||||
|
||||
## Call-sites NON migrés (hors périmètre serveur — arbitrage Dom requis)
|
||||
|
||||
1. **Client Léa `agent_v0/agent_v1/core/executor.py`** (+ copie `deploy/windows_client/`) :
|
||||
`gemma4:e4b` + `11435` (l.1382/1409/3115/3246/3819). ⚠️ **Client gelé depuis avril 2026** :
|
||||
toute modif = redéploiement SCP Windows explicite. **NON touché.** À statuer : ce chemin VLM
|
||||
tourne-t-il côté client ou est-il legacy/mort ?
|
||||
2. **Chemin V4 acteur** : `observe_reason_act.py` (×3), `input_handler.py:294`,
|
||||
`vram_orchestrator.py:21` → `RPA_REASONING_MODEL` défaut `qwen2.5vl:7b` (404 DGX sans env).
|
||||
**Wiring V4 à confirmer** (V3/V4 découplés) avant toute modif.
|
||||
3. **`core/config.py`** : 4× défaut `gemma4:latest`. À vérifier si consommé ou supplanté par `vlm_config`.
|
||||
4. **Infra/bench** (config justifiée) : `ollama_manager.py`, `gpu_resource_manager.py`,
|
||||
`ollama_lea_bench_adapter.py` (ce dernier = `qwen2.5vl:7b-rpa`, présent DGX).
|
||||
|
||||
## Risques résiduels
|
||||
|
||||
- Sites V4 / config.py / client : 404 DGX **si** l'env n'est pas posé. Le `.env.local`
|
||||
actuel couvre `RPA_VLM_MODEL`/`RPA_GROUNDING_MODEL` mais **pas** `RPA_REASONING_MODEL`.
|
||||
- `get_bbox_grounding_model()` lit `RPA_GROUNDING_MODEL` en fallback : si cette variable est
  définie sur un modèle JSON (qwen3.5) pour `get_grounding_profile()`, le chemin bbox
  recevra un modèle incompatible. Mitigation : définir la variable dédiée `RPA_BBOX_GROUNDING_MODEL`.
|
||||
|
||||
— Claude
|
||||
@@ -398,6 +398,56 @@ class TestWorkflowOutcomeLLM:
|
||||
)
|
||||
assert "10 dossiers" in rapport
|
||||
|
||||
def test_refine_modele_via_vlm_config(self):
|
||||
"""Le payload _llm_refine_summary utilise le modèle résolu par vlm_config."""
|
||||
ctx = get_domain_context("tim_codage")
|
||||
captured = {}
|
||||
|
||||
def fake_post(url, json=None, **kwargs):
|
||||
captured["url"] = url
|
||||
captured["model"] = (json or {}).get("model")
|
||||
resp = MagicMock()
|
||||
resp.ok = True
|
||||
resp.json.return_value = {"message": {"content": "ok"}}
|
||||
return resp
|
||||
|
||||
fake_requests = MagicMock()
|
||||
fake_requests.post.side_effect = fake_post
|
||||
|
||||
with patch.dict("sys.modules", {"requests": fake_requests}), patch(
|
||||
"agent_v0.server_v1.domain_context.vlm_config.get_vlm_model",
|
||||
return_value="modele-resolu:test",
|
||||
):
|
||||
ctx._llm_refine_summary(
|
||||
template="ok", subs={"workflow_name": "x"}, success=True
|
||||
)
|
||||
|
||||
assert captured["model"] == "modele-resolu:test"
|
||||
|
||||
def test_refine_endpoint_par_defaut_11434(self, monkeypatch):
|
||||
"""Sans GEMMA4_PORT, _llm_refine_summary vise 11434, pas le port mort 11435."""
|
||||
monkeypatch.delenv("GEMMA4_PORT", raising=False)
|
||||
ctx = get_domain_context("tim_codage")
|
||||
captured = {}
|
||||
|
||||
def fake_post(url, json=None, **kwargs):
|
||||
captured["url"] = url
|
||||
resp = MagicMock()
|
||||
resp.ok = True
|
||||
resp.json.return_value = {"message": {"content": "ok"}}
|
||||
return resp
|
||||
|
||||
fake_requests = MagicMock()
|
||||
fake_requests.post.side_effect = fake_post
|
||||
|
||||
with patch.dict("sys.modules", {"requests": fake_requests}):
|
||||
ctx._llm_refine_summary(
|
||||
template="ok", subs={"workflow_name": "x"}, success=True
|
||||
)
|
||||
|
||||
assert ":11434" in captured["url"]
|
||||
assert ":11435" not in captured["url"]
|
||||
|
||||
def test_llm_refine_network_error_safe(self):
|
||||
"""_llm_refine_summary ne doit jamais lever, même si requests échoue."""
|
||||
ctx = get_domain_context("tim_codage")
|
||||
|
||||
@@ -311,6 +311,26 @@ class TestDataStore:
|
||||
|
||||
class TestFieldExtractor:
|
||||
|
||||
def test_model_default_via_vlm_config(self, monkeypatch):
|
||||
"""Sans modèle explicite, ollama_model est résolu via vlm_config (lazy, pas gemma4 en dur)."""
|
||||
monkeypatch.delenv("RPA_VLM_MODEL", raising=False)
|
||||
monkeypatch.delenv("VLM_MODEL", raising=False)
|
||||
from unittest.mock import patch
|
||||
import core.extraction.field_extractor as fe_mod
|
||||
|
||||
extractor = FieldExtractor()
|
||||
with patch.object(fe_mod.vlm_config, "get_vlm_model", return_value="modele-resolu:test"):
|
||||
assert extractor.ollama_model == "modele-resolu:test"
|
||||
|
||||
def test_explicit_model_preserved(self):
|
||||
"""Un modèle explicite est conservé (résolution non déclenchée)."""
|
||||
from unittest.mock import patch
|
||||
import core.extraction.field_extractor as fe_mod
|
||||
extractor = FieldExtractor(ollama_model="mon-modele:pin")
|
||||
with patch.object(fe_mod.vlm_config, "get_vlm_model",
|
||||
side_effect=AssertionError("ne doit pas être appelé")):
|
||||
assert extractor.ollama_model == "mon-modele:pin"
|
||||
|
||||
def test_extract_file_not_found(self, sample_schema):
|
||||
extractor = FieldExtractor()
|
||||
result = extractor.extract_fields("/tmp/nonexistent.png", sample_schema)
|
||||
|
||||
@@ -263,6 +263,54 @@ class TestVerifySemantic:
|
||||
assert result is not None
|
||||
assert result["verified"] is False
|
||||
|
||||
def test_verify_semantic_modele_via_vlm_config(self, verifier, screenshot_white):
|
||||
"""Le payload _verify_semantic utilise le modèle résolu par vlm_config."""
|
||||
captured = {}
|
||||
|
||||
def capture_post(url, json=None, **kwargs):
|
||||
captured["url"] = url
|
||||
captured["model"] = (json or {}).get("model")
|
||||
resp = MagicMock()
|
||||
resp.ok = True
|
||||
resp.json.return_value = {"message": {"content": "VERDICT: OUI\nRAISON: ok"}}
|
||||
return resp
|
||||
|
||||
with patch(
|
||||
"agent_v0.server_v1.replay_verifier.vlm_config.get_vlm_model",
|
||||
return_value="modele-resolu:test",
|
||||
), patch("requests.post", side_effect=capture_post):
|
||||
verifier._verify_semantic(
|
||||
screenshot_before=screenshot_white,
|
||||
screenshot_after=screenshot_white,
|
||||
expected_result="Le fichier est ouvert",
|
||||
)
|
||||
|
||||
assert captured["model"] == "modele-resolu:test"
|
||||
|
||||
def test_verify_semantic_endpoint_par_defaut_11434(
|
||||
self, verifier, screenshot_white, monkeypatch
|
||||
):
|
||||
"""Sans GEMMA4_PORT, _verify_semantic vise 11434, pas le port mort 11435."""
|
||||
monkeypatch.delenv("GEMMA4_PORT", raising=False)
|
||||
captured = {}
|
||||
|
||||
def capture_post(url, json=None, **kwargs):
|
||||
captured["url"] = url
|
||||
resp = MagicMock()
|
||||
resp.ok = True
|
||||
resp.json.return_value = {"message": {"content": "VERDICT: OUI\nRAISON: ok"}}
|
||||
return resp
|
||||
|
||||
with patch("requests.post", side_effect=capture_post):
|
||||
verifier._verify_semantic(
|
||||
screenshot_before=screenshot_white,
|
||||
screenshot_after=screenshot_white,
|
||||
expected_result="Le fichier est ouvert",
|
||||
)
|
||||
|
||||
assert ":11434" in captured["url"]
|
||||
assert ":11435" not in captured["url"]
|
||||
|
||||
@patch("requests.post")
|
||||
def test_vlm_timeout_retourne_none(self, mock_post, verifier, screenshot_white):
|
||||
"""Timeout VLM → retourne None (fallback gracieux)."""
|
||||
|
||||
@@ -47,7 +47,10 @@ def test_all_three_bbox_sites_have_num_ctx_4096():
|
||||
@pytest.mark.unit
|
||||
def test_locate_popup_button_payload_num_ctx(monkeypatch):
|
||||
"""Test runtime : _locate_popup_button construit un payload avec
|
||||
num_ctx=4096 et model=qwen2.5vl:7b."""
|
||||
num_ctx=4096 et modèle bbox résolu via helper (défaut qwen2.5vl:7b-rpa)."""
|
||||
# D5-v3b : sans env, le défaut bbox est qwen2.5vl:7b-rpa (présent DGX).
|
||||
monkeypatch.delenv("RPA_BBOX_GROUNDING_MODEL", raising=False)
|
||||
monkeypatch.delenv("RPA_GROUNDING_MODEL", raising=False)
|
||||
captured = {}
|
||||
|
||||
def fake_post(url, json=None, timeout=None):
|
||||
@@ -78,8 +81,79 @@ def test_locate_popup_button_payload_num_ctx(monkeypatch):
|
||||
f"_locate_popup_button payload sans num_ctx=4096 : "
|
||||
f"{captured['payload']['options']}"
|
||||
)
|
||||
# Modèle non changé
|
||||
assert captured["payload"]["model"] == "qwen2.5vl:7b"
|
||||
# Modèle bbox résolu via helper : défaut qwen2.5vl:7b-rpa (présent DGX)
|
||||
assert captured["payload"]["model"] == "qwen2.5vl:7b-rpa"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_locate_popup_button_model_via_helper(monkeypatch):
    """D5-v3b: `_locate_popup_button` takes its model from `get_bbox_grounding_model`.

    The helper is patched to a sentinel name and the payload handed to
    `requests.post` is recorded for inspection.
    """
    from unittest.mock import MagicMock, patch

    seen = {}

    def fake_post(url, json=None, timeout=None):
        # Record the payload and answer with a failed, empty response.
        seen["payload"] = json
        fake = MagicMock()
        fake.ok = False
        fake.json.return_value = {"message": {"content": ""}}
        return fake

    import requests
    monkeypatch.setattr(requests, "post", fake_post)
    from agent_v0.server_v1 import resolve_engine as re_module

    helper_patch = patch(
        "agent_v0.server_v1.resolve_engine.vlm_config.get_bbox_grounding_model",
        return_value="bbox-modele:test",
    )
    with helper_patch:
        re_module._locate_popup_button(
            screenshot_b64="fake_b64",
            button_text="OK",
            screen_width=1920,
            screen_height=1080,
        )

    assert seen["payload"]["model"] == "bbox-modele:test"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_resolve_by_grounding_model_via_helper(monkeypatch, tmp_path):
    """D5-v3b: `_resolve_by_grounding` (Ollama attempt) uses `get_bbox_grounding_model`.

    Every `requests.post` call is recorded; the first one aimed at
    `/api/chat` must carry the model returned by the patched helper.
    """
    from unittest.mock import MagicMock, patch
    from PIL import Image

    # Minimal real screenshot written to the temporary directory.
    shot = tmp_path / "shot.png"
    blank = Image.new("RGB", (200, 120), (255, 255, 255))
    blank.save(shot)

    calls = []

    def fake_post(url, json=None, timeout=None):
        calls.append({"url": url, "payload": json})
        fake = MagicMock()
        # Every call reports failure with empty content, so nothing resolves.
        fake.ok = False
        fake.json.return_value = {"message": {"content": ""}}
        return fake

    import requests
    monkeypatch.setattr(requests, "post", fake_post)
    from agent_v0.server_v1 import resolve_engine as re_module

    helper_patch = patch(
        "agent_v0.server_v1.resolve_engine.vlm_config.get_bbox_grounding_model",
        return_value="bbox-modele:test",
    )
    with helper_patch:
        re_module._resolve_by_grounding(
            screenshot_path=str(shot),
            target_spec={"by_text": "OK"},
            screen_width=200,
            screen_height=120,
        )

    chat_calls = [call for call in calls if call["url"].endswith("/api/chat")]
    assert chat_calls, "Aucun appel Ollama /api/chat capturé"
    first_payload = chat_calls[0]["payload"]
    assert first_payload["model"] == "bbox-modele:test"
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
|
||||
58
tests/unit/test_resolve_engine_observer_vlm.py
Normal file
58
tests/unit/test_resolve_engine_observer_vlm.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Tests dé-hardcodage VLM de l'Observer (_pre_analyze_screen_sync).
|
||||
|
||||
Le modèle doit venir de vlm_config et l'endpoint viser 11434 (Ollama/tunnel DGX),
|
||||
pas le port mort 11435 ni le modèle gemma4:e4b absent du DGX.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from agent_v0.server_v1.resolve_engine import _pre_analyze_screen_sync
|
||||
|
||||
|
||||
def _fake_resp():
|
||||
resp = MagicMock()
|
||||
resp.ok = True
|
||||
resp.json.return_value = {"message": {"content": "ÉTAT: OK\nBOUTON: aucun\nDÉTAIL: rien"}}
|
||||
return resp
|
||||
|
||||
|
||||
def test_observer_modele_via_vlm_config():
    """The Observer payload carries the model resolved by `vlm_config`."""
    seen = {}

    def record_post(url, json=None, **kwargs):
        # Remember the endpoint and the model found in the JSON payload.
        seen["url"] = url
        seen["model"] = (json or {}).get("model")
        return _fake_resp()

    model_patch = patch(
        "agent_v0.server_v1.resolve_engine.vlm_config.get_vlm_model",
        return_value="modele-resolu:test",
    )
    post_patch = patch("requests.post", side_effect=record_post)

    with model_patch, post_patch:
        _pre_analyze_screen_sync("ZmFrZQ==", "écran prêt", "Fenêtre", 1920, 1080)

    assert seen["model"] == "modele-resolu:test"
|
||||
|
||||
|
||||
def test_observer_endpoint_par_defaut_11434(monkeypatch):
    """Without GEMMA4_PORT, the Observer must target port 11434, not 11435.

    Only the URL passed to `requests.post` is inspected. Model resolution is
    stubbed as well: left unpatched, `get_vlm_model` may probe a live Ollama
    endpoint, which would make this endpoint-only test depend on the network.
    """
    monkeypatch.delenv("GEMMA4_PORT", raising=False)
    captured = {}

    def capture_post(url, json=None, **kwargs):
        captured["url"] = url
        return _fake_resp()

    with patch(
        "agent_v0.server_v1.resolve_engine.vlm_config.get_vlm_model",
        return_value="modele-resolu:test",
    ), patch("requests.post", side_effect=capture_post):
        _pre_analyze_screen_sync("ZmFrZQ==", "écran prêt", "Fenêtre", 1920, 1080)

    # Fail with a readable message instead of a KeyError if no request was sent.
    assert "url" in captured, "requests.post was never called"
    assert ":11434" in captured["url"]
    assert ":11435" not in captured["url"]
|
||||
@@ -118,3 +118,50 @@ def test_empty_declarative_with_llm_returns_only_llm():
|
||||
)
|
||||
assert len(payload.checks) == 2
|
||||
assert all(c["source"] == "llm_contextual" for c in payload.checks)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Dé-hardcodage VLM (P1.x) : modèle safety-checks résolu via vlm_config
|
||||
# ============================================================================
|
||||
|
||||
def _capture_generate(captured):
|
||||
def fake_post(url, json=None, timeout=None):
|
||||
captured["url"] = url
|
||||
captured["model"] = (json or {}).get("model")
|
||||
resp = MagicMock()
|
||||
resp.status_code = 200
|
||||
resp.json.return_value = {"response": '{"additional_checks": []}'}
|
||||
return resp
|
||||
return fake_post
|
||||
|
||||
|
||||
def test_safety_checks_model_default_via_vlm_config(monkeypatch):
    """Without RPA_SAFETY_CHECKS_LLM_MODEL, the model comes from `vlm_config`.

    The recorded request must carry the patched model name and target
    port 11434.
    """
    monkeypatch.delenv("RPA_SAFETY_CHECKS_LLM_MODEL", raising=False)
    seen = {}

    import requests
    recorder = _capture_generate(seen)
    monkeypatch.setattr(requests, "post", recorder)

    from agent_v0.server_v1 import safety_checks_provider as scp
    model_patch = patch.object(
        scp.vlm_config, "get_vlm_model", return_value="modele-resolu:test"
    )
    with model_patch:
        scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])

    assert seen["model"] == "modele-resolu:test"
    assert ":11434" in seen["url"]
|
||||
|
||||
|
||||
def test_safety_checks_model_env_override_preserved(monkeypatch):
    """RPA_SAFETY_CHECKS_LLM_MODEL keeps priority over the `vlm_config` default."""
    monkeypatch.setenv("RPA_SAFETY_CHECKS_LLM_MODEL", "mon-modele-pin")
    seen = {}

    import requests
    recorder = _capture_generate(seen)
    monkeypatch.setattr(requests, "post", recorder)

    from agent_v0.server_v1 import safety_checks_provider as scp
    # get_vlm_model raises if it is ever called, proving the override is used.
    must_not_run = patch.object(
        scp.vlm_config,
        "get_vlm_model",
        side_effect=AssertionError("ne doit pas être appelé"),
    )
    with must_not_run:
        scp._call_llm_for_contextual_checks({"parameters": {}}, {}, None, [])

    assert seen["model"] == "mon-modele-pin"
|
||||
|
||||
@@ -32,6 +32,20 @@ from agent_v0.server_v1.task_planner import TaskPlanner, TaskPlan
|
||||
# Fixtures
|
||||
# =========================================================================
|
||||
|
||||
@pytest.fixture(autouse=True)
def _mock_vlm_model():
    """Pin VLM model resolution so tests need no Ollama/DGX instance.

    `get_vlm_model()` is replaced for the duration of each test with a stub
    returning a fixed model name, keeping the tests deterministic and
    off the network.
    """
    patcher = patch(
        "agent_v0.server_v1.task_planner.vlm_config.get_vlm_model",
        return_value="gemma4:latest",
    )
    patcher.start()
    try:
        yield
    finally:
        patcher.stop()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def planner():
|
||||
"""TaskPlanner avec port gemma4 factice."""
|
||||
@@ -77,6 +91,65 @@ def _mock_gemma4_response(content: str):
|
||||
return mock_resp
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests : dé-hardcodage VLM (modèle via vlm_config, endpoint 11434)
|
||||
# =========================================================================
|
||||
|
||||
class TestVlmConfigDehardcode:
    """The VLM model and endpoint must no longer be hard-coded."""

    def test_understand_utilise_modele_de_vlm_config(self, sample_workflows):
        """`understand()` posts the model resolved by `vlm_config`."""
        seen = {}

        def record_post(url, json=None, **kwargs):
            seen["url"] = url
            seen["model"] = (json or {}).get("model")
            return _mock_gemma4_response("COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        model_patch = patch(
            "agent_v0.server_v1.task_planner.vlm_config.get_vlm_model",
            return_value="modele-resolu:test",
        )
        post_patch = patch("requests.post", side_effect=record_post)

        with model_patch, post_patch:
            task_planner = TaskPlanner(domain_id="generic")
            task_planner.understand(
                "Ouvre le bloc-notes", available_workflows=sample_workflows
            )

        assert seen["model"] == "modele-resolu:test"

    def test_steps_to_actions_utilise_modele_de_vlm_config(self):
        """`_steps_to_actions()` posts the model resolved by `vlm_config`."""
        seen = {}

        def record_post(url, json=None, **kwargs):
            seen["model"] = (json or {}).get("model")
            return _mock_gemma4_response('{"type": "wait", "duration_ms": 100}\n')

        model_patch = patch(
            "agent_v0.server_v1.task_planner.vlm_config.get_vlm_model",
            return_value="modele-resolu:test",
        )
        post_patch = patch("requests.post", side_effect=record_post)

        with model_patch, post_patch:
            task_planner = TaskPlanner(domain_id="generic")
            steps = [{"description": "1. Attendre"}]
            task_planner._steps_to_actions(steps, {})

        assert seen["model"] == "modele-resolu:test"

    def test_endpoint_par_defaut_cible_11434(self, monkeypatch, sample_workflows):
        """Without GEMMA4_PORT, the endpoint targets port 11434 rather than 11435."""
        monkeypatch.delenv("GEMMA4_PORT", raising=False)
        seen = {}

        def record_post(url, json=None, **kwargs):
            seen["url"] = url
            return _mock_gemma4_response("COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        post_patch = patch("requests.post", side_effect=record_post)
        with post_patch:
            task_planner = TaskPlanner(domain_id="generic")
            task_planner.understand(
                "Ouvre le bloc-notes", available_workflows=sample_workflows
            )

        target_url = seen["url"]
        assert ":11434" in target_url
        assert ":11435" not in target_url
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests : understand — ordre simple
|
||||
# =========================================================================
|
||||
|
||||
@@ -2,17 +2,55 @@
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
from PIL import Image
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
from core.detection.ui_detector import UIDetector
|
||||
from core.detection.ui_detector import UIDetector, DetectionConfig
|
||||
|
||||
class TestUIDetector:
    """Smoke tests for `UIDetector` construction."""

    def setup_method(self):
        """Create a fresh detector before each test."""
        detector = UIDetector()
        self.detector = detector

    def test_detector_initialization(self):
        """The detector built in `setup_method` is available on the instance."""
        built = self.detector
        assert built is not None
|
||||
|
||||
|
||||
class TestVlmModelDehardcode:
    """The VLM model must no longer be the hard-coded literal gemma4:e4b."""

    def test_config_no_hardcoded_gemma4(self, monkeypatch):
        """With no env override, `DetectionConfig.vlm_model` is empty, not gemma4:e4b."""
        for var in ("RPA_VLM_MODEL", "VLM_MODEL"):
            monkeypatch.delenv(var, raising=False)

        config = DetectionConfig()

        assert config.vlm_model != "gemma4:e4b"
        # None or "" is accepted here.
        assert not config.vlm_model

    def test_config_respects_env(self, monkeypatch):
        """RPA_VLM_MODEL is reflected in the config."""
        monkeypatch.setenv("RPA_VLM_MODEL", "mon-modele:test")
        config = DetectionConfig()
        assert config.vlm_model == "mon-modele:test"

    def test_initialize_vlm_resolves_lazily(self, monkeypatch):
        """With no explicit model, the client receives the model from `vlm_config`."""
        for var in ("RPA_VLM_MODEL", "VLM_MODEL"):
            monkeypatch.delenv(var, raising=False)

        seen = {}

        class FakeClient:
            """Stand-in client that records the model it is built with."""

            def __init__(self, endpoint=None, model=None):
                seen["model"] = model

        import core.detection.ui_detector as uidet

        ollama_up = patch.object(uidet, "check_ollama_available", return_value=True)
        client_swap = patch.object(uidet, "OllamaClient", FakeClient)
        model_patch = patch.object(
            uidet.vlm_config, "get_vlm_model", return_value="modele-resolu:test"
        )
        with ollama_up, client_swap, model_patch:
            UIDetector()

        assert seen["model"] == "modele-resolu:test"
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run this test module directly with verbose pytest output.
    cli_args = [__file__, '-v']
    pytest.main(cli_args)
|
||||
|
||||
@@ -96,6 +96,41 @@ def test_grounding_profile_gemma4_triggers_think_false(monkeypatch):
|
||||
assert p["think"] is False # gemma4 needs think=false
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# get_bbox_grounding_model (D5-v3b : modèle bbox_2d dédié, désambiguïsé)
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_bbox_grounding_model_default(monkeypatch):
    """With neither env var set, the helper returns DEFAULT_GROUNDING_FALLBACK.

    The fallback constant itself is also pinned to ``qwen2.5vl:7b-rpa``.
    """
    for var in ("RPA_BBOX_GROUNDING_MODEL", "RPA_GROUNDING_MODEL"):
        monkeypatch.delenv(var, raising=False)
    from core.detection.vlm_config import get_bbox_grounding_model, DEFAULT_GROUNDING_FALLBACK

    resolved = get_bbox_grounding_model()

    assert resolved == DEFAULT_GROUNDING_FALLBACK
    assert DEFAULT_GROUNDING_FALLBACK == "qwen2.5vl:7b-rpa"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_bbox_grounding_model_dedicated_env(monkeypatch):
    """RPA_BBOX_GROUNDING_MODEL wins when both env vars are set."""
    overrides = (
        ("RPA_BBOX_GROUNDING_MODEL", "qwen2.5vl:32b"),
        ("RPA_GROUNDING_MODEL", "qwen3.5:9b"),
    )
    for name, value in overrides:
        monkeypatch.setenv(name, value)
    from core.detection.vlm_config import get_bbox_grounding_model

    resolved = get_bbox_grounding_model()

    assert resolved == "qwen2.5vl:32b"
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_bbox_grounding_model_legacy_compat(monkeypatch):
    """Without the dedicated variable, RPA_GROUNDING_MODEL is still honoured.

    The legacy value is deliberately different from DEFAULT_GROUNDING_FALLBACK:
    if it equalled the default, this test would also pass when the legacy
    variable is ignored entirely.
    """
    monkeypatch.delenv("RPA_BBOX_GROUNDING_MODEL", raising=False)
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen2.5vl:32b")
    from core.detection.vlm_config import get_bbox_grounding_model, DEFAULT_GROUNDING_FALLBACK

    # Guard the premise of the test: the legacy value must not be the default.
    assert DEFAULT_GROUNDING_FALLBACK != "qwen2.5vl:32b"
    assert get_bbox_grounding_model() == "qwen2.5vl:32b"
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
# _extract_first_json_object
|
||||
# ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -151,6 +151,46 @@ class TestIRBuilder:
|
||||
{"event": {"type": "key_combo", "keys": ["ctrl", "s"], "window": {"title": "*Sans titre – Bloc-notes"}, "timestamp": 115.0}},
|
||||
]
|
||||
|
||||
def test_analyze_intent_modele_via_vlm_config(self):
    """`_analyze_intent` posts the model resolved by `vlm_config`."""
    seen = {}

    def record_post(url, json=None, **kwargs):
        # Remember the endpoint and the model found in the JSON payload.
        seen["url"] = url
        seen["model"] = (json or {}).get("model")
        fake = MagicMock()
        fake.ok = True
        fake.json.return_value = {"message": {"content": "INTENTION: x\nAVANT: y\nAPRÈS: z"}}
        return fake

    model_patch = patch(
        "core.workflow.ir_builder.vlm_config.get_vlm_model",
        return_value="modele-resolu:test",
    )
    post_patch = patch("requests.post", side_effect=record_post)

    with model_patch, post_patch:
        ir_builder = IRBuilder()
        ir_builder._analyze_intent("clic Rechercher", 0, 1, "Test", "generic")

    assert seen["model"] == "modele-resolu:test"
|
||||
|
||||
def test_analyze_intent_endpoint_par_defaut_11434(self, monkeypatch):
    """Without GEMMA4_PORT, `_analyze_intent` must target port 11434, not 11435.

    Only the URL passed to `requests.post` is inspected. Model resolution is
    stubbed as well: left unpatched, `get_vlm_model` may probe a live Ollama
    endpoint, which would make this endpoint-only test depend on the network.
    """
    monkeypatch.delenv("GEMMA4_PORT", raising=False)
    captured = {}

    def capture_post(url, json=None, **kwargs):
        captured["url"] = url
        resp = MagicMock()
        resp.ok = True
        resp.json.return_value = {"message": {"content": "INTENTION: x\nAVANT: y\nAPRÈS: z"}}
        return resp

    with patch(
        "core.workflow.ir_builder.vlm_config.get_vlm_model",
        return_value="modele-resolu:test",
    ), patch("requests.post", side_effect=capture_post):
        builder = IRBuilder()
        builder._analyze_intent("clic Rechercher", 0, 1, "Test", "generic")

    # Fail with a readable message instead of a KeyError if no request was sent.
    assert "url" in captured, "requests.post was never called"
    assert ":11434" in captured["url"]
    assert ":11435" not in captured["url"]
|
||||
|
||||
def test_builder_sans_gemma4(self):
|
||||
"""Le builder fonctionne même sans gemma4 (fallback gracieux)."""
|
||||
builder = IRBuilder(gemma4_port="99999") # Port invalide
|
||||
|
||||
Reference in New Issue
Block a user