feat(p1z): centralize V4 reasoning model resolution (DGX-safe)
Remplace le default runtime dangereux `qwen2.5vl:7b` (absent du tunnel DGX -> 404) des chemins V4/reasoning par un helper central get_reasoning_model(). - core/detection/vlm_config.py : + get_reasoning_model() + DEFAULT_REASONING_MODEL (qwen2.5vl:7b-rpa). Ordre : RPA_REASONING_MODEL -> RPA_VLM_MODEL/VLM_MODEL -> default DGX-safe. Pas d'appel reseau (lazy, safe a l'import). - core/execution/input_handler.py, observe_reason_act.py (x3), core/cognition/vram_orchestrator.py : migration des 5 call-sites. - tests/unit/test_reasoning_model.py : 8 tests (default DGX-safe, ordre de resolution, non-regression wiring des 3 modules V4). Hors scope (signale lot P1.w) : DEFAULT_VLM_MODEL=gemma4:latest reste fallback de get_vlm_model(). Client gele non touche. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,8 @@ import shutil
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
@@ -291,7 +293,7 @@ Si l'écran est normal sans action nécessaire, réponds action="nothing".
|
||||
Réponds UNIQUEMENT le JSON, pas d'explication."""
|
||||
|
||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
|
||||
model = get_reasoning_model()
|
||||
|
||||
response = requests.post(
|
||||
f"{ollama_url}/api/generate",
|
||||
|
||||
@@ -21,6 +21,8 @@ import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import du contexte cognitif (mémoire de travail)
|
||||
@@ -407,7 +409,7 @@ Règles:
|
||||
|
||||
# --- Appel VLM (Ollama) ---
|
||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
|
||||
model = get_reasoning_model()
|
||||
|
||||
print(f"🧠 [ORA/reason_instruction] Appel VLM {model}...")
|
||||
|
||||
@@ -1207,7 +1209,7 @@ Règles:
|
||||
image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
||||
|
||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
|
||||
model = get_reasoning_model()
|
||||
|
||||
resp = requests.post(f"{ollama_url}/api/generate", json={
|
||||
"model": model,
|
||||
@@ -1963,7 +1965,7 @@ Règles:
|
||||
)
|
||||
|
||||
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
|
||||
model = get_reasoning_model()
|
||||
|
||||
response = requests.post(
|
||||
f"{ollama_url}/api/generate",
|
||||
|
||||
Reference in New Issue
Block a user