chore(dgx): snapshot consolidation WIP pour transfert poc DGX
Regroupe le WIP non committé requis pour le clone/runtime DGX (Option A) : - api_stream.py : préflight replay + smoke santé modèles + handler 403 WP-B - de-hardcode VLM : vlm_config, gpu/*, vram_orchestrator, ollama_manager - stream_processor, semantic_matcher, agent_chat (app/planner/intent) - workflows.db (acquis ; le transfert artifacts le mettra à jour + rewrite chemins) - docs : plans DGX, benchmarks VLM/grounders, recherche SOTA, coordination 8 juin Snapshot destiné à la branche poc-dgx poussée sur Gitea pour cloner le DGX. Scan anti-secret : clean. graphify (repo embarqué) exclu. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,7 @@ from werkzeug.utils import secure_filename
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from core.workflow import SemanticMatcher, VariableManager
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
# Import des composants conversationnels
|
||||
from .intent_parser import IntentParser, IntentType, get_intent_parser
|
||||
@@ -237,6 +238,7 @@ def init_system():
|
||||
global matcher, gpu_manager
|
||||
global intent_parser, confirmation_loop, response_generator, conversation_manager
|
||||
global autonomous_planner
|
||||
reasoning_model = get_reasoning_model()
|
||||
|
||||
# 1. SemanticMatcher — multi-répertoires (P0-6) + matching LLM (P0-7)
|
||||
# Scan data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
|
||||
@@ -244,7 +246,7 @@ def init_system():
|
||||
matcher = SemanticMatcher(
|
||||
workflows_dir=None, # None = scan tous les répertoires par défaut
|
||||
use_llm=True, # Matching sémantique via Ollama (P0-7)
|
||||
llm_model="qwen2.5:7b",
|
||||
llm_model=reasoning_model,
|
||||
)
|
||||
dirs_info = matcher.get_directories()
|
||||
dirs_summary = ", ".join(
|
||||
@@ -269,7 +271,10 @@ def init_system():
|
||||
|
||||
# 3. Composants conversationnels
|
||||
try:
|
||||
intent_parser = get_intent_parser(use_llm=True) # LLM activé (Ollama)
|
||||
intent_parser = get_intent_parser(
|
||||
use_llm=True,
|
||||
llm_model=reasoning_model,
|
||||
) # LLM activé (Ollama)
|
||||
confirmation_loop = get_confirmation_loop()
|
||||
response_generator = get_response_generator()
|
||||
conversation_manager = get_conversation_manager()
|
||||
@@ -350,7 +355,7 @@ def init_system():
|
||||
|
||||
# 5. Autonomous Planner (Agent Libre)
|
||||
try:
|
||||
autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
|
||||
autonomous_planner = get_autonomous_planner(llm_model=reasoning_model)
|
||||
|
||||
# Configurer les callbacks pour l'exécution
|
||||
if screen_capturer:
|
||||
@@ -726,7 +731,7 @@ def api_history():
|
||||
# =============================================================================
|
||||
|
||||
# Modèle texte pour les réponses conversationnelles (pas besoin de vision)
|
||||
_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL", "qwen3:8b")
|
||||
_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL") or get_reasoning_model()
|
||||
|
||||
_LEA_SYSTEM_PROMPT = """Tu es Léa, une assistante professionnelle chaleureuse et bienveillante.
|
||||
|
||||
|
||||
@@ -27,6 +27,8 @@ import requests
|
||||
# Ajouter le chemin du projet pour les imports core
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Essayer d'importer les composants de détection visuelle
|
||||
@@ -113,11 +115,11 @@ class AutonomousPlanner:
|
||||
def __init__(
|
||||
self,
|
||||
llm_endpoint: str = "http://localhost:11434/api/generate",
|
||||
llm_model: str = "qwen2.5:7b",
|
||||
llm_model: Optional[str] = None,
|
||||
timeout: int = 60
|
||||
):
|
||||
self.llm_endpoint = llm_endpoint
|
||||
self.llm_model = llm_model
|
||||
self.llm_model = llm_model or get_reasoning_model()
|
||||
self.timeout = timeout
|
||||
self.llm_available = self._check_llm()
|
||||
|
||||
@@ -1028,12 +1030,12 @@ _planner_instance: Optional[AutonomousPlanner] = None
|
||||
|
||||
|
||||
def get_autonomous_planner(
|
||||
llm_model: str = "qwen2.5:7b"
|
||||
llm_model: Optional[str] = None
|
||||
) -> AutonomousPlanner:
|
||||
"""Retourne l'instance singleton du planner."""
|
||||
global _planner_instance
|
||||
|
||||
if _planner_instance is None:
|
||||
_planner_instance = AutonomousPlanner(llm_model=llm_model)
|
||||
_planner_instance = AutonomousPlanner(llm_model=llm_model or get_reasoning_model())
|
||||
|
||||
return _planner_instance
|
||||
|
||||
@@ -19,6 +19,8 @@ from enum import Enum
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -280,7 +282,7 @@ class IntentParser:
|
||||
self,
|
||||
use_llm: bool = False,
|
||||
llm_endpoint: str = "http://localhost:11434",
|
||||
llm_model: str = "qwen2.5:7b"
|
||||
llm_model: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Initialiser le parseur d'intentions.
|
||||
@@ -292,7 +294,7 @@ class IntentParser:
|
||||
"""
|
||||
self.use_llm = use_llm
|
||||
self.llm_endpoint = llm_endpoint
|
||||
self.llm_model = llm_model
|
||||
self.llm_model = llm_model or get_reasoning_model()
|
||||
self.llm_available = False
|
||||
self._workflows_cache: List[Dict[str, Any]] = []
|
||||
|
||||
@@ -687,7 +689,7 @@ _intent_parser: Optional[IntentParser] = None
|
||||
|
||||
def get_intent_parser(
|
||||
use_llm: bool = False,
|
||||
llm_model: str = "qwen2.5:7b",
|
||||
llm_model: Optional[str] = None,
|
||||
llm_endpoint: str = "http://localhost:11434"
|
||||
) -> IntentParser:
|
||||
"""
|
||||
@@ -695,20 +697,21 @@ def get_intent_parser(
|
||||
|
||||
Args:
|
||||
use_llm: Activer le LLM (Ollama)
|
||||
llm_model: Modèle à utiliser (qwen2.5:7b par défaut)
|
||||
llm_model: Modèle à utiliser (défaut: modèle reasoning central)
|
||||
llm_endpoint: URL de l'endpoint Ollama
|
||||
"""
|
||||
global _intent_parser
|
||||
resolved_model = llm_model or get_reasoning_model()
|
||||
if _intent_parser is None:
|
||||
_intent_parser = IntentParser(
|
||||
use_llm=use_llm,
|
||||
llm_endpoint=llm_endpoint,
|
||||
llm_model=llm_model
|
||||
llm_model=resolved_model
|
||||
)
|
||||
elif use_llm and not _intent_parser.use_llm:
|
||||
# Réactiver le LLM si demandé
|
||||
_intent_parser.use_llm = True
|
||||
_intent_parser.llm_model = llm_model
|
||||
_intent_parser.llm_model = resolved_model
|
||||
_intent_parser._check_llm_availability()
|
||||
return _intent_parser
|
||||
|
||||
|
||||
Reference in New Issue
Block a user