chore(dgx): snapshot consolidation WIP pour transfert poc DGX

Regroupe le WIP non committé requis pour le clone/runtime DGX (Option A) :

- api_stream.py : préflight replay + smoke santé modèles + handler 403 WP-B
- de-hardcode VLM : vlm_config, gpu/*, vram_orchestrator, ollama_manager
- stream_processor, semantic_matcher, agent_chat (app/planner/intent)
- workflows.db (acquis ; le transfert artifacts le mettra à jour + rewrite chemins)
- docs : plans DGX, benchmarks VLM/grounders, recherche SOTA, coordination 8 juin

Snapshot destiné à la branche poc-dgx poussée sur Gitea pour cloner le DGX.
Scan anti-secret : clean. graphify (repo embarqué) exclu.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 16:33:58 +02:00
parent f18de016d7
commit 6d34b3cb68
204 changed files with 15744 additions and 47 deletions
--- a/agent_chat/app.py
+++ b/agent_chat/app.py
@@ -38,6 +38,7 @@ from werkzeug.utils import secure_filename
 sys.path.insert(0, str(Path(__file__).parent.parent))

 from core.workflow import SemanticMatcher, VariableManager
+from core.detection.vlm_config import get_reasoning_model

 # Import des composants conversationnels
 from .intent_parser import IntentParser, IntentType, get_intent_parser
@@ -237,6 +238,7 @@ def init_system():
    global matcher, gpu_manager
    global intent_parser, confirmation_loop, response_generator, conversation_manager
    global autonomous_planner
+    reasoning_model = get_reasoning_model()

    # 1. SemanticMatcher — multi-répertoires (P0-6) + matching LLM (P0-7)
    # Scan data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
@@ -244,7 +246,7 @@ def init_system():
        matcher = SemanticMatcher(
            workflows_dir=None,  # None = scan tous les répertoires par défaut
            use_llm=True,        # Matching sémantique via Ollama (P0-7)
-            llm_model="qwen2.5:7b",
+            llm_model=reasoning_model,
        )
        dirs_info = matcher.get_directories()
        dirs_summary = ", ".join(
@@ -269,7 +271,10 @@ def init_system():

    # 3. Composants conversationnels
    try:
-        intent_parser = get_intent_parser(use_llm=True)  # LLM activé (Ollama)
+        intent_parser = get_intent_parser(
+            use_llm=True,
+            llm_model=reasoning_model,
+        )  # LLM activé (Ollama)
        confirmation_loop = get_confirmation_loop()
        response_generator = get_response_generator()
        conversation_manager = get_conversation_manager()
@@ -350,7 +355,7 @@ def init_system():

    # 5. Autonomous Planner (Agent Libre)
    try:
-        autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
+        autonomous_planner = get_autonomous_planner(llm_model=reasoning_model)

        # Configurer les callbacks pour l'exécution
        if screen_capturer:
@@ -726,7 +731,7 @@ def api_history():
 # =============================================================================

 # Modèle texte pour les réponses conversationnelles (pas besoin de vision)
-_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL", "qwen3:8b")
+_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL") or get_reasoning_model()

 _LEA_SYSTEM_PROMPT = """Tu es Léa, une assistante professionnelle chaleureuse et bienveillante.

--- a/agent_chat/autonomous_planner.py
+++ b/agent_chat/autonomous_planner.py
@@ -27,6 +27,8 @@ import requests
 # Ajouter le chemin du projet pour les imports core
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

+from core.detection.vlm_config import get_reasoning_model
+
 logger = logging.getLogger(__name__)

 # Essayer d'importer les composants de détection visuelle
@@ -113,11 +115,11 @@ class AutonomousPlanner:
    def __init__(
        self,
        llm_endpoint: str = "http://localhost:11434/api/generate",
-        llm_model: str = "qwen2.5:7b",
+        llm_model: Optional[str] = None,
        timeout: int = 60
    ):
        self.llm_endpoint = llm_endpoint
-        self.llm_model = llm_model
+        self.llm_model = llm_model or get_reasoning_model()
        self.timeout = timeout
        self.llm_available = self._check_llm()

@@ -1028,12 +1030,12 @@ _planner_instance: Optional[AutonomousPlanner] = None


 def get_autonomous_planner(
-    llm_model: str = "qwen2.5:7b"
+    llm_model: Optional[str] = None
 ) -> AutonomousPlanner:
    """Retourne l'instance singleton du planner."""
    global _planner_instance

    if _planner_instance is None:
-        _planner_instance = AutonomousPlanner(llm_model=llm_model)
+        _planner_instance = AutonomousPlanner(llm_model=llm_model or get_reasoning_model())

    return _planner_instance
--- a/agent_chat/intent_parser.py
+++ b/agent_chat/intent_parser.py
@@ -19,6 +19,8 @@ from enum import Enum
 from typing import Dict, Any, List, Optional, Tuple
 from pathlib import Path

+from core.detection.vlm_config import get_reasoning_model
+
 logger = logging.getLogger(__name__)


@@ -280,7 +282,7 @@ class IntentParser:
        self,
        use_llm: bool = False,
        llm_endpoint: str = "http://localhost:11434",
-        llm_model: str = "qwen2.5:7b"
+        llm_model: Optional[str] = None
    ):
        """
        Initialiser le parseur d'intentions.
@@ -292,7 +294,7 @@ class IntentParser:
        """
        self.use_llm = use_llm
        self.llm_endpoint = llm_endpoint
-        self.llm_model = llm_model
+        self.llm_model = llm_model or get_reasoning_model()
        self.llm_available = False
        self._workflows_cache: List[Dict[str, Any]] = []

@@ -687,7 +689,7 @@ _intent_parser: Optional[IntentParser] = None

 def get_intent_parser(
    use_llm: bool = False,
-    llm_model: str = "qwen2.5:7b",
+    llm_model: Optional[str] = None,
    llm_endpoint: str = "http://localhost:11434"
 ) -> IntentParser:
    """
@@ -695,20 +697,21 @@ def get_intent_parser(

    Args:
        use_llm: Activer le LLM (Ollama)
-        llm_model: Modèle à utiliser (qwen2.5:7b par défaut)
+        llm_model: Modèle à utiliser (défaut: modèle reasoning central)
        llm_endpoint: URL de l'endpoint Ollama
    """
    global _intent_parser
+    resolved_model = llm_model or get_reasoning_model()
    if _intent_parser is None:
        _intent_parser = IntentParser(
            use_llm=use_llm,
            llm_endpoint=llm_endpoint,
-            llm_model=llm_model
+            llm_model=resolved_model
        )
    elif use_llm and not _intent_parser.use_llm:
        # Réactiver le LLM si demandé
        _intent_parser.use_llm = True
-        _intent_parser.llm_model = llm_model
+        _intent_parser.llm_model = resolved_model
        _intent_parser._check_llm_availability()
    return _intent_parser