chore(dgx): snapshot consolidation WIP pour transfert poc DGX

Regroupe le WIP non committé requis pour le clone/runtime DGX (Option A) : - api_stream.py : préflight replay + smoke santé modèles + handler 403 WP-B - de-hardcode VLM : vlm_config, gpu/*, vram_orchestrator, ollama_manager - stream_processor, semantic_matcher, agent_chat (app/planner/intent) - workflows.db (acquis ; le transfert artifacts le mettra à jour + rewrite chemins) - docs : plans DGX, benchmarks VLM/grounders, recherche SOTA, coordination 8 juin Snapshot destiné à la branche poc-dgx poussée sur Gitea pour cloner le DGX. Scan anti-secret : clean. graphify (repo embarqué) exclu. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 16:33:58 +02:00
parent f18de016d7
commit 6d34b3cb68
204 changed files with 15744 additions and 47 deletions
--- a/agent_chat/app.py
+++ b/agent_chat/app.py
@@ -38,6 +38,7 @@ from werkzeug.utils import secure_filename
 sys.path.insert(0, str(Path(__file__).parent.parent))

 from core.workflow import SemanticMatcher, VariableManager
+from core.detection.vlm_config import get_reasoning_model

 # Import des composants conversationnels
 from .intent_parser import IntentParser, IntentType, get_intent_parser
@@ -237,6 +238,7 @@ def init_system():
    global matcher, gpu_manager
    global intent_parser, confirmation_loop, response_generator, conversation_manager
    global autonomous_planner
+    reasoning_model = get_reasoning_model()

    # 1. SemanticMatcher — multi-répertoires (P0-6) + matching LLM (P0-7)
    # Scan data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
@@ -244,7 +246,7 @@ def init_system():
        matcher = SemanticMatcher(
            workflows_dir=None,  # None = scan tous les répertoires par défaut
            use_llm=True,        # Matching sémantique via Ollama (P0-7)
-            llm_model="qwen2.5:7b",
+            llm_model=reasoning_model,
        )
        dirs_info = matcher.get_directories()
        dirs_summary = ", ".join(
@@ -269,7 +271,10 @@ def init_system():

    # 3. Composants conversationnels
    try:
-        intent_parser = get_intent_parser(use_llm=True)  # LLM activé (Ollama)
+        intent_parser = get_intent_parser(
+            use_llm=True,
+            llm_model=reasoning_model,
+        )  # LLM activé (Ollama)
        confirmation_loop = get_confirmation_loop()
        response_generator = get_response_generator()
        conversation_manager = get_conversation_manager()
@@ -350,7 +355,7 @@ def init_system():

    # 5. Autonomous Planner (Agent Libre)
    try:
-        autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
+        autonomous_planner = get_autonomous_planner(llm_model=reasoning_model)

        # Configurer les callbacks pour l'exécution
        if screen_capturer:
@@ -726,7 +731,7 @@ def api_history():
 # =============================================================================

 # Modèle texte pour les réponses conversationnelles (pas besoin de vision)
-_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL", "qwen3:8b")
+_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL") or get_reasoning_model()

 _LEA_SYSTEM_PROMPT = """Tu es Léa, une assistante professionnelle chaleureuse et bienveillante.