v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
265
core/gpu/ollama_manager.py
Normal file
265
core/gpu/ollama_manager.py
Normal file
@@ -0,0 +1,265 @@
|
||||
"""
|
||||
Ollama Manager - Manages VLM model lifecycle via Ollama API
|
||||
|
||||
Handles:
|
||||
- Loading/unloading models to/from VRAM
|
||||
- Health checks and availability detection
|
||||
- Keep-alive management for model persistence
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OllamaManager:
    """
    Manages Ollama VLM model lifecycle.

    Uses Ollama's REST API to control model loading/unloading.

    Example:
        >>> manager = OllamaManager()
        >>> await manager.load_model()
        >>> is_loaded = await manager.is_model_loaded()
        >>> await manager.unload_model()
    """

    def __init__(
        self,
        endpoint: str = "http://localhost:11434",
        model: str = "qwen3-vl:8b",
        default_keep_alive: str = "5m"
    ):
        """
        Initialize OllamaManager.

        Args:
            endpoint: Ollama API endpoint (trailing slashes are stripped)
            model: Model name to manage, optionally tagged (e.g. "qwen3-vl:8b")
            default_keep_alive: Default keep-alive duration (e.g. "5m", "1h")
        """
        self._endpoint = endpoint.rstrip("/")
        self._model = model
        self._default_keep_alive = default_keep_alive
        # Shared HTTP session, created lazily by _get_session().
        self._session: Optional["aiohttp.ClientSession"] = None

    async def _get_session(self) -> "aiohttp.ClientSession":
        """Return the shared aiohttp session, creating one if absent or closed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=60)
            )
        return self._session

    async def close(self) -> None:
        """Close the HTTP session. Safe to call multiple times."""
        if self._session and not self._session.closed:
            await self._session.close()
        # Drop the closed session so a later call re-creates a fresh one.
        self._session = None

    # =========================================================================
    # Health Check
    # =========================================================================

    def is_available(self) -> bool:
        """
        Check if Ollama service is available (synchronous).

        Returns:
            True if Ollama is reachable (GET /api/tags returns HTTP 200)
        """
        # Stdlib HTTP client so the synchronous path does not pull in the
        # third-party `requests` package (which this file never declares).
        import urllib.request

        try:
            with urllib.request.urlopen(
                f"{self._endpoint}/api/tags", timeout=5
            ) as response:
                # urlopen raises HTTPError for 4xx/5xx, caught below.
                return response.status == 200
        except Exception:
            return False

    async def is_available_async(self) -> bool:
        """
        Check if Ollama service is available (async).

        Returns:
            True if Ollama is reachable
        """
        try:
            session = await self._get_session()
            async with session.get(f"{self._endpoint}/api/tags") as response:
                return response.status == 200
        except Exception:
            return False

    # =========================================================================
    # Model Management
    # =========================================================================

    async def load_model(self, keep_alive: Optional[str] = None) -> bool:
        """
        Load the model into VRAM.

        Uses a minimal generate request to trigger model loading.

        Args:
            keep_alive: How long to keep model loaded (e.g., "5m", "1h").
                Defaults to the value given at construction time.

        Returns:
            True if model loaded successfully
        """
        keep_alive = keep_alive or self._default_keep_alive

        try:
            session = await self._get_session()

            # Send a minimal request to load the model.
            # For Qwen3 models, "/nothink" disables the thinking mode so the
            # warm-up generation returns quickly.
            prompt = "/nothink " if "qwen" in self._model.lower() else ""

            payload = {
                "model": self._model,
                "prompt": prompt,
                "keep_alive": keep_alive,
                "stream": False,
                "options": {
                    "temperature": 0.0,  # deterministic for classification
                    "top_k": 1  # faster for classification tasks
                }
            }

            logger.debug(
                "Loading model %s with keep_alive=%s", self._model, keep_alive
            )

            async with session.post(
                f"{self._endpoint}/api/generate",
                json=payload
            ) as response:
                if response.status == 200:
                    logger.info("Model %s loaded successfully", self._model)
                    return True
                text = await response.text()
                logger.error(
                    "Failed to load model: %s - %s", response.status, text
                )
                return False

        except asyncio.TimeoutError:
            logger.error("Timeout loading model")
            return False
        except Exception as e:
            logger.error("Error loading model: %s", e)
            return False

    async def unload_model(self) -> bool:
        """
        Unload the model from VRAM.

        Sets keep_alive to 0 to trigger immediate unload.

        Returns:
            True if model unloaded successfully
        """
        try:
            session = await self._get_session()

            # keep_alive=0 tells Ollama to evict the model immediately.
            payload = {
                "model": self._model,
                "prompt": "",
                "keep_alive": 0,
                "stream": False
            }

            logger.debug("Unloading model %s", self._model)

            async with session.post(
                f"{self._endpoint}/api/generate",
                json=payload
            ) as response:
                if response.status == 200:
                    logger.info("Model %s unloaded successfully", self._model)
                    return True
                text = await response.text()
                logger.error(
                    "Failed to unload model: %s - %s", response.status, text
                )
                return False

        except asyncio.TimeoutError:
            logger.error("Timeout unloading model")
            return False
        except Exception as e:
            logger.error("Error unloading model: %s", e)
            return False

    async def is_model_loaded(self) -> bool:
        """
        Check if the model is currently loaded in VRAM.

        Returns:
            True if model is loaded
        """
        base_name = self._model.split(":")[0]
        try:
            session = await self._get_session()

            async with session.get(f"{self._endpoint}/api/ps") as response:
                if response.status != 200:
                    logger.warning(
                        "Failed to check loaded models: %s", response.status
                    )
                    return False

                data = await response.json()
                for model_info in data.get("models", []):
                    name = model_info.get("name", "")
                    # Match the exact name or the same base model under a
                    # different tag (e.g. "qwen3-vl:latest" for "qwen3-vl:8b").
                    # A plain prefix match would wrongly accept unrelated
                    # models such as "qwen3-vl-mini".
                    if name == self._model or name.split(":")[0] == base_name:
                        return True
                return False

        except Exception as e:
            logger.error("Error checking loaded models: %s", e)
            return False

    async def _fetch_model_names(self, path: str, label: str) -> List[str]:
        """GET *path* and return the names under "models"; [] on any error.

        Args:
            path: API path returning {"models": [...]} (e.g. "/api/ps")
            label: Human-readable qualifier used in error logs
        """
        try:
            session = await self._get_session()

            async with session.get(f"{self._endpoint}{path}") as response:
                if response.status != 200:
                    return []
                data = await response.json()
                return [m.get("name", "") for m in data.get("models", [])]

        except Exception as e:
            logger.error("Error listing %s models: %s", label, e)
            return []

    async def list_loaded_models(self) -> List[str]:
        """
        List all currently loaded models.

        Returns:
            List of loaded model names
        """
        return await self._fetch_model_names("/api/ps", "loaded")

    async def list_available_models(self) -> List[str]:
        """
        List all available models (downloaded).

        Returns:
            List of available model names
        """
        return await self._fetch_model_names("/api/tags", "available")
|
||||
Reference in New Issue
Block a user