v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
170
verify_thinking_mode.py
Normal file
170
verify_thinking_mode.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script de vérification du mode thinking d'Ollama
|
||||
|
||||
Vérifie que le thinking mode est bien désactivé pour optimiser les performances.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import requests
|
||||
import time
|
||||
|
||||
|
||||
async def test_thinking_mode_disabled():
    """Verify that Ollama's thinking mode is effectively disabled.

    Pings the local Ollama server, then sends a ``/nothink``-prefixed
    prompt (the official Qwen3 mechanism) and scans the response for
    thinking-mode markers.

    Returns:
        bool: True if Ollama is reachable and answered without any
        thinking markers, False otherwise.
    """
    # NOTE(review): declared async but only performs blocking `requests`
    # calls — kept async because main() invokes it via asyncio.run().
    print("🔍 Vérification du mode thinking...")

    endpoint = "http://localhost:11434"

    # Make sure Ollama is reachable before running the real test.
    try:
        response = requests.get(f"{endpoint}/api/tags", timeout=5)
        if response.status_code != 200:
            print("❌ Ollama non disponible")
            return False
    except Exception as e:
        print(f"❌ Ollama non disponible: {e}")
        return False

    print("✅ Ollama disponible")

    # /nothink prefix is the official Qwen3 way to suppress thinking output.
    payload = {
        "model": "qwen3-vl:8b",
        "prompt": "/nothink What is 2+2? Answer with just the number.",
        "stream": False,
        "options": {
            "temperature": 0.0,   # deterministic output
            "num_predict": 50     # short answer expected; cap token budget
        }
    }

    # Fixed: was an f-string with no placeholders.
    print("📤 Envoi requête avec /nothink...")
    start_time = time.time()

    try:
        response = requests.post(
            f"{endpoint}/api/generate",
            json=payload,
            timeout=30
        )

        elapsed = time.time() - start_time

        if response.status_code == 200:
            data = response.json()
            response_text = data.get("response", "")

            # Markers that betray an active thinking mode.
            thinking_indicators = [
                "<thinking>",
                "</thinking>",
                "<think>",
                "</think>",
                "Let me think",
                "I need to think"
            ]

            # Hoist the lowercased response out of the loop (was recomputed
            # once per indicator).
            lowered_response = response_text.lower()
            has_thinking = any(indicator.lower() in lowered_response
                               for indicator in thinking_indicators)

            if has_thinking:
                # Fixed: was an f-string with no placeholders.
                print("⚠️ Thinking mode détecté dans la réponse!")
                print(f" Réponse: {response_text[:200]}...")
                return False
            else:
                # Fixed: was an f-string with no placeholders.
                print("✅ Thinking mode désactivé")
                print(f" Réponse: '{response_text.strip()}'")
                print(f" Temps: {elapsed:.2f}s")
                print(f" Tokens: {data.get('eval_count', 0)}")
                return True
        else:
            print(f"❌ Erreur HTTP: {response.status_code}")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def test_ollama_manager_options():
    """Check that OllamaManager.load_model() uses the /nothink directive.

    Inspects the method's source code (static check) rather than
    executing it.

    Returns:
        bool: True if 'nothink' appears in the method source; False if it
        does not, or if the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaManager...")

    try:
        from core.gpu.ollama_manager import OllamaManager

        # Read the source code to verify the options statically.
        import inspect
        source = inspect.getsource(OllamaManager.load_model)

        # '/nothink' in source implies 'nothink' in source, so a single
        # substring test suffices (the original or-chain was redundant).
        if 'nothink' in source:
            print("✅ OllamaManager.load_model() utilise /nothink")
        else:
            print("⚠️ OllamaManager.load_model() n'utilise pas /nothink")
            return False

        return True

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def test_ollama_client_options():
    """Check that OllamaClient.generate() uses the /nothink directive.

    Inspects the method's source code (static check) rather than
    executing it.

    Returns:
        bool: True if 'nothink' appears in the method source; False if it
        does not, or if the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaClient...")

    try:
        from core.detection.ollama_client import OllamaClient

        # Read the source code to verify the options statically.
        import inspect
        source = inspect.getsource(OllamaClient.generate)

        # '/nothink' in source implies 'nothink' in source, so a single
        # substring test suffices (the original or-chain was redundant).
        if 'nothink' in source:
            print("✅ OllamaClient.generate() utilise /nothink")
            return True
        else:
            print("⚠️ OllamaClient.generate() n'utilise pas /nothink")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Run every verification check and print a pass/fail summary.

    Returns:
        bool: True only when all checks succeeded.
    """
    print("🚀 Test de désactivation du thinking mode Ollama\n")

    # Static checks on the code first, then the live thinking-mode probe
    # (async, so it goes through asyncio.run).
    results = [
        ("OllamaManager options", test_ollama_manager_options()),
        ("OllamaClient options", test_ollama_client_options()),
        ("Thinking mode désactivé", asyncio.run(test_thinking_mode_disabled())),
    ]

    print("\n📊 Résultats:")
    all_ok = True
    for name, ok in results:
        print(f" {'✅' if ok else '❌'} {name}")
        all_ok = all_ok and ok

    if all_ok:
        print("\n🎉 Tous les tests passent - Ollama optimisé !")
    else:
        print("\n⚠️ Certains tests ont échoué")

    return all_ok
|
||||
|
||||
|
||||
if __name__ == "__main__":
    success = main()
    # Exit code 0 on success, 1 on failure (usable from CI/shell).
    # `raise SystemExit` instead of the site-provided `exit()` builtin,
    # which is absent under `python -S` and discouraged in scripts.
    raise SystemExit(0 if success else 1)
|
||||
Reference in New Issue
Block a user