- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
171 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script de vérification du mode thinking d'Ollama
|
|
|
|
Vérifie que le thinking mode est bien désactivé pour optimiser les performances.
|
|
"""
|
|
|
|
import asyncio
|
|
import aiohttp
|
|
import requests
|
|
import time
|
|
|
|
|
|
async def test_thinking_mode_disabled():
    """Check that Ollama's thinking mode is effectively disabled.

    Sends a generation request prefixed with ``/nothink`` (the official
    Qwen3 directive) and inspects the response text for chain-of-thought
    markers.

    The ``requests`` library is blocking, so every HTTP call is pushed to
    a worker thread with ``asyncio.to_thread`` instead of stalling the
    event loop (the original called ``requests`` directly inside this
    coroutine, which blocks the loop for up to the full timeout).

    Returns:
        bool: True if Ollama answered without any thinking markers,
        False on connection failure, HTTP error, or detected thinking.
    """
    print("🔍 Vérification du mode thinking...")

    endpoint = "http://localhost:11434"

    # Make sure Ollama is reachable before running the real test.
    try:
        response = await asyncio.to_thread(
            requests.get, f"{endpoint}/api/tags", timeout=5
        )
        if response.status_code != 200:
            print("❌ Ollama non disponible")
            return False
    except Exception as e:
        print(f"❌ Ollama non disponible: {e}")
        return False

    print("✅ Ollama disponible")

    # Probe with /nothink (official Qwen3 method).
    payload = {
        "model": "qwen3-vl:8b",
        "prompt": "/nothink What is 2+2? Answer with just the number.",
        "stream": False,
        "options": {
            "temperature": 0.0,  # deterministic output
            "num_predict": 50,   # small cap: the expected answer is tiny
        },
    }

    # Plain string: the original used an f-string with no placeholders (F541).
    print("📤 Envoi requête avec /nothink...")
    start_time = time.time()

    try:
        response = await asyncio.to_thread(
            requests.post,
            f"{endpoint}/api/generate",
            json=payload,
            timeout=30,
        )

        elapsed = time.time() - start_time

        if response.status_code == 200:
            data = response.json()
            response_text = data.get("response", "")

            # Markers that betray chain-of-thought output.
            thinking_indicators = [
                "<thinking>",
                "</thinking>",
                "<think>",
                "</think>",
                "Let me think",
                "I need to think",
            ]

            has_thinking = any(indicator.lower() in response_text.lower()
                               for indicator in thinking_indicators)

            if has_thinking:
                print("⚠️ Thinking mode détecté dans la réponse!")
                print(f" Réponse: {response_text[:200]}...")
                return False
            else:
                print("✅ Thinking mode désactivé")
                print(f" Réponse: '{response_text.strip()}'")
                print(f" Temps: {elapsed:.2f}s")
                print(f" Tokens: {data.get('eval_count', 0)}")
                return True
        else:
            print(f"❌ Erreur HTTP: {response.status_code}")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def test_ollama_manager_options():
    """Check that OllamaManager.load_model() embeds the /nothink directive.

    Inspects the method's source code via ``inspect.getsource`` rather
    than executing it, so no running Ollama server is required.

    Returns:
        bool: True if the directive is present, False if it is missing
        or the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaManager...")

    try:
        from core.gpu.ollama_manager import OllamaManager

        # Read the source to verify the options. A single substring test
        # suffices: 'nothink' also matches the '/nothink' spelling, so the
        # original "'/nothink' in source or 'nothink' in source" was redundant.
        import inspect
        source = inspect.getsource(OllamaManager.load_model)

        if 'nothink' in source:
            print("✅ OllamaManager.load_model() utilise /nothink")
        else:
            print("⚠️ OllamaManager.load_model() n'utilise pas /nothink")
            return False

        return True

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def test_ollama_client_options():
    """Check that OllamaClient.generate() embeds the /nothink directive.

    Inspects the method's source code via ``inspect.getsource`` rather
    than executing it, so no running Ollama server is required.

    Returns:
        bool: True if the directive is present, False if it is missing
        or the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaClient...")

    try:
        from core.detection.ollama_client import OllamaClient

        # Read the source to verify the options. A single substring test
        # suffices: 'nothink' also matches the '/nothink' spelling, so the
        # original "'/nothink' in source or 'nothink' in source" was redundant.
        import inspect
        source = inspect.getsource(OllamaClient.generate)

        if 'nothink' in source:
            print("✅ OllamaClient.generate() utilise /nothink")
            return True
        else:
            print("⚠️ OllamaClient.generate() n'utilise pas /nothink")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def main():
    """Run every check and print a summary table.

    Returns:
        bool: True when all checks succeeded, False otherwise.
    """
    print("🚀 Test de désactivation du thinking mode Ollama\n")

    # Static source-level checks first.
    results = [
        ("OllamaManager options", test_ollama_manager_options()),
        ("OllamaClient options", test_ollama_client_options()),
    ]

    # Then exercise the thinking mode against a live server.
    results.append(
        ("Thinking mode désactivé", asyncio.run(test_thinking_mode_disabled()))
    )

    print("\n📊 Résultats:")
    all_ok = all(ok for _, ok in results)
    for name, ok in results:
        status = "✅" if ok else "❌"
        print(f" {status} {name}")

    if all_ok:
        print("\n🎉 Tous les tests passent - Ollama optimisé !")
    else:
        print("\n⚠️ Certains tests ont échoué")

    return all_ok
|
|
|
|
|
|
if __name__ == "__main__":
    # Raise SystemExit directly instead of calling the site-provided
    # exit() builtin, which is intended for interactive sessions and is
    # not guaranteed to exist (e.g. under `python -S`).
    success = main()
    raise SystemExit(0 if success else 1)
|