v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
170
verify_thinking_mode.py
Normal file
170
verify_thinking_mode.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script de vérification du mode thinking d'Ollama
|
||||
|
||||
Vérifie que le thinking mode est bien désactivé pour optimiser les performances.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import requests
|
||||
import time
|
||||
|
||||
|
||||
async def test_thinking_mode_disabled():
    """Verify that Ollama's thinking mode is effectively disabled.

    Pings the local Ollama server, then sends a ``/nothink``-prefixed
    prompt (the official Qwen3 mechanism) and scans the response for
    thinking-mode markers.

    Returns:
        bool: True if Ollama is reachable and answered without any
        thinking markers, False otherwise.
    """
    # NOTE(review): declared async but only performs blocking `requests`
    # calls — kept async because main() invokes it via asyncio.run().
    print("🔍 Vérification du mode thinking...")

    endpoint = "http://localhost:11434"

    # Make sure Ollama is reachable before running the real test.
    try:
        response = requests.get(f"{endpoint}/api/tags", timeout=5)
        if response.status_code != 200:
            print("❌ Ollama non disponible")
            return False
    except Exception as e:
        print(f"❌ Ollama non disponible: {e}")
        return False

    print("✅ Ollama disponible")

    # /nothink prefix is the official Qwen3 way to suppress thinking output.
    payload = {
        "model": "qwen3-vl:8b",
        "prompt": "/nothink What is 2+2? Answer with just the number.",
        "stream": False,
        "options": {
            "temperature": 0.0,   # deterministic output
            "num_predict": 50     # short answer expected; cap token budget
        }
    }

    # Fixed: was an f-string with no placeholders.
    print("📤 Envoi requête avec /nothink...")
    start_time = time.time()

    try:
        response = requests.post(
            f"{endpoint}/api/generate",
            json=payload,
            timeout=30
        )

        elapsed = time.time() - start_time

        if response.status_code == 200:
            data = response.json()
            response_text = data.get("response", "")

            # Markers that betray an active thinking mode.
            thinking_indicators = [
                "<thinking>",
                "</thinking>",
                "<think>",
                "</think>",
                "Let me think",
                "I need to think"
            ]

            # Hoist the lowercased response out of the loop (was recomputed
            # once per indicator).
            lowered_response = response_text.lower()
            has_thinking = any(indicator.lower() in lowered_response
                               for indicator in thinking_indicators)

            if has_thinking:
                # Fixed: was an f-string with no placeholders.
                print("⚠️ Thinking mode détecté dans la réponse!")
                print(f" Réponse: {response_text[:200]}...")
                return False
            else:
                # Fixed: was an f-string with no placeholders.
                print("✅ Thinking mode désactivé")
                print(f" Réponse: '{response_text.strip()}'")
                print(f" Temps: {elapsed:.2f}s")
                print(f" Tokens: {data.get('eval_count', 0)}")
                return True
        else:
            print(f"❌ Erreur HTTP: {response.status_code}")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def test_ollama_manager_options():
    """Check that OllamaManager.load_model() uses the /nothink directive.

    Inspects the method's source code (static check) rather than
    executing it.

    Returns:
        bool: True if 'nothink' appears in the method source; False if it
        does not, or if the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaManager...")

    try:
        from core.gpu.ollama_manager import OllamaManager

        # Read the source code to verify the options statically.
        import inspect
        source = inspect.getsource(OllamaManager.load_model)

        # '/nothink' in source implies 'nothink' in source, so a single
        # substring test suffices (the original or-chain was redundant).
        if 'nothink' in source:
            print("✅ OllamaManager.load_model() utilise /nothink")
        else:
            print("⚠️ OllamaManager.load_model() n'utilise pas /nothink")
            return False

        return True

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def test_ollama_client_options():
    """Check that OllamaClient.generate() uses the /nothink directive.

    Inspects the method's source code (static check) rather than
    executing it.

    Returns:
        bool: True if 'nothink' appears in the method source; False if it
        does not, or if the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaClient...")

    try:
        from core.detection.ollama_client import OllamaClient

        # Read the source code to verify the options statically.
        import inspect
        source = inspect.getsource(OllamaClient.generate)

        # '/nothink' in source implies 'nothink' in source, so a single
        # substring test suffices (the original or-chain was redundant).
        if 'nothink' in source:
            print("✅ OllamaClient.generate() utilise /nothink")
            return True
        else:
            print("⚠️ OllamaClient.generate() n'utilise pas /nothink")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Run every verification check and print a pass/fail summary.

    Returns:
        bool: True only when all checks succeeded.
    """
    print("🚀 Test de désactivation du thinking mode Ollama\n")

    # Static checks on the code first, then the live thinking-mode probe
    # (async, so it goes through asyncio.run).
    results = [
        ("OllamaManager options", test_ollama_manager_options()),
        ("OllamaClient options", test_ollama_client_options()),
        ("Thinking mode désactivé", asyncio.run(test_thinking_mode_disabled())),
    ]

    print("\n📊 Résultats:")
    all_ok = True
    for name, ok in results:
        print(f" {'✅' if ok else '❌'} {name}")
        all_ok = all_ok and ok

    if all_ok:
        print("\n🎉 Tous les tests passent - Ollama optimisé !")
    else:
        print("\n⚠️ Certains tests ont échoué")

    return all_ok
|
||||
|
||||
|
||||
if __name__ == "__main__":
    success = main()
    # Exit code 0 on success, 1 on failure (usable from CI/shell).
    # `raise SystemExit` instead of the site-provided `exit()` builtin,
    # which is absent under `python -S` and discouraged in scripts.
    raise SystemExit(0 if success else 1)
|
||||
Reference in New Issue
Block a user