- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
171 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script de vérification du mode thinking d'Ollama
|
|
|
|
Vérifie que le thinking mode est bien désactivé pour optimiser les performances.
|
|
"""
|
|
|
|
import asyncio
|
|
import aiohttp
|
|
import requests
|
|
import time
|
|
|
|
|
|
async def test_thinking_mode_disabled():
    """Check that Ollama's thinking mode is effectively disabled.

    Sends a generation request prefixed with ``/nothink`` (the official
    Qwen3 directive) and inspects the response text for chain-of-thought
    markers.

    The ``requests`` library is blocking, so every HTTP call is pushed to
    a worker thread with ``asyncio.to_thread`` instead of stalling the
    event loop (the original called ``requests`` directly inside this
    coroutine, which blocks the loop for up to the full timeout).

    Returns:
        bool: True if Ollama answered without any thinking markers,
        False on connection failure, HTTP error, or detected thinking.
    """
    print("🔍 Vérification du mode thinking...")

    endpoint = "http://localhost:11434"

    # Make sure Ollama is reachable before running the real test.
    try:
        response = await asyncio.to_thread(
            requests.get, f"{endpoint}/api/tags", timeout=5
        )
        if response.status_code != 200:
            print("❌ Ollama non disponible")
            return False
    except Exception as e:
        print(f"❌ Ollama non disponible: {e}")
        return False

    print("✅ Ollama disponible")

    # Probe with /nothink (official Qwen3 method).
    payload = {
        "model": "qwen3-vl:8b",
        "prompt": "/nothink What is 2+2? Answer with just the number.",
        "stream": False,
        "options": {
            "temperature": 0.0,  # deterministic output
            "num_predict": 50,   # small cap: the expected answer is tiny
        },
    }

    # Plain string: the original used an f-string with no placeholders (F541).
    print("📤 Envoi requête avec /nothink...")
    start_time = time.time()

    try:
        response = await asyncio.to_thread(
            requests.post,
            f"{endpoint}/api/generate",
            json=payload,
            timeout=30,
        )

        elapsed = time.time() - start_time

        if response.status_code == 200:
            data = response.json()
            response_text = data.get("response", "")

            # Markers that betray chain-of-thought output.
            thinking_indicators = [
                "<thinking>",
                "</thinking>",
                "<think>",
                "</think>",
                "Let me think",
                "I need to think",
            ]

            has_thinking = any(indicator.lower() in response_text.lower()
                               for indicator in thinking_indicators)

            if has_thinking:
                print("⚠️ Thinking mode détecté dans la réponse!")
                print(f" Réponse: {response_text[:200]}...")
                return False
            else:
                print("✅ Thinking mode désactivé")
                print(f" Réponse: '{response_text.strip()}'")
                print(f" Temps: {elapsed:.2f}s")
                print(f" Tokens: {data.get('eval_count', 0)}")
                return True
        else:
            print(f"❌ Erreur HTTP: {response.status_code}")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def test_ollama_manager_options():
    """Check that OllamaManager.load_model() embeds the /nothink directive.

    Inspects the method's source code via ``inspect.getsource`` rather
    than executing it, so no running Ollama server is required.

    Returns:
        bool: True if the directive is present, False if it is missing
        or the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaManager...")

    try:
        from core.gpu.ollama_manager import OllamaManager

        # Read the source to verify the options. A single substring test
        # suffices: 'nothink' also matches the '/nothink' spelling, so the
        # original "'/nothink' in source or 'nothink' in source" was redundant.
        import inspect
        source = inspect.getsource(OllamaManager.load_model)

        if 'nothink' in source:
            print("✅ OllamaManager.load_model() utilise /nothink")
        else:
            print("⚠️ OllamaManager.load_model() n'utilise pas /nothink")
            return False

        return True

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def test_ollama_client_options():
    """Check that OllamaClient.generate() embeds the /nothink directive.

    Inspects the method's source code via ``inspect.getsource`` rather
    than executing it, so no running Ollama server is required.

    Returns:
        bool: True if the directive is present, False if it is missing
        or the import/inspection fails.
    """
    print("\n⚙️ Vérification des options dans OllamaClient...")

    try:
        from core.detection.ollama_client import OllamaClient

        # Read the source to verify the options. A single substring test
        # suffices: 'nothink' also matches the '/nothink' spelling, so the
        # original "'/nothink' in source or 'nothink' in source" was redundant.
        import inspect
        source = inspect.getsource(OllamaClient.generate)

        if 'nothink' in source:
            print("✅ OllamaClient.generate() utilise /nothink")
            return True
        else:
            print("⚠️ OllamaClient.generate() n'utilise pas /nothink")
            return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
|
|
|
|
|
def main():
    """Run every check and print a summary table.

    Returns:
        bool: True when all checks succeeded, False otherwise.
    """
    print("🚀 Test de désactivation du thinking mode Ollama\n")

    # Static source-level checks first.
    results = [
        ("OllamaManager options", test_ollama_manager_options()),
        ("OllamaClient options", test_ollama_client_options()),
    ]

    # Then exercise the thinking mode against a live server.
    results.append(
        ("Thinking mode désactivé", asyncio.run(test_thinking_mode_disabled()))
    )

    print("\n📊 Résultats:")
    all_ok = all(ok for _, ok in results)
    for name, ok in results:
        status = "✅" if ok else "❌"
        print(f" {status} {name}")

    if all_ok:
        print("\n🎉 Tous les tests passent - Ollama optimisé !")
    else:
        print("\n⚠️ Certains tests ont échoué")

    return all_ok
|
|
|
|
|
|
if __name__ == "__main__":
    # Raise SystemExit directly instead of calling the site-provided
    # exit() builtin, which is intended for interactive sessions and is
    # not guaranteed to exist (e.g. under `python -S`).
    success = main()
    raise SystemExit(0 if success else 1)
|