Initial commit
geniusia2/tests/test_llm_manager.py · 230 lines · Normal file
@@ -0,0 +1,230 @@
"""
Tests for the LLM manager.
"""

import pytest
import numpy as np
from unittest.mock import Mock, patch, MagicMock

from geniusia2.core.llm_manager import LLMManager


@pytest.fixture
def mock_ollama_client():
    """Create a mocked Ollama client."""
    with patch('geniusia2.core.llm_manager.ollama') as mock_ollama:
        mock_client = MagicMock()
        mock_client.list.return_value = {
            'models': [{'name': 'qwen2.5-vl:3b'}]
        }
        mock_ollama.Client.return_value = mock_client
        yield mock_client
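
# Patching 'geniusia2.core.llm_manager.ollama' swaps the name where the code
# under test looks it up, so any LLMManager built while this fixture is active
# talks to the MagicMock rather than a live Ollama server.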


@pytest.fixture
def llm_manager(mock_ollama_client):
    """Create an LLM manager instance for the tests."""
    return LLMManager(
        model_name="qwen2.5-vl:3b",
        ollama_host="localhost:11434",
        fallback_to_vision=True
    )


def test_initialization(llm_manager):
    """Test manager initialization."""
    assert llm_manager.model_name == "qwen2.5-vl:3b"
    assert llm_manager.ollama_host == "localhost:11434"
    assert llm_manager.fallback_to_vision is True


def test_image_to_base64(llm_manager):
    """Test image-to-base64 conversion."""
    # Create a test image
    test_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

    # Convert to base64
    b64_str = llm_manager._image_to_base64(test_image)

    # Verify
    assert isinstance(b64_str, str)
    assert len(b64_str) > 0
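
# For reference, a minimal sketch of the conversion this test exercises,
# assuming the image is serialized to an in-memory PNG and then base64-encoded;
# the actual LLMManager._image_to_base64 may use a different container format:
#
#     import base64
#     import cv2  # assumed dependency
#     success, buffer = cv2.imencode('.png', image)
#     b64_str = base64.b64encode(buffer.tobytes()).decode('utf-8')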


def test_fallback_to_vision_only(llm_manager):
    """Test the fallback to vision-only mode."""
    detections = [
        {"label": "button1", "confidence": 0.7, "bbox": (10, 10, 50, 30)},
        {"label": "button2", "confidence": 0.9, "bbox": (100, 10, 50, 30)},
        {"label": "button3", "confidence": 0.6, "bbox": (200, 10, 50, 30)}
    ]

    result = llm_manager._fallback_to_vision_only(detections)

    # Should select button2 (highest confidence)
    assert result["element_index"] == 1
    assert result["selected_element"]["label"] == "button2"
    assert result["confidence"] == 0.9
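
# The behaviour asserted above amounts to an argmax over detection confidence;
# a sketch of the assumed logic (not the actual implementation):
#
#     best_idx = max(range(len(detections)),
#                    key=lambda i: detections[i]["confidence"])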


def test_fallback_empty_detections(llm_manager):
    """Test the fallback with empty detections."""
    result = llm_manager._fallback_to_vision_only([])

    assert result["selected_element"] is None
    assert result["confidence"] == 0.0


def test_parse_llm_response_valid_json(llm_manager):
    """Test parsing a valid LLM response."""
    detections = [
        {"label": "button1", "confidence": 0.7},
        {"label": "button2", "confidence": 0.9}
    ]

    response = """
    Here is my analysis:
    {
        "element_index": 1,
        "confidence": 0.85,
        "reasoning": "This button is the best match"
    }
    """

    result = llm_manager._parse_llm_response(response, detections)

    assert result["element_index"] == 1
    assert result["confidence"] == 0.85
    assert "reasoning" in result


def test_parse_llm_response_invalid_json(llm_manager):
    """Test parsing an invalid LLM response."""
    detections = [
        {"label": "button1", "confidence": 0.7},
        {"label": "button2", "confidence": 0.9}
    ]

    response = "This is not valid JSON"

    result = llm_manager._parse_llm_response(response, detections)

    # Should fall back to vision-only
    assert result["element_index"] == 1  # button2 has the highest confidence
    assert "fallback" in result["reasoning"].lower()


def test_reason_about_detections_empty(llm_manager):
    """Test reasoning with empty detections."""
    result = llm_manager.reason_about_detections(
        detections=[],
        context={"window": "Test"},
        intent="click on valider"
    )

    assert result["selected_element"] is None
    assert result["confidence"] == 0.0


def test_reason_about_detections_with_mock(llm_manager, mock_ollama_client):
    """Test reasoning with a mocked client."""
    detections = [
        {
            "label": "valider",
            "confidence": 0.9,
            "bbox": (100, 100, 50, 30),
            "roi_image": np.random.randint(0, 255, (30, 50, 3), dtype=np.uint8)
        }
    ]

    # Configure the mock to return a valid JSON response
    mock_ollama_client.generate.return_value = {
        'response': '{"element_index": 0, "confidence": 0.95, "reasoning": "valider button"}'
    }

    result = llm_manager.reason_about_detections(
        detections=detections,
        context={"window": "Test"},
        intent="click on valider"
    )

    assert result["element_index"] == 0
    assert result["confidence"] == 0.95
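
# The mocked return value mirrors the shape the code under test expects from
# the generate call (a mapping with a 'response' key); the real ollama client
# may return a richer object depending on the installed version.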


def test_generate_with_vision(llm_manager, mock_ollama_client):
    """Test generation with vision."""
    mock_ollama_client.generate.return_value = {
        'response': 'This is a test response'
    }

    test_image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)

    response = llm_manager.generate_with_vision(
        prompt="Describe this image",
        images=[test_image]
    )

    assert response == 'This is a test response'
    assert mock_ollama_client.generate.called


def test_score_action_relevance(llm_manager, mock_ollama_client):
    """Test action relevance scoring."""
    mock_ollama_client.generate.return_value = {
        'response': '0.85'
    }

    action = {
        "action_type": "click",
        "target_element": "valider_button"
    }

    score = llm_manager.score_action_relevance(
        action=action,
        intent="validate the form"
    )

    assert 0.0 <= score <= 1.0
    assert score == 0.85
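
# The scoring path presumably parses the model's raw text into a float and
# clamps it to [0, 1]; a sketch under that assumption:
#
#     raw = mock_ollama_client.generate.return_value['response'].strip()
#     score = min(max(float(raw), 0.0), 1.0)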


def test_is_available(llm_manager, mock_ollama_client):
    """Test the availability check."""
    mock_ollama_client.list.return_value = {'models': []}

    available = llm_manager.is_available()

    assert isinstance(available, bool)


def test_get_model_info(llm_manager):
    """Test retrieving model information."""
    info = llm_manager.get_model_info()

    assert "model_name" in info
    assert "host" in info
    assert "available" in info
    assert "fallback_enabled" in info
    assert info["model_name"] == "qwen2.5-vl:3b"


def test_llm_manager_without_ollama():
    """Test initialization when Ollama is unavailable."""
    with patch('geniusia2.core.llm_manager.ollama', None):
        with pytest.raises(ImportError):
            LLMManager()
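
# Setting the module-level 'ollama' attribute to None simulates a failed
# import (assuming llm_manager guards its import with try/except), so the
# constructor is expected to raise ImportError.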


def test_client_initialization_error():
    """Test error handling during initialization."""
    with patch('geniusia2.core.llm_manager.ollama') as mock_ollama:
        mock_ollama.Client.side_effect = Exception("Connection error")

        # Should still create the manager, relying on the fallback
        manager = LLMManager(fallback_to_vision=True)
        assert manager.client is None


if __name__ == "__main__":
    pytest.main([__file__, "-v"])