Files
rpa_vision_v3/tests/unit/test_gpu_resource_manager.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

605 lines
20 KiB
Python

"""
Property-based tests for GPU Resource Manager
Tests correctness properties defined in the design document.
Uses Hypothesis for property-based testing.
"""
import asyncio
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from hypothesis import given, strategies as st, settings, assume
# Configure pytest-asyncio
pytest_plugins = ('pytest_asyncio',)
try:
from core.gpu.gpu_resource_manager import (
GPUResourceManager,
GPUResourceConfig,
ExecutionMode,
ModelState,
VRAMInfo,
)
except ImportError as e:
pytest.skip(f"GPU Resource Manager not available: {e}", allow_module_level=True)
# =============================================================================
# Fixtures
# =============================================================================
@pytest.fixture
def config():
"""Test configuration with short timeouts."""
return GPUResourceConfig(
ollama_endpoint="http://localhost:11434",
vlm_model="test-model:latest",
idle_timeout_seconds=1,
load_timeout_seconds=5,
unload_timeout_seconds=2,
max_load_retries=2,
)
@pytest.fixture
def mock_ollama_manager():
"""Mock OllamaManager."""
manager = MagicMock()
manager.load_model = AsyncMock(return_value=True)
manager.unload_model = AsyncMock(return_value=True)
manager.is_model_loaded = AsyncMock(return_value=False)
manager.is_available = MagicMock(return_value=True)
return manager
@pytest.fixture
def mock_vram_monitor():
"""Mock VRAMMonitor."""
monitor = MagicMock()
monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
total_mb=12000,
used_mb=500,
free_mb=11500,
gpu_name="Test GPU",
gpu_utilization_percent=0
))
monitor.is_gpu_available = MagicMock(return_value=True)
return monitor
@pytest.fixture
def mock_clip_manager():
"""Mock CLIPManager."""
manager = MagicMock()
manager.migrate_to_device = AsyncMock(return_value=True)
manager.get_current_device = MagicMock(return_value="cpu")
return manager
@pytest.fixture
def gpu_manager(config, mock_ollama_manager, mock_vram_monitor, mock_clip_manager):
"""Create GPUResourceManager with mocked dependencies."""
# Reset singleton
GPUResourceManager.reset_instance()
manager = GPUResourceManager(config)
manager._ollama_manager = mock_ollama_manager
manager._vram_monitor = mock_vram_monitor
manager._clip_manager = mock_clip_manager
yield manager
# Cleanup
GPUResourceManager.reset_instance()
# =============================================================================
# Property 10: ensure_vlm_loaded blocking
# Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking
# Validates: Requirements 5.1
# =============================================================================
@pytest.mark.asyncio
async def test_ensure_vlm_loaded_returns_when_loaded(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**
For any call to ensure_vlm_loaded(), the function should only return
when is_vlm_loaded() returns True.
"""
# Arrange
mock_ollama_manager.load_model = AsyncMock(return_value=True)
# Act
result = await gpu_manager.ensure_vlm_loaded()
# Assert
assert result is True
assert gpu_manager.is_vlm_loaded() is True
assert gpu_manager.get_vlm_state() == ModelState.LOADED
@pytest.mark.asyncio
async def test_ensure_vlm_loaded_already_loaded(gpu_manager):
"""
**Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**
If VLM is already loaded, ensure_vlm_loaded should return immediately.
"""
# Arrange - set state to loaded
gpu_manager._vlm_state = ModelState.LOADED
# Act
result = await gpu_manager.ensure_vlm_loaded()
# Assert
assert result is True
assert gpu_manager.is_vlm_loaded() is True
# =============================================================================
# Property 11: ensure_vlm_unloaded blocking
# Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking
# Validates: Requirements 5.2
# =============================================================================
@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_returns_when_unloaded(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**
For any call to ensure_vlm_unloaded(), the function should only return
when is_vlm_loaded() returns False.
"""
# Arrange - start with loaded state
gpu_manager._vlm_state = ModelState.LOADED
mock_ollama_manager.unload_model = AsyncMock(return_value=True)
# Act
result = await gpu_manager.ensure_vlm_unloaded()
# Assert
assert result is True
assert gpu_manager.is_vlm_loaded() is False
assert gpu_manager.get_vlm_state() == ModelState.UNLOADED
@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_already_unloaded(gpu_manager):
"""
**Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**
If VLM is already unloaded, ensure_vlm_unloaded should return immediately.
"""
# Arrange - already unloaded
gpu_manager._vlm_state = ModelState.UNLOADED
# Act
result = await gpu_manager.ensure_vlm_unloaded()
# Assert
assert result is True
assert gpu_manager.is_vlm_loaded() is False
# =============================================================================
# Property 12: get_clip_device validity
# Feature: gpu-resource-manager, Property 12: get_clip_device validity
# Validates: Requirements 5.3
# =============================================================================
@given(st.sampled_from(["cpu", "cuda"]))
@settings(max_examples=100)
def test_get_clip_device_returns_valid_value(device):
"""
**Feature: gpu-resource-manager, Property 12: get_clip_device validity**
For any call to get_clip_device(), the return value should be
either "cpu" or "cuda".
"""
# Reset singleton for each test
GPUResourceManager.reset_instance()
manager = GPUResourceManager(GPUResourceConfig())
manager._clip_device = device
# Act
result = manager.get_clip_device()
# Assert
assert result in ["cpu", "cuda"]
# Cleanup
GPUResourceManager.reset_instance()
def test_get_clip_device_default_is_cpu():
"""
**Feature: gpu-resource-manager, Property 12: get_clip_device validity**
Default CLIP device should be CPU.
"""
GPUResourceManager.reset_instance()
manager = GPUResourceManager(GPUResourceConfig())
assert manager.get_clip_device() == "cpu"
GPUResourceManager.reset_instance()
# =============================================================================
# Property 4: VRAM decrease on VLM unload
# Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload
# Validates: Requirements 1.4
# =============================================================================
@pytest.mark.asyncio
async def test_vram_decreases_on_vlm_unload(gpu_manager, mock_ollama_manager, mock_vram_monitor):
"""
**Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload**
For any VLM unload operation, the VRAM usage should decrease.
"""
# Arrange - simulate loaded state with high VRAM
gpu_manager._vlm_state = ModelState.LOADED
# Simulate VRAM before (high) and after (low) unload
vram_before = VRAMInfo(12000, 10500, 1500, "Test GPU", 50)
vram_after = VRAMInfo(12000, 500, 11500, "Test GPU", 0)
mock_vram_monitor.get_vram_info = MagicMock(side_effect=[vram_before, vram_after])
mock_ollama_manager.unload_model = AsyncMock(return_value=True)
# Track emitted events
events = []
gpu_manager.on_resource_changed(lambda e: events.append(e))
# Act
result = await gpu_manager.ensure_vlm_unloaded()
# Assert
assert result is True
assert gpu_manager.is_vlm_loaded() is False
# Check that unload event was emitted with VRAM info
unload_events = [e for e in events if e.event_type == "model_unloaded"]
assert len(unload_events) >= 1
# =============================================================================
# Property 5: Status query completeness
# Feature: gpu-resource-manager, Property 5: Status query completeness
# Validates: Requirements 2.1
# =============================================================================
def test_get_status_returns_complete_status(gpu_manager, mock_vram_monitor):
"""
**Feature: gpu-resource-manager, Property 5: Status query completeness**
For any call to get_status(), the returned GPUResourceStatus should
contain valid values for all fields.
"""
# Act
status = gpu_manager.get_status()
# Assert - all fields should be present and valid
assert status.execution_mode in ExecutionMode
assert status.vlm_state in ModelState
assert isinstance(status.vlm_model, str)
assert status.clip_device in ["cpu", "cuda"]
assert status.vram is not None or status.degraded_mode
assert isinstance(status.idle_timeout_seconds, int)
assert isinstance(status.degraded_mode, bool)
@given(st.sampled_from(list(ExecutionMode)))
@settings(max_examples=100)
def test_get_status_reflects_execution_mode(mode):
"""
**Feature: gpu-resource-manager, Property 5: Status query completeness**
Status should accurately reflect the current execution mode.
"""
GPUResourceManager.reset_instance()
manager = GPUResourceManager(GPUResourceConfig())
manager._execution_mode = mode
# Mock VRAM monitor
manager._vram_monitor = MagicMock()
manager._vram_monitor.get_vram_info = MagicMock(return_value=None)
status = manager.get_status()
assert status.execution_mode == mode
GPUResourceManager.reset_instance()
# =============================================================================
# Property 7: Embedding pipeline consistency
# Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency
# Validates: Requirements 3.3
# =============================================================================
@pytest.mark.asyncio
async def test_clip_produces_valid_embeddings_after_migration(gpu_manager, mock_clip_manager):
"""
**Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency**
For any CLIP device change, the embedding pipeline should produce
valid embeddings after reinitialization.
"""
# Arrange
mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)
# Act - migrate to GPU
result = await gpu_manager.migrate_clip_to_gpu()
# Assert
assert result is True
assert gpu_manager.get_clip_device() == "cuda"
# Verify migration was called
mock_clip_manager.migrate_to_device.assert_called_with("cuda")
# =============================================================================
# Property 1: Mode transition triggers VLM unload
# Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload
# Validates: Requirements 1.1
# =============================================================================
@pytest.mark.asyncio
async def test_autopilot_mode_unloads_vlm(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload**
For any GPU Resource Manager in RECORDING mode with VLM loaded,
transitioning to AUTOPILOT mode should result in VLM being unloaded.
"""
# Arrange - start in RECORDING mode with VLM loaded
gpu_manager._execution_mode = ExecutionMode.RECORDING
gpu_manager._vlm_state = ModelState.LOADED
mock_ollama_manager.unload_model = AsyncMock(return_value=True)
# Act
await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)
# Assert
assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
assert gpu_manager.is_vlm_loaded() is False
mock_ollama_manager.unload_model.assert_called()
# =============================================================================
# Property 2: Mode transition triggers VLM load
# Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load
# Validates: Requirements 1.2
# =============================================================================
@pytest.mark.asyncio
async def test_recording_mode_loads_vlm(gpu_manager, mock_ollama_manager, mock_clip_manager):
"""
**Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load**
For any GPU Resource Manager in AUTOPILOT mode with VLM unloaded,
transitioning to RECORDING mode should result in VLM being loaded.
"""
# Arrange - start in AUTOPILOT mode with VLM unloaded
gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
gpu_manager._vlm_state = ModelState.UNLOADED
mock_ollama_manager.load_model = AsyncMock(return_value=True)
mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)
# Act
await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)
# Assert
assert gpu_manager.get_execution_mode() == ExecutionMode.RECORDING
assert gpu_manager.is_vlm_loaded() is True
mock_ollama_manager.load_model.assert_called()
# =============================================================================
# Property 3: CLIP on GPU in AUTOPILOT
# Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT
# Validates: Requirements 1.3, 3.1
# =============================================================================
@pytest.mark.asyncio
async def test_clip_migrates_to_gpu_in_autopilot(gpu_manager, mock_ollama_manager, mock_clip_manager, mock_vram_monitor):
"""
**Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT**
For any GPU Resource Manager in AUTOPILOT mode with available VRAM > 1GB,
CLIP should be on GPU device.
"""
# Arrange - start in RECORDING mode
gpu_manager._execution_mode = ExecutionMode.RECORDING
gpu_manager._vlm_state = ModelState.LOADED
gpu_manager._clip_device = "cpu"
mock_ollama_manager.unload_model = AsyncMock(return_value=True)
mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)
# Ensure enough VRAM is available
mock_vram_monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
total_mb=12000, used_mb=500, free_mb=11500, gpu_name="Test GPU", gpu_utilization_percent=0
))
# Act
await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)
# Assert
assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
assert gpu_manager.get_clip_device() == "cuda"
mock_clip_manager.migrate_to_device.assert_called_with("cuda")
# =============================================================================
# Property 6: CLIP migration ordering
# Feature: gpu-resource-manager, Property 6: CLIP migration ordering
# Validates: Requirements 3.2
# =============================================================================
@pytest.mark.asyncio
async def test_clip_migrates_to_cpu_before_vlm_loads(gpu_manager, mock_ollama_manager, mock_clip_manager):
"""
**Feature: gpu-resource-manager, Property 6: CLIP migration ordering**
For any VLM load request when CLIP is on GPU, CLIP should be migrated
to CPU before VLM loading completes.
"""
# Arrange - CLIP on GPU, VLM unloaded
gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
gpu_manager._vlm_state = ModelState.UNLOADED
gpu_manager._clip_device = "cuda"
call_order = []
async def track_clip_migrate(device):
call_order.append(f"clip_to_{device}")
return True
async def track_vlm_load():
call_order.append("vlm_load")
return True
mock_clip_manager.migrate_to_device = track_clip_migrate
mock_ollama_manager.load_model = track_vlm_load
# Act
await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)
# Assert - CLIP should migrate to CPU before VLM loads
assert "clip_to_cpu" in call_order
assert "vlm_load" in call_order
clip_idx = call_order.index("clip_to_cpu")
vlm_idx = call_order.index("vlm_load")
assert clip_idx < vlm_idx, "CLIP should migrate to CPU before VLM loads"
# =============================================================================
# Property 8: Idle timeout behavior
# Feature: gpu-resource-manager, Property 8: Idle timeout behavior
# Validates: Requirements 4.1, 4.3
# =============================================================================
def test_idle_timeout_uses_configured_value():
"""
**Feature: gpu-resource-manager, Property 8: Idle timeout behavior**
For any configured idle_timeout value, VLM should be unloaded after
that duration of inactivity (not the default).
"""
GPUResourceManager.reset_instance()
# Configure custom timeout
config = GPUResourceConfig(idle_timeout_seconds=120)
manager = GPUResourceManager(config)
# Assert config is used
assert manager._config.idle_timeout_seconds == 120
status = manager.get_status()
assert status.idle_timeout_seconds == 120
GPUResourceManager.reset_instance()
@pytest.mark.asyncio
async def test_vlm_request_updates_last_request_time(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 8: Idle timeout behavior**
VLM requests should update the last request timestamp.
"""
# Arrange
mock_ollama_manager.load_model = AsyncMock(return_value=True)
initial_time = gpu_manager._last_vlm_request
# Act
await gpu_manager.ensure_vlm_loaded()
# Assert
assert gpu_manager._last_vlm_request is not None
if initial_time is not None:
assert gpu_manager._last_vlm_request >= initial_time
# =============================================================================
# Property 9: On-demand VLM loading
# Feature: gpu-resource-manager, Property 9: On-demand VLM loading
# Validates: Requirements 4.2
# =============================================================================
@pytest.mark.asyncio
async def test_vlm_loads_on_demand_when_unloaded(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 9: On-demand VLM loading**
For any VLM request when VLM is unloaded, the request should complete
successfully after VLM is loaded.
"""
# Arrange - VLM is unloaded
gpu_manager._vlm_state = ModelState.UNLOADED
mock_ollama_manager.load_model = AsyncMock(return_value=True)
# Act - request VLM
result = await gpu_manager.ensure_vlm_loaded()
# Assert - VLM should be loaded
assert result is True
assert gpu_manager.is_vlm_loaded() is True
mock_ollama_manager.load_model.assert_called()
# =============================================================================
# Property 13: Sequential operation processing
# Feature: gpu-resource-manager, Property 13: Sequential operation processing
# Validates: Requirements 5.4
# =============================================================================
@pytest.mark.asyncio
async def test_concurrent_operations_processed_sequentially(gpu_manager, mock_ollama_manager):
"""
**Feature: gpu-resource-manager, Property 13: Sequential operation processing**
For any concurrent model operations, they should be processed
sequentially without race conditions.
"""
# Arrange
operation_order = []
async def slow_load():
operation_order.append("load_start")
await asyncio.sleep(0.1)
operation_order.append("load_end")
return True
async def slow_unload():
operation_order.append("unload_start")
await asyncio.sleep(0.1)
operation_order.append("unload_end")
return True
mock_ollama_manager.load_model = slow_load
mock_ollama_manager.unload_model = slow_unload
# Act - start concurrent operations
gpu_manager._vlm_state = ModelState.UNLOADED
# Start load
load_task = asyncio.create_task(gpu_manager.ensure_vlm_loaded())
await asyncio.sleep(0.01) # Let it start
# Wait for completion
await load_task
# Assert - operations should complete without interleaving
assert "load_start" in operation_order
assert "load_end" in operation_order