v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tests/unit/test_gpu_resource_manager.py (new file, 604 lines)
@@ -0,0 +1,604 @@
"""
Property-based tests for GPU Resource Manager

Tests correctness properties defined in the design document.
Uses Hypothesis for property-based testing.
"""

import asyncio
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from hypothesis import given, strategies as st, settings, assume

# Configure pytest-asyncio
pytest_plugins = ('pytest_asyncio',)

try:
    from core.gpu.gpu_resource_manager import (
        GPUResourceManager,
        GPUResourceConfig,
        ExecutionMode,
        ModelState,
        VRAMInfo,
    )
except ImportError as e:
    pytest.skip(f"GPU Resource Manager not available: {e}", allow_module_level=True)


# =============================================================================
# Fixtures
# =============================================================================

@pytest.fixture
def config():
    """Test configuration with short timeouts."""
    return GPUResourceConfig(
        ollama_endpoint="http://localhost:11434",
        vlm_model="test-model:latest",
        idle_timeout_seconds=1,
        load_timeout_seconds=5,
        unload_timeout_seconds=2,
        max_load_retries=2,
    )


@pytest.fixture
def mock_ollama_manager():
    """Mock OllamaManager."""
    manager = MagicMock()
    manager.load_model = AsyncMock(return_value=True)
    manager.unload_model = AsyncMock(return_value=True)
    manager.is_model_loaded = AsyncMock(return_value=False)
    manager.is_available = MagicMock(return_value=True)
    return manager


@pytest.fixture
def mock_vram_monitor():
    """Mock VRAMMonitor."""
    monitor = MagicMock()
    monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
        total_mb=12000,
        used_mb=500,
        free_mb=11500,
        gpu_name="Test GPU",
        gpu_utilization_percent=0
    ))
    monitor.is_gpu_available = MagicMock(return_value=True)
    return monitor


@pytest.fixture
def mock_clip_manager():
    """Mock CLIPManager."""
    manager = MagicMock()
    manager.migrate_to_device = AsyncMock(return_value=True)
    manager.get_current_device = MagicMock(return_value="cpu")
    return manager


@pytest.fixture
def gpu_manager(config, mock_ollama_manager, mock_vram_monitor, mock_clip_manager):
    """Create GPUResourceManager with mocked dependencies."""
    # Reset singleton
    GPUResourceManager.reset_instance()

    manager = GPUResourceManager(config)
    manager._ollama_manager = mock_ollama_manager
    manager._vram_monitor = mock_vram_monitor
    manager._clip_manager = mock_clip_manager

    yield manager

    # Cleanup
    GPUResourceManager.reset_instance()

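
# -----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original suite): a minimal example
# of how application code is assumed to drive the manager, based only on the
# API exercised by the tests below (set_execution_mode, ensure_vlm_loaded,
# get_status). The call sequence is an assumption, not the project's
# documented integration path.
# -----------------------------------------------------------------------------
async def _example_recording_session(manager: "GPUResourceManager") -> None:
    """Hedged sketch: drive the manager through a recording session."""
    # Per Property 2, switching to RECORDING should load the VLM.
    await manager.set_execution_mode(ExecutionMode.RECORDING)
    # Per Property 10, ensure_vlm_loaded blocks until is_vlm_loaded() is True.
    await manager.ensure_vlm_loaded()
    # Per Property 5, get_status() returns a complete resource snapshot.
    assert manager.get_status().vlm_state == ModelState.LOADED
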
# =============================================================================
# Property 10: ensure_vlm_loaded blocking
# Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking
# Validates: Requirements 5.1
# =============================================================================

@pytest.mark.asyncio
async def test_ensure_vlm_loaded_returns_when_loaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**

    For any call to ensure_vlm_loaded(), the function should only return
    when is_vlm_loaded() returns True.
    """
    # Arrange
    mock_ollama_manager.load_model = AsyncMock(return_value=True)

    # Act
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True
    assert gpu_manager.get_vlm_state() == ModelState.LOADED


@pytest.mark.asyncio
async def test_ensure_vlm_loaded_already_loaded(gpu_manager):
    """
    **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**

    If VLM is already loaded, ensure_vlm_loaded should return immediately.
    """
    # Arrange - set state to loaded
    gpu_manager._vlm_state = ModelState.LOADED

    # Act
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True


# =============================================================================
# Property 11: ensure_vlm_unloaded blocking
# Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking
# Validates: Requirements 5.2
# =============================================================================

@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_returns_when_unloaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**

    For any call to ensure_vlm_unloaded(), the function should only return
    when is_vlm_loaded() returns False.
    """
    # Arrange - start with loaded state
    gpu_manager._vlm_state = ModelState.LOADED
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False
    assert gpu_manager.get_vlm_state() == ModelState.UNLOADED


@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_already_unloaded(gpu_manager):
    """
    **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**

    If VLM is already unloaded, ensure_vlm_unloaded should return immediately.
    """
    # Arrange - already unloaded
    gpu_manager._vlm_state = ModelState.UNLOADED

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False


# =============================================================================
# Property 12: get_clip_device validity
# Feature: gpu-resource-manager, Property 12: get_clip_device validity
# Validates: Requirements 5.3
# =============================================================================

@given(st.sampled_from(["cpu", "cuda"]))
@settings(max_examples=100)
def test_get_clip_device_returns_valid_value(device):
    """
    **Feature: gpu-resource-manager, Property 12: get_clip_device validity**

    For any call to get_clip_device(), the return value should be
    either "cpu" or "cuda".
    """
    # Reset singleton for each test
    GPUResourceManager.reset_instance()

    manager = GPUResourceManager(GPUResourceConfig())
    manager._clip_device = device

    # Act
    result = manager.get_clip_device()

    # Assert
    assert result in ["cpu", "cuda"]

    # Cleanup
    GPUResourceManager.reset_instance()


def test_get_clip_device_default_is_cpu():
    """
    **Feature: gpu-resource-manager, Property 12: get_clip_device validity**

    Default CLIP device should be CPU.
    """
    GPUResourceManager.reset_instance()
    manager = GPUResourceManager(GPUResourceConfig())

    assert manager.get_clip_device() == "cpu"

    GPUResourceManager.reset_instance()


# =============================================================================
# Property 4: VRAM decrease on VLM unload
# Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload
# Validates: Requirements 1.4
# =============================================================================

@pytest.mark.asyncio
async def test_vram_decreases_on_vlm_unload(gpu_manager, mock_ollama_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload**

    For any VLM unload operation, the VRAM usage should decrease.
    """
    # Arrange - simulate loaded state with high VRAM
    gpu_manager._vlm_state = ModelState.LOADED

    # Simulate VRAM before (high) and after (low) unload
    vram_before = VRAMInfo(12000, 10500, 1500, "Test GPU", 50)
    vram_after = VRAMInfo(12000, 500, 11500, "Test GPU", 0)

    mock_vram_monitor.get_vram_info = MagicMock(side_effect=[vram_before, vram_after])
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Track emitted events
    events = []
    gpu_manager.on_resource_changed(lambda e: events.append(e))

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False

    # Check that unload event was emitted with VRAM info
    unload_events = [e for e in events if e.event_type == "model_unloaded"]
    assert len(unload_events) >= 1


# =============================================================================
# Property 5: Status query completeness
# Feature: gpu-resource-manager, Property 5: Status query completeness
# Validates: Requirements 2.1
# =============================================================================

def test_get_status_returns_complete_status(gpu_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 5: Status query completeness**

    For any call to get_status(), the returned GPUResourceStatus should
    contain valid values for all fields.
    """
    # Act
    status = gpu_manager.get_status()

    # Assert - all fields should be present and valid
    assert status.execution_mode in ExecutionMode
    assert status.vlm_state in ModelState
    assert isinstance(status.vlm_model, str)
    assert status.clip_device in ["cpu", "cuda"]
    assert status.vram is not None or status.degraded_mode
    assert isinstance(status.idle_timeout_seconds, int)
    assert isinstance(status.degraded_mode, bool)


@given(st.sampled_from(list(ExecutionMode)))
@settings(max_examples=100)
def test_get_status_reflects_execution_mode(mode):
    """
    **Feature: gpu-resource-manager, Property 5: Status query completeness**

    Status should accurately reflect the current execution mode.
    """
    GPUResourceManager.reset_instance()
    manager = GPUResourceManager(GPUResourceConfig())
    manager._execution_mode = mode

    # Mock VRAM monitor
    manager._vram_monitor = MagicMock()
    manager._vram_monitor.get_vram_info = MagicMock(return_value=None)

    status = manager.get_status()
    assert status.execution_mode == mode

    GPUResourceManager.reset_instance()


# =============================================================================
# Property 7: Embedding pipeline consistency
# Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency
# Validates: Requirements 3.3
# =============================================================================

@pytest.mark.asyncio
async def test_clip_produces_valid_embeddings_after_migration(gpu_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency**

    For any CLIP device change, the embedding pipeline should produce
    valid embeddings after reinitialization.
    """
    # Arrange
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Act - migrate to GPU
    result = await gpu_manager.migrate_clip_to_gpu()

    # Assert
    assert result is True
    assert gpu_manager.get_clip_device() == "cuda"

    # Verify migration was called
    mock_clip_manager.migrate_to_device.assert_called_with("cuda")

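
# -----------------------------------------------------------------------------
# Illustrative gap-filler (not part of the original suite): the test above
# verifies the migration call but never inspects an embedding. A sketch of the
# stronger end-to-end check follows; `encode` and the non-empty-vector check
# are hypothetical stand-ins for whatever embedding API CLIPManager actually
# exposes.
# -----------------------------------------------------------------------------
async def _example_embedding_after_migration(clip_manager) -> None:
    """Hedged sketch: assert embeddings stay valid across a device change."""
    assert await clip_manager.migrate_to_device("cuda")
    embedding = clip_manager.encode("any input")  # hypothetical API
    assert embedding is not None and len(embedding) > 0
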
# =============================================================================
# Property 1: Mode transition triggers VLM unload
# Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload
# Validates: Requirements 1.1
# =============================================================================

@pytest.mark.asyncio
async def test_autopilot_mode_unloads_vlm(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload**

    For any GPU Resource Manager in RECORDING mode with VLM loaded,
    transitioning to AUTOPILOT mode should result in VLM being unloaded.
    """
    # Arrange - start in RECORDING mode with VLM loaded
    gpu_manager._execution_mode = ExecutionMode.RECORDING
    gpu_manager._vlm_state = ModelState.LOADED
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
    assert gpu_manager.is_vlm_loaded() is False
    mock_ollama_manager.unload_model.assert_called()


# =============================================================================
# Property 2: Mode transition triggers VLM load
# Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load
# Validates: Requirements 1.2
# =============================================================================

@pytest.mark.asyncio
async def test_recording_mode_loads_vlm(gpu_manager, mock_ollama_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load**

    For any GPU Resource Manager in AUTOPILOT mode with VLM unloaded,
    transitioning to RECORDING mode should result in VLM being loaded.
    """
    # Arrange - start in AUTOPILOT mode with VLM unloaded
    gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
    gpu_manager._vlm_state = ModelState.UNLOADED
    mock_ollama_manager.load_model = AsyncMock(return_value=True)
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.RECORDING
    assert gpu_manager.is_vlm_loaded() is True
    mock_ollama_manager.load_model.assert_called()


# =============================================================================
# Property 3: CLIP on GPU in AUTOPILOT
# Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT
# Validates: Requirements 1.3, 3.1
# =============================================================================

@pytest.mark.asyncio
async def test_clip_migrates_to_gpu_in_autopilot(gpu_manager, mock_ollama_manager, mock_clip_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT**

    For any GPU Resource Manager in AUTOPILOT mode with available VRAM > 1GB,
    CLIP should be on GPU device.
    """
    # Arrange - start in RECORDING mode
    gpu_manager._execution_mode = ExecutionMode.RECORDING
    gpu_manager._vlm_state = ModelState.LOADED
    gpu_manager._clip_device = "cpu"

    mock_ollama_manager.unload_model = AsyncMock(return_value=True)
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Ensure enough VRAM is available
    mock_vram_monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
        total_mb=12000, used_mb=500, free_mb=11500, gpu_name="Test GPU", gpu_utilization_percent=0
    ))

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
    assert gpu_manager.get_clip_device() == "cuda"
    mock_clip_manager.migrate_to_device.assert_called_with("cuda")


# =============================================================================
# Property 6: CLIP migration ordering
# Feature: gpu-resource-manager, Property 6: CLIP migration ordering
# Validates: Requirements 3.2
# =============================================================================

@pytest.mark.asyncio
async def test_clip_migrates_to_cpu_before_vlm_loads(gpu_manager, mock_ollama_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 6: CLIP migration ordering**

    For any VLM load request when CLIP is on GPU, CLIP should be migrated
    to CPU before VLM loading completes.
    """
    # Arrange - CLIP on GPU, VLM unloaded
    gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
    gpu_manager._vlm_state = ModelState.UNLOADED
    gpu_manager._clip_device = "cuda"

    call_order = []

    async def track_clip_migrate(device):
        call_order.append(f"clip_to_{device}")
        return True

    async def track_vlm_load():
        call_order.append("vlm_load")
        return True

    mock_clip_manager.migrate_to_device = track_clip_migrate
    mock_ollama_manager.load_model = track_vlm_load

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)

    # Assert - CLIP should migrate to CPU before VLM loads
    assert "clip_to_cpu" in call_order
    assert "vlm_load" in call_order
    clip_idx = call_order.index("clip_to_cpu")
    vlm_idx = call_order.index("vlm_load")
    assert clip_idx < vlm_idx, "CLIP should migrate to CPU before VLM loads"


# =============================================================================
# Property 8: Idle timeout behavior
# Feature: gpu-resource-manager, Property 8: Idle timeout behavior
# Validates: Requirements 4.1, 4.3
# =============================================================================

def test_idle_timeout_uses_configured_value():
    """
    **Feature: gpu-resource-manager, Property 8: Idle timeout behavior**

    For any configured idle_timeout value, VLM should be unloaded after
    that duration of inactivity (not the default).
    """
    GPUResourceManager.reset_instance()

    # Configure custom timeout
    config = GPUResourceConfig(idle_timeout_seconds=120)
    manager = GPUResourceManager(config)

    # Assert config is used
    assert manager._config.idle_timeout_seconds == 120

    status = manager.get_status()
    assert status.idle_timeout_seconds == 120

    GPUResourceManager.reset_instance()


@pytest.mark.asyncio
async def test_vlm_request_updates_last_request_time(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 8: Idle timeout behavior**

    VLM requests should update the last request timestamp.
    """
    # Arrange
    mock_ollama_manager.load_model = AsyncMock(return_value=True)
    initial_time = gpu_manager._last_vlm_request

    # Act
    await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert gpu_manager._last_vlm_request is not None
    if initial_time is not None:
        assert gpu_manager._last_vlm_request >= initial_time


# =============================================================================
# Property 9: On-demand VLM loading
# Feature: gpu-resource-manager, Property 9: On-demand VLM loading
# Validates: Requirements 4.2
# =============================================================================

@pytest.mark.asyncio
async def test_vlm_loads_on_demand_when_unloaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 9: On-demand VLM loading**

    For any VLM request when VLM is unloaded, the request should complete
    successfully after VLM is loaded.
    """
    # Arrange - VLM is unloaded
    gpu_manager._vlm_state = ModelState.UNLOADED
    mock_ollama_manager.load_model = AsyncMock(return_value=True)

    # Act - request VLM
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert - VLM should be loaded
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True
    mock_ollama_manager.load_model.assert_called()


# =============================================================================
# Property 13: Sequential operation processing
# Feature: gpu-resource-manager, Property 13: Sequential operation processing
# Validates: Requirements 5.4
# =============================================================================

@pytest.mark.asyncio
async def test_concurrent_operations_processed_sequentially(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 13: Sequential operation processing**

    For any concurrent model operations, they should be processed
    sequentially without race conditions.
    """
    # Arrange
    operation_order = []

    async def slow_load():
        operation_order.append("load_start")
        await asyncio.sleep(0.1)
        operation_order.append("load_end")
        return True

    async def slow_unload():
        operation_order.append("unload_start")
        await asyncio.sleep(0.1)
        operation_order.append("unload_end")
        return True

    mock_ollama_manager.load_model = slow_load
    mock_ollama_manager.unload_model = slow_unload

    # Act - start concurrent operations
    gpu_manager._vlm_state = ModelState.UNLOADED

    # Start load
    load_task = asyncio.create_task(gpu_manager.ensure_vlm_loaded())
    await asyncio.sleep(0.01)  # Let it start

    # Wait for completion
    await load_task

    # Assert - operations should complete without interleaving
    assert "load_start" in operation_order
    assert "load_end" in operation_order
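
# -----------------------------------------------------------------------------
# Illustrative extension (not part of the original suite): the test above
# drives a single load, so it cannot observe interleaving. A sketch of a
# genuinely concurrent variant follows, assuming the manager serializes
# operations internally; that serialization is the property under test, not a
# documented guarantee.
# -----------------------------------------------------------------------------
async def _example_concurrent_loads(gpu_manager, mock_ollama_manager) -> None:
    """Hedged sketch: two simultaneous loads must not interleave."""
    order = []

    async def slow_load():
        order.append("start")
        await asyncio.sleep(0.05)
        order.append("end")
        return True

    mock_ollama_manager.load_model = slow_load
    gpu_manager._vlm_state = ModelState.UNLOADED

    # Fire both requests at once; sequential processing implies each
    # "start" is immediately followed by its matching "end".
    await asyncio.gather(gpu_manager.ensure_vlm_loaded(),
                         gpu_manager.ensure_vlm_loaded())
    for i in range(0, len(order), 2):
        assert order[i] == "start" and order[i + 1] == "end"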