Some checks failed
security-audit / Bandit (static scan) (push) Successful in 12s
security-audit / pip-audit (dependency CVEs) (push) Successful in 10s
security-audit / Secrets scan (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 13s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
Checkpoint including uncommitted files from previous sessions (systemd, docs, agents, GPU manager). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
685 lines
23 KiB
Python
"""
|
|
Property-based tests for GPU Resource Manager
|
|
|
|
Tests correctness properties defined in the design document.
|
|
Uses Hypothesis for property-based testing.
|
|
"""
|
|
|
|
import asyncio
|
|
import pytest
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
from hypothesis import given, strategies as st, settings, assume
|
|
|
|
# Configure pytest-asyncio
|
|
pytest_plugins = ('pytest_asyncio',)
|
|
|
|
try:
|
|
from core.gpu.gpu_resource_manager import (
|
|
GPUResourceManager,
|
|
GPUResourceConfig,
|
|
ExecutionMode,
|
|
ModelState,
|
|
VRAMInfo,
|
|
)
|
|
except ImportError as e:
|
|
pytest.skip(f"GPU Resource Manager not available: {e}", allow_module_level=True)


# =============================================================================
# Fixtures
# =============================================================================

@pytest.fixture
def config():
    """Test configuration with short timeouts."""
    return GPUResourceConfig(
        ollama_endpoint="http://localhost:11434",
        vlm_model="test-model:latest",
        idle_timeout_seconds=1,
        load_timeout_seconds=5,
        unload_timeout_seconds=2,
        max_load_retries=2,
    )


@pytest.fixture
def mock_ollama_manager():
    """Mock OllamaManager."""
    manager = MagicMock()
    manager.load_model = AsyncMock(return_value=True)
    manager.unload_model = AsyncMock(return_value=True)
    manager.is_model_loaded = AsyncMock(return_value=False)
    manager.is_available = MagicMock(return_value=True)
    return manager


@pytest.fixture
def mock_vram_monitor():
    """Mock VRAMMonitor."""
    monitor = MagicMock()
    monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
        total_mb=12000,
        used_mb=500,
        free_mb=11500,
        gpu_name="Test GPU",
        gpu_utilization_percent=0
    ))
    monitor.is_gpu_available = MagicMock(return_value=True)
    return monitor


@pytest.fixture
def mock_clip_manager():
    """Mock CLIPManager."""
    manager = MagicMock()
    manager.migrate_to_device = AsyncMock(return_value=True)
    manager.get_current_device = MagicMock(return_value="cpu")
    return manager


@pytest.fixture
def gpu_manager(config, mock_ollama_manager, mock_vram_monitor, mock_clip_manager):
    """Create GPUResourceManager with mocked dependencies."""
    # Reset singleton
    GPUResourceManager.reset_instance()

    manager = GPUResourceManager(config)
    manager._ollama_manager = mock_ollama_manager
    manager._vram_monitor = mock_vram_monitor
    manager._clip_manager = mock_clip_manager

    yield manager

    # Cleanup
    GPUResourceManager.reset_instance()


# =============================================================================
# Property 10: ensure_vlm_loaded blocking
# Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking
# Validates: Requirements 5.1
# =============================================================================

@pytest.mark.asyncio
async def test_ensure_vlm_loaded_returns_when_loaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**

    For any call to ensure_vlm_loaded(), the function should only return
    when is_vlm_loaded() returns True.
    """
    # Arrange
    mock_ollama_manager.load_model = AsyncMock(return_value=True)

    # Act
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True
    assert gpu_manager.get_vlm_state() == ModelState.LOADED


@pytest.mark.asyncio
async def test_ensure_vlm_loaded_already_loaded(gpu_manager):
    """
    **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking**

    If VLM is already loaded, ensure_vlm_loaded should return immediately.
    """
    # Arrange - set state to loaded
    gpu_manager._vlm_state = ModelState.LOADED

    # Act
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True
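

# A hedged property-based sketch extending Property 10: quantify over the
# initial state instead of pinning it. It assumes direct assignment to
# _vlm_state is a valid way to seed state (the fixed-state tests above do the
# same) and drives the coroutine with asyncio.run() so Hypothesis can invoke
# the test synchronously.
@given(initial_state=st.sampled_from([ModelState.LOADED, ModelState.UNLOADED]))
@settings(max_examples=20, deadline=None)
def test_ensure_vlm_loaded_from_any_initial_state(initial_state):
    GPUResourceManager.reset_instance()
    try:
        manager = GPUResourceManager(GPUResourceConfig())
        manager._ollama_manager = MagicMock()
        manager._ollama_manager.load_model = AsyncMock(return_value=True)
        manager._vram_monitor = MagicMock()
        manager._vram_monitor.get_vram_info = MagicMock(return_value=None)
        manager._clip_manager = MagicMock()
        manager._clip_manager.migrate_to_device = AsyncMock(return_value=True)
        manager._vlm_state = initial_state

        assert asyncio.run(manager.ensure_vlm_loaded()) is True
        assert manager.is_vlm_loaded() is True
    finally:
        GPUResourceManager.reset_instance()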


# =============================================================================
# Property 11: ensure_vlm_unloaded blocking
# Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking
# Validates: Requirements 5.2
# =============================================================================

@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_returns_when_unloaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**

    For any call to ensure_vlm_unloaded(), the function should only return
    when is_vlm_loaded() returns False.
    """
    # Arrange - start with loaded state
    gpu_manager._vlm_state = ModelState.LOADED
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False
    assert gpu_manager.get_vlm_state() == ModelState.UNLOADED


@pytest.mark.asyncio
async def test_ensure_vlm_unloaded_already_unloaded(gpu_manager):
    """
    **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking**

    If VLM is already unloaded, ensure_vlm_unloaded should return immediately.
    """
    # Arrange - already unloaded
    gpu_manager._vlm_state = ModelState.UNLOADED

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False


# =============================================================================
# Property 12: get_clip_device validity
# Feature: gpu-resource-manager, Property 12: get_clip_device validity
# Validates: Requirements 5.3
# =============================================================================

@given(st.sampled_from(["cpu", "cuda"]))
@settings(max_examples=100)
def test_get_clip_device_returns_valid_value(device):
    """
    **Feature: gpu-resource-manager, Property 12: get_clip_device validity**

    For any call to get_clip_device(), the return value should be
    either "cpu" or "cuda".
    """
    # Reset singleton for each test
    GPUResourceManager.reset_instance()

    manager = GPUResourceManager(GPUResourceConfig())
    manager._clip_device = device

    # Act
    result = manager.get_clip_device()

    # Assert
    assert result in ["cpu", "cuda"]

    # Cleanup
    GPUResourceManager.reset_instance()


def test_get_clip_device_default_is_cpu():
    """
    **Feature: gpu-resource-manager, Property 12: get_clip_device validity**

    Default CLIP device should be CPU.
    """
    GPUResourceManager.reset_instance()
    manager = GPUResourceManager(GPUResourceConfig())

    assert manager.get_clip_device() == "cpu"

    GPUResourceManager.reset_instance()


# =============================================================================
# Property 4: VRAM decrease on VLM unload
# Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload
# Validates: Requirements 1.4
# =============================================================================

@pytest.mark.asyncio
async def test_vram_decreases_on_vlm_unload(gpu_manager, mock_ollama_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload**

    For any VLM unload operation, the VRAM usage should decrease.
    """
    # Arrange - simulate loaded state with high VRAM
    gpu_manager._vlm_state = ModelState.LOADED

    # Simulate VRAM before (high) and after (low) unload
    vram_before = VRAMInfo(12000, 10500, 1500, "Test GPU", 50)
    vram_after = VRAMInfo(12000, 500, 11500, "Test GPU", 0)

    mock_vram_monitor.get_vram_info = MagicMock(side_effect=[vram_before, vram_after])
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Track emitted events
    events = []
    gpu_manager.on_resource_changed(lambda e: events.append(e))

    # Act
    result = await gpu_manager.ensure_vlm_unloaded()

    # Assert
    assert result is True
    assert gpu_manager.is_vlm_loaded() is False

    # Check that unload event was emitted with VRAM info
    unload_events = [e for e in events if e.event_type == "model_unloaded"]
    assert len(unload_events) >= 1
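

# A hedged follow-up sketch for Property 4: check the numeric decrease itself,
# not only the emitted event. The side_effect closure keys the snapshot off the
# manager's own state, so it stays consistent however many times the manager
# polls VRAM internally during the unload.
@pytest.mark.asyncio
async def test_vram_usage_numerically_lower_after_unload(gpu_manager, mock_ollama_manager, mock_vram_monitor):
    gpu_manager._vlm_state = ModelState.LOADED
    vram_loaded = VRAMInfo(12000, 10500, 1500, "Test GPU", 50)
    vram_unloaded = VRAMInfo(12000, 500, 11500, "Test GPU", 0)
    mock_vram_monitor.get_vram_info = MagicMock(
        side_effect=lambda: vram_loaded if gpu_manager.is_vlm_loaded() else vram_unloaded
    )
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    before = mock_vram_monitor.get_vram_info()
    assert await gpu_manager.ensure_vlm_unloaded() is True
    after = mock_vram_monitor.get_vram_info()

    assert after.used_mb < before.used_mb
    assert after.free_mb > before.free_mb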


# =============================================================================
# Property 5: Status query completeness
# Feature: gpu-resource-manager, Property 5: Status query completeness
# Validates: Requirements 2.1
# =============================================================================

def test_get_status_returns_complete_status(gpu_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 5: Status query completeness**

    For any call to get_status(), the returned GPUResourceStatus should
    contain valid values for all fields.
    """
    # Act
    status = gpu_manager.get_status()

    # Assert - all fields should be present and valid
    assert status.execution_mode in ExecutionMode
    assert status.vlm_state in ModelState
    assert isinstance(status.vlm_model, str)
    assert status.clip_device in ["cpu", "cuda"]
    assert status.vram is not None or status.degraded_mode
    assert isinstance(status.idle_timeout_seconds, int)
    assert isinstance(status.degraded_mode, bool)


@given(st.sampled_from(list(ExecutionMode)))
@settings(max_examples=100)
def test_get_status_reflects_execution_mode(mode):
    """
    **Feature: gpu-resource-manager, Property 5: Status query completeness**

    Status should accurately reflect the current execution mode.
    """
    GPUResourceManager.reset_instance()
    manager = GPUResourceManager(GPUResourceConfig())
    manager._execution_mode = mode

    # Mock VRAM monitor
    manager._vram_monitor = MagicMock()
    manager._vram_monitor.get_vram_info = MagicMock(return_value=None)

    status = manager.get_status()
    assert status.execution_mode == mode

    GPUResourceManager.reset_instance()


# =============================================================================
# Property 7: Embedding pipeline consistency
# Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency
# Validates: Requirements 3.3
# =============================================================================

@pytest.mark.asyncio
async def test_clip_produces_valid_embeddings_after_migration(gpu_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency**

    For any CLIP device change, the embedding pipeline should produce
    valid embeddings after reinitialization.
    """
    # Arrange
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Act - migrate to GPU
    result = await gpu_manager.migrate_clip_to_gpu()

    # Assert
    assert result is True
    assert gpu_manager.get_clip_device() == "cuda"

    # Verify migration was called
    mock_clip_manager.migrate_to_device.assert_called_with("cuda")
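

# A hedged sketch of what a fuller embedding check could look like. The
# `encode_image` method below is hypothetical (the real CLIPManager API is not
# shown here), so this only demonstrates the intended shape/validity assertions
# against a mocked pipeline rather than real CLIP output.
@pytest.mark.asyncio
async def test_mocked_embedding_shape_after_migration(gpu_manager, mock_clip_manager):
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)
    mock_clip_manager.encode_image = MagicMock(return_value=[0.1] * 512)  # hypothetical API

    assert await gpu_manager.migrate_clip_to_gpu() is True

    embedding = mock_clip_manager.encode_image(object())
    assert len(embedding) == 512
    assert all(x == x for x in embedding)  # no NaN values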


# =============================================================================
# Property 1: Mode transition triggers VLM unload
# Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload
# Validates: Requirements 1.1
# =============================================================================

@pytest.mark.slow
@pytest.mark.asyncio
async def test_autopilot_mode_unloads_vlm(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload**

    For any GPU Resource Manager in RECORDING mode with VLM loaded,
    transitioning to AUTOPILOT mode should result in VLM being unloaded.
    """
    # Arrange - start in RECORDING mode with VLM loaded
    gpu_manager._execution_mode = ExecutionMode.RECORDING
    gpu_manager._vlm_state = ModelState.LOADED
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
    assert gpu_manager.is_vlm_loaded() is False
    mock_ollama_manager.unload_model.assert_called()


# =============================================================================
# Property 2: Mode transition triggers VLM load
# Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load
# Validates: Requirements 1.2
# =============================================================================

@pytest.mark.slow
@pytest.mark.asyncio
async def test_recording_mode_loads_vlm(gpu_manager, mock_ollama_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load**

    For any GPU Resource Manager in AUTOPILOT mode with VLM unloaded,
    transitioning to RECORDING mode should result in VLM being loaded.
    """
    # Arrange - start in AUTOPILOT mode with VLM unloaded
    gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
    gpu_manager._vlm_state = ModelState.UNLOADED
    mock_ollama_manager.load_model = AsyncMock(return_value=True)
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.RECORDING
    assert gpu_manager.is_vlm_loaded() is True
    mock_ollama_manager.load_model.assert_called()


# =============================================================================
# Property 3: CLIP on GPU in AUTOPILOT
# Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT
# Validates: Requirements 1.3, 3.1
# =============================================================================

@pytest.mark.slow
@pytest.mark.asyncio
async def test_clip_migrates_to_gpu_in_autopilot(gpu_manager, mock_ollama_manager, mock_clip_manager, mock_vram_monitor):
    """
    **Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT**

    For any GPU Resource Manager in AUTOPILOT mode with available VRAM > 1GB,
    CLIP should be on GPU device.
    """
    # Arrange - start in RECORDING mode
    gpu_manager._execution_mode = ExecutionMode.RECORDING
    gpu_manager._vlm_state = ModelState.LOADED
    gpu_manager._clip_device = "cpu"

    mock_ollama_manager.unload_model = AsyncMock(return_value=True)
    mock_clip_manager.migrate_to_device = AsyncMock(return_value=True)

    # Ensure enough VRAM is available
    mock_vram_monitor.get_vram_info = MagicMock(return_value=VRAMInfo(
        total_mb=12000, used_mb=500, free_mb=11500, gpu_name="Test GPU", gpu_utilization_percent=0
    ))

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)

    # Assert
    assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT
    assert gpu_manager.get_clip_device() == "cuda"
    mock_clip_manager.migrate_to_device.assert_called_with("cuda")


# =============================================================================
# Property 6: CLIP migration ordering
# Feature: gpu-resource-manager, Property 6: CLIP migration ordering
# Validates: Requirements 3.2
# =============================================================================

@pytest.mark.slow
@pytest.mark.asyncio
async def test_clip_migrates_to_cpu_before_vlm_loads(gpu_manager, mock_ollama_manager, mock_clip_manager):
    """
    **Feature: gpu-resource-manager, Property 6: CLIP migration ordering**

    For any VLM load request when CLIP is on GPU, CLIP should be migrated
    to CPU before VLM loading completes.
    """
    # Arrange - CLIP on GPU, VLM unloaded
    gpu_manager._execution_mode = ExecutionMode.AUTOPILOT
    gpu_manager._vlm_state = ModelState.UNLOADED
    gpu_manager._clip_device = "cuda"

    call_order = []

    async def track_clip_migrate(device):
        call_order.append(f"clip_to_{device}")
        return True

    async def track_vlm_load():
        call_order.append("vlm_load")
        return True

    mock_clip_manager.migrate_to_device = track_clip_migrate
    mock_ollama_manager.load_model = track_vlm_load

    # Act
    await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)

    # Assert - CLIP should migrate to CPU before VLM loads
    assert "clip_to_cpu" in call_order
    assert "vlm_load" in call_order
    clip_idx = call_order.index("clip_to_cpu")
    vlm_idx = call_order.index("vlm_load")
    assert clip_idx < vlm_idx, "CLIP should migrate to CPU before VLM loads"
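

# A hedged companion sketch to Property 6, covering the opposite transition:
# when entering AUTOPILOT, the VLM should be unloaded before CLIP migrates to
# the GPU, so the two never contend for VRAM. This ordering is inferred from
# the design intent; the original suite does not assert it.
@pytest.mark.asyncio
async def test_vlm_unloads_before_clip_migrates_to_gpu(gpu_manager, mock_ollama_manager, mock_clip_manager):
    gpu_manager._execution_mode = ExecutionMode.RECORDING
    gpu_manager._vlm_state = ModelState.LOADED
    gpu_manager._clip_device = "cpu"

    call_order = []

    async def track_vlm_unload(*args, **kwargs):  # permissive signature, exact args unknown
        call_order.append("vlm_unload")
        return True

    async def track_clip_migrate(device):
        call_order.append(f"clip_to_{device}")
        return True

    mock_ollama_manager.unload_model = track_vlm_unload
    mock_clip_manager.migrate_to_device = track_clip_migrate

    await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)

    assert "vlm_unload" in call_order and "clip_to_cuda" in call_order
    assert call_order.index("vlm_unload") < call_order.index("clip_to_cuda")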


# =============================================================================
# Property 8: Idle timeout behavior
# Feature: gpu-resource-manager, Property 8: Idle timeout behavior
# Validates: Requirements 4.1, 4.3
# =============================================================================

def test_idle_timeout_uses_configured_value():
    """
    **Feature: gpu-resource-manager, Property 8: Idle timeout behavior**

    For any configured idle_timeout value, VLM should be unloaded after
    that duration of inactivity (not the default).
    """
    GPUResourceManager.reset_instance()

    # Configure custom timeout
    config = GPUResourceConfig(idle_timeout_seconds=120)
    manager = GPUResourceManager(config)

    # Assert config is used
    assert manager._config.idle_timeout_seconds == 120

    status = manager.get_status()
    assert status.idle_timeout_seconds == 120

    GPUResourceManager.reset_instance()


@pytest.mark.asyncio
async def test_vlm_request_updates_last_request_time(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 8: Idle timeout behavior**

    VLM requests should update the last request timestamp.
    """
    # Arrange
    mock_ollama_manager.load_model = AsyncMock(return_value=True)
    initial_time = gpu_manager._last_vlm_request

    # Act
    await gpu_manager.ensure_vlm_loaded()

    # Assert
    assert gpu_manager._last_vlm_request is not None
    if initial_time is not None:
        assert gpu_manager._last_vlm_request >= initial_time
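

# A hedged end-to-end sketch of the timeout itself, assuming the manager starts
# an idle watchdog once the VLM is loaded and that it polls at sub-second
# granularity. The 1-second timeout comes from the `config` fixture; the
# generous sleep gives the watchdog margin to fire.
@pytest.mark.slow
@pytest.mark.asyncio
async def test_vlm_unloads_after_idle_timeout(gpu_manager, mock_ollama_manager):
    mock_ollama_manager.load_model = AsyncMock(return_value=True)
    mock_ollama_manager.unload_model = AsyncMock(return_value=True)

    await gpu_manager.ensure_vlm_loaded()
    assert gpu_manager.is_vlm_loaded() is True

    # idle_timeout_seconds=1 in the test config; wait past it with margin
    await asyncio.sleep(2.5)

    assert gpu_manager.is_vlm_loaded() is False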


# =============================================================================
# Property 9: On-demand VLM loading
# Feature: gpu-resource-manager, Property 9: On-demand VLM loading
# Validates: Requirements 4.2
# =============================================================================

@pytest.mark.asyncio
async def test_vlm_loads_on_demand_when_unloaded(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 9: On-demand VLM loading**

    For any VLM request when VLM is unloaded, the request should complete
    successfully after VLM is loaded.
    """
    # Arrange - VLM is unloaded
    gpu_manager._vlm_state = ModelState.UNLOADED
    mock_ollama_manager.load_model = AsyncMock(return_value=True)

    # Act - request VLM
    result = await gpu_manager.ensure_vlm_loaded()

    # Assert - VLM should be loaded
    assert result is True
    assert gpu_manager.is_vlm_loaded() is True
    mock_ollama_manager.load_model.assert_called()


# =============================================================================
# Property 13: Sequential operation processing
# Feature: gpu-resource-manager, Property 13: Sequential operation processing
# Validates: Requirements 5.4
# =============================================================================

@pytest.mark.asyncio
async def test_concurrent_operations_processed_sequentially(gpu_manager, mock_ollama_manager):
    """
    **Feature: gpu-resource-manager, Property 13: Sequential operation processing**

    For any concurrent model operations, they should be processed
    sequentially without race conditions.
    """
    # Arrange
    operation_order = []

    async def slow_load():
        operation_order.append("load_start")
        await asyncio.sleep(0.1)
        operation_order.append("load_end")
        return True

    async def slow_unload():
        operation_order.append("unload_start")
        await asyncio.sleep(0.1)
        operation_order.append("unload_end")
        return True

    mock_ollama_manager.load_model = slow_load
    mock_ollama_manager.unload_model = slow_unload

    # Act - start concurrent operations
    gpu_manager._vlm_state = ModelState.UNLOADED

    # Start load
    load_task = asyncio.create_task(gpu_manager.ensure_vlm_loaded())
    await asyncio.sleep(0.01)  # Let it start

    # Wait for completion
    await load_task

    # Assert - operations should complete without interleaving
    assert "load_start" in operation_order
    assert "load_end" in operation_order


# =============================================================================
# Tests for acquire_inference (task 1: serialization of concurrent GPU calls)
# =============================================================================


class TestAcquireInference:
    """Serialization of GPU calls via acquire_inference()."""

    def test_acquire_release_basic(self, config):
        """The lock is acquired and released without error."""
        GPUResourceManager.reset_instance()
        manager = GPUResourceManager(config)

        with manager.acquire_inference() as acquired:
            assert acquired is True

        # After exiting the context, the lock can be reacquired immediately
        with manager.acquire_inference(timeout=0.5) as acquired2:
            assert acquired2 is True

    def test_acquire_inference_timeout(self, config):
        """If another thread holds the lock, the timeout returns False."""
        import threading

        GPUResourceManager.reset_instance()
        manager = GPUResourceManager(config)
        held = threading.Event()
        release = threading.Event()

        def holder():
            with manager.acquire_inference():
                held.set()
                release.wait(timeout=5.0)

        thread = threading.Thread(target=holder, daemon=True)
        thread.start()
        assert held.wait(timeout=2.0)

        with manager.acquire_inference(timeout=0.1) as acquired:
            assert acquired is False

        release.set()
        thread.join(timeout=2.0)

    def test_acquire_inference_serializes_concurrent_calls(self, config):
        """Two threads can never be inside the critical section at the same time."""
        import threading
        import time as _time

        GPUResourceManager.reset_instance()
        manager = GPUResourceManager(config)

        inside = []  # counter of threads currently inside the section
        max_concurrent = [0]
        lock = threading.Lock()

        def worker():
            with manager.acquire_inference():
                with lock:
                    inside.append(1)
                    max_concurrent[0] = max(max_concurrent[0], len(inside))
                _time.sleep(0.05)
                with lock:
                    inside.pop()

        threads = [threading.Thread(target=worker) for _ in range(5)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        assert max_concurrent[0] == 1, (
            f"Expected at most 1 concurrent thread, observed {max_concurrent[0]}"
        )
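
    # A hedged addition: the lock should be released even when the guarded
    # block raises. This assumes acquire_inference() is a standard context
    # manager that releases in its __exit__/finally path; the original suite
    # does not cover the exception case.
    def test_acquire_inference_released_on_exception(self, config):
        """The lock must not leak when the critical section raises."""
        GPUResourceManager.reset_instance()
        manager = GPUResourceManager(config)

        with pytest.raises(RuntimeError):
            with manager.acquire_inference() as acquired:
                assert acquired is True
                raise RuntimeError("simulated inference failure")

        # If the lock leaked, this immediate re-acquire would return False.
        with manager.acquire_inference(timeout=0.5) as acquired2:
            assert acquired2 is True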