""" Property-based tests for GPU Resource Manager Tests correctness properties defined in the design document. Uses Hypothesis for property-based testing. """ import asyncio import pytest from unittest.mock import AsyncMock, MagicMock, patch from hypothesis import given, strategies as st, settings, assume # Configure pytest-asyncio pytest_plugins = ('pytest_asyncio',) try: from core.gpu.gpu_resource_manager import ( GPUResourceManager, GPUResourceConfig, ExecutionMode, ModelState, VRAMInfo, ) except ImportError as e: pytest.skip(f"GPU Resource Manager not available: {e}", allow_module_level=True) # ============================================================================= # Fixtures # ============================================================================= @pytest.fixture def config(): """Test configuration with short timeouts.""" return GPUResourceConfig( ollama_endpoint="http://localhost:11434", vlm_model="test-model:latest", idle_timeout_seconds=1, load_timeout_seconds=5, unload_timeout_seconds=2, max_load_retries=2, ) @pytest.fixture def mock_ollama_manager(): """Mock OllamaManager.""" manager = MagicMock() manager.load_model = AsyncMock(return_value=True) manager.unload_model = AsyncMock(return_value=True) manager.is_model_loaded = AsyncMock(return_value=False) manager.is_available = MagicMock(return_value=True) return manager @pytest.fixture def mock_vram_monitor(): """Mock VRAMMonitor.""" monitor = MagicMock() monitor.get_vram_info = MagicMock(return_value=VRAMInfo( total_mb=12000, used_mb=500, free_mb=11500, gpu_name="Test GPU", gpu_utilization_percent=0 )) monitor.is_gpu_available = MagicMock(return_value=True) return monitor @pytest.fixture def mock_clip_manager(): """Mock CLIPManager.""" manager = MagicMock() manager.migrate_to_device = AsyncMock(return_value=True) manager.get_current_device = MagicMock(return_value="cpu") return manager @pytest.fixture def gpu_manager(config, mock_ollama_manager, mock_vram_monitor, mock_clip_manager): """Create GPUResourceManager with mocked dependencies.""" # Reset singleton GPUResourceManager.reset_instance() manager = GPUResourceManager(config) manager._ollama_manager = mock_ollama_manager manager._vram_monitor = mock_vram_monitor manager._clip_manager = mock_clip_manager yield manager # Cleanup GPUResourceManager.reset_instance() # ============================================================================= # Property 10: ensure_vlm_loaded blocking # Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking # Validates: Requirements 5.1 # ============================================================================= @pytest.mark.asyncio async def test_ensure_vlm_loaded_returns_when_loaded(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking** For any call to ensure_vlm_loaded(), the function should only return when is_vlm_loaded() returns True. """ # Arrange mock_ollama_manager.load_model = AsyncMock(return_value=True) # Act result = await gpu_manager.ensure_vlm_loaded() # Assert assert result is True assert gpu_manager.is_vlm_loaded() is True assert gpu_manager.get_vlm_state() == ModelState.LOADED @pytest.mark.asyncio async def test_ensure_vlm_loaded_already_loaded(gpu_manager): """ **Feature: gpu-resource-manager, Property 10: ensure_vlm_loaded blocking** If VLM is already loaded, ensure_vlm_loaded should return immediately. """ # Arrange - set state to loaded gpu_manager._vlm_state = ModelState.LOADED # Act result = await gpu_manager.ensure_vlm_loaded() # Assert assert result is True assert gpu_manager.is_vlm_loaded() is True # ============================================================================= # Property 11: ensure_vlm_unloaded blocking # Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking # Validates: Requirements 5.2 # ============================================================================= @pytest.mark.asyncio async def test_ensure_vlm_unloaded_returns_when_unloaded(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking** For any call to ensure_vlm_unloaded(), the function should only return when is_vlm_loaded() returns False. """ # Arrange - start with loaded state gpu_manager._vlm_state = ModelState.LOADED mock_ollama_manager.unload_model = AsyncMock(return_value=True) # Act result = await gpu_manager.ensure_vlm_unloaded() # Assert assert result is True assert gpu_manager.is_vlm_loaded() is False assert gpu_manager.get_vlm_state() == ModelState.UNLOADED @pytest.mark.asyncio async def test_ensure_vlm_unloaded_already_unloaded(gpu_manager): """ **Feature: gpu-resource-manager, Property 11: ensure_vlm_unloaded blocking** If VLM is already unloaded, ensure_vlm_unloaded should return immediately. """ # Arrange - already unloaded gpu_manager._vlm_state = ModelState.UNLOADED # Act result = await gpu_manager.ensure_vlm_unloaded() # Assert assert result is True assert gpu_manager.is_vlm_loaded() is False # ============================================================================= # Property 12: get_clip_device validity # Feature: gpu-resource-manager, Property 12: get_clip_device validity # Validates: Requirements 5.3 # ============================================================================= @given(st.sampled_from(["cpu", "cuda"])) @settings(max_examples=100) def test_get_clip_device_returns_valid_value(device): """ **Feature: gpu-resource-manager, Property 12: get_clip_device validity** For any call to get_clip_device(), the return value should be either "cpu" or "cuda". """ # Reset singleton for each test GPUResourceManager.reset_instance() manager = GPUResourceManager(GPUResourceConfig()) manager._clip_device = device # Act result = manager.get_clip_device() # Assert assert result in ["cpu", "cuda"] # Cleanup GPUResourceManager.reset_instance() def test_get_clip_device_default_is_cpu(): """ **Feature: gpu-resource-manager, Property 12: get_clip_device validity** Default CLIP device should be CPU. """ GPUResourceManager.reset_instance() manager = GPUResourceManager(GPUResourceConfig()) assert manager.get_clip_device() == "cpu" GPUResourceManager.reset_instance() # ============================================================================= # Property 4: VRAM decrease on VLM unload # Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload # Validates: Requirements 1.4 # ============================================================================= @pytest.mark.asyncio async def test_vram_decreases_on_vlm_unload(gpu_manager, mock_ollama_manager, mock_vram_monitor): """ **Feature: gpu-resource-manager, Property 4: VRAM decrease on VLM unload** For any VLM unload operation, the VRAM usage should decrease. """ # Arrange - simulate loaded state with high VRAM gpu_manager._vlm_state = ModelState.LOADED # Simulate VRAM before (high) and after (low) unload vram_before = VRAMInfo(12000, 10500, 1500, "Test GPU", 50) vram_after = VRAMInfo(12000, 500, 11500, "Test GPU", 0) mock_vram_monitor.get_vram_info = MagicMock(side_effect=[vram_before, vram_after]) mock_ollama_manager.unload_model = AsyncMock(return_value=True) # Track emitted events events = [] gpu_manager.on_resource_changed(lambda e: events.append(e)) # Act result = await gpu_manager.ensure_vlm_unloaded() # Assert assert result is True assert gpu_manager.is_vlm_loaded() is False # Check that unload event was emitted with VRAM info unload_events = [e for e in events if e.event_type == "model_unloaded"] assert len(unload_events) >= 1 # ============================================================================= # Property 5: Status query completeness # Feature: gpu-resource-manager, Property 5: Status query completeness # Validates: Requirements 2.1 # ============================================================================= def test_get_status_returns_complete_status(gpu_manager, mock_vram_monitor): """ **Feature: gpu-resource-manager, Property 5: Status query completeness** For any call to get_status(), the returned GPUResourceStatus should contain valid values for all fields. """ # Act status = gpu_manager.get_status() # Assert - all fields should be present and valid assert status.execution_mode in ExecutionMode assert status.vlm_state in ModelState assert isinstance(status.vlm_model, str) assert status.clip_device in ["cpu", "cuda"] assert status.vram is not None or status.degraded_mode assert isinstance(status.idle_timeout_seconds, int) assert isinstance(status.degraded_mode, bool) @given(st.sampled_from(list(ExecutionMode))) @settings(max_examples=100) def test_get_status_reflects_execution_mode(mode): """ **Feature: gpu-resource-manager, Property 5: Status query completeness** Status should accurately reflect the current execution mode. """ GPUResourceManager.reset_instance() manager = GPUResourceManager(GPUResourceConfig()) manager._execution_mode = mode # Mock VRAM monitor manager._vram_monitor = MagicMock() manager._vram_monitor.get_vram_info = MagicMock(return_value=None) status = manager.get_status() assert status.execution_mode == mode GPUResourceManager.reset_instance() # ============================================================================= # Property 7: Embedding pipeline consistency # Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency # Validates: Requirements 3.3 # ============================================================================= @pytest.mark.asyncio async def test_clip_produces_valid_embeddings_after_migration(gpu_manager, mock_clip_manager): """ **Feature: gpu-resource-manager, Property 7: Embedding pipeline consistency** For any CLIP device change, the embedding pipeline should produce valid embeddings after reinitialization. """ # Arrange mock_clip_manager.migrate_to_device = AsyncMock(return_value=True) # Act - migrate to GPU result = await gpu_manager.migrate_clip_to_gpu() # Assert assert result is True assert gpu_manager.get_clip_device() == "cuda" # Verify migration was called mock_clip_manager.migrate_to_device.assert_called_with("cuda") # ============================================================================= # Property 1: Mode transition triggers VLM unload # Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload # Validates: Requirements 1.1 # ============================================================================= @pytest.mark.asyncio async def test_autopilot_mode_unloads_vlm(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 1: Mode transition triggers VLM unload** For any GPU Resource Manager in RECORDING mode with VLM loaded, transitioning to AUTOPILOT mode should result in VLM being unloaded. """ # Arrange - start in RECORDING mode with VLM loaded gpu_manager._execution_mode = ExecutionMode.RECORDING gpu_manager._vlm_state = ModelState.LOADED mock_ollama_manager.unload_model = AsyncMock(return_value=True) # Act await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT) # Assert assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT assert gpu_manager.is_vlm_loaded() is False mock_ollama_manager.unload_model.assert_called() # ============================================================================= # Property 2: Mode transition triggers VLM load # Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load # Validates: Requirements 1.2 # ============================================================================= @pytest.mark.asyncio async def test_recording_mode_loads_vlm(gpu_manager, mock_ollama_manager, mock_clip_manager): """ **Feature: gpu-resource-manager, Property 2: Mode transition triggers VLM load** For any GPU Resource Manager in AUTOPILOT mode with VLM unloaded, transitioning to RECORDING mode should result in VLM being loaded. """ # Arrange - start in AUTOPILOT mode with VLM unloaded gpu_manager._execution_mode = ExecutionMode.AUTOPILOT gpu_manager._vlm_state = ModelState.UNLOADED mock_ollama_manager.load_model = AsyncMock(return_value=True) mock_clip_manager.migrate_to_device = AsyncMock(return_value=True) # Act await gpu_manager.set_execution_mode(ExecutionMode.RECORDING) # Assert assert gpu_manager.get_execution_mode() == ExecutionMode.RECORDING assert gpu_manager.is_vlm_loaded() is True mock_ollama_manager.load_model.assert_called() # ============================================================================= # Property 3: CLIP on GPU in AUTOPILOT # Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT # Validates: Requirements 1.3, 3.1 # ============================================================================= @pytest.mark.asyncio async def test_clip_migrates_to_gpu_in_autopilot(gpu_manager, mock_ollama_manager, mock_clip_manager, mock_vram_monitor): """ **Feature: gpu-resource-manager, Property 3: CLIP on GPU in AUTOPILOT** For any GPU Resource Manager in AUTOPILOT mode with available VRAM > 1GB, CLIP should be on GPU device. """ # Arrange - start in RECORDING mode gpu_manager._execution_mode = ExecutionMode.RECORDING gpu_manager._vlm_state = ModelState.LOADED gpu_manager._clip_device = "cpu" mock_ollama_manager.unload_model = AsyncMock(return_value=True) mock_clip_manager.migrate_to_device = AsyncMock(return_value=True) # Ensure enough VRAM is available mock_vram_monitor.get_vram_info = MagicMock(return_value=VRAMInfo( total_mb=12000, used_mb=500, free_mb=11500, gpu_name="Test GPU", gpu_utilization_percent=0 )) # Act await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT) # Assert assert gpu_manager.get_execution_mode() == ExecutionMode.AUTOPILOT assert gpu_manager.get_clip_device() == "cuda" mock_clip_manager.migrate_to_device.assert_called_with("cuda") # ============================================================================= # Property 6: CLIP migration ordering # Feature: gpu-resource-manager, Property 6: CLIP migration ordering # Validates: Requirements 3.2 # ============================================================================= @pytest.mark.asyncio async def test_clip_migrates_to_cpu_before_vlm_loads(gpu_manager, mock_ollama_manager, mock_clip_manager): """ **Feature: gpu-resource-manager, Property 6: CLIP migration ordering** For any VLM load request when CLIP is on GPU, CLIP should be migrated to CPU before VLM loading completes. """ # Arrange - CLIP on GPU, VLM unloaded gpu_manager._execution_mode = ExecutionMode.AUTOPILOT gpu_manager._vlm_state = ModelState.UNLOADED gpu_manager._clip_device = "cuda" call_order = [] async def track_clip_migrate(device): call_order.append(f"clip_to_{device}") return True async def track_vlm_load(): call_order.append("vlm_load") return True mock_clip_manager.migrate_to_device = track_clip_migrate mock_ollama_manager.load_model = track_vlm_load # Act await gpu_manager.set_execution_mode(ExecutionMode.RECORDING) # Assert - CLIP should migrate to CPU before VLM loads assert "clip_to_cpu" in call_order assert "vlm_load" in call_order clip_idx = call_order.index("clip_to_cpu") vlm_idx = call_order.index("vlm_load") assert clip_idx < vlm_idx, "CLIP should migrate to CPU before VLM loads" # ============================================================================= # Property 8: Idle timeout behavior # Feature: gpu-resource-manager, Property 8: Idle timeout behavior # Validates: Requirements 4.1, 4.3 # ============================================================================= def test_idle_timeout_uses_configured_value(): """ **Feature: gpu-resource-manager, Property 8: Idle timeout behavior** For any configured idle_timeout value, VLM should be unloaded after that duration of inactivity (not the default). """ GPUResourceManager.reset_instance() # Configure custom timeout config = GPUResourceConfig(idle_timeout_seconds=120) manager = GPUResourceManager(config) # Assert config is used assert manager._config.idle_timeout_seconds == 120 status = manager.get_status() assert status.idle_timeout_seconds == 120 GPUResourceManager.reset_instance() @pytest.mark.asyncio async def test_vlm_request_updates_last_request_time(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 8: Idle timeout behavior** VLM requests should update the last request timestamp. """ # Arrange mock_ollama_manager.load_model = AsyncMock(return_value=True) initial_time = gpu_manager._last_vlm_request # Act await gpu_manager.ensure_vlm_loaded() # Assert assert gpu_manager._last_vlm_request is not None if initial_time is not None: assert gpu_manager._last_vlm_request >= initial_time # ============================================================================= # Property 9: On-demand VLM loading # Feature: gpu-resource-manager, Property 9: On-demand VLM loading # Validates: Requirements 4.2 # ============================================================================= @pytest.mark.asyncio async def test_vlm_loads_on_demand_when_unloaded(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 9: On-demand VLM loading** For any VLM request when VLM is unloaded, the request should complete successfully after VLM is loaded. """ # Arrange - VLM is unloaded gpu_manager._vlm_state = ModelState.UNLOADED mock_ollama_manager.load_model = AsyncMock(return_value=True) # Act - request VLM result = await gpu_manager.ensure_vlm_loaded() # Assert - VLM should be loaded assert result is True assert gpu_manager.is_vlm_loaded() is True mock_ollama_manager.load_model.assert_called() # ============================================================================= # Property 13: Sequential operation processing # Feature: gpu-resource-manager, Property 13: Sequential operation processing # Validates: Requirements 5.4 # ============================================================================= @pytest.mark.asyncio async def test_concurrent_operations_processed_sequentially(gpu_manager, mock_ollama_manager): """ **Feature: gpu-resource-manager, Property 13: Sequential operation processing** For any concurrent model operations, they should be processed sequentially without race conditions. """ # Arrange operation_order = [] async def slow_load(): operation_order.append("load_start") await asyncio.sleep(0.1) operation_order.append("load_end") return True async def slow_unload(): operation_order.append("unload_start") await asyncio.sleep(0.1) operation_order.append("unload_end") return True mock_ollama_manager.load_model = slow_load mock_ollama_manager.unload_model = slow_unload # Act - start concurrent operations gpu_manager._vlm_state = ModelState.UNLOADED # Start load load_task = asyncio.create_task(gpu_manager.ensure_vlm_loaded()) await asyncio.sleep(0.01) # Let it start # Wait for completion await load_task # Assert - operations should complete without interleaving assert "load_start" in operation_order assert "load_end" in operation_order