chore: sauvegarde complète avant factorisation executor
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 10s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 13s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 10s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 13s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
Point de sauvegarde incluant les fichiers non committés des sessions précédentes (systemd, docs, agents, GPU manager).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -606,3 +606,79 @@ async def test_concurrent_operations_processed_sequentially(gpu_manager, mock_ol
|
||||
# Assert - operations should complete without interleaving
|
||||
assert "load_start" in operation_order
|
||||
assert "load_end" in operation_order
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests pour acquire_inference (tâche 1 — sérialisation GPU concurrente)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestAcquireInference:
    """GPU call serialization through acquire_inference()."""

    def test_acquire_release_basic(self, config):
        """The lock can be acquired and released without error."""
        GPUResourceManager.reset_instance()
        mgr = GPUResourceManager(config)

        with mgr.acquire_inference() as got_lock:
            assert got_lock is True

        # Once the context exits, the lock must be re-acquirable right away.
        with mgr.acquire_inference(timeout=0.5) as got_lock_again:
            assert got_lock_again is True

    def test_acquire_inference_timeout(self, config):
        """When another thread holds the lock, the timeout yields False."""
        import threading

        GPUResourceManager.reset_instance()
        mgr = GPUResourceManager(config)
        lock_taken = threading.Event()
        may_release = threading.Event()

        def hold_lock():
            # Grab the inference lock and keep it until the main thread says so.
            with mgr.acquire_inference():
                lock_taken.set()
                may_release.wait(timeout=5.0)

        bg = threading.Thread(target=hold_lock, daemon=True)
        bg.start()
        # Wait until the background thread actually owns the lock.
        assert lock_taken.wait(timeout=2.0)

        # Contended acquisition with a short timeout must report failure.
        with mgr.acquire_inference(timeout=0.1) as got_lock:
            assert got_lock is False

        may_release.set()
        bg.join(timeout=2.0)

    def test_acquire_inference_serializes_concurrent_calls(self, config):
        """Two threads can never be inside the critical section at the same time."""
        import threading
        import time

        GPUResourceManager.reset_instance()
        mgr = GPUResourceManager(config)

        in_section = 0  # threads currently inside the critical section
        peak = 0        # highest concurrency ever observed
        guard = threading.Lock()

        def contend():
            nonlocal in_section, peak
            with mgr.acquire_inference():
                with guard:
                    in_section += 1
                    peak = max(peak, in_section)
                time.sleep(0.05)
                with guard:
                    in_section -= 1

        pool = [threading.Thread(target=contend) for _ in range(5)]
        for th in pool:
            th.start()
        for th in pool:
            th.join(timeout=5.0)

        assert peak == 1, (
            f"Attendu max 1 thread simultané, observé {peak}"
        )
|
||||
|
||||
Reference in New Issue
Block a user