feat(p1g): device policy GPU/CPU paramétrable pour la cascade vision

resolve_device(auto/cuda/cpu) avec garde-fou VRAM et fallback CPU propre. Bascule EasyOCR/SoM/docTR sur GPU si VRAM libre, rollback env sans toucher au code. - core/gpu/device_policy.py (nouveau) : resolve_device + garde-fou VRAM (max_total_gb) - core/detection/som_engine.py, core/llm/ocr_extractor.py, agent_v0/server_v1/resolve_engine.py : câblage device auto (35 lignes) - tests/unit/test_device_policy.py : 15 tests (verts venv réel) Rollback sans toucher au code : RPA_VISION_DEVICE=cpu (force CPU global) / RPA_EASYOCR_GPU=0. Bench GPU réel (latence) + activation large après verdict Qwen. QG Qwen deja valide sur le patch. Mergé depuis worktree agent-a4f390f410e00ad7c (base 5b2afa362), 3 fichiers cibles non modifiés dans le principal (zéro écrasement), dry-run apply propre. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 15:20:52 +02:00
parent d00fe7b00b
commit 0e215da842
5 changed files with 337 additions and 9 deletions
--- a/tests/unit/test_device_policy.py
+++ b/tests/unit/test_device_policy.py
@@ -0,0 +1,153 @@
+"""Tests TDD pour la résolution de device paramétrable (auto/cuda/cpu).
+
+Objectif : basculer OCR/YOLO sur GPU local quand la VRAM est libre, SANS
+hardcoder cuda, avec garde-fou VRAM et fallback CPU propre.
+
+Tous les tests mockent `torch.cuda.is_available` et `torch.cuda.mem_get_info`
+pour ne PAS dépendre du GPU réel de la machine de CI/dev.
+"""
+
+from __future__ import annotations
+
+import importlib
+from unittest import mock
+
+import pytest
+
+from core.gpu import device_policy
+
+
+GB = 1024 ** 3
+
+
+def _mock_cuda(available: bool, free_gb: float = 0.0, total_gb: float = 12.0):
+    """Construit un contexte de mock torch.cuda cohérent.
+
+    free_gb / total_gb sont exprimés en Go ; mem_get_info renvoie des octets.
+    """
+    free_bytes = int(free_gb * GB)
+    total_bytes = int(total_gb * GB)
+    return mock.patch.multiple(
+        device_policy.torch.cuda,
+        is_available=mock.Mock(return_value=available),
+        mem_get_info=mock.Mock(return_value=(free_bytes, total_bytes)),
+    )
+
+
+# ── requested="cpu" ─────────────────────────────────────────────────────────
+
+def test_resolve_cpu_explicit_returns_cpu(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=12.0):
+        assert device_policy.resolve_device("cpu") == "cpu"
+
+
+# ── requested="cuda" ────────────────────────────────────────────────────────
+
+def test_resolve_cuda_falls_back_to_cpu_when_unavailable(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=False):
+        assert device_policy.resolve_device("cuda") == "cpu"
+
+
+def test_resolve_cuda_returns_cuda_when_available(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=8.0):
+        assert device_policy.resolve_device("cuda") == "cuda"
+
+
+# ── requested="auto" (défaut) ───────────────────────────────────────────────
+
+def test_resolve_auto_cuda_when_vram_sufficient(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=8.0):
+        assert device_policy.resolve_device("auto", min_free_gb=2.0) == "cuda"
+
+
+def test_resolve_auto_cpu_when_vram_insufficient(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=1.0):
+        assert device_policy.resolve_device("auto", min_free_gb=2.0) == "cpu"
+
+
+def test_resolve_auto_cpu_when_cuda_unavailable(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=False):
+        assert device_policy.resolve_device("auto") == "cpu"
+
+
+def test_resolve_default_is_auto(monkeypatch):
+    """Sans argument, le défaut est 'auto'."""
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=8.0):
+        assert device_policy.resolve_device() == "cuda"
+
+
+# ── garde-fou : usage total ne doit pas dépasser le plafond ─────────────────
+
+def test_resolve_auto_cpu_when_switch_would_exceed_total_cap(monkeypatch):
+    """Si basculer cuda ferait dépasser le plafond d'usage total (6 Go par
+    défaut), on reste CPU même si la VRAM libre dépasse min_free_gb.
+
+    total=12, free=4 → used=8 > cap 6 → CPU.
+    """
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=4.0, total_gb=12.0):
+        assert device_policy.resolve_device("auto", min_free_gb=2.0,
+                                             max_total_gb=6.0) == "cpu"
+
+
+def test_resolve_auto_cuda_when_under_total_cap(monkeypatch):
+    """total=12, free=11 → used=1 < cap 6 et free 11 ≥ min 2 → CUDA."""
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with _mock_cuda(available=True, free_gb=11.0, total_gb=12.0):
+        assert device_policy.resolve_device("auto", min_free_gb=2.0,
+                                             max_total_gb=6.0) == "cuda"
+
+
+# ── override env RPA_VISION_DEVICE ──────────────────────────────────────────
+
+def test_env_override_cpu_forces_cpu_even_in_auto(monkeypatch):
+    monkeypatch.setenv("RPA_VISION_DEVICE", "cpu")
+    with _mock_cuda(available=True, free_gb=12.0):
+        assert device_policy.resolve_device("auto") == "cpu"
+        assert device_policy.resolve_device("cuda") == "cpu"
+
+
+def test_env_override_cuda_takes_precedence_over_requested_cpu(monkeypatch):
+    """L'override env prime sur l'argument requested."""
+    monkeypatch.setenv("RPA_VISION_DEVICE", "cuda")
+    with _mock_cuda(available=True, free_gb=8.0):
+        assert device_policy.resolve_device("cpu") == "cuda"
+
+
+def test_env_override_cuda_still_falls_back_when_unavailable(monkeypatch):
+    monkeypatch.setenv("RPA_VISION_DEVICE", "cuda")
+    with _mock_cuda(available=False):
+        assert device_policy.resolve_device("auto") == "cpu"
+
+
+def test_env_override_invalid_value_ignored(monkeypatch):
+    """Une valeur env invalide est ignorée (on retombe sur requested)."""
+    monkeypatch.setenv("RPA_VISION_DEVICE", "banana")
+    with _mock_cuda(available=False):
+        assert device_policy.resolve_device("auto") == "cpu"
+
+
+# ── robustesse : pas de crash si torch.cuda lève ───────────────────────────
+
+def test_resolve_auto_cpu_on_torch_exception(monkeypatch):
+    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
+    with mock.patch.object(
+        device_policy.torch.cuda, "is_available",
+        side_effect=RuntimeError("driver boom"),
+    ):
+        assert device_policy.resolve_device("auto") == "cpu"
+
+
+# ── import-safe : pas d'effet de bord à l'import ───────────────────────────
+
+def test_module_import_is_safe():
+    """Réimporter le module ne doit déclencher aucun chargement modèle/GPU."""
+    importlib.reload(device_policy)
+    assert hasattr(device_policy, "resolve_device")