resolve_device('auto') renvoyait 'cpu' sur le GB10 : le plafond max_total_gb=6
(pensé pour la RTX 12 Go dédiés) voyait used≈99 Go car la mémoire UNIFIÉE compte
la RAM système. Au-dessus de DEFAULT_LARGE_VRAM_GB=24 (grosse carte / mémoire
unifiée), le plafond n'est plus appliqué ; seul free >= min_free_gb décide.
RTX (<=24 Go) inchangée.
Détecté au bench GB10 2026-06-08 (auto->cpu, OCR 10x plus lent). +2 tests (17/17).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
174 lines
7.3 KiB
Python
174 lines
7.3 KiB
Python
"""Tests TDD pour la résolution de device paramétrable (auto/cuda/cpu).
|
|
|
|
Objectif : basculer OCR/YOLO sur GPU local quand la VRAM est libre, SANS
|
|
hardcoder cuda, avec garde-fou VRAM et fallback CPU propre.
|
|
|
|
Tous les tests mockent `torch.cuda.is_available` et `torch.cuda.mem_get_info`
|
|
pour ne PAS dépendre du GPU réel de la machine de CI/dev.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
from unittest import mock
|
|
|
|
import pytest
|
|
|
|
from core.gpu import device_policy
|
|
|
|
|
|
# Number of bytes in one gibibyte; used to convert the Go-valued test inputs
# into the byte counts returned by the mocked ``mem_get_info``.
GB = 1 << 30
|
|
|
|
|
|
def _mock_cuda(available: bool, free_gb: float = 0.0, total_gb: float = 12.0):
    """Build a consistent ``torch.cuda`` patcher for the tests.

    ``free_gb`` and ``total_gb`` are given in gigabytes; the mocked
    ``mem_get_info`` reports them as a ``(free, total)`` tuple of bytes.

    Args:
        available: value returned by the mocked ``is_available``.
        free_gb: free memory reported by the mock, in GB.
        total_gb: total memory reported by the mock, in GB.

    Returns:
        The ``mock.patch.multiple`` patcher targeting
        ``device_policy.torch.cuda``; callers use it as a context manager.
    """
    mem_info = (int(free_gb * GB), int(total_gb * GB))
    replacements = {
        "is_available": mock.Mock(return_value=available),
        "mem_get_info": mock.Mock(return_value=mem_info),
    }
    return mock.patch.multiple(device_policy.torch.cuda, **replacements)
|
|
|
|
|
|
# ── requested="cpu" ─────────────────────────────────────────────────────────
|
|
|
|
def test_resolve_cpu_explicit_returns_cpu(monkeypatch):
    """An explicit "cpu" request yields "cpu" even when CUDA looks usable."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=12.0):
        resolved = device_policy.resolve_device("cpu")
        assert resolved == "cpu"
|
|
|
|
|
|
# ── requested="cuda" ────────────────────────────────────────────────────────
|
|
|
|
def test_resolve_cuda_falls_back_to_cpu_when_unavailable(monkeypatch):
    """Requesting "cuda" while CUDA is reported unavailable yields "cpu"."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=False):
        resolved = device_policy.resolve_device("cuda")
        assert resolved == "cpu"
|
|
|
|
|
|
def test_resolve_cuda_returns_cuda_when_available(monkeypatch):
    """Requesting "cuda" while CUDA is reported available yields "cuda"."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=8.0):
        resolved = device_policy.resolve_device("cuda")
        assert resolved == "cuda"
|
|
|
|
|
|
# ── requested="auto" (défaut) ───────────────────────────────────────────────
|
|
|
|
def test_resolve_auto_cuda_when_vram_sufficient(monkeypatch):
    """"auto" with free=8 GB and min_free_gb=2 is expected to pick "cuda"."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=8.0):
        resolved = device_policy.resolve_device("auto", min_free_gb=2.0)
        assert resolved == "cuda"
|
|
|
|
|
|
def test_resolve_auto_cpu_when_vram_insufficient(monkeypatch):
    """"auto" with free=1 GB and min_free_gb=2 is expected to pick "cpu"."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=1.0):
        resolved = device_policy.resolve_device("auto", min_free_gb=2.0)
        assert resolved == "cpu"
|
|
|
|
|
|
def test_resolve_auto_cpu_when_cuda_unavailable(monkeypatch):
    """"auto" while CUDA is reported unavailable is expected to pick "cpu"."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=False):
        resolved = device_policy.resolve_device("auto")
        assert resolved == "cpu"
|
|
|
|
|
|
def test_resolve_default_is_auto(monkeypatch):
    """Without any argument, the default request is 'auto'."""
    # Clear the env override so the call relies on its defaults alone.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=8.0):
        resolved = device_policy.resolve_device()
        assert resolved == "cuda"
|
|
|
|
|
|
# ── garde-fou : usage total ne doit pas dépasser le plafond ─────────────────
|
|
|
|
def test_resolve_auto_cpu_when_switch_would_exceed_total_cap(monkeypatch):
    """Stay on CPU when switching to cuda would exceed the total-usage cap.

    The cap (6 GB here) wins even though free VRAM is above min_free_gb:
    total=12, free=4 -> used=8 > cap 6 -> CPU.
    """
    # Clear the env override so only the explicit arguments are in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=4.0, total_gb=12.0):
        resolved = device_policy.resolve_device(
            "auto",
            min_free_gb=2.0,
            max_total_gb=6.0,
        )
        assert resolved == "cpu"
|
|
|
|
|
|
def test_resolve_auto_cuda_when_under_total_cap(monkeypatch):
    """total=12, free=11 -> used=1 < cap 6 and free 11 >= min 2 -> CUDA."""
    # Clear the env override so only the explicit arguments are in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=11.0, total_gb=12.0):
        resolved = device_policy.resolve_device(
            "auto",
            min_free_gb=2.0,
            max_total_gb=6.0,
        )
        assert resolved == "cuda"
|
|
|
|
|
|
# ── mémoire unifiée / grosse carte (DGX GB10) : plafond inapplicable ─────────
|
|
|
|
def test_resolve_auto_cuda_on_unified_memory_ignores_total_cap(monkeypatch):
    """Unified memory (GB10): the total-usage cap must be ignored.

    total=121, free=22 -> used=99 > cap 6, BUT total is above the
    large-memory threshold (24), so the cap is skipped and
    free 22 >= min 2 -> CUDA.

    Without this behaviour the DGX would wrongly fall back to CPU
    (regression observed during the GB10 bench of 2026-06-08).
    """
    # Clear the env override so only the explicit arguments are in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=22.0, total_gb=121.0):
        resolved = device_policy.resolve_device(
            "auto",
            min_free_gb=2.0,
            max_total_gb=6.0,
        )
        assert resolved == "cuda"
|
|
|
|
|
|
def test_resolve_auto_cpu_on_large_memory_when_free_too_low(monkeypatch):
    """Large memory but free < min -> CPU (free stays the real safeguard)."""
    # Clear the env override so only the explicit arguments are in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    with _mock_cuda(available=True, free_gb=1.0, total_gb=121.0):
        resolved = device_policy.resolve_device("auto", min_free_gb=2.0)
        assert resolved == "cpu"
|
|
|
|
|
|
# ── override env RPA_VISION_DEVICE ──────────────────────────────────────────
|
|
|
|
def test_env_override_cpu_forces_cpu_even_in_auto(monkeypatch):
    """RPA_VISION_DEVICE=cpu yields "cpu" for both "auto" and "cuda" requests."""
    monkeypatch.setenv("RPA_VISION_DEVICE", "cpu")
    with _mock_cuda(available=True, free_gb=12.0):
        # Same order as before: "auto" is checked first, then "cuda".
        for requested in ("auto", "cuda"):
            assert device_policy.resolve_device(requested) == "cpu"
|
|
|
|
|
|
def test_env_override_cuda_takes_precedence_over_requested_cpu(monkeypatch):
    """The env override takes precedence over the ``requested`` argument."""
    monkeypatch.setenv("RPA_VISION_DEVICE", "cuda")
    with _mock_cuda(available=True, free_gb=8.0):
        resolved = device_policy.resolve_device("cpu")
        assert resolved == "cuda"
|
|
|
|
|
|
def test_env_override_cuda_still_falls_back_when_unavailable(monkeypatch):
    """RPA_VISION_DEVICE=cuda still yields "cpu" when CUDA is unavailable."""
    monkeypatch.setenv("RPA_VISION_DEVICE", "cuda")
    with _mock_cuda(available=False):
        resolved = device_policy.resolve_device("auto")
        assert resolved == "cpu"
|
|
|
|
|
|
def test_env_override_invalid_value_ignored(monkeypatch):
    """An invalid env value is ignored (we fall back on ``requested``).

    The CUDA-unavailable check alone cannot distinguish "override ignored"
    from "override treated as cpu", because every path returns "cpu" there.
    The CUDA-available checks make the test discriminating: they repeat the
    expectations of the tests that run with the variable unset, so they only
    pass if "banana" really has no effect.
    """
    monkeypatch.setenv("RPA_VISION_DEVICE", "banana")

    # Original expectation: no CUDA -> "cpu" regardless of the override.
    with _mock_cuda(available=False):
        assert device_policy.resolve_device("auto") == "cpu"

    with _mock_cuda(available=True, free_gb=8.0):
        # Would be "cpu" if the invalid value were coerced to a cpu override.
        assert device_policy.resolve_device("cuda") == "cuda"
        # Would be "cuda" if the invalid value were coerced to a cuda override.
        assert device_policy.resolve_device("cpu") == "cpu"
|
|
|
|
|
|
# ── robustesse : pas de crash si torch.cuda lève ───────────────────────────
|
|
|
|
def test_resolve_auto_cpu_on_torch_exception(monkeypatch):
    """"auto" yields "cpu" instead of crashing when ``is_available`` raises."""
    # Clear the env override so only the ``requested`` argument is in play.
    monkeypatch.delenv("RPA_VISION_DEVICE", raising=False)
    failing_is_available = mock.patch.object(
        device_policy.torch.cuda,
        "is_available",
        side_effect=RuntimeError("driver boom"),
    )
    with failing_is_available:
        resolved = device_policy.resolve_device("auto")
        assert resolved == "cpu"
|
|
|
|
|
|
# ── import-safe : pas d'effet de bord à l'import ───────────────────────────
|
|
|
|
def test_module_import_is_safe():
    """Re-importing the module must not trigger any model/GPU loading.

    Only checks that the reload completes and that ``resolve_device`` is
    still exposed afterwards.
    """
    importlib.reload(device_policy)
    has_entry_point = hasattr(device_policy, "resolve_device")
    assert has_entry_point
|