Files
rpa_vision_v3/core/gpu/preflight.py
Dom ad15237fe0 feat: smart systray Léa (plyer), preflight GPU, fix tests, support qwen3-vl
- Smart systray (pystray+plyer) remplace PyQt5 : notifications toast,
  menu dynamique avec workflows, chat "Que dois-je faire ?", icône colorée
- Preflight GPU : check_machine_ready() + @pytest.mark.gpu dans conftest
- Correction 63 tests cassés → 0 failed (1200 passed)
- Tests VWB obsolètes déplacés vers _a_trier/
- Support qwen3-vl:8b sur GPU (remplace qwen2.5vl:3b)
  - fix images < 32x32 (Ollama panic)
  - fix force_json=False (qwen3-vl incompatible)
  - fix temperature 0.1 (0.0 bloque avec images)
- Fix captor Windows : Key.esc, _get_key_name()
- Fix LeaServerClient : check_connection, list_workflows format
- deploy_windows.py : packaging propre client Windows
- VWB : edges visibles (#607d8b) + fitView automatique

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:25:12 +01:00

273 lines
8.4 KiB
Python

"""
Preflight GPU Check — Vérification machine avant tout lancement.
Vérifie que le GPU et la VRAM sont suffisamment libres avant de lancer
des tests, replays, ou tout processus gourmand en ressources.
Usage:
from core.gpu.preflight import check_machine_ready, require_gpu_ready
# Vérification simple
result = check_machine_ready()
if not result.ready:
print(f"Machine pas prête : {result.reason}")
# Avec seuils personnalisés
result = check_machine_ready(min_free_vram_mb=2000, max_gpu_util_percent=50)
# Comme décorateur (skip le test si GPU pas dispo)
@require_gpu_ready(min_free_vram_mb=1000)
def test_something():
...
"""
import functools
import logging
import subprocess
from dataclasses import dataclass, field
from typing import List, Optional
import pytest
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default thresholds used by check_machine_ready() and require_gpu_ready()
DEFAULT_MIN_FREE_VRAM_MB = 1000 # minimum free VRAM required, in MB (about 1 GB)
DEFAULT_MAX_GPU_UTIL_PERCENT = 80 # GPU counts as overloaded above 80% utilization
DEFAULT_MAX_FOREIGN_PROCESSES = 5 # warn (non-blocking) above this many foreign GPU processes
@dataclass
class GPUProcess:
    """A single process reported as using the GPU."""

    # Operating-system process ID.
    pid: int
    # Process name as reported by the GPU query.
    name: str
    # GPU memory attributed to the process, in MB.
    vram_mb: int
    # True when the process is an rpa_vision_v3 process.
    is_own: bool
@dataclass
class PreflightResult:
    """Outcome of a machine/GPU preflight check.

    ``ready`` is the overall verdict; ``reason`` carries the explanation
    when the verdict is negative. The remaining fields are a snapshot of
    the GPU state, the processes found on it, and any non-blocking
    warnings collected along the way.
    """

    ready: bool
    reason: Optional[str] = None

    # GPU state
    gpu_name: str = ""
    total_vram_mb: int = 0
    used_vram_mb: int = 0
    free_vram_mb: int = 0
    gpu_utilization_percent: int = 0

    # Processes: everything found on the GPU, and the subset not flagged
    # as our own.
    gpu_processes: List[GPUProcess] = field(default_factory=list)
    foreign_processes: List[GPUProcess] = field(default_factory=list)

    # Non-blocking warnings
    warnings: List[str] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the result as a multi-line, human-readable report."""
        verdict = "PRÊT" if self.ready else "PAS PRÊT"

        report = [f"[GPU Preflight: {verdict}]"]
        report.append(f" GPU: {self.gpu_name}")
        report.append(
            f" VRAM: {self.used_vram_mb}/{self.total_vram_mb} MB "
            f"(libre: {self.free_vram_mb} MB)"
        )
        report.append(f" Utilisation GPU: {self.gpu_utilization_percent}%")
        report.append(
            f" Processus GPU: {len(self.gpu_processes)} "
            f"(dont {len(self.foreign_processes)} externes)"
        )

        # The failure reason is only shown for a negative verdict.
        if not self.ready:
            report.append(f" Raison: {self.reason}")

        report.extend(f"{w}" for w in self.warnings)

        if self.foreign_processes:
            report.append(" Processus externes:")
            report.extend(
                f" - PID {p.pid}: {p.name} ({p.vram_mb} MB)"
                for p in self.foreign_processes
            )

        return "\n".join(report)
def _get_gpu_info() -> Optional[dict]:
"""Récupère les infos GPU via nvidia-smi."""
try:
result = subprocess.run(
[
"nvidia-smi",
"--query-gpu=name,memory.total,memory.used,memory.free,utilization.gpu",
"--format=csv,noheader,nounits",
],
capture_output=True,
text=True,
timeout=5,
)
if result.returncode != 0:
return None
parts = [p.strip() for p in result.stdout.strip().split(",")]
if len(parts) < 5:
return None
return {
"name": parts[0],
"total_mb": int(parts[1]),
"used_mb": int(parts[2]),
"free_mb": int(parts[3]),
"utilization": int(parts[4]) if parts[4].isdigit() else 0,
}
except Exception as e:
logger.error(f"nvidia-smi échoué : {e}")
return None
def _parse_gpu_process_line(line: str) -> Optional[GPUProcess]:
    """Parse one ``pid, process_name, used_gpu_memory`` CSV row.

    Returns:
        The parsed process, or None when the row is blank, has fewer than
        three columns, or its PID is not an integer.
    """
    # Take the PID from the left and the memory from the right, so a comma
    # inside the process name cannot shift the columns.
    pid_text, first_sep, remainder = line.partition(",")
    name_text, last_sep, vram_text = remainder.rpartition(",")
    if not first_sep or not last_sep:
        return None
    try:
        pid = int(pid_text)
    except ValueError:
        # Skip just this row; the other rows are still valid.
        return None
    name = name_text.strip()
    vram_text = vram_text.strip()
    return GPUProcess(
        pid=pid,
        name=name,
        # Memory may be reported as a non-numeric placeholder; count it as 0.
        vram_mb=int(vram_text) if vram_text.isdigit() else 0,
        # NOTE(review): this only matches when the reported process name
        # itself contains "rpa_vision_v3" — confirm that it does for our
        # own processes on the target machines.
        is_own="rpa_vision_v3" in name,
    )


def _get_gpu_processes() -> List[GPUProcess]:
    """List the compute processes currently using the GPU.

    Returns:
        One ``GPUProcess`` per well-formed row printed by nvidia-smi.
        Malformed rows are skipped individually. An empty list is returned
        when nvidia-smi is missing, times out (5 s), or exits with a
        non-zero status.
    """
    try:
        completed = subprocess.run(
            [
                "nvidia-smi",
                "--query-compute-apps=pid,process_name,used_gpu_memory",
                "--format=csv,noheader,nounits",
            ],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if completed.returncode != 0:
            return []
        parsed = (
            _parse_gpu_process_line(row)
            for row in completed.stdout.splitlines()
        )
        return [proc for proc in parsed if proc is not None]
    except Exception as e:
        # Deliberately broad: listing processes is best-effort and must
        # never crash the preflight.
        logger.error(f"Impossible de lister les processus GPU : {e}")
        return []
def check_machine_ready(
    min_free_vram_mb: int = DEFAULT_MIN_FREE_VRAM_MB,
    max_gpu_util_percent: int = DEFAULT_MAX_GPU_UTIL_PERCENT,
    max_foreign_processes: int = DEFAULT_MAX_FOREIGN_PROCESSES,
) -> PreflightResult:
    """
    Check that the machine is ready for a GPU-heavy launch.

    The checks run in order and stop at the first blocking failure:
    GPU reachable, enough free VRAM, GPU utilization low enough.
    Non-blocking warnings are only collected once all of those pass.

    Args:
        min_free_vram_mb: Minimum free VRAM required (default: 1000 MB).
        max_gpu_util_percent: Highest tolerated GPU utilization (default: 80%).
        max_foreign_processes: Number of foreign GPU processes above which
            a warning is recorded.

    Returns:
        A PreflightResult holding the verdict, the failure reason if any,
        the GPU snapshot, the process lists and the warnings.
    """
    result = PreflightResult(ready=True)

    # 1. The GPU must be reachable at all.
    gpu_info = _get_gpu_info()
    if gpu_info is None:
        result.ready = False
        result.reason = "GPU inaccessible (nvidia-smi échoué)"
        logger.warning(result.reason)
        return result

    result.gpu_name = gpu_info["name"]
    result.total_vram_mb = gpu_info["total_mb"]
    result.used_vram_mb = gpu_info["used_mb"]
    result.free_vram_mb = gpu_info["free_mb"]
    result.gpu_utilization_percent = gpu_info["utilization"]

    # 2. Collect GPU processes and single out the ones that are not ours.
    result.gpu_processes = _get_gpu_processes()
    result.foreign_processes = [p for p in result.gpu_processes if not p.is_own]

    # 3. Blocking: not enough free VRAM.
    if result.free_vram_mb < min_free_vram_mb:
        result.ready = False
        result.reason = (
            f"VRAM insuffisante : {result.free_vram_mb} MB libre "
            f"(minimum requis : {min_free_vram_mb} MB)"
        )
        logger.warning(result.reason)
        return result

    # 4. Blocking: GPU too busy.
    if result.gpu_utilization_percent > max_gpu_util_percent:
        result.ready = False
        result.reason = (
            f"GPU surchargé : {result.gpu_utilization_percent}% "
            f"(maximum toléré : {max_gpu_util_percent}%)"
        )
        logger.warning(result.reason)
        return result

    # 5. Non-blocking warnings.
    if len(result.foreign_processes) > max_foreign_processes:
        result.warnings.append(
            f"{len(result.foreign_processes)} processus externes sur le GPU"
        )

    foreign_vram = sum(p.vram_mb for p in result.foreign_processes)
    # Require a positive total: with a reported total of 0 the percentage
    # below would divide by zero and crash the whole preflight.
    if result.total_vram_mb > 0 and foreign_vram > result.total_vram_mb * 0.5:
        result.warnings.append(
            f"Processus externes utilisent {foreign_vram} MB "
            f"({foreign_vram * 100 // result.total_vram_mb}% de la VRAM)"
        )

    # Above the hard minimum, but with less than twice that margin.
    if result.free_vram_mb < min_free_vram_mb * 2:
        result.warnings.append(
            f"VRAM libre ({result.free_vram_mb} MB) proche du seuil minimum"
        )

    for w in result.warnings:
        logger.info("Preflight warning: %s", w)

    logger.info(
        "GPU preflight OK: %s MB libre, %s%% utilisation",
        result.free_vram_mb,
        result.gpu_utilization_percent,
    )
    return result
def require_gpu_ready(
    min_free_vram_mb: int = DEFAULT_MIN_FREE_VRAM_MB,
    max_gpu_util_percent: int = DEFAULT_MAX_GPU_UTIL_PERCENT,
):
    """
    Pytest decorator factory — skip the test when the GPU is not ready.

    The preflight check runs each time the decorated test is called, not
    when the decorator is applied.

    Args:
        min_free_vram_mb: Minimum free VRAM passed to check_machine_ready.
        max_gpu_util_percent: Highest tolerated GPU utilization passed to
            check_machine_ready.

    Usage:
        @require_gpu_ready(min_free_vram_mb=2000)
        def test_heavy_gpu_operation():
            ...
    """
    # Thresholds captured once and forwarded unchanged on every call.
    thresholds = {
        "min_free_vram_mb": min_free_vram_mb,
        "max_gpu_util_percent": max_gpu_util_percent,
    }

    def _decorate(func):
        @functools.wraps(func)
        def _guarded(*args, **kwargs):
            preflight = check_machine_ready(**thresholds)
            if not preflight.ready:
                pytest.skip(f"GPU pas prêt : {preflight.reason}")
            return func(*args, **kwargs)

        return _guarded

    return _decorate