v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
310
examples/diagnostic_vlm.py
Normal file
310
examples/diagnostic_vlm.py
Normal file
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Diagnostic Complet du VLM
|
||||
|
||||
Vérifie:
|
||||
1. État de la mémoire RAM
|
||||
2. Modèle chargé en mémoire
|
||||
3. Mode thinking désactivé
|
||||
4. Performance et cache
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import psutil
|
||||
import requests
|
||||
import json
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from core.detection.ollama_client import OllamaClient
|
||||
|
||||
|
||||
def format_bytes(bytes_val):
    """Render a raw byte count as a human-readable string (B/KB/MB/GB/TB)."""
    value = bytes_val
    for unit in ('B', 'KB', 'MB', 'GB'):
        if value < 1024.0:
            return f"{value:.2f} {unit}"
        value /= 1024.0
    # Anything at or beyond 1024 GB is reported in terabytes.
    return f"{value:.2f} TB"
|
||||
|
||||
|
||||
def check_system_memory():
    """Print system RAM statistics; return False only when usage is critical (>90%)."""
    print("=" * 80)
    print("1. ÉTAT DE LA MÉMOIRE SYSTÈME")
    print("=" * 80)

    mem = psutil.virtual_memory()

    print(f"\nMémoire RAM:")
    print(f" Total: {format_bytes(mem.total)}")
    print(f" Disponible: {format_bytes(mem.available)}")
    print(f" Utilisée: {format_bytes(mem.used)} ({mem.percent}%)")
    print(f" Libre: {format_bytes(mem.free)}")

    usage = mem.percent
    # Above 90% the check fails; between 75% and 90% it only warns.
    if usage > 90:
        print(f"\n⚠️ ALERTE: Mémoire RAM critique ({usage}%)")
        return False
    if usage > 75:
        print(f"\n⚠️ Attention: Mémoire RAM élevée ({usage}%)")
        return True
    print(f"\n✓ Mémoire RAM OK ({usage}%)")
    return True
|
||||
|
||||
|
||||
def check_ollama_status():
    """Query the local Ollama API, list available models, and verify qwen3-vl:8b exists."""
    print("\n" + "=" * 80)
    print("2. ÉTAT D'OLLAMA")
    print("=" * 80)

    try:
        # Connection failures and malformed payloads are both treated as "Ollama down".
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code != 200:
            print("❌ Ollama ne répond pas correctement")
            return False

        print("\n✓ Ollama est actif")

        models = response.json().get('models', [])

        print(f"\nModèles disponibles: {len(models)}")
        for entry in models:
            label = entry.get('name', 'unknown')
            size_bytes = entry.get('size', 0)
            print(f" - {label:30s} | Taille: {format_bytes(size_bytes)}")

        # The diagnostic specifically targets the qwen3-vl:8b vision model.
        if any('qwen3-vl:8b' in entry.get('name', '') for entry in models):
            print("\n✓ Modèle qwen3-vl:8b trouvé")
            return True
        print("\n❌ Modèle qwen3-vl:8b non trouvé")
        return False

    except Exception as e:
        print(f"\n❌ Erreur de connexion à Ollama: {e}")
        return False
|
||||
|
||||
|
||||
def check_model_loaded():
    """Force-load qwen3-vl:8b via a minimal generation request and report Ollama RSS.

    Sends a 1-token /api/generate call so Ollama loads the model into memory,
    then enumerates running Ollama processes with their resident memory.

    Returns:
        bool: True when the model answered the probe request, False on HTTP
        errors or connection failures.
    """
    print("\n" + "=" * 80)
    print("3. MODÈLE EN MÉMOIRE")
    print("=" * 80)

    try:
        # num_predict=1 keeps the probe cheap while still forcing a model load.
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "qwen3-vl:8b",
                "prompt": "test",
                "stream": False,
                "options": {"num_predict": 1}
            },
            timeout=30
        )

        if response.status_code == 200:
            print("\n✓ Modèle qwen3-vl:8b chargé et fonctionnel")

            # Collect Ollama processes to display their memory footprint.
            ollama_procs = []
            for proc in psutil.process_iter(['pid', 'name', 'memory_info']):
                try:
                    if 'ollama' in proc.info['name'].lower():
                        ollama_procs.append(proc)
                except (psutil.NoSuchProcess, psutil.AccessDenied, AttributeError):
                    # Fixed: was a bare `except:` that also swallowed
                    # SystemExit/KeyboardInterrupt. Skip processes that vanished,
                    # are inaccessible, or report no name (name is None).
                    pass

            if ollama_procs:
                print(f"\nProcessus Ollama actifs: {len(ollama_procs)}")
                for proc in ollama_procs:
                    mem_mb = proc.info['memory_info'].rss / (1024 * 1024)
                    print(f" PID {proc.info['pid']}: {mem_mb:.0f} MB")

            return True
        else:
            print(f"\n❌ Erreur lors du chargement: HTTP {response.status_code}")
            return False

    except Exception as e:
        print(f"\n❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def test_thinking_mode():
    """Run a tiny generation through OllamaClient and heuristically detect thinking mode.

    Two signals are used: <think>/<thinking> tags leaking into the response,
    and generation latency (thinking mode is noticeably slower).
    """
    print("\n" + "=" * 80)
    print("4. TEST MODE THINKING")
    print("=" * 80)

    try:
        client = OllamaClient(model="qwen3-vl:8b")

        print("\nTest de génération...")
        import time
        start = time.time()

        result = client.generate(
            prompt="What is 2+2? Answer with just the number.",
            temperature=0.0,
            max_tokens=10
        )

        elapsed = time.time() - start

        if not result["success"]:
            print(f"❌ Erreur: {result.get('error')}")
            return False

        response = result["response"].strip()
        print(f"✓ Réponse: {response}")
        print(f"✓ Temps: {elapsed:.2f}s")

        # Signal 1: explicit thinking tags in the raw model output.
        if "<think>" in response or "<thinking>" in response:
            print("\n⚠️ Mode thinking détecté dans la réponse!")
            print(" Le mode thinking n'est peut-être pas désactivé")
            return False
        print("\n✓ Pas de balises thinking détectées")

        # Signal 2: latency — a fast answer suggests thinking is off.
        if elapsed < 2.0:
            print(f"✓ Temps de réponse rapide ({elapsed:.2f}s) - thinking probablement off")
            return True
        print(f"⚠️ Temps de réponse lent ({elapsed:.2f}s) - thinking peut-être actif")
        return False

    except Exception as e:
        print(f"❌ Erreur: {e}")
        return False
|
||||
|
||||
|
||||
def check_configuration():
    """Print the current DetectionConfig values and tuning recommendations."""
    print("\n" + "=" * 80)
    print("5. CONFIGURATION ACTUELLE")
    print("=" * 80)

    # Local import keeps module import side effects out of the script header.
    from core.detection.ui_detector import DetectionConfig

    config = DetectionConfig()

    print(f"\nDétection UI:")
    for label, value in (
        ("VLM Model", config.vlm_model),
        ("VLM Endpoint", config.vlm_endpoint),
        ("Confidence Threshold", config.confidence_threshold),
        ("Min Region Size", f"{config.min_region_size}px"),
        ("Max Region Size", f"{config.max_region_size}px"),
        ("Use VLM", config.use_vlm_classification),
        ("Merge Overlapping", config.merge_overlapping),
        ("IoU Threshold", config.iou_threshold),
    ):
        print(f" {label}: {value}")

    # Advisory section: each warning is keyed to a measured or configured value.
    print("\n📋 Recommandations:")

    mem = psutil.virtual_memory()
    if mem.percent > 75:
        print(" ⚠️ Mémoire RAM élevée - Considérer:")
        print(" - Fermer d'autres applications")
        print(" - Augmenter max_elements pour limiter le traitement")
        print(" - Utiliser un modèle plus léger (granite3.2-vision:2b)")

    if config.confidence_threshold < 0.7:
        print(f" ⚠️ Seuil de confiance bas ({config.confidence_threshold})")
        print(" - Recommandé: 0.7 ou plus pour production")
        print(" - Évite les faux positifs")

    if config.min_region_size < 15:
        print(f" ℹ️ Taille minimale basse ({config.min_region_size}px)")
        print(" - Détecte plus d'éléments mais plus de bruit")
        print(" - Augmente la charge VLM")
|
||||
|
||||
|
||||
def test_async_capability():
    """Explain the async-classification option and check whether available RAM allows it."""
    print("\n" + "=" * 80)
    print("6. CAPACITÉ ASYNCHRONE")
    print("=" * 80)

    # Purely informational output describing the current architecture and
    # the potential asynchronous redesign.
    for line in (
        "\n📊 Analyse:",
        " Architecture actuelle: Synchrone séquentielle",
        " - Chaque élément est classifié l'un après l'autre",
        " - Temps total = nb_éléments × temps_par_élément",
        "\n🚀 Mode asynchrone possible:",
        " ✓ Ollama supporte les requêtes concurrentes",
        " ✓ Python asyncio/aiohttp disponible",
        " ✓ Gain potentiel: 3-5x plus rapide",
        "\n💡 Implémentation suggérée:",
        " 1. Utiliser asyncio + aiohttp",
        " 2. Batch de 5-10 éléments en parallèle",
        " 3. Limiter la concurrence pour éviter surcharge mémoire",
        "\n⚠️ Considérations:",
        " - Augmente l'utilisation RAM (plusieurs requêtes simultanées)",
        " - Nécessite monitoring de la charge Ollama",
        " - Recommandé seulement si RAM > 16GB disponible",
    ):
        print(line)

    # 16 GB of *available* RAM is the threshold for recommending concurrency.
    threshold = 16 * 1024 * 1024 * 1024  # 16GB
    mem = psutil.virtual_memory()
    if mem.available > threshold:
        print("\n✓ RAM suffisante pour mode asynchrone")
        return True
    print(f"\n⚠️ RAM disponible limitée ({format_bytes(mem.available)})")
    print(" Mode asynchrone déconseillé")
    return False
|
||||
|
||||
|
||||
def main():
    """Run every diagnostic section in order and print a pass/fail summary.

    Returns:
        bool: True when all diagnostics passed.
    """
    print("\n🔍 DIAGNOSTIC COMPLET DU VLM\n")

    # Dict literal evaluation order fixes the order the checks (and their
    # console output) run in.
    results = {
        "memory": check_system_memory(),
        "ollama": check_ollama_status(),
        "model_loaded": check_model_loaded(),
        "thinking_off": test_thinking_mode(),
        "async_capable": test_async_capability()
    }

    # Configuration is informational only and does not affect the verdict.
    check_configuration()

    separator = "=" * 80
    print("\n" + separator)
    print("RÉSUMÉ DU DIAGNOSTIC")
    print(separator)

    print(f"\n✓ Mémoire système: {'OK' if results['memory'] else 'PROBLÈME'}")
    print(f"✓ Ollama actif: {'OK' if results['ollama'] else 'PROBLÈME'}")
    print(f"✓ Modèle chargé: {'OK' if results['model_loaded'] else 'PROBLÈME'}")
    print(f"✓ Thinking désactivé: {'OK' if results['thinking_off'] else 'À VÉRIFIER'}")
    print(f"✓ Async possible: {'OUI' if results['async_capable'] else 'NON RECOMMANDÉ'}")

    everything_passed = all(results.values())

    print("\n" + separator)
    if everything_passed:
        print("🎉 SYSTÈME OPTIMAL - Prêt pour production")
    else:
        print("⚠️ ATTENTION - Quelques points à améliorer")
    print(separator)

    return everything_passed
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit code 0 when every diagnostic passed, 1 otherwise.
    sys.exit(0 if main() else 1)
|
||||
Reference in New Issue
Block a user