v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
64
examples/debug_vlm_response.py
Normal file
64
examples/debug_vlm_response.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
"""
Debug: See what the VLM actually returns
"""

import sys
from pathlib import Path

# Make the project root importable when this script is run directly
# (examples/ is one level below the repository root).
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.detection.ollama_client import OllamaClient
def test_vlm_response():
    """Probe the VLM with three prompts of increasing structure.

    Sends a free-form description request, a button-listing request, and a
    JSON-only request to the same screenshot, printing each raw response so
    the model's actual output format can be inspected.

    Returns:
        None. Results are printed to stdout only.
    """
    client = OllamaClient(model="qwen3-vl:8b")
    screenshot_path = "rpa_vision_v3/examples/test_ui_screenshot.png"

    # (banner title, prompt, temperature) — one entry per experiment.
    # Prompt/title strings are kept byte-identical to the original script
    # so the printed output is unchanged.
    tests = [
        (
            "TEST 1: Prompt simple",
            "Describe what you see in this image.",
            0.1,
        ),
        (
            "TEST 2: Demander de lister les boutons",
            "List all the buttons you can see in this image. For each button, tell me its label.",
            0.1,
        ),
        (
            "TEST 3: Demander JSON simple",
            'List the buttons in this image as JSON.\nFormat: [{"label": "button text"}]\nReturn only the JSON array.',
            0.0,
        ),
    ]

    for index, (title, prompt, temperature) in enumerate(tests):
        # The first banner has no leading newline in the original output;
        # subsequent banners are separated from the previous response by one.
        print(("" if index == 0 else "\n") + "=" * 80)
        print(title)
        print("=" * 80)

        result = client.generate(
            prompt, image_path=screenshot_path, temperature=temperature
        )

        if result["success"]:
            print(f"✓ Réponse reçue ({len(result['response'])} caractères)")
            print(f"\nRéponse:\n{result['response']}\n")
        else:
            print(f"❌ Erreur: {result['error']}")
# Script entry point: run the debug probes when executed directly.
if __name__ == "__main__":
    test_vlm_response()
Reference in New Issue
Block a user