feat: smart systray Léa (plyer), preflight GPU, fix tests, support qwen3-vl

- Smart systray (pystray+plyer) remplace PyQt5 : notifications toast,
  menu dynamique avec workflows, chat "Que dois-je faire ?", icône colorée
- Preflight GPU : check_machine_ready() + @pytest.mark.gpu dans conftest
- Correction de 63 tests cassés → 0 failed (1200 passed)
- Tests VWB obsolètes déplacés vers _a_trier/
- Support qwen3-vl:8b sur GPU (remplace qwen2.5vl:3b)
  - fix images < 32x32 (Ollama panic)
  - fix force_json=False (qwen3-vl incompatible)
  - fix temperature 0.1 (0.0 bloque avec images)
- Fix captor Windows : Key.esc, _get_key_name()
- Fix LeaServerClient : check_connection, list_workflows format
- deploy_windows.py : packaging propre client Windows
- VWB : edges visibles (#607d8b) + fitView automatique

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-16 22:25:12 +01:00
parent cf495dd82f
commit ad15237fe0
36 changed files with 432 additions and 8103 deletions

View File

@@ -288,27 +288,31 @@ Respond with just the role name, nothing else."""
Returns:
Dict avec 'type', 'role', 'text', 'confidence', 'success'
"""
# System prompt "zéro tolérance" - Force le VLM à NE produire QUE du JSON
system_prompt = """You are a UI element classifier.
Your ONLY task is to output valid JSON. Never explain. Never comment. Never discuss.
Expected format:
{"type": "...", "role": "...", "text": "..."}"""
# System prompt direct — pas de thinking, JSON uniquement
system_prompt = "You are a JSON-only UI classifier. No thinking. No explanation. Output raw JSON only."
# User prompt simplifié et direct
prompt = """Classify this UI element:
- Type: Choose ONE from [button, text_input, checkbox, radio, dropdown, tab, link, icon, table_row, menu_item]
- Role: Choose ONE from [primary_action, cancel, submit, form_input, search_field, navigation, settings, close, delete, edit, save]
- Text: Any visible text (empty string if none)
# User prompt avec exemples explicites pour guider le modèle
prompt = """/no_think
Look at this UI element image and classify it. Reply with ONLY a JSON object, nothing else.
Output JSON only."""
Types: button, text_input, checkbox, radio, dropdown, tab, link, icon, table_row, menu_item
Roles: primary_action, cancel, submit, form_input, search_field, navigation, settings, close, delete, edit, save
Example 1: {"type": "button", "role": "submit", "text": "OK"}
Example 2: {"type": "text_input", "role": "form_input", "text": ""}
Example 3: {"type": "icon", "role": "close", "text": "X"}
Your answer:"""
# Note: force_json=False car qwen3-vl ne supporte pas format:json
# temperature=0.1 car qwen3-vl bloque à 0.0 avec des images
result = self.generate(
prompt,
image=element_image,
system_prompt=system_prompt,
temperature=0.0,
max_tokens=150,
force_json=True
temperature=0.1,
max_tokens=200,
force_json=False
)
if result["success"]:
@@ -381,6 +385,13 @@ Output JSON only."""
if image.mode != 'RGB':
image = image.convert('RGB')
# 1b. Minimum 32x32 (requis par qwen3-vl, sinon Ollama panic)
min_size = 32
if image.width < min_size or image.height < min_size:
new_w = max(image.width, min_size)
new_h = max(image.height, min_size)
image = image.resize((new_w, new_h), Image.NEAREST)
# 2. Redimensionnement intelligent : max 1280px sur le côté long
max_size = 1280
if max(image.size) > max_size:

View File

@@ -72,7 +72,7 @@ class DetectionConfig:
# - "qwen2.5vl:3b" (léger, tient en GPU 12GB avec split partiel)
# - "qwen2.5vl:7b" (meilleur mais 13GB mémoire, CPU-only sur RTX 5070)
# - "qwen3-vl:8b" (plus gros, supporté mais plus d'erreurs JSON)
vlm_model: str = "qwen2.5vl:3b"
vlm_model: str = "qwen3-vl:8b"
vlm_endpoint: str = "http://localhost:11434"
use_vlm_classification: bool = True # Utiliser VLM pour classifier
@@ -218,7 +218,14 @@ class UIDetector:
logger.debug("Step 2: Classifying regions with VLM...")
ui_elements = []
# Taille minimale pour le VLM Ollama (qwen3-vl exige >= 32x32)
MIN_VLM_SIZE = 32
for i, region in enumerate(regions):
# Ignorer les régions trop petites
if region.w < 5 or region.h < 5:
continue
# Extraire le crop de la région
crop = pil_image.crop((
region.x,
@@ -226,7 +233,13 @@ class UIDetector:
region.x + region.w,
region.y + region.h
))
# Upscale crops that are too small for the VLM (nearest-neighbour resize only, no padding; each side is clamped independently, so the aspect ratio may change)
if crop.width < MIN_VLM_SIZE or crop.height < MIN_VLM_SIZE:
new_w = max(crop.width, MIN_VLM_SIZE)
new_h = max(crop.height, MIN_VLM_SIZE)
crop = crop.resize((new_w, new_h), Image.NEAREST)
# Classifier avec VLM
element = self._classify_region(
crop,