feat: agent Windows consomme UIA — capture + résolution
Câblage agent Windows pour le pipeline V4 :
captor.py — capture UIA pendant l'enregistrement
- _inject_uia_snapshot() appelé après chaque clic
- Ajoute evt['uia_snapshot'] = {name, control_type, parent_path, ...}
- Non-bloquant : fallback silencieux si helper absent
- ~10-20ms par clic, pas de ralentissement perceptible
executor.py — résolution UIA locale au replay
- _resolve_via_uia_local() : appelle lea_uia.exe find via UIAHelper
- Court-circuit prioritaire avant le GroundingEngine serveur
- Activé quand resolve_order[0] == "uia" et target_spec.uia_target présent
- Coordonnées pixel-perfect (bounding_rect → center)
- Fallback transparent vers le grounding serveur si UIA échoue
uia_helper.py copié dans agent_v1/core/ (wrapper Python pour lea_uia.exe)
Auto-détection du binaire dans C:\Lea\helpers\lea_uia.exe
Singleton partagé get_shared_helper()
Déployé et validé sur la VM Windows :
- query_at(100,100) → "Bureau 1" en 10ms depuis Python
- Binaire lea_uia.exe trouvé et fonctionnel
- Les 3 modules Python sont dans C:\Lea\agent_v1\core\
Ce qui est maintenant possible (après redémarrage de Léa sur la VM) :
- Enregistrer un workflow : chaque clic aura un uia_snapshot
- Compiler via /workflow/compile : plan V4 avec stratégie UIA primaire
- Rejouer via /replay/plan : l'agent utilise UIA (10-20ms) au lieu de VLM (2-5s)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -178,8 +178,41 @@ class EventCaptorV1:
|
|||||||
"timestamp": now,
|
"timestamp": now,
|
||||||
}
|
}
|
||||||
self._inject_screen_metadata(event)
|
self._inject_screen_metadata(event)
|
||||||
|
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||||
|
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||||
|
# des données vision comme aujourd'hui.
|
||||||
|
self._inject_uia_snapshot(event, x, y)
|
||||||
self.on_event(event)
|
self.on_event(event)
|
||||||
|
|
||||||
|
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||||
|
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||||
|
|
||||||
|
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||||
|
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .uia_helper import get_shared_helper
|
||||||
|
helper = get_shared_helper()
|
||||||
|
if not helper.available:
|
||||||
|
return
|
||||||
|
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||||
|
if element is None:
|
||||||
|
return
|
||||||
|
event["uia_snapshot"] = {
|
||||||
|
"name": element.name,
|
||||||
|
"control_type": element.control_type,
|
||||||
|
"class_name": element.class_name,
|
||||||
|
"automation_id": element.automation_id,
|
||||||
|
"bounding_rect": list(element.bounding_rect),
|
||||||
|
"is_enabled": element.is_enabled,
|
||||||
|
"is_offscreen": element.is_offscreen,
|
||||||
|
"parent_path": element.parent_path,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
# Non bloquant — on continue sans UIA
|
||||||
|
import logging
|
||||||
|
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||||
|
|
||||||
def _on_scroll(self, x, y, dx, dy):
|
def _on_scroll(self, x, y, dx, dy):
|
||||||
event = {
|
event = {
|
||||||
"type": "mouse_scroll",
|
"type": "mouse_scroll",
|
||||||
|
|||||||
@@ -264,6 +264,59 @@ class ActionExecutorV1:
|
|||||||
logger.warning(f"Acteur gemma4 indisponible : {e}")
|
logger.warning(f"Acteur gemma4 indisponible : {e}")
|
||||||
return "EXECUTER"
|
return "EXECUTER"
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# UIA local — résolution via lea_uia.exe (helper Rust)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _resolve_via_uia_local(
|
||||||
|
self, uia_target: dict, screen_width: int, screen_height: int,
|
||||||
|
):
|
||||||
|
"""Résoudre une cible via UIA local (lea_uia.exe).
|
||||||
|
|
||||||
|
Le plan V4 contient un uia_target (nom, control_type, parent_path).
|
||||||
|
On appelle le helper Rust qui interroge UIAutomationCore.dll et
|
||||||
|
retourne les coordonnées pixel-perfect de l'élément.
|
||||||
|
|
||||||
|
Retourne (x_pct, y_pct) si trouvé, None sinon.
|
||||||
|
Le fallback vers le grounding serveur est géré par l'appelant.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .uia_helper import get_shared_helper
|
||||||
|
helper = get_shared_helper()
|
||||||
|
if not helper.available:
|
||||||
|
return None
|
||||||
|
|
||||||
|
name = uia_target.get("name", "")
|
||||||
|
control_type = uia_target.get("control_type", "") or None
|
||||||
|
automation_id = uia_target.get("automation_id", "") or None
|
||||||
|
|
||||||
|
if not name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
element = helper.find_by_name(
|
||||||
|
name=name,
|
||||||
|
control_type=control_type,
|
||||||
|
automation_id=automation_id,
|
||||||
|
timeout_ms=1500,
|
||||||
|
)
|
||||||
|
if element is None or not element.is_clickable():
|
||||||
|
return None
|
||||||
|
|
||||||
|
cx, cy = element.center()
|
||||||
|
if screen_width <= 0 or screen_height <= 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
x_pct = cx / screen_width
|
||||||
|
y_pct = cy / screen_height
|
||||||
|
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||||
|
return None
|
||||||
|
|
||||||
|
return (x_pct, y_pct)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"UIA local resolve erreur : {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Observer — pré-analyse écran avant chaque action
|
# Observer — pré-analyse écran avant chaque action
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -506,8 +559,29 @@ class ActionExecutorV1:
|
|||||||
return result
|
return result
|
||||||
# EXECUTER → continuer normalement
|
# EXECUTER → continuer normalement
|
||||||
|
|
||||||
if visual_mode and target_spec and server_url:
|
# ── V4 : UIA local (si resolve_order l'indique et helper dispo) ──
|
||||||
# ── GROUNDING : localisation pure via GroundingEngine ──
|
# Court-circuite le grounding serveur pour les clicks sur Windows natif.
|
||||||
|
# 10-20ms au lieu de 2-5s pour un clic — c'est le cœur du V4.
|
||||||
|
uia_resolved = False
|
||||||
|
if visual_mode and target_spec and action_type == "click":
|
||||||
|
resolve_order = target_spec.get("resolve_order", [])
|
||||||
|
uia_target = target_spec.get("uia_target")
|
||||||
|
if resolve_order and resolve_order[0] == "uia" and uia_target:
|
||||||
|
uia_coords = self._resolve_via_uia_local(uia_target, width, height)
|
||||||
|
if uia_coords:
|
||||||
|
x_pct, y_pct = uia_coords
|
||||||
|
result["visual_resolved"] = True
|
||||||
|
result["resolution_method"] = "v4_uia_local"
|
||||||
|
result["resolution_score"] = 0.95
|
||||||
|
uia_resolved = True
|
||||||
|
print(f" [UIA] résolu en local: ({x_pct:.4f}, {y_pct:.4f})")
|
||||||
|
logger.info(
|
||||||
|
f"V4 UIA local OK : {uia_target.get('name', '?')} "
|
||||||
|
f"→ ({x_pct:.4f}, {y_pct:.4f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not uia_resolved and visual_mode and target_spec and server_url:
|
||||||
|
# ── GROUNDING : localisation pure via GroundingEngine (fallback) ──
|
||||||
from .grounding import GroundingEngine
|
from .grounding import GroundingEngine
|
||||||
grounding = GroundingEngine(self)
|
grounding = GroundingEngine(self)
|
||||||
grounding_result = grounding.locate(
|
grounding_result = grounding.locate(
|
||||||
|
|||||||
278
agent_v0/agent_v1/core/uia_helper.py
Normal file
278
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
# agent_v0/agent_v1/core/uia_helper.py
|
||||||
|
"""
|
||||||
|
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||||
|
|
||||||
|
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||||
|
Communique via subprocess + stdin/stdout JSON.
|
||||||
|
|
||||||
|
Pourquoi un helper Rust ?
|
||||||
|
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||||
|
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||||
|
- Pas de problèmes de threading COM en Python
|
||||||
|
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||||
|
|
||||||
|
Architecture :
|
||||||
|
Python executor
|
||||||
|
↓ subprocess.run
|
||||||
|
lea_uia.exe query --x 812 --y 436
|
||||||
|
↓ UIA API Windows
|
||||||
|
JSON response
|
||||||
|
↓ stdout
|
||||||
|
Python executor parse JSON
|
||||||
|
|
||||||
|
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||||
|
toutes les méthodes retournent None → fallback vision automatique.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Default timeout for UIA helper calls (in seconds)
|
||||||
|
_DEFAULT_TIMEOUT = 5.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UiaElement:
    """Python-side representation of a UI Automation element.

    Instances are normally built with :meth:`from_dict` from the JSON
    object describing one element, and can be turned back into a plain
    dictionary with :meth:`to_dict`.
    """

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Rectangle as (x1, y1, x2, y2); center()/width()/height() rely on
    # this ordering.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    # Defaults are pessimistic: an element built without data is neither
    # enabled nor on screen, so is_clickable() is False.
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor descriptions; path_signature() reads their "name" and
    # "control_type" keys.
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the bounding rectangle (integer division)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the rectangle width (x2 - x1)."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the rectangle height (y2 - y1)."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Tell whether the element is enabled, on screen and non-empty."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Return ``"Type[Name] > ... > Type[Name]"`` for the element.

        Parents without a name are skipped. A parent that has a name but
        no ``control_type`` key contributes an empty type instead of
        raising ``KeyError``.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return the element as a plain dictionary (rectangle as a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dictionary, tolerating missing fields.

        Missing keys fall back to the dataclass defaults. ``None`` values
        (JSON ``null``) for the string fields and ``parent_path`` are
        normalised to ``""`` / ``[]`` so later string formatting and
        iteration cannot fail on them. ``bounding_rect`` may be a list or
        a tuple; anything with fewer than four items becomes
        ``(0, 0, 0, 0)``.
        """
        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name") or "",
            control_type=d.get("control_type") or "",
            class_name=d.get("class_name") or "",
            automation_id=d.get("automation_id") or "",
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path") or [],
            process_name=d.get("process_name") or "",
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UIAHelper:
    """Python wrapper around the ``lea_uia.exe`` command-line helper.

    Each query runs the helper as a subprocess with command-line arguments
    and parses the JSON document it prints on stdout. When the helper is
    unavailable, exits with an error, times out or prints something that
    is not a JSON object, the query methods return ``None`` (``False`` for
    :meth:`health`) instead of raising.
    """

    # Extra seconds granted to the subprocess on top of a search timeout,
    # so the helper can report its own timeout before being killed.
    _PROCESS_MARGIN_S = 1.0

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """Locate the helper binary and record whether it can be used.

        Args:
            helper_path: Explicit path to the binary; when empty, the
                standard locations are searched.
            timeout: Default subprocess timeout in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first existing candidate, or ""."""
        # Relative candidates are resolved against the current working
        # directory by os.path.isfile / os.path.abspath.
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Tell whether the helper can be used on this machine.

        Only the platform (Windows) and the presence of the binary are
        checked; the helper is not executed here. Use :meth:`health` for a
        live check.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper was found usable at construction time."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Path of the helper binary ("" when none was found)."""
        return self._helper_path

    def _run(
        self, args: List[str], timeout: Optional[float] = None,
    ) -> Optional[Dict[str, Any]]:
        """Run the helper with *args* and return its JSON object.

        Args:
            args: Command-line arguments appended to the helper path.
            timeout: Subprocess timeout in seconds; defaults to the
                timeout given at construction.

        Returns:
            The decoded JSON object, or ``None`` when the helper is
            unavailable, exits with a non-zero code, prints nothing,
            prints invalid JSON or a JSON value that is not an object,
            times out, or any other error occurs.
        """
        if not self._available:
            return None
        effective_timeout = self._timeout if timeout is None else timeout
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=effective_timeout,
                encoding="utf-8",
                errors="replace",
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            data = json.loads(output)
            # Callers use data.get(...): anything but an object (list,
            # string, number) must be rejected here rather than crash them.
            if not isinstance(data, dict):
                logger.debug("UIAHelper: réponse JSON inattendue (objet attendu)")
                return None
            return data
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({effective_timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    @staticmethod
    def _parse_element(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the ``element`` of a successful response, else ``None``."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data or not isinstance(elem_data, dict):
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers ``health`` with status "ok"."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy; when False the
                helper is called with ``--with-parents=false``.

        Returns:
            The element found, or ``None`` (no element or helper failure).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._parse_element(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by name, with optional filters.

        Args:
            name: Name of the element to find.
            control_type: Control type filter (Button, Edit, MenuItem...).
            automation_id: Automation id filter.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds, forwarded to the
                helper as ``--timeout-ms``.

        Returns:
            The element found, or ``None`` (not found or helper failure).
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        # The helper is asked to search for up to timeout_ms: make sure the
        # subprocess timeout cannot kill it before that search ends.
        process_timeout = max(
            self._timeout, timeout_ms / 1000.0 + self._PROCESS_MARGIN_S
        )
        return self._parse_element(self._run(args, timeout=process_timeout))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Return the element reported by ``capture --max-depth N``.

        Returns ``None`` when the helper fails or reports no element.
        """
        data = self._run(["capture", "--max-depth", str(max_depth)])
        return self._parse_element(data)
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide shared instance (lightweight singleton), created lazily by get_shared_helper()
|
||||||
|
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper, building it on the first call."""
    global _SHARED_HELPER
    if _SHARED_HELPER is not None:
        return _SHARED_HELPER
    # First use: create the instance and keep it for later calls.
    _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||||
Reference in New Issue
Block a user