feat: premier replay E2E + mode apprentissage supervisé

Premier replay fonctionnel de bout en bout (Bloc-notes, Chrome).

Corrections critiques :
- Fix double-lancement agent (Lea.bat start /b + verrou PID)
- Sérialisation replay (threading.Lock dans poll_and_execute)
- Garde UIA bbox >50% écran (rejet conteneurs "Bureau")
- Filtre fenêtres bruit système (systray overflow)
- Auto-nettoyage replays bloqués (paused_need_help)

Cascade visuelle complète dans session_cleaner :
- UIA local (10ms) → template matching (100ms) → serveur docTR/VLM
- Nettoyage bureau pré-replay (clic "Afficher le bureau")
- Crops 80x80 + vlm_description pour chaque clic

Grounding contraint à la fenêtre active :
- Capture croppée à la fenêtre au lieu de l'écran entier
- Conversion coordonnées fenêtre → écran
- Élimine les faux positifs taskbar/systray

Mode apprentissage supervisé (SUPERVISE → capture humaine) :
- Léa passe en mode capture quand elle est perdue
- Capture mini-workflow humain (clics + frappes + combos)
- Fin par Ctrl+Shift+L ou timeout inactivité 10s
- Correction stockée dans target_memory.db via serveur

Deploy Windows complet (grounding.py, policy.py, uia_helper.py).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 07:42:50 +02:00
parent 816b37af98
commit 33c198b827
12 changed files with 1561 additions and 60 deletions
--- a/agent_v0/deploy/windows_client/agent_v1/core/executor.py
+++ b/agent_v0/deploy/windows_client/agent_v1/core/executor.py
@@ -17,6 +17,7 @@ import base64
 import hashlib
 import io
 import os
+import threading
 import time
 import logging

@@ -72,6 +73,8 @@ class ActionExecutorV1:
        # different de celui qui utilise l'instance).
        self._sct = None
        self.running = True
+        # Verrou de sérialisation — une seule action replay à la fois
+        self._replay_lock = threading.Lock()
        # Backoff exponentiel pour le polling replay (evite de marteler le serveur)
        self._poll_backoff = 1.0       # Delai actuel (secondes)
        self._poll_backoff_min = 1.0   # Delai minimal (reset apres succes)
@@ -241,6 +244,107 @@ class ActionExecutorV1:
            logger.warning(f"Acteur gemma4 indisponible : {e}")
            return "EXECUTER"

+    # =========================================================================
+    # UIA local — résolution via lea_uia.exe (helper Rust)
+    # =========================================================================
+
+    def _resolve_via_uia_local(
+        self, uia_target: dict, screen_width: int, screen_height: int,
+    ):
+        """Résoudre une cible via UIA local (lea_uia.exe).
+
+        Le plan contient un uia_target (nom, control_type, parent_path).
+        On appelle le helper Rust qui interroge UIAutomationCore.dll et
+        retourne les coordonnées pixel-perfect de l'élément.
+
+        STRICT : si l'élément trouvé n'appartient pas à la bonne fenêtre
+        parente (comparaison du parent_path), on REFUSE.
+
+        Retourne (x_pct, y_pct) si trouvé ET validé, None sinon.
+        """
+        try:
+            from .uia_helper import get_shared_helper
+            helper = get_shared_helper()
+            if not helper.available:
+                return None
+
+            name = uia_target.get("name", "")
+            control_type = uia_target.get("control_type", "") or None
+            automation_id = uia_target.get("automation_id", "") or None
+            expected_parent_path = uia_target.get("parent_path", []) or []
+
+            if not name:
+                return None
+
+            element = helper.find_by_name(
+                name=name,
+                control_type=control_type,
+                automation_id=automation_id,
+                timeout_ms=1500,
+            )
+            if element is None or not element.is_clickable():
+                logger.debug(f"UIA: '{name}' non trouvé ou non cliquable")
+                return None
+
+            # ── VÉRIFICATION STRICTE du parent_path ──
+            if expected_parent_path:
+                expected_root = None
+                for p in expected_parent_path:
+                    if p.get("control_type", "").lower() in ("fenêtre", "window"):
+                        expected_root = p.get("name", "").strip()
+                        break
+
+                if expected_root:
+                    found_root = None
+                    for p in element.parent_path:
+                        if p.get("control_type", "").lower() in ("fenêtre", "window"):
+                            found_root = p.get("name", "").strip()
+                            break
+
+                    if found_root and expected_root != found_root:
+                        if (expected_root.lower() not in found_root.lower()
+                                and found_root.lower() not in expected_root.lower()):
+                            logger.warning(
+                                f"UIA REJET : '{name}' trouvé dans '{found_root}' "
+                                f"mais attendu dans '{expected_root}'"
+                            )
+                            print(
+                                f"    [UIA] REJET — '{name}' trouvé dans mauvaise fenêtre "
+                                f"({found_root} ≠ {expected_root})"
+                            )
+                            return None
+
+            # ── GARDE : rejeter les éléments géants (conteneurs) ──
+            elem_w = element.width()
+            elem_h = element.height()
+            screen_area = screen_width * screen_height
+            elem_area = elem_w * elem_h
+            if screen_area > 0 and elem_area / screen_area > 0.5:
+                logger.warning(
+                    f"UIA REJET : '{name}' couvre {elem_area / screen_area * 100:.0f}% "
+                    f"de l'écran ({elem_w}x{elem_h}) — conteneur, pas un élément cliquable"
+                )
+                print(
+                    f"    [UIA] REJET — '{name}' trop grand "
+                    f"({elem_w}x{elem_h}, {elem_area / screen_area * 100:.0f}% écran)"
+                )
+                return None
+
+            cx, cy = element.center()
+            if screen_width <= 0 or screen_height <= 0:
+                return None
+
+            x_pct = cx / screen_width
+            y_pct = cy / screen_height
+            if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
+                return None
+
+            return (x_pct, y_pct)
+
+        except Exception as e:
+            logger.debug(f"UIA local resolve erreur : {e}")
+            return None
+
    # =========================================================================
    # Observer — pré-analyse écran avant chaque action
    # =========================================================================
@@ -385,6 +489,11 @@ class ActionExecutorV1:
            "visual_resolved": False,
        }

+        # ── Délai inter-actions (anti race condition mss) ──
+        wait_before = action.get("wait_before", 0.5)
+        if wait_before > 0:
+            time.sleep(wait_before)
+
        try:
            monitor = self.sct.monitors[1]
            width, height = monitor["width"], monitor["height"]
@@ -393,6 +502,14 @@ class ActionExecutorV1:
            x_pct = action.get("x_pct", 0.0)
            y_pct = action.get("y_pct", 0.0)

+            # ── Diagnostic résolution ──
+            logger.info(
+                f"[REPLAY] Action {action_id} ({action_type}) — "
+                f"écran replay: {width}x{height}, "
+                f"x_pct={x_pct:.4f}, y_pct={y_pct:.4f} "
+                f"→ pixel ({int(x_pct * width)}, {int(y_pct * height)})"
+            )
+
            # Extraire le nom de l'application depuis un titre de fenêtre
            def _app_name(title):
                for sep in [" – ", " - ", " — "]:
@@ -477,8 +594,27 @@ class ActionExecutorV1:
                            return result
                        # EXECUTER → continuer normalement

-            if visual_mode and target_spec and server_url:
-                # ── GROUNDING : localisation pure via GroundingEngine ──
+            # ── UIA local : résolution rapide via lea_uia.exe ──
+            uia_resolved = False
+            if visual_mode and target_spec and action_type == "click":
+                resolve_order = target_spec.get("resolve_order", [])
+                uia_target = target_spec.get("uia_target")
+                if resolve_order and resolve_order[0] == "uia" and uia_target:
+                    uia_coords = self._resolve_via_uia_local(uia_target, width, height)
+                    if uia_coords:
+                        x_pct, y_pct = uia_coords
+                        result["visual_resolved"] = True
+                        result["resolution_method"] = "uia_local"
+                        result["resolution_score"] = 0.95
+                        uia_resolved = True
+                        print(f"    [UIA] résolu en local: ({x_pct:.4f}, {y_pct:.4f})")
+                        logger.info(
+                            f"UIA local OK : {uia_target.get('name', '?')} "
+                            f"→ ({x_pct:.4f}, {y_pct:.4f})"
+                        )
+
+            if not uia_resolved and visual_mode and target_spec and server_url:
+                # ── GROUNDING : localisation pure via GroundingEngine (fallback) ──
                from .grounding import GroundingEngine
                grounding = GroundingEngine(self)
                grounding_result = grounding.locate(
@@ -510,8 +646,8 @@ class ActionExecutorV1:

            if action_type == "click":
                # Si visual_mode est activé, le resolve DOIT réussir.
-                # Pas de fallback blind — on arrête le replay si la cible
-                # n'est pas trouvée visuellement. C'est un RPA VISUEL.
+                # Pas de fallback blind — Léa VOIT l'écran et CHERCHE
+                # l'élément. Si toute la cascade échoue → pause supervisée.
                if visual_mode and not result.get("visual_resolved"):
                    # ── Policy : décider quoi faire quand grounding échoue ──
                    from .policy import PolicyEngine, Decision
@@ -533,7 +669,6 @@ class ActionExecutorV1:
                    )

                    if policy_decision.decision == Decision.RETRY:
-                        # Re-tenter le grounding après correction (popup fermée, etc.)
                        resolved2 = self._resolve_target_visual(
                            server_url, target_spec, x_pct, y_pct, width, height
                        )
@@ -543,7 +678,6 @@ class ActionExecutorV1:
                            result["visual_resolved"] = True
                            print(f"    [POLICY] Re-resolve OK après {policy_decision.action_taken}")
                        else:
-                            # Re-resolve échoué — SUPERVISE (rendre la main)
                            result["success"] = False
                            result["error"] = "target_not_found"
                            result["target_description"] = target_desc
@@ -1200,20 +1334,17 @@ Example: x_pct=0.50, y_pct=0.30"""
    def poll_and_execute(self, session_id: str, server_url: str, machine_id: str = "default") -> bool:
        """
        Poll le serveur pour recuperer et executer la prochaine action.
-
-        1. GET /replay/next pour recuperer l'action
-        2. Execute l'action (clic, texte, etc.)
-        3. POST /replay/result avec le resultat + screenshot
-
-        Args:
-            session_id: Identifiant de la session courante
-            server_url: URL de base du serveur streaming
-            machine_id: Identifiant de la machine (pour le replay multi-machine)
-
-        Retourne True si une action a ete executee, False sinon.
-        IMPORTANT: Si une action est recue, le resultat est TOUJOURS rapporte
-        au serveur (meme en cas d'erreur d'execution).
+        Sérialisé par _replay_lock (non bloquant) : retourne False si une action est déjà en cours.
        """
+        if not self._replay_lock.acquire(blocking=False):
+            return False
+        try:
+            return self._poll_and_execute_inner(session_id, server_url, machine_id)
+        finally:
+            self._replay_lock.release()
+
+    def _poll_and_execute_inner(self, session_id: str, server_url: str, machine_id: str) -> bool:
+        """Implémentation interne de poll_and_execute (protégée par _replay_lock)."""
        import requests

        replay_next_url = f"{server_url}/traces/stream/replay/next"
--- a/agent_v0/deploy/windows_client/agent_v1/core/grounding.py
+++ b/agent_v0/deploy/windows_client/agent_v1/core/grounding.py
@@ -0,0 +1,214 @@
+# agent_v1/core/grounding.py
+"""
+Module Grounding — localisation pure d'éléments UI sur l'écran.
+
+Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
+Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
+
+Stratégies disponibles (cascade configurable) :
+1. Serveur SomEngine + VLM (GPU distant)
+2. Template matching local (CPU, ~10ms)
+3. VLM local direct (CPU/GPU local)
+
+Séparé de Policy (qui décide quoi faire quand grounding échoue).
+Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
+"""
+
+import base64
+import logging
+import os
+import time
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class GroundingResult:
+    """Résultat d'une tentative de localisation visuelle."""
+    found: bool                     # L'élément a été trouvé
+    x_pct: float = 0.0             # Position X en % (0.0-1.0)
+    y_pct: float = 0.0             # Position Y en % (0.0-1.0)
+    method: str = ""               # Méthode utilisée (server_som, anchor_template, vlm_direct...)
+    score: float = 0.0             # Confiance (0.0-1.0)
+    elapsed_ms: float = 0.0        # Temps de résolution
+    detail: str = ""               # Info supplémentaire (label trouvé, raison échec)
+    raw: Optional[Dict] = None     # Données brutes du resolver (pour debug)
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "found": self.found,
+            "x_pct": self.x_pct,
+            "y_pct": self.y_pct,
+            "method": self.method,
+            "score": round(self.score, 3),
+            "elapsed_ms": round(self.elapsed_ms, 1),
+            "detail": self.detail,
+        }
+
+
+# Résultat singleton pour "pas trouvé"
+NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
+
+
+class GroundingEngine:
+    """Moteur de localisation visuelle d'éléments UI.
+
+    Encapsule la cascade de résolution (serveur → template → VLM local)
+    avec une interface unifiée. Ne prend aucune décision — c'est le rôle
+    de PolicyEngine.
+
+    Usage :
+        engine = GroundingEngine(executor)
+        result = engine.locate(server_url, target_spec, x_pct, y_pct, screen_w, screen_h)
+        if result.found:
+            click(result.x_pct, result.y_pct)
+    """
+
+    def __init__(self, executor):
+        """
+        Args:
+            executor: ActionExecutorV1 — fournit les méthodes de résolution existantes.
+        """
+        self._executor = executor
+
+    def locate(
+        self,
+        server_url: str,
+        target_spec: Dict[str, Any],
+        fallback_x: float,
+        fallback_y: float,
+        screen_width: int,
+        screen_height: int,
+        strategies: Optional[List[str]] = None,
+    ) -> GroundingResult:
+        """Localiser un élément UI sur l'écran.
+
+        Exécute la cascade de stratégies dans l'ordre et retourne
+        dès qu'une stratégie trouve l'élément.
+
+        Args:
+            server_url: URL du serveur (SomEngine + VLM GPU)
+            target_spec: Spécification de la cible (by_text, anchor, vlm_description...)
+            fallback_x, fallback_y: Coordonnées de fallback (enregistrement)
+            screen_width, screen_height: Résolution écran
+            strategies: Liste ordonnée de stratégies à essayer.
+                        Par défaut : ["server", "template", "vlm_local"]
+
+        Returns:
+            GroundingResult avec found=True et coordonnées, sinon found=False (detail = raison de l'échec)
+        """
+        if strategies is None:
+            strategies = ["server", "template", "vlm_local"]
+
+        # ── Apprentissage : réordonner les stratégies selon l'historique ──
+        # Si le Learning sait quelle méthode marche pour cette cible,
+        # la mettre en premier. C'est la boucle d'apprentissage.
+        learned = target_spec.get("_learned_strategy", "")
+        if learned:
+            strategy_map = {
+                "som_text_match": "server",
+                "grounding_vlm": "server",
+                "server_som": "server",
+                "anchor_template": "template",
+                "template_matching": "template",
+                "hybrid_text_direct": "vlm_local",
+                "hybrid_vlm_text": "vlm_local",
+                "vlm_direct": "vlm_local",
+            }
+            preferred = strategy_map.get(learned, "")
+            if preferred and preferred in strategies:
+                strategies = [preferred] + [s for s in strategies if s != preferred]
+                logger.info(
+                    f"Grounding: stratégie réordonnée par l'apprentissage → "
+                    f"{strategies} (learned={learned})"
+                )
+
+        t_start = time.time()
+        screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
+        if not screenshot_b64:
+            return GroundingResult(
+                found=False, detail="Capture screenshot échouée",
+                elapsed_ms=(time.time() - t_start) * 1000,
+            )
+
+        for strategy in strategies:
+            result = self._try_strategy(
+                strategy, server_url, screenshot_b64, target_spec,
+                fallback_x, fallback_y, screen_width, screen_height,
+            )
+            if result.found:
+                result.elapsed_ms = (time.time() - t_start) * 1000
+                return result
+
+        return GroundingResult(
+            found=False,
+            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
+            elapsed_ms=(time.time() - t_start) * 1000,
+        )
+
+    def _try_strategy(
+        self,
+        strategy: str,
+        server_url: str,
+        screenshot_b64: str,
+        target_spec: Dict[str, Any],
+        fallback_x: float,
+        fallback_y: float,
+        screen_width: int,
+        screen_height: int,
+    ) -> GroundingResult:
+        """Essayer une stratégie de grounding unique."""
+
+        if strategy == "server" and server_url:
+            raw = self._executor._server_resolve_target(
+                server_url, screenshot_b64, target_spec,
+                fallback_x, fallback_y, screen_width, screen_height,
+            )
+            if raw and raw.get("resolved"):
+                return GroundingResult(
+                    found=True,
+                    x_pct=raw["x_pct"],
+                    y_pct=raw["y_pct"],
+                    method=raw.get("method", "server"),
+                    score=raw.get("score", 0.0),
+                    detail=raw.get("matched_element", {}).get("label", ""),
+                    raw=raw,
+                )
+
+        elif strategy == "template":
+            anchor_b64 = target_spec.get("anchor_image_base64", "")
+            if anchor_b64:
+                raw = self._executor._template_match_anchor(
+                    screenshot_b64, anchor_b64, screen_width, screen_height,
+                )
+                if raw and raw.get("resolved"):
+                    return GroundingResult(
+                        found=True,
+                        x_pct=raw["x_pct"],
+                        y_pct=raw["y_pct"],
+                        method="anchor_template",
+                        score=raw.get("score", 0.0),
+                        raw=raw,
+                    )
+
+        elif strategy == "vlm_local":
+            by_text = target_spec.get("by_text", "")
+            vlm_desc = target_spec.get("vlm_description", "")
+            if vlm_desc or by_text:
+                raw = self._executor._hybrid_vlm_resolve(
+                    screenshot_b64, target_spec, screen_width, screen_height,
+                )
+                if raw and raw.get("resolved"):
+                    return GroundingResult(
+                        found=True,
+                        x_pct=raw["x_pct"],
+                        y_pct=raw["y_pct"],
+                        method=raw.get("method", "vlm_local"),
+                        score=raw.get("score", 0.0),
+                        detail=raw.get("matched_element", {}).get("label", ""),
+                        raw=raw,
+                    )
+
+        return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
--- a/agent_v0/deploy/windows_client/agent_v1/core/policy.py
+++ b/agent_v0/deploy/windows_client/agent_v1/core/policy.py
@@ -0,0 +1,152 @@
+# agent_v1/core/policy.py
+"""
+Module Policy — décisions intelligentes quand le grounding échoue.
+
+Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
+Ne localise AUCUN élément — c'est le rôle du Grounding.
+
+Décisions possibles :
+- RETRY : re-tenter le grounding (après popup fermée, par exemple)
+- SKIP : l'action n'est plus nécessaire (état déjà atteint)
+- ABORT : arrêter le workflow (état incohérent)
+- SUPERVISE : rendre la main à l'utilisateur
+
+Séparé de Grounding (qui localise les éléments).
+Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
+"""
+
+import logging
+import os
+import time
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class Decision(Enum):
+    """Décisions possibles quand le grounding échoue."""
+    RETRY = "retry"             # Re-tenter (après correction : popup fermée, navigation...)
+    SKIP = "skip"               # Action inutile (état déjà atteint)
+    ABORT = "abort"             # Arrêter le workflow (état incohérent)
+    SUPERVISE = "supervise"     # Rendre la main à l'utilisateur (Léa dit "je bloque")
+    CONTINUE = "continue"       # Continuer malgré l'échec (action non critique)
+
+
+@dataclass
+class PolicyDecision:
+    """Résultat d'une décision Policy."""
+    decision: Decision
+    reason: str                     # Explication de la décision
+    action_taken: str = ""          # Action corrective effectuée (ex: "popup fermée")
+    elapsed_ms: float = 0.0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "decision": self.decision.value,
+            "reason": self.reason,
+            "action_taken": self.action_taken,
+            "elapsed_ms": round(self.elapsed_ms, 1),
+        }
+
+
+class PolicyEngine:
+    """Moteur de décision quand le grounding échoue.
+
+    Cascade de décision :
+    1. Popup détectée ? → fermer et RETRY
+    2. Acteur gemma4 → SKIP / ABORT / SUPERVISE
+    3. Fallback → SUPERVISE (rendre la main)
+
+    Usage :
+        policy = PolicyEngine(executor)
+        decision = policy.decide(action, target_spec, retry_count=0)
+        if decision.decision == Decision.RETRY:
+            # re-tenter le grounding
+        elif decision.decision == Decision.SKIP:
+            # marquer comme réussi, passer à la suite
+    """
+
+    def __init__(self, executor):
+        self._executor = executor
+
+    def decide(
+        self,
+        action: Dict[str, Any],
+        target_spec: Dict[str, Any],
+        retry_count: int = 0,
+        max_retries: int = 1,
+    ) -> PolicyDecision:
+        """Décider quoi faire quand le grounding a échoué.
+
+        Cascade :
+        1. Si c'est le premier essai → tenter de fermer une popup → RETRY
+        2. Si retry déjà fait → demander à l'acteur gemma4
+        3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
+
+        Args:
+            action: L'action qui a échoué
+            target_spec: La cible non trouvée
+            retry_count: Nombre de retries déjà faits
+            max_retries: Maximum de retries autorisés
+        """
+        t_start = time.time()
+
+        # ── Étape 1 : Tentative de fermeture popup (premier essai) ──
+        if retry_count == 0:
+            popup_handled = self._try_close_popup()
+            if popup_handled:
+                return PolicyDecision(
+                    decision=Decision.RETRY,
+                    reason="Popup détectée et fermée, re-tentative",
+                    action_taken="popup_closed",
+                    elapsed_ms=(time.time() - t_start) * 1000,
+                )
+
+        # ── Étape 2 : Max retries atteint → acteur gemma4 ──
+        if retry_count >= max_retries:
+            actor_decision = self._ask_actor(action, target_spec)
+
+            if actor_decision == "PASSER":
+                return PolicyDecision(
+                    decision=Decision.SKIP,
+                    reason="Acteur gemma4 : l'état est déjà atteint",
+                    elapsed_ms=(time.time() - t_start) * 1000,
+                )
+            elif actor_decision == "STOPPER":
+                return PolicyDecision(
+                    decision=Decision.ABORT,
+                    reason="Acteur gemma4 : état incohérent, arrêt",
+                    elapsed_ms=(time.time() - t_start) * 1000,
+                )
+            else:
+                # EXECUTER ou inconnu → pause supervisée
+                return PolicyDecision(
+                    decision=Decision.SUPERVISE,
+                    reason=f"Acteur gemma4 : {actor_decision}, pause supervisée",
+                    elapsed_ms=(time.time() - t_start) * 1000,
+                )
+
+        # ── Étape 3 : Encore des retries disponibles → RETRY ──
+        return PolicyDecision(
+            decision=Decision.RETRY,
+            reason=f"Retry {retry_count + 1}/{max_retries}",
+            elapsed_ms=(time.time() - t_start) * 1000,
+        )
+
+    def _try_close_popup(self) -> bool:
+        """Tenter de fermer une popup via le handler VLM existant."""
+        try:
+            return self._executor._handle_popup_vlm()
+        except Exception as e:
+            logger.debug(f"Policy: popup handler échoué : {e}")
+            return False
+
+    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
+        """Demander à gemma4 de décider (PASSER/EXECUTER/STOPPER)."""
+        try:
+            return self._executor._actor_decide(action, target_spec)
+        except Exception as e:
+            logger.debug(f"Policy: acteur gemma4 échoué : {e}")
+            return "EXECUTER"  # Fallback → supervisé
--- a/agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
+++ b/agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
@@ -0,0 +1,294 @@
+# core/workflow/uia_helper.py
+"""
+UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
+
+Expose une API Python simple pour interroger UIA via le binaire Rust.
+Communique via subprocess : arguments CLI en entrée, réponse JSON sur stdout.
+
+Pourquoi un helper Rust ?
+- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
+- Binaire standalone ~500 Ko, aucune dépendance runtime
+- Pas de problèmes de threading COM en Python
+- Crash-safe (le crash du helper n'affecte pas l'agent Python)
+
+Architecture :
+    Python executor
+        ↓ subprocess.run
+    lea_uia.exe query --x 812 --y 436
+        ↓ UIA API Windows
+    JSON response
+        ↓ stdout
+    Python executor parse JSON
+
+Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
+toutes les méthodes retournent None → fallback vision automatique.
+"""
+
+import json
+import logging
+import os
+import platform
+import subprocess
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+# Default timeout for calls to the UIA helper, in seconds.
+_DEFAULT_TIMEOUT = 5.0
+
+# Hide the console window when spawning lea_uia.exe on Windows.
+# Without this flag every call (one per user click while recording) briefly
+# flashes a black cmd window on screen, which slows the mouse down and
+# pollutes the captured screenshots (the VLM may "see" the lea_uia.exe path
+# as the clicked text).
+#
+# 0x08000000 is the Windows API value of CREATE_NO_WINDOW; getattr() picks up
+# the constant when Python already exposes it on Windows. On Linux/Mac the
+# value is 0, i.e. no creation flag is passed.
+_SUBPROCESS_CREATION_FLAGS = (
+    getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
+    if platform.system() == "Windows"
+    else 0
+)
+
+
+@dataclass
+class UiaElement:
+    """Python-side representation of a UIA element.
+
+    Instances are normally built with :meth:`from_dict` from the ``element``
+    object of a helper response and serialized back with :meth:`to_dict`.
+    The defaults are conservative: a default-constructed element is disabled,
+    off-screen and has an empty rectangle, so it is never clickable.
+    """
+    name: str = ""
+    control_type: str = ""
+    class_name: str = ""
+    automation_id: str = ""
+    # Read as (x1, y1, x2, y2) by center(), width() and height().
+    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
+    is_enabled: bool = False
+    is_offscreen: bool = True
+    # Ancestor descriptors; path_signature() reads their "control_type" and
+    # "name" keys.
+    parent_path: List[Dict[str, str]] = field(default_factory=list)
+    process_name: str = ""
+
+    def center(self) -> Tuple[int, int]:
+        """Return the center of the bounding rectangle (integer pixels)."""
+        x1, y1, x2, y2 = self.bounding_rect
+        return ((x1 + x2) // 2, (y1 + y2) // 2)
+
+    def width(self) -> int:
+        """Return the rectangle width (x2 - x1)."""
+        return self.bounding_rect[2] - self.bounding_rect[0]
+
+    def height(self) -> int:
+        """Return the rectangle height (y2 - y1)."""
+        return self.bounding_rect[3] - self.bounding_rect[1]
+
+    def is_clickable(self) -> bool:
+        """Return True when the element is enabled, on-screen and non-empty."""
+        return (
+            self.is_enabled
+            and not self.is_offscreen
+            and self.width() > 0
+            and self.height() > 0
+        )
+
+    def path_signature(self) -> str:
+        """Return a ``Type[Name] > Type[Name]`` signature of the parent chain.
+
+        Parents without a truthy ``name`` are skipped; the element itself is
+        always appended last. A parent that has a name but no
+        ``control_type`` contributes an empty type instead of raising.
+        """
+        parts = [
+            f"{p.get('control_type', '')}[{p['name']}]"
+            for p in self.parent_path
+            if p.get("name")
+        ]
+        parts.append(f"{self.control_type}[{self.name}]")
+        return " > ".join(parts)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to a plain dict (the rectangle becomes a list)."""
+        return {
+            "name": self.name,
+            "control_type": self.control_type,
+            "class_name": self.class_name,
+            "automation_id": self.automation_id,
+            "bounding_rect": list(self.bounding_rect),
+            "is_enabled": self.is_enabled,
+            "is_offscreen": self.is_offscreen,
+            "parent_path": self.parent_path,
+            "process_name": self.process_name,
+        }
+
+    @classmethod
+    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
+        """Build an element from a dict, tolerating missing or null fields.
+
+        Args:
+            d: Mapping with the same keys as :meth:`to_dict`. Every key is
+                optional.
+
+        Returns:
+            A new ``UiaElement``. ``bounding_rect`` accepts a list or a tuple
+            of at least four values (extra values are dropped); anything else
+            yields ``(0, 0, 0, 0)``. Null string fields become ``""`` and a
+            null or non-sequence ``parent_path`` becomes ``[]``.
+        """
+        rect = d.get("bounding_rect")
+        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
+            rect = tuple(rect[:4])
+        else:
+            rect = (0, 0, 0, 0)
+
+        # Copy the list so the element does not alias the caller's data, and
+        # map JSON null to an empty path so path_signature() can iterate it.
+        parents = d.get("parent_path")
+        parents = list(parents) if isinstance(parents, (list, tuple)) else []
+
+        return cls(
+            name=d.get("name") or "",
+            control_type=d.get("control_type") or "",
+            class_name=d.get("class_name") or "",
+            automation_id=d.get("automation_id") or "",
+            bounding_rect=rect,
+            is_enabled=d.get("is_enabled", False),
+            is_offscreen=d.get("is_offscreen", True),
+            parent_path=parents,
+            process_name=d.get("process_name") or "",
+        )
+
+
+class UIAHelper:
+    """Python wrapper around the ``lea_uia.exe`` UI Automation helper.
+
+    Every query spawns the helper with command-line arguments and parses the
+    JSON it prints on stdout. When the helper is unusable (non-Windows
+    platform, binary not found) or a call fails, the query methods return
+    ``None`` (``False`` for :meth:`health`) instead of raising.
+    """
+
+    # Extra seconds granted to the subprocess on top of a search timeout that
+    # is forwarded to the helper, so the process is not killed at the very
+    # moment the helper's own search budget runs out.
+    _FIND_GRACE_S = 1.0
+
+    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
+        """Locate the helper binary and record whether it can be used.
+
+        Args:
+            helper_path: Explicit path to ``lea_uia.exe``; when empty the
+                standard locations are searched.
+            timeout: Default per-call subprocess timeout, in seconds.
+        """
+        self._helper_path = helper_path or self._find_helper()
+        self._timeout = timeout
+        self._available = self._check_available()
+
+    def _find_helper(self) -> str:
+        """Return the absolute path of the first existing candidate, or ""."""
+        candidates = [
+            r"C:\Lea\helpers\lea_uia.exe",
+            os.path.join(os.path.dirname(__file__), "..", "..",
+                         "agent_rust", "lea_uia", "target",
+                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
+            "./helpers/lea_uia.exe",
+            "lea_uia.exe",
+        ]
+        for path in candidates:
+            if os.path.isfile(path):
+                return os.path.abspath(path)
+        return ""
+
+    def _check_available(self) -> bool:
+        """Return True when running on Windows and the helper binary exists.
+
+        No health probe is run here; call :meth:`health` to check that the
+        helper actually answers.
+        """
+        if platform.system() != "Windows":
+            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
+            return False
+        if not self._helper_path:
+            logger.debug("UIAHelper: lea_uia.exe introuvable")
+            return False
+        if not os.path.isfile(self._helper_path):
+            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
+            return False
+        return True
+
+    @property
+    def available(self) -> bool:
+        """Whether the helper was found usable at construction time."""
+        return self._available
+
+    @property
+    def helper_path(self) -> str:
+        """Path of the helper binary ("" when none was found)."""
+        return self._helper_path
+
+    def _run(
+        self,
+        args: List[str],
+        timeout: Optional[float] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """Run the helper with ``args`` and return its parsed JSON object.
+
+        Args:
+            args: Command-line arguments appended after the helper path.
+            timeout: Subprocess timeout in seconds; defaults to the timeout
+                given at construction.
+
+        Returns:
+            The decoded JSON object, or ``None`` when the helper is
+            unavailable, exits non-zero, prints nothing, times out, prints
+            invalid JSON or prints JSON that is not an object.
+        """
+        if not self._available:
+            return None
+        effective_timeout = self._timeout if timeout is None else timeout
+        try:
+            result = subprocess.run(
+                [self._helper_path] + args,
+                capture_output=True,
+                text=True,
+                timeout=effective_timeout,
+                encoding="utf-8",
+                errors="replace",
+                creationflags=_SUBPROCESS_CREATION_FLAGS,
+            )
+            if result.returncode != 0:
+                logger.debug(
+                    f"UIAHelper: exit code {result.returncode}, "
+                    f"stderr: {result.stderr[:200]}"
+                )
+                return None
+            output = result.stdout.strip()
+            if not output:
+                return None
+            parsed = json.loads(output)
+        except subprocess.TimeoutExpired:
+            logger.debug(f"UIAHelper: timeout ({effective_timeout}s) sur {args}")
+            return None
+        except json.JSONDecodeError as e:
+            logger.debug(f"UIAHelper: JSON invalide — {e}")
+            return None
+        except Exception as e:
+            # Deliberate catch-all: a broken helper must never crash the agent.
+            logger.debug(f"UIAHelper: erreur {e}")
+            return None
+        # Callers use .get() on the result; reject arrays, strings, numbers.
+        if not isinstance(parsed, dict):
+            logger.debug("UIAHelper: JSON inattendu (objet attendu)")
+            return None
+        return parsed
+
+    @staticmethod
+    def _element_from_response(
+        data: Optional[Dict[str, Any]],
+    ) -> Optional[UiaElement]:
+        """Extract the ``element`` of a ``status == "ok"`` response, else None."""
+        if not data or data.get("status") != "ok":
+            return None
+        elem_data = data.get("element")
+        if not elem_data or not isinstance(elem_data, dict):
+            return None
+        return UiaElement.from_dict(elem_data)
+
+    def health(self) -> bool:
+        """Return True when the helper answers ``health`` with status "ok"."""
+        data = self._run(["health"])
+        return data is not None and data.get("status") == "ok"
+
+    def query_at(
+        self,
+        x: int,
+        y: int,
+        with_parents: bool = True,
+    ) -> Optional[UiaElement]:
+        """Return the UIA element at a screen position.
+
+        Args:
+            x, y: Absolute pixel coordinates.
+            with_parents: Include the parent hierarchy; when False the
+                helper is called with ``--with-parents=false``.
+
+        Returns:
+            The element if found, ``None`` otherwise (no element, or UIA
+            unavailable).
+        """
+        args = ["query", "--x", str(x), "--y", str(y)]
+        if not with_parents:
+            args.append("--with-parents=false")
+        return self._element_from_response(self._run(args))
+
+    def find_by_name(
+        self,
+        name: str,
+        control_type: Optional[str] = None,
+        automation_id: Optional[str] = None,
+        window: Optional[str] = None,
+        timeout_ms: int = 2000,
+    ) -> Optional[UiaElement]:
+        """Search for an element by name, with optional filters.
+
+        Args:
+            name: Exact name of the element.
+            control_type: Control type (Button, Edit, MenuItem...).
+            automation_id: Automation ID.
+            window: Restrict the search to a specific window.
+            timeout_ms: Search timeout forwarded to the helper, in
+                milliseconds.
+
+        Returns:
+            The element if found, ``None`` otherwise.
+        """
+        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
+        if control_type:
+            args.extend(["--control-type", control_type])
+        if automation_id:
+            args.extend(["--automation-id", automation_id])
+        if window:
+            args.extend(["--window", window])
+
+        # The subprocess must outlive the search budget given to the helper,
+        # otherwise a large timeout_ms is cut short by the default timeout.
+        subprocess_timeout = max(
+            self._timeout,
+            timeout_ms / 1000.0 + self._FIND_GRACE_S,
+        )
+        return self._element_from_response(
+            self._run(args, timeout=subprocess_timeout)
+        )
+
+    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
+        """Capture the focused element and its context.
+
+        Args:
+            max_depth: Value passed to the helper as ``--max-depth``.
+
+        Returns:
+            The element if the helper reports one, ``None`` otherwise.
+        """
+        data = self._run(["capture", "--max-depth", str(max_depth)])
+        return self._element_from_response(data)
+
+
+# Shared module-level instance (lightweight singleton), created on first use.
+_SHARED_HELPER: Optional[UIAHelper] = None
+
+
+def get_shared_helper() -> UIAHelper:
+    """Return the shared UIAHelper, creating it on the first call."""
+    global _SHARED_HELPER
+    helper = _SHARED_HELPER
+    if helper is None:
+        helper = _SHARED_HELPER = UIAHelper()
+    return helper