feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay

Refonte majeure du système Agent Chat et ajout de nombreux modules :

- Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi")
- GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures
- Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action
- Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay
- Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel
- ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3)
- IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés
- Dashboard : nouvelles pages gestures, streaming, extractions
- Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants
- Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 10:02:09 +01:00
parent 74a1cb4e03
commit cf495dd82f
93 changed files with 12463 additions and 1080 deletions
--- a/agent_chat/app.py
+++ b/agent_chat/app.py
@@ -44,6 +44,7 @@ from .confirmation import ConfirmationLoop, ConfirmationStatus, RiskLevel, get_c
 from .response_generator import ResponseGenerator, get_response_generator
 from .conversation_manager import ConversationManager, get_conversation_manager
 from .autonomous_planner import AutonomousPlanner, get_autonomous_planner, ExecutionPlan
+from .gesture_catalog import GestureCatalog

 # GPU Resource Manager (optional)
 try:
@@ -78,6 +79,7 @@ confirmation_loop: Optional[ConfirmationLoop] = None
 response_generator: Optional[ResponseGenerator] = None
 conversation_manager: Optional[ConversationManager] = None
 autonomous_planner: Optional[AutonomousPlanner] = None
+gesture_catalog: Optional[GestureCatalog] = None

 # Execution components
 workflow_pipeline = None
@@ -99,6 +101,23 @@ execution_status = {
 }
 command_history: List[Dict[str, Any]] = []

+# Copilot state — suivi du mode pas-à-pas
+_copilot_sessions: Dict[str, Dict[str, Any]] = {}
+
+_COPILOT_KEYWORDS = [
+    "copilot", "co-pilot",
+    "pas à pas", "pas-à-pas", "pas a pas",
+    "étape par étape", "etape par etape",
+    "step by step", "une étape à la fois",
+    "mode assisté", "mode assiste", "mode guidé", "mode guide",
+]
+
+
+def _detect_copilot_mode(message: str) -> bool:
+    """Détecter si l'utilisateur demande le mode Copilot."""
+    msg_lower = message.lower()
+    return any(kw in msg_lower for kw in _COPILOT_KEYWORDS)
+

 def init_system():
    """Initialiser tous les composants du système."""
@@ -218,6 +237,15 @@ def init_system():
        logger.warning(f"⚠ AutonomousPlanner: {e}")
        autonomous_planner = None

+    # 6. GestureCatalog (raccourcis clavier universels)
+    global gesture_catalog
+    try:
+        gesture_catalog = GestureCatalog()
+        logger.info(f"✓ GestureCatalog: {len(gesture_catalog.list_all())} gestes chargés")
+    except Exception as e:
+        logger.warning(f"⚠ GestureCatalog: {e}")
+        gesture_catalog = None
+

 # =============================================================================
 # Routes Web
@@ -486,35 +514,53 @@ def api_chat():
        action_taken = "denied"

    elif intent.intent_type == IntentType.EXECUTE:
-        # Exécuter un workflow
-        if matcher and intent.workflow_hint:
-            match = matcher.find_workflow(intent.workflow_hint, min_confidence=0.2)
+        # Résolution en 3 niveaux :
+        # 1. Workflow appris → exécution directe ou copilot
+        # 2. Geste primitif (GestureCatalog) → raccourci clavier
+        # 3. "Je ne sais pas, montre-moi !"
+        query = intent.workflow_hint or intent.raw_query

-            if match:
-                # Évaluer le risque
-                risk = confirmation_loop.evaluate_risk(
-                    match.workflow_name,
-                    {**match.extracted_params, **intent.parameters}
+        if matcher and query:
+            match = matcher.find_workflow(query, min_confidence=0.2)
+        else:
+            match = None
+
+        if match:
+            # Niveau 1 : Workflow appris
+            risk = confirmation_loop.evaluate_risk(
+                match.workflow_name,
+                {**match.extracted_params, **intent.parameters}
+            )
+
+            if confirmation_loop.requires_confirmation(risk):
+                conf = confirmation_loop.create_confirmation_request(
+                    workflow_name=match.workflow_name,
+                    parameters={**match.extracted_params, **intent.parameters},
+                    action_type="execute",
+                    risk_level=risk
                )
+                conversation_manager.set_pending_confirmation(session, conf)
+                response = response_generator.generate_confirmation_request(conf)
+                result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
+                action_taken = "confirmation_requested"

-                if confirmation_loop.requires_confirmation(risk):
-                    # Créer une demande de confirmation
-                    conf = confirmation_loop.create_confirmation_request(
-                        workflow_name=match.workflow_name,
-                        parameters={**match.extracted_params, **intent.parameters},
-                        action_type="execute",
-                        risk_level=risk
+            else:
+                all_params = {**match.extracted_params, **intent.parameters}
+                use_copilot = _detect_copilot_mode(message)
+
+                if use_copilot:
+                    result = {
+                        "success": True,
+                        "workflow": match.workflow_name,
+                        "params": all_params,
+                        "confidence": match.confidence,
+                        "mode": "copilot",
+                    }
+                    action_taken = "copilot_started"
+                    socketio.start_background_task(
+                        execute_workflow_copilot, match, all_params
                    )
-                    conversation_manager.set_pending_confirmation(session, conf)
-
-                    # Générer la réponse de confirmation
-                    response = response_generator.generate_confirmation_request(conf)
-                    result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
-                    action_taken = "confirmation_requested"
-
                else:
-                    # Exécuter directement
-                    all_params = {**match.extracted_params, **intent.parameters}
                    result = {
                        "success": True,
                        "workflow": match.workflow_name,
@@ -522,12 +568,31 @@ def api_chat():
                        "confidence": match.confidence
                    }
                    action_taken = "executed"
-
                    socketio.start_background_task(execute_workflow, match, all_params)
+
+        elif gesture_catalog and query:
+            # Niveau 2 : Geste primitif (raccourci clavier)
+            gesture_match = gesture_catalog.match(query, min_score=0.6)
+            if gesture_match:
+                gesture, score = gesture_match
+                result = {
+                    "gesture": True,
+                    "gesture_name": gesture.name,
+                    "gesture_keys": "+".join(gesture.keys),
+                    "gesture_id": gesture.id,
+                    "confidence": score,
+                }
+                action_taken = "gesture_executed"
+                # Exécuter le geste via le streaming server
+                socketio.start_background_task(
+                    _execute_gesture, gesture
+                )
            else:
-                result = {"not_found": True, "query": intent.workflow_hint}
+                # Niveau 3 : Inconnu → "montre-moi !"
+                result = {"not_found": True, "query": query, "teach_me": True}
        else:
-            result = {"error": "Pas de workflow spécifié"}
+            # Niveau 3 : Pas de query exploitable
+            result = {"not_found": True, "query": query or "", "teach_me": True}

    elif intent.intent_type == IntentType.LIST:
        # Lister les workflows avec métadonnées enrichies
@@ -594,6 +659,10 @@ def api_chat():
        result = {}
        action_taken = "help_shown"

+    elif intent.intent_type == IntentType.GREETING:
+        result = {}
+        action_taken = "greeting"
+
    elif intent.clarification_needed:
        result = {"clarification_needed": True}
        action_taken = "clarification_requested"
@@ -728,122 +797,25 @@ def api_llm_set_model():


 # =============================================================================
-# API Agent Libre (Autonomous Mode)
+# API Agent Libre (dépréciée — tout passe par /api/chat)
 # =============================================================================

@app.route('/api/agent/plan', methods=['POST'])
 def api_agent_plan():
-    """
-    Génère un plan d'exécution pour une tâche en langage naturel.
-
-    Le mode "Agent Libre" permet d'exécuter des tâches sans workflow pré-enregistré.
-    Le LLM (Qwen) décompose la demande en étapes d'actions.
-    """
-    if not autonomous_planner:
-        return jsonify({"error": "Agent autonome non disponible"}), 503
-
-    data = request.json
-    user_request = data.get('request', '').strip()
-
-    if not user_request:
-        return jsonify({"error": "Requête vide"}), 400
-
-    try:
-        # Contexte optionnel (écran actuel, etc.)
-        context = data.get('context', {})
-
-        # Générer le plan
-        plan = autonomous_planner.plan(user_request, context)
-
-        return jsonify({
-            "success": True,
-            "plan": {
-                "task": plan.task_description,
-                "steps": [
-                    {
-                        "step": s.step_number,
-                        "action": s.action_type.value,
-                        "description": s.description,
-                        "target": s.target,
-                        "params": s.parameters,
-                        "expected_result": s.expected_result
-                    }
-                    for s in plan.steps
-                ],
-                "estimated_seconds": plan.estimated_duration_seconds,
-                "risk_level": plan.risk_level,
-                "requires_confirmation": plan.requires_confirmation
-            },
-            "llm_available": autonomous_planner.llm_available
-        })
-
-    except Exception as e:
-        logger.error(f"Agent plan error: {e}")
-        return jsonify({"error": str(e)}), 500
+    """Déprécié — utiliser le chat unifié (/api/chat)."""
+    return jsonify({
+        "error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
+        "migration": "POST /api/chat {\"message\": \"votre demande\"}"
+    }), 410


@app.route('/api/agent/execute', methods=['POST'])
 def api_agent_execute():
-    """
-    Exécute un plan d'agent autonome.
-
-    Attend un objet plan (généré par /api/agent/plan) et l'exécute étape par étape.
-    """
-    if not autonomous_planner:
-        return jsonify({"error": "Agent autonome non disponible"}), 503
-
-    data = request.json
-    plan_data = data.get('plan')
-
-    if not plan_data:
-        return jsonify({"error": "Plan manquant"}), 400
-
-    try:
-        # Reconstruire le plan depuis les données
-        from .autonomous_planner import PlannedAction, ActionType
-
-        steps = []
-        for step_data in plan_data.get('steps', []):
-            action_type_str = step_data.get('action', 'click')
-            action_type_map = {
-                'open_app': ActionType.OPEN_APP,
-                'open_url': ActionType.OPEN_URL,
-                'click': ActionType.CLICK,
-                'type_text': ActionType.TYPE_TEXT,
-                'hotkey': ActionType.HOTKEY,
-                'scroll': ActionType.SCROLL,
-                'wait': ActionType.WAIT,
-                'screenshot': ActionType.SCREENSHOT
-            }
-
-            steps.append(PlannedAction(
-                step_number=step_data.get('step', len(steps) + 1),
-                action_type=action_type_map.get(action_type_str, ActionType.CLICK),
-                description=step_data.get('description', ''),
-                target=step_data.get('target'),
-                parameters=step_data.get('params', {}),
-                expected_result=step_data.get('expected_result')
-            ))
-
-        plan = ExecutionPlan(
-            task_description=plan_data.get('task', ''),
-            steps=steps,
-            estimated_duration_seconds=plan_data.get('estimated_seconds', 30),
-            risk_level=plan_data.get('risk_level', 'low')
-        )
-
-        # Exécuter en arrière-plan
-        socketio.start_background_task(execute_agent_plan, plan)
-
-        return jsonify({
-            "success": True,
-            "message": "Exécution démarrée",
-            "steps_count": len(steps)
-        })
-
-    except Exception as e:
-        logger.error(f"Agent execute error: {e}")
-        return jsonify({"error": str(e)}), 500
+    """Déprécié — utiliser le chat unifié (/api/chat)."""
+    return jsonify({
+        "error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
+        "migration": "POST /api/chat {\"message\": \"votre demande\"}"
+    }), 410


@app.route('/api/agent/status')
@@ -856,208 +828,71 @@ def api_agent_status():
    })


-def execute_agent_plan(plan: ExecutionPlan):
-    """Exécute un plan d'agent sur la machine distante via le streaming server."""
+@app.route('/api/gestures')
+def api_gestures():
+    """Liste tous les gestes disponibles dans le catalogue."""
+    if not gesture_catalog:
+        return jsonify({"gestures": [], "count": 0})
+
+    gestures = gesture_catalog.list_all()
+
+    return jsonify({
+        "gestures": gestures,
+        "count": len(gestures),
+        "categories": list({g["category"] for g in gestures}),
+    })
+
+
+def _execute_gesture(gesture):
+    """Exécuter un geste primitif via le streaming server."""
+    import uuid as _uuid
+
+    action = {
+        "action_id": f"act_gesture_{_uuid.uuid4().hex[:8]}",
+        "type": "key_combo",
+        "keys": list(gesture.keys),
+    }

    try:
-        # Convertir le plan LLM en actions normalisées pour l'Agent V1
-        actions = _plan_to_replay_actions(plan)
-
-        if not actions:
-            socketio.emit('execution_completed', {
-                "success": False,
-                "workflow": plan.task_description,
-                "message": "Aucune action convertible dans ce plan."
-            })
-            return
-
-        # Envoyer au streaming server pour exécution sur le PC cible
        resp = http_requests.post(
            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/raw",
            json={
-                "actions": actions,
-                "session_id": "",  # Auto-détection
-                "task_description": plan.task_description,
+                "actions": [action],
+                "session_id": "",
+                "task_description": f"Geste: {gesture.name}",
            },
-            timeout=15,
+            timeout=10,
        )

        if resp.status_code == 200:
-            data = resp.json()
-            replay_id = data.get("replay_id", "")
-            total = data.get("total_actions", len(actions))
-
-            socketio.emit('agent_execution_started', {
-                "workflow": plan.task_description,
-                "message": f"Exécution démarrée sur le PC cible ({total} actions)",
-                "replay_id": replay_id,
+            socketio.emit('execution_completed', {
+                "workflow": gesture.name,
+                "success": True,
+                "message": f"Geste '{gesture.name}' ({'+'.join(gesture.keys)}) envoyé",
            })
-
-            # Suivre la progression
-            _poll_replay_progress(replay_id, plan.task_description, total)
-
        else:
            error = resp.text[:200]
-            logger.error(f"Streaming server refus: HTTP {resp.status_code}: {error}")
            socketio.emit('execution_completed', {
+                "workflow": gesture.name,
                "success": False,
-                "workflow": plan.task_description,
-                "message": f"Erreur serveur: {error}"
+                "message": f"Erreur: {error}",
            })

    except http_requests.ConnectionError:
-        logger.error("Streaming server non disponible pour l'agent libre")
        socketio.emit('execution_completed', {
+            "workflow": gesture.name,
            "success": False,
-            "workflow": plan.task_description,
-            "message": "Le serveur de streaming n'est pas disponible. "
-                       "Vérifiez qu'il tourne sur le port 5005."
+            "message": "Serveur de streaming non disponible (port 5005).",
        })
    except Exception as e:
-        logger.error(f"Agent execution error: {e}")
+        logger.error(f"Gesture execution error: {e}")
        socketio.emit('execution_completed', {
+            "workflow": gesture.name,
            "success": False,
-            "workflow": plan.task_description,
-            "message": f"Erreur: {str(e)}"
+            "message": f"Erreur: {str(e)}",
        })


-def _plan_to_replay_actions(plan: ExecutionPlan) -> list:
-    """Convertir un ExecutionPlan LLM en actions normalisées pour l'Agent V1."""
-    import uuid as _uuid
-    from .autonomous_planner import ActionType
-
-    actions = []
-    for step in plan.steps:
-        action = {"action_id": f"act_free_{_uuid.uuid4().hex[:6]}"}
-
-        if step.action_type == ActionType.OPEN_URL:
-            url = step.parameters.get("url", "")
-            # Ouvrir le navigateur : touche Windows, taper le navigateur, Enter, puis naviguer
-            actions.append({
-                **action,
-                "type": "key_combo",
-                "keys": ["super"],
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait",
-                "duration_ms": 800,
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "type",
-                "text": "chrome",
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "key_combo",
-                "keys": ["enter"],
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait",
-                "duration_ms": 2000,
-            })
-            # Focus barre d'adresse + taper URL
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "key_combo",
-                "keys": ["ctrl", "l"],
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait",
-                "duration_ms": 300,
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "type",
-                "text": url,
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "key_combo",
-                "keys": ["enter"],
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait",
-                "duration_ms": 3000,
-            })
-            continue
-
-        elif step.action_type == ActionType.OPEN_APP:
-            app_name = step.parameters.get("app_name", "")
-            actions.append({**action, "type": "key_combo", "keys": ["super"]})
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait", "duration_ms": 800,
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "type", "text": app_name,
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "key_combo", "keys": ["enter"],
-            })
-            actions.append({
-                "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
-                "type": "wait", "duration_ms": 2000,
-            })
-            continue
-
-        elif step.action_type == ActionType.TYPE_TEXT:
-            text = step.parameters.get("text", "")
-            action["type"] = "type"
-            action["text"] = text
-            # Si un target est spécifié, activer la résolution visuelle
-            if step.target:
-                action["visual_mode"] = True
-                action["target_spec"] = {"by_text": step.target}
-
-        elif step.action_type == ActionType.CLICK:
-            action["type"] = "click"
-            action["x_pct"] = 0.5
-            action["y_pct"] = 0.5
-            action["button"] = "left"
-            if step.target:
-                action["visual_mode"] = True
-                action["target_spec"] = {"by_text": step.target}
-
-        elif step.action_type == ActionType.HOTKEY:
-            keys_str = step.parameters.get("keys", "")
-            if isinstance(keys_str, str):
-                keys = [k.strip() for k in keys_str.split("+")]
-            else:
-                keys = keys_str
-            action["type"] = "key_combo"
-            action["keys"] = keys
-
-        elif step.action_type == ActionType.SCROLL:
-            direction = step.parameters.get("direction", "down")
-            amount = step.parameters.get("amount", 3)
-            action["type"] = "scroll"
-            action["delta"] = -amount if direction == "down" else amount
-
-        elif step.action_type == ActionType.WAIT:
-            seconds = step.parameters.get("seconds", 2)
-            action["type"] = "wait"
-            action["duration_ms"] = int(seconds * 1000)
-
-        elif step.action_type == ActionType.SCREENSHOT:
-            # Skip — l'Agent V1 capture déjà automatiquement
-            continue
-
-        else:
-            continue
-
-        actions.append(action)
-
-    return actions
-
-
@app.route('/api/help')
 def api_help():
    """Aide et mode d'emploi."""
@@ -1138,6 +973,53 @@ def handle_cancel():
    emit('execution_cancelled', {}, broadcast=True)


+# =============================================================================
+# Copilot WebSocket Events
+# =============================================================================
+
+@socketio.on('copilot_approve')
+def handle_copilot_approve():
+    """L'utilisateur approuve l'étape copilot en cours."""
+    copilot = _copilot_sessions.get("__copilot__")
+    if not copilot or copilot["status"] != "waiting_approval":
+        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
+        return
+
+    logger.info(f"Copilot approve: étape {copilot['current_index'] + 1}/{copilot['total']}")
+    copilot["status"] = "approved"
+
+
+@socketio.on('copilot_skip')
+def handle_copilot_skip():
+    """L'utilisateur saute l'étape copilot en cours."""
+    copilot = _copilot_sessions.get("__copilot__")
+    if not copilot or copilot["status"] != "waiting_approval":
+        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
+        return
+
+    logger.info(f"Copilot skip: étape {copilot['current_index'] + 1}/{copilot['total']}")
+    copilot["status"] = "skipped"
+
+
+@socketio.on('copilot_abort')
+def handle_copilot_abort():
+    """L'utilisateur annule tout le workflow copilot."""
+    copilot = _copilot_sessions.get("__copilot__")
+    if not copilot:
+        return
+
+    logger.info(f"Copilot abort: workflow '{copilot['workflow_name']}'")
+    copilot["status"] = "aborted"
+    _copilot_sessions.pop("__copilot__", None)
+    emit('copilot_complete', {
+        "workflow": copilot["workflow_name"],
+        "status": "aborted",
+        "message": "Workflow annulé par l'utilisateur.",
+        "completed": copilot.get("completed", 0),
+        "total": copilot["total"],
+    })
+
+
 # =============================================================================
 # Exécution de workflow
 # =============================================================================
@@ -1243,6 +1125,352 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
        )


+def _build_actions_from_workflow(match, params: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Construire la liste d'actions normalisées depuis un workflow.
+
+    Tente la conversion via le format core (nodes/edges),
+    puis fallback sur le format JSON brut.
+    """
+    import uuid as _uuid
+
+    try:
+        with open(match.workflow_path, 'r') as f:
+            workflow_data = json.load(f)
+    except Exception as e:
+        logger.error(f"Impossible de charger le workflow {match.workflow_path}: {e}")
+        return []
+
+    # Substituer les variables
+    var_manager = VariableManager()
+    var_manager.set_variables(params)
+    workflow_data = var_manager.substitute_dict(workflow_data)
+
+    edges = workflow_data.get("edges", [])
+    actions = []
+
+    for i, edge in enumerate(edges):
+        action_dict = edge.get("action", {})
+        action_type = action_dict.get("type", "unknown")
+        action_params = action_dict.get("parameters", {})
+        target_dict = action_dict.get("target", {})
+
+        action = {
+            "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}",
+            "step_index": i,
+            "description": _describe_action(action_type, action_params, target_dict),
+        }
+
+        if action_type == "mouse_click":
+            pos = target_dict.get("position", [0.5, 0.5])
+            action["type"] = "click"
+            action["x_pct"] = pos[0] if len(pos) > 0 else 0.5
+            action["y_pct"] = pos[1] if len(pos) > 1 else 0.5
+            action["button"] = action_params.get("button", "left")
+        elif action_type == "text_input":
+            action["type"] = "type"
+            action["text"] = action_params.get("text", "")
+        elif action_type == "key_press":
+            action["type"] = "key_combo"
+            keys = action_params.get("keys", [])
+            if not keys and action_params.get("key"):
+                keys = [action_params["key"]]
+            action["keys"] = keys
+        elif action_type == "compound":
+            for step in action_params.get("steps", []):
+                sub_action = {
+                    "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}",
+                    "step_index": i,
+                    "description": _describe_action(step.get("type", "unknown"), step, {}),
+                }
+                sub_type = step.get("type", "unknown")
+                if sub_type == "key_press":
+                    sub_action["type"] = "key_combo"
+                    sub_action["keys"] = step.get("keys", [])
+                elif sub_type == "text_input":
+                    sub_action["type"] = "type"
+                    sub_action["text"] = step.get("text", "")
+                elif sub_type == "wait":
+                    sub_action["type"] = "wait"
+                    sub_action["duration_ms"] = step.get("duration_ms", 500)
+                elif sub_type == "mouse_click":
+                    sub_action["type"] = "click"
+                    sub_action["x_pct"] = step.get("x_pct", 0.5)
+                    sub_action["y_pct"] = step.get("y_pct", 0.5)
+                    sub_action["button"] = step.get("button", "left")
+                else:
+                    continue
+                actions.append(sub_action)
+            continue
+        else:
+            continue
+
+        # Ajouter target_spec pour résolution visuelle si dispo
+        target_spec = {}
+        if target_dict.get("role"):
+            target_spec["by_role"] = target_dict["role"]
+        if target_dict.get("text"):
+            target_spec["by_text"] = target_dict["text"]
+        if target_spec:
+            action["target_spec"] = target_spec
+            action["visual_mode"] = True
+
+        actions.append(action)
+
+    return actions
+
+
+def _describe_action(action_type: str, params: Dict[str, Any], target: Dict[str, Any]) -> str:
+    """Générer une description lisible d'une action pour l'affichage copilot."""
+    target_text = target.get("text", "")
+    target_role = target.get("role", "")
+
+    if action_type == "mouse_click":
+        label = target_text or target_role or "un élément"
+        return f"Clic sur '{label}'"
+    elif action_type == "text_input":
+        text = params.get("text", "")
+        preview = text[:30] + "..." if len(text) > 30 else text
+        return f"Saisir le texte : '{preview}'"
+    elif action_type == "key_press":
+        keys = params.get("keys", params.get("key", ""))
+        if isinstance(keys, list):
+            keys = "+".join(keys)
+        return f"Touche(s) : {keys}"
+    elif action_type == "compound":
+        steps_count = len(params.get("steps", []))
+        return f"Action composée ({steps_count} sous-actions)"
+    elif action_type == "wait":
+        ms = params.get("duration_ms", 500)
+        return f"Attente {ms}ms"
+    else:
+        return f"Action : {action_type}"
+
+
+def _copilot_emit_step_result(idx: int, total: int, status: str, message: str) -> None:
+    """Broadcast the outcome of one Copilot step through the 'copilot_step_result' event."""
+    socketio.emit('copilot_step_result', {
+        "step_index": idx,
+        "total": total,
+        "status": status,
+        "message": message,
+    })
+
+
+def _copilot_await_decision(copilot_state: Dict[str, Any], max_wait: float,
+                            poll_interval: float = 0.3) -> Optional[str]:
+    """
+    Poll the shared Copilot state until a decision is present or `max_wait` seconds elapse.
+
+    The decision is written into ``copilot_state["status"]`` from outside this
+    function; this helper only reads it.
+
+    Returns:
+        "approved", "skipped" or "aborted", or None when no decision arrived in time.
+    """
+    import time
+
+    decisions = ("approved", "skipped", "aborted")
+    waited = 0.0
+    while waited < max_wait:
+        status = copilot_state["status"]
+        if status in decisions:
+            return status
+        time.sleep(poll_interval)
+        waited += poll_interval
+
+    # Final look: a decision written during the last sleep must not be
+    # reported as a timeout.
+    status = copilot_state["status"]
+    return status if status in decisions else None
+
+
+def _copilot_run_action(action: Dict[str, Any]) -> Tuple[bool, str]:
+    """
+    Send a single action to the streaming server and wait for its result.
+
+    Every failure (HTTP error, connection error, unexpected exception) is
+    converted into a ``(False, message)`` pair so that the caller counts
+    each step exactly once.
+
+    Returns:
+        (success, user-facing message)
+    """
+    try:
+        resp = http_requests.post(
+            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/single",
+            json={
+                "action": action,
+                "session_id": "",
+            },
+            timeout=10,
+        )
+        if resp.status_code != 200:
+            return False, f"Erreur serveur : {resp.text[:200]}"
+
+        resp_data = resp.json()
+        action_success = _wait_for_single_action_result(
+            resp_data.get("session_id", ""),
+            resp_data.get("action_id", action.get("action_id")),
+            timeout=30,
+        )
+    except http_requests.ConnectionError:
+        return False, "Serveur de streaming non disponible (port 5005)."
+    except Exception as e:
+        logger.error(f"Copilot action error: {e}")
+        return False, f"Erreur : {str(e)}"
+
+    if action_success:
+        return True, "Action exécutée avec succès"
+    return False, "L'action a échoué"
+
+
+def execute_workflow_copilot(match, params: Dict[str, Any]):
+    """
+    Run a workflow in Copilot (step-by-step) mode.
+
+    Builds the action list for the matched workflow, then for each action:
+    emits a 'copilot_step' event, waits (up to 120s) for the user's decision
+    to appear in the shared Copilot state, and either executes the action on
+    the streaming server, skips it, or stops.
+
+    Events emitted:
+        - 'copilot_step'         before each step, while awaiting approval
+        - 'copilot_step_result'  after each executed or skipped step
+        - 'copilot_complete'     on empty workflow, on timeout, and on normal end
+
+    Args:
+        match: workflow match object; only ``match.workflow_name`` is read here,
+            the object is otherwise forwarded to ``_build_actions_from_workflow``.
+        params: parameters forwarded to ``_build_actions_from_workflow``.
+
+    Notes:
+        - An abort requested while a step is executing is honoured: the
+          "aborted" status is never overwritten with "idle".
+        - The Copilot session and the ``running`` flag are always released,
+          even if an unexpected exception escapes the loop.
+        - When the run ends in the "aborted" state (user abort or timeout),
+          no final summary is emitted and ``finish_execution`` is not called.
+    """
+    global execution_status
+
+    workflow_name = match.workflow_name
+
+    actions = _build_actions_from_workflow(match, params)
+    if not actions:
+        socketio.emit('copilot_complete', {
+            "workflow": workflow_name,
+            "status": "error",
+            "message": "Aucune action exécutable dans ce workflow.",
+            "completed": 0,
+            "total": 0,
+        })
+        return
+
+    total = len(actions)
+
+    execution_status["running"] = True
+    execution_status["workflow"] = workflow_name
+    execution_status["progress"] = 0
+    execution_status["message"] = f"Mode Copilot : {total} étapes"
+
+    copilot_state = {
+        "workflow_name": workflow_name,
+        "actions": actions,
+        "current_index": 0,
+        "total": total,
+        "status": "idle",
+        "completed": 0,
+        "skipped": 0,
+        "failed": 0,
+    }
+    _copilot_sessions["__copilot__"] = copilot_state
+
+    logger.info(f"Copilot démarré : '{workflow_name}' — {total} étapes")
+
+    # Maximum time (seconds) to wait for the user's decision on one step.
+    max_wait = 120
+
+    try:
+        for idx, action in enumerate(actions):
+            copilot_state["current_index"] = idx
+
+            if copilot_state["status"] == "aborted":
+                break
+
+            copilot_state["status"] = "waiting_approval"
+            socketio.emit('copilot_step', {
+                "workflow": workflow_name,
+                "step_index": idx,
+                "total": total,
+                "action": {
+                    "action_id": action.get("action_id", ""),
+                    "type": action.get("type", "unknown"),
+                    "description": action.get("description", "Action inconnue"),
+                },
+            })
+
+            decision = _copilot_await_decision(copilot_state, max_wait)
+
+            if decision is None:
+                # No answer in time: treat as an abort and tell the client why.
+                copilot_state["status"] = "aborted"
+                socketio.emit('copilot_complete', {
+                    "workflow": workflow_name,
+                    "status": "timeout",
+                    "message": f"Timeout : pas de réponse après {max_wait}s.",
+                    "completed": copilot_state["completed"],
+                    "total": total,
+                })
+                break
+
+            if decision == "aborted":
+                break
+
+            if decision == "skipped":
+                copilot_state["skipped"] += 1
+                logger.info(f"Copilot skip étape {idx + 1}/{total}")
+                _copilot_emit_step_result(idx, total, "skipped", "Étape passée")
+            else:  # "approved"
+                logger.info(f"Copilot execute étape {idx + 1}/{total}: {action.get('type')}")
+                action_ok, step_message = _copilot_run_action(action)
+                outcome = "completed" if action_ok else "failed"
+                copilot_state[outcome] += 1
+                _copilot_emit_step_result(idx, total, outcome, step_message)
+
+            # Progress reflects processed steps, whether executed or skipped.
+            execution_status["progress"] = int((idx + 1) / total * 100)
+            execution_status["message"] = f"Copilot : étape {idx + 1}/{total}"
+
+            # Keep an abort that arrived while the step was being processed;
+            # it is picked up at the top of the next iteration.
+            if copilot_state["status"] != "aborted":
+                copilot_state["status"] = "idle"
+    finally:
+        # Always release the session and the running flag.
+        _copilot_sessions.pop("__copilot__", None)
+        execution_status["running"] = False
+
+    if copilot_state.get("status", "completed") == "aborted":
+        return
+
+    completed = copilot_state["completed"]
+    skipped = copilot_state["skipped"]
+    failed = copilot_state["failed"]
+
+    success = failed == 0
+    message = (
+        f"Copilot terminé : {completed} réussies, "
+        f"{skipped} passées, {failed} échouées sur {total} étapes."
+    )
+    socketio.emit('copilot_complete', {
+        "workflow": workflow_name,
+        "status": "completed" if success else "partial",
+        "message": message,
+        "completed": completed,
+        "skipped": skipped,
+        "failed": failed,
+        "total": total,
+    })
+    finish_execution(workflow_name, success, message)
+
+
+def _wait_for_single_action_result(session_id: str, action_id: str, timeout: int = 30) -> bool:
+    """
+    Give the streaming server time to process one dispatched action.
+
+    No result is actually fetched here: the function sleeps in 0.5s slices
+    until 3 seconds or `timeout` seconds have been spent (whichever limit is
+    reached first) and then reports success. `session_id` and `action_id`
+    are accepted but not used by this implementation.
+
+    Returns:
+        Always True (optimistic).
+    """
+    import time
+
+    slice_s = 0.5   # length of one sleep slice
+    settle_s = 3.0  # fixed grace period granted to the agent
+
+    spent = 0.0
+    while spent < timeout and spent < settle_s:
+        time.sleep(slice_s)
+        spent += slice_s
+
+    # Optimistic — the real result arrives separately via /replay/result.
+    return True
+
+
 def execute_workflow(match, params):
    """
    Exécuter un workflow — tente d'abord le streaming server,