From 74a1cb4e0393d8bbc8f3e59b382bbfc016877ca8 Mon Sep 17 00:00:00 2001 From: Dom Date: Sun, 15 Mar 2026 08:41:53 +0100 Subject: [PATCH] =?UTF-8?q?feat(agent-libre):=20ex=C3=A9cuter=20les=20plan?= =?UTF-8?q?s=20LLM=20sur=20le=20PC=20cible=20via=20streaming=20server?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mode "Agent Libre" envoyait les actions localement (Linux) au lieu du PC Windows. Maintenant les plans LLM sont convertis en actions normalisées et envoyés au streaming server via POST /replay/raw. L'Agent V1 les exécute sur la bonne machine. Co-Authored-By: Claude Opus 4.6 --- agent_chat/app.py | 203 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 183 insertions(+), 20 deletions(-) diff --git a/agent_chat/app.py b/agent_chat/app.py index e98c5d838..1f18604b5 100644 --- a/agent_chat/app.py +++ b/agent_chat/app.py @@ -857,35 +857,62 @@ def api_agent_status(): def execute_agent_plan(plan: ExecutionPlan): - """Exécute un plan d'agent en arrière-plan.""" - import asyncio + """Exécute un plan d'agent sur la machine distante via le streaming server.""" try: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) + # Convertir le plan LLM en actions normalisées pour l'Agent V1 + actions = _plan_to_replay_actions(plan) - results = loop.run_until_complete(autonomous_planner.execute_plan(plan)) + if not actions: + socketio.emit('execution_completed', { + "success": False, + "workflow": plan.task_description, + "message": "Aucune action convertible dans ce plan." 
+ }) + return - loop.close() + # Envoyer au streaming server pour exécution sur le PC cible + resp = http_requests.post( + f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/raw", + json={ + "actions": actions, + "session_id": "", # Auto-détection + "task_description": plan.task_description, + }, + timeout=15, + ) - # Envoyer le résultat final - success_count = sum(1 for r in results if r.success) - total = len(results) + if resp.status_code == 200: + data = resp.json() + replay_id = data.get("replay_id", "") + total = data.get("total_actions", len(actions)) + socketio.emit('agent_execution_started', { + "workflow": plan.task_description, + "message": f"Exécution démarrée sur le PC cible ({total} actions)", + "replay_id": replay_id, + }) + + # Suivre la progression + _poll_replay_progress(replay_id, plan.task_description, total) + + else: + error = resp.text[:200] + logger.error(f"Streaming server refus: HTTP {resp.status_code}: {error}") + socketio.emit('execution_completed', { + "success": False, + "workflow": plan.task_description, + "message": f"Erreur serveur: {error}" + }) + + except http_requests.ConnectionError: + logger.error("Streaming server non disponible pour l'agent libre") socketio.emit('execution_completed', { - "success": success_count == total, + "success": False, "workflow": plan.task_description, - "message": f"{success_count}/{total} étapes réussies", - "results": [ - { - "step": r.action.step_number, - "success": r.success, - "message": r.message - } - for r in results - ] + "message": "Le serveur de streaming n'est pas disponible. " + "Vérifiez qu'il tourne sur le port 5005." 
def _plan_to_replay_actions(plan: ExecutionPlan) -> list:
    """Convert an LLM ``ExecutionPlan`` into normalized replay actions for Agent V1.

    Every step in ``plan.steps`` is translated into zero, one or several
    action dicts. Each dict carries a generated ``action_id``
    (``"act_free_"`` followed by 6 hex characters) and a ``type``:

    * ``OPEN_URL``   -> press Super, type ``"chrome"``, press Enter, then
      focus the address bar (Ctrl+L), type the URL and press Enter. Without
      a URL only the browser launch sequence is emitted.
    * ``OPEN_APP``   -> press Super, type the application name, press Enter.
      Skipped when no application name is given.
    * ``TYPE_TEXT``  -> ``type`` action carrying ``text``.
    * ``CLICK``      -> left ``click`` at the centre (``x_pct``/``y_pct`` of
      0.5).
    * ``HOTKEY``     -> ``key_combo``; ``keys`` may be a ``"ctrl+c"`` style
      string or a list/tuple. Skipped when no key name remains.
    * ``SCROLL``     -> ``scroll`` with a negative ``delta`` for direction
      ``"down"`` and a positive one otherwise.
    * ``WAIT``       -> ``wait`` with ``duration_ms`` derived from ``seconds``.
    * ``SCREENSHOT`` and unknown action types produce no action.

    For ``TYPE_TEXT`` and ``CLICK``, a truthy ``step.target`` additionally
    sets ``visual_mode`` and a ``target_spec`` of ``{"by_text": target}``.

    Step parameters originate from an LLM, so numeric values that arrive as
    strings are coerced, and unusable values fall back to the defaults
    (3 for the scroll amount, 2 seconds for a wait) instead of raising.

    Args:
        plan: Plan whose ``steps`` expose ``action_type``, ``parameters``
            and ``target``.

    Returns:
        The list of action dicts in execution order (possibly empty).
    """
    import logging
    import math
    import uuid as _uuid
    from .autonomous_planner import ActionType

    log = logging.getLogger(__name__)

    def new_action(action_type, **fields):
        """Build one action dict with a fresh ``act_free_<6 hex>`` id."""
        return {
            "action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
            "type": action_type,
            **fields,
        }

    def launch_from_start_menu(name):
        """Press Super, type *name*, confirm with Enter, then wait."""
        return [
            new_action("key_combo", keys=["super"]),
            new_action("wait", duration_ms=800),
            new_action("type", text=name),
            new_action("key_combo", keys=["enter"]),
            new_action("wait", duration_ms=2000),
        ]

    def to_number(value, default):
        """Return *value* as a finite int/float, or *default* if unusable."""
        if isinstance(value, str):
            text = value.strip()
            try:
                value = int(text)
            except ValueError:
                try:
                    value = float(text)
                except ValueError:
                    return default
        if not isinstance(value, (int, float)):
            return default
        if isinstance(value, float) and not math.isfinite(value):
            return default
        return value

    def with_visual_target(action, target):
        """Enable text-based visual resolution when a target is given."""
        if target:
            action["visual_mode"] = True
            action["target_spec"] = {"by_text": target}
        return action

    actions = []
    for step in plan.steps:
        kind = step.action_type
        params = step.parameters or {}

        if kind == ActionType.OPEN_URL:
            url = params.get("url", "")
            actions.extend(launch_from_start_menu("chrome"))
            if not url:
                # Nothing to navigate to: typing an empty string and
                # pressing Enter in the address bar would be pointless.
                log.warning("OPEN_URL step without url: browser launch only")
                continue
            actions.extend([
                new_action("key_combo", keys=["ctrl", "l"]),
                new_action("wait", duration_ms=300),
                new_action("type", text=url),
                new_action("key_combo", keys=["enter"]),
                new_action("wait", duration_ms=3000),
            ])

        elif kind == ActionType.OPEN_APP:
            app_name = params.get("app_name", "")
            if not app_name:
                # Super + Enter with no search text could start an
                # arbitrary Start-menu entry, so the step is dropped.
                log.warning("OPEN_APP step without app_name: skipped")
                continue
            actions.extend(launch_from_start_menu(app_name))

        elif kind == ActionType.TYPE_TEXT:
            action = new_action("type", text=params.get("text", ""))
            actions.append(with_visual_target(action, step.target))

        elif kind == ActionType.CLICK:
            action = new_action("click", x_pct=0.5, y_pct=0.5, button="left")
            actions.append(with_visual_target(action, step.target))

        elif kind == ActionType.HOTKEY:
            raw_keys = params.get("keys", "")
            if isinstance(raw_keys, str):
                parts = raw_keys.split("+")
            elif isinstance(raw_keys, (list, tuple)):
                parts = [str(item) for item in raw_keys]
            else:
                parts = []
            # "".split("+") yields [""], so blank names must be filtered.
            keys = [part.strip() for part in parts if part.strip()]
            if not keys:
                log.warning("HOTKEY step without usable keys: skipped")
                continue
            actions.append(new_action("key_combo", keys=keys))

        elif kind == ActionType.SCROLL:
            amount = to_number(params.get("amount", 3), 3)
            scrolling_down = params.get("direction", "down") == "down"
            actions.append(
                new_action("scroll", delta=-amount if scrolling_down else amount)
            )

        elif kind == ActionType.WAIT:
            # Coerce first: "2" * 1000 would repeat the string, not multiply.
            seconds = to_number(params.get("seconds", 2), 2)
            actions.append(
                new_action("wait", duration_ms=max(0, int(seconds * 1000)))
            )

        # SCREENSHOT is intentionally not forwarded (the original author's
        # note says Agent V1 already captures automatically); unknown
        # action types are ignored as well.

    return actions