feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay
Refonte majeure du système Agent Chat et ajout de nombreux modules :

- Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi")
- GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures
- Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action
- Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay
- Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel
- ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3)
- IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés
- Dashboard : nouvelles pages gestures, streaming, extractions
- Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants
- Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,7 @@ from .confirmation import ConfirmationLoop, ConfirmationStatus, RiskLevel, get_c
|
||||
from .response_generator import ResponseGenerator, get_response_generator
|
||||
from .conversation_manager import ConversationManager, get_conversation_manager
|
||||
from .autonomous_planner import AutonomousPlanner, get_autonomous_planner, ExecutionPlan
|
||||
from .gesture_catalog import GestureCatalog
|
||||
|
||||
# GPU Resource Manager (optional)
|
||||
try:
|
||||
@@ -78,6 +79,7 @@ confirmation_loop: Optional[ConfirmationLoop] = None
|
||||
response_generator: Optional[ResponseGenerator] = None
|
||||
conversation_manager: Optional[ConversationManager] = None
|
||||
autonomous_planner: Optional[AutonomousPlanner] = None
|
||||
gesture_catalog: Optional[GestureCatalog] = None
|
||||
|
||||
# Execution components
|
||||
workflow_pipeline = None
|
||||
@@ -99,6 +101,23 @@ execution_status = {
|
||||
}
|
||||
command_history: List[Dict[str, Any]] = []
|
||||
|
||||
# Copilot state — suivi du mode pas-à-pas
|
||||
_copilot_sessions: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# Phrases that switch a request to Copilot (step-by-step) mode.
# Matching is case- and accent-insensitive, so the unaccented spellings kept
# here are redundant but harmless.
_COPILOT_KEYWORDS = [
    "copilot", "co-pilot",
    "pas à pas", "pas-à-pas", "pas a pas",
    "étape par étape", "etape par etape",
    "step by step", "une étape à la fois",
    "mode assisté", "mode assiste", "mode guidé", "mode guide",
]


def _fold_for_matching(text: str) -> str:
    """Return *text* case-folded with all diacritics removed.

    The text is decomposed (NFKD) so that precomposed and decomposed accented
    characters compare equal, then combining marks are dropped.
    """
    import unicodedata

    decomposed = unicodedata.normalize("NFKD", text).casefold()
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _detect_copilot_mode(message: str) -> bool:
    """Tell whether the user message asks for Copilot (step-by-step) mode.

    Args:
        message: Raw user message.

    Returns:
        True when any entry of ``_COPILOT_KEYWORDS`` occurs in the message,
        ignoring case, accents and Unicode normalization form.
    """
    folded_message = _fold_for_matching(message)
    return any(
        _fold_for_matching(keyword) in folded_message
        for keyword in _COPILOT_KEYWORDS
    )
|
||||
|
||||
|
||||
def init_system():
|
||||
"""Initialiser tous les composants du système."""
|
||||
@@ -218,6 +237,15 @@ def init_system():
|
||||
logger.warning(f"⚠ AutonomousPlanner: {e}")
|
||||
autonomous_planner = None
|
||||
|
||||
# 6. GestureCatalog (raccourcis clavier universels)
|
||||
global gesture_catalog
|
||||
try:
|
||||
gesture_catalog = GestureCatalog()
|
||||
logger.info(f"✓ GestureCatalog: {len(gesture_catalog.list_all())} gestes chargés")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠ GestureCatalog: {e}")
|
||||
gesture_catalog = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Routes Web
|
||||
@@ -486,35 +514,53 @@ def api_chat():
|
||||
action_taken = "denied"
|
||||
|
||||
elif intent.intent_type == IntentType.EXECUTE:
|
||||
# Exécuter un workflow
|
||||
if matcher and intent.workflow_hint:
|
||||
match = matcher.find_workflow(intent.workflow_hint, min_confidence=0.2)
|
||||
# Résolution en 3 niveaux :
|
||||
# 1. Workflow appris → exécution directe ou copilot
|
||||
# 2. Geste primitif (GestureCatalog) → raccourci clavier
|
||||
# 3. "Je ne sais pas, montre-moi !"
|
||||
query = intent.workflow_hint or intent.raw_query
|
||||
|
||||
if match:
|
||||
# Évaluer le risque
|
||||
risk = confirmation_loop.evaluate_risk(
|
||||
match.workflow_name,
|
||||
{**match.extracted_params, **intent.parameters}
|
||||
if matcher and query:
|
||||
match = matcher.find_workflow(query, min_confidence=0.2)
|
||||
else:
|
||||
match = None
|
||||
|
||||
if match:
|
||||
# Niveau 1 : Workflow appris
|
||||
risk = confirmation_loop.evaluate_risk(
|
||||
match.workflow_name,
|
||||
{**match.extracted_params, **intent.parameters}
|
||||
)
|
||||
|
||||
if confirmation_loop.requires_confirmation(risk):
|
||||
conf = confirmation_loop.create_confirmation_request(
|
||||
workflow_name=match.workflow_name,
|
||||
parameters={**match.extracted_params, **intent.parameters},
|
||||
action_type="execute",
|
||||
risk_level=risk
|
||||
)
|
||||
conversation_manager.set_pending_confirmation(session, conf)
|
||||
response = response_generator.generate_confirmation_request(conf)
|
||||
result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
|
||||
action_taken = "confirmation_requested"
|
||||
|
||||
if confirmation_loop.requires_confirmation(risk):
|
||||
# Créer une demande de confirmation
|
||||
conf = confirmation_loop.create_confirmation_request(
|
||||
workflow_name=match.workflow_name,
|
||||
parameters={**match.extracted_params, **intent.parameters},
|
||||
action_type="execute",
|
||||
risk_level=risk
|
||||
else:
|
||||
all_params = {**match.extracted_params, **intent.parameters}
|
||||
use_copilot = _detect_copilot_mode(message)
|
||||
|
||||
if use_copilot:
|
||||
result = {
|
||||
"success": True,
|
||||
"workflow": match.workflow_name,
|
||||
"params": all_params,
|
||||
"confidence": match.confidence,
|
||||
"mode": "copilot",
|
||||
}
|
||||
action_taken = "copilot_started"
|
||||
socketio.start_background_task(
|
||||
execute_workflow_copilot, match, all_params
|
||||
)
|
||||
conversation_manager.set_pending_confirmation(session, conf)
|
||||
|
||||
# Générer la réponse de confirmation
|
||||
response = response_generator.generate_confirmation_request(conf)
|
||||
result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
|
||||
action_taken = "confirmation_requested"
|
||||
|
||||
else:
|
||||
# Exécuter directement
|
||||
all_params = {**match.extracted_params, **intent.parameters}
|
||||
result = {
|
||||
"success": True,
|
||||
"workflow": match.workflow_name,
|
||||
@@ -522,12 +568,31 @@ def api_chat():
|
||||
"confidence": match.confidence
|
||||
}
|
||||
action_taken = "executed"
|
||||
|
||||
socketio.start_background_task(execute_workflow, match, all_params)
|
||||
|
||||
elif gesture_catalog and query:
|
||||
# Niveau 2 : Geste primitif (raccourci clavier)
|
||||
gesture_match = gesture_catalog.match(query, min_score=0.6)
|
||||
if gesture_match:
|
||||
gesture, score = gesture_match
|
||||
result = {
|
||||
"gesture": True,
|
||||
"gesture_name": gesture.name,
|
||||
"gesture_keys": "+".join(gesture.keys),
|
||||
"gesture_id": gesture.id,
|
||||
"confidence": score,
|
||||
}
|
||||
action_taken = "gesture_executed"
|
||||
# Exécuter le geste via le streaming server
|
||||
socketio.start_background_task(
|
||||
_execute_gesture, gesture
|
||||
)
|
||||
else:
|
||||
result = {"not_found": True, "query": intent.workflow_hint}
|
||||
# Niveau 3 : Inconnu → "montre-moi !"
|
||||
result = {"not_found": True, "query": query, "teach_me": True}
|
||||
else:
|
||||
result = {"error": "Pas de workflow spécifié"}
|
||||
# Niveau 3 : Pas de query exploitable
|
||||
result = {"not_found": True, "query": query or "", "teach_me": True}
|
||||
|
||||
elif intent.intent_type == IntentType.LIST:
|
||||
# Lister les workflows avec métadonnées enrichies
|
||||
@@ -594,6 +659,10 @@ def api_chat():
|
||||
result = {}
|
||||
action_taken = "help_shown"
|
||||
|
||||
elif intent.intent_type == IntentType.GREETING:
|
||||
result = {}
|
||||
action_taken = "greeting"
|
||||
|
||||
elif intent.clarification_needed:
|
||||
result = {"clarification_needed": True}
|
||||
action_taken = "clarification_requested"
|
||||
@@ -728,122 +797,25 @@ def api_llm_set_model():
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Agent Libre (Autonomous Mode)
|
||||
# API Agent Libre (dépréciée — tout passe par /api/chat)
|
||||
# =============================================================================
|
||||
|
||||
@app.route('/api/agent/plan', methods=['POST'])
|
||||
def api_agent_plan():
|
||||
"""
|
||||
Génère un plan d'exécution pour une tâche en langage naturel.
|
||||
|
||||
Le mode "Agent Libre" permet d'exécuter des tâches sans workflow pré-enregistré.
|
||||
Le LLM (Qwen) décompose la demande en étapes d'actions.
|
||||
"""
|
||||
if not autonomous_planner:
|
||||
return jsonify({"error": "Agent autonome non disponible"}), 503
|
||||
|
||||
data = request.json
|
||||
user_request = data.get('request', '').strip()
|
||||
|
||||
if not user_request:
|
||||
return jsonify({"error": "Requête vide"}), 400
|
||||
|
||||
try:
|
||||
# Contexte optionnel (écran actuel, etc.)
|
||||
context = data.get('context', {})
|
||||
|
||||
# Générer le plan
|
||||
plan = autonomous_planner.plan(user_request, context)
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"plan": {
|
||||
"task": plan.task_description,
|
||||
"steps": [
|
||||
{
|
||||
"step": s.step_number,
|
||||
"action": s.action_type.value,
|
||||
"description": s.description,
|
||||
"target": s.target,
|
||||
"params": s.parameters,
|
||||
"expected_result": s.expected_result
|
||||
}
|
||||
for s in plan.steps
|
||||
],
|
||||
"estimated_seconds": plan.estimated_duration_seconds,
|
||||
"risk_level": plan.risk_level,
|
||||
"requires_confirmation": plan.requires_confirmation
|
||||
},
|
||||
"llm_available": autonomous_planner.llm_available
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Agent plan error: {e}")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
"""Déprécié — utiliser le chat unifié (/api/chat)."""
|
||||
return jsonify({
|
||||
"error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
|
||||
"migration": "POST /api/chat {\"message\": \"votre demande\"}"
|
||||
}), 410
|
||||
|
||||
|
||||
@app.route('/api/agent/execute', methods=['POST'])
|
||||
def api_agent_execute():
|
||||
"""
|
||||
Exécute un plan d'agent autonome.
|
||||
|
||||
Attend un objet plan (généré par /api/agent/plan) et l'exécute étape par étape.
|
||||
"""
|
||||
if not autonomous_planner:
|
||||
return jsonify({"error": "Agent autonome non disponible"}), 503
|
||||
|
||||
data = request.json
|
||||
plan_data = data.get('plan')
|
||||
|
||||
if not plan_data:
|
||||
return jsonify({"error": "Plan manquant"}), 400
|
||||
|
||||
try:
|
||||
# Reconstruire le plan depuis les données
|
||||
from .autonomous_planner import PlannedAction, ActionType
|
||||
|
||||
steps = []
|
||||
for step_data in plan_data.get('steps', []):
|
||||
action_type_str = step_data.get('action', 'click')
|
||||
action_type_map = {
|
||||
'open_app': ActionType.OPEN_APP,
|
||||
'open_url': ActionType.OPEN_URL,
|
||||
'click': ActionType.CLICK,
|
||||
'type_text': ActionType.TYPE_TEXT,
|
||||
'hotkey': ActionType.HOTKEY,
|
||||
'scroll': ActionType.SCROLL,
|
||||
'wait': ActionType.WAIT,
|
||||
'screenshot': ActionType.SCREENSHOT
|
||||
}
|
||||
|
||||
steps.append(PlannedAction(
|
||||
step_number=step_data.get('step', len(steps) + 1),
|
||||
action_type=action_type_map.get(action_type_str, ActionType.CLICK),
|
||||
description=step_data.get('description', ''),
|
||||
target=step_data.get('target'),
|
||||
parameters=step_data.get('params', {}),
|
||||
expected_result=step_data.get('expected_result')
|
||||
))
|
||||
|
||||
plan = ExecutionPlan(
|
||||
task_description=plan_data.get('task', ''),
|
||||
steps=steps,
|
||||
estimated_duration_seconds=plan_data.get('estimated_seconds', 30),
|
||||
risk_level=plan_data.get('risk_level', 'low')
|
||||
)
|
||||
|
||||
# Exécuter en arrière-plan
|
||||
socketio.start_background_task(execute_agent_plan, plan)
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"message": "Exécution démarrée",
|
||||
"steps_count": len(steps)
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Agent execute error: {e}")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
"""Déprécié — utiliser le chat unifié (/api/chat)."""
|
||||
return jsonify({
|
||||
"error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
|
||||
"migration": "POST /api/chat {\"message\": \"votre demande\"}"
|
||||
}), 410
|
||||
|
||||
|
||||
@app.route('/api/agent/status')
|
||||
@@ -856,208 +828,71 @@ def api_agent_status():
|
||||
})
|
||||
|
||||
|
||||
def execute_agent_plan(plan: ExecutionPlan):
|
||||
"""Exécute un plan d'agent sur la machine distante via le streaming server."""
|
||||
@app.route('/api/gestures')
def api_gestures():
    """List every gesture available in the catalog.

    Returns:
        A JSON object with ``gestures`` (the catalog entries), ``count`` and
        ``categories`` (distinct category values, sorted). The same three
        keys are returned when the catalog is unavailable, with empty values,
        so clients never have to special-case the response shape.
    """
    gestures = gesture_catalog.list_all() if gesture_catalog else []

    # Sorted for a stable response; entries without a category are ignored
    # instead of failing the whole request.
    categories = sorted(
        {g["category"] for g in gestures if g.get("category")},
        key=str,
    )

    return jsonify({
        "gestures": gestures,
        "count": len(gestures),
        "categories": categories,
    })
|
||||
|
||||
|
||||
def _execute_gesture(gesture):
|
||||
"""Exécuter un geste primitif via le streaming server."""
|
||||
import uuid as _uuid
|
||||
|
||||
action = {
|
||||
"action_id": f"act_gesture_{_uuid.uuid4().hex[:8]}",
|
||||
"type": "key_combo",
|
||||
"keys": list(gesture.keys),
|
||||
}
|
||||
|
||||
try:
|
||||
# Convertir le plan LLM en actions normalisées pour l'Agent V1
|
||||
actions = _plan_to_replay_actions(plan)
|
||||
|
||||
if not actions:
|
||||
socketio.emit('execution_completed', {
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": "Aucune action convertible dans ce plan."
|
||||
})
|
||||
return
|
||||
|
||||
# Envoyer au streaming server pour exécution sur le PC cible
|
||||
resp = http_requests.post(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/raw",
|
||||
json={
|
||||
"actions": actions,
|
||||
"session_id": "", # Auto-détection
|
||||
"task_description": plan.task_description,
|
||||
"actions": [action],
|
||||
"session_id": "",
|
||||
"task_description": f"Geste: {gesture.name}",
|
||||
},
|
||||
timeout=15,
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
replay_id = data.get("replay_id", "")
|
||||
total = data.get("total_actions", len(actions))
|
||||
|
||||
socketio.emit('agent_execution_started', {
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Exécution démarrée sur le PC cible ({total} actions)",
|
||||
"replay_id": replay_id,
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": True,
|
||||
"message": f"Geste '{gesture.name}' ({'+'.join(gesture.keys)}) envoyé",
|
||||
})
|
||||
|
||||
# Suivre la progression
|
||||
_poll_replay_progress(replay_id, plan.task_description, total)
|
||||
|
||||
else:
|
||||
error = resp.text[:200]
|
||||
logger.error(f"Streaming server refus: HTTP {resp.status_code}: {error}")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Erreur serveur: {error}"
|
||||
"message": f"Erreur: {error}",
|
||||
})
|
||||
|
||||
except http_requests.ConnectionError:
|
||||
logger.error("Streaming server non disponible pour l'agent libre")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": "Le serveur de streaming n'est pas disponible. "
|
||||
"Vérifiez qu'il tourne sur le port 5005."
|
||||
"message": "Serveur de streaming non disponible (port 5005).",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Agent execution error: {e}")
|
||||
logger.error(f"Gesture execution error: {e}")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Erreur: {str(e)}"
|
||||
"message": f"Erreur: {str(e)}",
|
||||
})
|
||||
|
||||
|
||||
def _plan_to_replay_actions(plan: ExecutionPlan) -> list:
|
||||
"""Convertir un ExecutionPlan LLM en actions normalisées pour l'Agent V1."""
|
||||
import uuid as _uuid
|
||||
from .autonomous_planner import ActionType
|
||||
|
||||
actions = []
|
||||
for step in plan.steps:
|
||||
action = {"action_id": f"act_free_{_uuid.uuid4().hex[:6]}"}
|
||||
|
||||
if step.action_type == ActionType.OPEN_URL:
|
||||
url = step.parameters.get("url", "")
|
||||
# Ouvrir le navigateur : touche Windows, taper le navigateur, Enter, puis naviguer
|
||||
actions.append({
|
||||
**action,
|
||||
"type": "key_combo",
|
||||
"keys": ["super"],
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 800,
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "type",
|
||||
"text": "chrome",
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "key_combo",
|
||||
"keys": ["enter"],
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 2000,
|
||||
})
|
||||
# Focus barre d'adresse + taper URL
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "key_combo",
|
||||
"keys": ["ctrl", "l"],
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 300,
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "type",
|
||||
"text": url,
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "key_combo",
|
||||
"keys": ["enter"],
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 3000,
|
||||
})
|
||||
continue
|
||||
|
||||
elif step.action_type == ActionType.OPEN_APP:
|
||||
app_name = step.parameters.get("app_name", "")
|
||||
actions.append({**action, "type": "key_combo", "keys": ["super"]})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait", "duration_ms": 800,
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "type", "text": app_name,
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "key_combo", "keys": ["enter"],
|
||||
})
|
||||
actions.append({
|
||||
"action_id": f"act_free_{_uuid.uuid4().hex[:6]}",
|
||||
"type": "wait", "duration_ms": 2000,
|
||||
})
|
||||
continue
|
||||
|
||||
elif step.action_type == ActionType.TYPE_TEXT:
|
||||
text = step.parameters.get("text", "")
|
||||
action["type"] = "type"
|
||||
action["text"] = text
|
||||
# Si un target est spécifié, activer la résolution visuelle
|
||||
if step.target:
|
||||
action["visual_mode"] = True
|
||||
action["target_spec"] = {"by_text": step.target}
|
||||
|
||||
elif step.action_type == ActionType.CLICK:
|
||||
action["type"] = "click"
|
||||
action["x_pct"] = 0.5
|
||||
action["y_pct"] = 0.5
|
||||
action["button"] = "left"
|
||||
if step.target:
|
||||
action["visual_mode"] = True
|
||||
action["target_spec"] = {"by_text": step.target}
|
||||
|
||||
elif step.action_type == ActionType.HOTKEY:
|
||||
keys_str = step.parameters.get("keys", "")
|
||||
if isinstance(keys_str, str):
|
||||
keys = [k.strip() for k in keys_str.split("+")]
|
||||
else:
|
||||
keys = keys_str
|
||||
action["type"] = "key_combo"
|
||||
action["keys"] = keys
|
||||
|
||||
elif step.action_type == ActionType.SCROLL:
|
||||
direction = step.parameters.get("direction", "down")
|
||||
amount = step.parameters.get("amount", 3)
|
||||
action["type"] = "scroll"
|
||||
action["delta"] = -amount if direction == "down" else amount
|
||||
|
||||
elif step.action_type == ActionType.WAIT:
|
||||
seconds = step.parameters.get("seconds", 2)
|
||||
action["type"] = "wait"
|
||||
action["duration_ms"] = int(seconds * 1000)
|
||||
|
||||
elif step.action_type == ActionType.SCREENSHOT:
|
||||
# Skip — l'Agent V1 capture déjà automatiquement
|
||||
continue
|
||||
|
||||
else:
|
||||
continue
|
||||
|
||||
actions.append(action)
|
||||
|
||||
return actions
|
||||
|
||||
|
||||
@app.route('/api/help')
|
||||
def api_help():
|
||||
"""Aide et mode d'emploi."""
|
||||
@@ -1138,6 +973,53 @@ def handle_cancel():
|
||||
emit('execution_cancelled', {}, broadcast=True)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Copilot WebSocket Events
|
||||
# =============================================================================
|
||||
|
||||
@socketio.on('copilot_approve')
def handle_copilot_approve():
    """Approve the copilot step that is currently awaiting validation.

    Replies with a ``copilot_error`` event when there is no copilot session
    or when the session is not in the ``waiting_approval`` state.
    """
    session_state = _copilot_sessions.get("__copilot__")
    awaiting_validation = (
        bool(session_state) and session_state["status"] == "waiting_approval"
    )

    if awaiting_validation:
        step_number = session_state['current_index'] + 1
        logger.info(f"Copilot approve: étape {step_number}/{session_state['total']}")
        # The state dict is shared with the copilot loop, which polls "status".
        session_state["status"] = "approved"
    else:
        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
|
||||
|
||||
|
||||
@socketio.on('copilot_skip')
def handle_copilot_skip():
    """Skip the copilot step that is currently awaiting validation.

    Replies with a ``copilot_error`` event when there is no copilot session
    or when the session is not in the ``waiting_approval`` state.
    """
    pending = _copilot_sessions.get("__copilot__")

    # Guard clause: nothing to skip unless a step is waiting for the user.
    if not (pending and pending["status"] == "waiting_approval"):
        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
        return None

    position = pending['current_index'] + 1
    logger.info(f"Copilot skip: étape {position}/{pending['total']}")
    pending["status"] = "skipped"
    return None
|
||||
|
||||
|
||||
@socketio.on('copilot_abort')
def handle_copilot_abort():
    """Abort the whole copilot workflow at the user's request.

    Marks the shared session state as ``aborted``, removes it from
    ``_copilot_sessions`` and announces the end of the run with a
    ``copilot_complete`` event.

    The event is broadcast: copilot step events are sent to every connected
    client, so every client must also learn that the run was aborted. The
    payload carries the same counters as the normal completion event.
    """
    copilot = _copilot_sessions.get("__copilot__")
    if not copilot:
        # NOTE(review): stays silent when nothing is running (approve/skip
        # answer with copilot_error instead) — presumably so that a repeated
        # abort is harmless; confirm this is intended.
        return

    logger.info(f"Copilot abort: workflow '{copilot['workflow_name']}'")
    copilot["status"] = "aborted"
    _copilot_sessions.pop("__copilot__", None)
    emit('copilot_complete', {
        "workflow": copilot["workflow_name"],
        "status": "aborted",
        "message": "Workflow annulé par l'utilisateur.",
        "completed": copilot.get("completed", 0),
        "skipped": copilot.get("skipped", 0),
        "failed": copilot.get("failed", 0),
        "total": copilot["total"],
    }, broadcast=True)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Exécution de workflow
|
||||
# =============================================================================
|
||||
@@ -1243,6 +1125,352 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
|
||||
)
|
||||
|
||||
|
||||
def _build_actions_from_workflow(match, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the list of normalized actions for a stored workflow.

    The workflow JSON at ``match.workflow_path`` is loaded, ``params`` are
    substituted through ``VariableManager``, and every entry of the top-level
    ``edges`` list is converted into one action dict (several for a
    ``compound`` edge). Only ``mouse_click``, ``text_input``, ``key_press``
    and ``compound`` edges produce actions; any other type is skipped.

    Args:
        match: Workflow match exposing ``workflow_path``.
        params: Variables substituted into the workflow before conversion.

    Returns:
        The actions in edge order, or an empty list when the workflow file
        cannot be loaded.
    """
    import uuid as _uuid

    def _new_action(step_index, action_type, action_params, target):
        # Fields shared by every action, whatever its type.
        return {
            "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}",
            "step_index": step_index,
            "description": _describe_action(action_type, action_params, target),
        }

    def _keys_of(spec):
        # Accept either a "keys" list or a single "key" entry.
        keys = spec.get("keys") or []
        if not keys and spec.get("key"):
            keys = [spec["key"]]
        return keys

    def _convert_sub_step(step, step_index):
        # Convert one step of a compound edge; None for unsupported types.
        sub_type = step.get("type", "unknown")
        sub_action = _new_action(step_index, sub_type, step, {})
        if sub_type == "key_press":
            sub_action["type"] = "key_combo"
            sub_action["keys"] = _keys_of(step)
        elif sub_type == "text_input":
            sub_action["type"] = "type"
            sub_action["text"] = step.get("text", "")
        elif sub_type == "wait":
            sub_action["type"] = "wait"
            sub_action["duration_ms"] = step.get("duration_ms", 500)
        elif sub_type == "mouse_click":
            sub_action["type"] = "click"
            sub_action["x_pct"] = step.get("x_pct", 0.5)
            sub_action["y_pct"] = step.get("y_pct", 0.5)
            sub_action["button"] = step.get("button", "left")
        else:
            return None
        return sub_action

    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding (which is not UTF-8 on every system).
        with open(match.workflow_path, 'r', encoding='utf-8') as f:
            workflow_data = json.load(f)
    except Exception as e:
        logger.error(f"Impossible de charger le workflow {match.workflow_path}: {e}")
        return []

    # Substitute the variables
    var_manager = VariableManager()
    var_manager.set_variables(params)
    workflow_data = var_manager.substitute_dict(workflow_data)

    actions: List[Dict[str, Any]] = []

    # "or" fallbacks: a JSON null must behave like a missing key.
    for i, edge in enumerate(workflow_data.get("edges") or []):
        action_dict = edge.get("action") or {}
        action_type = action_dict.get("type", "unknown")
        action_params = action_dict.get("parameters") or {}
        target_dict = action_dict.get("target") or {}

        if action_type == "compound":
            # A compound edge expands to its supported sub-steps, all of
            # which share the index of the edge they come from.
            for step in action_params.get("steps") or []:
                sub_action = _convert_sub_step(step, i)
                if sub_action is not None:
                    actions.append(sub_action)
            continue

        action = _new_action(i, action_type, action_params, target_dict)

        if action_type == "mouse_click":
            pos = target_dict.get("position") or [0.5, 0.5]
            action["type"] = "click"
            action["x_pct"] = pos[0] if len(pos) > 0 else 0.5
            action["y_pct"] = pos[1] if len(pos) > 1 else 0.5
            action["button"] = action_params.get("button", "left")
        elif action_type == "text_input":
            action["type"] = "type"
            action["text"] = action_params.get("text", "")
        elif action_type == "key_press":
            action["type"] = "key_combo"
            action["keys"] = _keys_of(action_params)
        else:
            continue

        # Attach a target_spec for visual resolution when the target carries
        # a role and/or a text.
        target_spec = {}
        if target_dict.get("role"):
            target_spec["by_role"] = target_dict["role"]
        if target_dict.get("text"):
            target_spec["by_text"] = target_dict["text"]
        if target_spec:
            action["target_spec"] = target_spec
            action["visual_mode"] = True

        actions.append(action)

    return actions
|
||||
|
||||
|
||||
def _describe_action(action_type: str, params: Dict[str, Any], target: Dict[str, Any]) -> str:
    """Return a human-readable description of an action for the copilot UI.

    Args:
        action_type: Workflow action type (``mouse_click``, ``text_input``,
            ``key_press``, ``compound``, ``wait``; anything else gets a
            generic label).
        params: Action parameters; the keys read depend on ``action_type``.
        target: Target description; ``text`` and ``role`` are used to label
            clicks.

    Returns:
        The description shown to the user (French, as displayed in the UI).
    """
    # A missing mapping must not crash a purely cosmetic helper.
    params = params or {}
    target = target or {}

    if action_type == "mouse_click":
        label = target.get("text") or target.get("role") or "un élément"
        return f"Clic sur '{label}'"

    if action_type == "text_input":
        raw_text = params.get("text")
        text = "" if raw_text is None else str(raw_text)
        preview = f"{text[:30]}..." if len(text) > 30 else text
        return f"Saisir le texte : '{preview}'"

    if action_type == "key_press":
        # Fall back to the single "key" entry when "keys" is missing or
        # empty, so that an empty list does not hide the key.
        keys = params.get("keys") or params.get("key") or ""
        if isinstance(keys, (list, tuple)):
            keys = "+".join(str(key) for key in keys)
        return f"Touche(s) : {keys}"

    if action_type == "compound":
        steps_count = len(params.get("steps") or [])
        return f"Action composée ({steps_count} sous-actions)"

    if action_type == "wait":
        ms = params.get("duration_ms", 500)
        return f"Attente {ms}ms"

    return f"Action : {action_type}"
|
||||
|
||||
|
||||
def _copilot_wait_for_decision(copilot_state: Dict[str, Any], max_wait: float,
                               poll_interval: float = 0.3) -> Optional[str]:
    """Poll the shared copilot state until the user decides or time runs out.

    Returns ``"approved"``, ``"skipped"`` or ``"aborted"``, or ``None`` when
    no decision was recorded within *max_wait* seconds.
    """
    import time

    # NOTE(review): this blocks with time.sleep; with an eventlet/gevent
    # async mode that is not monkey-patched, socketio.sleep may be required
    # instead — confirm against the server configuration.
    deadline = time.monotonic() + max_wait
    while True:
        status = copilot_state["status"]
        if status in ("approved", "skipped", "aborted"):
            return status
        if time.monotonic() >= deadline:
            return None
        time.sleep(poll_interval)


def _copilot_send_action(action: Dict[str, Any]):
    """Send one action to the streaming server; return ``(succeeded, message)``."""
    try:
        resp = http_requests.post(
            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/single",
            json={
                "action": action,
                "session_id": "",
            },
            timeout=10,
        )
        if resp.status_code != 200:
            error = resp.text[:200]
            return False, f"Erreur serveur : {error}"

        resp_data = resp.json()
        action_id = resp_data.get("action_id", action.get("action_id"))
        action_success = _wait_for_single_action_result(
            resp_data.get("session_id", ""),
            action_id,
            timeout=30,
        )
    except http_requests.ConnectionError:
        return False, "Serveur de streaming non disponible (port 5005)."
    except Exception as e:
        logger.error(f"Copilot action error: {e}")
        return False, f"Erreur : {str(e)}"

    if action_success:
        return True, "Action exécutée avec succès"
    return False, "L'action a échoué"


def execute_workflow_copilot(match, params: Dict[str, Any]):
    """Run a workflow in Copilot (step-by-step) mode.

    Builds the action list with ``_build_actions_from_workflow``, then for
    each action emits a ``copilot_step`` event and waits (up to 120 s) for
    the user's decision, which the WebSocket handlers record in the shared
    state stored under ``_copilot_sessions["__copilot__"]``. Approved actions
    are sent one at a time to the streaming server; every processed step is
    reported with ``copilot_step_result``, and the run ends with
    ``copilot_complete``.

    Args:
        match: Workflow match exposing ``workflow_name`` (and whatever
            ``_build_actions_from_workflow`` reads from it).
        params: Variables substituted into the workflow.
    """
    global execution_status

    workflow_name = match.workflow_name

    actions = _build_actions_from_workflow(match, params)
    if not actions:
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "error",
            "message": "Aucune action exécutable dans ce workflow.",
            "completed": 0,
            "total": 0,
        })
        return

    total = len(actions)
    max_wait = 120  # seconds granted to the user for each decision

    execution_status["running"] = True
    execution_status["workflow"] = workflow_name
    execution_status["progress"] = 0
    execution_status["message"] = f"Mode Copilot : {total} étapes"

    copilot_state = {
        "workflow_name": workflow_name,
        "actions": actions,
        "current_index": 0,
        "total": total,
        "status": "idle",
        "completed": 0,
        "skipped": 0,
        "failed": 0,
    }
    _copilot_sessions["__copilot__"] = copilot_state

    logger.info(f"Copilot démarré : '{workflow_name}' — {total} étapes")

    try:
        for idx, action in enumerate(actions):
            copilot_state["current_index"] = idx

            if copilot_state["status"] == "aborted":
                break

            copilot_state["status"] = "waiting_approval"
            socketio.emit('copilot_step', {
                "workflow": workflow_name,
                "step_index": idx,
                "total": total,
                "action": {
                    "action_id": action.get("action_id", ""),
                    "type": action.get("type", "unknown"),
                    "description": action.get("description", "Action inconnue"),
                },
            })

            decision = _copilot_wait_for_decision(copilot_state, max_wait)

            if decision is None:
                # No answer in time: treated as an abort, reported as a timeout.
                copilot_state["status"] = "aborted"
                socketio.emit('copilot_complete', {
                    "workflow": workflow_name,
                    "status": "timeout",
                    "message": f"Timeout : pas de réponse après {max_wait}s.",
                    "completed": copilot_state["completed"],
                    "total": total,
                })
                break

            if decision == "aborted":
                break

            if decision == "skipped":
                copilot_state["skipped"] += 1
                logger.info(f"Copilot skip étape {idx + 1}/{total}")
                step_status, step_message = "skipped", "Étape passée"
            else:
                logger.info(f"Copilot execute étape {idx + 1}/{total}: {action.get('type')}")
                succeeded, step_message = _copilot_send_action(action)
                if succeeded:
                    copilot_state["completed"] += 1
                    step_status = "completed"
                else:
                    copilot_state["failed"] += 1
                    step_status = "failed"

            socketio.emit('copilot_step_result', {
                "step_index": idx,
                "total": total,
                "status": step_status,
                "message": step_message,
            })

            # Progress counts every processed step, skipped ones included.
            execution_status["progress"] = int((idx + 1) / total * 100)
            execution_status["message"] = f"Copilot : étape {idx + 1}/{total}"

            # An abort may arrive while the step is executing; resetting the
            # status unconditionally would erase it and the loop would go on
            # to wait on a session the abort handler has already removed.
            if copilot_state["status"] != "aborted":
                copilot_state["status"] = "idle"
    finally:
        # Always release the session slot and the running flag, even when an
        # unexpected error ends the loop. Only our own entry is removed.
        if _copilot_sessions.get("__copilot__") is copilot_state:
            _copilot_sessions.pop("__copilot__", None)
        execution_status["running"] = False

    completed = copilot_state["completed"]
    skipped = copilot_state["skipped"]
    failed = copilot_state["failed"]
    final_status = copilot_state.get("status", "completed")

    # Aborted and timed-out runs have already been announced to the clients.
    if final_status != "aborted":
        success = failed == 0
        message = (
            f"Copilot terminé : {completed} réussies, "
            f"{skipped} passées, {failed} échouées sur {total} étapes."
        )
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "completed" if success else "partial",
            "message": message,
            "completed": completed,
            "skipped": skipped,
            "failed": failed,
            "total": total,
        })
        finish_execution(workflow_name, success, message)
|
||||
|
||||
|
||||
def _wait_for_single_action_result(session_id: str, action_id: str, timeout: int = 30) -> bool:
    """Give a single action sent to the streaming server time to complete.

    No result is actually fetched: the function pauses for a fixed settle
    delay (3 s, or ``timeout`` when that is shorter) and then reports success
    optimistically.

    Args:
        session_id: Replay session identifier (currently unused).
        action_id: Identifier of the action that was sent (currently unused).
        timeout: Upper bound, in seconds, on the time spent waiting.

    Returns:
        Always ``True``.
    """
    import time

    settle_seconds = 3.0
    # Clamp so that a zero or negative timeout returns immediately.
    delay = min(max(float(timeout), 0.0), settle_seconds)
    if delay > 0:
        time.sleep(delay)

    # Optimistic: the real result is reported through /replay/result.
    return True
|
||||
|
||||
|
||||
def execute_workflow(match, params):
|
||||
"""
|
||||
Exécuter un workflow — tente d'abord le streaming server,
|
||||
|
||||
Reference in New Issue
Block a user