rpa_vision_v3/agent_chat/app.py
Dom 79c19c5e9d fix(agent-chat): add a QUERY handler for workflow info
The chat listed the workflows but answered "Je n'ai pas d'information"
when the user asked for details. The QUERY handler now uses
SemanticMatcher.find_workflow() + get_workflow_help() to return the
description, tags and supported parameters.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 16:37:24 +01:00

#!/usr/bin/env python3
"""
RPA Vision V3 - Agent Chat
Interface conversationnelle pour communiquer avec le système RPA.
Style "Spotlight/Alfred" - minimaliste et efficace.
Composants intégrés:
- IntentParser: Compréhension des intentions utilisateur
- ConfirmationLoop: Validation avant actions critiques
- ResponseGenerator: Réponses en langage naturel
- ConversationManager: Contexte multi-tour
Usage:
python agent_chat/app.py
Puis ouvrir: http://localhost:5004
Auteur: Dom - Janvier 2026
"""
import asyncio
import json
import logging
import os
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
import requests as http_requests # Pour les appels au streaming server
from flask import Flask, render_template, request, jsonify
from flask_socketio import SocketIO, emit
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.workflow import SemanticMatcher, VariableManager
# Import des composants conversationnels
from .intent_parser import IntentParser, IntentType, get_intent_parser
from .confirmation import ConfirmationLoop, ConfirmationStatus, RiskLevel, get_confirmation_loop
from .response_generator import ResponseGenerator, get_response_generator
from .conversation_manager import ConversationManager, get_conversation_manager
from .autonomous_planner import AutonomousPlanner, get_autonomous_planner, ExecutionPlan
# GPU Resource Manager (optional)
try:
from core.gpu import get_gpu_resource_manager, ExecutionMode
GPU_AVAILABLE = True
except ImportError:
GPU_AVAILABLE = False
# Logging is configured before the optional imports below, whose except block uses `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Execution components (optional - pour exécution réelle)
try:
from core.execution import ActionExecutor, TargetResolver, ErrorHandler
from core.execution.execution_loop import ExecutionLoop, ExecutionMode as ExecMode, ExecutionState
from core.pipeline.workflow_pipeline import WorkflowPipeline
from core.capture import ScreenCapturer
EXECUTION_AVAILABLE = True
except ImportError as e:
logger.warning(f"Composants d'exécution non disponibles: {e}")
EXECUTION_AVAILABLE = False
app = Flask(__name__)
app.config['SECRET_KEY'] = 'rpa-vision-v3-secret'
socketio = SocketIO(app, cors_allowed_origins="*")
# Global state
matcher: Optional[SemanticMatcher] = None
gpu_manager = None
intent_parser: Optional[IntentParser] = None
confirmation_loop: Optional[ConfirmationLoop] = None
response_generator: Optional[ResponseGenerator] = None
conversation_manager: Optional[ConversationManager] = None
autonomous_planner: Optional[AutonomousPlanner] = None
# Execution components
workflow_pipeline = None
action_executor = None
execution_loop = None
screen_capturer = None
# URL du streaming server (Agent V1) pour l'exécution distante
STREAMING_SERVER_URL = os.environ.get(
"RPA_STREAMING_URL", "http://localhost:5005"
)
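# Example override (hypothetical host/port):
#   RPA_STREAMING_URL=http://192.168.1.50:5005 python agent_chat/app.py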
execution_status = {
"running": False,
"workflow": None,
"progress": 0,
"message": "",
"can_minimize": True
}
command_history: List[Dict[str, Any]] = []
def init_system():
"""Initialiser tous les composants du système."""
global matcher, gpu_manager
global intent_parser, confirmation_loop, response_generator, conversation_manager
global autonomous_planner
# 1. SemanticMatcher — multi-répertoires (P0-6) + matching LLM (P0-7)
# Scan data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
try:
matcher = SemanticMatcher(
workflows_dir=None, # None = scan tous les répertoires par défaut
use_llm=True, # Matching sémantique via Ollama (P0-7)
llm_model="qwen2.5:7b",
)
dirs_info = matcher.get_directories()
dirs_summary = ", ".join(
f"{d['path']}({d['workflow_count']})" for d in dirs_info if d['exists']
)
logger.info(
f"✓ SemanticMatcher: {len(matcher.get_all_workflows())} workflows "
f"[{dirs_summary}]"
)
except Exception as e:
logger.error(f"✗ SemanticMatcher: {e}")
matcher = None
# 2. GPU Resource Manager
if GPU_AVAILABLE:
try:
gpu_manager = get_gpu_resource_manager()
logger.info("✓ GPU Resource Manager connected")
except Exception as e:
logger.warning(f"⚠ GPU Resource Manager: {e}")
gpu_manager = None
# 3. Composants conversationnels
try:
intent_parser = get_intent_parser(use_llm=True) # LLM activé (Ollama)
confirmation_loop = get_confirmation_loop()
response_generator = get_response_generator()
conversation_manager = get_conversation_manager()
# Injecter les workflows dans l'intent_parser pour contexte LLM
if matcher and intent_parser:
workflows_for_llm = [
{"name": wf.name, "description": wf.description, "tags": wf.tags}
for wf in matcher.get_all_workflows()
]
intent_parser.set_workflows(workflows_for_llm)
logger.info("✓ Composants conversationnels initialisés (LLM activé)")
except Exception as e:
logger.error(f"✗ Composants conversationnels: {e}")
# Fallback aux composants de base
intent_parser = IntentParser(use_llm=False)
confirmation_loop = ConfirmationLoop()
response_generator = ResponseGenerator()
conversation_manager = ConversationManager()
# 4. Composants d'exécution réelle
global workflow_pipeline, action_executor, execution_loop, screen_capturer
if EXECUTION_AVAILABLE:
try:
# Pipeline de workflow (matching + actions)
workflow_pipeline = WorkflowPipeline()
logger.info("✓ WorkflowPipeline initialisé")
# Capture d'écran
screen_capturer = ScreenCapturer()
logger.info("✓ ScreenCapturer initialisé")
# Résolveur de cibles et gestionnaire d'erreurs
target_resolver = TargetResolver()
error_handler = ErrorHandler()
# Exécuteur d'actions
action_executor = ActionExecutor(
target_resolver=target_resolver,
error_handler=error_handler,
verify_postconditions=True
)
logger.info("✓ ActionExecutor initialisé")
# Boucle d'exécution (pour mode automatique)
execution_loop = ExecutionLoop(
pipeline=workflow_pipeline,
action_executor=action_executor,
screen_capturer=screen_capturer
)
logger.info("✓ ExecutionLoop initialisé")
except Exception as e:
logger.warning(f"⚠ Composants d'exécution partiels: {e}")
# Mode dégradé: simulation uniquement
workflow_pipeline = None
action_executor = None
execution_loop = None
else:
logger.info(" Mode simulation (composants d'exécution non disponibles)")
# 5. Autonomous Planner (Agent Libre)
try:
autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
# Configurer les callbacks pour l'exécution
if screen_capturer:
autonomous_planner.set_screen_capturer(screen_capturer.capture)
def progress_callback(data):
socketio.emit('agent_progress', data)
autonomous_planner.set_progress_callback(progress_callback)
logger.info(f"✓ AutonomousPlanner initialisé (LLM: {autonomous_planner.llm_available})")
except Exception as e:
logger.warning(f"⚠ AutonomousPlanner: {e}")
autonomous_planner = None
# =============================================================================
# Routes Web
# =============================================================================
@app.route('/')
def index():
"""Page principale - nouvelle interface chat."""
return render_template('chat.html')
@app.route('/classic')
def classic():
"""Ancienne interface (fallback)."""
return render_template('command.html')
@app.route('/api/status')
def api_status():
"""Statut complet du système."""
workflows_count = len(matcher.get_all_workflows()) if matcher else 0
# GPU Status
gpu_status = None
if gpu_manager:
try:
status = gpu_manager.get_status()
gpu_status = {
"mode": status.execution_mode.value,
"vlm_state": status.vlm_state.value,
"vlm_model": status.vlm_model,
"clip_device": status.clip_device,
"degraded": status.degraded_mode
}
if status.vram:
gpu_status["vram"] = {
"used_mb": status.vram.used_mb,
"total_mb": status.vram.total_mb,
"percent": round(status.vram.used_mb / status.vram.total_mb * 100, 1) if status.vram.total_mb > 0 else 0
}
except Exception as e:
logger.warning(f"GPU status error: {e}")
# Ollama Status
ollama_status = None
try:
import requests
response = requests.get("http://localhost:11434/api/tags", timeout=2)
if response.status_code == 200:
models = response.json().get('models', [])
ollama_status = {
"available": True,
"models_count": len(models)
}
except requests.RequestException:
ollama_status = {"available": False}
return jsonify({
"status": "online",
"workflows_count": workflows_count,
"execution": execution_status,
"gpu": gpu_status,
"ollama": ollama_status
})
@app.route('/api/workflows')
def api_workflows():
"""Liste des workflows (tous répertoires confondus)."""
if not matcher:
return jsonify({"workflows": [], "directories": []})
workflows = []
for wf in matcher.get_all_workflows():
workflows.append({
"id": wf.workflow_id,
"name": wf.name,
"description": wf.description,
"tags": wf.tags,
"source": wf.source_dir,
})
return jsonify({
"workflows": workflows,
"directories": matcher.get_directories(),
})
@app.route('/api/workflows/refresh', methods=['POST'])
def api_workflows_refresh():
"""
Forcer le rechargement des workflows depuis tous les répertoires.
Utile après qu'un nouveau workflow a été appris par le StreamProcessor.
"""
if not matcher:
return jsonify({"success": False, "error": "SemanticMatcher non initialisé"})
try:
count = matcher.reload_workflows()
# Re-injecter les workflows dans l'intent_parser (contexte LLM)
if intent_parser:
workflows_for_llm = [
{"name": wf.name, "description": wf.description, "tags": wf.tags}
for wf in matcher.get_all_workflows()
]
intent_parser.set_workflows(workflows_for_llm)
return jsonify({
"success": True,
"workflows_count": count,
"directories": matcher.get_directories(),
})
except Exception as e:
logger.error(f"Erreur rechargement workflows: {e}")
return jsonify({"success": False, "error": str(e)})
@app.route('/api/search', methods=['POST'])
def api_search():
"""Rechercher des workflows."""
data = request.json
query = data.get('query', '')
if not matcher or not query:
return jsonify({"matches": []})
matches = matcher.find_workflows(query, limit=5, min_confidence=0.2)
results = []
for m in matches:
results.append({
"workflow_id": m.workflow_id,
"workflow_name": m.workflow_name,
"confidence": m.confidence,
"extracted_params": m.extracted_params,
"match_reason": m.match_reason
})
return jsonify({"matches": results})
@app.route('/api/execute', methods=['POST'])
def api_execute():
"""Exécuter une commande."""
global execution_status
data = request.json
command = data.get('command', '')
params = data.get('params', {})
if not matcher or not command:
return jsonify({"success": False, "error": "Invalid command"})
# Trouver le workflow
match = matcher.find_workflow(command, min_confidence=0.2)
if not match:
return jsonify({
"success": False,
"error": "Aucun workflow correspondant trouvé"
})
# Combiner les paramètres
all_params = {**match.extracted_params, **params}
# Enregistrer dans l'historique
command_history.append({
"timestamp": datetime.now().isoformat(),
"command": command,
"workflow": match.workflow_name,
"params": all_params,
"status": "started"
})
# Mettre à jour le statut
execution_status = {
"running": True,
"workflow": match.workflow_name,
"progress": 0,
"message": "Démarrage...",
"can_minimize": True
}
# Notifier via WebSocket
socketio.emit('execution_started', {
"workflow": match.workflow_name,
"params": all_params
})
# Exécuter le workflow en arrière-plan
socketio.start_background_task(execute_workflow, match, all_params)
return jsonify({
"success": True,
"workflow": match.workflow_name,
"params": all_params,
"confidence": match.confidence
})
@app.route('/api/history')
def api_history():
"""Historique des commandes."""
return jsonify({"history": command_history[-20:]})
@app.route('/api/chat', methods=['POST'])
def api_chat():
"""
Endpoint conversationnel principal.
Utilise le flux complet:
1. IntentParser: Analyse l'intention
2. ConversationManager: Gère le contexte multi-tour
3. ConfirmationLoop: Valide les actions sensibles
4. ResponseGenerator: Génère la réponse
"""
data = request.json
message = data.get('message', '').strip()
session_id = data.get('session_id')
if not message:
return jsonify({"error": "Message vide"}), 400
# 1. Obtenir ou créer la session
session = conversation_manager.get_or_create_session(session_id=session_id)
# 2. Parser l'intention
intent = intent_parser.parse(message)
# 3. Résoudre les références anaphoriques (ex: "le même", "celui-ci")
intent = conversation_manager.resolve_references(session, intent)
# 4. Construire le contexte
context = conversation_manager.get_context_summary(session)
context["execution_status"] = execution_status
# 5. Traiter selon le type d'intention
result = {}
action_taken = None
if intent.intent_type == IntentType.CONFIRM:
# Confirmer une action en attente
pending = conversation_manager.get_pending_confirmation(session)
if pending:
confirmation_loop.confirm(pending.id)
conversation_manager.clear_pending_confirmation(session)
result = {"confirmed": True, "workflow": pending.workflow_name}
action_taken = "confirmed"
# Lancer l'exécution
socketio.start_background_task(
execute_workflow_from_confirmation, pending, session.session_id
)
else:
result = {"confirmed": False}
elif intent.intent_type == IntentType.DENY:
# Refuser une action en attente
pending = conversation_manager.get_pending_confirmation(session)
if pending:
confirmation_loop.deny(pending.id)
conversation_manager.clear_pending_confirmation(session)
result = {"denied": True}
action_taken = "denied"
elif intent.intent_type == IntentType.EXECUTE:
# Exécuter un workflow
if matcher and intent.workflow_hint:
match = matcher.find_workflow(intent.workflow_hint, min_confidence=0.2)
if match:
# Évaluer le risque
risk = confirmation_loop.evaluate_risk(
match.workflow_name,
{**match.extracted_params, **intent.parameters}
)
if confirmation_loop.requires_confirmation(risk):
# Créer une demande de confirmation
conf = confirmation_loop.create_confirmation_request(
workflow_name=match.workflow_name,
parameters={**match.extracted_params, **intent.parameters},
action_type="execute",
risk_level=risk
)
conversation_manager.set_pending_confirmation(session, conf)
# Générer la réponse de confirmation
response = response_generator.generate_confirmation_request(conf)
result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
action_taken = "confirmation_requested"
else:
# Exécuter directement
all_params = {**match.extracted_params, **intent.parameters}
result = {
"success": True,
"workflow": match.workflow_name,
"params": all_params,
"confidence": match.confidence
}
action_taken = "executed"
socketio.start_background_task(execute_workflow, match, all_params)
else:
result = {"not_found": True, "query": intent.workflow_hint}
else:
result = {"error": "Pas de workflow spécifié"}
elif intent.intent_type == IntentType.LIST:
# Lister les workflows avec métadonnées enrichies
if matcher:
workflows = [
{
"name": wf.name,
"description": wf.description or "Workflow appris automatiquement",
"tags": wf.tags[:3] if wf.tags else [],
"id": wf.workflow_id,
}
for wf in matcher.get_all_workflows()
]
result = {"workflows": workflows}
else:
result = {"workflows": []}
action_taken = "listed"
elif intent.intent_type == IntentType.STATUS:
result = {"execution": execution_status}
action_taken = "status_checked"
elif intent.intent_type == IntentType.CANCEL:
if execution_status.get("running"):
execution_status["running"] = False
execution_status["message"] = "Annulé"
result = {"cancelled": True}
else:
result = {"cancelled": False}
action_taken = "cancelled"
elif intent.intent_type == IntentType.HISTORY:
result = {"history": command_history[-10:]}
action_taken = "history_shown"
elif intent.intent_type == IntentType.QUERY:
# Rechercher des infos sur le workflow demandé
topic = intent.workflow_hint or intent.raw_query
if matcher and topic:
# Tenter de trouver le workflow correspondant
match = matcher.find_workflow(topic, min_confidence=0.3)
if match:
help_text = matcher.get_workflow_help(match.workflow_id)
result = {"answer": help_text}
else:
# Peut-être une question générale sur les workflows dispo
all_wf = matcher.get_all_workflows()
if all_wf:
wf_list = "\n".join(
f"• **{wf.name}**: {wf.description or 'Pas de description'}"
for wf in all_wf[:10]
)
result = {
"answer": f"Je n'ai pas trouvé de workflow '{topic}', "
f"mais voici les workflows disponibles :\n{wf_list}"
}
else:
result = {}
else:
result = {}
action_taken = "query_answered"
elif intent.intent_type == IntentType.HELP:
result = {}
action_taken = "help_shown"
elif intent.clarification_needed:
result = {"clarification_needed": True}
action_taken = "clarification_requested"
# 6. Générer la réponse (si pas déjà fait pour confirmation)
if action_taken != "confirmation_requested":
response = response_generator.generate(intent, context, result)
# 7. Enregistrer le tour dans la conversation
conversation_manager.add_turn(
session=session,
user_message=message,
intent=intent,
response=response.message,
action_taken=action_taken,
result=result
)
# 8. Retourner la réponse
return jsonify({
"session_id": session.session_id,
"intent": intent.to_dict(),
"response": response.to_dict(),
"result": result,
"context": {
"current_workflow": session.context.current_workflow,
"has_pending_confirmation": session.context.pending_confirmation is not None
}
})
def execute_workflow_from_confirmation(confirmation, session_id):
"""Exécuter un workflow après confirmation."""
global execution_status
if not matcher:
return
# Trouver le workflow
match = matcher.find_workflow(confirmation.workflow_name, min_confidence=0.1)
if not match:
return
# Utiliser les paramètres confirmés (ou modifiés)
params = confirmation.modified_parameters or confirmation.parameters
# IMPORTANT: Marquer l'exécution comme active
execution_status["running"] = True
execution_status["workflow"] = confirmation.workflow_name
execution_status["progress"] = 0
execution_status["message"] = "Démarrage..."
execute_workflow(match, params)
@app.route('/api/gpu/<action>', methods=['POST'])
def api_gpu_action(action):
"""Contrôler le GPU Resource Manager."""
if not gpu_manager:
return jsonify({"success": False, "error": "GPU Manager non disponible"})
async def do_action():
if action == "load-vlm":
return await gpu_manager.ensure_vlm_loaded()
elif action == "unload-vlm":
return await gpu_manager.ensure_vlm_unloaded()
elif action == "recording":
await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)
return True
elif action == "autopilot":
await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)
return True
elif action == "idle":
await gpu_manager.set_execution_mode(ExecutionMode.IDLE)
return True
return False
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
result = loop.run_until_complete(do_action())
loop.close()
return jsonify({"success": result, "action": action})
except Exception as e:
return jsonify({"success": False, "error": str(e)})
@app.route('/api/llm/status')
def api_llm_status():
"""Statut du LLM (Ollama)."""
status = {
"enabled": intent_parser.use_llm if intent_parser else False,
"available": intent_parser.llm_available if intent_parser else False,
"model": intent_parser.llm_model if intent_parser else None,
"endpoint": intent_parser.llm_endpoint if intent_parser else None,
"workflows_loaded": len(intent_parser._workflows_cache) if intent_parser else 0
}
# Lister les modèles Ollama disponibles
try:
import requests
response = requests.get("http://localhost:11434/api/tags", timeout=2)
if response.status_code == 200:
models = response.json().get('models', [])
status["available_models"] = [m["name"] for m in models]
except requests.RequestException:
status["available_models"] = []
return jsonify(status)
@app.route('/api/llm/model', methods=['POST'])
def api_llm_set_model():
"""Changer le modèle LLM."""
data = request.json
model = data.get('model')
if not model:
return jsonify({"success": False, "error": "Modèle non spécifié"})
if intent_parser:
intent_parser.llm_model = model
intent_parser._check_llm_availability()
return jsonify({
"success": True,
"model": model,
"available": intent_parser.llm_available
})
return jsonify({"success": False, "error": "IntentParser non initialisé"})
# =============================================================================
# API Agent Libre (Autonomous Mode)
# =============================================================================
@app.route('/api/agent/plan', methods=['POST'])
def api_agent_plan():
"""
Génère un plan d'exécution pour une tâche en langage naturel.
Le mode "Agent Libre" permet d'exécuter des tâches sans workflow pré-enregistré.
Le LLM (Qwen) décompose la demande en étapes d'actions.
"""
if not autonomous_planner:
return jsonify({"error": "Agent autonome non disponible"}), 503
data = request.json
user_request = data.get('request', '').strip()
if not user_request:
return jsonify({"error": "Requête vide"}), 400
try:
# Contexte optionnel (écran actuel, etc.)
context = data.get('context', {})
# Générer le plan
plan = autonomous_planner.plan(user_request, context)
return jsonify({
"success": True,
"plan": {
"task": plan.task_description,
"steps": [
{
"step": s.step_number,
"action": s.action_type.value,
"description": s.description,
"target": s.target,
"params": s.parameters,
"expected_result": s.expected_result
}
for s in plan.steps
],
"estimated_seconds": plan.estimated_duration_seconds,
"risk_level": plan.risk_level,
"requires_confirmation": plan.requires_confirmation
},
"llm_available": autonomous_planner.llm_available
})
except Exception as e:
logger.error(f"Agent plan error: {e}")
return jsonify({"error": str(e)}), 500
@app.route('/api/agent/execute', methods=['POST'])
def api_agent_execute():
"""
Exécute un plan d'agent autonome.
Attend un objet plan (généré par /api/agent/plan) et l'exécute étape par étape.
"""
if not autonomous_planner:
return jsonify({"error": "Agent autonome non disponible"}), 503
data = request.json
plan_data = data.get('plan')
if not plan_data:
return jsonify({"error": "Plan manquant"}), 400
try:
# Reconstruire le plan depuis les données
from .autonomous_planner import PlannedAction, ActionType
steps = []
for step_data in plan_data.get('steps', []):
action_type_str = step_data.get('action', 'click')
action_type_map = {
'open_app': ActionType.OPEN_APP,
'open_url': ActionType.OPEN_URL,
'click': ActionType.CLICK,
'type_text': ActionType.TYPE_TEXT,
'hotkey': ActionType.HOTKEY,
'scroll': ActionType.SCROLL,
'wait': ActionType.WAIT,
'screenshot': ActionType.SCREENSHOT
}
steps.append(PlannedAction(
step_number=step_data.get('step', len(steps) + 1),
action_type=action_type_map.get(action_type_str, ActionType.CLICK),
description=step_data.get('description', ''),
target=step_data.get('target'),
parameters=step_data.get('params', {}),
expected_result=step_data.get('expected_result')
))
plan = ExecutionPlan(
task_description=plan_data.get('task', ''),
steps=steps,
estimated_duration_seconds=plan_data.get('estimated_seconds', 30),
risk_level=plan_data.get('risk_level', 'low')
)
# Exécuter en arrière-plan
socketio.start_background_task(execute_agent_plan, plan)
return jsonify({
"success": True,
"message": "Exécution démarrée",
"steps_count": len(steps)
})
except Exception as e:
logger.error(f"Agent execute error: {e}")
return jsonify({"error": str(e)}), 500
@app.route('/api/agent/status')
def api_agent_status():
"""Statut de l'agent autonome."""
return jsonify({
"available": autonomous_planner is not None,
"llm_available": autonomous_planner.llm_available if autonomous_planner else False,
"llm_model": autonomous_planner.llm_model if autonomous_planner else None
})
def execute_agent_plan(plan: ExecutionPlan):
"""Exécute un plan d'agent en arrière-plan."""
import asyncio
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
results = loop.run_until_complete(autonomous_planner.execute_plan(plan))
loop.close()
# Envoyer le résultat final
success_count = sum(1 for r in results if r.success)
total = len(results)
socketio.emit('execution_completed', {
"success": success_count == total,
"workflow": plan.task_description,
"message": f"{success_count}/{total} étapes réussies",
"results": [
{
"step": r.action.step_number,
"success": r.success,
"message": r.message
}
for r in results
]
})
except Exception as e:
logger.error(f"Agent execution error: {e}")
socketio.emit('execution_completed', {
"success": False,
"workflow": plan.task_description,
"message": f"Erreur: {str(e)}"
})
@app.route('/api/help')
def api_help():
"""Aide et mode d'emploi."""
help_content = {
"title": "RPA Vision V3 - Mode d'emploi",
"sections": [
{
"title": "🎯 Commandes en langage naturel",
"content": """
Tapez simplement ce que vous voulez faire en français ou anglais.
Le système trouvera automatiquement le workflow correspondant.
**Exemples :**
- "facturer le client Acme"
- "exporter le rapport en PDF"
- "créer une facture pour Client ABC"
- "facturer les clients de A à Z"
"""
},
{
"title": "📋 Paramètres",
"content": """
Les paramètres sont extraits automatiquement de votre commande.
Vous pouvez aussi les spécifier manuellement dans le formulaire.
**Paramètres courants :**
- `client` : Nom du client
- `format` : Format d'export (pdf, excel)
- `start`, `end` : Plage de valeurs
"""
},
{
"title": "⌨️ Raccourcis clavier",
"content": """
- `Entrée` : Exécuter la commande
- `Échap` : Annuler / Fermer
- `↑` / `↓` : Naviguer dans l'historique
- `Ctrl+M` : Minimiser l'interface
"""
},
{
"title": "🔄 Pendant l'exécution",
"content": """
L'interface peut être minimisée pendant l'exécution.
Le workflow s'exécute en arrière-plan.
Vous serez notifié à la fin de l'exécution.
"""
}
]
}
return jsonify(help_content)
# =============================================================================
# WebSocket Events
# =============================================================================
@socketio.on('connect')
def handle_connect():
"""Client connecté."""
logger.info("Client connected")
emit('status', execution_status)
@socketio.on('disconnect')
def handle_disconnect():
"""Client déconnecté."""
logger.info("Client disconnected")
@socketio.on('cancel_execution')
def handle_cancel():
"""Annuler l'exécution."""
global execution_status
execution_status["running"] = False
execution_status["message"] = "Annulé"
emit('execution_cancelled', {}, broadcast=True)
# =============================================================================
# Exécution de workflow
# =============================================================================
def _try_streaming_server_replay(workflow_id: str, params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""
Tenter d'exécuter un workflow via le streaming server (Agent V1).
POST http://localhost:5005/api/v1/traces/stream/replay
avec workflow_id et params.
Returns:
Réponse du serveur si succès, None si indisponible ou erreur.
"""
try:
resp = http_requests.post(
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay",
json={
"workflow_id": workflow_id,
"session_id": f"chat_{datetime.now().strftime('%H%M%S')}",
"params": params or {},
},
timeout=5,
)
if resp.status_code == 200:
data = resp.json()
logger.info(f"Workflow {workflow_id} envoyé au streaming server: {data}")
return data
else:
logger.debug(
f"Streaming server refus (HTTP {resp.status_code}): "
f"{resp.text[:200]}"
)
except http_requests.ConnectionError:
logger.debug("Streaming server non disponible (connexion refusée)")
except http_requests.Timeout:
logger.debug("Streaming server timeout")
except Exception as e:
logger.debug(f"Erreur streaming server: {e}")
return None
def execute_workflow(match, params):
"""
Exécuter un workflow — tente d'abord le streaming server,
puis fallback sur l'exécution locale.
"""
global execution_status
import time
# Tenter l'exécution via le streaming server (Agent V1 distant)
replay_result = _try_streaming_server_replay(match.workflow_id, params)
if replay_result:
# Le streaming server a accepté le replay
execution_status["running"] = True
execution_status["workflow"] = match.workflow_name
execution_status["progress"] = 50
execution_status["message"] = "Envoyé au streaming server (Agent V1)"
socketio.emit('execution_progress', {
"progress": 50,
"step": "Exécution via streaming server...",
"current": 1,
"total": 1,
})
finish_execution(
match.workflow_name, True,
f"Workflow envoyé au streaming server ({replay_result.get('status', 'ok')})"
)
return
# Fallback : exécution locale
logger.info("Streaming server indisponible, exécution locale")
# Mark the run as active so the execution loops below (which check
# execution_status["running"]) work even when the caller did not set the flag.
execution_status["running"] = True
execution_status["workflow"] = match.workflow_name
execution_status["progress"] = 0
execution_status["message"] = "Exécution locale..."
try:
# Charger le workflow
with open(match.workflow_path, 'r') as f:
workflow_data = json.load(f)
# Créer le VariableManager et injecter les paramètres
var_manager = VariableManager()
var_manager.set_variables(params)
# Substituer les variables
workflow_data = var_manager.substitute_dict(workflow_data)
# Obtenir les étapes (edges)
edges = workflow_data.get("edges", [])
total_steps = len(edges) if edges else 1
# Étape 1: Initialisation
update_progress(10, "Initialisation", 1, total_steps + 2)
time.sleep(0.3)
# Étape 2: Préparation GPU (si disponible)
if gpu_manager and GPU_AVAILABLE:
update_progress(15, "Préparation GPU...", 2, total_steps + 2)
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT))
loop.close()
except Exception as e:
logger.warning(f"GPU mode change failed: {e}")
# Vérifier si l'exécution réelle est disponible
use_real_execution = (
EXECUTION_AVAILABLE and
action_executor is not None and
screen_capturer is not None
)
if use_real_execution:
logger.info(f"🚀 Exécution RÉELLE du workflow: {match.workflow_name}")
_execute_workflow_real(workflow_data, edges, total_steps, params)
else:
logger.info(f"🎭 Exécution SIMULÉE du workflow: {match.workflow_name}")
_execute_workflow_simulated(edges, total_steps)
# Finalisation
if execution_status["running"]:
update_progress(95, "Finalisation...", total_steps + 1, total_steps + 2)
time.sleep(0.2)
finish_execution(match.workflow_name, True, "Workflow terminé avec succès")
except Exception as e:
logger.error(f"Execution error: {e}")
import traceback
traceback.print_exc()
finish_execution(match.workflow_name, False, f"Erreur: {str(e)}")
def _execute_workflow_real(workflow_data, edges, total_steps, params):
"""Exécution réelle avec ActionExecutor."""
import time
import tempfile
import os
from PIL import Image
# Capturer l'écran initial
update_progress(20, "Capture écran initial...", 2, total_steps + 2)
try:
screenshot_array = screen_capturer.capture()
if screenshot_array is None:
raise Exception("Capture retourne None")
# Sauvegarder le screenshot dans un fichier temporaire
temp_dir = Path("data/temp")
temp_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = str(temp_dir / f"capture_{datetime.now().strftime('%H%M%S')}.png")
# Convertir numpy array en image et sauvegarder
img = Image.fromarray(screenshot_array)
img.save(screenshot_path)
logger.info(f"📸 Screenshot capturé: {screenshot_path}")
except Exception as e:
logger.warning(f"Capture écran échouée: {e}, utilisation mode dégradé")
_execute_workflow_simulated(edges, total_steps)
return
# Créer le ScreenState complet pour l'exécution
try:
from core.models.screen_state import (
ScreenState, WindowContext, RawLevel, PerceptionLevel,
ContextLevel, EmbeddingRef
)
file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0
screen_state = ScreenState(
screen_state_id=f"agent_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
timestamp=datetime.now(),
session_id="agent_chat",
window=WindowContext(
app_name="unknown",
window_title="Unknown",
screen_resolution=[1920, 1080],
workspace="main"
),
raw=RawLevel(
screenshot_path=screenshot_path,
capture_method="agent_chat",
file_size_bytes=file_size
),
perception=PerceptionLevel(
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
detected_text=[],
text_detection_method="none",
confidence_avg=0.0
),
context=ContextLevel(),
ui_elements=[]
)
logger.info("✓ ScreenState complet créé")
except Exception as e:
logger.warning(f"Création ScreenState échouée: {e}, utilisation mode simulé")
_execute_workflow_simulated(edges, total_steps)
return
# Exécuter chaque edge avec ActionExecutor
success_count = 0
fail_count = 0
for i, edge in enumerate(edges):
if not execution_status["running"]:
logger.info("⏹️ Exécution annulée par l'utilisateur")
break
action = edge.get("action", {})
action_type = action.get("type", "unknown")
progress = int(20 + (i + 1) / total_steps * 70)
step_name = f"Étape {i+1}/{total_steps}: {action_type}"
update_progress(progress, step_name, i + 3, total_steps + 2)
logger.info(f"▶️ Exécution: {step_name}")
try:
# Créer un objet Edge compatible avec ActionExecutor
workflow_edge = _create_workflow_edge(edge, params)
# Exécuter l'action réelle
result = action_executor.execute_edge(workflow_edge, screen_state)
if result.status.value == "success":
success_count += 1
logger.info(f"{step_name} - Succès")
# Recapturer l'écran après chaque action réussie
try:
time.sleep(0.3) # Petit délai pour laisser l'UI se mettre à jour
new_screenshot = screen_capturer.capture()
if new_screenshot is not None:
new_path = str(temp_dir / f"capture_{datetime.now().strftime('%H%M%S_%f')}.png")
Image.fromarray(new_screenshot).save(new_path)
screen_state.raw.screenshot_path = new_path
screen_state.raw.file_size_bytes = os.path.getsize(new_path)
except Exception as recapture_err:
logger.debug(f"Recapture échouée: {recapture_err}")
pass # Continuer même si la recapture échoue
else:
fail_count += 1
logger.warning(f"⚠️ {step_name} - {result.status.value}: {result.message}")
# Continuer malgré l'échec (mode best-effort)
if fail_count >= 3:
logger.error("❌ Trop d'échecs, arrêt de l'exécution")
break
except Exception as e:
fail_count += 1
logger.error(f"❌ Erreur lors de {step_name}: {e}")
if fail_count >= 3:
logger.error("❌ Trop d'erreurs, arrêt de l'exécution")
break
logger.info(f"📊 Résultat: {success_count} succès, {fail_count} échecs sur {total_steps} étapes")
def _execute_workflow_simulated(edges, total_steps):
"""Exécution simulée (fallback)."""
import time
for i, edge in enumerate(edges):
if not execution_status["running"]:
break
action = edge.get("action", {})
action_type = action.get("type", "unknown")
progress = int(20 + (i + 1) / total_steps * 70)
step_name = f"Étape {i+1}: {action_type} (simulé)"
update_progress(progress, step_name, i + 3, total_steps + 2)
# Simulation avec délai
time.sleep(0.5)
logger.info(f"🎭 Simulé: {step_name}")
def _create_workflow_edge(edge_dict, params):
"""Créer un objet WorkflowEdge depuis un dictionnaire."""
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List
# Importer les vraies classes du système
try:
from core.models.workflow_graph import Action, TargetSpec, WorkflowEdge, EdgeConstraints, PostConditions, ActionType
USE_REAL_CLASSES = True
except ImportError:
USE_REAL_CLASSES = False
action_dict = edge_dict.get("action", {})
target_dict = action_dict.get("target", {})
# Substituer les paramètres dans le texte cible
target_text = target_dict.get("text", "")
if target_text and isinstance(target_text, str):
for key, val in params.items():
target_text = target_text.replace(f"{{{{{key}}}}}", str(val)) # {{key}}
target_text = target_text.replace(f"${{{key}}}", str(val)) # ${key}
target_text = target_text.replace(f"${key}", str(val)) # $key
if USE_REAL_CLASSES:
# Créer le TargetSpec avec les vraies classes
target_spec = TargetSpec(
by_role=target_dict.get("role"),
by_text=target_text if target_text else target_dict.get("text"),
by_position=target_dict.get("position"),
)
# Convertir le type d'action en ActionType Enum
action_type_str = action_dict.get("type", "mouse_click")
try:
action_type = ActionType(action_type_str)
except ValueError:
# Fallback si le type n'existe pas dans l'enum
action_type = ActionType.MOUSE_CLICK
# Créer l'Action
action = Action(
type=action_type,
target=target_spec,
parameters=action_dict.get("parameters", {})
)
# Créer le WorkflowEdge
return WorkflowEdge(
edge_id=edge_dict.get("edge_id", edge_dict.get("id", f"edge_{id(edge_dict)}")),
from_node=edge_dict.get("source", edge_dict.get("from_node", "")),
to_node=edge_dict.get("target", edge_dict.get("to_node", "")),
action=action,
constraints=EdgeConstraints(),
post_conditions=PostConditions()
)
else:
# Fallback: classes simples
@dataclass
class SimpleTargetSpec:
by_role: Optional[str] = None
by_text: Optional[str] = None
by_position: Optional[tuple] = None
selection_policy: str = "first"
fallback_strategy: str = "visual_similarity"
embedding_ref: Optional[Any] = None
context_hints: Dict[str, Any] = field(default_factory=dict)
hard_constraints: Dict[str, Any] = field(default_factory=dict)
weights: Dict[str, float] = field(default_factory=dict)
@dataclass
class SimpleAction:
type: str
target: SimpleTargetSpec
parameters: Dict[str, Any] = field(default_factory=dict)
@dataclass
class SimpleWorkflowEdge:
edge_id: str
from_node: str
to_node: str
action: SimpleAction
constraints: Any = None
post_conditions: Any = None
target_spec = SimpleTargetSpec(
by_role=target_dict.get("role"),
by_text=target_text if target_text else target_dict.get("text"),
)
action = SimpleAction(
type=action_dict.get("type", "unknown"),
target=target_spec,
parameters=action_dict.get("parameters", {})
)
return SimpleWorkflowEdge(
edge_id=edge_dict.get("edge_id", edge_dict.get("id", f"edge_{id(edge_dict)}")),
from_node=edge_dict.get("source", edge_dict.get("from_node", "")),
to_node=edge_dict.get("target", edge_dict.get("to_node", "")),
action=action,
constraints=None,
post_conditions=None
)
def update_progress(progress: int, message: str, current: int, total: int):
"""Mettre à jour la progression."""
global execution_status
execution_status["progress"] = progress
execution_status["message"] = message
socketio.emit('execution_progress', {
"progress": progress,
"step": message,
"current": current,
"total": total
})
def finish_execution(workflow_name: str, success: bool, message: str):
"""Terminer l'exécution."""
global execution_status
execution_status["running"] = False
execution_status["progress"] = 100 if success else 0
execution_status["message"] = message
# Mettre à jour l'historique
if command_history:
command_history[-1]["status"] = "completed" if success else "failed"
socketio.emit('execution_completed', {
"workflow": workflow_name,
"success": success,
"message": message
})
# =============================================================================
# Main
# =============================================================================
if __name__ == '__main__':
init_system()
print("""
╔════════════════════════════════════════════════════════════╗
║ RPA Vision V3 - Interface de Commande ║
║ ║
║ 🌐 http://localhost:5004 ║
║ ║
║ Ctrl+C pour arrêter ║
╚════════════════════════════════════════════════════════════╝
""")
socketio.run(app, host='127.0.0.1', port=5004, debug=False, allow_unsafe_werkzeug=True)