#!/usr/bin/env python3 """ RPA Vision V3 - Agent Chat Interface conversationnelle pour communiquer avec le système RPA. Style "Spotlight/Alfred" - minimaliste et efficace. Composants intégrés: - IntentParser: Compréhension des intentions utilisateur - ConfirmationLoop: Validation avant actions critiques - ResponseGenerator: Réponses en langage naturel - ConversationManager: Contexte multi-tour Usage: python agent_chat/app.py Puis ouvrir: http://localhost:5004 Auteur: Dom - Janvier 2026 """ import asyncio import json import logging import os import sys from pathlib import Path from datetime import datetime from typing import Dict, Any, List, Optional import requests as http_requests # Pour les appels au streaming server from flask import Flask, render_template, request, jsonify, send_from_directory from flask_socketio import SocketIO, emit from werkzeug.utils import secure_filename # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent)) from core.workflow import SemanticMatcher, VariableManager # Import des composants conversationnels from .intent_parser import IntentParser, IntentType, get_intent_parser from .confirmation import ConfirmationLoop, ConfirmationStatus, RiskLevel, get_confirmation_loop from .response_generator import ResponseGenerator, GeneratedResponse, get_response_generator from .conversation_manager import ConversationManager, get_conversation_manager from .autonomous_planner import AutonomousPlanner, get_autonomous_planner, ExecutionPlan from .gesture_catalog import GestureCatalog # GPU Resource Manager (optional) try: from core.gpu import get_gpu_resource_manager, ExecutionMode GPU_AVAILABLE = True except ImportError: GPU_AVAILABLE = False # Import de données Excel/CSV try: from core.data.excel_importer import ExcelImporter from core.data.db_iterator import DBIterator DATA_IMPORT_AVAILABLE = True except ImportError: DATA_IMPORT_AVAILABLE = False # Execution components (optional - pour exécution réelle) try: from 
core.execution import ActionExecutor, TargetResolver, ErrorHandler from core.execution.execution_loop import ExecutionLoop, ExecutionMode as ExecMode, ExecutionState from core.pipeline.workflow_pipeline import WorkflowPipeline from core.capture import ScreenCapturer EXECUTION_AVAILABLE = True except ImportError as e: logger.warning(f"Composants d'exécution non disponibles: {e}") EXECUTION_AVAILABLE = False logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = Flask(__name__) import secrets as _secrets app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', _secrets.token_hex(32)) app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50 MB max upload (sécurité HIGH) _ALLOWED_ORIGINS = [ "http://localhost:3002", "http://localhost:5002", "https://vwb.labs.laurinebazin.design", "https://lea.labs.laurinebazin.design", ] socketio = SocketIO(app, cors_allowed_origins=_ALLOWED_ORIGINS) # ============================================================ # Headers de sécurité (sécurité HIGH) # ============================================================ @app.after_request def set_security_headers(response): """Ajouter les headers de sécurité à toutes les réponses.""" response.headers['X-Content-Type-Options'] = 'nosniff' response.headers['X-Frame-Options'] = 'SAMEORIGIN' response.headers['X-XSS-Protection'] = '1; mode=block' response.headers['Referrer-Policy'] = 'strict-origin-when-cross-origin' return response # Global state matcher: Optional[SemanticMatcher] = None gpu_manager = None intent_parser: Optional[IntentParser] = None confirmation_loop: Optional[ConfirmationLoop] = None response_generator: Optional[ResponseGenerator] = None conversation_manager: Optional[ConversationManager] = None autonomous_planner: Optional[AutonomousPlanner] = None gesture_catalog: Optional[GestureCatalog] = None # Execution components workflow_pipeline = None action_executor = None execution_loop = None screen_capturer = None # URL du streaming server (Agent V1) pour 
l'exécution distante STREAMING_SERVER_URL = os.environ.get( "RPA_STREAMING_URL", "http://localhost:5005" ) # Token API pour le streaming server _STREAMING_API_TOKEN = os.environ.get("RPA_API_TOKEN", "") def _streaming_headers() -> dict: """Headers d'authentification pour les appels au streaming server.""" headers = {"Content-Type": "application/json"} if _STREAMING_API_TOKEN: headers["Authorization"] = f"Bearer {_STREAMING_API_TOKEN}" return headers execution_status = { "running": False, "workflow": None, "progress": 0, "message": "", "can_minimize": True } command_history: List[Dict[str, Any]] = [] def _fetch_connected_machines() -> List[Dict[str, Any]]: """Récupérer la liste des machines connectées depuis le streaming server. Appel non-bloquant au endpoint /api/v1/traces/stream/machines. Retourne une liste vide si le serveur est injoignable. """ try: resp = http_requests.get( f"{STREAMING_SERVER_URL}/api/v1/traces/stream/machines", headers=_streaming_headers(), timeout=3, ) if resp.ok: return resp.json().get("machines", []) except Exception: pass return [] # Répertoire d'upload et chemin de la base de données PROJECT_ROOT = Path(__file__).parent.parent UPLOAD_DIR = PROJECT_ROOT / "data" / "uploads" DEFAULT_DB_PATH = PROJECT_ROOT / "data" / "databases" / "rpa_data.db" # Import de données — composants globaux excel_importer: Optional['ExcelImporter'] = None db_iterator: Optional['DBIterator'] = None # État d'import en attente de confirmation (session_id → infos d'import) _pending_imports: Dict[str, Dict[str, Any]] = {} # Copilot state — suivi du mode pas-à-pas _copilot_sessions: Dict[str, Dict[str, Any]] = {} _COPILOT_KEYWORDS = [ "copilot", "co-pilot", "pas à pas", "pas-à-pas", "pas a pas", "étape par étape", "etape par etape", "step by step", "une étape à la fois", "mode assisté", "mode assiste", "mode guidé", "mode guide", ] def _detect_copilot_mode(message: str) -> bool: """Détecter si l'utilisateur demande le mode Copilot.""" msg_lower = message.lower() return 
def init_system():
    """Initialize every component of the system.

    Populates the module-level globals (matcher, conversational components,
    execution components, planner, gesture catalog, data importers).
    Each section degrades gracefully: a failed component is logged and
    left as None rather than aborting startup.
    """
    global matcher, gpu_manager
    global intent_parser, confirmation_loop, response_generator, conversation_manager
    global autonomous_planner

    # 1. SemanticMatcher — multi-directory (P0-6) + LLM matching (P0-7)
    # Scans data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
    try:
        matcher = SemanticMatcher(
            workflows_dir=None,   # None = scan all default directories
            use_llm=True,         # Semantic matching via Ollama (P0-7)
            llm_model="qwen2.5:7b",
        )
        dirs_info = matcher.get_directories()
        dirs_summary = ", ".join(
            f"{d['path']}({d['workflow_count']})" for d in dirs_info if d['exists']
        )
        logger.info(
            f"✓ SemanticMatcher: {len(matcher.get_all_workflows())} workflows "
            f"[{dirs_summary}]"
        )
    except Exception as e:
        logger.error(f"✗ SemanticMatcher: {e}")
        matcher = None

    # 2. GPU Resource Manager
    if GPU_AVAILABLE:
        try:
            gpu_manager = get_gpu_resource_manager()
            logger.info("✓ GPU Resource Manager connected")
        except Exception as e:
            logger.warning(f"⚠ GPU Resource Manager: {e}")
            gpu_manager = None

    # 3. Conversational components
    try:
        intent_parser = get_intent_parser(use_llm=True)  # LLM enabled (Ollama)
        confirmation_loop = get_confirmation_loop()
        response_generator = get_response_generator()
        conversation_manager = get_conversation_manager()
        # Inject the known workflows into the intent_parser for LLM context
        if matcher and intent_parser:
            workflows_for_llm = [
                {"name": wf.name, "description": wf.description, "tags": wf.tags}
                for wf in matcher.get_all_workflows()
            ]
            intent_parser.set_workflows(workflows_for_llm)
        logger.info("✓ Composants conversationnels initialisés (LLM activé)")
    except Exception as e:
        logger.error(f"✗ Composants conversationnels: {e}")
        # Fall back to the basic (non-LLM) components
        intent_parser = IntentParser(use_llm=False)
        confirmation_loop = ConfirmationLoop()
        response_generator = ResponseGenerator()
        conversation_manager = ConversationManager()

    # 4. Real execution components
    global workflow_pipeline, action_executor, execution_loop, screen_capturer
    if EXECUTION_AVAILABLE:
        try:
            # Workflow pipeline (matching + actions)
            workflow_pipeline = WorkflowPipeline()
            logger.info("✓ WorkflowPipeline initialisé")

            # Screen capture
            screen_capturer = ScreenCapturer()
            logger.info("✓ ScreenCapturer initialisé")

            # Target resolver and error handler
            target_resolver = TargetResolver()
            error_handler = ErrorHandler()

            # Action executor
            action_executor = ActionExecutor(
                target_resolver=target_resolver,
                error_handler=error_handler,
                verify_postconditions=True
            )
            logger.info("✓ ActionExecutor initialisé")

            # Execution loop (automatic mode)
            execution_loop = ExecutionLoop(
                pipeline=workflow_pipeline,
                action_executor=action_executor,
                screen_capturer=screen_capturer
            )
            logger.info("✓ ExecutionLoop initialisé")
        except Exception as e:
            logger.warning(f"⚠ Composants d'exécution partiels: {e}")
            # Degraded mode: simulation only
            workflow_pipeline = None
            action_executor = None
            execution_loop = None
    else:
        logger.info("ℹ Mode simulation (composants d'exécution non disponibles)")

    # 5. Autonomous Planner (Free Agent)
    try:
        autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
        # Wire the execution callbacks
        if screen_capturer:
            autonomous_planner.set_screen_capturer(screen_capturer.capture)

        def progress_callback(data):
            socketio.emit('agent_progress', data)

        autonomous_planner.set_progress_callback(progress_callback)
        logger.info(f"✓ AutonomousPlanner initialisé (LLM: {autonomous_planner.llm_available})")
    except Exception as e:
        logger.warning(f"⚠ AutonomousPlanner: {e}")
        autonomous_planner = None

    # 6. GestureCatalog (universal keyboard shortcuts)
    global gesture_catalog
    try:
        gesture_catalog = GestureCatalog()
        logger.info(f"✓ GestureCatalog: {len(gesture_catalog.list_all())} gestes chargés")
    except Exception as e:
        logger.warning(f"⚠ GestureCatalog: {e}")
        gesture_catalog = None

    # 7. Excel/CSV data import
    global excel_importer, db_iterator
    if DATA_IMPORT_AVAILABLE:
        try:
            UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
            excel_importer = ExcelImporter(db_path=str(DEFAULT_DB_PATH))
            db_iterator = DBIterator(db_path=str(DEFAULT_DB_PATH))
            logger.info(f"✓ ExcelImporter + DBIterator (db: {DEFAULT_DB_PATH})")
        except Exception as e:
            logger.warning(f"⚠ Data import: {e}")
            excel_importer = None
            db_iterator = None
    else:
        logger.info("ℹ Import Excel non disponible (openpyxl manquant ?)")


# =============================================================================
# Web routes
# =============================================================================

@app.route('/')
def index():
    """Main page — new chat interface."""
    return render_template('chat.html')


@app.route('/classic')
def classic():
    """Old interface (fallback)."""
    return render_template('command.html')


@app.route('/api/status')
def api_status():
    """Full system status: workflows, execution, GPU and Ollama."""
    workflows_count = len(matcher.get_all_workflows()) if matcher else 0

    # GPU status
    gpu_status = None
    if gpu_manager:
        try:
            status = gpu_manager.get_status()
            gpu_status = {
                "mode": status.execution_mode.value,
                "vlm_state": status.vlm_state.value,
                "vlm_model": status.vlm_model,
                "clip_device": status.clip_device,
                "degraded": status.degraded_mode
            }
            if status.vram:
                gpu_status["vram"] = {
                    "used_mb": status.vram.used_mb,
                    "total_mb": status.vram.total_mb,
                    "percent": round(status.vram.used_mb / status.vram.total_mb * 100, 1)
                    if status.vram.total_mb > 0 else 0
                }
        except Exception as e:
            logger.warning(f"GPU status error: {e}")

    # Ollama status
    ollama_status = None
    try:
        # BUGFIX: reuse the module-level alias instead of re-importing requests,
        # and narrow the bare `except:` so SystemExit/KeyboardInterrupt propagate.
        response = http_requests.get("http://localhost:11434/api/tags", timeout=2)
        if response.status_code == 200:
            models = response.json().get('models', [])
            ollama_status = {
                "available": True,
                "models_count": len(models)
            }
    except Exception:
        ollama_status = {"available": False}

    return jsonify({
        "status": "online",
        "workflows_count": workflows_count,
        "execution": execution_status,
        "gpu": gpu_status,
        "ollama": ollama_status
    })
{ "mode": status.execution_mode.value, "vlm_state": status.vlm_state.value, "vlm_model": status.vlm_model, "clip_device": status.clip_device, "degraded": status.degraded_mode } if status.vram: gpu_status["vram"] = { "used_mb": status.vram.used_mb, "total_mb": status.vram.total_mb, "percent": round(status.vram.used_mb / status.vram.total_mb * 100, 1) if status.vram.total_mb > 0 else 0 } except Exception as e: logger.warning(f"GPU status error: {e}") # Ollama Status ollama_status = None try: import requests response = requests.get("http://localhost:11434/api/tags", timeout=2) if response.status_code == 200: models = response.json().get('models', []) ollama_status = { "available": True, "models_count": len(models) } except: ollama_status = {"available": False} return jsonify({ "status": "online", "workflows_count": workflows_count, "execution": execution_status, "gpu": gpu_status, "ollama": ollama_status }) @app.route('/api/workflows') def api_workflows(): """Liste unifiée des workflows (appris + VWB), filtrée par OS. Sources fusionnées : 1. Workflows appris (SemanticMatcher — data/training/workflows/) 2. Workflows VWB (port 5002 — SQLite, édités par l'humain) Dédupliqués par nom : si un workflow appris a été importé dans le VWB, seule la version VWB est retournée (c'est la version validée/corrigée). Query params: os: Filtrer par OS — 'windows' ou 'linux' (optionnel). Par défaut, détecte l'OS du serveur Léa (= la machine du docteur). 
""" if not matcher: return jsonify({"workflows": [], "directories": []}) # Détecter l'OS : paramètre explicite ou auto-détection depuis la plateforme os_filter = request.args.get('os') if not os_filter: import platform os_filter = 'windows' if platform.system().lower() == 'windows' else 'linux' seen_ids = set() workflows = [] # Source 1 : workflows appris (core JSON) for wf in matcher.get_all_workflows(): wf_data = { "id": wf.workflow_id, "name": wf.name, "description": wf.description, "tags": wf.tags, "source": wf.source_dir, "origin": "learned", } # Ajouter le machine_id source si disponible (workflows appris en streaming) machine_id = getattr(wf, '_machine_id', None) if machine_id: wf_data["machine_id"] = machine_id workflows.append(wf_data) seen_ids.add(wf.workflow_id) # Source 2 : workflows VWB (édités par l'humain) vwb_workflows = _fetch_vwb_workflows() for vwb_wf in vwb_workflows: vwb_id = vwb_wf.get("id", "") vwb_wf["origin"] = "vwb" # Si un workflow VWB a été importé depuis un appris, marquer le doublon desc = vwb_wf.get("description", "") or "" # Détecter les doublons par nom similaire ou description contenant l'ID core is_duplicate = False for core_id in seen_ids: if core_id in desc: is_duplicate = True break if vwb_id not in seen_ids and not is_duplicate: workflows.append(vwb_wf) seen_ids.add(vwb_id) # Filtrer par OS : ne montrer que les workflows compatibles avec la machine du docteur # Le machine_id ou source_dir contient le nom OS (ex: DESKTOP-58D5CAC_windows, dom-X870_linux) if os_filter: os_lower = os_filter.lower() filtered_workflows = [] for wf in workflows: mid = (wf.get("machine_id") or "").lower() src = (wf.get("source") or "").lower() # Un workflow VWB (sans machine_id) passe toujours le filtre if wf.get("origin") == "vwb" and not mid: filtered_workflows.append(wf) elif os_lower in mid or os_lower in src: filtered_workflows.append(wf) workflows = filtered_workflows # Récupérer la liste des machines connectées depuis le streaming server 
machines = _fetch_connected_machines() return jsonify({ "workflows": workflows, "directories": matcher.get_directories(), "machines": machines, }) def _fetch_vwb_workflows(): """Récupère les workflows depuis le VWB backend (port 5002).""" try: resp = http_requests.get( "http://localhost:5002/api/v3/session/state", timeout=3, ) if resp.ok: data = resp.json() wf_list = data.get("workflows_list", []) result = [] for wf in wf_list: result.append({ "id": wf.get("id", ""), "name": wf.get("name", ""), "description": wf.get("description", ""), "tags": wf.get("tags", []), "source": "vwb", "step_count": wf.get("step_count", 0), "review_status": wf.get("review_status"), }) return result except http_requests.ConnectionError: logger.debug("VWB backend (port 5002) indisponible") except Exception as e: logger.warning("Erreur récupération workflows VWB: %s", e) return [] @app.route('/api/workflows/refresh', methods=['POST']) def api_workflows_refresh(): """ Forcer le rechargement des workflows depuis tous les répertoires. Utile après qu'un nouveau workflow a été appris par le StreamProcessor. """ if not matcher: return jsonify({"success": False, "error": "SemanticMatcher non initialisé"}) try: count = matcher.reload_workflows() # Re-injecter les workflows dans l'intent_parser (contexte LLM) if intent_parser: workflows_for_llm = [ {"name": wf.name, "description": wf.description, "tags": wf.tags} for wf in matcher.get_all_workflows() ] intent_parser.set_workflows(workflows_for_llm) return jsonify({ "success": True, "workflows_count": count, "directories": matcher.get_directories(), }) except Exception as e: logger.error(f"Erreur rechargement workflows: {e}") return jsonify({"success": False, "error": str(e)}) @app.route('/api/machines') def api_machines(): """Liste des machines connectées au streaming server. Proxy vers le streaming server pour que le frontend puisse lister les machines et cibler un replay spécifique. 
""" machines = _fetch_connected_machines() return jsonify({"machines": machines}) @app.route('/api/search', methods=['POST']) def api_search(): """Rechercher des workflows.""" data = request.json query = data.get('query', '') if not matcher or not query: return jsonify({"matches": []}) matches = matcher.find_workflows(query, limit=5, min_confidence=0.2) results = [] for m in matches: results.append({ "workflow_id": m.workflow_id, "workflow_name": m.workflow_name, "confidence": m.confidence, "extracted_params": m.extracted_params, "match_reason": m.match_reason }) return jsonify({"matches": results}) @app.route('/api/execute', methods=['POST']) def api_execute(): """Exécuter une commande.""" global execution_status data = request.json command = data.get('command', '') params = data.get('params', {}) if not matcher or not command: return jsonify({"success": False, "error": "Invalid command"}) # Trouver le workflow match = matcher.find_workflow(command, min_confidence=0.2) if not match: return jsonify({ "success": False, "error": "Aucun workflow correspondant trouvé" }) # Combiner les paramètres all_params = {**match.extracted_params, **params} # Enregistrer dans l'historique command_history.append({ "timestamp": datetime.now().isoformat(), "command": command, "workflow": match.workflow_name, "params": all_params, "status": "started" }) # Mettre à jour le statut execution_status = { "running": True, "workflow": match.workflow_name, "progress": 0, "message": "Démarrage..." 
} # Notifier via WebSocket socketio.emit('execution_started', { "workflow": match.workflow_name, "params": all_params }) # Exécuter le workflow en arrière-plan socketio.start_background_task(execute_workflow, match, all_params) return jsonify({ "success": True, "workflow": match.workflow_name, "params": all_params, "confidence": match.confidence }) @app.route('/api/history') def api_history(): """Historique des commandes.""" return jsonify({"history": command_history[-20:]}) # ============================================================================= # Réponses LLM naturelles (persona Léa) # ============================================================================= # Modèle texte pour les réponses conversationnelles (pas besoin de vision) _LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL", "qwen3:8b") _LEA_SYSTEM_PROMPT = """Tu es Léa, une assistante professionnelle chaleureuse et bienveillante. Règles : - Tu vouvoies TOUJOURS l'utilisateur - Tu es naturelle, avec un peu d'humour quand c'est approprié - Réponses COURTES : 1 à 3 phrases maximum - JAMAIS de jargon technique (pas de workflow, RPA, API, agent, streaming, variable, base de données) - Tu parles de "tâches" pour désigner les processus automatisés - Tu parles d'"apprentissage" pour l'enregistrement de tâches - Tu utilises des emojis avec parcimonie (1 max par message) - Si on te demande quelque chose d'impossible (café, nourriture...) 
def _generate_lea_response(
    user_message: str,
    intent_result: dict,
    action_result: Optional[dict] = None,
) -> Optional[str]:
    """Generate a natural reply via the LLM with the Léa persona.

    Returns the reply text, or None when the LLM is unavailable
    (caller then falls back to the hardcoded templates).
    """
    # Build the LLM context
    context_parts = []
    if action_result:
        # Tell the LLM the action's outcome so it can phrase its reply
        summary = json.dumps(action_result, ensure_ascii=False, default=str)[:300]
        context_parts.append(f"[Action effectuée : {summary}]")

    if context_parts:
        user_prompt = f"{chr(10).join(context_parts)}\n\nUtilisateur : {user_message}\nLéa :"
    else:
        user_prompt = f"Utilisateur : {user_message}\nLéa :"

    # Call Ollama via /api/chat
    try:
        resp = http_requests.post(
            "http://localhost:11434/api/chat",
            json={
                "model": _LEA_LLM_MODEL,
                "messages": [
                    {"role": "system", "content": _LEA_SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                "stream": False,
                "think": False,  # Disable reflection mode (qwen3)
                "options": {"temperature": 0.7, "num_predict": 150},
            },
            timeout=15,
        )
        if resp.ok:
            content = resp.json().get("message", {}).get("content", "")
            # Clean up the reply (strip think tags etc.).
            # BUGFIX: the tag literal was lost — `split("")` raises
            # ValueError (empty separator); restore the "</think>" marker.
            content = content.strip()
            if "</think>" in content:
                content = content.split("</think>")[-1].strip()
            if content:
                return content
    except Exception as e:
        logger.warning(f"LLM indisponible pour la réponse Léa : {e}")

    return None  # Fall back to the templates


@app.route('/api/chat', methods=['POST'])
def api_chat():
    """
    Main conversational endpoint.

    Full pipeline:
    1. IntentParser: analyse the intent
    2. ConversationManager: multi-turn context
    3. ConfirmationLoop: validate sensitive actions
    4. ResponseGenerator: produce the reply
    """
    # BUGFIX: request.json raises on missing/invalid JSON bodies; be tolerant.
    data = request.get_json(silent=True) or {}
    message = data.get('message', '').strip()
    session_id = data.get('session_id')

    if not message:
        return jsonify({"error": "Message vide"}), 400

    # 1. Get or create the session
    session = conversation_manager.get_or_create_session(session_id=session_id)

    # 2. Parse the intent
    intent = intent_parser.parse(message)

    # 3. Resolve anaphoric references (e.g. "le même", "celui-ci")
    intent = conversation_manager.resolve_references(session, intent)

    # 4. Build the context
    context = conversation_manager.get_context_summary(session)
    context["execution_status"] = execution_status

    # 5. Dispatch on the intent type
    result = {}
    action_taken = None

    if intent.intent_type == IntentType.CONFIRM:
        # An import awaiting confirmation takes precedence
        pending_import = _pending_imports.pop(session.session_id, None)
        if pending_import:
            result = _execute_pending_import(pending_import)
            action_taken = "data_import"
        else:
            # Confirm a pending (workflow) action
            pending = conversation_manager.get_pending_confirmation(session)
            if pending:
                confirmation_loop.confirm(pending.id)
                conversation_manager.clear_pending_confirmation(session)
                result = {"confirmed": True, "workflow": pending.workflow_name}
                action_taken = "confirmed"
                # Kick off the execution
                socketio.start_background_task(
                    execute_workflow_from_confirmation, pending, session.session_id
                )
            else:
                result = {"confirmed": False}

    elif intent.intent_type == IntentType.DENY:
        # Cancel any pending import
        _pending_imports.pop(session.session_id, None)
        # Refuse a pending action
        pending = conversation_manager.get_pending_confirmation(session)
        if pending:
            confirmation_loop.deny(pending.id)
            conversation_manager.clear_pending_confirmation(session)
        result = {"denied": True}
        action_taken = "denied"

    elif intent.intent_type == IntentType.EXECUTE:
        # Three-level resolution:
        # 1. Learned workflow -> direct execution or copilot
        # 2. Primitive gesture (GestureCatalog) -> keyboard shortcut
        # 3. "I don't know, show me!"
        query = intent.workflow_hint or intent.raw_query
        if matcher and query:
            match = matcher.find_workflow(query, min_confidence=0.2)
        else:
            match = None

        if match:
            # Level 1: learned workflow
            risk = confirmation_loop.evaluate_risk(
                match.workflow_name,
                {**match.extracted_params, **intent.parameters}
            )
            if confirmation_loop.requires_confirmation(risk):
                conf = confirmation_loop.create_confirmation_request(
                    workflow_name=match.workflow_name,
                    parameters={**match.extracted_params, **intent.parameters},
                    action_type="execute",
                    risk_level=risk
                )
                conversation_manager.set_pending_confirmation(session, conf)
                response = response_generator.generate_confirmation_request(conf)
                result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
                action_taken = "confirmation_requested"
            else:
                all_params = {**match.extracted_params, **intent.parameters}
                use_copilot = _detect_copilot_mode(message)
                if use_copilot:
                    result = {
                        "success": True,
                        "workflow": match.workflow_name,
                        "params": all_params,
                        "confidence": match.confidence,
                        "mode": "copilot",
                    }
                    action_taken = "copilot_started"
                    socketio.start_background_task(
                        execute_workflow_copilot, match, all_params
                    )
                else:
                    result = {
                        "success": True,
                        "workflow": match.workflow_name,
                        "params": all_params,
                        "confidence": match.confidence
                    }
                    action_taken = "executed"
                    socketio.start_background_task(execute_workflow, match, all_params)

        elif gesture_catalog and query:
            # Level 2: primitive gesture (keyboard shortcut)
            gesture_match = gesture_catalog.match(query, min_score=0.6)
            if gesture_match:
                gesture, score = gesture_match
                result = {
                    "gesture": True,
                    "gesture_name": gesture.name,
                    "gesture_keys": "+".join(gesture.keys),
                    "gesture_id": gesture.id,
                    "confidence": score,
                }
                action_taken = "gesture_executed"
                # Execute the gesture through the streaming server
                socketio.start_background_task(_execute_gesture, gesture)
            else:
                # Level 3: unknown -> "show me!"
                result = {"not_found": True, "query": query, "teach_me": True}
        else:
            # Level 3: no usable query
            result = {"not_found": True, "query": query or "", "teach_me": True}

    elif intent.intent_type == IntentType.DATA_IMPORT:
        # Excel/CSV data import
        result = _handle_data_import(intent, session.session_id)
        action_taken = "data_import"

    elif intent.intent_type == IntentType.LIST:
        # List the workflows with enriched metadata
        if matcher:
            workflows = [
                {
                    "name": wf.name,
                    "description": wf.description or "Workflow appris automatiquement",
                    "tags": wf.tags[:3] if wf.tags else [],
                    "id": wf.workflow_id,
                }
                for wf in matcher.get_all_workflows()
            ]
            result = {"workflows": workflows}
        else:
            result = {"workflows": []}
        action_taken = "listed"

    elif intent.intent_type == IntentType.STATUS:
        result = {"execution": execution_status}
        action_taken = "status_checked"

    elif intent.intent_type == IntentType.CANCEL:
        if execution_status.get("running"):
            execution_status["running"] = False
            execution_status["message"] = "Annulé"
            result = {"cancelled": True}
        else:
            result = {"cancelled": False}
        action_taken = "cancelled"

    elif intent.intent_type == IntentType.HISTORY:
        result = {"history": command_history[-10:]}
        action_taken = "history_shown"

    elif intent.intent_type == IntentType.QUERY:
        # Look up information about the requested workflow
        topic = intent.workflow_hint or intent.raw_query
        if matcher and topic:
            # Try to find the matching workflow
            match = matcher.find_workflow(topic, min_confidence=0.3)
            if match:
                help_text = matcher.get_workflow_help(match.workflow_id)
                result = {"answer": help_text}
            else:
                # Maybe a general question about the available workflows
                all_wf = matcher.get_all_workflows()
                if all_wf:
                    wf_list = "\n".join(
                        f"• **{wf.name}**: {wf.description or 'Pas de description'}"
                        for wf in all_wf[:10]
                    )
                    result = {
                        "answer": f"Je n'ai pas trouvé de workflow '{topic}', "
                        f"mais voici les workflows disponibles :\n{wf_list}"
                    }
                else:
                    result = {}
        else:
            result = {}
        action_taken = "query_answered"

    elif intent.intent_type == IntentType.HELP:
        result = {}
        action_taken = "help_shown"

    elif intent.intent_type == IntentType.GREETING:
        result = {}
        action_taken = "greeting"

    elif intent.clarification_needed:
        result = {"clarification_needed": True}
        action_taken = "clarification_requested"

    # 6. Generate the reply (unless already done for a confirmation request)
    if action_taken != "confirmation_requested":
        # Try a natural LLM reply first (Léa persona)
        lea_text = _generate_lea_response(message, intent.to_dict(), result or None)
        if lea_text:
            # LLM succeeded — build the GeneratedResponse, keeping the
            # template's suggestions (useful for the UX)
            if action_taken == "data_import":
                from dataclasses import replace as _dc_replace
                data_intent = _dc_replace(intent, intent_type=IntentType.DATA_IMPORT)
                fallback = response_generator.generate(data_intent, context, result)
            else:
                fallback = response_generator.generate(intent, context, result)
            response = GeneratedResponse(
                message=lea_text,
                suggestions=fallback.suggestions,
                action_required=fallback.action_required,
                action_type=fallback.action_type,
                metadata=fallback.metadata,
            )
        else:
            # Fall back to the hardcoded templates when the LLM is down
            if action_taken == "data_import":
                from dataclasses import replace as _dc_replace
                data_intent = _dc_replace(intent, intent_type=IntentType.DATA_IMPORT)
                response = response_generator.generate(data_intent, context, result)
            else:
                response = response_generator.generate(intent, context, result)

    # 7. Record the turn in the conversation
    conversation_manager.add_turn(
        session=session,
        user_message=message,
        intent=intent,
        response=response.message,
        action_taken=action_taken,
        result=result
    )

    # 8. Return the reply
    return jsonify({
        "session_id": session.session_id,
        "intent": intent.to_dict(),
        "response": response.to_dict(),
        "result": result,
        "context": {
            "current_workflow": session.context.current_workflow,
            "has_pending_confirmation": session.context.pending_confirmation is not None
        }
    })


def execute_workflow_from_confirmation(confirmation, session_id):
    """Execute a workflow after it was confirmed by the user."""
    global execution_status
    if not matcher:
        return

    # Find the workflow
    match = matcher.find_workflow(confirmation.workflow_name, min_confidence=0.1)
    if not match:
        return

    # Use the confirmed (possibly modified) parameters
    params = confirmation.modified_parameters or confirmation.parameters

    # IMPORTANT: mark the execution as active
    execution_status["running"] = True
    execution_status["workflow"] = confirmation.workflow_name
    execution_status["progress"] = 0
    execution_status["message"] = "Démarrage..."

    execute_workflow(match, params)


# BUGFIX: the route variable was lost in the source — the view takes an
# `action` argument, so the rule must declare the <action> converter.
@app.route('/api/gpu/<action>', methods=['POST'])
def api_gpu_action(action):
    """Control the GPU Resource Manager (load/unload VLM, switch mode)."""
    if not gpu_manager:
        return jsonify({"success": False, "error": "GPU Manager non disponible"})

    async def do_action():
        if action == "load-vlm":
            return await gpu_manager.ensure_vlm_loaded()
        elif action == "unload-vlm":
            return await gpu_manager.ensure_vlm_unloaded()
        elif action == "recording":
            await gpu_manager.set_execution_mode(ExecutionMode.RECORDING)
            return True
        elif action == "autopilot":
            await gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)
            return True
        elif action == "idle":
            await gpu_manager.set_execution_mode(ExecutionMode.IDLE)
            return True
        return False

    try:
        # asyncio.run creates, runs and reliably closes the loop
        # (the previous manual loop leaked when do_action raised).
        result = asyncio.run(do_action())
        return jsonify({"success": result, "action": action})
    except Exception as e:
        return jsonify({"success": False, "error": str(e)})


@app.route('/api/llm/status')
def api_llm_status():
    """LLM (Ollama) status."""
    status = {
        "enabled": intent_parser.use_llm if intent_parser else False,
        "available": intent_parser.llm_available if intent_parser else False,
        "model": intent_parser.llm_model if intent_parser else None,
        "endpoint": intent_parser.llm_endpoint if intent_parser else None,
        "workflows_loaded": len(intent_parser._workflows_cache) if intent_parser else 0
    }
    # List the locally available Ollama models
    try:
        response = http_requests.get("http://localhost:11434/api/tags", timeout=2)
        if response.status_code == 200:
            models = response.json().get('models', [])
            status["available_models"] = [m["name"] for m in models]
    except Exception:
        status["available_models"] = []
    return jsonify(status)
intent_parser.use_llm if intent_parser else False, "available": intent_parser.llm_available if intent_parser else False, "model": intent_parser.llm_model if intent_parser else None, "endpoint": intent_parser.llm_endpoint if intent_parser else None, "workflows_loaded": len(intent_parser._workflows_cache) if intent_parser else 0 } # Lister les modèles Ollama disponibles try: import requests response = requests.get("http://localhost:11434/api/tags", timeout=2) if response.status_code == 200: models = response.json().get('models', []) status["available_models"] = [m["name"] for m in models] except: status["available_models"] = [] return jsonify(status) @app.route('/api/llm/model', methods=['POST']) def api_llm_set_model(): """Changer le modèle LLM.""" data = request.json model = data.get('model') if not model: return jsonify({"success": False, "error": "Modèle non spécifié"}) if intent_parser: intent_parser.llm_model = model intent_parser._check_llm_availability() return jsonify({ "success": True, "model": model, "available": intent_parser.llm_available }) return jsonify({"success": False, "error": "IntentParser non initialisé"}) # ============================================================================= # API Agent Libre (dépréciée — tout passe par /api/chat) # ============================================================================= @app.route('/api/agent/plan', methods=['POST']) def api_agent_plan(): """Déprécié — utiliser le chat unifié (/api/chat).""" return jsonify({ "error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.", "migration": "POST /api/chat {\"message\": \"votre demande\"}" }), 410 @app.route('/api/agent/execute', methods=['POST']) def api_agent_execute(): """Déprécié — utiliser le chat unifié (/api/chat).""" return jsonify({ "error": "Cette API est dépréciée. 
@app.route('/api/agent/execute', methods=['POST'])
def api_agent_execute():
    """Déprécié — utiliser le chat unifié (/api/chat)."""
    return jsonify({
        "error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
        "migration": "POST /api/chat {\"message\": \"votre demande\"}"
    }), 410


@app.route('/api/agent/status')
def api_agent_status():
    """Statut de l'agent autonome."""
    planner = autonomous_planner
    return jsonify({
        "available": planner is not None,
        "llm_available": planner.llm_available if planner else False,
        "llm_model": planner.llm_model if planner else None
    })


@app.route('/api/gestures')
def api_gestures():
    """Liste tous les gestes disponibles dans le catalogue."""
    if not gesture_catalog:
        return jsonify({"gestures": [], "count": 0})

    gestures = gesture_catalog.list_all()
    categories = {g["category"] for g in gestures}
    return jsonify({
        "gestures": gestures,
        "count": len(gestures),
        "categories": list(categories),
    })


def _execute_gesture(gesture):
    """Exécuter un geste primitif via le streaming server."""
    import uuid as _uuid

    action = {
        "action_id": f"act_gesture_{_uuid.uuid4().hex[:8]}",
        "type": "key_combo",
        "keys": list(gesture.keys),
    }

    def _notify(success, message):
        # Toutes les issues (succès, refus, panne) passent par le même event.
        socketio.emit('execution_completed', {
            "workflow": gesture.name,
            "success": success,
            "message": message,
        })

    try:
        resp = http_requests.post(
            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/raw",
            headers=_streaming_headers(),
            json={
                "actions": [action],
                "session_id": "",
                "task_description": f"Geste: {gesture.name}",
            },
            timeout=10,
        )
        if resp.status_code == 200:
            _notify(True, f"Geste '{gesture.name}' ({'+'.join(gesture.keys)}) envoyé")
        else:
            _notify(False, f"Erreur: {resp.text[:200]}")
    except http_requests.ConnectionError:
        _notify(False, "Serveur de streaming non disponible (port 5005).")
    except Exception as e:
        logger.error(f"Gesture execution error: {e}")
        _notify(False, f"Erreur: {str(e)}")
# =============================================================================
# Import de données Excel/CSV
# =============================================================================

def _handle_data_import(intent, session_id: str) -> Dict[str, Any]:
    """
    Traiter une intention DATA_IMPORT.

    Gère les actions : import_file, import_folder, list_tables, table_info.
    Retourne toujours un dict, avec une clé "error" en cas de problème.
    """
    if not DATA_IMPORT_AVAILABLE or not excel_importer:
        return {"error": "L'import Excel n'est pas disponible (openpyxl manquant)."}

    params = intent.parameters
    action = params.get("action", "import_file")

    if action == "list_tables":
        return _data_list_tables()

    if action == "table_info":
        table_name = params.get("table_name")
        if table_name:
            return _data_table_info(table_name)
        return {"error": "Précisez le nom de la table (ex: 'info table patients')."}

    if action == "import_folder":
        # file_path sert de repli si le parseur a mis le dossier dedans
        folder_path = params.get("folder_path") or params.get("file_path", "")
        if folder_path:
            return _data_import_folder(folder_path)
        return {"error": "Précisez le chemin du dossier."}

    if action == "import_file":
        file_path = params.get("file_path")
        if not file_path:
            return {"error": "Précisez le fichier à importer (ex: 'importe patients.xlsx')."}
        return _data_preview_file(file_path, params.get("table_name"), session_id)

    return {"error": "Action d'import non reconnue."}
def _resolve_file_path(file_path: str) -> Optional[Path]:
    """
    Résoudre un chemin de fichier.

    Cherche dans l'ordre :
    1. Chemin absolu tel quel
    2. Nom de fichier seul dans le dossier d'upload
    3. Chemin relatif à la racine du projet
    4. Chemin relatif complet dans le dossier d'upload

    Retourne None si aucun candidat n'existe sur le disque.
    """
    candidate = Path(file_path)

    # 1) Chemin absolu tel quel
    if candidate.is_absolute() and candidate.exists():
        return candidate

    # 2) Par simple nom de fichier dans le dossier d'upload
    by_name = UPLOAD_DIR / candidate.name
    if by_name.exists():
        return by_name

    # 3) Relatif à la racine du projet
    in_project = PROJECT_ROOT / file_path
    if in_project.exists():
        return in_project

    # 4) Chemin relatif complet dans le dossier d'upload
    if not candidate.is_absolute():
        in_uploads = UPLOAD_DIR / file_path
        if in_uploads.exists():
            return in_uploads

    return None


def _data_preview_file(file_path: str, table_name: Optional[str], session_id: str) -> Dict[str, Any]:
    """Prévisualiser un fichier Excel avant import."""
    resolved = _resolve_file_path(file_path)
    if not resolved:
        return {"file_not_found": True, "file_path": file_path}

    try:
        preview = excel_importer.preview(str(resolved))

        # Nom de table dérivé du nom de fichier si non fourni
        if not table_name:
            table_name = resolved.stem.lower().replace(" ", "_").replace("-", "_")

        # Mémoriser l'import en attente : il sera exécuté après
        # confirmation de l'utilisateur dans le chat.
        _pending_imports[session_id] = {
            "file_path": str(resolved),
            "table_name": table_name,
            "sheet_name": preview.sheet_name,
        }

        sample = [
            [str(v) if v is not None else "" for v in row]
            for row in preview.rows[:3]
        ]
        return {
            "preview": {
                "columns": preview.headers,
                "total_rows": preview.total_rows,
                "sheet_name": preview.sheet_name,
                "detected_types": preview.detected_types,
                "sample_rows": sample,
            },
            "filename": resolved.name,
            "table_name": table_name,
            "needs_confirmation": True,
        }
    except Exception as e:
        logger.error(f"Erreur prévisualisation {file_path}: {e}")
        return {"error": str(e)}
result.row_count, "column_count": result.column_count, "columns": result.columns, "sheet_name": result.sheet_name, "db_path": result.db_path, } } else: return {"error": "; ".join(result.errors) or "Import échoué (0 lignes)."} except Exception as e: logger.error(f"Erreur import: {e}") return {"error": str(e)} def _data_list_tables() -> Dict[str, Any]: """Lister les tables disponibles dans la base.""" if not db_iterator: return {"tables_list": []} try: tables = db_iterator.list_tables() tables_info = [] for t in tables: try: count = db_iterator.count(t) except Exception: count = -1 tables_info.append({"name": t, "row_count": count}) return {"tables_list": tables_info} except FileNotFoundError: # Base pas encore créée — aucune table return {"tables_list": []} except Exception as e: logger.error(f"Erreur list_tables: {e}") return {"error": str(e)} def _data_table_info(table_name: str) -> Dict[str, Any]: """Récupérer les infos d'une table.""" if not db_iterator: return {"error": "DBIterator non disponible."} try: columns = db_iterator.get_columns(table_name) count = db_iterator.count(table_name) return { "table_info": { "table_name": table_name, "columns": columns, "row_count": count, } } except FileNotFoundError: return {"error": f"Aucune base de données trouvée."} except Exception as e: # Table n'existe probablement pas return {"error": f"Table '{table_name}' introuvable : {e}"} def _data_import_folder(folder_path: str) -> Dict[str, Any]: """Lister les fichiers Excel dans un dossier.""" p = Path(folder_path) if not p.exists() or not p.is_dir(): return {"folder_files": [], "folder": folder_path} excel_files = sorted( [f.name for f in p.iterdir() if f.suffix.lower() in (".xlsx", ".xls", ".csv")] ) return { "folder_files": excel_files, "folder": folder_path, } @app.route('/api/chat/upload', methods=['POST']) def api_chat_upload(): """ Upload d'un fichier Excel/CSV via le chat. Le fichier est stocké dans data/uploads/ et un aperçu est retourné. 
@app.route('/api/chat/upload', methods=['POST'])
def api_chat_upload():
    """
    Upload d'un fichier Excel/CSV via le chat.

    Le fichier est stocké dans data/uploads/ et un aperçu est retourné.

    Validations de sécurité (HIGH) :
    - Taille max : 50 MB (via MAX_CONTENT_LENGTH Flask)
    - Extension autorisée : .xlsx, .xls, .csv
    - Type MIME vérifié (pas juste l'extension)
    - Nom de fichier assaini via secure_filename
    """
    uploaded = request.files.get('file')
    if uploaded is None:
        return jsonify({"error": "Aucun fichier reçu."}), 400
    if not uploaded.filename:
        return jsonify({"error": "Nom de fichier vide."}), 400

    # Vérifier l'extension
    allowed_ext = {'.xlsx', '.xls', '.csv'}
    ext = Path(uploaded.filename).suffix.lower()
    if ext not in allowed_ext:
        return jsonify({
            "error": f"Format '{ext}' non supporté. Formats acceptés : {', '.join(allowed_ext)}"
        }), 400

    # Validation du type MIME (sécurité HIGH — pas juste l'extension)
    _ALLOWED_MIMES = {
        '.xlsx': {'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                  'application/octet-stream'},
        '.xls': {'application/vnd.ms-excel', 'application/octet-stream'},
        '.csv': {'text/csv', 'text/plain', 'application/csv', 'application/octet-stream'},
    }
    file_mime = uploaded.content_type or ''
    allowed_mimes = _ALLOWED_MIMES.get(ext, set())
    if file_mime and file_mime not in allowed_mimes:
        logger.warning(
            f"Upload rejeté : MIME '{file_mime}' invalide pour extension '{ext}' "
            f"(fichier: {uploaded.filename})"
        )
        return jsonify({
            "error": f"Type MIME '{file_mime}' invalide pour un fichier {ext}. "
                     f"Types attendus : {', '.join(allowed_mimes)}"
        }), 400

    # Assainir le nom de fichier (sécurité HIGH — empêcher path traversal)
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    filename = secure_filename(uploaded.filename)
    if not filename:
        # secure_filename peut retourner une chaîne vide pour des noms exotiques
        filename = f"upload_{datetime.now().strftime('%Y%m%d_%H%M%S')}{ext}"

    # Ajouter un timestamp pour éviter les collisions
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_name = f"{Path(filename).stem}_{ts}{ext}"
    save_path = UPLOAD_DIR / safe_name
    uploaded.save(str(save_path))
    logger.info(f"Fichier uploadé : {save_path}")

    # Retourner les infos pour que le frontend envoie un message d'import
    session_id = request.form.get('session_id', '')

    # Prévisualiser le fichier (Excel uniquement — pas les CSV)
    if DATA_IMPORT_AVAILABLE and excel_importer and ext in ('.xlsx', '.xls'):
        try:
            preview = excel_importer.preview(str(save_path))
            table_name = Path(filename).stem.lower().replace(" ", "_").replace("-", "_")

            # Stocker l'import en attente (confirmé plus tard via le chat)
            _pending_imports[session_id] = {
                "file_path": str(save_path),
                "table_name": table_name,
                "sheet_name": preview.sheet_name,
            }

            cols_str = ", ".join(preview.headers[:8])
            if len(preview.headers) > 8:
                cols_str += f"... (+{len(preview.headers) - 8})"

            return jsonify({
                "success": True,
                "filename": uploaded.filename,
                "saved_as": safe_name,
                "path": str(save_path),
                "preview": {
                    "columns": preview.headers,
                    "total_rows": preview.total_rows,
                    "sheet_name": preview.sheet_name,
                },
                "message": (
                    f"Fichier **{uploaded.filename}** recu ! "
                    f"{preview.total_rows} lignes, colonnes : {cols_str}. "
                    f"Je cree la table '{table_name}' ?"
                ),
                "needs_confirmation": True,
            })
        except Exception as e:
            logger.error(f"Erreur preview upload: {e}")
            return jsonify({
                "success": True,
                "filename": uploaded.filename,
                "saved_as": safe_name,
                "path": str(save_path),
                "error": f"Fichier sauvegardé mais erreur de lecture : {e}",
            })

    return jsonify({
        "success": True,
        "filename": uploaded.filename,
        "saved_as": safe_name,
        "path": str(save_path),
        "message": f"Fichier **{uploaded.filename}** sauvegardé dans {UPLOAD_DIR}.",
    })


@app.route('/api/help')
def api_help():
    """Aide et mode d'emploi."""
    sections = [
        {
            "title": "🎯 Commandes en langage naturel",
            "content": """
Tapez simplement ce que vous voulez faire en français ou anglais.
Le système trouvera automatiquement le workflow correspondant.

**Exemples :**
- "facturer le client Acme"
- "exporter le rapport en PDF"
- "créer une facture pour Client ABC"
- "facturer les clients de A à Z"
"""
        },
        {
            "title": "📋 Paramètres",
            "content": """
Les paramètres sont extraits automatiquement de votre commande.
Vous pouvez aussi les spécifier manuellement dans le formulaire.

**Paramètres courants :**
- `client` : Nom du client
- `format` : Format d'export (pdf, excel)
- `start`, `end` : Plage de valeurs
"""
        },
        {
            "title": "⌨️ Raccourcis clavier",
            "content": """
- `Entrée` : Exécuter la commande
- `Échap` : Annuler / Fermer
- `↑` / `↓` : Naviguer dans l'historique
- `Ctrl+M` : Minimiser l'interface
"""
        },
        {
            "title": "🔄 Pendant l'exécution",
            "content": """
L'interface peut être minimisée pendant l'exécution.
Le workflow s'exécute en arrière-plan.
Vous serez notifié à la fin de l'exécution.
"""
        },
    ]
    help_content = {
        "title": "RPA Vision V3 - Mode d'emploi",
        "sections": sections,
    }
    return jsonify(help_content)
""" } ] } return jsonify(help_content) # ============================================================================= # WebSocket Events # ============================================================================= @socketio.on('connect') def handle_connect(): """Client connecté.""" logger.info("Client connected") emit('status', execution_status) @socketio.on('disconnect') def handle_disconnect(): """Client déconnecté.""" logger.info("Client disconnected") @socketio.on('cancel_execution') def handle_cancel(): """Annuler l'exécution.""" global execution_status execution_status["running"] = False execution_status["message"] = "Annulé" emit('execution_cancelled', {}, broadcast=True) # ============================================================================= # Copilot WebSocket Events # ============================================================================= @socketio.on('copilot_approve') def handle_copilot_approve(): """L'utilisateur approuve l'étape copilot en cours.""" copilot = _copilot_sessions.get("__copilot__") if not copilot or copilot["status"] != "waiting_approval": emit('copilot_error', {"message": "Aucune étape en attente de validation."}) return logger.info(f"Copilot approve: étape {copilot['current_index'] + 1}/{copilot['total']}") copilot["status"] = "approved" @socketio.on('copilot_skip') def handle_copilot_skip(): """L'utilisateur saute l'étape copilot en cours.""" copilot = _copilot_sessions.get("__copilot__") if not copilot or copilot["status"] != "waiting_approval": emit('copilot_error', {"message": "Aucune étape en attente de validation."}) return logger.info(f"Copilot skip: étape {copilot['current_index'] + 1}/{copilot['total']}") copilot["status"] = "skipped" @socketio.on('copilot_abort') def handle_copilot_abort(): """L'utilisateur annule tout le workflow copilot.""" copilot = _copilot_sessions.get("__copilot__") if not copilot: return logger.info(f"Copilot abort: workflow '{copilot['workflow_name']}'") copilot["status"] = "aborted" 
# =============================================================================
# Exécution de workflow
# =============================================================================

def _try_streaming_server_replay(
    workflow_id: str,
    params: Dict[str, Any],
    machine_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Tenter d'exécuter un workflow via le streaming server (Agent V1).

    POST http://localhost:5005/api/v1/traces/stream/replay avec workflow_id,
    params et optionnellement machine_id (multi-machine).

    Args:
        workflow_id: Identifiant du workflow à exécuter
        params: Paramètres du workflow (variables)
        machine_id: Machine cible pour le replay (None = auto-détection)

    Returns:
        dict avec le résultat si succès.
        dict avec {"error": ...} si le serveur est UP mais refuse (ex: pas de session).
        None si le serveur est injoignable (connexion refusée, timeout).
    """
    try:
        payload = {
            "workflow_id": workflow_id,
            "session_id": "",  # Vide = auto-détection de la session Agent V1 active
            "params": params or {},
        }
        # Ajouter le machine_id si spécifié (ciblage multi-machine)
        if machine_id:
            payload["machine_id"] = machine_id

        resp = http_requests.post(
            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay",
            headers=_streaming_headers(),
            json=payload,
            timeout=15,
        )

        if resp.status_code != 200:
            # Le serveur est UP mais refuse — renvoyer l'erreur pour éviter
            # le fallback local.
            error_detail = resp.text[:200]
            logger.warning(
                f"Streaming server refus (HTTP {resp.status_code}): {error_detail}"
            )
            return {"error": error_detail, "status_code": resp.status_code}

        data = resp.json()
        suffix = f" (machine={machine_id})" if machine_id else ""
        logger.info(
            f"Workflow {workflow_id} envoyé au streaming server: {data}" + suffix
        )
        return data
    except http_requests.ConnectionError:
        logger.debug("Streaming server non disponible (connexion refusée)")
    except http_requests.Timeout:
        logger.debug("Streaming server timeout")
    except Exception as e:
        logger.debug(f"Erreur streaming server: {e}")

    return None
"completed": finish_execution( workflow_name, failed == 0, f"Replay terminé : {completed} actions exécutées" + (f", {failed} échecs" if failed else "") ) return elif status == "failed": finish_execution( workflow_name, False, f"Replay échoué : {completed}/{total_actions} actions, {failed} échecs" ) return except Exception as e: logger.debug(f"Poll replay progress: {e}") # Timeout if execution_status.get("running"): finish_execution( workflow_name, False, f"Timeout — replay toujours en cours après {max_wait}s" ) def _build_actions_from_workflow(match, params: Dict[str, Any]) -> List[Dict[str, Any]]: """ Construire la liste d'actions normalisées depuis un workflow. Tente la conversion via le format core (nodes/edges), puis fallback sur le format JSON brut. """ import uuid as _uuid try: with open(match.workflow_path, 'r') as f: workflow_data = json.load(f) except Exception as e: logger.error(f"Impossible de charger le workflow {match.workflow_path}: {e}") return [] # Substituer les variables var_manager = VariableManager() var_manager.set_variables(params) workflow_data = var_manager.substitute_dict(workflow_data) edges = workflow_data.get("edges", []) actions = [] for i, edge in enumerate(edges): action_dict = edge.get("action", {}) action_type = action_dict.get("type", "unknown") action_params = action_dict.get("parameters", {}) target_dict = action_dict.get("target", {}) action = { "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}", "step_index": i, "description": _describe_action(action_type, action_params, target_dict), } if action_type == "mouse_click": pos = target_dict.get("position", [0.5, 0.5]) action["type"] = "click" action["x_pct"] = pos[0] if len(pos) > 0 else 0.5 action["y_pct"] = pos[1] if len(pos) > 1 else 0.5 action["button"] = action_params.get("button", "left") elif action_type == "text_input": action["type"] = "type" action["text"] = action_params.get("text", "") elif action_type == "key_press": action["type"] = "key_combo" keys = 
action_params.get("keys", []) if not keys and action_params.get("key"): keys = [action_params["key"]] action["keys"] = keys elif action_type == "compound": for step in action_params.get("steps", []): sub_action = { "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}", "step_index": i, "description": _describe_action(step.get("type", "unknown"), step, {}), } sub_type = step.get("type", "unknown") if sub_type == "key_press": sub_action["type"] = "key_combo" sub_action["keys"] = step.get("keys", []) elif sub_type == "text_input": sub_action["type"] = "type" sub_action["text"] = step.get("text", "") elif sub_type == "wait": sub_action["type"] = "wait" sub_action["duration_ms"] = step.get("duration_ms", 500) elif sub_type == "mouse_click": sub_action["type"] = "click" sub_action["x_pct"] = step.get("x_pct", 0.5) sub_action["y_pct"] = step.get("y_pct", 0.5) sub_action["button"] = step.get("button", "left") else: continue actions.append(sub_action) continue else: continue # Ajouter target_spec pour résolution visuelle si dispo target_spec = {} if target_dict.get("role"): target_spec["by_role"] = target_dict["role"] if target_dict.get("text"): target_spec["by_text"] = target_dict["text"] if target_spec: action["target_spec"] = target_spec action["visual_mode"] = True actions.append(action) return actions def _describe_action(action_type: str, params: Dict[str, Any], target: Dict[str, Any]) -> str: """Générer une description lisible d'une action pour l'affichage copilot.""" target_text = target.get("text", "") target_role = target.get("role", "") if action_type == "mouse_click": label = target_text or target_role or "un élément" return f"Clic sur '{label}'" elif action_type == "text_input": text = params.get("text", "") preview = text[:30] + "..." 
def execute_workflow_copilot(match, params: Dict[str, Any]):
    """
    Exécuter un workflow en mode Copilot (pas-à-pas).

    Charge le workflow, construit la liste d'actions, puis envoie les
    actions une par une en attendant la validation utilisateur via
    WebSocket entre chaque étape.

    L'état est partagé avec les handlers copilot_approve/skip/abort via
    _copilot_sessions["__copilot__"] ; le champ "status" sert de canal de
    décision, lu ici par polling.
    """
    global execution_status
    import time

    workflow_name = match.workflow_name
    actions = _build_actions_from_workflow(match, params)
    if not actions:
        # Rien d'exécutable : signaler l'erreur et sortir sans toucher
        # à execution_status.
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "error",
            "message": "Aucune action exécutable dans ce workflow.",
            "completed": 0,
            "total": 0,
        })
        return

    total = len(actions)
    execution_status["running"] = True
    execution_status["workflow"] = workflow_name
    execution_status["progress"] = 0
    execution_status["message"] = f"Mode Copilot : {total} étapes"

    # État partagé avec les handlers WebSocket (approve/skip/abort)
    copilot_state = {
        "workflow_name": workflow_name,
        "actions": actions,
        "current_index": 0,
        "total": total,
        "status": "idle",
        "completed": 0,
        "skipped": 0,
        "failed": 0,
    }
    _copilot_sessions["__copilot__"] = copilot_state
    logger.info(f"Copilot démarré : '{workflow_name}' — {total} étapes")

    for idx, action in enumerate(actions):
        copilot_state["current_index"] = idx
        # Un abort peut arriver entre deux étapes
        if copilot_state["status"] == "aborted":
            break

        # Présenter l'étape au client et attendre sa décision
        copilot_state["status"] = "waiting_approval"
        socketio.emit('copilot_step', {
            "workflow": workflow_name,
            "step_index": idx,
            "total": total,
            "action": {
                "action_id": action.get("action_id", ""),
                "type": action.get("type", "unknown"),
                "description": action.get("description", "Action inconnue"),
            },
        })

        # Attendre la décision de l'utilisateur (polling, max 120s)
        max_wait = 120
        waited = 0.0
        while waited < max_wait:
            status = copilot_state["status"]
            if status in ("approved", "skipped", "aborted"):
                break
            time.sleep(0.3)
            waited += 0.3

        if waited >= max_wait:
            # Pas de réponse : on abandonne tout le workflow
            copilot_state["status"] = "aborted"
            socketio.emit('copilot_complete', {
                "workflow": workflow_name,
                "status": "timeout",
                "message": f"Timeout : pas de réponse après {max_wait}s.",
                "completed": copilot_state["completed"],
                "total": total,
            })
            break

        decision = copilot_state["status"]
        if decision == "aborted":
            break
        elif decision == "skipped":
            copilot_state["skipped"] += 1
            logger.info(f"Copilot skip étape {idx + 1}/{total}")
            socketio.emit('copilot_step_result', {
                "step_index": idx,
                "total": total,
                "status": "skipped",
                "message": "Étape passée",
            })
            copilot_state["status"] = "idle"
            continue
        elif decision == "approved":
            logger.info(f"Copilot execute étape {idx + 1}/{total}: {action.get('type')}")
            try:
                # Envoyer l'action seule au streaming server (replay unitaire)
                resp = http_requests.post(
                    f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/single",
                    headers=_streaming_headers(),
                    json={
                        "action": action,
                        "session_id": "",
                    },
                    timeout=10,
                )
                if resp.status_code == 200:
                    resp_data = resp.json()
                    action_id = resp_data.get("action_id", action.get("action_id"))
                    # NOTE(review): _wait_for_single_action_result est
                    # optimiste (voir sa docstring) — le "succès" ici ne
                    # garantit pas l'exécution réelle côté Agent V1.
                    action_success = _wait_for_single_action_result(
                        resp_data.get("session_id", ""),
                        action_id,
                        timeout=30,
                    )
                    if action_success:
                        copilot_state["completed"] += 1
                        socketio.emit('copilot_step_result', {
                            "step_index": idx,
                            "total": total,
                            "status": "completed",
                            "message": "Action exécutée avec succès",
                        })
                    else:
                        copilot_state["failed"] += 1
                        socketio.emit('copilot_step_result', {
                            "step_index": idx,
                            "total": total,
                            "status": "failed",
                            "message": "L'action a échoué",
                        })
                else:
                    error = resp.text[:200]
                    copilot_state["failed"] += 1
                    socketio.emit('copilot_step_result', {
                        "step_index": idx,
                        "total": total,
                        "status": "failed",
                        "message": f"Erreur serveur : {error}",
                    })
            except http_requests.ConnectionError:
                copilot_state["failed"] += 1
                socketio.emit('copilot_step_result', {
                    "step_index": idx,
                    "total": total,
                    "status": "failed",
                    "message": "Serveur de streaming non disponible (port 5005).",
                })
            except Exception as e:
                copilot_state["failed"] += 1
                logger.error(f"Copilot action error: {e}")
                socketio.emit('copilot_step_result', {
                    "step_index": idx,
                    "total": total,
                    "status": "failed",
                    "message": f"Erreur : {str(e)}",
                })

        progress = int((idx + 1) / total * 100)
        execution_status["progress"] = progress
        execution_status["message"] = f"Copilot : étape {idx + 1}/{total}"
        copilot_state["status"] = "idle"

    # Fin du copilot
    _copilot_sessions.pop("__copilot__", None)
    execution_status["running"] = False

    completed = copilot_state["completed"]
    skipped = copilot_state["skipped"]
    failed = copilot_state["failed"]
    final_status = copilot_state.get("status", "completed")

    # En cas d'abort, handle_copilot_abort a déjà émis copilot_complete.
    if final_status != "aborted":
        success = failed == 0
        message = (
            f"Copilot terminé : {completed} réussies, "
            f"{skipped} passées, {failed} échouées sur {total} étapes."
        )
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "completed" if success else "partial",
            "message": message,
            "completed": completed,
            "skipped": skipped,
            "failed": failed,
            "total": total,
        })
        finish_execution(workflow_name, success, message)


def _wait_for_single_action_result(session_id: str, action_id: str, timeout: int = 30) -> bool:
    """
    Attendre le résultat d'une seule action envoyée au streaming server.

    Approche pragmatique : on attend un délai raisonnable (3s) pour que
    l'Agent V1 ait le temps de poll, exécuter, et reporter.

    NOTE(review): session_id et action_id ne sont pas utilisés — aucune
    vérification réelle n'est faite, la fonction retourne toujours True.
    """
    import time

    poll_interval = 0.5
    elapsed = 0.0
    while elapsed < timeout:
        time.sleep(poll_interval)
        elapsed += poll_interval
        if elapsed >= 3.0:
            return True  # Optimiste — le résultat réel arrive via /replay/result
    return True
""" global execution_status import time # Tenter l'exécution via le streaming server (Agent V1 distant) replay_result = _try_streaming_server_replay(match.workflow_id, params) if replay_result: # Vérifier si c'est une erreur du serveur (UP mais pas de session/workflow) if "error" in replay_result: error_msg = replay_result["error"] status_code = replay_result.get("status_code", 0) if status_code == 404 and "session" in error_msg.lower(): # Pas de session Agent V1 active — NE PAS faire de fallback local finish_execution( match.workflow_name, False, "Aucun Agent V1 connecté. Lancez l'agent sur le PC cible " "et démarrez une session d'abord." ) elif status_code == 404: finish_execution( match.workflow_name, False, f"Workflow '{match.workflow_id}' non trouvé sur le serveur de streaming." ) else: finish_execution( match.workflow_name, False, f"Erreur serveur streaming : {error_msg}" ) return # Le streaming server a accepté le replay total_actions = replay_result.get("total_actions", 1) target_session = replay_result.get("session_id", "?") execution_status["running"] = True execution_status["workflow"] = match.workflow_name execution_status["progress"] = 10 execution_status["message"] = f"Envoyé à l'Agent V1 ({target_session})" socketio.emit('execution_progress', { "progress": 10, "step": f"Replay envoyé à l'Agent V1 — {total_actions} actions en attente", "current": 0, "total": total_actions, }) # Suivre la progression du replay (polling toutes les 2s, max 120s) replay_id = replay_result.get("replay_id") if replay_id: socketio.start_background_task( _poll_replay_progress, replay_id, match.workflow_name, total_actions ) else: finish_execution( match.workflow_name, True, f"Replay envoyé ({total_actions} actions)" ) return # Fallback : exécution locale (seulement si le streaming server est injoignable) logger.info("Streaming server injoignable, exécution locale") execution_status["running"] = True execution_status["workflow"] = match.workflow_name 
execution_status["progress"] = 0 execution_status["message"] = "Démarrage (exécution locale)..." try: # Charger le workflow with open(match.workflow_path, 'r') as f: workflow_data = json.load(f) # Créer le VariableManager et injecter les paramètres var_manager = VariableManager() var_manager.set_variables(params) # Substituer les variables workflow_data = var_manager.substitute_dict(workflow_data) # Obtenir les étapes (edges) edges = workflow_data.get("edges", []) total_steps = len(edges) if edges else 1 # Étape 1: Initialisation update_progress(10, "Initialisation", 1, total_steps + 2) time.sleep(0.3) # Étape 2: Préparation GPU (si disponible) if gpu_manager and GPU_AVAILABLE: update_progress(15, "Préparation GPU...", 2, total_steps + 2) try: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) loop.run_until_complete(gpu_manager.set_execution_mode(ExecutionMode.AUTOPILOT)) loop.close() except Exception as e: logger.warning(f"GPU mode change failed: {e}") # Vérifier si l'exécution réelle est disponible use_real_execution = ( EXECUTION_AVAILABLE and action_executor is not None and screen_capturer is not None ) if use_real_execution: logger.info(f"🚀 Exécution RÉELLE du workflow: {match.workflow_name}") _execute_workflow_real(workflow_data, edges, total_steps, params) else: logger.info(f"🎭 Exécution SIMULÉE du workflow: {match.workflow_name}") _execute_workflow_simulated(edges, total_steps) # Finalisation if execution_status["running"]: update_progress(95, "Finalisation...", total_steps + 1, total_steps + 2) time.sleep(0.2) finish_execution(match.workflow_name, True, "Workflow terminé avec succès") except Exception as e: logger.error(f"Execution error: {e}") import traceback traceback.print_exc() finish_execution(match.workflow_name, False, f"Erreur: {str(e)}") def _execute_workflow_real(workflow_data, edges, total_steps, params): """Exécution réelle avec ActionExecutor.""" import time import tempfile import os from PIL import Image # Capturer l'écran initial 
def _execute_workflow_real(workflow_data, edges, total_steps, params):
    """Exécution réelle avec ActionExecutor.

    Capture l'écran, construit un ScreenState puis exécute chaque edge ;
    retombe en mode simulé si la capture ou la création du ScreenState
    échoue. S'arrête après 3 échecs cumulés (best-effort).
    """
    import time
    # NOTE(review): tempfile est importé mais jamais utilisé ici —
    # les captures vont dans data/temp via pathlib.
    import tempfile
    import os
    from PIL import Image

    # Capturer l'écran initial
    update_progress(20, "Capture écran initial...", 2, total_steps + 2)
    try:
        screenshot_array = screen_capturer.capture()
        if screenshot_array is None:
            raise Exception("Capture retourne None")

        # Sauvegarder le screenshot dans un fichier temporaire
        temp_dir = Path("data/temp")
        temp_dir.mkdir(parents=True, exist_ok=True)
        screenshot_path = str(temp_dir / f"capture_{datetime.now().strftime('%H%M%S')}.png")

        # Convertir numpy array en image et sauvegarder
        img = Image.fromarray(screenshot_array)
        img.save(screenshot_path)
        logger.info(f"📸 Screenshot capturé: {screenshot_path}")
    except Exception as e:
        # Capture impossible : dégrader en simulation plutôt qu'échouer
        logger.warning(f"Capture écran échouée: {e}, utilisation mode dégradé")
        _execute_workflow_simulated(edges, total_steps)
        return

    # Créer le ScreenState complet pour l'exécution
    try:
        from core.models.screen_state import (
            ScreenState, WindowContext, RawLevel, PerceptionLevel,
            ContextLevel, EmbeddingRef
        )

        file_size = os.path.getsize(screenshot_path) if os.path.exists(screenshot_path) else 0

        # NOTE(review): résolution 1920x1080 codée en dur — à confirmer
        # si l'exécuteur s'appuie dessus pour convertir des coordonnées.
        screen_state = ScreenState(
            screen_state_id=f"agent_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            timestamp=datetime.now(),
            session_id="agent_chat",
            window=WindowContext(
                app_name="unknown",
                window_title="Unknown",
                screen_resolution=[1920, 1080],
                workspace="main"
            ),
            raw=RawLevel(
                screenshot_path=screenshot_path,
                capture_method="agent_chat",
                file_size_bytes=file_size
            ),
            perception=PerceptionLevel(
                embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
                detected_text=[],
                text_detection_method="none",
                confidence_avg=0.0
            ),
            context=ContextLevel(),
            ui_elements=[]
        )
        logger.info("✓ ScreenState complet créé")
    except Exception as e:
        logger.warning(f"Création ScreenState échouée: {e}, utilisation mode simulé")
        _execute_workflow_simulated(edges, total_steps)
        return

    # Exécuter chaque edge avec ActionExecutor
    success_count = 0
    fail_count = 0

    for i, edge in enumerate(edges):
        # Respecter une annulation utilisateur entre deux étapes
        if not execution_status["running"]:
            logger.info("⏹️ Exécution annulée par l'utilisateur")
            break

        action = edge.get("action", {})
        action_type = action.get("type", "unknown")
        progress = int(20 + (i + 1) / total_steps * 70)
        step_name = f"Étape {i+1}/{total_steps}: {action_type}"
        update_progress(progress, step_name, i + 3, total_steps + 2)
        logger.info(f"▶️ Exécution: {step_name}")

        try:
            # Créer un objet Edge compatible avec ActionExecutor
            workflow_edge = _create_workflow_edge(edge, params)

            # Exécuter l'action réelle
            result = action_executor.execute_edge(workflow_edge, screen_state)

            if result.status.value == "success":
                success_count += 1
                logger.info(f"✅ {step_name} - Succès")

                # Recapturer l'écran après chaque action réussie pour que
                # l'étape suivante voie l'état UI à jour.
                try:
                    time.sleep(0.3)  # Petit délai pour laisser l'UI se mettre à jour
                    new_screenshot = screen_capturer.capture()
                    if new_screenshot is not None:
                        new_path = str(temp_dir / f"capture_{datetime.now().strftime('%H%M%S_%f')}.png")
                        Image.fromarray(new_screenshot).save(new_path)
                        screen_state.raw.screenshot_path = new_path
                        screen_state.raw.file_size_bytes = os.path.getsize(new_path)
                except Exception as recapture_err:
                    logger.debug(f"Recapture échouée: {recapture_err}")
                    pass  # Continuer même si la recapture échoue
            else:
                fail_count += 1
                logger.warning(f"⚠️ {step_name} - {result.status.value}: {result.message}")
                # Continuer malgré l'échec (mode best-effort)
                if fail_count >= 3:
                    logger.error("❌ Trop d'échecs, arrêt de l'exécution")
                    break
        except Exception as e:
            fail_count += 1
            logger.error(f"❌ Erreur lors de {step_name}: {e}")
            if fail_count >= 3:
                logger.error("❌ Trop d'erreurs, arrêt de l'exécution")
                break

    logger.info(f"📊 Résultat: {success_count} succès, {fail_count} échecs sur {total_steps} étapes")
total_steps + 2) # Simulation avec délai time.sleep(0.5) logger.info(f"🎭 Simulé: {step_name}") def _create_workflow_edge(edge_dict, params): """Créer un objet WorkflowEdge depuis un dictionnaire.""" from dataclasses import dataclass, field from typing import Optional, Dict, Any, List # Importer les vraies classes du système try: from core.models.workflow_graph import Action, TargetSpec, WorkflowEdge, EdgeConstraints, PostConditions, ActionType USE_REAL_CLASSES = True except ImportError: USE_REAL_CLASSES = False action_dict = edge_dict.get("action", {}) target_dict = action_dict.get("target", {}) # Substituer les paramètres dans le texte cible target_text = target_dict.get("text", "") if target_text and isinstance(target_text, str): for key, val in params.items(): target_text = target_text.replace(f"{{{{{key}}}}}", str(val)) # {{key}} target_text = target_text.replace(f"${{{key}}}", str(val)) # ${key} target_text = target_text.replace(f"${key}", str(val)) # $key if USE_REAL_CLASSES: # Créer le TargetSpec avec les vraies classes target_spec = TargetSpec( by_role=target_dict.get("role"), by_text=target_text if target_text else target_dict.get("text"), by_position=target_dict.get("position"), ) # Convertir le type d'action en ActionType Enum action_type_str = action_dict.get("type", "mouse_click") try: action_type = ActionType(action_type_str) except ValueError: # Fallback si le type n'existe pas dans l'enum action_type = ActionType.MOUSE_CLICK # Créer l'Action action = Action( type=action_type, target=target_spec, parameters=action_dict.get("parameters", {}) ) # Créer le WorkflowEdge return WorkflowEdge( edge_id=edge_dict.get("edge_id", edge_dict.get("id", f"edge_{id(edge_dict)}")), from_node=edge_dict.get("source", edge_dict.get("from_node", "")), to_node=edge_dict.get("target", edge_dict.get("to_node", "")), action=action, constraints=EdgeConstraints(), post_conditions=PostConditions() ) else: # Fallback: classes simples @dataclass class SimpleTargetSpec: by_role: 
Optional[str] = None by_text: Optional[str] = None by_position: Optional[tuple] = None selection_policy: str = "first" fallback_strategy: str = "visual_similarity" embedding_ref: Optional[Any] = None context_hints: Dict[str, Any] = field(default_factory=dict) hard_constraints: Dict[str, Any] = field(default_factory=dict) weights: Dict[str, float] = field(default_factory=dict) @dataclass class SimpleAction: type: str target: SimpleTargetSpec parameters: Dict[str, Any] = field(default_factory=dict) @dataclass class SimpleWorkflowEdge: edge_id: str from_node: str to_node: str action: SimpleAction constraints: Any = None post_conditions: Any = None target_spec = SimpleTargetSpec( by_role=target_dict.get("role"), by_text=target_text if target_text else target_dict.get("text"), ) action = SimpleAction( type=action_dict.get("type", "unknown"), target=target_spec, parameters=action_dict.get("parameters", {}) ) return SimpleWorkflowEdge( edge_id=edge_dict.get("edge_id", edge_dict.get("id", f"edge_{id(edge_dict)}")), from_node=edge_dict.get("source", edge_dict.get("from_node", "")), to_node=edge_dict.get("target", edge_dict.get("to_node", "")), action=action, constraints=None, post_conditions=None ) def update_progress(progress: int, message: str, current: int, total: int): """Mettre à jour la progression.""" global execution_status execution_status["progress"] = progress execution_status["message"] = message socketio.emit('execution_progress', { "progress": progress, "step": message, "current": current, "total": total }) def finish_execution(workflow_name: str, success: bool, message: str): """Terminer l'exécution.""" global execution_status execution_status["running"] = False execution_status["progress"] = 100 if success else 0 execution_status["message"] = message # Mettre à jour l'historique if command_history: command_history[-1]["status"] = "completed" if success else "failed" socketio.emit('execution_completed', { "workflow": workflow_name, "success": success, 
"message": message }) # ============================================================================= # Main # ============================================================================= if __name__ == '__main__': init_system() print(""" ╔════════════════════════════════════════════════════════════╗ ║ RPA Vision V3 - Interface de Commande ║ ║ ║ ║ 🌐 http://localhost:5004 ║ ║ ║ ║ Ctrl+C pour arrêter ║ ╚════════════════════════════════════════════════════════════╝ """) socketio.run(app, host='0.0.0.0', port=5004, debug=False, allow_unsafe_werkzeug=True)