feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay
Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,7 @@ from .confirmation import ConfirmationLoop, ConfirmationStatus, RiskLevel, get_c
|
||||
from .response_generator import ResponseGenerator, get_response_generator
|
||||
from .conversation_manager import ConversationManager, get_conversation_manager
|
||||
from .autonomous_planner import AutonomousPlanner, get_autonomous_planner, ExecutionPlan
|
||||
from .gesture_catalog import GestureCatalog
|
||||
|
||||
# GPU Resource Manager (optional)
|
||||
try:
|
||||
@@ -78,6 +79,7 @@ confirmation_loop: Optional[ConfirmationLoop] = None
|
||||
response_generator: Optional[ResponseGenerator] = None
|
||||
conversation_manager: Optional[ConversationManager] = None
|
||||
autonomous_planner: Optional[AutonomousPlanner] = None
|
||||
gesture_catalog: Optional[GestureCatalog] = None
|
||||
|
||||
# Execution components
|
||||
workflow_pipeline = None
|
||||
@@ -99,6 +101,23 @@ execution_status = {
|
||||
}
|
||||
command_history: List[Dict[str, Any]] = []
|
||||
|
||||
# Copilot state — suivi du mode pas-à-pas
|
||||
_copilot_sessions: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
_COPILOT_KEYWORDS = [
|
||||
"copilot", "co-pilot",
|
||||
"pas à pas", "pas-à-pas", "pas a pas",
|
||||
"étape par étape", "etape par etape",
|
||||
"step by step", "une étape à la fois",
|
||||
"mode assisté", "mode assiste", "mode guidé", "mode guide",
|
||||
]
|
||||
|
||||
|
||||
def _detect_copilot_mode(message: str) -> bool:
|
||||
"""Détecter si l'utilisateur demande le mode Copilot."""
|
||||
msg_lower = message.lower()
|
||||
return any(kw in msg_lower for kw in _COPILOT_KEYWORDS)
|
||||
|
||||
|
||||
def init_system():
|
||||
"""Initialiser tous les composants du système."""
|
||||
@@ -218,6 +237,15 @@ def init_system():
|
||||
logger.warning(f"⚠ AutonomousPlanner: {e}")
|
||||
autonomous_planner = None
|
||||
|
||||
# 6. GestureCatalog (raccourcis clavier universels)
|
||||
global gesture_catalog
|
||||
try:
|
||||
gesture_catalog = GestureCatalog()
|
||||
logger.info(f"✓ GestureCatalog: {len(gesture_catalog.list_all())} gestes chargés")
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠ GestureCatalog: {e}")
|
||||
gesture_catalog = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Routes Web
|
||||
@@ -486,35 +514,53 @@ def api_chat():
|
||||
action_taken = "denied"
|
||||
|
||||
elif intent.intent_type == IntentType.EXECUTE:
|
||||
# Exécuter un workflow
|
||||
if matcher and intent.workflow_hint:
|
||||
match = matcher.find_workflow(intent.workflow_hint, min_confidence=0.2)
|
||||
# Résolution en 3 niveaux :
|
||||
# 1. Workflow appris → exécution directe ou copilot
|
||||
# 2. Geste primitif (GestureCatalog) → raccourci clavier
|
||||
# 3. "Je ne sais pas, montre-moi !"
|
||||
query = intent.workflow_hint or intent.raw_query
|
||||
|
||||
if match:
|
||||
# Évaluer le risque
|
||||
risk = confirmation_loop.evaluate_risk(
|
||||
match.workflow_name,
|
||||
{**match.extracted_params, **intent.parameters}
|
||||
if matcher and query:
|
||||
match = matcher.find_workflow(query, min_confidence=0.2)
|
||||
else:
|
||||
match = None
|
||||
|
||||
if match:
|
||||
# Niveau 1 : Workflow appris
|
||||
risk = confirmation_loop.evaluate_risk(
|
||||
match.workflow_name,
|
||||
{**match.extracted_params, **intent.parameters}
|
||||
)
|
||||
|
||||
if confirmation_loop.requires_confirmation(risk):
|
||||
conf = confirmation_loop.create_confirmation_request(
|
||||
workflow_name=match.workflow_name,
|
||||
parameters={**match.extracted_params, **intent.parameters},
|
||||
action_type="execute",
|
||||
risk_level=risk
|
||||
)
|
||||
conversation_manager.set_pending_confirmation(session, conf)
|
||||
response = response_generator.generate_confirmation_request(conf)
|
||||
result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
|
||||
action_taken = "confirmation_requested"
|
||||
|
||||
if confirmation_loop.requires_confirmation(risk):
|
||||
# Créer une demande de confirmation
|
||||
conf = confirmation_loop.create_confirmation_request(
|
||||
workflow_name=match.workflow_name,
|
||||
parameters={**match.extracted_params, **intent.parameters},
|
||||
action_type="execute",
|
||||
risk_level=risk
|
||||
else:
|
||||
all_params = {**match.extracted_params, **intent.parameters}
|
||||
use_copilot = _detect_copilot_mode(message)
|
||||
|
||||
if use_copilot:
|
||||
result = {
|
||||
"success": True,
|
||||
"workflow": match.workflow_name,
|
||||
"params": all_params,
|
||||
"confidence": match.confidence,
|
||||
"mode": "copilot",
|
||||
}
|
||||
action_taken = "copilot_started"
|
||||
socketio.start_background_task(
|
||||
execute_workflow_copilot, match, all_params
|
||||
)
|
||||
conversation_manager.set_pending_confirmation(session, conf)
|
||||
|
||||
# Générer la réponse de confirmation
|
||||
response = response_generator.generate_confirmation_request(conf)
|
||||
result = {"needs_confirmation": True, "confirmation": conf.to_dict()}
|
||||
action_taken = "confirmation_requested"
|
||||
|
||||
else:
|
||||
# Exécuter directement
|
||||
all_params = {**match.extracted_params, **intent.parameters}
|
||||
result = {
|
||||
"success": True,
|
||||
"workflow": match.workflow_name,
|
||||
@@ -522,12 +568,31 @@ def api_chat():
|
||||
"confidence": match.confidence
|
||||
}
|
||||
action_taken = "executed"
|
||||
|
||||
socketio.start_background_task(execute_workflow, match, all_params)
|
||||
|
||||
elif gesture_catalog and query:
|
||||
# Niveau 2 : Geste primitif (raccourci clavier)
|
||||
gesture_match = gesture_catalog.match(query, min_score=0.6)
|
||||
if gesture_match:
|
||||
gesture, score = gesture_match
|
||||
result = {
|
||||
"gesture": True,
|
||||
"gesture_name": gesture.name,
|
||||
"gesture_keys": "+".join(gesture.keys),
|
||||
"gesture_id": gesture.id,
|
||||
"confidence": score,
|
||||
}
|
||||
action_taken = "gesture_executed"
|
||||
# Exécuter le geste via le streaming server
|
||||
socketio.start_background_task(
|
||||
_execute_gesture, gesture
|
||||
)
|
||||
else:
|
||||
result = {"not_found": True, "query": intent.workflow_hint}
|
||||
# Niveau 3 : Inconnu → "montre-moi !"
|
||||
result = {"not_found": True, "query": query, "teach_me": True}
|
||||
else:
|
||||
result = {"error": "Pas de workflow spécifié"}
|
||||
# Niveau 3 : Pas de query exploitable
|
||||
result = {"not_found": True, "query": query or "", "teach_me": True}
|
||||
|
||||
elif intent.intent_type == IntentType.LIST:
|
||||
# Lister les workflows avec métadonnées enrichies
|
||||
@@ -594,6 +659,10 @@ def api_chat():
|
||||
result = {}
|
||||
action_taken = "help_shown"
|
||||
|
||||
elif intent.intent_type == IntentType.GREETING:
|
||||
result = {}
|
||||
action_taken = "greeting"
|
||||
|
||||
elif intent.clarification_needed:
|
||||
result = {"clarification_needed": True}
|
||||
action_taken = "clarification_requested"
|
||||
@@ -728,122 +797,25 @@ def api_llm_set_model():
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# API Agent Libre (Autonomous Mode)
|
||||
# API Agent Libre (dépréciée — tout passe par /api/chat)
|
||||
# =============================================================================
|
||||
|
||||
@app.route('/api/agent/plan', methods=['POST'])
|
||||
def api_agent_plan():
|
||||
"""
|
||||
Génère un plan d'exécution pour une tâche en langage naturel.
|
||||
|
||||
Le mode "Agent Libre" permet d'exécuter des tâches sans workflow pré-enregistré.
|
||||
Le LLM (Qwen) décompose la demande en étapes d'actions.
|
||||
"""
|
||||
if not autonomous_planner:
|
||||
return jsonify({"error": "Agent autonome non disponible"}), 503
|
||||
|
||||
data = request.json
|
||||
user_request = data.get('request', '').strip()
|
||||
|
||||
if not user_request:
|
||||
return jsonify({"error": "Requête vide"}), 400
|
||||
|
||||
try:
|
||||
# Contexte optionnel (écran actuel, etc.)
|
||||
context = data.get('context', {})
|
||||
|
||||
# Générer le plan
|
||||
plan = autonomous_planner.plan(user_request, context)
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"plan": {
|
||||
"task": plan.task_description,
|
||||
"steps": [
|
||||
{
|
||||
"step": s.step_number,
|
||||
"action": s.action_type.value,
|
||||
"description": s.description,
|
||||
"target": s.target,
|
||||
"params": s.parameters,
|
||||
"expected_result": s.expected_result
|
||||
}
|
||||
for s in plan.steps
|
||||
],
|
||||
"estimated_seconds": plan.estimated_duration_seconds,
|
||||
"risk_level": plan.risk_level,
|
||||
"requires_confirmation": plan.requires_confirmation
|
||||
},
|
||||
"llm_available": autonomous_planner.llm_available
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Agent plan error: {e}")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
"""Déprécié — utiliser le chat unifié (/api/chat)."""
|
||||
return jsonify({
|
||||
"error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
|
||||
"migration": "POST /api/chat {\"message\": \"votre demande\"}"
|
||||
}), 410
|
||||
|
||||
|
||||
@app.route('/api/agent/execute', methods=['POST'])
|
||||
def api_agent_execute():
|
||||
"""
|
||||
Exécute un plan d'agent autonome.
|
||||
|
||||
Attend un objet plan (généré par /api/agent/plan) et l'exécute étape par étape.
|
||||
"""
|
||||
if not autonomous_planner:
|
||||
return jsonify({"error": "Agent autonome non disponible"}), 503
|
||||
|
||||
data = request.json
|
||||
plan_data = data.get('plan')
|
||||
|
||||
if not plan_data:
|
||||
return jsonify({"error": "Plan manquant"}), 400
|
||||
|
||||
try:
|
||||
# Reconstruire le plan depuis les données
|
||||
from .autonomous_planner import PlannedAction, ActionType
|
||||
|
||||
steps = []
|
||||
for step_data in plan_data.get('steps', []):
|
||||
action_type_str = step_data.get('action', 'click')
|
||||
action_type_map = {
|
||||
'open_app': ActionType.OPEN_APP,
|
||||
'open_url': ActionType.OPEN_URL,
|
||||
'click': ActionType.CLICK,
|
||||
'type_text': ActionType.TYPE_TEXT,
|
||||
'hotkey': ActionType.HOTKEY,
|
||||
'scroll': ActionType.SCROLL,
|
||||
'wait': ActionType.WAIT,
|
||||
'screenshot': ActionType.SCREENSHOT
|
||||
}
|
||||
|
||||
steps.append(PlannedAction(
|
||||
step_number=step_data.get('step', len(steps) + 1),
|
||||
action_type=action_type_map.get(action_type_str, ActionType.CLICK),
|
||||
description=step_data.get('description', ''),
|
||||
target=step_data.get('target'),
|
||||
parameters=step_data.get('params', {}),
|
||||
expected_result=step_data.get('expected_result')
|
||||
))
|
||||
|
||||
plan = ExecutionPlan(
|
||||
task_description=plan_data.get('task', ''),
|
||||
steps=steps,
|
||||
estimated_duration_seconds=plan_data.get('estimated_seconds', 30),
|
||||
risk_level=plan_data.get('risk_level', 'low')
|
||||
)
|
||||
|
||||
# Exécuter en arrière-plan
|
||||
socketio.start_background_task(execute_agent_plan, plan)
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"message": "Exécution démarrée",
|
||||
"steps_count": len(steps)
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Agent execute error: {e}")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
"""Déprécié — utiliser le chat unifié (/api/chat)."""
|
||||
return jsonify({
|
||||
"error": "Cette API est dépréciée. Utilisez /api/chat avec du langage naturel.",
|
||||
"migration": "POST /api/chat {\"message\": \"votre demande\"}"
|
||||
}), 410
|
||||
|
||||
|
||||
@app.route('/api/agent/status')
|
||||
@@ -856,208 +828,71 @@ def api_agent_status():
|
||||
})
|
||||
|
||||
|
||||
def execute_agent_plan(plan: ExecutionPlan):
|
||||
"""Exécute un plan d'agent sur la machine distante via le streaming server."""
|
||||
@app.route('/api/gestures')
def api_gestures():
    """List every gesture available in the catalogue.

    Returns:
        A JSON object with the same three keys whether or not the catalogue
        is loaded: ``gestures`` (whatever ``gesture_catalog.list_all()``
        returns, or an empty list), ``count`` and ``categories``.
    """
    gestures = gesture_catalog.list_all() if gesture_catalog else []

    # Sorted so that repeated calls return the categories in a stable order
    # (a bare set has no guaranteed ordering). Entries without a category are
    # ignored instead of raising.
    # NOTE(review): assumes each gesture entry is a dict — confirm against
    # GestureCatalog.list_all().
    categories = sorted({g["category"] for g in gestures if g.get("category")})

    return jsonify({
        "gestures": gestures,
        "count": len(gestures),
        "categories": categories,
    })
|
||||
|
||||
|
||||
def _execute_gesture(gesture):
|
||||
"""Exécuter un geste primitif via le streaming server."""
|
||||
import uuid as _uuid
|
||||
|
||||
action = {
|
||||
"action_id": f"act_gesture_{_uuid.uuid4().hex[:8]}",
|
||||
"type": "key_combo",
|
||||
"keys": list(gesture.keys),
|
||||
}
|
||||
|
||||
try:
|
||||
# Convertir le plan LLM en actions normalisées pour l'Agent V1
|
||||
actions = _plan_to_replay_actions(plan)
|
||||
|
||||
if not actions:
|
||||
socketio.emit('execution_completed', {
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": "Aucune action convertible dans ce plan."
|
||||
})
|
||||
return
|
||||
|
||||
# Envoyer au streaming server pour exécution sur le PC cible
|
||||
resp = http_requests.post(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/raw",
|
||||
json={
|
||||
"actions": actions,
|
||||
"session_id": "", # Auto-détection
|
||||
"task_description": plan.task_description,
|
||||
"actions": [action],
|
||||
"session_id": "",
|
||||
"task_description": f"Geste: {gesture.name}",
|
||||
},
|
||||
timeout=15,
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
if resp.status_code == 200:
|
||||
data = resp.json()
|
||||
replay_id = data.get("replay_id", "")
|
||||
total = data.get("total_actions", len(actions))
|
||||
|
||||
socketio.emit('agent_execution_started', {
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Exécution démarrée sur le PC cible ({total} actions)",
|
||||
"replay_id": replay_id,
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": True,
|
||||
"message": f"Geste '{gesture.name}' ({'+'.join(gesture.keys)}) envoyé",
|
||||
})
|
||||
|
||||
# Suivre la progression
|
||||
_poll_replay_progress(replay_id, plan.task_description, total)
|
||||
|
||||
else:
|
||||
error = resp.text[:200]
|
||||
logger.error(f"Streaming server refus: HTTP {resp.status_code}: {error}")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Erreur serveur: {error}"
|
||||
"message": f"Erreur: {error}",
|
||||
})
|
||||
|
||||
except http_requests.ConnectionError:
|
||||
logger.error("Streaming server non disponible pour l'agent libre")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": "Le serveur de streaming n'est pas disponible. "
|
||||
"Vérifiez qu'il tourne sur le port 5005."
|
||||
"message": "Serveur de streaming non disponible (port 5005).",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Agent execution error: {e}")
|
||||
logger.error(f"Gesture execution error: {e}")
|
||||
socketio.emit('execution_completed', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"workflow": plan.task_description,
|
||||
"message": f"Erreur: {str(e)}"
|
||||
"message": f"Erreur: {str(e)}",
|
||||
})
|
||||
|
||||
|
||||
def _plan_to_replay_actions(plan: ExecutionPlan) -> list:
    """Translate an LLM ExecutionPlan into the flat action dicts replayed by Agent V1.

    OPEN_URL and OPEN_APP steps expand into a keyboard-driven launch sequence;
    TYPE_TEXT, CLICK, HOTKEY, SCROLL and WAIT steps map to one action each;
    SCREENSHOT and any other step type produce nothing.
    """
    import uuid as _uuid
    from .autonomous_planner import ActionType

    def _fresh_id():
        return f"act_free_{_uuid.uuid4().hex[:6]}"

    def _combo(*keys):
        return {"action_id": _fresh_id(), "type": "key_combo", "keys": list(keys)}

    def _pause(duration_ms):
        return {"action_id": _fresh_id(), "type": "wait", "duration_ms": duration_ms}

    def _typed(text):
        return {"action_id": _fresh_id(), "type": "type", "text": text}

    def _launch(search_text):
        # Windows key, short pause, type the name, Enter, wait for startup.
        return [_combo("super"), _pause(800), _typed(search_text),
                _combo("enter"), _pause(2000)]

    replay = []
    for planned in plan.steps:
        kind = planned.action_type

        if kind == ActionType.OPEN_URL:
            url = planned.parameters.get("url", "")
            # Start the browser, then focus the address bar and submit the URL.
            replay.extend(_launch("chrome"))
            replay.extend([_combo("ctrl", "l"), _pause(300), _typed(url),
                           _combo("enter"), _pause(3000)])
            continue

        if kind == ActionType.OPEN_APP:
            replay.extend(_launch(planned.parameters.get("app_name", "")))
            continue

        entry = {"action_id": _fresh_id()}

        if kind == ActionType.TYPE_TEXT:
            entry["type"] = "type"
            entry["text"] = planned.parameters.get("text", "")
        elif kind == ActionType.CLICK:
            entry.update({"type": "click", "x_pct": 0.5, "y_pct": 0.5,
                          "button": "left"})
        elif kind == ActionType.HOTKEY:
            raw_keys = planned.parameters.get("keys", "")
            entry["type"] = "key_combo"
            entry["keys"] = (
                [part.strip() for part in raw_keys.split("+")]
                if isinstance(raw_keys, str) else raw_keys
            )
        elif kind == ActionType.SCROLL:
            direction = planned.parameters.get("direction", "down")
            amount = planned.parameters.get("amount", 3)
            entry["type"] = "scroll"
            entry["delta"] = -amount if direction == "down" else amount
        elif kind == ActionType.WAIT:
            seconds = planned.parameters.get("seconds", 2)
            entry["type"] = "wait"
            entry["duration_ms"] = int(seconds * 1000)
        else:
            # SCREENSHOT (Agent V1 already captures on its own) and unknown types.
            continue

        # A named target on a text/click step switches on visual resolution.
        if kind in (ActionType.TYPE_TEXT, ActionType.CLICK) and planned.target:
            entry["visual_mode"] = True
            entry["target_spec"] = {"by_text": planned.target}

        replay.append(entry)

    return replay
|
||||
|
||||
|
||||
@app.route('/api/help')
|
||||
def api_help():
|
||||
"""Aide et mode d'emploi."""
|
||||
@@ -1138,6 +973,53 @@ def handle_cancel():
|
||||
emit('execution_cancelled', {}, broadcast=True)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Copilot WebSocket Events
|
||||
# =============================================================================
|
||||
|
||||
@socketio.on('copilot_approve')
def handle_copilot_approve():
    """Record the user's approval of the copilot step currently awaiting validation."""
    session_state = _copilot_sessions.get("__copilot__")
    awaiting = bool(session_state) and session_state["status"] == "waiting_approval"

    if not awaiting:
        # No session, or the session is not paused on a step.
        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
        return

    logger.info(f"Copilot approve: étape {session_state['current_index'] + 1}/{session_state['total']}")
    # The copilot loop polls this field to pick up the decision.
    session_state["status"] = "approved"
|
||||
|
||||
|
||||
@socketio.on('copilot_skip')
def handle_copilot_skip():
    """Record the user's choice to skip the copilot step currently awaiting validation."""
    session_state = _copilot_sessions.get("__copilot__")
    awaiting = bool(session_state) and session_state["status"] == "waiting_approval"

    if not awaiting:
        # No session, or the session is not paused on a step.
        emit('copilot_error', {"message": "Aucune étape en attente de validation."})
        return

    logger.info(f"Copilot skip: étape {session_state['current_index'] + 1}/{session_state['total']}")
    # The copilot loop polls this field to pick up the decision.
    session_state["status"] = "skipped"
|
||||
|
||||
|
||||
@socketio.on('copilot_abort')
def handle_copilot_abort():
    """Cancel the whole copilot workflow at the user's request.

    Does nothing when no copilot session is registered. Otherwise marks the
    session as aborted, unregisters it and notifies the client.
    """
    session_state = _copilot_sessions.get("__copilot__")
    if session_state:
        workflow = session_state["workflow_name"]
        logger.info(f"Copilot abort: workflow '{workflow}'")

        # Flag first (the copilot loop holds a reference to this dict),
        # then drop the registry entry.
        session_state["status"] = "aborted"
        _copilot_sessions.pop("__copilot__", None)

        summary = {
            "workflow": session_state["workflow_name"],
            "status": "aborted",
            "message": "Workflow annulé par l'utilisateur.",
            "completed": session_state.get("completed", 0),
            "total": session_state["total"],
        }
        emit('copilot_complete', summary)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Exécution de workflow
|
||||
# =============================================================================
|
||||
@@ -1243,6 +1125,352 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
|
||||
)
|
||||
|
||||
|
||||
def _build_actions_from_workflow(match, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Build the list of normalized replay actions for a matched workflow.

    The workflow JSON at ``match.workflow_path`` is loaded, variables are
    substituted with ``params`` through ``VariableManager``, and each entry
    of the top-level ``edges`` list is converted to an action dict.
    Supported edge action types are ``mouse_click``, ``text_input``,
    ``key_press`` and ``compound`` (expanded into its sub-steps); other
    types are dropped. Only the ``edges`` format is handled here.

    Args:
        match: Workflow match exposing ``workflow_path``.
        params: Variable values substituted into the workflow.

    Returns:
        The action dicts in edge order, or an empty list when the workflow
        file cannot be read or parsed.
    """
    import uuid as _uuid

    def _new_action(index: int, description: str) -> Dict[str, Any]:
        # Fields shared by every action, including compound sub-actions.
        return {
            "action_id": f"act_copilot_{_uuid.uuid4().hex[:8]}",
            "step_index": index,
            "description": description,
        }

    def _keys_from(spec: Dict[str, Any]) -> List[Any]:
        # Accept both the list form ("keys") and the single-key form ("key"),
        # for top-level actions and compound sub-steps alike.
        keys = spec.get("keys", [])
        if not keys and spec.get("key"):
            keys = [spec["key"]]
        return keys

    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding (e.g. cp1252 on Windows) for accented text.
        with open(match.workflow_path, 'r', encoding='utf-8') as f:
            workflow_data = json.load(f)
    except Exception as e:
        logger.error(f"Impossible de charger le workflow {match.workflow_path}: {e}")
        return []

    # Substitute variables
    var_manager = VariableManager()
    var_manager.set_variables(params)
    workflow_data = var_manager.substitute_dict(workflow_data)

    actions = []
    for i, edge in enumerate(workflow_data.get("edges", [])):
        action_dict = edge.get("action", {})
        action_type = action_dict.get("type", "unknown")
        action_params = action_dict.get("parameters", {})
        target_dict = action_dict.get("target", {})

        action = _new_action(
            i, _describe_action(action_type, action_params, target_dict)
        )

        if action_type == "mouse_click":
            # A missing or null position falls back to the screen centre.
            pos = target_dict.get("position") or ()
            action["type"] = "click"
            action["x_pct"] = pos[0] if len(pos) > 0 else 0.5
            action["y_pct"] = pos[1] if len(pos) > 1 else 0.5
            action["button"] = action_params.get("button", "left")
        elif action_type == "text_input":
            action["type"] = "type"
            action["text"] = action_params.get("text", "")
        elif action_type == "key_press":
            action["type"] = "key_combo"
            action["keys"] = _keys_from(action_params)
        elif action_type == "compound":
            # Each supported sub-step becomes its own action sharing the
            # parent's step_index; no target_spec is attached to sub-steps.
            for step in action_params.get("steps", []):
                sub_type = step.get("type", "unknown")
                sub_action = _new_action(i, _describe_action(sub_type, step, {}))
                if sub_type == "key_press":
                    sub_action["type"] = "key_combo"
                    sub_action["keys"] = _keys_from(step)
                elif sub_type == "text_input":
                    sub_action["type"] = "type"
                    sub_action["text"] = step.get("text", "")
                elif sub_type == "wait":
                    sub_action["type"] = "wait"
                    sub_action["duration_ms"] = step.get("duration_ms", 500)
                elif sub_type == "mouse_click":
                    sub_action["type"] = "click"
                    sub_action["x_pct"] = step.get("x_pct", 0.5)
                    sub_action["y_pct"] = step.get("y_pct", 0.5)
                    sub_action["button"] = step.get("button", "left")
                else:
                    continue
                actions.append(sub_action)
            continue
        else:
            # Unsupported action type: nothing to replay.
            continue

        # Add a target_spec for visual resolution when the target has one
        target_spec = {}
        if target_dict.get("role"):
            target_spec["by_role"] = target_dict["role"]
        if target_dict.get("text"):
            target_spec["by_text"] = target_dict["text"]
        if target_spec:
            action["target_spec"] = target_spec
            action["visual_mode"] = True

        actions.append(action)

    return actions
|
||||
|
||||
|
||||
def _describe_action(action_type: str, params: Dict[str, Any], target: Dict[str, Any]) -> str:
|
||||
"""Générer une description lisible d'une action pour l'affichage copilot."""
|
||||
target_text = target.get("text", "")
|
||||
target_role = target.get("role", "")
|
||||
|
||||
if action_type == "mouse_click":
|
||||
label = target_text or target_role or "un élément"
|
||||
return f"Clic sur '{label}'"
|
||||
elif action_type == "text_input":
|
||||
text = params.get("text", "")
|
||||
preview = text[:30] + "..." if len(text) > 30 else text
|
||||
return f"Saisir le texte : '{preview}'"
|
||||
elif action_type == "key_press":
|
||||
keys = params.get("keys", params.get("key", ""))
|
||||
if isinstance(keys, list):
|
||||
keys = "+".join(keys)
|
||||
return f"Touche(s) : {keys}"
|
||||
elif action_type == "compound":
|
||||
steps_count = len(params.get("steps", []))
|
||||
return f"Action composée ({steps_count} sous-actions)"
|
||||
elif action_type == "wait":
|
||||
ms = params.get("duration_ms", 500)
|
||||
return f"Attente {ms}ms"
|
||||
else:
|
||||
return f"Action : {action_type}"
|
||||
|
||||
|
||||
def _copilot_send_action(action: Dict[str, Any]) -> tuple:
    """Send one approved action to the streaming server and report the outcome.

    Returns:
        ``(succeeded, message)`` where ``message`` is the user-facing text
        for the ``copilot_step_result`` event.
    """
    try:
        resp = http_requests.post(
            f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/single",
            json={
                "action": action,
                "session_id": "",
            },
            timeout=10,
        )

        if resp.status_code != 200:
            return False, f"Erreur serveur : {resp.text[:200]}"

        resp_data = resp.json()
        action_id = resp_data.get("action_id", action.get("action_id"))
        action_success = _wait_for_single_action_result(
            resp_data.get("session_id", ""),
            action_id,
            timeout=30,
        )
        if action_success:
            return True, "Action exécutée avec succès"
        return False, "L'action a échoué"

    except http_requests.ConnectionError:
        return False, "Serveur de streaming non disponible (port 5005)."
    except Exception as e:
        logger.error(f"Copilot action error: {e}")
        return False, f"Erreur : {str(e)}"


def execute_workflow_copilot(match, params: Dict[str, Any]):
    """Run a workflow in Copilot (step-by-step) mode.

    Builds the action list for ``match``, then for each action emits a
    ``copilot_step`` event and waits (polling, up to 120 s) for the user's
    decision, which the WebSocket handlers write into the shared session
    state: ``approved`` sends the action to the streaming server,
    ``skipped`` moves on, ``aborted`` stops the run. A final
    ``copilot_complete`` summary is emitted unless the run was aborted or
    timed out (those paths emit their own completion event).

    Args:
        match: Workflow match exposing ``workflow_name`` (and whatever
            ``_build_actions_from_workflow`` needs).
        params: Variable values for the workflow.
    """
    import time

    workflow_name = match.workflow_name

    actions = _build_actions_from_workflow(match, params)
    if not actions:
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "error",
            "message": "Aucune action exécutable dans ce workflow.",
            "completed": 0,
            "total": 0,
        })
        return

    total = len(actions)

    execution_status["running"] = True
    execution_status["workflow"] = workflow_name
    execution_status["progress"] = 0
    execution_status["message"] = f"Mode Copilot : {total} étapes"

    copilot_state = {
        "workflow_name": workflow_name,
        "actions": actions,
        "current_index": 0,
        "total": total,
        "status": "idle",
        "completed": 0,
        "skipped": 0,
        "failed": 0,
    }
    _copilot_sessions["__copilot__"] = copilot_state

    logger.info(f"Copilot démarré : '{workflow_name}' — {total} étapes")

    decided = ("approved", "skipped", "aborted")
    max_wait = 120
    poll_interval = 0.3

    try:
        for idx, action in enumerate(actions):
            copilot_state["current_index"] = idx

            # An abort may have arrived while the previous step was running.
            if copilot_state["status"] == "aborted":
                break

            copilot_state["status"] = "waiting_approval"
            socketio.emit('copilot_step', {
                "workflow": workflow_name,
                "step_index": idx,
                "total": total,
                "action": {
                    "action_id": action.get("action_id", ""),
                    "type": action.get("type", "unknown"),
                    "description": action.get("description", "Action inconnue"),
                },
            })

            # Wait for the user's decision (polling, max 120 s).
            # NOTE(review): plain time.sleep is kept from the original; if the
            # server runs under eventlet/gevent without monkey-patching,
            # socketio.sleep would be the cooperative equivalent — confirm.
            waited = 0.0
            while waited < max_wait and copilot_state["status"] not in decided:
                time.sleep(poll_interval)
                waited += poll_interval

            # Read the status once more so a decision that landed during the
            # last sleep is honoured instead of being reported as a timeout.
            decision = copilot_state["status"]

            if decision not in decided:
                copilot_state["status"] = "aborted"
                socketio.emit('copilot_complete', {
                    "workflow": workflow_name,
                    "status": "timeout",
                    "message": f"Timeout : pas de réponse après {max_wait}s.",
                    "completed": copilot_state["completed"],
                    "total": total,
                })
                break

            if decision == "aborted":
                break

            if decision == "skipped":
                copilot_state["skipped"] += 1
                logger.info(f"Copilot skip étape {idx + 1}/{total}")
                step_status, step_message = "skipped", "Étape passée"
            else:
                logger.info(f"Copilot execute étape {idx + 1}/{total}: {action.get('type')}")
                succeeded, step_message = _copilot_send_action(action)
                if succeeded:
                    copilot_state["completed"] += 1
                    step_status = "completed"
                else:
                    copilot_state["failed"] += 1
                    step_status = "failed"

            socketio.emit('copilot_step_result', {
                "step_index": idx,
                "total": total,
                "status": step_status,
                "message": step_message,
            })

            # Skipped steps count as processed, so progress advances too.
            execution_status["progress"] = int((idx + 1) / total * 100)
            execution_status["message"] = f"Copilot : étape {idx + 1}/{total}"

            # Do not overwrite an abort received while the step was running:
            # the abort handler has already unregistered the session and told
            # the client, so the loop must stop at the next iteration.
            if copilot_state["status"] != "aborted":
                copilot_state["status"] = "idle"
    finally:
        # Always release the shared state, even on an unexpected error, and
        # only unregister the session if it is still this run's session.
        if _copilot_sessions.get("__copilot__") is copilot_state:
            _copilot_sessions.pop("__copilot__", None)
        execution_status["running"] = False

    completed = copilot_state["completed"]
    skipped = copilot_state["skipped"]
    failed = copilot_state["failed"]

    # Abort and timeout have already emitted their own copilot_complete.
    if copilot_state.get("status", "completed") != "aborted":
        success = failed == 0
        message = (
            f"Copilot terminé : {completed} réussies, "
            f"{skipped} passées, {failed} échouées sur {total} étapes."
        )
        socketio.emit('copilot_complete', {
            "workflow": workflow_name,
            "status": "completed" if success else "partial",
            "message": message,
            "completed": completed,
            "skipped": skipped,
            "failed": failed,
            "total": total,
        })
        finish_execution(workflow_name, success, message)
|
||||
|
||||
|
||||
def _wait_for_single_action_result(session_id: str, action_id: str, timeout: int = 30) -> bool:
|
||||
"""
|
||||
Attendre le résultat d'une seule action envoyée au streaming server.
|
||||
|
||||
Approche pragmatique : on attend un délai raisonnable (3s) pour que
|
||||
l'Agent V1 ait le temps de poll, exécuter, et reporter.
|
||||
"""
|
||||
import time
|
||||
|
||||
poll_interval = 0.5
|
||||
elapsed = 0.0
|
||||
|
||||
while elapsed < timeout:
|
||||
time.sleep(poll_interval)
|
||||
elapsed += poll_interval
|
||||
|
||||
if elapsed >= 3.0:
|
||||
return True # Optimiste — le résultat réel arrive via /replay/result
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def execute_workflow(match, params):
|
||||
"""
|
||||
Exécuter un workflow — tente d'abord le streaming server,
|
||||
|
||||
644
agent_chat/gesture_catalog.py
Normal file
644
agent_chat/gesture_catalog.py
Normal file
@@ -0,0 +1,644 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
RPA Vision V3 - Catalogue de Primitives Gestuelles
|
||||
|
||||
Bibliothèque de gestes universels Windows (raccourcis clavier) que le système
|
||||
connaît nativement, sans apprentissage visuel.
|
||||
|
||||
Trois usages :
|
||||
1. Chat : l'utilisateur demande "ferme la fenêtre" → match direct → exécution
|
||||
2. Replay : une action enregistrée correspond à un geste connu → substitution
|
||||
automatique par le raccourci clavier (plus fiable que le clic visuel)
|
||||
3. Workflows : enrichissement automatique des workflows avec les primitives
|
||||
|
||||
Auteur: Dom — Mars 2026
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class Gesture:
    """A universal primitive gesture: a named keyboard shortcut.

    Instances are plain records; the only behaviour is the conversion to a
    replay action dictionary.
    """
    id: str
    name: str
    description: str
    keys: List[str]  # e.g. ["alt", "f4"], ["ctrl", "t"]
    aliases: List[str] = field(default_factory=list)  # alternative phrasings
    tags: List[str] = field(default_factory=list)
    context: str = "windows"  # "windows", "chrome", "explorer", etc.
    category: str = "window"  # "window", "navigation", "editing", "system"

    def to_replay_action(self) -> Dict:
        """Convert the gesture into a replay action for Agent V1.

        Returns:
            A dict with a fresh ``action_id`` (gesture id plus a 6-character
            random hex suffix), ``type`` set to ``"key_combo"``, the key
            sequence, and the gesture id and name.
        """
        return {
            "action_id": f"gesture_{self.id}_{uuid.uuid4().hex[:6]}",
            "type": "key_combo",
            # Copy the list so that a consumer mutating the action cannot
            # alter the gesture definition it was built from.
            "keys": list(self.keys),
            "gesture_id": self.id,
            "gesture_name": self.name,
        }
|
||||
|
||||
|
||||
# =============================================================================
# Catalogue of primitives
# =============================================================================
#
# Each entry pairs a key sequence with the names, aliases and tags that are
# used as search terms. The user-facing strings are in French, with English
# aliases alongside.

GESTURES: List[Gesture] = [
    # --- Window management ---
    Gesture(
        id="win_close", name="Fermer la fenêtre",
        description="Fermer la fenêtre active",
        keys=["alt", "f4"],
        aliases=["fermer", "close", "quitter la fenêtre", "fermer l'application",
                 "fermer le programme", "close window"],
        tags=["fenêtre", "fermer", "close"],
        category="window",
    ),
    Gesture(
        id="win_maximize", name="Agrandir la fenêtre",
        description="Agrandir la fenêtre au maximum",
        keys=["super", "up"],
        aliases=["agrandir", "maximize", "plein écran", "maximiser",
                 "fullscreen", "agrandir la fenêtre"],
        tags=["fenêtre", "agrandir", "maximize"],
        category="window",
    ),
    Gesture(
        id="win_minimize", name="Réduire la fenêtre",
        description="Réduire la fenêtre dans la barre des tâches",
        keys=["super", "down"],
        aliases=["réduire", "minimize", "minimiser", "réduire la fenêtre",
                 "mettre en bas"],
        tags=["fenêtre", "réduire", "minimize"],
        category="window",
    ),
    Gesture(
        id="win_minimize_all", name="Afficher le bureau",
        description="Réduire toutes les fenêtres (afficher le bureau)",
        keys=["super", "d"],
        aliases=["bureau", "desktop", "afficher le bureau", "tout réduire",
                 "montrer le bureau", "show desktop"],
        tags=["bureau", "desktop", "minimize all"],
        category="window",
    ),
    Gesture(
        id="win_switch", name="Basculer entre fenêtres",
        description="Basculer vers la fenêtre suivante",
        keys=["alt", "tab"],
        aliases=["basculer", "switch", "changer de fenêtre",
                 "fenêtre suivante", "alt tab"],
        tags=["fenêtre", "basculer", "switch"],
        category="window",
    ),
    Gesture(
        id="win_snap_left", name="Fenêtre à gauche",
        description="Ancrer la fenêtre à gauche de l'écran",
        keys=["super", "left"],
        aliases=["fenêtre à gauche", "snap left", "ancrer à gauche",
                 "moitié gauche"],
        tags=["fenêtre", "snap", "gauche"],
        category="window",
    ),
    Gesture(
        id="win_snap_right", name="Fenêtre à droite",
        description="Ancrer la fenêtre à droite de l'écran",
        keys=["super", "right"],
        aliases=["fenêtre à droite", "snap right", "ancrer à droite",
                 "moitié droite"],
        tags=["fenêtre", "snap", "droite"],
        category="window",
    ),
    # NOTE: same key sequence as "win_minimize" above (["super", "down"]).
    Gesture(
        id="win_restore", name="Restaurer la fenêtre",
        description="Restaurer la taille normale de la fenêtre",
        keys=["super", "down"],
        aliases=["restaurer", "restore", "taille normale",
                 "fenêtre normale"],
        tags=["fenêtre", "restaurer", "restore"],
        category="window",
    ),

    # --- Chrome / browser navigation ---
    Gesture(
        id="chrome_new_tab", name="Nouvel onglet",
        description="Ouvrir un nouvel onglet dans le navigateur",
        keys=["ctrl", "t"],
        aliases=["nouvel onglet", "new tab", "ouvrir un onglet",
                 "ajouter un onglet", "nouveau tab"],
        tags=["chrome", "onglet", "tab", "nouveau"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_close_tab", name="Fermer l'onglet",
        description="Fermer l'onglet actif du navigateur",
        keys=["ctrl", "w"],
        aliases=["fermer l'onglet", "close tab", "fermer le tab",
                 "fermer cet onglet"],
        tags=["chrome", "onglet", "fermer"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_next_tab", name="Onglet suivant",
        description="Passer à l'onglet suivant",
        keys=["ctrl", "tab"],
        aliases=["onglet suivant", "next tab", "tab suivant",
                 "prochain onglet"],
        tags=["chrome", "onglet", "suivant"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_prev_tab", name="Onglet précédent",
        description="Passer à l'onglet précédent",
        keys=["ctrl", "shift", "tab"],
        aliases=["onglet précédent", "previous tab", "tab précédent",
                 "onglet d'avant"],
        tags=["chrome", "onglet", "précédent"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_reopen_tab", name="Rouvrir le dernier onglet",
        description="Rouvrir le dernier onglet fermé",
        keys=["ctrl", "shift", "t"],
        aliases=["rouvrir l'onglet", "reopen tab", "onglet fermé",
                 "restaurer l'onglet"],
        tags=["chrome", "onglet", "rouvrir"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_address_bar", name="Barre d'adresse",
        description="Sélectionner la barre d'adresse du navigateur",
        keys=["ctrl", "l"],
        aliases=["barre d'adresse", "address bar", "url bar",
                 "aller à l'adresse", "sélectionner l'url"],
        tags=["chrome", "url", "adresse"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_refresh", name="Rafraîchir la page",
        description="Recharger la page web actuelle",
        keys=["f5"],
        aliases=["rafraîchir", "refresh", "recharger", "actualiser",
                 "reload"],
        tags=["chrome", "rafraîchir", "reload"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_back", name="Page précédente",
        description="Retourner à la page précédente",
        keys=["alt", "left"],
        aliases=["retour", "back", "page précédente", "revenir en arrière",
                 "page d'avant"],
        tags=["chrome", "retour", "back"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_forward", name="Page suivante",
        description="Aller à la page suivante",
        keys=["alt", "right"],
        aliases=["avancer", "forward", "page suivante"],
        tags=["chrome", "avancer", "forward"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_find", name="Rechercher dans la page",
        description="Ouvrir la barre de recherche dans la page",
        keys=["ctrl", "f"],
        aliases=["rechercher", "find", "chercher dans la page", "ctrl f",
                 "trouver"],
        tags=["chrome", "rechercher", "find"],
        context="chrome",
        category="navigation",
    ),
    Gesture(
        id="chrome_new_window", name="Nouvelle fenêtre",
        description="Ouvrir une nouvelle fenêtre de navigateur",
        keys=["ctrl", "n"],
        aliases=["nouvelle fenêtre", "new window", "ouvrir une fenêtre"],
        tags=["chrome", "fenêtre", "nouveau"],
        context="chrome",
        category="navigation",
    ),

    # --- Editing / clipboard ---
    Gesture(
        id="edit_copy", name="Copier",
        description="Copier la sélection dans le presse-papier",
        keys=["ctrl", "c"],
        aliases=["copier", "copy", "ctrl c"],
        tags=["édition", "copier", "presse-papier"],
        category="editing",
    ),
    Gesture(
        id="edit_paste", name="Coller",
        description="Coller le contenu du presse-papier",
        keys=["ctrl", "v"],
        aliases=["coller", "paste", "ctrl v"],
        tags=["édition", "coller", "presse-papier"],
        category="editing",
    ),
    Gesture(
        id="edit_cut", name="Couper",
        description="Couper la sélection",
        keys=["ctrl", "x"],
        aliases=["couper", "cut", "ctrl x"],
        tags=["édition", "couper"],
        category="editing",
    ),
    Gesture(
        id="edit_undo", name="Annuler",
        description="Annuler la dernière action",
        keys=["ctrl", "z"],
        aliases=["annuler", "undo", "défaire", "ctrl z"],
        tags=["édition", "annuler", "undo"],
        category="editing",
    ),
    Gesture(
        id="edit_redo", name="Rétablir",
        description="Rétablir l'action annulée",
        keys=["ctrl", "y"],
        aliases=["rétablir", "redo", "refaire", "ctrl y"],
        tags=["édition", "rétablir", "redo"],
        category="editing",
    ),
    Gesture(
        id="edit_select_all", name="Tout sélectionner",
        description="Sélectionner tout le contenu",
        keys=["ctrl", "a"],
        aliases=["tout sélectionner", "select all", "sélectionner tout",
                 "ctrl a"],
        tags=["édition", "sélection", "tout"],
        category="editing",
    ),
    Gesture(
        id="edit_save", name="Enregistrer",
        description="Enregistrer le document/fichier actuel",
        keys=["ctrl", "s"],
        aliases=["enregistrer", "save", "sauvegarder", "ctrl s"],
        tags=["édition", "enregistrer", "save"],
        category="editing",
    ),

    # --- System ---
    Gesture(
        id="sys_start_menu", name="Menu Démarrer",
        description="Ouvrir le menu Démarrer Windows",
        keys=["super"],
        aliases=["menu démarrer", "start menu", "démarrer", "windows",
                 "touche windows"],
        tags=["système", "démarrer", "menu"],
        category="system",
    ),
    Gesture(
        id="sys_task_manager", name="Gestionnaire des tâches",
        description="Ouvrir le gestionnaire des tâches",
        keys=["ctrl", "shift", "escape"],
        aliases=["gestionnaire des tâches", "task manager",
                 "gestionnaire tâches", "processes"],
        tags=["système", "tâches", "processus"],
        category="system",
    ),
    Gesture(
        id="sys_lock", name="Verrouiller le PC",
        description="Verrouiller la session Windows",
        keys=["super", "l"],
        aliases=["verrouiller", "lock", "verrouiller le pc",
                 "verrouiller la session"],
        tags=["système", "verrouiller", "lock"],
        category="system",
    ),
    Gesture(
        id="sys_screenshot", name="Capture d'écran",
        description="Prendre une capture d'écran",
        keys=["super", "shift", "s"],
        aliases=["capture d'écran", "screenshot", "capture écran",
                 "impr écran"],
        tags=["système", "capture", "screenshot"],
        category="system",
    ),
    Gesture(
        id="sys_explorer", name="Ouvrir l'explorateur",
        description="Ouvrir l'explorateur de fichiers Windows",
        keys=["super", "e"],
        aliases=["explorateur", "explorer", "ouvrir l'explorateur",
                 "mes fichiers", "file explorer", "explorateur de fichiers"],
        tags=["système", "explorateur"],
        category="system",
    ),
    Gesture(
        id="sys_run", name="Exécuter (Run)",
        description="Ouvrir la boîte de dialogue Exécuter",
        keys=["super", "r"],
        aliases=["exécuter", "run", "boîte exécuter"],
        tags=["système", "exécuter", "run"],
        category="system",
    ),
    Gesture(
        id="sys_settings", name="Paramètres Windows",
        description="Ouvrir les paramètres Windows",
        keys=["super", "i"],
        aliases=["paramètres", "settings", "réglages",
                 "paramètres windows"],
        tags=["système", "paramètres", "settings"],
        category="system",
    ),

    # --- Text navigation (filed under the "editing" category) ---
    Gesture(
        id="nav_home", name="Début de ligne",
        description="Aller au début de la ligne",
        keys=["home"],
        aliases=["début de ligne", "home", "début"],
        tags=["navigation", "texte", "début"],
        category="editing",
    ),
    Gesture(
        id="nav_end", name="Fin de ligne",
        description="Aller à la fin de la ligne",
        keys=["end"],
        aliases=["fin de ligne", "end", "fin"],
        tags=["navigation", "texte", "fin"],
        category="editing",
    ),
    Gesture(
        id="nav_enter", name="Valider / Entrée",
        description="Appuyer sur Entrée",
        keys=["enter"],
        aliases=["entrée", "enter", "valider", "confirmer", "ok"],
        tags=["navigation", "entrée", "valider"],
        category="editing",
    ),
    Gesture(
        id="nav_escape", name="Échap / Annuler",
        description="Appuyer sur Échap (fermer popup, annuler)",
        keys=["escape"],
        aliases=["échap", "escape", "esc", "annuler", "fermer le popup",
                 "fermer la popup", "fermer le dialogue"],
        tags=["navigation", "échap", "annuler", "popup"],
        category="editing",
    ),
    Gesture(
        id="nav_tab", name="Champ suivant",
        description="Passer au champ suivant (Tab)",
        keys=["tab"],
        aliases=["tab", "champ suivant", "suivant", "prochain champ",
                 "tabulation"],
        tags=["navigation", "tab", "champ"],
        category="editing",
    ),
]
|
||||
|
||||
|
||||
class GestureCatalog:
    """
    Catalogue of primitive gestures with fuzzy text matching.

    Used by:
    - the chat (direct match when the user asks for a gesture)
    - the replay (automatic substitution of recorded actions)
    """

    def __init__(self, gestures: Optional[List["Gesture"]] = None):
        """Build the lookup structures for a list of gestures.

        Args:
            gestures: Gestures to index. ``None`` selects the module-level
                ``GESTURES`` catalogue; an explicitly empty list yields an
                empty catalogue.
        """
        # Test against None rather than truthiness so that an empty list
        # supplied by the caller is honoured instead of silently replaced.
        self.gestures = GESTURES if gestures is None else gestures
        # Index for direct lookup by id
        self._by_id: Dict[str, "Gesture"] = {g.id: g for g in self.gestures}
        # Pre-computed, lower-cased search terms for each gesture
        self._search_index: List[Tuple["Gesture", List[str]]] = []
        for g in self.gestures:
            terms = [g.name.lower(), g.description.lower()]
            terms.extend(a.lower() for a in g.aliases)
            terms.extend(t.lower() for t in g.tags)
            self._search_index.append((g, terms))

        logger.info(f"GestureCatalog: {len(self.gestures)} primitives chargées")

    def match(self, query: str, min_score: float = 0.45) -> Optional[Tuple["Gesture", float]]:
        """
        Find the gesture closest to a free-text query.

        Args:
            query: User text; compared case-insensitively after stripping.
            min_score: Minimum score the best candidate must reach.

        Returns:
            (Gesture, score) for the best-scoring gesture, or None when the
            query is empty or no gesture reaches ``min_score``.
        """
        # `or ""` makes a None query behave like an empty one.
        query_lower = (query or "").lower().strip()
        if not query_lower:
            return None

        best_gesture = None
        best_score = 0.0

        for gesture, terms in self._search_index:
            score = self._compute_score(query_lower, terms, gesture)
            # Strict comparison: on ties the earliest gesture in the list wins.
            if score > best_score:
                best_score = score
                best_gesture = gesture

        if best_gesture and best_score >= min_score:
            logger.debug(f"Gesture match: '{query}' → {best_gesture.id} (score={best_score:.2f})")
            return (best_gesture, best_score)

        return None

    def match_action(self, action: Dict) -> Optional["Gesture"]:
        """
        Detect whether a workflow action corresponds to a primitive gesture.

        Used during replay to substitute visual actions with keyboard
        shortcuts.

        Detected patterns:
        - ``key_combo`` actions whose keys equal a known gesture
        - ``click`` actions on window control buttons (text or position)

        Returns:
            The matching gesture, or None for any other action type or when
            nothing matches.
        """
        action_type = action.get("type", "")

        # key_combo -> check whether it is already a known gesture
        if action_type == "key_combo":
            return self._match_by_keys(action.get("keys"))

        # Click on a window control button
        if action_type == "click":
            return self._match_click_as_gesture(action)

        return None

    def get_by_id(self, gesture_id: str) -> Optional["Gesture"]:
        """Return the gesture with the given id, or None if unknown."""
        return self._by_id.get(gesture_id)

    def get_by_category(self, category: str) -> List["Gesture"]:
        """Return every gesture whose category equals ``category``."""
        return [g for g in self.gestures if g.category == category]

    def get_by_context(self, context: str) -> List["Gesture"]:
        """Return gestures for a context; 'windows' gestures are always included."""
        return [
            g for g in self.gestures
            if g.context == context or g.context == "windows"
        ]

    def list_all(self) -> List[Dict]:
        """List all gestures as display dicts, with keys joined by '+'."""
        return [
            {
                "id": g.id,
                "name": g.name,
                "description": g.description,
                "keys": "+".join(g.keys),
                "category": g.category,
                "context": g.context,
            }
            for g in self.gestures
        ]

    # =========================================================================
    # Internal scoring
    # =========================================================================

    def _compute_score(self, query: str, terms: List[str], gesture: "Gesture") -> float:
        """Score how well a lower-cased query matches a gesture's terms.

        For each term, the first rule that applies is used:
        - query equals the term: return 1.0 immediately;
        - query is a substring of the term: len(query) / len(term) * 0.95;
        - term is a substring of the query: 0.85 when the term is exactly one
          of the query's whitespace-separated words, otherwise
          len(term) / len(query) * 0.9;
        - otherwise: ``SequenceMatcher`` ratio between query and term.

        The best per-term score is then raised to 0.5 + ratio * 0.4 when at
        least 80% of the query words occur in the joined terms.

        Args:
            query: Lower-cased, stripped query.
            terms: Lower-cased search terms of the gesture.
            gesture: The gesture being scored (not used by the computation).

        Returns:
            A score between 0.0 and 1.0.
        """
        best = 0.0
        query_words = set(query.split())

        for term in terms:
            # Exact match
            if query == term:
                return 1.0

            # Containment in either direction
            if query in term:
                score = len(query) / len(term) * 0.95
                best = max(best, score)
                continue
            if term in query:
                # A single-word term present as a whole word of the query is
                # a strong signal: "copier le texte" contains "copier".
                if term in query_words:
                    best = max(best, 0.85)
                else:
                    score = len(term) / len(query) * 0.9
                    best = max(best, score)
                continue

            # Sequence similarity
            ratio = SequenceMatcher(None, query, term).ratio()
            best = max(best, ratio)

        # Bonus when (nearly) all query words are present in the terms.
        # NOTE(review): this is a substring test on the joined terms, so short
        # words also count when they occur inside a longer word.
        all_terms_text = " ".join(terms)
        matched_words = sum(1 for w in query_words if w in all_terms_text)
        if query_words:
            word_ratio = matched_words / len(query_words)
            if word_ratio >= 0.8:
                best = max(best, 0.5 + word_ratio * 0.4)

        return best

    def _match_by_keys(self, keys: List[str]) -> Optional["Gesture"]:
        """Find a gesture by its exact key sequence (case-insensitive).

        Order matters: the keys must appear in the same order as in the
        gesture definition. When several gestures share a sequence, the first
        one in the catalogue is returned.

        Args:
            keys: Key names of the action; None or empty matches nothing.
        """
        if not keys:
            return None
        # str() tolerates non-string entries instead of raising on .lower().
        keys_normalized = [str(k).lower() for k in keys]
        for gesture in self.gestures:
            if gesture.keys == keys_normalized:
                return gesture
        return None

    def _match_click_as_gesture(self, action: Dict) -> Optional["Gesture"]:
        """
        Detect whether a click corresponds to a primitive gesture.

        Patterns, checked in this order:
        - target text (``target_text``, else ``target_spec.by_text``) equal to
          a window-control glyph or word: close / maximize / minimize;
        - relative position in the top strip (y < 4%): x > 96% is close,
          92% < x < 96% is maximize, 88% < x < 92% is minimize.

        The position rule only applies when both ``x_pct`` and ``y_pct`` are
        present and numeric: a missing coordinate must not be read as 0,
        otherwise a click at an unknown height on the right edge would be
        replaced by the close-window shortcut.
        """
        # Check the target text; every level may be absent or None.
        target_spec = action.get("target_spec") or {}
        by_text = target_spec.get("by_text") if isinstance(target_spec, dict) else None
        target_text = str(action.get("target_text") or by_text or "").strip()

        if target_text:
            target_lower = target_text.lower()
            # Close button
            if target_lower in ("✕", "×", "x", "close", "fermer"):
                return self._by_id.get("win_close")
            # Maximize button
            if target_lower in ("□", "☐", "maximize", "agrandir"):
                return self._by_id.get("win_maximize")
            # Minimize button
            if target_lower in ("─", "—", "_", "minimize", "réduire"):
                return self._by_id.get("win_minimize")

        # Check the relative position (top-right corner = window controls)
        x_pct = action.get("x_pct")
        y_pct = action.get("y_pct")
        if not all(isinstance(v, (int, float)) for v in (x_pct, y_pct)):
            return None

        # Band edges are exclusive: x exactly at 0.96 or 0.92 matches nothing.
        if x_pct > 0.96 and y_pct < 0.04:
            return self._by_id.get("win_close")
        if 0.92 < x_pct < 0.96 and y_pct < 0.04:
            return self._by_id.get("win_maximize")
        if 0.88 < x_pct < 0.92 and y_pct < 0.04:
            return self._by_id.get("win_minimize")

        return None

    def optimize_replay_actions(self, actions: List[Dict]) -> List[Dict]:
        """
        Optimise a list of replay actions by substituting known gestures.

        Each non-``key_combo`` action that matches a primitive gesture is
        replaced by the equivalent keyboard shortcut action, which keeps the
        original ``action_id`` and records the replaced type under
        ``original_type``. All other actions are passed through unchanged.

        Args:
            actions: Replay actions; None is treated as an empty list.

        Returns:
            A new list with the substitutions applied.
        """
        optimized = []
        substitutions = 0

        for action in actions or []:
            gesture = self.match_action(action)
            if gesture and action.get("type") != "key_combo":
                # Substitute with the keyboard shortcut
                new_action = gesture.to_replay_action()
                # Keep the original action_id for tracking
                new_action["action_id"] = action.get("action_id", new_action["action_id"])
                new_action["original_type"] = action.get("type")
                optimized.append(new_action)
                substitutions += 1
                logger.debug(
                    f"Geste substitué: {action.get('type')} → {gesture.id} ({gesture.name})"
                )
            else:
                optimized.append(action)

        if substitutions:
            logger.info(
                f"Replay optimisé: {substitutions} action(s) substituée(s) par des primitives"
            )

        return optimized
|
||||
|
||||
|
||||
# Singleton
|
||||
_catalog: Optional[GestureCatalog] = None
|
||||
|
||||
|
||||
def get_gesture_catalog() -> GestureCatalog:
    """Return the shared module-level GestureCatalog, creating it on first call."""
    global _catalog
    if _catalog is not None:
        return _catalog
    _catalog = GestureCatalog()
    return _catalog
|
||||
@@ -29,6 +29,7 @@ class IntentType(Enum):
|
||||
LIST = "list" # Lister les workflows disponibles
|
||||
CONFIGURE = "configure" # Configurer un paramètre
|
||||
HELP = "help" # Demander de l'aide
|
||||
GREETING = "greeting" # Salutation
|
||||
STATUS = "status" # Vérifier le statut
|
||||
CANCEL = "cancel" # Annuler l'exécution en cours
|
||||
HISTORY = "history" # Voir l'historique
|
||||
@@ -74,27 +75,64 @@ class IntentParser:
|
||||
# Patterns pour la détection d'intentions par règles
|
||||
INTENT_PATTERNS = {
|
||||
IntentType.EXECUTE: [
|
||||
r"(?:lance|exécute|démarre|fait|run|start|execute)\s+(.+)",
|
||||
# Verbes d'action explicites
|
||||
r"(?:lance|exécute|démarre|fai[st]|run|start|execute)\s+(.+)",
|
||||
r"(?:je veux|je voudrais|peux-tu)\s+(.+)",
|
||||
r"(?:facturer?|créer?|générer?|exporter?)\s+(.+)",
|
||||
r"^(.+)\s+(?:maintenant|tout de suite|svp|stp)$",
|
||||
# Gestes courants (UI actions) — doivent rester EXECUTE
|
||||
r"(?:ferme[rz]?|ouvr[eir]+[sz]?|clique[rz]?|sélectionne[rz]?|coche[rz]?|décoche[rz]?)\s+(.+)",
|
||||
r"(?:copie[rz]?|colle[rz]?|coupe[rz]?|supprime[rz]?|efface[rz]?)\s+(.+)",
|
||||
r"(?:tape[rz]?|écri[rstv]+[sz]?|saisi[rstv]*[sz]?|rempli[rstv]*[sz]?|entre[rz]?)\s+(.+)",
|
||||
r"(?:scroll(?:e[rz]?)?|défile[rz]?|fait(?:es)?\s+défiler)\s*(.+)?",
|
||||
r"(?:glisse[rz]?|drag(?:ue)?[rz]?|déplace[rz]?|bouge[rz]?)\s+(.+)",
|
||||
r"(?:double[- ]?clique[rz]?|clic\s+droit)\s+(.+)?",
|
||||
r"(?:enregistre[rz]?|sauvegarde[rz]?|save)\s+(.+)?",
|
||||
r"(?:imprime[rz]?|print)\s+(.+)?",
|
||||
r"(?:envoie[rz]?|send|mail(?:e[rz]?)?|transmet[sz]?)\s+(.+)",
|
||||
r"(?:télécharge[rz]?|download|upload)\s+(.+)?",
|
||||
r"(?:actualise[rz]?|rafraîchi[rstv]*[sz]?|refresh|recharge[rz]?)\s*(.+)?",
|
||||
r"(?:valide[rz]?|confirme[rz]?|soumets?|submit)\s+(.+)",
|
||||
r"(?:connecte[rz]?|login|log\s*in|sign\s*in)\s*(.+)?",
|
||||
r"(?:déconnecte[rz]?|logout|log\s*out|sign\s*out)\s*(.+)?",
|
||||
# Raccourcis clavier
|
||||
r"(?:ctrl|alt|shift|maj)\s*\+\s*\w+",
|
||||
],
|
||||
IntentType.LIST: [
|
||||
r"(?:liste|montre|affiche|quels sont)\s+(?:les\s+|des\s+)?(?:workflows?|processus|automatisations?)",
|
||||
r"(?:liste|montre|affiche|quels?\s+sont)\s+(?:les\s+|des\s+)?(?:workflows?|processus|automatisations?)",
|
||||
r"(?:quels?|quelles?)\s+(?:workflows?|processus|automatisations?)",
|
||||
r"liste\s+des\s+workflows?",
|
||||
r"(?:qu'est-ce que|que)\s+(?:je peux|tu peux)\s+faire",
|
||||
r"(?:workflows?|processus)\s+disponibles?",
|
||||
r"(?:voir|afficher)\s+(?:les\s+|tous\s+les\s+)?workflows?",
|
||||
],
|
||||
IntentType.QUERY: [
|
||||
r"(?:comment|pourquoi|quand|où|qui)\s+(.+)\?",
|
||||
# Questions directes avec mots interrogatifs
|
||||
r"(?:comment|pourquoi|quand|où|qui)\s+(.+)\??",
|
||||
r"(?:explique|décris|détaille)\s+(.+)",
|
||||
r"(?:qu'est-ce que|c'est quoi)\s+(.+)",
|
||||
# Questions avec "quel/quelle/quels/quelles" (exclure workflows → LIST)
|
||||
r"(?:quels?|quelles?)\s+(?!workflows?|processus|automatisations?)(.+)\??",
|
||||
# "quoi" comme question (pas une commande, pas "quoi faire" = HELP)
|
||||
r"^(?:c'est\s+)?quoi\s+(?!faire)(.+)\??$",
|
||||
r"^quoi\s*\?+$",
|
||||
# Questions indirectes
|
||||
r"(?:dis[- ]moi|raconte|informe[- ]moi)\s+(.+)",
|
||||
r"(?:je\s+(?:me\s+)?demande|je\s+(?:ne\s+)?comprends?\s+pas)\s+(.+)",
|
||||
],
|
||||
IntentType.HELP: [
|
||||
r"(?:aide|help|assistance|sos)",
|
||||
r"(?:comment ça marche|comment utiliser)",
|
||||
r"^(?:aide|help|assistance|sos)$",
|
||||
r"comment ça (?:marche|fonctionne)\s*\??",
|
||||
r"comment (?:utiliser|ça s'utilise|on fait)\s*\??",
|
||||
r"\?{2,}",
|
||||
# "que peux-tu faire", "quoi faire" = demande d'aide
|
||||
r"(?:qu'est-ce que|que)\s+(?:je peux|tu peux)\s+faire",
|
||||
r"^quoi\s+faire\s*\??$",
|
||||
r"(?:que\s+)?(?:puis-je|peux-tu|peut-on)\s+faire\s*\??",
|
||||
r"(?:besoin\s+d'aide|j'ai\s+besoin\s+d'aide)",
|
||||
],
|
||||
IntentType.GREETING: [
|
||||
r"^(?:bonjour|bonsoir|salut|hello|hi|hey|coucou|yo|wesh)(?:\s.*)?$",
|
||||
r"^(?:bonne?\s+(?:journée|soirée|nuit|matinée))$",
|
||||
],
|
||||
IntentType.STATUS: [
|
||||
r"(?:statut|status|état|où en est)",
|
||||
@@ -119,6 +157,35 @@ class IntentParser:
|
||||
],
|
||||
}
|
||||
|
||||
# Action verbs recognised by the EXECUTE fallback.
# When no pattern matches, the query is checked for one of these verbs
# before being classified as EXECUTE.
ACTION_VERBS = {
    # Workflow / execution actions
    "lance", "lancer", "exécute", "exécuter", "démarre", "démarrer",
    "fait", "fais", "run", "start", "execute",
    # Business actions
    "facture", "facturer", "crée", "créer", "génère", "générer",
    "exporte", "exporter", "importe", "importer",
    # UI actions / gestures
    "ferme", "fermer", "ouvre", "ouvrir", "clique", "cliquer",
    "sélectionne", "sélectionner", "coche", "cocher", "décoche", "décocher",
    "copie", "copier", "colle", "coller", "coupe", "couper",
    "supprime", "supprimer", "efface", "effacer",
    "tape", "taper", "écris", "écrire", "saisis", "saisir",
    "remplis", "remplir", "entre", "entrer",
    "scroll", "scroller", "défile", "défiler",
    "glisse", "glisser", "déplace", "déplacer", "drag",
    "enregistre", "enregistrer", "sauvegarde", "sauvegarder", "save",
    "imprime", "imprimer", "print",
    "envoie", "envoyer", "send", "transmet", "transmettre",
    "télécharge", "télécharger", "download", "upload",
    "actualise", "actualiser", "rafraîchis", "rafraîchir", "refresh",
    "valide", "valider", "confirme", "confirmer", "soumets", "soumettre",
    "connecte", "connecter", "déconnecte", "déconnecter",
    "login", "logout",
}
|
||||
|
||||
# Patterns pour l'extraction d'entités
|
||||
ENTITY_PATTERNS = {
|
||||
"client": [
|
||||
@@ -280,11 +347,18 @@ class IntentParser:
|
||||
best_confidence = confidence
|
||||
best_intent = intent_type
|
||||
|
||||
# Si aucune intention trouvée mais la requête ressemble à une commande
|
||||
# Fallback durci : ne classifier en EXECUTE que si un verbe d'action est présent
|
||||
if best_intent == IntentType.UNKNOWN and len(query.split()) >= 2:
|
||||
# Supposer que c'est une demande d'exécution
|
||||
best_intent = IntentType.EXECUTE
|
||||
best_confidence = 0.4
|
||||
words = query.lower().split()
|
||||
# Vérifier si au moins un mot est un verbe d'action connu
|
||||
has_action_verb = any(word in self.ACTION_VERBS for word in words)
|
||||
if has_action_verb:
|
||||
best_intent = IntentType.EXECUTE
|
||||
best_confidence = 0.40
|
||||
else:
|
||||
# Pas de verbe d'action reconnu → demander clarification
|
||||
best_intent = IntentType.CLARIFY
|
||||
best_confidence = 0.30
|
||||
|
||||
return best_intent, best_confidence
|
||||
|
||||
@@ -389,13 +463,14 @@ REQUÊTE: "{query}"
|
||||
{f"Contexte conversation: {json.dumps(context, ensure_ascii=False)}" if context else ""}
|
||||
|
||||
INTENTIONS POSSIBLES:
|
||||
- execute: l'utilisateur veut lancer/exécuter un workflow
|
||||
- execute: l'utilisateur veut lancer/exécuter un workflow ou une action UI (geste)
|
||||
- list: l'utilisateur veut voir les workflows disponibles (mots-clés: liste, quels, workflows, disponibles, montrer)
|
||||
- query: l'utilisateur pose une question sur un workflow
|
||||
- query: l'utilisateur pose une question (comment, pourquoi, c'est quoi, quel)
|
||||
- status: l'utilisateur demande le statut d'exécution
|
||||
- cancel: l'utilisateur veut annuler
|
||||
- history: l'utilisateur veut voir l'historique
|
||||
- help: l'utilisateur demande de l'aide
|
||||
- help: l'utilisateur demande de l'aide ou ce qu'il peut faire
|
||||
- greeting: l'utilisateur dit bonjour/salut/hello
|
||||
- confirm: l'utilisateur confirme (oui, ok, go)
|
||||
- deny: l'utilisateur refuse (non, annule)
|
||||
- unknown: impossible à déterminer
|
||||
@@ -504,16 +579,37 @@ if __name__ == "__main__":
|
||||
parser = IntentParser(use_llm=False)
|
||||
|
||||
test_queries = [
|
||||
# EXECUTE — actions explicites
|
||||
"facturer le client Acme",
|
||||
"lance le workflow de facturation",
|
||||
"quels workflows sont disponibles ?",
|
||||
"aide",
|
||||
"oui",
|
||||
"annule",
|
||||
"statut",
|
||||
"exporter le rapport en PDF pour Client ABC",
|
||||
"créer une facture de 1500€ pour Société XYZ",
|
||||
"facturer les clients de A à Z",
|
||||
# EXECUTE — gestes UI
|
||||
"ferme la fenêtre",
|
||||
"ouvre un nouvel onglet",
|
||||
"copier le texte",
|
||||
"lance la facturation",
|
||||
# LIST
|
||||
"quels workflows sont disponibles ?",
|
||||
"liste des workflows",
|
||||
# QUERY — questions
|
||||
"comment ça marche ?",
|
||||
"c'est quoi ce workflow",
|
||||
"pourquoi ce processus est lent ?",
|
||||
# HELP
|
||||
"aide",
|
||||
"quoi faire ?",
|
||||
"que peux-tu faire ?",
|
||||
# GREETING
|
||||
"bonjour",
|
||||
"salut",
|
||||
# Confirmations / annulations
|
||||
"oui",
|
||||
"annule",
|
||||
"statut",
|
||||
# Fallback — ne doit PAS être EXECUTE
|
||||
"blah blah test",
|
||||
]
|
||||
|
||||
print("=== Tests IntentParser ===\n")
|
||||
|
||||
@@ -73,9 +73,16 @@ class ResponseGenerator:
|
||||
"Le workflow '{workflow}' a échoué: {error}"
|
||||
],
|
||||
"not_found": [
|
||||
"Je n'ai pas trouvé de workflow correspondant à '{query}'.",
|
||||
"Aucun workflow ne correspond à '{query}'. Voulez-vous voir la liste ?",
|
||||
"'{query}' ne correspond à aucun workflow connu."
|
||||
"Je ne sais pas encore faire '{query}'. Montre-moi comment faire et je l'apprendrai !",
|
||||
"'{query}' m'est inconnu pour l'instant. Tu peux me montrer en enregistrant un workflow.",
|
||||
"Je ne connais pas '{query}'. Montre-moi et je m'en souviendrai !"
|
||||
],
|
||||
"gesture": [
|
||||
"{gesture_name} ({gesture_keys}) envoyé !",
|
||||
"Raccourci {gesture_name} ({gesture_keys}) exécuté.",
|
||||
],
|
||||
"copilot": [
|
||||
"Mode pas-à-pas activé pour '{workflow}'. Validez chaque étape.",
|
||||
]
|
||||
},
|
||||
IntentType.LIST: {
|
||||
@@ -108,6 +115,13 @@ class ResponseGenerator:
|
||||
"Tapez votre commande en langage naturel !",
|
||||
]
|
||||
},
|
||||
IntentType.GREETING: {
|
||||
"default": [
|
||||
"Bonjour ! Je suis votre assistant RPA. Comment puis-je vous aider ?",
|
||||
"Salut ! Que puis-je faire pour vous ?",
|
||||
"Bonjour ! Tapez une commande ou 'aide' pour voir ce que je peux faire.",
|
||||
]
|
||||
},
|
||||
IntentType.STATUS: {
|
||||
"running": [
|
||||
"Exécution en cours : '{workflow}'\nProgression : {progress}%\n{message}",
|
||||
@@ -355,7 +369,21 @@ class ResponseGenerator:
|
||||
"""Handler pour les intentions d'exécution."""
|
||||
templates = self.RESPONSE_TEMPLATES[IntentType.EXECUTE]
|
||||
|
||||
if result.get("success"):
|
||||
if result.get("gesture"):
|
||||
# Geste primitif (raccourci clavier)
|
||||
template = random.choice(templates["gesture"])
|
||||
message = template.format(
|
||||
gesture_name=result.get("gesture_name", "?"),
|
||||
gesture_keys=result.get("gesture_keys", "?"),
|
||||
)
|
||||
suggestions = self.CONTEXTUAL_SUGGESTIONS["after_execute"]
|
||||
|
||||
elif result.get("mode") == "copilot":
|
||||
template = random.choice(templates["copilot"])
|
||||
message = template.format(workflow=result.get("workflow", "?"))
|
||||
suggestions = ["approuver", "passer", "annuler"]
|
||||
|
||||
elif result.get("success"):
|
||||
template = random.choice(templates["success"])
|
||||
workflow = result.get("workflow", intent.workflow_hint or "inconnu")
|
||||
details = ""
|
||||
@@ -369,8 +397,9 @@ class ResponseGenerator:
|
||||
|
||||
elif result.get("not_found"):
|
||||
template = random.choice(templates["not_found"])
|
||||
message = template.format(query=intent.raw_query)
|
||||
suggestions = self.CONTEXTUAL_SUGGESTIONS["after_error"]
|
||||
query = result.get("query", intent.raw_query)
|
||||
message = template.format(query=query)
|
||||
suggestions = ["lister les workflows", "aide", "enregistrer un workflow"]
|
||||
|
||||
else:
|
||||
template = random.choice(templates["error"])
|
||||
@@ -426,6 +455,22 @@ class ResponseGenerator:
|
||||
action_required=False
|
||||
)
|
||||
|
||||
def _handle_greeting(
    self,
    intent: ParsedIntent,
    context: Dict[str, Any],
    result: Dict[str, Any]
) -> GeneratedResponse:
    """Build the reply to a greeting.

    One of the "default" greeting templates is picked at random and
    returned together with the "idle" contextual suggestions. The
    ``intent``, ``context`` and ``result`` arguments are accepted but not
    read by this handler.
    """
    greeting_pool = self.RESPONSE_TEMPLATES[IntentType.GREETING]["default"]

    return GeneratedResponse(
        message=random.choice(greeting_pool),
        suggestions=self.CONTEXTUAL_SUGGESTIONS["idle"],
        action_required=False,
    )
|
||||
|
||||
def _handle_status(
|
||||
self,
|
||||
intent: ParsedIntent,
|
||||
|
||||
@@ -617,11 +617,8 @@
|
||||
</div>
|
||||
<div class="header-right">
|
||||
<div class="mode-toggle">
|
||||
<button class="mode-btn active" onclick="setMode('workflow')" id="modeWorkflow">
|
||||
📋 Workflows
|
||||
</button>
|
||||
<button class="mode-btn" onclick="setMode('agent')" id="modeAgent">
|
||||
🚀 Agent Libre
|
||||
<button class="mode-btn active" id="modeWorkflow">
|
||||
💬 Assistant
|
||||
</button>
|
||||
</div>
|
||||
<div class="status-pill" id="statusPill">
|
||||
@@ -715,6 +712,23 @@
|
||||
updateAgentProgress(data);
|
||||
});
|
||||
|
||||
// Copilot events: each step-by-step notification received on the socket is
// forwarded to the UI function that renders it.
socket.on('copilot_step', (payload) => showCopilotStep(payload));

socket.on('copilot_step_result', (payload) => updateCopilotStepResult(payload));

socket.on('copilot_complete', (payload) => completeCopilot(payload));

// Errors are surfaced as a plain chat message.
socket.on('copilot_error', (payload) => addMessage(`Copilot: ${payload.message}`));
|
||||
|
||||
// =====================================================
|
||||
// UI Functions
|
||||
// =====================================================
|
||||
@@ -853,40 +867,6 @@
|
||||
return card;
|
||||
}
|
||||
|
||||
function createAgentPlanCard(plan) {
|
||||
const card = document.createElement('div');
|
||||
card.className = 'action-card';
|
||||
|
||||
const stepsHtml = plan.steps.map((step, i) => `
|
||||
<div class="progress-step pending" id="step-${i}">
|
||||
<div class="progress-step-icon">${i + 1}</div>
|
||||
<span>${step.description}</span>
|
||||
</div>
|
||||
`).join('');
|
||||
|
||||
card.innerHTML = `
|
||||
<div class="action-card-header">
|
||||
<div class="action-card-title">
|
||||
🚀 Plan d'exécution
|
||||
<span class="confidence-badge">${plan.steps.length} étapes</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="progress-steps" style="margin-bottom: 12px;">
|
||||
${stepsHtml}
|
||||
</div>
|
||||
<div class="action-buttons">
|
||||
<button class="btn btn-primary" onclick="executeAgentPlan()">
|
||||
<i class="bi bi-play-fill"></i> Exécuter
|
||||
</button>
|
||||
<button class="btn btn-danger" onclick="cancelAction()">
|
||||
<i class="bi bi-x"></i> Annuler
|
||||
</button>
|
||||
</div>
|
||||
`;
|
||||
|
||||
return card;
|
||||
}
|
||||
|
||||
function createExecutionProgress() {
|
||||
const progress = document.createElement('div');
|
||||
progress.className = 'execution-progress';
|
||||
@@ -1033,11 +1013,7 @@
|
||||
addTypingIndicator();
|
||||
|
||||
try {
|
||||
if (currentMode === 'agent') {
|
||||
await sendAgentRequest(message);
|
||||
} else {
|
||||
await sendChatRequest(message);
|
||||
}
|
||||
await sendChatRequest(message);
|
||||
} catch (error) {
|
||||
removeTypingIndicator();
|
||||
addMessage(`❌ Erreur: ${error.message}`);
|
||||
@@ -1073,9 +1049,35 @@
|
||||
data.intent?.confidence || 0.9
|
||||
);
|
||||
addMessage(data.response.message, 'bot', card);
|
||||
} else if (data.result?.gesture) {
|
||||
// Geste primitif exécuté
|
||||
addMessage(data.response.message);
|
||||
} else if (data.result?.mode === 'copilot') {
|
||||
// Mode copilot — les étapes arrivent via WebSocket
|
||||
addMessage(data.response.message);
|
||||
} else if (data.result?.success) {
|
||||
const progress = createExecutionProgress();
|
||||
addMessage(data.response.message, 'bot', progress);
|
||||
} else if (data.result?.teach_me) {
|
||||
// Workflow non trouvé — proposer l'apprentissage
|
||||
const teachCard = document.createElement('div');
|
||||
teachCard.className = 'action-card';
|
||||
teachCard.innerHTML = `
|
||||
<div class="action-card-header">
|
||||
<div class="action-card-title">
|
||||
Apprentissage disponible
|
||||
</div>
|
||||
</div>
|
||||
<p style="margin: 8px 0; opacity: 0.8; font-size: 0.9em;">
|
||||
Lancez l'enregistrement sur votre PC et montrez-moi comment faire.
|
||||
</p>
|
||||
<div class="action-buttons">
|
||||
<button class="btn btn-primary" onclick="window.open('/api/help', '_blank')">
|
||||
<i class="bi bi-mortarboard"></i> Comment m'apprendre ?
|
||||
</button>
|
||||
</div>
|
||||
`;
|
||||
addMessage(data.response.message, 'bot', teachCard);
|
||||
} else if (data.result?.workflows) {
|
||||
let msg = data.response.message + '\n\n';
|
||||
data.result.workflows.slice(0, 5).forEach(w => {
|
||||
@@ -1087,30 +1089,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
async function sendAgentRequest(message) {
|
||||
const response = await fetch('/api/agent/plan', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ request: message })
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
removeTypingIndicator();
|
||||
|
||||
if (data.error) {
|
||||
addMessage(`❌ ${data.error}`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (data.plan) {
|
||||
pendingConfirmation = data.plan;
|
||||
const card = createAgentPlanCard(data.plan);
|
||||
addMessage(`J'ai préparé un plan pour "${message}":`, 'bot', card);
|
||||
} else {
|
||||
addMessage(data.message || "Je n'ai pas pu créer de plan pour cette demande.");
|
||||
}
|
||||
}
|
||||
|
||||
async function confirmAction() {
|
||||
if (!pendingConfirmation) return;
|
||||
|
||||
@@ -1127,40 +1105,11 @@
|
||||
|
||||
// Show execution progress
|
||||
const progress = createExecutionProgress();
|
||||
addMessage("⏳ Exécution en cours...", 'bot', progress);
|
||||
addMessage("Execution en cours...", 'bot', progress);
|
||||
|
||||
pendingConfirmation = null;
|
||||
}
|
||||
|
||||
async function executeAgentPlan() {
|
||||
if (!pendingConfirmation) return;
|
||||
|
||||
isProcessing = true;
|
||||
updateInputState();
|
||||
|
||||
addMessage("⏳ Exécution du plan en cours...", 'bot');
|
||||
|
||||
const response = await fetch('/api/agent/execute', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ plan: pendingConfirmation })
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success) {
|
||||
const results = data.results || [];
|
||||
const successCount = results.filter(r => r.success).length;
|
||||
addMessage(`✅ Plan exécuté: ${successCount}/${results.length} étapes réussies`);
|
||||
} else {
|
||||
addMessage(`❌ Erreur: ${data.error}`);
|
||||
}
|
||||
|
||||
pendingConfirmation = null;
|
||||
isProcessing = false;
|
||||
updateInputState();
|
||||
}
|
||||
|
||||
function modifyAction() {
|
||||
if (!pendingConfirmation) return;
|
||||
addMessage("✏️ Modification non implémentée. Décrivez les changements souhaités.");
|
||||
@@ -1173,7 +1122,79 @@
|
||||
|
||||
function cancelExecution() {
|
||||
socket.emit('cancel_execution');
|
||||
addMessage("⏹️ Demande d'annulation envoyée...");
|
||||
addMessage("Demande d'annulation envoyée...");
|
||||
}
|
||||
|
||||
// =====================================================
|
||||
// Copilot Mode
|
||||
// =====================================================
|
||||
|
||||
/**
 * Render the confirmation card for one copilot step and append it to the chat.
 *
 * The card shows the step position, the workflow name and the action
 * type/description, with three buttons wired to copilotApprove, copilotSkip
 * and copilotAbort.
 *
 * @param {Object} data - Payload of the 'copilot_step' socket event. Fields
 *   read here: step_index, total, workflow, action.type, action.description.
 */
function showCopilotStep(data) {
    // Every text field below is interpolated into innerHTML, so it is
    // HTML-escaped first to keep payload content from being parsed as markup.
    const escapeHtml = (value) => String(value ?? '').replace(
        /[&<>"']/g,
        (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
    );

    // The index ends up in element ids and inline onclick handlers; coercing it
    // to a number guarantees nothing but a numeric literal is emitted there.
    const stepIndex = Number(data.step_index);

    // Tolerate a payload without an action instead of throwing a TypeError.
    const action = data.action || {};

    const card = document.createElement('div');
    card.className = 'action-card';
    card.id = `copilot-step-${stepIndex}`;
    card.innerHTML = `
        <div class="action-card-header">
            <div class="action-card-title">
                Copilot - Étape ${stepIndex + 1}/${escapeHtml(data.total)}
            </div>
            <span style="font-size: 0.8em; opacity: 0.6;">${escapeHtml(data.workflow)}</span>
        </div>
        <p style="margin: 8px 0; font-size: 0.95em;">
            <strong>${escapeHtml(action.type)}</strong>: ${escapeHtml(action.description)}
        </p>
        <div class="action-buttons" id="copilot-btns-${stepIndex}">
            <button class="btn btn-primary" onclick="copilotApprove(${stepIndex})">
                <i class="bi bi-check-lg"></i> Exécuter
            </button>
            <button class="btn btn-secondary" onclick="copilotSkip(${stepIndex})">
                <i class="bi bi-skip-forward"></i> Passer
            </button>
            <button class="btn btn-danger" onclick="copilotAbort()">
                <i class="bi bi-x-circle"></i> Annuler tout
            </button>
        </div>
    `;
    addMessage(`Copilot étape ${stepIndex + 1}/${data.total}`, 'bot', card);
}
|
||||
|
||||
/**
 * Approve the pending copilot step.
 *
 * Emits 'copilot_approve' on the socket, then swaps the step's button row for
 * an "approved, running" label when that row exists in the page.
 *
 * @param {number} stepIndex - Index used to locate the step's button row.
 */
function copilotApprove(stepIndex) {
    socket.emit('copilot_approve');

    const buttonRow = document.getElementById(`copilot-btns-${stepIndex}`);
    if (!buttonRow) {
        return;
    }
    buttonRow.innerHTML = '<span style="color: var(--success);">Approuvé - en cours...</span>';
}
|
||||
|
||||
/**
 * Skip the pending copilot step.
 *
 * Emits 'copilot_skip' on the socket, then swaps the step's button row for a
 * "skipped" label when that row exists in the page.
 *
 * @param {number} stepIndex - Index used to locate the step's button row.
 */
function copilotSkip(stepIndex) {
    socket.emit('copilot_skip');

    const buttonRow = document.getElementById(`copilot-btns-${stepIndex}`);
    if (!buttonRow) {
        return;
    }
    buttonRow.innerHTML = '<span style="color: var(--warning);">Passé</span>';
}
|
||||
|
||||
/**
 * Abort the copilot run by emitting 'copilot_abort' on the socket.
 * No UI is touched here.
 */
function copilotAbort() {
    socket.emit('copilot_abort');
}
|
||||
|
||||
/**
 * Replace a copilot step's button row with the outcome of that step.
 *
 * Does nothing when the step card or its button row cannot be found, or when
 * the status is not one of 'completed', 'failed' or 'skipped'.
 *
 * @param {Object} data - Payload of the 'copilot_step_result' socket event.
 *   Fields read here: step_index, status, message.
 */
function updateCopilotStepResult(data) {
    const card = document.getElementById(`copilot-step-${data.step_index}`);
    if (!card) return;

    const btns = card.querySelector('.action-buttons') ||
        document.getElementById(`copilot-btns-${data.step_index}`);
    if (!btns) return;

    let style;
    let text;
    if (data.status === 'completed') {
        style = 'color: var(--success);';
        text = 'Réussi';
    } else if (data.status === 'failed') {
        style = 'color: var(--error);';
        text = `Échoué: ${data.message}`;
    } else if (data.status === 'skipped') {
        style = 'color: var(--warning);';
        text = 'Passé';
    } else {
        // Unknown status: leave the row untouched.
        return;
    }

    // The label is built as a DOM node and filled through textContent so that
    // the failure message is displayed as text and never parsed as HTML.
    const label = document.createElement('span');
    label.setAttribute('style', style);
    label.textContent = text;

    btns.textContent = '';
    btns.appendChild(label);
}
|
||||
|
||||
/**
 * Post the final copilot message in the chat, coloured by outcome:
 * success colour for 'completed', error colour for 'aborted', warning colour
 * for any other status.
 *
 * @param {Object} data - Payload of the 'copilot_complete' socket event.
 *   Fields read here: status, message.
 */
function completeCopilot(data) {
    let statusColor;
    if (data.status === 'completed') {
        statusColor = 'var(--success)';
    } else if (data.status === 'aborted') {
        statusColor = 'var(--error)';
    } else {
        statusColor = 'var(--warning)';
    }

    // NOTE(review): data.message is interpolated unescaped into markup; if
    // addMessage renders its argument as HTML this needs escaping — confirm.
    addMessage(`<span style="color: ${statusColor};">Copilot terminé: ${data.message}</span>`);
}
|
||||
|
||||
// =====================================================
|
||||
|
||||
Reference in New Issue
Block a user