Greffe minimale du mécanisme d'apprentissage persistant (Fiche #18, target_memory_store.py) sur le pipeline streaming V4 sans toucher à V3. Architecture (docs/PLAN_APPRENTISSAGE_LEA.md) : - Lookup mémoire AVANT la cascade résolution coûteuse OCR/template/VLM dans _resolve_target_sync → hit = <10ms, miss = overhead zéro - Record APRÈS validation post-condition (title_match strict) dans /replay/result → 2 succès → cristallisation par répétition - Single source of truth : l'agent remplit report.actual_position avec les coords effectivement cliquées, le serveur les lit directement. Pas de cache intermédiaire (option C du plan). Signature écran V4 : sha256(normalize(window_title))[:16]. Robuste aux données variables, faux positifs rattrapés par le post-cond qui décrémente la fiabilité via record_failure(). Fichiers : - agent_v0/server_v1/replay_memory.py : nouveau wrapper 316 lignes exposant compute_screen_sig/memory_lookup/record_success/failure, lazy-init du store, normalisation texte stable, garde sanity coords - agent_v0/server_v1/resolve_engine.py : lookup mémoire en tête de _resolve_target_sync (30 lignes) - agent_v0/server_v1/replay_engine.py : _create_replay_state stocke une copie slim des actions (sans anchor base64) pour retrouver le target_spec par current_action_index - agent_v0/server_v1/api_stream.py : 4 callers passent actions=..., record success/failure dans /replay/result lit actual_position du rapport (click-only), correction du commentaire Pydantic - agent_v0/agent_v1/core/executor.py : remplit result["actual_position"] après self._click(), transmis dans le report de poll_and_execute Tests : 56 E2E + Phase0 passent, zéro régression. Cycle Phase 1 validé en simulation : miss → record → miss → record → HIT au 3ème passage. Le deploy copy executor.py a une divergence pré-existante de 1302 lignes non committées — traité séparément lors du cleanup prochain. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
4249 lines
164 KiB
Python
4249 lines
164 KiB
Python
# agent_v0/server_v1/api_stream.py
|
||
"""
|
||
API de Streaming Temps Réel pour RPA Vision V3.
|
||
|
||
Connecte l'Agent V1 au core pipeline via StreamProcessor.
|
||
Tous les calculs GPU (ScreenAnalyzer, CLIP, FAISS) tournent ici sur le serveur.
|
||
|
||
Inclut les endpoints de replay pour renvoyer des ordres d'exécution à l'Agent V1.
|
||
"""
|
||
|
||
import atexit
|
||
import json
|
||
import logging
|
||
import os
|
||
import secrets
|
||
import signal
|
||
import threading
|
||
import time
|
||
import uuid
|
||
from collections import defaultdict
|
||
from concurrent.futures import ThreadPoolExecutor
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
from fastapi import BackgroundTasks, Depends, FastAPI, File, HTTPException, Request, UploadFile
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from pydantic import BaseModel
|
||
|
||
from .replay_failure_logger import log_replay_failure
|
||
from .replay_verifier import ReplayVerifier, VerificationResult
|
||
from .replay_learner import ReplayLearner
|
||
from .audit_trail import AuditTrail, AuditEntry
|
||
from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
|
||
from .worker_stream import StreamWorker
|
||
from .execution_plan_runner import (
|
||
execution_plan_to_actions,
|
||
inject_plan_into_queue,
|
||
)
|
||
|
||
# Global replay verifier instance (before/after screenshot comparison)
_replay_verifier = ReplayVerifier()
_replay_learner = ReplayLearner()
_audit_trail = AuditTrail()

# Maximum number of retries per action before declaring a failure
MAX_RETRIES_PER_ACTION = 3

# Safety limits for the replay queues
MAX_ACTIONS_PER_REPLAY = 500  # Max actions per replay request
MAX_REPLAY_STATES = 1000  # Max entries in _replay_states
REPLAY_STATE_TTL_SECONDS = 3600  # Automatic cleanup of finished replays after 1h

# Actions currently being retried: action_id -> {"action": ..., "retry_count": N, "replay_id": ...}
_retry_pending: Dict[str, Dict[str, Any]] = {}

# Error callbacks per replay: replay_id -> callback_url
_error_callbacks: Dict[str, str] = {}

# Optimization of replay actions through primitive gestures.
# The catalog is optional: when agent_chat is not importable, it stays None.
try:
    from agent_chat.gesture_catalog import get_gesture_catalog
    _gesture_catalog = get_gesture_catalog()
except ImportError:
    _gesture_catalog = None
|
||
|
||
# Automatic authentication (optional) — detection of auth screens during replay.
# Requires a vault configured through the RPA_AUTH_VAULT_PATH and
# RPA_AUTH_VAULT_PASSWORD environment variables.
#
# The module logger is created first so that a vault initialization failure
# can be reported instead of being silently swallowed.
logger = logging.getLogger("api_stream")

_auth_handler = None
try:
    _vault_path = os.environ.get("RPA_AUTH_VAULT_PATH")
    _vault_password = os.environ.get("RPA_AUTH_VAULT_PASSWORD")
    if _vault_path and _vault_password:
        from core.auth.credential_vault import CredentialVault
        from core.auth.auth_handler import AuthHandler
        _auth_vault = CredentialVault(_vault_path, _vault_password)
        _auth_handler = AuthHandler(_auth_vault)
except Exception as _auth_init_error:
    # Best effort: the server still starts without automatic authentication,
    # but the operator must be told that the configured vault was not loaded.
    # Only the exception type/message is logged, never the vault password.
    _auth_handler = None
    logger.warning(
        "Authentification automatique désactivée : échec d'initialisation du vault (%s: %s)",
        type(_auth_init_error).__name__,
        _auth_init_error,
    )
|
||
|
||
# =========================================================================
# Bearer token authentication (HIGH security)
# =========================================================================
# The token is read from the environment or generated at startup.
# Every endpoint requires the header "Authorization: Bearer <token>",
# except the paths listed in _PUBLIC_PATHS.
#
# `or` is used instead of a .get() default on purpose: an empty
# RPA_API_TOKEN must fall back to a random token, otherwise the request
# header "Authorization: Bearer " (empty token) would be accepted.
API_TOKEN = os.environ.get("RPA_API_TOKEN") or secrets.token_hex(32)

# Public paths: no token required.
# In production, /docs and /redoc are disabled (see the FastAPI app setup).
# /replay/next is public because the legacy Rust agent does not send a token;
# the original author describes it as a read-only polling endpoint.
_PUBLIC_PATHS = {
    "/health", "/docs", "/openapi.json", "/redoc",
    "/api/v1/traces/stream/replay/next",
    "/api/v1/traces/stream/image",
}
|
||
|
||
|
||
async def _verify_token(request: Request):
    """Check the Bearer API token of an incoming request.

    Used as a global FastAPI dependency. Requests whose path is listed in
    ``_PUBLIC_PATHS`` are accepted without a token.

    Args:
        request: The incoming HTTP request.

    Raises:
        HTTPException: 401 when the ``Authorization`` header is missing, is not
            of the form ``Bearer <token>``, or carries a wrong token.
    """
    if request.url.path in _PUBLIC_PATHS:
        return

    prefix = "Bearer "
    auth = request.headers.get("Authorization", "")
    presented = auth[len(prefix):] if auth.startswith(prefix) else None

    # Constant-time comparison so the response time does not reveal how many
    # leading characters of the token were correct. Both sides are encoded to
    # bytes because compare_digest() rejects non-ASCII str arguments.
    if presented is None or not secrets.compare_digest(
        presented.encode("utf-8"), API_TOKEN.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Token API invalide")
|
||
|
||
|
||
# =========================================================================
# In-memory rate limiting (HIGH security)
# =========================================================================
# "<endpoint>:<client_ip>" -> timestamps of the requests seen in the current window
_rate_limits: Dict[str, list] = defaultdict(list)
_RATE_LIMIT_WINDOW = 60  # seconds
# Per-endpoint maximum number of requests per window
_RATE_LIMITS = {
    "/api/v1/traces/stream/replay": 10,  # 10 replays per minute
    "/api/v1/traces/stream/replay/raw": 10,
    "/api/v1/traces/stream/replay-session": 10,  # 10 session replays per minute
    "/api/v1/traces/stream/replay/single": 30,  # 30 Copilot actions per minute
    "/api/v1/traces/stream/finalize": 5,
    "/api/v1/traces/stream/image": 200,  # 200 images per minute (heartbeats)
}
|
||
|
||
|
||
def _check_rate_limit(endpoint: str, client_ip: str) -> bool:
    """Record a request and tell whether it stays within the rate limit.

    Args:
        endpoint: Request path, used to look up the limit in ``_RATE_LIMITS``
            (100 requests per window when the path has no entry).
        client_ip: Client address; limits are tracked per endpoint and client.

    Returns:
        True when the request is allowed (and has been recorded), False when
        the client already reached the limit for the current window.
    """
    bucket_key = f"{endpoint}:{client_ip}"
    current = time.time()

    # Keep only the timestamps that still fall inside the sliding window.
    recent = [stamp for stamp in _rate_limits[bucket_key] if current - stamp < _RATE_LIMIT_WINDOW]
    _rate_limits[bucket_key] = recent

    allowed = len(recent) < _RATE_LIMITS.get(endpoint, 100)
    if allowed:
        recent.append(current)
    return allowed
|
||
|
||
|
||
# =========================================================================
|
||
# Replay Engine — fonctions de replay extraites dans replay_engine.py
|
||
# =========================================================================
|
||
from .replay_engine import (
|
||
_ALLOWED_ACTION_TYPES,
|
||
_MAX_ACTION_TEXT_LENGTH,
|
||
_MAX_KEYS_PER_COMBO,
|
||
_KNOWN_KEY_NAMES,
|
||
_validate_replay_action,
|
||
_APP_LAUNCH_COMMANDS,
|
||
_APP_VISUAL_SEARCH,
|
||
_SETUP_IGNORE_APPS,
|
||
_extract_required_apps_from_events,
|
||
_extract_required_apps_from_workflow,
|
||
_resolve_launch_command,
|
||
_infer_app_from_window_titles,
|
||
_get_visual_search_info,
|
||
_generate_setup_actions,
|
||
_find_active_agent_session as _find_active_agent_session_impl,
|
||
_workflow_to_actions as _workflow_to_actions_impl,
|
||
_is_learned_workflow,
|
||
_edge_to_normalized_actions,
|
||
_substitute_variables,
|
||
_expand_compound_steps,
|
||
_pre_check_screen_state as _pre_check_screen_state_impl,
|
||
_detect_popup_hint as _detect_popup_hint_impl,
|
||
_create_replay_state,
|
||
_schedule_retry as _schedule_retry_impl,
|
||
_notify_error_callback as _notify_error_callback_impl,
|
||
)
|
||
|
||
|
||
|
||
# Wrappers pour les fonctions replay_engine qui accèdent aux variables globales du module.
|
||
# Ces wrappers passent processor, _replay_lock, _replay_states, etc.
|
||
def _find_active_agent_session(machine_id=None):
    """Delegate to the replay_engine implementation using the module-level processor.

    Args:
        machine_id: Optional machine identifier forwarded unchanged.

    Returns:
        Whatever the replay_engine implementation returns.
    """
    return _find_active_agent_session_impl(processor.session_manager, machine_id)
|
||
|
||
def _workflow_to_actions(workflow, params=None):
    """Delegate to the replay_engine implementation with the module-level context.

    Passes the global ``processor`` and ``_gesture_catalog`` in addition to
    the caller's arguments.

    Args:
        workflow: Workflow to convert, forwarded unchanged.
        params: Optional parameters, forwarded unchanged.

    Returns:
        Whatever the replay_engine implementation returns.
    """
    return _workflow_to_actions_impl(workflow, params, processor, _gesture_catalog)
|
||
|
||
def _pre_check_screen_state(session_id, expected_node_id, current_screenshot_path, active_processor):
    """Delegate to the replay_engine implementation with the module-level replay state.

    Forwards the caller's arguments together with ``_replay_states``,
    ``_replay_lock`` and ``_PRECHECK_SIMILARITY_THRESHOLD`` (the latter is
    defined elsewhere in this module).

    Returns:
        Whatever the replay_engine implementation returns.
    """
    return _pre_check_screen_state_impl(
        session_id, expected_node_id, current_screenshot_path, active_processor,
        _replay_states, _replay_lock, _PRECHECK_SIMILARITY_THRESHOLD,
    )
|
||
|
||
def _detect_popup_hint(session_id, workflow, expected_node_id):
    """Delegate to the replay_engine implementation using the module-level processor.

    Returns:
        Whatever the replay_engine implementation returns.
    """
    return _detect_popup_hint_impl(session_id, workflow, expected_node_id, processor)
|
||
|
||
def _schedule_retry(session_id, replay_state, action, current_retry, reason):
    """Delegate to the replay_engine implementation with the module-level retry state.

    Forwards the caller's arguments together with ``_replay_queues``,
    ``_retry_pending`` and ``MAX_RETRIES_PER_ACTION``. Returns nothing.
    """
    _schedule_retry_impl(
        session_id, replay_state, action, current_retry, reason,
        _replay_queues, _retry_pending, MAX_RETRIES_PER_ACTION,
    )
|
||
|
||
def _notify_error_callback(replay_state, action_id, error):
    """Delegate to the replay_engine implementation with the registered callbacks.

    Forwards the caller's arguments together with the module-level
    ``_error_callbacks`` mapping. Returns nothing.
    """
    _notify_error_callback_impl(replay_state, action_id, error, _error_callbacks)
|
||
|
||
|
||
# In production (ENVIRONMENT != development), disable the Swagger documentation
_is_production = os.environ.get("ENVIRONMENT", "development") != "development"

app = FastAPI(
    title="RPA Vision V3 - Streaming API v1",
    dependencies=[Depends(_verify_token)],
    docs_url=None if _is_production else "/docs",
    redoc_url=None if _is_production else "/redoc",
    openapi_url=None if _is_production else "/openapi.json",
)

# CORS — allowed origins (VWB frontend, Agent Chat, Dashboard)
# Configurable through the CORS_ORIGINS environment variable (comma-separated)
# Includes the public domain for internet access through the NPM reverse proxy
_DEFAULT_CORS_ORIGINS = (
    "http://localhost:3002,"  # VWB Frontend (Vite/React)
    "http://localhost:5002,"  # VWB Backend (Flask)
    "http://localhost:5004,"  # Agent Chat
    "http://localhost:5001,"  # Web Dashboard
    "http://192.168.1.40:3002,"  # VWB Frontend from the local network
    "http://192.168.1.40:5004,"  # Agent Chat from the local network
    "https://lea.labs.laurinebazin.design,"  # Public HTTPS domain
    "https://vwb.labs.laurinebazin.design"  # Public VWB HTTPS
)
CORS_ORIGINS = os.environ.get("CORS_ORIGINS", _DEFAULT_CORS_ORIGINS).split(",")
# Drop surrounding whitespace and empty entries (e.g. trailing comma)
CORS_ORIGINS = [o.strip() for o in CORS_ORIGINS if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["GET", "POST"],
    allow_headers=["Content-Type", "Authorization"],
)
|
||
|
||
|
||
@app.middleware("http")
async def security_headers_middleware(request: Request, call_next):
    """Attach the standard security headers to every response.

    Strict-Transport-Security is only added when the request arrived over
    HTTPS, either directly or as reported by the ``X-Forwarded-Proto`` header.
    """
    response = await call_next(request)

    always_set = (
        ("X-Content-Type-Options", "nosniff"),
        ("X-Frame-Options", "DENY"),
        ("X-XSS-Protection", "1; mode=block"),
        ("Referrer-Policy", "strict-origin-when-cross-origin"),
    )
    for header_name, header_value in always_set:
        response.headers[header_name] = header_value

    served_over_tls = (
        request.url.scheme == "https"
        or request.headers.get("X-Forwarded-Proto") == "https"
    )
    if served_over_tls:
        response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"

    return response
|
||
|
||
|
||
@app.middleware("http")
async def rate_limit_middleware(request: Request, call_next):
    """Apply rate limiting to the endpoints listed in ``_RATE_LIMITS``.

    Requests to other paths, and requests still within their quota, are
    passed through. A client over its quota receives a 429 JSON response.
    """
    path = request.url.path

    # Paths without a configured limit are never throttled.
    if path not in _RATE_LIMITS:
        return await call_next(request)

    client_ip = "unknown" if not request.client else request.client.host
    if _check_rate_limit(path, client_ip):
        return await call_next(request)

    from fastapi.responses import JSONResponse

    logger.warning(f"Rate limit dépassé : {path} par {client_ip}")
    rejection_body = {
        "detail": f"Trop de requêtes. Limite : {_RATE_LIMITS[path]}/{_RATE_LIMIT_WINDOW}s"
    }
    return JSONResponse(status_code=429, content=rejection_body)
|
||
|
||
|
||
# Live sessions directory (created at import time if missing)
ROOT_DIR = Path(__file__).parent.parent.parent
LIVE_SESSIONS_DIR = ROOT_DIR / "data" / "training" / "live_sessions"
LIVE_SESSIONS_DIR.mkdir(parents=True, exist_ok=True)

# =========================================================================
# Communication with the VLM worker (separate process)
# The HTTP server NEVER runs the VLM — it writes to files
# that the VLM worker (run_worker.py) reads in its own process.
# =========================================================================
_DATA_DIR = ROOT_DIR / "data" / "training"
WORKER_QUEUE_FILE = _DATA_DIR / "_worker_queue.txt"
REPLAY_LOCK_FILE = _DATA_DIR / "_replay_active.lock"

# Shared global instance (the StreamProcessor stays in the HTTP server
# for CLIP, FAISS indexing, session management and replay —
# but does NOT run VLM/reprocess_session; the separate worker handles that)
processor = StreamProcessor(data_dir=str(LIVE_SESSIONS_DIR))
worker = StreamWorker(live_dir=str(LIVE_SESSIONS_DIR), processor=processor)
|
||
|
||
|
||
# =========================================================================
|
||
# Flush garanti à l'arrêt — signal handler + atexit (ceinture et bretelles)
|
||
# =========================================================================
|
||
# Le shutdown handler FastAPI (@app.on_event("shutdown")) fait déjà un flush,
|
||
# mais si le serveur est tué par SIGTERM (systemd) ou SIGINT (Ctrl+C) avant
|
||
# que uvicorn ait le temps de déclencher le shutdown propre, le flush n'a pas
|
||
# lieu. On ajoute donc un signal handler ET un atexit comme filets de sécurité.
|
||
|
||
def _emergency_flush(signum=None, frame=None):
    """Flush the session manager to disk before the process exits.

    Has the signature of a signal handler. When invoked with a signal
    number, the default handler for that signal is restored afterwards and
    the signal is re-sent to the current process so it terminates normally.

    Args:
        signum: Signal number, or None when not called from a signal.
        frame: Current stack frame supplied by the signal machinery (unused).
    """
    origin = signal.Signals(signum).name if signum else "atexit"
    logger.info(f"Flush d'urgence des sessions en cours ({origin})...")

    try:
        processor.session_manager.flush()
        logger.info("Flush d'urgence terminé — données persistées.")
    except Exception as e:
        # A failed flush must not prevent the process from terminating.
        logger.error(f"Erreur pendant le flush d'urgence : {e}")

    if signum is None:
        return

    # Hand the signal back to the default handler so the process really exits.
    signal.signal(signum, signal.SIG_DFL)
    os.kill(os.getpid(), signum)
|
||
|
||
# Enregistrer les handlers uniquement quand le module est exécuté comme serveur
|
||
# (pas lors d'un simple import depuis un autre process comme le retraitement batch)
|
||
def _register_shutdown_handlers():
    """Install the emergency flush for SIGTERM and SIGINT, plus an atexit flush."""
    for handled_signal in (signal.SIGTERM, signal.SIGINT):
        signal.signal(handled_signal, _emergency_flush)

    # atexit calls the session manager flush directly (no signal re-raise needed).
    atexit.register(processor.session_manager.flush)
    logger.info("Handlers de shutdown enregistrés (SIGTERM, SIGINT, atexit)")
|
||
|
||
|
||
def _enqueue_to_worker(session_id: str):
    """Append a session id to the on-disk queue read by the VLM worker.

    The queue is a text file with one session id per line. A session id
    already present in the file is not added a second time. Any error is
    logged and swallowed.

    Args:
        session_id: Identifier of the session to enqueue.
    """
    try:
        WORKER_QUEUE_FILE.parent.mkdir(parents=True, exist_ok=True)

        # Collect the ids already queued so duplicates can be skipped.
        already_queued = set()
        if WORKER_QUEUE_FILE.exists():
            raw_lines = WORKER_QUEUE_FILE.read_text(encoding="utf-8").splitlines()
            stripped_lines = (raw.strip() for raw in raw_lines)
            already_queued = {entry for entry in stripped_lines if entry}

        if session_id in already_queued:
            logger.info(f"Session {session_id} déjà dans la queue worker, skip")
            return

        with open(WORKER_QUEUE_FILE, "a", encoding="utf-8") as queue_file:
            queue_file.write(session_id + "\n")

        logger.info(f"Session {session_id} ajoutée à la queue worker ({WORKER_QUEUE_FILE})")
    except Exception as e:
        logger.error(f"Erreur écriture queue worker : {e}")
|
||
|
||
|
||
def _set_replay_lock(replay_id: str = ""):
    """Write the replay lock file, recording the replay id and current time.

    According to the module's comments the VLM worker suspends itself while
    this file exists. Errors are logged and swallowed.

    Args:
        replay_id: Identifier of the replay that takes the lock.
    """
    try:
        REPLAY_LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
        lock_content = f"replay_id={replay_id}\ntimestamp={time.time()}\n"
        REPLAY_LOCK_FILE.write_text(lock_content, encoding="utf-8")
        logger.info(f"Replay lock créé : {REPLAY_LOCK_FILE} (replay={replay_id})")
    except Exception as e:
        logger.error(f"Erreur création replay lock : {e}")
|
||
|
||
|
||
def _clear_replay_lock():
    """Delete the replay lock file (the VLM worker may resume).

    A missing file is not an error. Any other failure is logged and swallowed.
    """
    try:
        REPLAY_LOCK_FILE.unlink(missing_ok=True)
        logger.info("Replay lock supprimé, worker VLM autorisé à reprendre")
    except Exception as e:
        logger.error(f"Erreur suppression replay lock : {e}")
|
||
|
||
|
||
def _get_worker_queue_status() -> Dict[str, Any]:
    """Return the state of the VLM worker queue (for monitoring).

    Returns:
        A dict with the queued session ids (``queue``, ``queue_length``),
        whether the replay lock file exists (``replay_lock_active``), the
        queue file path, and an informational note. ``running`` is always
        True: this process cannot tell whether the worker process is alive.
        When the queue file cannot be read, the queue is reported as empty
        and the failure is logged.
    """
    queue = []
    if WORKER_QUEUE_FILE.exists():
        try:
            queue = [
                line.strip()
                for line in WORKER_QUEUE_FILE.read_text(encoding="utf-8").splitlines()
                if line.strip()
            ]
        except Exception as e:
            # Still best effort, but an unreadable queue must not be
            # indistinguishable from an empty one in the logs.
            logger.warning(f"Lecture de la queue worker impossible ({WORKER_QUEUE_FILE}) : {e}")

    return {
        "running": True,  # Unknown whether the worker process runs; the queue exists
        "queue_length": len(queue),
        "queue": queue,
        "replay_lock_active": REPLAY_LOCK_FILE.exists(),
        "queue_file": str(WORKER_QUEUE_FILE),
        "note": "Le worker VLM tourne dans un process séparé (run_worker.py)",
    }
|
||
|
||
|
||
# =========================================================================
# Counter of in-flight analyses per session (to wait before finalize)
# =========================================================================
_pending_analyses: Dict[str, int] = defaultdict(int)
_pending_lock = threading.Lock()

# =========================================================================
# Replay queue per session
# Each session has a queue of actions to execute and a replay state
# =========================================================================
_replay_lock = threading.Lock()
# session_id -> list of pending actions (FIFO)
_replay_queues: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
# machine_id -> session_id (mapping for machine-targeted replay)
_machine_replay_target: Dict[str, str] = {}
# replay_id -> replay state (workflow_id, session_id, status, progress)
_replay_states: Dict[str, Dict[str, Any]] = {}
|
||
|
||
|
||
class StreamEvent(BaseModel):
    """Payload of a single event streamed by an agent for a session."""
    session_id: str
    timestamp: float
    event: Dict[str, Any]
    machine_id: str = "default"  # Machine identifier (multi-machine, backward compatible)
|
||
|
||
|
||
class ReplayRequest(BaseModel):
    """Request to start the replay of a workflow."""
    workflow_id: str
    session_id: str
    machine_id: Optional[str] = None  # Target machine for the replay (multi-machine)
    params: Optional[Dict[str, Any]] = None
|
||
|
||
|
||
class RawReplayRequest(BaseModel):
    """Replay request carrying raw actions (Free Agent mode)."""
    actions: List[Dict[str, Any]]
    session_id: str = ""
    machine_id: Optional[str] = None  # Target machine (multi-machine)
    task_description: str = ""
|
||
|
||
|
||
class SingleActionRequest(BaseModel):
    """Request to execute a single action (Copilot mode)."""
    action: Dict[str, Any]
    session_id: str = ""
    machine_id: Optional[str] = None  # Target machine (multi-machine)
|
||
|
||
|
||
class PlanReplayRequest(BaseModel):
    """Request to start a replay from an ExecutionPlan (V4 pipeline).

    Two modes are supported:
    1. Reference by ID: provide `plan_id` → the server loads the plan
       from `data/plans/{plan_id}.json`.
    2. Inline plan: provide `plan` (JSON dict) → used directly.

    The `variables` override those of the plan.
    """
    plan_id: Optional[str] = None
    plan: Optional[Dict[str, Any]] = None
    variables: Optional[Dict[str, Any]] = None
    session_id: str = ""
    machine_id: Optional[str] = None
|
||
|
||
|
||
class CompileWorkflowRequest(BaseModel):
    """Request to compile a session into a WorkflowIR + ExecutionPlan."""
    session_id: str
    machine_id: str = "default"
    domain: str = "generic"
    name: str = ""
    target_machine: str = ""
    target_resolution: str = "1280x800"
    params: Optional[Dict[str, str]] = None
|
||
|
||
|
||
class ReplayResultReport(BaseModel):
    """Execution result report for one action, sent by Agent V1."""
    session_id: str
    action_id: str
    success: bool
    error: Optional[str] = None
    warning: Optional[str] = None  # "no_screen_change", "popup_handled", "visual_resolve_failed"
    screenshot: Optional[str] = None  # Path or base64 of the post-action screenshot
    screenshot_after: Optional[str] = None  # Path or base64 of the screenshot AFTER the action
    screenshot_before: Optional[str] = None  # Screenshot BEFORE the action (for the Critic)
    actual_position: Optional[Dict[str, float]] = None  # {"x_pct": float, "y_pct": float} resolved coords actually clicked
    # Visual resolution metrics
    resolution_method: Optional[str] = None  # som_text_match, som_vlm, vlm_quick_find, etc.
    resolution_score: Optional[float] = None
    resolution_elapsed_ms: Optional[float] = None
    # Enriched fields for target_not_found (supervised pause)
    target_description: Optional[str] = None  # Human-readable description of the target
    target_spec: Optional[Dict[str, Any]] = None  # Full spec of the target
|
||
|
||
|
||
class ErrorCallbackConfig(BaseModel):
    """Error callback configuration for a replay."""
    replay_id: str
    callback_url: str  # URL to call when an unrecoverable error occurs
|
||
|
||
|
||
# Periodic cleanup thread for finished replays and expired sessions
_cleanup_thread: Optional[threading.Thread] = None
# Loop flag read by _cleanup_loop; set True at startup and False at shutdown
_cleanup_running = False
|
||
|
||
|
||
def _cleanup_loop():
    """Background loop purging finished replay states and old sessions.

    Sleeps 10 minutes between passes and stops once ``_cleanup_running`` is
    False. Each pass:
    - calls ``_cleanup_replay_states()``;
    - calls ``cleanup_old_sessions(max_age_hours=24)`` on the session manager.

    An exception raised during a pass is logged and the loop keeps running.
    """
    pause_seconds = 600  # 10 minutes

    while _cleanup_running:
        time.sleep(pause_seconds)

        # The flag may have been cleared while this thread was sleeping.
        if _cleanup_running:
            try:
                _cleanup_replay_states()
                processor.session_manager.cleanup_old_sessions(max_age_hours=24)
            except Exception as e:
                logger.error(f"Erreur dans la boucle de nettoyage : {e}")
        else:
            break
|
||
|
||
|
||
def _cleanup_replay_states():
    """Remove finished replay states that expired, and cap the total count.

    Runs under ``_replay_lock`` in two phases:

    1. Every state whose status is completed/error/failed is removed when its
       last known timestamp (last entry of ``results``, else last entry of
       ``error_log``) is older than ``REPLAY_STATE_TTL_SECONDS``, or when it
       has no timestamp at all (orphan).
    2. If more than ``MAX_REPLAY_STATES`` entries remain, finished states are
       evicted in dict iteration order (i.e. insertion order) until the cap
       is met or no finished state is left.

    The matching ``_error_callbacks`` entries are removed as well. A state
    without a ``status`` key is treated as not finished and left in place.
    """
    finished_statuses = ("completed", "error", "failed")
    now = time.time()
    to_delete = []
    evicted = 0

    with _replay_lock:
        for replay_id, state in _replay_states.items():
            # .get(): one malformed state must not abort the whole cleanup pass.
            if state.get("status") not in finished_statuses:
                continue

            # Age comes from the last result, falling back to the last error_log entry.
            last_time = 0
            for history_key in ("results", "error_log"):
                history = state.get(history_key, [])
                last_time = history[-1].get("timestamp", 0) if history else 0
                if last_time:
                    break

            # No timestamp anywhere means an orphan: remove it too.
            if not last_time or now - last_time > REPLAY_STATE_TTL_SECONDS:
                to_delete.append(replay_id)

        # Remove the expired entries
        for replay_id in to_delete:
            del _replay_states[replay_id]
            _error_callbacks.pop(replay_id, None)

        # Safety cap: too many entries, evict finished ones in insertion order.
        excess = len(_replay_states) - MAX_REPLAY_STATES
        if excess > 0:
            evictable = [
                rid for rid, s in _replay_states.items()
                if s.get("status") in finished_statuses
            ]
            for rid in evictable[:excess]:
                del _replay_states[rid]
                _error_callbacks.pop(rid, None)
                evicted += 1

    removed = len(to_delete) + evicted
    if removed:
        logger.info(f"Nettoyage replay states : {removed} entrées supprimées")
|
||
|
||
|
||
@app.get("/health")
async def health_check():
    """Health endpoint (public, no token needed); returns a static status payload."""
    return {"status": "healthy", "version": "1.0.0"}
|
||
|
||
|
||
def _check_gpu_ready():
    """Check the free VRAM reported by nvidia-smi against a 6000 MB threshold.

    Only the first GPU line of the nvidia-smi output is considered. Below the
    threshold a warning is logged and printed; otherwise an info line is
    logged. A missing nvidia-smi, a non-zero exit code or any other failure
    is logged at debug level and never raised.
    """
    try:
        import subprocess

        probe = subprocess.run(
            ["nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits"],
            capture_output=True, text=True, timeout=5
        )
        if probe.returncode != 0:
            logger.debug(f"nvidia-smi retour non-zéro : {probe.stderr.strip()}")
            return

        # nvidia-smi prints one line per GPU — only the first one is used.
        first_gpu_line = probe.stdout.strip().split("\n")[0].strip()
        free_mb = int(first_gpu_line)

        if free_mb >= 6000:  # 6 GB minimum for the VLM + CLIP
            logger.info(f"GPU OK : {free_mb} MB VRAM libres")
            return

        logger.warning(
            f"VRAM insuffisante : {free_mb} MB libres (minimum 6000 MB). "
            f"Vérifier les process GPU avec nvidia-smi."
        )
        print(
            f"\n [GPU WARNING] VRAM insuffisante : {free_mb} MB libres "
            f"(minimum 6000 MB)\n"
        )
    except FileNotFoundError:
        logger.debug("nvidia-smi non trouvé — pas de GPU NVIDIA détecté")
    except Exception as e:
        logger.debug(f"GPU check échoué : {e}")
|
||
|
||
|
||
@app.on_event("startup")
async def startup():
    """Start the streaming worker and load the existing workflows.

    NOTE: The VLM (SessionWorker) now runs in a separate process
    (run_worker.py). This HTTP server no longer runs the VLM, so it stays
    responsive for replays, events and images.

    Steps, in order: GPU check, shutdown handler registration, VLM model
    display, API token display, worker start, workflow loading, stale replay
    lock removal, and start of the periodic cleanup thread.
    """
    global _cleanup_running, _cleanup_thread

    # Check the available GPU VRAM at startup
    _check_gpu_ready()

    # Register the shutdown handlers (SIGTERM, SIGINT, atexit)
    _register_shutdown_handlers()

    # Resolve and display the VLM model in use
    from core.detection.vlm_config import get_vlm_model
    _vlm_model_name = get_vlm_model()
    logger.info("VLM model: %s", _vlm_model_name)
    print(f"\n VLM model: {_vlm_model_name}")

    # Show the API token at startup so the user can configure the agent.
    # The secret is printed on the console only: the logger records just its
    # origin, so the token does not end up in persisted log files.
    _token_source = "env RPA_API_TOKEN" if os.environ.get("RPA_API_TOKEN") else "auto-généré"
    logger.info(f"API Token ({_token_source}) actif — valeur affichée uniquement sur la console")
    print(f"\n{'='*60}")
    print(f" API Token ({_token_source}):")
    print(f" {API_TOKEN}")
    print(f" Configurer l'agent : export RPA_API_TOKEN={API_TOKEN}")
    print(f"{'='*60}\n")

    worker.start(blocking=False)

    # Load the existing workflows from disk
    _load_existing_workflows()

    # Make sure the replay lock is cleared at startup (previous crash)
    _clear_replay_lock()

    # Start the periodic cleanup thread
    _cleanup_running = True
    _cleanup_thread = threading.Thread(target=_cleanup_loop, daemon=True, name="replay_cleanup")
    _cleanup_thread.start()

    logger.info(
        "API Streaming démarrée — StreamProcessor, Worker et Cleanup prêts. "
        "VLM Worker dans un process séparé (run_worker.py)."
    )
|
||
|
||
|
||
def _load_existing_workflows():
    """Load the existing JSON workflows into processor._workflows.

    Two formats are supported:
    - Workflow.load_from_file (full format with workflow_id)
    - Raw JSON (simplified VWB/manual format); the id is the file's
      'workflow_id' value when present, else the file name without extension
    """
    from core.models.workflow_graph import Workflow

    workflow_dirs = [
        ROOT_DIR / "data" / "workflows",
        ROOT_DIR / "data" / "training" / "workflows",
        LIVE_SESSIONS_DIR / "workflows",
    ]

    loaded = 0
    for wf_dir in workflow_dirs:
        if not wf_dir.exists():
            continue
        for wf_file in wf_dir.glob("*.json"):
            # First attempt: the structured loader. Any failure (or a result
            # without workflow_id) falls through to the raw JSON fallback.
            try:
                wf = Workflow.load_from_file(str(wf_file))
                if wf and hasattr(wf, 'workflow_id'):
                    with processor._data_lock:
                        processor._workflows[wf.workflow_id] = wf
                    loaded += 1
                    continue
            except Exception:
                pass

            # Fallback: load as raw JSON and inject a workflow_id
            try:
                wf_data = json.loads(wf_file.read_text(encoding="utf-8"))
                wf_id = wf_data.get("workflow_id") or wf_file.stem
                # Store the raw dict (sufficient for _workflow_to_actions)
                with processor._data_lock:
                    processor._workflows[wf_id] = wf_data
                loaded += 1
            except Exception as e:
                logger.debug(f"Skip workflow {wf_file.name}: {e}")

    logger.info(f"Workflows chargés depuis disque: {loaded}")
|
||
|
||
|
||
@app.on_event("shutdown")
async def shutdown():
    """Stop background work and flush the sessions when the app shuts down."""
    global _cleanup_running
    # Tell the cleanup loop to stop at its next wake-up
    _cleanup_running = False
    worker.stop()
    # Clear the replay lock at shutdown (otherwise the VLM worker would stay blocked)
    _clear_replay_lock()
    processor.session_manager.flush()
    logger.info("API Streaming arrêtée.")
|
||
|
||
|
||
# =========================================================================
|
||
# Session management
|
||
# =========================================================================
|
||
|
||
@app.post("/api/v1/traces/stream/register")
async def register_session(session_id: str, machine_id: str = "default"):
    """Register a new streaming session.

    Args:
        session_id: Unique identifier of the session
        machine_id: Identifier of the source machine (multi-machine, default: "default")

    Returns:
        A dict confirming the registration, echoing session_id and machine_id.
    """
    processor.session_manager.register_session(session_id, machine_id=machine_id)
    # Reset the counters for this session (avoids leftovers from a previous session)
    with _pending_lock:
        _pending_analyses[session_id] = 0
        _analyzed_shots[session_id] = set()
    logger.info(f"Session {session_id} enregistrée (machine={machine_id}, compteurs réinitialisés)")
    return {"status": "session_registered", "session_id": session_id, "machine_id": machine_id}
|
||
|
||
|
||
def _ensure_session_registered(session_id: str, machine_id: str = "default"):
    """Register the session on the fly when the server does not know it yet.

    Makes the server robust to restarts: Agent V1 does not re-register its
    session but keeps sending events/images, so the session is registered
    automatically on first reception.

    Args:
        session_id: Identifier of the session.
        machine_id: Machine identifier (propagated from the agent).
    """
    manager = processor.session_manager
    known = manager.get_session(session_id)

    if known is not None:
        # Already registered: only upgrade the machine_id when the agent now
        # sends a real one and the stored value is still the placeholder.
        if machine_id != "default" and known.machine_id == "default":
            known.machine_id = machine_id
        return

    logger.info(f"Auto-enregistrement de la session {session_id} (machine={machine_id})")
    manager.register_session(session_id, machine_id=machine_id)
    with _pending_lock:
        _pending_analyses[session_id] = 0
        _analyzed_shots[session_id] = set()
|
||
|
||
|
||
# =========================================================================
|
||
# Événements
|
||
# =========================================================================
|
||
|
||
@app.post("/api/v1/traces/stream/event")
async def stream_event(data: StreamEvent):
    """Receive one event, journal it to disk and record it in the session.

    Steps, in order:
      1. auto-register the session if unknown (robust to server restarts);
      2. append the raw payload to ``live_events.jsonl`` in the session
         directory;
      3. hand the event to the worker for direct processing;
      4. feed the Shadow observer (a no-op when Shadow mode is inactive);
      5. for ``mouse_click`` events carrying a ``screenshot_id``, try the
         real-time SomEngine enrichment.

    Args:
        data: The streamed event payload (session id, optional machine id,
            and the event dict itself).

    Returns:
        ``{"status": "event_synced", "session_id": ...}`` merged with the
        worker result; ``som_enrichment`` is added for enrichable clicks
        (``"submitted"`` or ``"pending_screenshot"``).
    """
    session_id = data.session_id
    machine_id = data.machine_id or "default"

    # Auto-register the session if unknown (robust to server restarts)
    _ensure_session_registered(session_id, machine_id=machine_id)

    # Persist to disk (JSONL journal). The directory is resolved through the
    # shared helper so this handler and the enrichment code, which uses the
    # same helper, can never disagree on the per-machine layout.
    session_path = _get_session_dir(session_id, machine_id)
    session_path.mkdir(parents=True, exist_ok=True)
    event_file = session_path / "live_events.jsonl"
    with open(event_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(data.dict()) + "\n")

    # Direct processing through the StreamProcessor
    result = worker.process_event_direct(session_id, data.event)

    # Shadow observation: the helper swallows its own errors, so a failing
    # observer never interrupts the capture.
    shadow_observe_event(session_id, data.event)

    # Real-time SomEngine enrichment for mouse clicks. If the screenshot has
    # already arrived the enrichment is submitted now; otherwise the click is
    # parked until the screenshot shows up (see stream_image).
    event = data.event
    if event.get("type") == "mouse_click" and event.get("screenshot_id"):
        session = processor.session_manager.get_session(session_id)
        if session:
            # Uses the index of the last event in the session; this assumes
            # process_event_direct appended this event — TODO confirm.
            event_index = len(session.events) - 1
            submitted = _try_enrich_click_event(
                session_id, event, event_index, machine_id,
            )
            result["som_enrichment"] = "submitted" if submitted else "pending_screenshot"

    return {"status": "event_synced", "session_id": session_id, **result}
|
||
|
||
|
||
# =========================================================================
|
||
# Images
|
||
# =========================================================================
|
||
|
||
# Per-session set of screenshot ids already handled (avoids duplicates caused by retries)
_analyzed_shots: Dict[str, set] = defaultdict(set)

# Hash of the last screenshot seen per session (similarity-based deduplication)
_last_screenshot_hash: Dict[str, str] = {}

# Last heartbeat received per session: {session_id: {"path": str, "timestamp": float}}
# Used by the replay pre-check to verify the screen state before an action
_last_heartbeat: Dict[str, Dict[str, Any]] = {}
# Maximum heartbeat age (seconds) — beyond this, the pre-check is skipped
_HEARTBEAT_MAX_AGE_SECONDS = 10.0
# Cosine-similarity threshold used to validate the pre-check
_PRECHECK_SIMILARITY_THRESHOLD = 0.85

# Thread pool for GPU analysis (avoids blocking the async event loop)
_gpu_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="gpu_analysis")
|
||
|
||
# =========================================================================
|
||
# Enrichissement SomEngine en temps réel
|
||
# Quand un mouse_click arrive avec un screenshot_id, on lance SomEngine
|
||
# pour identifier l'élément UI cliqué. Le résultat est stocké dans l'event
|
||
# de la session, prêt pour le replay sans retraitement VLM.
|
||
# =========================================================================
|
||
|
||
# Dedicated SomEngine thread pool (a single worker so the GPU is not saturated)
_som_enrichment_executor = ThreadPoolExecutor(
    max_workers=1, thread_name_prefix="som_enrich",
)

# Clicks waiting for enrichment (their screenshot has not arrived yet)
# Key: (session_id, screenshot_id) → dict holding the data needed later
_pending_click_enrichments: Dict[tuple, Dict[str, Any]] = {}
# Guards both _pending_click_enrichments and _arrived_action_screenshots
_enrichment_lock = threading.Lock()

# Action screenshots that already arrived (to be matched with pending events)
# Key: (session_id, screenshot_id) → file path
_arrived_action_screenshots: Dict[tuple, str] = {}
|
||
|
||
|
||
def _get_session_dir(session_id: str, machine_id: str = "default") -> Path:
    """Return the directory of a live session.

    Sessions of a named machine live under a per-machine sub-directory;
    an empty or "default" machine id maps straight under the live root.
    """
    root = LIVE_SESSIONS_DIR
    has_machine_folder = bool(machine_id) and machine_id != "default"
    if has_machine_folder:
        root = root / machine_id
    return root / session_id
|
||
|
||
|
||
def _get_screen_resolution_for_session(session_id: str) -> tuple:
    """Read the screen resolution from the in-memory session.

    Returns:
        ``(width, height)`` as ints, taken from the session's last window
        info when it holds a two-item list under ``screen_resolution``;
        ``(1920, 1080)`` in every other case.
    """
    fallback = (1920, 1080)

    session = processor.session_manager.get_session(session_id)
    if not session or not session.last_window_info:
        return fallback

    resolution = session.last_window_info.get("screen_resolution", [1920, 1080])
    if not isinstance(resolution, list) or len(resolution) != 2:
        return fallback

    return (int(resolution[0]), int(resolution[1]))
|
||
|
||
|
||
def _submit_click_enrichment(
    session_id: str,
    event_data: dict,
    screenshot_path: str,
    event_index: int,
    machine_id: str = "default",
) -> None:
    """Queue the SomEngine enrichment of a click on the dedicated thread pool.

    Does not block the HTTP handler: the background job stores its result in
    the session event once SomEngine is done.

    Args:
        session_id: Identifier of the session.
        event_data: Data of the mouse_click event (pos, window, etc.).
        screenshot_path: Path to the full screenshot (PNG).
        event_index: Index of the event in ``session.events``.
        machine_id: Machine identifier.
    """
    job_args = (session_id, event_data, screenshot_path, event_index, machine_id)
    _som_enrichment_executor.submit(_enrich_click_background, *job_args)
|
||
|
||
|
||
def _enrich_click_background(
    session_id: str,
    event_data: dict,
    screenshot_path: str,
    event_index: int,
    machine_id: str = "default",
) -> None:
    """Enrich a click with SomEngine in the background (separate thread).

    Calls ``enrich_click_from_screenshot()`` and stores its result under the
    ``"enrichment"`` key of the matching session event. Every exception is
    caught and logged, so a failure never escapes the worker thread.

    Args:
        session_id: Identifier of the session.
        event_data: Data of the mouse_click event.
        screenshot_path: Path to the full screenshot.
        event_index: Index of the event in ``session.events``.
        machine_id: Machine identifier, used to locate the session directory.
    """
    try:
        coords = event_data.get("pos", [0, 0])
        if not coords or len(coords) < 2:
            return
        click_x = int(coords[0])
        click_y = int(coords[1])
        screen_w, screen_h = _get_screen_resolution_for_session(session_id)

        # Window title: prefer the nested "window" dict, otherwise fall back
        # to the flat "window_title" field.
        window = event_data.get("window", {})
        window_title = (
            window.get("title", "")
            if isinstance(window, dict)
            else event_data.get("window_title", "")
        )

        # Agent-side vision info, when available
        vision_info = event_data.get("vision_info")

        # Session dir and screenshot id are forwarded to the enrichment call
        session_dir = _get_session_dir(session_id, machine_id)
        screenshot_id = event_data.get("screenshot_id", "")

        logger.info(
            "[SoM-RT] Enrichissement clic (%d,%d) pour %s/%s",
            click_x, click_y, session_id, screenshot_id,
        )

        enrichment = enrich_click_from_screenshot(
            screenshot_path=Path(screenshot_path),
            click_x=click_x,
            click_y=click_y,
            screen_w=screen_w,
            screen_h=screen_h,
            window_title=window_title,
            vision_info=vision_info,
            session_dir=session_dir,
            screenshot_id=screenshot_id,
        )
        if not enrichment:
            logger.debug(
                "[SoM-RT] Enrichissement vide pour %s/%s (screenshot illisible ?)",
                session_id, screenshot_id,
            )
            return

        # Store the result on the session event, if it can still be found
        session = processor.session_manager.get_session(session_id)
        if not session or not (0 <= event_index < len(session.events)):
            logger.warning(
                "[SoM-RT] Session %s introuvable ou event_index %d invalide",
                session_id, event_index,
            )
            return

        session.events[event_index]["enrichment"] = enrichment
        # Ask the session manager to persist so the enrichment is saved
        processor.session_manager._maybe_persist(session_id)
        logger.info(
            "[SoM-RT] Clic enrichi : %s/%s → by_text='%s', by_role='%s', som=%s",
            session_id, screenshot_id,
            enrichment.get("by_text", ""),
            enrichment.get("by_role", ""),
            bool(enrichment.get("som_element")),
        )
    except Exception as exc:
        logger.error(
            "[SoM-RT] Erreur enrichissement clic %s : %s",
            session_id, exc, exc_info=True,
        )
|
||
|
||
|
||
def _try_enrich_click_event(
    session_id: str,
    event_data: dict,
    event_index: int,
    machine_id: str = "default",
) -> bool:
    """Try to start the SomEngine enrichment of a mouse_click event.

    If the matching screenshot has already arrived, the enrichment is
    submitted right away; otherwise the event is parked until it arrives.

    Returns:
        True if the enrichment was submitted, False if the event has no
        screenshot id or is still waiting for its screenshot.
    """
    screenshot_id = event_data.get("screenshot_id", "")
    if not screenshot_id:
        return False

    lookup_key = (session_id, screenshot_id)

    with _enrichment_lock:
        arrived_path = _arrived_action_screenshots.get(lookup_key)

        if not arrived_path:
            # Screenshot not here yet → park the click
            _pending_click_enrichments[lookup_key] = dict(
                event_data=event_data,
                event_index=event_index,
                machine_id=machine_id,
            )
            logger.debug(
                "[SoM-RT] Clic en attente du screenshot %s/%s",
                session_id, screenshot_id,
            )
            return False

        # Screenshot available → submit immediately
        _submit_click_enrichment(
            session_id, event_data, arrived_path, event_index, machine_id,
        )
        # The screenshot entry is no longer needed once matched
        _arrived_action_screenshots.pop(lookup_key, None)
        return True
|
||
|
||
|
||
def _on_action_screenshot_arrived(
    session_id: str,
    shot_id: str,
    file_path: str,
    machine_id: str = "default",
) -> bool:
    """Handle the arrival of an action screenshot (shot_XXXX_full).

    If a click is waiting for this screenshot, its enrichment is submitted to
    the thread pool. Otherwise the screenshot is remembered so the click can
    be matched when its event arrives.

    Args:
        session_id: Identifier of the session.
        shot_id: Identifier of the screenshot (e.g. "shot_0003_full").
        file_path: Full path to the PNG file.
        machine_id: Machine identifier, used when the pending entry has none.

    Returns:
        True if an enrichment was submitted, False otherwise.
    """
    # "shot_0003_full" → "shot_0003"
    screenshot_id = shot_id.replace("_full", "")
    lookup_key = (session_id, screenshot_id)

    with _enrichment_lock:
        waiting_click = _pending_click_enrichments.pop(lookup_key, None)

        if not waiting_click:
            # No click is waiting → keep the screenshot for later
            _arrived_action_screenshots[lookup_key] = file_path
            # Bound the cache: drop the oldest entry (dict insertion order)
            # so screenshots whose event never arrives do not pile up.
            if len(_arrived_action_screenshots) > 200:
                first_key = next(iter(_arrived_action_screenshots))
                _arrived_action_screenshots.pop(first_key, None)
            return False

        # A click was waiting → submit its enrichment
        _submit_click_enrichment(
            session_id,
            waiting_click["event_data"],
            file_path,
            waiting_click["event_index"],
            waiting_click.get("machine_id", machine_id),
        )
        return True
|
||
|
||
|
||
def _merge_enrichments_into_raw_events(
    raw_events: List[Dict[str, Any]],
    session_events: List[Dict[str, Any]],
) -> int:
    """Merge real-time SomEngine enrichments into the JSONL events.

    The JSONL events (``raw_events``) are written BEFORE the SomEngine
    enrichment runs, whereas the in-memory events (``session_events``) may
    carry it under the ``"enrichment"`` key. Both are matched on
    ``screenshot_id`` and ``raw_events`` is updated in place.

    Args:
        raw_events: Events loaded from live_events.jsonl, either wrapped as
            ``{"session_id": ..., "event": {...}}`` or given directly.
        session_events: In-memory events that may hold an ``"enrichment"``.

    Returns:
        The number of enrichments merged.
    """
    # screenshot_id → enrichment; a later in-memory event wins on duplicates
    by_shot: Dict[str, dict] = {
        evt.get("screenshot_id", ""): evt.get("enrichment")
        for evt in session_events
        if evt.get("enrichment") and evt.get("screenshot_id", "")
    }
    if not by_shot:
        return 0

    merged = 0
    for record in raw_events:
        payload = record.get("event", record)
        if payload.get("type") != "mouse_click":
            continue
        shot = payload.get("screenshot_id", "")
        found = by_shot.get(shot) if shot else None
        # Never overwrite an enrichment already present on the raw event
        if found and "enrichment" not in payload:
            payload["enrichment"] = found
            merged += 1

    if merged:
        logger.info(
            "[SoM-RT] %d enrichissement(s) temps réel fusionné(s) dans les events JSONL",
            merged,
        )
    return merged
|
||
|
||
|
||
def _image_hash(file_path: str) -> str:
    """Return a quick fingerprint of an image, used to spot duplicates.

    The image is shrunk to a 32x32 greyscale thumbnail and the thumbnail
    bytes are hashed with MD5. This is an exact hash of the thumbnail, not a
    tolerant perceptual hash: two screenshots match only when their
    thumbnails are byte-identical. 32x32 (rather than 16x16) keeps more
    detail so that similar-but-different screenshots (edited field text,
    moved cursor, ...) are less likely to collide.

    Args:
        file_path: Path of the image file to fingerprint.

    Returns:
        The hex MD5 digest, or ``""`` when the image cannot be read or
        processed for any reason (missing file, corrupt data, PIL missing).
    """
    try:
        from PIL import Image
        import hashlib

        # The context manager closes the underlying file deterministically;
        # without it every hashed screenshot leaves a handle open until the
        # image object happens to be garbage-collected.
        with Image.open(file_path) as img:
            thumb = img.resize((32, 32)).convert('L')
        return hashlib.md5(thumb.tobytes()).hexdigest()
    except Exception:
        # Best effort by design: callers treat "" as "no hash available"
        return ""
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/image")
async def stream_image(
    session_id: str,
    shot_id: str,
    machine_id: str = "default",
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None,
):
    """Receive a screenshot, store it on disk and route it by kind.

    Every upload is written to ``<session dir>/shots/<shot_id>.png``. Crops,
    heartbeats, focus and resolution shots are only stored. Action
    screenshots (``shot_*_full``) additionally trigger the real-time
    SomEngine click enrichment and go through id/hash deduplication; the
    heavy VLM analysis itself is deferred to the separate worker.

    Args:
        session_id: Identifier of the session.
        shot_id: Identifier of the screenshot; also used as the file name.
        machine_id: Identifier of the source machine.
        file: The uploaded image.
        background_tasks: Not used by this handler; kept in the signature.

    Returns:
        A dict with the ``shot_id`` and a ``status`` among ``crop_stored``,
        ``heartbeat_stored``, ``focus_stored``, ``res_stored``,
        ``stored_no_analysis``, ``already_analyzed``, ``duplicate_skipped``
        and ``image_stored``.

    Raises:
        HTTPException: 400 when ``shot_id`` contains a path separator.
    """
    # shot_id is client-supplied and becomes a file name below: a path
    # separator would let a request write a .png outside the shots directory.
    # NOTE(review): session_id and machine_id are also used as path
    # components without validation — worth hardening at a shared level.
    if "/" in shot_id or "\\" in shot_id:
        raise HTTPException(
            status_code=400,
            detail=f"shot_id invalide: {shot_id}",
        )

    # Auto-register the session if unknown (robust to server restarts)
    _ensure_session_registered(session_id, machine_id=machine_id)

    # Save to disk. The session directory comes from the shared helper so the
    # per-machine layout is defined in a single place.
    shots_dir = _get_session_dir(session_id, machine_id) / "shots"
    shots_dir.mkdir(parents=True, exist_ok=True)

    file_path = shots_dir / f"{shot_id}.png"
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    file_path_str = str(file_path)

    # Crops: lightweight handling (no ScreenAnalyzer pass)
    if "_crop" in shot_id:
        result = worker.process_crop_direct(session_id, shot_id, file_path_str)
        return {"status": "crop_stored", "shot_id": shot_id, **result}

    # Only shot_XXXX_full (action screenshots) go further. Everything else
    # (heartbeat, focus, res_shot) is stored on disk only, so the analysis
    # pipeline is not flooded with frames that need no processing.
    if shot_id.startswith("heartbeat_"):
        # Remember the latest heartbeat for the replay pre-check
        _last_heartbeat[session_id] = {
            "path": file_path_str,
            "timestamp": time.time(),
        }
        return {"status": "heartbeat_stored", "shot_id": shot_id}
    if shot_id.startswith("focus_"):
        return {"status": "focus_stored", "shot_id": shot_id}
    if shot_id.startswith("res_shot_"):
        return {"status": "res_stored", "shot_id": shot_id}
    if not shot_id.startswith("shot_") or "_full" not in shot_id:
        # Anything that is not shot_XXXX_full → store without analysis
        logger.debug(f"Screenshot {shot_id} stocké sans analyse GPU")
        return {"status": "stored_no_analysis", "shot_id": shot_id}

    # Real-time SomEngine enrichment, deliberately triggered BEFORE the
    # deduplication because it is an independent treatment: if a mouse_click
    # is waiting for this screenshot the enrichment starts in the background,
    # otherwise the screenshot is recorded for when the event arrives.
    _on_action_screenshot_arrived(session_id, shot_id, file_path_str, machine_id)

    # Deduplication by id: do not reprocess an already handled screenshot
    with _pending_lock:
        if shot_id in _analyzed_shots[session_id]:
            logger.debug(f"Screenshot {shot_id} déjà analysé, skip")
            return {"status": "already_analyzed", "shot_id": shot_id}

    # Deduplication by content: skip when the hash equals the previous one
    img_hash = _image_hash(file_path_str)
    if img_hash and img_hash == _last_screenshot_hash.get(session_id):
        logger.info(f"Screenshot {shot_id} identique au précédent, skip analyse GPU")
        with _pending_lock:
            _analyzed_shots[session_id].add(shot_id)
        return {"status": "duplicate_skipped", "shot_id": shot_id}
    if img_hash:
        _last_screenshot_hash[session_id] = img_hash

    with _pending_lock:
        _analyzed_shots[session_id].add(shot_id)

    # Full screenshots: STORAGE ONLY here. The complete VLM analysis is done
    # by the separate worker after the session is finalized.
    logger.debug(f"Screenshot {shot_id} stocké (analyse VLM différée au worker)")

    return {"status": "image_stored", "shot_id": shot_id}
|
||
|
||
|
||
|
||
def _process_screenshot_thread(session_id: str, shot_id: str, path: str):
    """Run the GPU analysis of one screenshot in a worker thread.

    Errors are logged with their traceback and never propagate. Whatever the
    outcome, the session's pending-analysis counter is decremented and
    clamped at zero.
    """
    import traceback

    try:
        logger.info(f"[GPU] Début analyse {shot_id} pour {session_id}")
        result = worker.process_screenshot_direct(session_id, shot_id, path)
        ui_count = result.get('ui_elements_count', 0)
        text_count = result.get('text_detected', 0)
        indexed = result.get('embedding_indexed', False)
        logger.info(
            f"[GPU] Screenshot {shot_id} analysé: {ui_count} UI, {text_count} textes, indexed={indexed}"
        )
    except Exception as exc:
        logger.error(f"[GPU] Erreur analyse {shot_id}: {exc}\n{traceback.format_exc()}")
    finally:
        with _pending_lock:
            remaining = _pending_analyses[session_id] - 1
            _pending_analyses[session_id] = max(0, remaining)
|
||
|
||
|
||
# =========================================================================
|
||
# Finalisation
|
||
# =========================================================================
|
||
|
||
@app.post("/api/v1/traces/stream/finalize")
async def finalize(session_id: str, machine_id: str = "default"):
    """Close the session and queue it for background processing.

    Non-blocking: the session is marked as finalized and appended to the
    queue of the VLM worker (separate process), which performs the analysis
    and builds the workflow. Progress can be followed through
    GET /api/v1/traces/stream/processing/status.

    Args:
        session_id: Identifier of the session to finalize.
        machine_id: Machine identifier (informational; the session already
            carries its own machine_id).

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    manager = processor.session_manager
    session = manager.get_session(session_id)
    if not session:
        raise HTTPException(
            status_code=404,
            detail=f"Session {session_id} non trouvée",
        )

    # Mark the session as finalized
    manager.finalize(session_id)
    logger.info(f"Session {session_id} finalisée, ajout à la queue du worker VLM")

    # Drop this session's entries from the real-time enrichment structures
    with _enrichment_lock:
        for registry in (_pending_click_enrichments, _arrived_action_screenshots):
            stale_keys = [key for key in registry if key[0] == session_id]
            for key in stale_keys:
                del registry[key]

    # Write to the queue file read by the VLM worker (separate process)
    _enqueue_to_worker(session_id)

    # Count the available full screenshots to give the client an estimate
    full_shots_count = 0
    session_dir = processor._find_session_dir(session_id)
    if session_dir:
        shots_dir = session_dir / "shots"
        if shots_dir.exists():
            full_shots_count = sum(1 for _ in shots_dir.glob("shot_*_full.png"))

    message = (
        f"Session finalisée. {full_shots_count} screenshots seront analysés "
        "en arrière-plan. Suivez la progression via "
        "GET /api/v1/traces/stream/processing/status"
    )
    return {
        "status": "queued_for_processing",
        "session_id": session_id,
        "machine_id": session.machine_id,
        "screenshots_to_analyze": full_shots_count,
        "message": message,
    }
|
||
|
||
|
||
# =========================================================================
|
||
# Traitement asynchrone — Suivi de la queue de processing
|
||
# =========================================================================
|
||
|
||
@app.get("/api/v1/traces/stream/processing/status")
async def get_processing_status():
    """Report the state of the VLM processing queue (separate worker process).

    Returns:
        - queue_length: number of sessions waiting in the queue file
        - queue: list of the waiting session ids
        - replay_lock_active: whether a replay is running (worker suspended)
    """
    queue_status = _get_worker_queue_status()
    return queue_status
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/processing/requeue")
async def requeue_session(session_id: str):
    """Manually put a session back in the processing queue.

    Useful to:
    - retry a failed session after a fix
    - force the reprocessing of an already processed session

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    if not processor.session_manager.get_session(session_id):
        raise HTTPException(
            status_code=404,
            detail=f"Session {session_id} non trouvée",
        )

    _enqueue_to_worker(session_id)

    response = {"status": "requeued", "session_id": session_id}
    response["queue_status"] = _get_worker_queue_status()
    return response
|
||
|
||
|
||
# =========================================================================
|
||
# Shadow mode — observation temps réel + feedback utilisateur
|
||
# =========================================================================
|
||
#
|
||
# Endpoints utilisés par la GUI Léa pour :
|
||
# - Démarrer/arrêter le mode Shadow sur une session en cours
|
||
# - Récupérer en temps réel ce que Léa a compris
|
||
# - Envoyer des feedbacks (valider/corriger/annuler/fusionner)
|
||
# - Construire le WorkflowIR final après validation
|
||
#
|
||
# Source de vérité : events.jsonl (inchangé). Le ShadowObserver est une
|
||
# couche d'observation facultative qui ne modifie PAS la capture.
|
||
#
|
||
# Import paresseux pour ne pas alourdir le démarrage serveur si la
|
||
# feature n'est pas utilisée.
|
||
# =========================================================================
|
||
|
||
# Shared ShadowObserver instance, created lazily by _get_shadow_observer()
_shadow_observer = None
_shadow_validators: Dict[str, Any] = {}  # session_id -> ShadowValidator
# Guards the lazy creation of the observer and of the per-session validators
_shadow_lock = threading.Lock()
|
||
|
||
|
||
def _get_shadow_observer():
    """Return the shared ShadowObserver, creating it on first use."""
    global _shadow_observer
    with _shadow_lock:
        if _shadow_observer is not None:
            return _shadow_observer
        # Imported lazily so the server start-up does not pay for the
        # feature when it is never used.
        from core.workflow.shadow_observer import get_shared_observer
        _shadow_observer = get_shared_observer()
        return _shadow_observer
|
||
|
||
|
||
def _get_shadow_validator(session_id: str):
    """Return the ShadowValidator of a session, creating it if needed."""
    from core.workflow.shadow_validator import ShadowValidator

    with _shadow_lock:
        try:
            return _shadow_validators[session_id]
        except KeyError:
            validator = ShadowValidator()
            _shadow_validators[session_id] = validator
            return validator
|
||
|
||
|
||
def shadow_observe_event(session_id: str, event: Dict[str, Any]) -> None:
    """Feed one event to the ShadowObserver when the session is observed.

    Helper called from stream_event(). Every exception is swallowed (and
    logged at debug level) so that an observation error can never break the
    capture flow.
    """
    try:
        observer = _get_shadow_observer()
        if not observer.has_session(session_id):
            return
        observer.observe_event(session_id, event)
    except Exception as exc:
        logger.debug(f"shadow_observe_event: {exc}")
|
||
|
||
|
||
class ShadowStartRequest(BaseModel):
    # Request body that only identifies the target session; used as the body
    # of both the /api/v1/shadow/start and /api/v1/shadow/stop handlers.
    session_id: str
|
||
|
||
|
||
class ShadowFeedbackRequest(BaseModel):
    """User feedback sent during the recording.

    action:
    - "validate" : validate the step
    - "correct" : correct the intent (new_intent required)
    - "undo" : cancel the step
    - "cancel" : cancel the whole workflow
    - "merge_next" : merge with the next step
    - "split" : split the step (at_event_index required)
    """
    session_id: str
    action: str
    # Optional fields; the feedback handler forwards only those that are set
    step_index: Optional[int] = None
    new_intent: Optional[str] = None
    at_event_index: Optional[int] = None
|
||
|
||
|
||
class ShadowBuildRequest(BaseModel):
    """Build the final WorkflowIR from the feedbacks."""
    session_id: str
    # name, domain and require_all_validated are passed as-is to the
    # validator's build_workflow_ir() by the build endpoint.
    name: str = ""
    domain: str = "generic"
    require_all_validated: bool = False
|
||
|
||
|
||
@app.post("/api/v1/shadow/start")
async def shadow_start(request: ShadowStartRequest):
    """Start Shadow mode for a running session.

    Once started, every event received through /api/v1/traces/stream/event
    feeds the ShadowObserver, which builds its understanding in real time.
    """
    session_id = request.session_id
    _get_shadow_observer().start(session_id)
    logger.info(f"Shadow mode démarré pour la session {session_id}")
    return dict(
        status="shadow_started",
        session_id=session_id,
        message="Léa observe — fais ta tâche normalement.",
    )
|
||
|
||
|
||
@app.post("/api/v1/shadow/stop")
async def shadow_stop(request: ShadowStartRequest):
    """Stop Shadow mode without destroying its state.

    The understanding stays available through
    /api/v1/shadow/{id}/understanding until /api/v1/shadow/build is called
    or the session is finalized.
    """
    session_id = request.session_id
    observer = _get_shadow_observer()
    observer.stop(session_id)

    steps = observer.get_understanding(session_id)
    return dict(
        status="shadow_stopped",
        session_id=session_id,
        steps_count=len(steps),
        understanding=steps,
    )
|
||
|
||
|
||
@app.post("/api/v1/shadow/feedback")
async def shadow_feedback(request: ShadowFeedbackRequest):
    """Apply a user feedback received during or after the recording.

    body: {session_id, action, step_index?, new_intent?, at_event_index?}

    Raises:
        HTTPException: 404 when no Shadow session exists for the session id.
    """
    session_id = request.session_id
    observer = _get_shadow_observer()
    if not observer.has_session(session_id):
        raise HTTPException(
            status_code=404,
            detail=f"Aucune session Shadow active pour {session_id}",
        )

    validator = _get_shadow_validator(session_id)
    # Reload the current steps from the observer
    validator.set_steps(observer.get_steps_internal(session_id))

    # Forward only the optional fields the client actually provided
    feedback_dict: Dict[str, Any] = {"action": request.action}
    for field_name in ("step_index", "new_intent", "at_event_index"):
        field_value = getattr(request, field_name)
        if field_value is not None:
            feedback_dict[field_name] = field_value

    outcome = validator.apply_feedback(feedback_dict)
    status = "feedback_applied" if outcome.ok else "feedback_rejected"
    return {
        "status": status,
        "session_id": session_id,
        "result": outcome.to_dict(),
    }
|
||
|
||
|
||
@app.get("/api/v1/shadow/{session_id}/understanding")
async def shadow_get_understanding(session_id: str, since_ts: float = 0.0):
    """Return what Léa has understood so far.

    Returns:
        {
            "session_id": ...,
            "steps": [
                {"step": 1, "intent": "...", "confidence": 0.9, ...},
                ...
            ],
            "current_step": {...} | None,
            "notifications": [...]  # Only those since since_ts
        }

    Raises:
        HTTPException: 404 when no Shadow session exists for the session id.
    """
    observer = _get_shadow_observer()
    if not observer.has_session(session_id):
        raise HTTPException(
            status_code=404,
            detail=f"Aucune session Shadow active pour {session_id}",
        )

    completed_steps = observer.get_understanding(session_id, include_current=False)
    in_progress = observer.get_current_step(session_id)
    recent_notifications = observer.get_notifications(session_id, since_ts=since_ts)
    return {
        "session_id": session_id,
        "steps": completed_steps,
        "current_step": in_progress,
        "notifications": recent_notifications,
    }
|
||
|
||
|
||
@app.post("/api/v1/shadow/build")
async def shadow_build(request: ShadowBuildRequest):
    """Build the final WorkflowIR from the validated/corrected steps.

    The WorkflowIR is returned but not saved: the caller decides whether to
    write it to disk or compile it into an ExecutionPlan.

    Args:
        request: Build request; ``session_id``, ``name``, ``domain`` and
            ``require_all_validated`` are read from it.

    Returns:
        ``{"status": "workflow_built", "session_id": ..., "workflow_ir": ...}``
        on success, or a ``{"status": "cancelled", ...}`` payload when
        ``build_workflow_ir`` returns ``None``.

    Raises:
        HTTPException: 404 when the observer has no such session, 400 when
            ``build_workflow_ir`` raises ``ValueError``.
    """
    observer = _get_shadow_observer()
    if not observer.has_session(request.session_id):
        raise HTTPException(
            status_code=404,
            detail=f"Aucune session Shadow active pour {request.session_id}",
        )

    validator = _get_shadow_validator(request.session_id)
    # Make sure the validator sees the observer's final steps.
    validator.set_steps(observer.get_steps_internal(request.session_id))

    # Feedback is expected to have been applied in order through
    # /api/v1/shadow/feedback, but reloading the steps drops the corrections.
    # Strategy: keep the history and replay it on the freshly loaded steps.
    #
    # The history is snapshotted into a new list first: apply_feedback() may
    # record new entries, and iterating a list that grows while it is being
    # replayed would never terminate.
    # NOTE(review): the second set_steps() call is kept as in the original
    # because its effect on the history is not visible from here.
    history = list(validator.history)
    validator.set_steps(observer.get_steps_internal(request.session_id))
    for entry in history:
        # Rebuild a feedback payload from the recorded result.
        data = entry.data or {}
        fb: Dict[str, Any] = {"action": entry.action, "step_index": entry.step_index}
        if "new_intent" in data:
            fb["new_intent"] = data["new_intent"]
        validator.apply_feedback(fb)

    try:
        ir = validator.build_workflow_ir(
            session_id=request.session_id,
            name=request.name,
            domain=request.domain,
            require_all_validated=request.require_all_validated,
        )
    except ValueError as e:
        # Keep the original cause attached for server-side debugging.
        raise HTTPException(status_code=400, detail=str(e)) from e

    if ir is None:
        return {
            "status": "cancelled",
            "session_id": request.session_id,
            "message": "Workflow annulé par l'utilisateur",
        }

    return {
        "status": "workflow_built",
        "session_id": request.session_id,
        "workflow_ir": ir.to_dict(),
    }
|
||
|
||
|
||
# =========================================================================
|
||
# Monitoring
|
||
# =========================================================================
|
||
|
||
@app.get("/api/v1/traces/stream/stats")
async def get_stats():
    """Return the streaming server statistics, extended with known machines.

    The object read from ``worker.stats`` receives an extra ``"machines"``
    entry holding the ids reported by the session manager, and is returned.
    """
    server_stats = worker.stats
    known_machines = processor.session_manager.get_machine_ids()
    server_stats["machines"] = known_machines
    return server_stats
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/machines")
async def list_machines():
    """List every known machine together with a summary of its sessions.

    Useful for the dashboard and the chat agent to know which machines are
    connected and to target a specific replay.

    Returns:
        ``{"machines": [...]}`` where each item carries ``machine_id``,
        ``total_sessions``, ``active_sessions`` (sessions not finalized) and
        ``last_activity`` (ISO string of the most recent ``last_activity``,
        or ``None`` when the machine has no session).
    """
    session_manager = processor.session_manager
    summaries = []

    for machine_id in session_manager.get_machine_ids():
        sessions = session_manager.get_sessions_by_machine(machine_id)
        still_open = [sess for sess in sessions if not sess.finalized]

        # Only look for the latest activity when there is at least one session.
        if sessions:
            latest = max(
                (sess.last_activity for sess in sessions),
                default=None,
            ).isoformat()
        else:
            latest = None

        summaries.append(
            {
                "machine_id": machine_id,
                "total_sessions": len(sessions),
                "active_sessions": len(still_open),
                "last_activity": latest,
            }
        )

    return {"machines": summaries}
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/sessions")
async def list_sessions(machine_id: Optional[str] = None):
    """List sessions (active and finalized).

    Args:
        machine_id: When provided, forwarded to ``processor.list_sessions``
            as the machine filter; when absent, no filter is requested.

    Returns:
        ``{"sessions": ..., "machines": ...}``; the machine id list is
        included for the UI.
    """
    return {
        "sessions": processor.list_sessions(machine_id=machine_id),
        # Known machine ids, added for the UI.
        "machines": processor.session_manager.get_machine_ids(),
    }
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/workflows")
async def list_workflows(machine_id: Optional[str] = None):
    """List the workflows that have been built.

    Args:
        machine_id: When provided, forwarded to ``processor.list_workflows``
            as the machine filter; when absent, no filter is requested.

    Returns:
        ``{"workflows": ..., "machines": ...}``; the machine id list is
        included for the UI.
    """
    return {
        "workflows": processor.list_workflows(machine_id=machine_id),
        # Known machine ids, added for the UI.
        "machines": processor.session_manager.get_machine_ids(),
    }
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/reload-workflows")
async def reload_workflows():
    """Reload the workflows from disk.

    Called by the VWB after an export-for-lea so that the streaming server
    sees the new workflows immediately, without a restart.

    Returns:
        ``{"success": True, "workflows_count": <value returned by
        processor.reload_workflows()>}``.
    """
    reloaded = processor.reload_workflows()
    response = {"success": True, "workflows_count": reloaded}
    return response
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/workflow/{workflow_id}")
async def get_workflow_detail(workflow_id: str):
    """Return the full detail of a workflow (core JSON format).

    Used by the VWB to import a learned workflow that is not on disk yet
    (only held in memory by the streaming server).

    Raises:
        HTTPException: 404 when no workflow is registered under this id.
    """
    # The lookup is done while holding the processor's data lock.
    with processor._data_lock:
        found = processor._workflows.get(workflow_id)

    if found:
        return found.to_dict()

    raise HTTPException(status_code=404, detail=f"Workflow '{workflow_id}' non trouvé")
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/session/{session_id}")
async def get_session(session_id: str):
    """Return the current state of a session.

    Returns:
        A dict with the session and machine ids, event and screenshot counts,
        last window info, creation / last-activity timestamps (ISO strings)
        and the ``finalized`` flag.

    Raises:
        HTTPException: 404 when the session manager has no such session.
    """
    sess = processor.session_manager.get_session(session_id)
    if not sess:
        raise HTTPException(status_code=404, detail=f"Session {session_id} non trouvée")

    state = {
        "session_id": sess.session_id,
        "machine_id": sess.machine_id,
        "events_count": len(sess.events),
        "screenshots_count": len(sess.shot_paths),
        "last_window": sess.last_window_info,
        "created_at": sess.created_at.isoformat(),
        "last_activity": sess.last_activity.isoformat(),
        "finalized": sess.finalized,
    }
    return state
|
||
|
||
|
||
# =========================================================================
|
||
# Replay — Exécution de workflows sur l'Agent V1
|
||
# =========================================================================
|
||
|
||
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay")
async def start_replay(request: ReplayRequest):
    """Start replaying a workflow on an active Agent V1 session.

    The server loads the workflow, converts it into a list of normalized
    actions and places them in the session's queue. Agent V1 then fetches
    them through GET /replay/next (pull model).

    If ``session_id`` is empty or starts with ``"chat_"``, the active Agent V1
    session is auto-detected through ``_find_active_agent_session``. If
    ``machine_id`` is provided, it is used as a filter for that detection.

    Args:
        request: Replay request; ``workflow_id``, ``session_id``,
            ``machine_id`` and ``params`` are read from it.

    Returns:
        A dict with ``replay_id``, ``status`` ("running"), ``workflow_id``,
        ``session_id``, ``machine_id``, ``total_actions``, ``setup_actions``
        and ``setup_app``.

    Raises:
        HTTPException: 404 when no active session or no such workflow is
            found; 400 when the workflow yields no action or exceeds
            ``MAX_ACTIONS_PER_REPLAY``.
    """
    workflow_id = request.workflow_id
    session_id = request.session_id
    target_machine_id = request.machine_id
    params = request.params or {}

    # Auto-detect the active Agent V1 session (optional machine filter).
    if not session_id or session_id.startswith("chat_"):
        active_session = _find_active_agent_session(machine_id=target_machine_id)
        if active_session:
            logger.info(
                f"Auto-détection session Agent V1 : {active_session} "
                f"(demandé: {session_id}, machine={target_machine_id})"
            )
            session_id = active_session
        else:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 et démarrez une session d'abord."
            )

    # Check that the workflow exists. The list of available ids used in the
    # error message is captured under the same lock, so the registry is never
    # iterated while another thread may be modifying it.
    with processor._data_lock:
        workflow = processor._workflows.get(workflow_id)
        available_ids = [] if workflow else list(processor._workflows.keys())

    if not workflow:
        raise HTTPException(
            status_code=404,
            detail=f"Workflow '{workflow_id}' non trouvé. "
                   f"Workflows disponibles : {available_ids}"
        )

    # Convert the workflow into normalized actions.
    actions = _workflow_to_actions(workflow, params)
    if not actions:
        raise HTTPException(
            status_code=400,
            detail=f"Le workflow '{workflow_id}' ne contient aucune action exécutable."
        )

    # Safety limit on the number of actions.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}). "
                   "Découpez le workflow en parties plus petites."
        )

    # ── Environment setup: open the required applications ──
    setup_actions = []
    app_info = _extract_required_apps_from_workflow(workflow)
    if app_info:
        setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_wf")
        if setup_actions:
            # Setup actions run before the workflow's own actions.
            actions = setup_actions + actions
            logger.info(
                "replay workflow %s : %d actions de setup injectées "
                "(app=%s, cmd=%s)",
                workflow_id, len(setup_actions),
                app_info.get("primary_app"), app_info.get("primary_launch_cmd"),
            )

    # Create the replay identifier.
    replay_id = f"replay_{uuid.uuid4().hex[:8]}"

    # Resolve the machine_id of the target session.
    session_obj = processor.session_manager.get_session(session_id)
    resolved_machine_id = target_machine_id or (session_obj.machine_id if session_obj else "default")

    # Inject the actions into the session's queue.
    with _replay_lock:
        _replay_queues[session_id] = list(actions)  # Replace any existing queue
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=workflow_id,
            session_id=session_id,
            total_actions=len(actions),
            params=params,
            machine_id=resolved_machine_id,
            actions=actions,
        )
        # Record the machine -> session mapping for targeted replay.
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = session_id

    # Tell the VLM worker (separate process) that a replay is active so it
    # suspends itself.
    _set_replay_lock(replay_id)

    logger.info(
        f"Replay démarré : {replay_id} | workflow={workflow_id} | "
        f"session={session_id} | machine={resolved_machine_id} | "
        f"{len(actions)} actions ({len(setup_actions)} setup + "
        f"{len(actions) - len(setup_actions)} replay) (worker suspendu)"
    )

    return {
        "replay_id": replay_id,
        "status": "running",
        "workflow_id": workflow_id,
        "session_id": session_id,
        "machine_id": resolved_machine_id,
        "total_actions": len(actions),
        "setup_actions": len(setup_actions),
        "setup_app": app_info.get("primary_app", "") if app_info else "",
    }
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/raw")
async def start_raw_replay(request: RawReplayRequest):
    """Start a replay from raw actions (free-agent mode).

    Instead of loading a workflow, this endpoint directly accepts a list of
    normalized actions generated by the LLM planner and injects them into the
    Agent V1 replay queue.

    Args:
        request: Raw replay request; ``session_id``, ``actions``,
            ``machine_id`` and ``task_description`` are read from it.

    Returns:
        A dict with ``replay_id``, ``status`` ("running"), ``task``,
        ``session_id``, ``machine_id`` and ``total_actions``.

    Raises:
        HTTPException: 400 when no action is given, when there are more than
            ``MAX_ACTIONS_PER_REPLAY`` actions or when one fails validation;
            404 when no active Agent V1 session can be found.
    """
    actions = request.actions
    session_id = request.session_id
    target_machine_id = request.machine_id
    task = request.task_description or "Tâche libre"

    if not actions:
        raise HTTPException(status_code=400, detail="Aucune action fournie.")

    # Safety limit on the number of actions.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}). "
                   "Réduisez le plan d'exécution."
        )

    # Validate every action (HIGH security): the first invalid one rejects
    # the whole request.
    for position, candidate in enumerate(actions):
        problem = _validate_replay_action(candidate)
        if problem:
            raise HTTPException(
                status_code=400,
                detail=f"Action #{position} invalide : {problem}"
            )

    # Auto-detect the Agent V1 session (optional machine filter).
    needs_autodetect = not session_id or session_id.startswith("chat_")
    if needs_autodetect:
        detected = _find_active_agent_session(machine_id=target_machine_id)
        if not detected:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 sur le PC cible."
            )
        session_id = detected

    # Assign an action_id where missing (the request's action dicts are
    # updated in place).
    for candidate in actions:
        if "action_id" not in candidate:
            candidate["action_id"] = f"act_free_{uuid.uuid4().hex[:6]}"

    replay_id = f"replay_free_{uuid.uuid4().hex[:8]}"

    # Resolve the machine_id of the target session.
    target_session = processor.session_manager.get_session(session_id)
    if target_machine_id:
        resolved_machine_id = target_machine_id
    elif target_session:
        resolved_machine_id = target_session.machine_id
    else:
        resolved_machine_id = "default"

    with _replay_lock:
        _replay_queues[session_id] = list(actions)
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=f"free_task:{task[:50]}",
            session_id=session_id,
            total_actions=len(actions),
            params={},
            machine_id=resolved_machine_id,
            actions=actions,
        )
        # Record the machine -> session mapping for targeted replay.
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = session_id

    # Tell the VLM worker (separate process) that a replay is active so it
    # suspends itself.
    _set_replay_lock(replay_id)

    logger.info(
        f"Replay libre démarré : {replay_id} | task='{task}' | "
        f"session={session_id} | machine={resolved_machine_id} | {len(actions)} actions (worker suspendu)"
    )

    summary = {
        "replay_id": replay_id,
        "status": "running",
        "task": task,
        "session_id": session_id,
        "machine_id": resolved_machine_id,
        "total_actions": len(actions),
    }
    return summary
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay-session")
async def replay_from_session(
    session_id: str,
    machine_id: str = "default",
):
    """Replay a session directly from its raw events.

    No need to wait for VLM/GraphBuilder processing: the replay is built
    from the session's ``live_events.jsonl`` file.

    Steps performed here:
        1. Locate and load the raw events of the session.
        2. Merge in-memory enrichments into the raw events, when the session
           is still held by the session manager.
        3. Build the replay actions with ``build_replay_from_raw_events``,
           drop the ones rejected by ``_validate_replay_action``, optionally
           optimize them through the gesture catalog, and prepend setup
           actions for the required applications.
        4. Find the target Agent V1 session.
        5. Inject the actions into that session's replay queue.

    Args:
        session_id: Source session whose events are replayed.
        machine_id: Machine sub-directory to look in first; ``"default"``
            means "unknown" and is replaced by the directory name in which
            the session is found.

    Returns:
        A dict describing the started replay (ids, action counts, setup app
        and a preview of the first 8 actions with anchor images elided).

    Raises:
        HTTPException: 400 for a missing ``session_id``, an empty event file,
            no usable action or too many actions; 404 when the event file or
            an active Agent V1 session cannot be found; 500 when the event
            file cannot be read.
    """
    if not session_id:
        raise HTTPException(status_code=400, detail="session_id requis")

    # NOTE(review): session_id and machine_id are request parameters joined
    # into filesystem paths below without sanitization — confirm upstream
    # validation prevents path traversal.

    # ── 1. Locate the session's live_events.jsonl file ──
    events_file = None

    # Look in the machine_id sub-directory first (standard layout).
    if machine_id and machine_id != "default":
        candidate = LIVE_SESSIONS_DIR / machine_id / session_id / "live_events.jsonl"
        if candidate.exists():
            events_file = candidate

    # Fallback: search every machine sub-directory. The existence check keeps
    # a missing sessions directory on the 404 path below instead of letting
    # iterdir() raise FileNotFoundError.
    if not events_file and LIVE_SESSIONS_DIR.exists():
        for machine_dir in LIVE_SESSIONS_DIR.iterdir():
            if not machine_dir.is_dir():
                continue
            candidate = machine_dir / session_id / "live_events.jsonl"
            if candidate.exists():
                events_file = candidate
                # Resolve the machine_id from the directory name.
                if machine_id == "default":
                    machine_id = machine_dir.name
                break

    # Last fallback: session directory directly under LIVE_SESSIONS_DIR.
    if not events_file:
        candidate = LIVE_SESSIONS_DIR / session_id / "live_events.jsonl"
        if candidate.exists():
            events_file = candidate

    if not events_file:
        raise HTTPException(
            status_code=404,
            detail=f"Session '{session_id}' introuvable. "
                   f"Fichier live_events.jsonl non trouvé dans "
                   f"{LIVE_SESSIONS_DIR}/{machine_id}/{session_id}/"
        )

    # ── 2. Load the raw events ──
    raw_events = []
    try:
        for line in events_file.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                raw_events.append(json.loads(line))
            except json.JSONDecodeError:
                # Malformed lines are skipped on purpose (best effort).
                continue
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Erreur lecture events de la session : {e}"
        ) from e

    if not raw_events:
        raise HTTPException(
            status_code=400,
            detail=f"Session '{session_id}' : aucun événement trouvé dans live_events.jsonl"
        )

    # ── 2b. Merge real-time enrichments from the in-memory session ──
    # The JSONL file does not contain the enrichments computed during
    # recording (they are added in memory after the JSONL write). They are
    # injected here so build_replay_from_raw_events can reuse them.
    session_mem = processor.session_manager.get_session(session_id)
    if session_mem and session_mem.events:
        _merge_enrichments_into_raw_events(raw_events, session_mem.events)

    # ── 3. Build the clean replay from the raw events ──
    # The session directory is passed along to enable visual replay
    # (reference crops).
    session_dir = str(events_file.parent)
    actions = build_replay_from_raw_events(
        raw_events, session_id=session_id, session_dir=session_dir,
    )

    if not actions:
        raise HTTPException(
            status_code=400,
            detail=f"Session '{session_id}' : aucune action exploitable après nettoyage "
                   f"({len(raw_events)} événements bruts)"
        )

    # Safety limit.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}). "
                   "La session est trop longue pour un replay direct."
        )

    # Validate every action (HIGH security). Invalid actions are dropped
    # rather than rejecting the whole replay; the survivors are collected in
    # a new list instead of overwriting entries of the list being iterated.
    valid_actions = []
    for i, action in enumerate(actions):
        error = _validate_replay_action(action)
        if error:
            logger.warning(
                "replay-session : action #%d invalide (%s), suppression", i, error
            )
            continue
        valid_actions.append(action)
    actions = valid_actions

    if not actions:
        raise HTTPException(
            status_code=400,
            detail=f"Session '{session_id}' : toutes les actions ont été rejetées par la validation"
        )

    # Keyboard-gesture optimization, when a catalog is available.
    if _gesture_catalog and actions:
        actions = _gesture_catalog.optimize_replay_actions(actions)

    # ── 3b. Environment setup: open the required applications ──
    # The raw events are analysed to detect which applications are required,
    # and setup actions are injected at the head of the replay queue.
    setup_actions = []
    app_info = _extract_required_apps_from_events(raw_events)
    if app_info:
        setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_sess")
        if setup_actions:
            actions = setup_actions + actions
            logger.info(
                "replay-session %s : %d actions de setup injectées avant le replay "
                "(app=%s, cmd=%s)",
                session_id, len(setup_actions),
                app_info.get("primary_app"), app_info.get("primary_launch_cmd"),
            )

    # ── 4. Find the target replay session (active Agent V1) ──
    # The active agent may run a session different from the source session.
    target_session_id = _find_active_agent_session(machine_id=machine_id)
    if not target_session_id:
        # Fallback: use the source session if it is an Agent V1 session.
        if session_id.startswith("sess_"):
            target_session_id = session_id
        else:
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active sur la machine '{machine_id}'. "
                       "Lancez l'Agent V1 sur le PC cible."
            )

    # ── 5. Inject into the replay queue ──
    replay_id = f"replay_sess_{uuid.uuid4().hex[:8]}"

    with _replay_lock:
        _replay_queues[target_session_id] = list(actions)
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=f"session_replay:{session_id}",
            session_id=target_session_id,
            total_actions=len(actions),
            params={},
            machine_id=machine_id,
            actions=actions,
        )
        # Record the machine -> session mapping for targeted replay.
        if machine_id and machine_id != "default":
            _machine_replay_target[machine_id] = target_session_id

    # Tell the VLM worker (separate process) that a replay is active so it
    # suspends itself.
    _set_replay_lock(replay_id)

    logger.info(
        "Replay session démarré : %s | source=%s | target=%s | machine=%s | "
        "%d actions (%d setup + %d replay) (worker suspendu)",
        replay_id, session_id, target_session_id, machine_id,
        len(actions), len(setup_actions), len(actions) - len(setup_actions),
    )

    return {
        "replay_id": replay_id,
        "status": "running",
        "source_session_id": session_id,
        "target_session_id": target_session_id,
        "machine_id": machine_id,
        "total_actions": len(actions),
        "setup_actions": len(setup_actions),
        "replay_actions": len(actions) - len(setup_actions),
        "total_raw_events": len(raw_events),
        "setup_app": app_info.get("primary_app", "") if app_info else "",
        "actions_preview": [
            {
                k: (
                    # Do not serialize the base64 image in the preview.
                    {kk: ("..." if kk == "anchor_image_base64" else vv) for kk, vv in v.items()}
                    if k == "target_spec" and isinstance(v, dict)
                    else v
                )
                for k, v in a.items()
                if k != "action_id"
            }
            for a in actions[:8]  # Show more actions so the setup is included
        ],
    }
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/single")
async def enqueue_single_action(request: SingleActionRequest):
    """Enqueue a single action for execution (Copilot mode).

    Unlike /replay and /replay/raw, which inject a whole list, this endpoint
    enqueues only ONE action at a time. The Copilot chat agent calls it step
    by step after user validation.

    Args:
        request: Single-action request; ``session_id``, ``action`` and
            ``machine_id`` are read from it. The action is copied into a new
            dict before being modified.

    Returns:
        ``{"action_id", "session_id", "machine_id", "status": "enqueued"}``;
        the ``action_id`` lets the caller track the result via /replay/result.

    Raises:
        HTTPException: 400 when the action fails validation, 404 when no
            active Agent V1 session can be found.
    """
    session_id = request.session_id
    action = dict(request.action)
    target_machine_id = request.machine_id

    # Validate the action (HIGH security).
    error = _validate_replay_action(action)
    if error:
        raise HTTPException(status_code=400, detail=f"Action invalide : {error}")

    # Auto-detect the Agent V1 session (optional machine filter).
    if not session_id or session_id.startswith("chat_"):
        active_session = _find_active_agent_session(machine_id=target_machine_id)
        if active_session:
            session_id = active_session
        else:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 sur le PC cible."
            )

    # Assign an action_id if missing.
    if "action_id" not in action:
        action["action_id"] = f"act_copilot_{uuid.uuid4().hex[:8]}"

    action_id = action["action_id"]

    with _replay_lock:
        # setdefault creates the queue on first use, so a session that never
        # had a replay does not raise KeyError; an existing queue is reused.
        _replay_queues.setdefault(session_id, []).append(action)

    logger.info(
        f"Action Copilot enqueued: {action_id} | type={action.get('type')} | "
        f"session={session_id} | machine={target_machine_id}"
    )

    return {
        "action_id": action_id,
        "session_id": session_id,
        "machine_id": target_machine_id,
        "status": "enqueued",
    }
|
||
|
||
|
||
# =========================================================================
|
||
# Pipeline V4 — ExecutionPlan → Runtime (nouveau chemin)
|
||
# =========================================================================
|
||
# RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||
#
|
||
# Ces deux endpoints sont optionnels et coexistent avec le chemin legacy
|
||
# (build_replay_from_raw_events() dans stream_processor.py). Ils permettent
|
||
# de lancer un replay depuis un plan pré-compilé, déterministe et borné.
|
||
# =========================================================================
|
||
|
||
# Default directories for the persistence of the V4 pipeline:
# WorkflowIR files and compiled ExecutionPlan files, both under ROOT_DIR/data.
WORKFLOWS_IR_DIR = ROOT_DIR / "data" / "workflows_ir"
EXECUTION_PLANS_DIR = ROOT_DIR / "data" / "plans"
|
||
|
||
|
||
def _load_execution_plan(plan_id: str):
    """Load an ExecutionPlan from disk (``data/plans/{plan_id}.json``).

    The exact file ``{plan_id}.json`` is tried first. Otherwise the plans
    directory is searched for files whose name starts with ``plan_id``
    (a plan id given without its ``_vN`` suffix); when several match, the
    lexicographically first path is loaded so the result is deterministic.

    Args:
        plan_id: Plan identifier. It is treated as literal text: glob
            metacharacters it may contain are escaped before the prefix
            search.

    Returns:
        The loaded ``ExecutionPlan``, or ``None`` when no file matches.
    """
    import glob

    from core.workflow.execution_plan import ExecutionPlan

    # Direct path.
    candidate = EXECUTION_PLANS_DIR / f"{plan_id}.json"
    if candidate.exists():
        return ExecutionPlan.load(str(candidate))

    # Fallback: prefix search (plan_id without _vN).
    if EXECUTION_PLANS_DIR.exists():
        # plan_id comes from the request body: escape it so characters such
        # as '*', '?' or '[' cannot widen the search pattern.
        pattern = f"{glob.escape(plan_id)}*.json"
        matches = sorted(EXECUTION_PLANS_DIR.glob(pattern))
        if matches:
            return ExecutionPlan.load(str(matches[0]))

    return None
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/plan")
async def launch_replay_from_plan(request: PlanReplayRequest):
    """Start a replay from an ExecutionPlan (pipeline V4).

    Steps performed here:
        1. Load the plan (from ``plan_id`` on disk, or from the inline body).
        2. Convert the plan into replay actions through
           ``execution_plan_to_actions()``, passing ``request.variables``.
        3. Validate every action (HIGH security); invalid ones are dropped.
        4. Find the target Agent V1 session.
        5. Inject the validated actions into that session's replay queue.

    Args:
        request: Plan replay request; ``plan_id``, ``plan``, ``variables``,
            ``session_id`` and ``machine_id`` are read from it.

    Returns:
        A dict describing the started replay: ids, plan version, action and
        node counts, number of rejected actions and plan statistics.

    Raises:
        HTTPException: 404 when the plan or an active session is not found;
            400 when no plan is supplied, the inline plan cannot be parsed,
            the plan has no node / no usable action, or there are too many
            actions; 500 when the conversion to actions fails.
    """
    from core.workflow.execution_plan import ExecutionPlan

    # ── 1. Load / parse the plan ──
    plan = None
    if request.plan_id:
        plan = _load_execution_plan(request.plan_id)
        if plan is None:
            raise HTTPException(
                status_code=404,
                detail=f"ExecutionPlan '{request.plan_id}' introuvable dans "
                       f"{EXECUTION_PLANS_DIR}/",
            )
    elif request.plan:
        try:
            plan = ExecutionPlan.from_dict(request.plan)
        except Exception as parse_error:
            raise HTTPException(
                status_code=400,
                detail=f"Impossible de parser le plan inline : {parse_error}",
            )
    else:
        raise HTTPException(
            status_code=400,
            detail="Fournir 'plan_id' (référence) ou 'plan' (inline).",
        )

    if not plan.nodes:
        raise HTTPException(
            status_code=400,
            detail=f"ExecutionPlan '{plan.plan_id}' : aucun nœud à exécuter.",
        )

    # ── 2. Convert the nodes into replay actions ──
    try:
        actions = execution_plan_to_actions(
            plan=plan,
            variables=request.variables,
            id_prefix="act_plan",
        )
    except Exception as conversion_error:
        logger.exception("Erreur conversion ExecutionPlan → actions")
        raise HTTPException(
            status_code=500,
            detail=f"Erreur de conversion du plan : {conversion_error}",
        )

    if not actions:
        raise HTTPException(
            status_code=400,
            detail=f"ExecutionPlan '{plan.plan_id}' : aucune action exploitable "
                   f"après conversion ({plan.total_nodes} nœuds).",
        )

    # Safety limit.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}).",
        )

    # ── 3. Validate every action (HIGH security) ──
    validated: List[Dict[str, Any]] = []
    for position, candidate in enumerate(actions):
        problem = _validate_replay_action(candidate)
        if not problem:
            validated.append(candidate)
            continue
        # Invalid actions are logged and dropped, not fatal.
        logger.warning(
            "replay/plan : action #%d invalide (%s), suppression", position, problem,
        )

    if not validated:
        raise HTTPException(
            status_code=400,
            detail=f"ExecutionPlan '{plan.plan_id}' : toutes les actions "
                   f"ont été rejetées par la validation.",
        )

    # ── 4. Find the target Agent V1 session ──
    target_session_id = request.session_id
    if not target_session_id or target_session_id.startswith("chat_"):
        detected = _find_active_agent_session(machine_id=request.machine_id)
        if not detected:
            machine_hint = (
                f" sur la machine '{request.machine_id}'" if request.machine_id else ""
            )
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 sur le PC cible.",
            )
        target_session_id = detected

    # ── 5. Inject into the replay queue ──
    replay_id = f"replay_plan_{uuid.uuid4().hex[:8]}"

    target_session = processor.session_manager.get_session(target_session_id)
    resolved_machine_id = (
        request.machine_id
        or (target_session.machine_id if target_session else "default")
    )

    with _replay_lock:
        _replay_queues[target_session_id] = list(validated)
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=f"execution_plan:{plan.plan_id}",
            session_id=target_session_id,
            total_actions=len(validated),
            params=dict(plan.variables or {}),
            machine_id=resolved_machine_id,
            actions=validated,
        )
        # Record the machine -> session mapping for targeted replay.
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = target_session_id

    # Tell the VLM worker that a replay is active so it suspends itself.
    _set_replay_lock(replay_id)

    rejected_count = len(actions) - len(validated)

    logger.info(
        "Replay plan V4 démarré : %s | plan=%s (v%d) | session=%s | "
        "machine=%s | %d actions (total_nodes=%d, rejected=%d)",
        replay_id, plan.plan_id, plan.version, target_session_id,
        resolved_machine_id, len(validated), plan.total_nodes,
        rejected_count,
    )

    plan_stats = {
        "nodes_with_ocr": plan.nodes_with_ocr,
        "nodes_with_template": plan.nodes_with_template,
        "nodes_with_vlm": plan.nodes_with_vlm,
        "estimated_duration_s": plan.estimated_duration_s,
    }

    return {
        "replay_id": replay_id,
        "status": "running",
        "plan_id": plan.plan_id,
        "workflow_id": plan.workflow_id,
        "plan_version": plan.version,
        "session_id": target_session_id,
        "machine_id": resolved_machine_id,
        "total_actions": len(validated),
        "total_nodes": plan.total_nodes,
        "rejected_actions": rejected_count,
        "stats": plan_stats,
    }
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/workflow/compile")
async def compile_workflow_endpoint(request: CompileWorkflowRequest):
    """Compile a recorded session into a WorkflowIR and an ExecutionPlan (V4 pipeline).

    Steps:
        1. Locate and load the session's raw events (live_events.jsonl).
        2. IRBuilder.build() produces the WorkflowIR.
        3. The WorkflowIR is saved under WORKFLOWS_IR_DIR.
        4. ExecutionCompiler.compile() produces the ExecutionPlan.
        5. The ExecutionPlan is saved under EXECUTION_PLANS_DIR.
        6. The identifiers and summary stats are returned.

    This endpoint does NOT start a replay; it only prepares the plan. The
    caller is expected to call /replay/plan with the returned ``plan_id``.

    Args:
        request: Compilation request. Fields read here: ``session_id``,
            ``machine_id``, ``domain``, ``name``, ``target_machine``,
            ``target_resolution`` and ``params``.

    Returns:
        A dict describing the saved workflow and plan (identifiers, paths,
        step/variable counts and per-strategy node stats).

    Raises:
        HTTPException: 400 when ``session_id`` is missing, when ``session_id``
            or ``machine_id`` is not a plain path component, when the session
            has no events, or when the builder produces no steps; 404 when
            live_events.jsonl cannot be found; 500 when reading, building,
            compiling or saving fails.
    """
    from core.workflow.execution_compiler import ExecutionCompiler
    from core.workflow.ir_builder import IRBuilder

    session_id = request.session_id
    machine_id = request.machine_id or "default"

    if not session_id:
        raise HTTPException(status_code=400, detail="session_id requis")

    # Both identifiers come from the HTTP request and are joined into
    # filesystem paths below. Reject anything that is not a plain directory
    # name: "..", a separator, or an absolute segment would let the lookup
    # escape LIVE_SESSIONS_DIR.
    for field_name, value in (("session_id", session_id), ("machine_id", machine_id)):
        text = str(value)
        if text in (".", "..") or any(ch in text for ch in ("/", "\\", "\x00")):
            raise HTTPException(status_code=400, detail=f"{field_name} invalide")

    # ── 1. Locate the session's live_events.jsonl ──
    events_file = None

    # First choice: the directory of the explicitly requested machine.
    if machine_id and machine_id != "default":
        candidate = LIVE_SESSIONS_DIR / machine_id / session_id / "live_events.jsonl"
        if candidate.exists():
            events_file = candidate

    # Second choice: scan every machine directory for this session.
    if not events_file and LIVE_SESSIONS_DIR.exists():
        for machine_dir in LIVE_SESSIONS_DIR.iterdir():
            if not machine_dir.is_dir():
                continue
            candidate = machine_dir / session_id / "live_events.jsonl"
            if candidate.exists():
                events_file = candidate
                # Only adopt the discovered machine name when the caller did
                # not specify one.
                if machine_id == "default":
                    machine_id = machine_dir.name
                break

    # Last choice: a session stored directly under LIVE_SESSIONS_DIR.
    if not events_file:
        candidate = LIVE_SESSIONS_DIR / session_id / "live_events.jsonl"
        if candidate.exists():
            events_file = candidate

    if not events_file:
        raise HTTPException(
            status_code=404,
            detail=f"Session '{session_id}' : live_events.jsonl introuvable.",
        )

    # ── 2. Load the events (one JSON document per line) ──
    raw_events: List[Dict[str, Any]] = []
    try:
        for line in events_file.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                raw_events.append(json.loads(line))
            except json.JSONDecodeError:
                # Malformed lines are skipped; the remaining lines are kept.
                continue
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Erreur lecture events : {e}",
        ) from e

    if not raw_events:
        raise HTTPException(
            status_code=400,
            detail=f"Session '{session_id}' : aucun événement.",
        )

    # ── 3. IRBuilder → WorkflowIR ──
    try:
        builder = IRBuilder()
        ir = builder.build(
            events=raw_events,
            session_id=session_id,
            session_dir=str(events_file.parent),
            domain=request.domain,
            name=request.name,
        )
    except Exception as e:
        logger.exception("Erreur IRBuilder.build()")
        raise HTTPException(
            status_code=500,
            detail=f"Erreur de construction WorkflowIR : {e}",
        ) from e

    if not ir.steps:
        raise HTTPException(
            status_code=400,
            detail=f"Session '{session_id}' : aucune étape détectée "
                   "(pipeline IRBuilder a produit un workflow vide).",
        )

    # ── 4. Persist the WorkflowIR ──
    try:
        WORKFLOWS_IR_DIR.mkdir(parents=True, exist_ok=True)
        ir_path = ir.save(str(WORKFLOWS_IR_DIR))
    except Exception as e:
        logger.exception("Erreur sauvegarde WorkflowIR")
        raise HTTPException(
            status_code=500,
            detail=f"Erreur sauvegarde WorkflowIR : {e}",
        ) from e

    # ── 5. ExecutionCompiler → ExecutionPlan ──
    try:
        compiler = ExecutionCompiler()
        plan = compiler.compile(
            ir=ir,
            target_machine=request.target_machine,
            target_resolution=request.target_resolution,
            params=request.params,
        )
    except Exception as e:
        logger.exception("Erreur ExecutionCompiler.compile()")
        raise HTTPException(
            status_code=500,
            detail=f"Erreur de compilation du plan : {e}",
        ) from e

    # ── 6. Persist the ExecutionPlan ──
    try:
        EXECUTION_PLANS_DIR.mkdir(parents=True, exist_ok=True)
        plan_path = plan.save(str(EXECUTION_PLANS_DIR))
    except Exception as e:
        logger.exception("Erreur sauvegarde ExecutionPlan")
        raise HTTPException(
            status_code=500,
            detail=f"Erreur sauvegarde ExecutionPlan : {e}",
        ) from e

    logger.info(
        "Compilation V4 : session=%s → workflow_ir=%s (v%d) → plan=%s "
        "(%d nœuds, OCR=%d, template=%d, VLM=%d)",
        session_id, ir.workflow_id, ir.version, plan.plan_id,
        plan.total_nodes, plan.nodes_with_ocr, plan.nodes_with_template,
        plan.nodes_with_vlm,
    )

    return {
        "session_id": session_id,
        "machine_id": machine_id,
        "workflow_id": ir.workflow_id,
        "workflow_version": ir.version,
        "workflow_ir_path": str(ir_path),
        "workflow_name": ir.name,
        "domain": ir.domain,
        "steps": len(ir.steps),
        "variables": len(ir.variables),
        "applications": ir.applications,
        "plan_id": plan.plan_id,
        "plan_path": str(plan_path),
        "total_nodes": plan.total_nodes,
        "stats": {
            "nodes_with_ocr": plan.nodes_with_ocr,
            "nodes_with_template": plan.nodes_with_template,
            "nodes_with_vlm": plan.nodes_with_vlm,
            "estimated_duration_s": plan.estimated_duration_s,
        },
    }
|
||
|
||
|
||
# =========================================================================
|
||
# Pre-check écran — Vérification pré-action par embedding CLIP
|
||
# =========================================================================
|
||
|
||
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/replay/next")
async def get_next_action(session_id: str, machine_id: str = "default"):
    """Return the next replay action for a polling agent (pull model).

    The agent polls this endpoint; the response carries the next queued
    action, or ``{"action": None, ...}`` when there is nothing to do.

    Behaviour, in order:
      * If a replay for this session is in ``paused_need_help``, no action is
        returned and the pause message is reported instead.
      * The session's queue is only served when a running replay state exists
        for this exact (machine_id, session_id) pair, so two machines sharing
        a session_id do not consume each other's actions.
      * If that yields nothing and ``machine_id`` is not "default", a queue
        belonging to a running replay of the SAME machine under another
        session id is re-keyed to the polling session.
      * Optional pre-check: when the action has a ``from_node`` and a recent
        heartbeat exists, ``_pre_check_screen_state`` is run in the default
        executor with a 500 ms budget. On mismatch the action stays queued
        and a "wait" action is returned (or, when an authentication screen is
        detected, the auth actions are injected and the first one returned).
      * Otherwise the action is removed from the queue, remembered in
        ``_retry_pending`` and returned.

    Args:
        session_id: Session id of the polling agent.
        machine_id: Machine id of the polling agent ("default" when unknown).

    Returns:
        A dict with at least ``action``, ``session_id`` and ``machine_id``;
        ``precheck``, ``auth_detected`` or pause fields are added when
        relevant.
    """
    with _replay_lock:
        # A replay paused for supervision (target_not_found) must not hand out
        # any action until the user has intervened.
        for state in _replay_states.values():
            if (state["session_id"] == session_id
                    and state["status"] == "paused_need_help"):
                logger.debug(
                    f"Replay {state['replay_id']} en pause supervisee "
                    f"pour session {session_id} — pas d'action envoyee"
                )
                return {
                    "action": None,
                    "session_id": session_id,
                    "machine_id": machine_id,
                    "replay_paused": True,
                    "pause_message": state.get("pause_message", "Replay en pause"),
                    "replay_id": state["replay_id"],
                }

        # CRITICAL: make sure the queue really belongs to this machine.
        # When two machines share one session_id they must not steal each
        # other's actions; a replay is bound to ONE machine_id through its
        # replay state, so look for a running replay owned by this machine.
        queue = []
        owning_replay = None
        for state in _replay_states.values():
            if (state.get("machine_id") == machine_id
                    and state.get("status") == "running"
                    and state.get("session_id") == session_id):
                owning_replay = state
                break

        if owning_replay:
            # This machine owns a running replay: consume its queue.
            queue = _replay_queues.get(session_id, [])
        else:
            # No replay for this machine on this session: hand out NOTHING,
            # even if _replay_queues[session_id] holds actions (they belong
            # to another machine).
            queue = []

        # Only log when there is something to distribute.
        if queue:
            logger.info(
                f"[REPLAY-QUEUE] session={session_id}, machine={machine_id}, "
                f"actions_en_attente={len(queue)}"
            )

        if not queue and machine_id != "default":
            # Lookup 1: explicit machine -> session mapping (_machine_replay_target).
            target_sid = _machine_replay_target.get(machine_id)
            if target_sid and target_sid != session_id:
                target_queue = _replay_queues.get(target_sid, [])
                if target_queue:
                    # Check that the targeted replay state really is for this machine.
                    target_state = None
                    for state in _replay_states.values():
                        if (state.get("session_id") == target_sid
                                and state.get("machine_id") == machine_id
                                and state["status"] == "running"):
                            target_state = state
                            break
                    if target_state:
                        # Re-key the queue and the state to the polling session.
                        queue = target_queue
                        _replay_queues[session_id] = target_queue
                        del _replay_queues[target_sid]
                        target_state["session_id"] = session_id
                        _machine_replay_target[machine_id] = session_id
                        logger.info(f"Replay machine-target: {machine_id} -> {target_sid} -> {session_id}")

            # Lookup 2: any running replay state of this machine under
            # another session id.
            if not queue:
                for state in _replay_states.values():
                    if (state.get("machine_id") == machine_id
                            and state["status"] == "running"
                            and state["session_id"] != session_id):
                        other_sid = state["session_id"]
                        other_queue = _replay_queues.get(other_sid, [])
                        if other_queue:
                            queue = other_queue
                            _replay_queues[session_id] = other_queue
                            del _replay_queues[other_sid]
                            state["session_id"] = session_id
                            _machine_replay_target[machine_id] = session_id
                            logger.info(f"Replay machine-state: {machine_id} -> {other_sid} -> {session_id}")
                            break

        if not queue:
            return {"action": None, "session_id": session_id, "machine_id": machine_id}

        # Peek at the next action WITHOUT removing it (needed by the pre-check).
        action = queue[0]

    # ---- Screen pre-check (optional, non-blocking) ----
    # Only applies to actions carrying a from_node (workflow actions), not to
    # auto-injected wait/retry actions nor Copilot/free-agent actions.
    from_node = action.get("from_node")
    precheck_result = None
    if from_node and action.get("type") not in ("wait",):
        heartbeat = _last_heartbeat.get(session_id)
        if heartbeat:
            age = time.time() - heartbeat["timestamp"]
            if age <= _HEARTBEAT_MAX_AGE_SECONDS:
                import asyncio

                try:
                    # We are inside a coroutine, so the running loop is the
                    # one to schedule the executor job on.
                    loop = asyncio.get_running_loop()
                    # Run the pre-check in a worker thread so the event loop
                    # is not blocked while it computes.
                    precheck_result = await asyncio.wait_for(
                        loop.run_in_executor(
                            None,  # default thread pool
                            _pre_check_screen_state,
                            session_id,
                            from_node,
                            heartbeat["path"],
                            processor,
                        ),
                        timeout=0.5,  # at most 500 ms for the pre-check
                    )
                except asyncio.TimeoutError:
                    logger.warning(
                        f"Pre-check timeout (>500ms) pour session={session_id} "
                        f"node={from_node}, skip"
                    )
                    precheck_result = None
                except Exception as e:
                    logger.error(f"Pre-check exception (non bloquant): {e}")
                    precheck_result = None
            else:
                logger.debug(
                    f"Pre-check skip: heartbeat trop ancien ({age:.1f}s "
                    f"> {_HEARTBEAT_MAX_AGE_SECONDS}s)"
                )

    # On a mismatch the action is left in the queue and a "wait" action is
    # returned so that the client waits and polls again.
    if precheck_result and not precheck_result["match"]:
        # ---- Automatic auth: detect an authentication screen (optional) ----
        # If the mismatch is caused by an auth screen, inject the auth
        # actions ahead of the blocked action.
        if _auth_handler and not precheck_result.get("popup_detected"):
            try:
                # Build a minimal screen-state dict from the heartbeat.
                heartbeat = _last_heartbeat.get(session_id, {})
                _auth_screen_state = {
                    "perception": {"detected_text": heartbeat.get("detected_text", [])},
                    "ui_elements": heartbeat.get("ui_elements", []),
                    "window": heartbeat.get("window_info", {}),
                    "ocr_text": heartbeat.get("ocr_text", ""),
                }
                auth_request = _auth_handler.detect_auth_screen(_auth_screen_state)
                if auth_request and auth_request.confidence >= 0.5:
                    auth_actions = _auth_handler.get_auth_actions(auth_request)
                    if auth_actions:
                        # Inject the auth actions at the head of the queue and
                        # take the first one in a single critical section, so
                        # no other consumer can grab it in between.
                        with _replay_lock:
                            current_q = _replay_queues.get(session_id, [])
                            first_auth = auth_actions[0]
                            _replay_queues[session_id] = auth_actions[1:] + current_q
                        logger.info(
                            f"Auth auto : {len(auth_actions)} actions injectées pour "
                            f"session={session_id} app={auth_request.app_name} "
                            f"type={auth_request.auth_type} (confiance={auth_request.confidence:.2f})"
                        )
                        # Return the first auth action right away.
                        return {
                            "action": first_auth,
                            "session_id": session_id,
                            "machine_id": machine_id,
                            "precheck": precheck_result,
                            "auth_detected": True,
                        }
            except Exception as e:
                # Auth detection is best-effort; fall through to the wait action.
                logger.warning(f"Auth auto : détection échouée (non bloquant) : {e}")

        if precheck_result.get("popup_detected"):
            wait_action = {
                "action_id": f"precheck_wait_{uuid.uuid4().hex[:6]}",
                "type": "wait",
                "reason": "popup_detected",
                "suggestion": "press_escape_or_click_close",
                "expected_node": from_node,
                "similarity": precheck_result["similarity"],
                "duration_ms": 2000,
            }
            logger.warning(
                f"Pre-check: popup détectée pour session={session_id} "
                f"node={from_node}, envoi wait+suggestion"
            )
        else:
            wait_action = {
                "action_id": f"precheck_wait_{uuid.uuid4().hex[:6]}",
                "type": "wait",
                "reason": "screen_mismatch",
                "expected_node": from_node,
                "similarity": precheck_result["similarity"],
                "threshold": _PRECHECK_SIMILARITY_THRESHOLD,
                "duration_ms": 1500,
            }
            logger.warning(
                f"Pre-check: mismatch écran pour session={session_id} "
                f"node={from_node} (sim={precheck_result['similarity']:.4f}), envoi wait"
            )
        return {
            "action": wait_action,
            "session_id": session_id,
            "machine_id": machine_id,
            "precheck": precheck_result,
        }

    # Pre-check OK (or skipped): remove the action from the queue and send it.
    with _replay_lock:
        current_queue = _replay_queues.get(session_id, [])
        if current_queue and current_queue[0].get("action_id") == action.get("action_id"):
            current_queue.pop(0)
        # Otherwise the queue changed since the peek; the peeked action is
        # sent anyway (treated as a benign race by the original code).

        # Remember the sent action so a failed verification can retry it.
        # Do NOT overwrite an entry already present (it may carry a
        # retry_count set by _schedule_retry).
        action_id_sent = action.get("action_id", "")
        if action_id_sent and action_id_sent not in _retry_pending:
            _retry_pending[action_id_sent] = {
                "action": dict(action),
                "retry_count": 0,
                "replay_id": "",
            }

    logger.info(
        f"Action envoyée à {session_id} (machine={machine_id}) : "
        f"{action.get('type')} (id={action.get('action_id')})"
        f"{' [precheck OK sim=' + str(precheck_result['similarity']) + ']' if precheck_result else ''}"
    )

    response: Dict[str, Any] = {
        "action": action,
        "session_id": session_id,
        "machine_id": machine_id,
    }
    if precheck_result:
        response["precheck"] = precheck_result
    return response
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/result")
async def report_action_result(report: ReplayResultReport):
    """Record the result of an action executed by the agent and advance the replay.

    The handler:
      1. finds the running replay for ``report.session_id``;
      2. recovers the retry bookkeeping for ``report.action_id``;
      3. optionally verifies the action from the before/after screenshots
         (skipped for "type", "key_combo" and "wait" actions, and when the
         agent reported the warning "no_screen_change" or "popup_handled");
      4. appends a result entry, then feeds the learner, the audit trail and
         the persistent click memory (each of them best-effort);
      5. applies the success / retry / supervised-pause / failure logic and
         marks the replay completed when its queue is empty.

    Retries are bounded by MAX_RETRIES_PER_ACTION and scheduled through
    ``_schedule_retry`` (the per-attempt strategy lives there, not here).

    Args:
        report: Agent report. Fields read here include ``session_id``,
            ``action_id``, ``success``, ``error``, ``warning``,
            ``screenshot``, ``screenshot_before``, ``screenshot_after``,
            ``actual_position``, ``resolution_method``, ``resolution_score``,
            ``resolution_elapsed_ms``, ``target_description`` and
            ``target_spec``.

    Returns:
        ``{"status": "no_active_replay", ...}`` when no running replay matches
        the session; otherwise a dict with ``status`` "recorded", the replay
        status, the number of remaining queued actions, the retry count and
        the verification summary (or None).
    """
    session_id = report.session_id
    action_id = report.action_id

    # Find the running replay for this session.
    with _replay_lock:
        replay_state = None
        for state in _replay_states.values():
            if state["session_id"] == session_id and state["status"] == "running":
                replay_state = state
                break

        if not replay_state:
            logger.warning(
                f"Résultat reçu pour session {session_id} mais aucun replay actif"
            )
            return {"status": "no_active_replay", "session_id": session_id}

        # Retry bookkeeping for this action (present when the action was sent
        # through /replay/next or re-queued as a retry).
        retry_info = _retry_pending.pop(action_id, None)
        retry_count = retry_info["retry_count"] if retry_info else 0
        original_action = retry_info["action"] if retry_info else None

    # Safety guard: when no bookkeeping was found, derive the retry count from
    # the "_retryN" suffixes of the action id. This avoids an endless retry
    # loop if _retry_pending is out of sync.
    if retry_count == 0 and "_retry" in action_id:
        import re
        retry_suffixes = re.findall(r"_retry\d+", action_id)
        retry_count = max(retry_count, len(retry_suffixes))
        if retry_count > 0:
            logger.warning(
                f"retry_count corrigé par action_id : {retry_count} "
                f"(action_id contient {len(retry_suffixes)} suffixes _retry)"
            )

    # Keep track of the latest screenshot received.
    screenshot_after = report.screenshot_after or report.screenshot
    if screenshot_after:
        with _replay_lock:
            replay_state["last_screenshot"] = screenshot_after

    # === Post-action verification ===
    # "type", "key_combo" and "wait" actions are never verified: they are
    # taken as successful when the agent says so.
    #
    # When the agent sent the warning "no_screen_change" or "popup_handled" it
    # already tried to handle the situation itself, so the server does not
    # verify (and therefore does not trigger a verification retry).
    agent_warning = report.warning or ""
    agent_handled_popup = agent_warning in ("no_screen_change", "popup_handled")
    if agent_handled_popup:
        logger.info(
            f"Action {action_id} : agent warning='{agent_warning}' — "
            f"popup déjà gérée côté agent, pas de retry serveur"
        )

    action_type_for_verify = (original_action or {}).get("type", "unknown")
    skip_verify = action_type_for_verify in ("type", "key_combo", "wait")
    # Also skip server-side verification when the agent already handled it.
    skip_verify = skip_verify or agent_handled_popup
    verification = None
    if report.success and screenshot_after and not skip_verify:
        # Prefer the "before" screenshot sent by the agent; fall back to the
        # last screenshot stored server-side.
        screenshot_before = report.screenshot_before or replay_state.get("_last_screenshot_before")
        if screenshot_before:
            try:
                action_dict = original_action or {"type": "unknown", "action_id": action_id}
                result_dict = {
                    "success": report.success,
                    "error": report.error,
                }
                # Use the semantic critic when the action declares an expected_result.
                expected_result = (original_action or {}).get("expected_result", "")
                action_intention = (original_action or {}).get("intention", "")
                if expected_result:
                    workflow_ctx = (
                        f"Action {replay_state.get('completed_actions', 0)+1}"
                        f"/{len(replay_state.get('actions', []))}"
                    )
                    verification = _replay_verifier.verify_with_critic(
                        action=action_dict,
                        result=result_dict,
                        screenshot_before=screenshot_before,
                        screenshot_after=screenshot_after,
                        expected_result=expected_result,
                        action_intention=action_intention,
                        workflow_context=workflow_ctx,
                    )
                    if verification.semantic_verified is not None:
                        logger.info(
                            f"Critic sémantique : {'OK' if verification.semantic_verified else 'ÉCHEC'} "
                            f"en {verification.semantic_elapsed_ms:.0f}ms — {verification.semantic_detail[:80]}"
                        )
                else:
                    # No expected_result: plain verify_action call.
                    verification = _replay_verifier.verify_action(
                        action=action_dict,
                        result=result_dict,
                        screenshot_before=screenshot_before,
                        screenshot_after=screenshot_after,
                    )
            except Exception as e:
                # Verification is best-effort: on error the action is treated
                # as unverified-but-accepted (verification stays None).
                logger.warning(f"Vérification post-action échouée: {e}")

    # Store the current screenshot as the "before" of the next action.
    if screenshot_after:
        with _replay_lock:
            replay_state["_last_screenshot_before"] = screenshot_after

    # Single definition of "the action succeeded and was not contradicted by
    # the verification"; shared by the audit trail, the click memory and the
    # state machine below so the three cannot drift apart.
    action_ok = bool(
        report.success and (verification is None or verification.verified)
    )

    # === Record the result ===
    with _replay_lock:
        result_entry = {
            "action_id": action_id,
            "success": report.success,
            "error": report.error,
            "warning": report.warning,
            "has_screenshot": bool(screenshot_after),
            "actual_position": report.actual_position,
            "retry_count": retry_count,
            "verification": verification.to_dict() if verification else None,
            "resolution_method": report.resolution_method,
            "resolution_score": report.resolution_score,
            "resolution_elapsed_ms": report.resolution_elapsed_ms,
        }
        replay_state["results"].append(result_entry)

    # === Learning: record the result for continuous improvement (best-effort) ===
    try:
        _replay_learner.record_from_replay_result(
            session_id=session_id,
            action=original_action or {"action_id": action_id, "type": "unknown"},
            result=result_entry,
            verification=verification.to_dict() if verification else None,
        )
    except Exception as e:
        logger.debug(f"Learning: échec enregistrement: {e}")

    # === Audit trail: traceability record for this action ===
    try:
        _action = original_action or {"action_id": action_id, "type": "unknown"}
        _target_spec = _action.get("target_spec", {})
        _verification = verification.to_dict() if verification else {}

        # Outcome reported to the audit trail.
        if action_ok:
            _audit_result = "success"
        elif report.success and verification and not verification.verified:
            _audit_result = "recovered" if retry_count > 0 else "failed"
        elif not report.success:
            _audit_result = "failed"
        else:
            _audit_result = "success"

        # Critic outcome: semantic verdict when available, pixel verdict otherwise.
        _critic = ""
        if verification:
            if verification.semantic_verified is True:
                _critic = "semantic_ok"
            elif verification.semantic_verified is False:
                _critic = f"semantic_fail: {verification.semantic_detail[:100]}"
            elif verification.verified:
                _critic = "pixel_ok"
            else:
                _critic = f"pixel_fail: {verification.detail[:100]}"

        _audit_trail.record(AuditEntry(
            session_id=session_id,
            action_id=action_id,
            user_id=replay_state.get("params", {}).get("user_id", ""),
            user_name=replay_state.get("params", {}).get("user_name", ""),
            machine_id=replay_state.get("machine_id", ""),
            action_type=_action.get("type", ""),
            action_detail=_target_spec.get("by_text", "") or _action.get("intention", ""),
            target_app=_target_spec.get("window_title", ""),
            execution_mode=replay_state.get("params", {}).get("execution_mode", "autonomous"),
            result=_audit_result,
            resolution_method=result_entry.get("resolution_method", ""),
            critic_result=_critic,
            recovery_action=report.warning or "",
            domain=replay_state.get("params", {}).get("domain", ""),
            workflow_id=replay_state.get("workflow_id", ""),
            workflow_name=replay_state.get("params", {}).get("workflow_name", ""),
            duration_ms=result_entry.get("resolution_elapsed_ms", 0.0) or 0.0,
        ))
    except Exception as e:
        # Still non-blocking, but a lost audit entry must be visible at the
        # default log level.
        logger.warning(f"Audit Trail: échec enregistrement: {e}")

    # === Persistent learning (click memory) ===
    # Single source of truth: the agent fills `report.actual_position` with
    # the percentage coordinates it actually clicked; the server reads them
    # directly, with no intermediate cache.
    #
    # The `target_spec` comes from `replay_state["actions"]` at
    # `current_action_index`, which is why this runs BEFORE the index is
    # incremented below.
    #
    # Strict guard: only clicks are memorised. When the reported action is
    # known (original_action) and is not itself a click, nothing is recorded:
    # otherwise a reported wait/key action would be attributed to whatever
    # click sits at the current index.
    try:
        from .replay_memory import memory_record_success, memory_record_failure

        _idx = replay_state.get("current_action_index", 0)
        _actions_meta = replay_state.get("actions", [])
        _reported_type = (original_action or {}).get("type")
        if _reported_type in (None, "click") and 0 <= _idx < len(_actions_meta):
            _current = _actions_meta[_idx] or {}
            if _current.get("type") == "click":
                _mem_target_spec = _current.get("target_spec") or {}
                _mem_window_title = (
                    _mem_target_spec.get("window_title", "")
                    or _mem_target_spec.get("expected_window_before", "")
                )

                if _mem_window_title:
                    if action_ok:
                        # Read the RESOLVED coordinates straight from the
                        # agent report. Expected format:
                        #   actual_position = {"x_pct": float, "y_pct": float}
                        _pos = report.actual_position or {}
                        _x_pct = _pos.get("x_pct") if isinstance(_pos, dict) else None
                        _y_pct = _pos.get("y_pct") if isinstance(_pos, dict) else None

                        if _x_pct is not None and _y_pct is not None:
                            memory_record_success(
                                window_title=_mem_window_title,
                                target_spec=_mem_target_spec,
                                x_pct=float(_x_pct),
                                y_pct=float(_y_pct),
                                method=(report.resolution_method or "v4_unknown"),
                                confidence=float(report.resolution_score or 0.9),
                            )
                        else:
                            logger.debug(
                                "memory_record skipped: actual_position absent "
                                "ou sans x_pct/y_pct (agent pas à jour ?)"
                            )
                    else:
                        memory_record_failure(
                            window_title=_mem_window_title,
                            target_spec=_mem_target_spec,
                            error_message=(
                                report.error or report.warning or "post_cond_failed"
                            ),
                        )
    except Exception as _mem_exc:
        logger.debug("Memory record skipped : %s", _mem_exc)

    with _replay_lock:
        # === Retry / success / failure logic ===
        if action_ok:
            # Action succeeded (verification OK, or no verification performed).
            replay_state["completed_actions"] += 1
            replay_state["current_action_index"] += 1

        elif report.success and verification and not verification.verified:
            # The agent says "success" but the verification disagrees.
            replay_state["unverified_actions"] += 1
            logger.warning(
                f"Action {action_id} marquée success mais non vérifiée: "
                f"{verification.detail}"
            )
            if verification.suggestion == "retry" and retry_count < MAX_RETRIES_PER_ACTION:
                # Re-queue the action for another attempt.
                _schedule_retry(
                    session_id, replay_state, original_action or {"action_id": action_id},
                    retry_count, "verification_failed"
                )
            else:
                # Carry on regardless (the action stays counted as unverified).
                replay_state["completed_actions"] += 1
                replay_state["current_action_index"] += 1

        elif not report.success and agent_warning == "no_screen_change":
            # The action ran but the screen did not change. NO retry: count it
            # as unverified and move on to the next action.
            replay_state["unverified_actions"] += 1
            replay_state["completed_actions"] += 1
            replay_state["current_action_index"] += 1
            logger.warning(
                f"Action {action_id} : écran inchangé (no_screen_change) — "
                f"action sans effet visible, on continue"
            )

        elif not report.success and (report.error or "") == "target_not_found":
            # Target not found visually: supervised PAUSE, not a fatal error.
            # The queue is NOT emptied, so the remaining actions stay
            # available after the user's intervention.
            target_desc = report.target_description or "élément inconnu"
            replay_state["status"] = "paused_need_help"
            replay_state["failed_action"] = {
                "action_id": action_id,
                "type": (original_action or {}).get("type", "unknown"),
                "target_description": target_desc,
                "screenshot_b64": screenshot_after or report.screenshot,
                "target_spec": report.target_spec,
            }
            replay_state["pause_message"] = f"Je ne vois pas '{target_desc}' à l'écran"
            error_entry = {
                "action_id": action_id,
                "error": f"target_not_found: {target_desc}",
                "retry_count": 0,
                "timestamp": time.time(),
            }
            replay_state["error_log"].append(error_entry)
            logger.warning(
                f"Replay PAUSE supervisée : cible '{target_desc}' non trouvée "
                f"pour {action_id} — en attente d'intervention utilisateur"
            )
            # Log the failure for future learning.
            log_replay_failure(
                replay_id=replay_state["replay_id"],
                action_id=action_id,
                target_spec=report.target_spec,
                screenshot_b64=screenshot_after or report.screenshot,
                resolution_attempts=[
                    r for r in replay_state["results"]
                    if r.get("action_id") == action_id and r.get("resolution_method")
                ],
                error="target_not_found",
                extra={
                    "target_description": target_desc,
                    "actions_completed": replay_state["completed_actions"],
                    "actions_remaining": len(_replay_queues.get(session_id, [])),
                },
            )

        elif not report.success and "visual resolve" in (report.error or "").lower():
            # Visual resolve failure (older error format): supervised PAUSE as
            # well, for agents that do not send "target_not_found" yet.
            target_desc = report.target_description or (report.error or "Visual resolve échoué")
            replay_state["status"] = "paused_need_help"
            replay_state["failed_action"] = {
                "action_id": action_id,
                "type": (original_action or {}).get("type", "unknown"),
                "target_description": target_desc,
                "screenshot_b64": screenshot_after or report.screenshot,
                "target_spec": report.target_spec,
            }
            replay_state["pause_message"] = f"Je ne vois pas '{target_desc}' à l'écran"
            error_entry = {
                "action_id": action_id,
                "error": report.error or "Visual resolve échoué",
                "retry_count": 0,
                "timestamp": time.time(),
            }
            replay_state["error_log"].append(error_entry)
            logger.warning(
                f"Replay PAUSE supervisée (compat) : visual resolve échoué pour {action_id} — "
                f"{report.error}"
            )
            # Log the failure for future learning.
            log_replay_failure(
                replay_id=replay_state["replay_id"],
                action_id=action_id,
                target_spec=report.target_spec,
                screenshot_b64=screenshot_after or report.screenshot,
                error="visual_resolve_failed",
            )

        elif not report.success and retry_count < MAX_RETRIES_PER_ACTION:
            # Genuine failure with retries left: schedule a retry.
            action_to_retry = original_action or {"action_id": action_id, "type": "unknown"}
            _schedule_retry(
                session_id, replay_state, action_to_retry,
                retry_count, report.error or "unknown_error"
            )

        else:
            # Definitive failure (retries exhausted).
            replay_state["failed_actions"] += 1
            error_entry = {
                "action_id": action_id,
                "error": report.error or "Retries épuisés",
                "retry_count": retry_count,
                "timestamp": time.time(),
            }
            replay_state["error_log"].append(error_entry)

            # Mark the replay as failed and empty its queue.
            replay_state["status"] = "error"
            _replay_queues[session_id] = []
            logger.error(
                f"Replay {replay_state['replay_id']} échoué à l'action {action_id} "
                f"après {retry_count} retries: {report.error}"
            )

            # Notify through the error callback, if one is configured.
            _notify_error_callback(replay_state, action_id, report.error)

        # The replay is finished when the queue is empty and it is still running.
        remaining = len(_replay_queues.get(session_id, []))
        if remaining == 0 and replay_state["status"] == "running":
            replay_state["status"] = "completed"
            logger.info(
                f"Replay {replay_state['replay_id']} terminé avec succès : "
                f"{replay_state['completed_actions']}/{replay_state['total_actions']} actions"
                f" ({replay_state['retried_actions']} retries, "
                f"{replay_state['unverified_actions']} non vérifiées)"
            )
            # Summary of the visual-resolution metrics.
            results_with_method = [
                r for r in replay_state["results"]
                if r.get("resolution_method")
            ]
            if results_with_method:
                methods_count = {}
                total_elapsed = 0.0
                total_score = 0.0
                for r in results_with_method:
                    m = r["resolution_method"]
                    methods_count[m] = methods_count.get(m, 0) + 1
                    total_elapsed += r.get("resolution_elapsed_ms") or 0
                    total_score += r.get("resolution_score") or 0
                avg_elapsed = total_elapsed / len(results_with_method)
                avg_score = total_score / len(results_with_method)
                methods_str = ", ".join(
                    f"{m}={c}" for m, c in sorted(methods_count.items())
                )
                logger.info(
                    f"Replay {replay_state['replay_id']} métriques résolution : "
                    f"{len(results_with_method)} resolves [{methods_str}] "
                    f"score_moy={avg_score:.2f} temps_moy={avg_elapsed:.0f}ms"
                )

    # Once the replay is completed or failed, let the VLM worker resume.
    if replay_state["status"] in ("completed", "error"):
        _clear_replay_lock()
        logger.info(
            f"Replay {replay_state['replay_id']} terminé (status={replay_state['status']}) "
            f"— worker VLM autorisé à reprendre"
        )

    return {
        "status": "recorded",
        "action_id": action_id,
        "success": report.success,
        "replay_status": replay_state["status"],
        "remaining_actions": remaining,
        "retry_count": retry_count,
        "verification": verification.to_dict() if verification else None,
    }
|
||
|
||
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/error_callback")
async def register_error_callback(config: ErrorCallbackConfig):
    """Register a callback URL for a replay's non-recoverable errors.

    The URL is stored in ``_error_callbacks`` under the replay id; this
    function itself never calls it. Intended use, per the original design
    note: the chat server registers the URL when it launches a replay and
    the server POSTs the error details to it once retries are exhausted.

    Raises:
        HTTPException: 404 when ``config.replay_id`` is not a known replay.
    """
    replay_id, callback_url = config.replay_id, config.callback_url

    with _replay_lock:
        if replay_id not in _replay_states:
            raise HTTPException(
                status_code=404,
                detail=f"Replay '{replay_id}' non trouvé"
            )
        _error_callbacks[replay_id] = callback_url

    logger.info(f"Error callback enregistré pour {replay_id}: {callback_url}")

    response = {"status": "callback_registered"}
    response["replay_id"] = replay_id
    response["callback_url"] = callback_url
    return response
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/replay/{replay_id}")
async def get_replay_status(replay_id: str):
    """Return the state of a running or finished replay.

    Keys starting with ``_`` are internal and are stripped from the response.
    When the replay is in supervised pause (``paused_need_help``) the
    response is enriched with ``actions_completed``, ``actions_remaining``
    and a user-facing ``message``.

    Raises:
        HTTPException: 404 when the replay id is unknown.
    """
    with _replay_lock:
        state = _replay_states.get(replay_id)
        if not state:
            raise HTTPException(
                status_code=404, detail=f"Replay '{replay_id}' non trouvé"
            )

        # Public view: drop the internal (underscore-prefixed) fields.
        public_view = {}
        for key, value in state.items():
            if key.startswith("_"):
                continue
            public_view[key] = value

        if state["status"] == "paused_need_help":
            pending = _replay_queues.get(state["session_id"], [])
            public_view["actions_completed"] = state["completed_actions"]
            public_view["actions_remaining"] = len(pending)
            public_view["message"] = state.get("pause_message", "Replay en pause")
            # Per the original note, state["failed_action"] already carries
            # screenshot_b64 and target_spec, so nothing more is added here.

        return public_view
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/replays")
async def list_replays():
    """List every known replay state, whatever its status.

    Internal fields (keys prefixed with ``_``) are removed from each entry.
    """
    with _replay_lock:
        snapshots = []
        for state in _replay_states.values():
            visible = {k: v for k, v in state.items() if not k.startswith("_")}
            snapshots.append(visible)
        return {"replays": snapshots}
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/{replay_id}/resume")
async def resume_replay(replay_id: str):
    """Resume a replay that is in supervised pause (``paused_need_help``).

    The replay is switched back to ``running`` and, when a failed action was
    recorded, that action is re-queued at the head of the session queue
    under a new ``<action_id>_resume`` identifier so that it is retried.

    Raises:
        HTTPException: 404 when the replay id is unknown, 409 when the
            replay is not currently paused.
    """
    with _replay_lock:
        state = _replay_states.get(replay_id)
        if not state:
            raise HTTPException(
                status_code=404, detail=f"Replay '{replay_id}' non trouvé"
            )

        current_status = state["status"]
        if current_status != "paused_need_help":
            raise HTTPException(
                status_code=409,
                detail=(
                    f"Replay '{replay_id}' n'est pas en pause "
                    f"(status actuel: {current_status})"
                ),
            )

        # Capture the pause context before clearing it.
        failed_action = state.get("failed_action")
        session_id = state["session_id"]

        # Switch the replay back to running mode.
        state["status"] = "running"
        state["failed_action"] = None
        state["pause_message"] = None

        if failed_action and failed_action.get("action_id"):
            original_action_id = failed_action["action_id"]

            # Prefer the full action kept in the retry bookkeeping.
            pending_entry = _retry_pending.pop(original_action_id, {})
            original = pending_entry.get("action")
            if not original:
                # Fall back to a minimal action rebuilt from the failure context.
                original = {
                    "action_id": original_action_id,
                    "type": failed_action.get("type", "click"),
                    "target_spec": failed_action.get("target_spec"),
                    "visual_mode": True,
                }

            # Track the retried action under a fresh id.
            resume_id = f"{original_action_id}_resume"
            resume_action = dict(original)
            resume_action["action_id"] = resume_id
            _retry_pending[resume_id] = {
                "action": original,
                "retry_count": 0,
                "replay_id": replay_id,
                "reason": "resume_after_pause",
            }

            # Head of the queue: the retried action runs first.
            queue = _replay_queues.get(session_id, [])
            _replay_queues[session_id] = [resume_action] + queue

        remaining = len(_replay_queues.get(session_id, []))

    logger.info(
        f"Replay {replay_id} repris apres pause supervisee — "
        f"{remaining} actions en attente"
    )

    return {
        "status": "resumed",
        "replay_id": replay_id,
        "session_id": session_id,
        "remaining_actions": remaining,
    }
|
||
|
||
|
||
# =========================================================================
|
||
# Visual Replay — Résolution visuelle des cibles (module resolve_engine)
|
||
# =========================================================================
|
||
from .resolve_engine import (
|
||
ResolveTargetRequest,
|
||
PreAnalyzeRequest,
|
||
_resolve_by_template_matching,
|
||
_validate_match_context,
|
||
_get_omniparser,
|
||
_resolve_by_yolo,
|
||
_get_vlm_client,
|
||
_build_target_description,
|
||
_vlm_quick_find,
|
||
_resolve_by_grounding,
|
||
_get_som_engine_api,
|
||
_resolve_by_som,
|
||
_resolve_target_sync,
|
||
_fuzzy_match,
|
||
_fallback_response,
|
||
_pre_analyze_screen_sync,
|
||
_locate_popup_button,
|
||
)
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/resolve_target")
async def resolve_target(request: ResolveTargetRequest):
    """Visually resolve a UI target from a screenshot.

    The screenshot is decoded, written to a temporary JPEG file and handed
    to ``_resolve_target_sync`` on the default thread pool. The resolver's
    result is returned unchanged.

    Matching strategy, as described by the original documentation (the
    cascade itself lives in ``_resolve_target_sync``):
        1. OpenCV template matching (~100 ms) when an anchor image is given
        2. VLM quick find (~5-10 s)
        3. Full semantic matching (~15-20 s)
        4. Fallback to static coordinates

    Any decoding or analysis failure yields ``_fallback_response(...)``
    rather than an HTTP error.
    """
    import asyncio
    import base64
    import io
    import os
    import tempfile

    from PIL import Image

    # Modes the Pillow JPEG writer accepts; anything else (RGBA, P, LA, ...)
    # must be converted first or ``save`` raises.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    # Decode the screenshot.
    try:
        img_bytes = base64.b64decode(request.screenshot_b64)
        img = Image.open(io.BytesIO(img_bytes))
        if img.mode not in jpeg_modes:
            img = img.convert("RGB")
    except Exception as e:
        logger.error(f"Décodage screenshot échoué: {e}")
        return _fallback_response(request, "decode_error", str(e))

    tmp_path = None
    try:
        # The analysers expect a file path, so persist the image temporarily.
        # Done inside the try block so a failed save still cleans up the file
        # and still produces a fallback response.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name
            img.save(tmp, format="JPEG", quality=90)

        # Run on the default thread pool (None), not the GPU executor.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            _resolve_target_sync,
            tmp_path,
            request.target_spec,
            request.screen_width,
            request.screen_height,
            request.fallback_x_pct,
            request.fallback_y_pct,
            request.strict_mode,
            processor,
        )
        return result
    except Exception as e:
        logger.error(f"Résolution visuelle échouée: {e}")
        return _fallback_response(request, "analysis_error", str(e))
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best effort: the file may already be gone.
                pass
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/replay/pre_analyze")
async def pre_analyze_screen(request: PreAnalyzeRequest):
    """Observer step: analyse the screen before target resolution.

    The screenshot is first checked for decodability; an undecodable image
    is reported as ``{"screen_state": "ok", "detail": "decode error: ..."}``.
    Otherwise the analysis is delegated to ``_pre_analyze_screen_sync`` on
    the default thread pool and its result is returned as is.

    Result fields, as described by the original documentation:
        - screen_state: "ok" | "popup" | "unexpected"
        - popup_label: text of the popup button to click (popup only)
        - popup_coords: {x_pct, y_pct} of that button (popup only)
        - detail: description of the problem
    """
    import asyncio
    import base64
    import io

    from PIL import Image

    # Validation only: the decoded image itself is not used afterwards.
    try:
        raw = base64.b64decode(request.screenshot_b64)
        Image.open(io.BytesIO(raw))
    except Exception as e:
        return {"screen_state": "ok", "detail": f"decode error: {e}"}

    analysis_args = (
        request.screenshot_b64,
        request.expected_state,
        request.window_title,
        request.screen_width,
        request.screen_height,
    )
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(None, _pre_analyze_screen_sync, *analysis_args)
|
||
|
||
|
||
# =========================================================================
|
||
# Learning Pack — Export / Import pour la fédération des apprentissages
|
||
# =========================================================================
|
||
|
||
class LearningPackImportRequest(BaseModel):
    """Request body for importing a Learning Pack."""

    # Complete pack as JSON; passed to LearningPack.from_dict() by the
    # import endpoint.
    pack: Dict[str, Any]
|
||
|
||
|
||
@app.get("/api/v1/traces/stream/learning-pack/export")
async def export_learning_pack(client_id: str, request: Request):
    """Export the loaded workflows as a Learning Pack.

    All workflows currently held by the stream processor are handed to
    ``LearningPackExporter.export`` together with the stripped
    ``client_id``. Per the original documentation the exporter hashes the
    client id (SHA-256) so the pack is anonymised; that happens inside the
    exporter, not here.

    Query params:
        client_id: client identifier (required, must not be blank).

    Returns:
        The pack serialised with ``pack.to_dict()``.

    Raises:
        HTTPException: 500 when the federation modules cannot be imported,
            400 when ``client_id`` is blank, 404 when no workflow is loaded.
    """
    try:
        from core.federation.learning_pack import LearningPackExporter
        # Imported only to check availability; not used below.
        from core.models.workflow_graph import Workflow  # noqa: F401
    except ImportError as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Module federation non disponible : {exc}",
        )

    if not (client_id and client_id.strip()):
        raise HTTPException(status_code=400, detail="client_id requis")

    # Every workflow loaded by the StreamProcessor is exported.
    workflows = list(processor._workflows.values())
    if not workflows:
        raise HTTPException(
            status_code=404,
            detail="Aucun workflow trouvé pour l'export",
        )

    pack = LearningPackExporter().export(workflows, client_id=client_id.strip())

    # Only truncated identifiers go to the log.
    client_preview = client_id[:8] + "..."
    hash_preview = pack.source_hash[:16] + "..."
    logger.info(
        "Learning pack exporté pour client_id=%s (hash=%s) : %d workflows, %d prototypes",
        client_preview, hash_preview,
        len(workflows), len(pack.screen_prototypes),
    )
    return pack.to_dict()
|
||
|
||
|
||
@app.post("/api/v1/traces/stream/learning-pack/import")
async def import_learning_pack(body: LearningPackImportRequest, request: Request):
    """Import a Learning Pack into the module-level global FAISS index.

    Body JSON:
        { "pack": { ... } }, parsed with ``LearningPack.from_dict``.

    Returns:
        Import statistics: pack id, source hash, number of vectors added
        and the index stats.

    Raises:
        HTTPException: 500 when the federation modules cannot be imported,
            400 when the pack cannot be parsed.
    """
    global _global_faiss_index

    try:
        from core.federation.learning_pack import LearningPack
        from core.federation.faiss_global import GlobalFAISSIndex
    except ImportError as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Module federation non disponible : {exc}",
        )

    try:
        pack = LearningPack.from_dict(body.pack)
    except Exception as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Format de Learning Pack invalide : {exc}",
        )

    # Lazily create the module-level singleton on first use.
    if _global_faiss_index is None:
        _global_faiss_index = GlobalFAISSIndex()
    index = _global_faiss_index

    added = index.add_pack(pack)
    stats = index.get_stats()

    logger.info(
        "Learning pack importé : pack_id=%s, +%d vecteurs (total=%d)",
        pack.pack_id, added, stats["total_vectors"],
    )

    summary = {"status": "ok"}
    summary["pack_id"] = pack.pack_id
    summary["source_hash"] = pack.source_hash
    summary["vectors_added"] = added
    summary["index_stats"] = stats
    return summary
|
||
|
||
|
||
# Global FAISS index (singleton, initialised on the first Learning Pack import)
|
||
# Stays None until import_learning_pack() rebinds it through its `global`
# statement.
_global_faiss_index = None
|
||
|
||
|
||
# =========================================================================
|
||
# Endpoints Audit Trail — traçabilité complète des actions RPA
|
||
# =========================================================================
|
||
|
||
@app.get("/api/v1/audit/history")
async def audit_history(
    date_from: str = "",
    date_to: str = "",
    user_id: str = "",
    session_id: str = "",
    result: str = "",
    action_type: str = "",
    workflow_id: str = "",
    domain: str = "",
    limit: int = 100,
    offset: int = 0,
):
    """Paginated audit history with filters.

    Query parameters (all forwarded to ``_audit_trail.query``; the defaults
    and accepted values below are those stated by the original docs):
        date_from   : start date (YYYY-MM-DD), defaults to today
        date_to     : end date (YYYY-MM-DD), defaults to date_from
        user_id     : filter by TIM identifier
        session_id  : filter by session
        result      : filter by outcome (success, failed, recovered, skipped)
        action_type : filter by action type (click, type, key_combo, ...)
        workflow_id : filter by workflow
        domain      : filter by business domain
        limit       : max number of results, clamped here to 1..1000
        offset      : pagination offset, clamped here to >= 0

    Returns:
        ``status``, the number of entries, the effective ``offset`` and
        ``limit``, and the entries themselves.
    """
    # Clamp paging values so a client cannot request an abusive page.
    page_size = min(max(1, limit), 1000)
    start = max(0, offset)

    filters = {
        "date_from": date_from,
        "date_to": date_to,
        "user_id": user_id,
        "session_id": session_id,
        "result": result,
        "action_type": action_type,
        "workflow_id": workflow_id,
        "domain": domain,
    }
    entries = _audit_trail.query(**filters, limit=page_size, offset=start)

    return {
        "status": "ok",
        "count": len(entries),
        "offset": start,
        "limit": page_size,
        "entries": entries,
    }
|
||
|
||
|
||
@app.get("/api/v1/audit/summary")
async def audit_summary(
    date: str = "",
):
    """Daily audit summary.

    Query parameter:
        date : target date (YYYY-MM-DD); the original docs state it
               defaults to today (handled by ``_audit_trail.get_summary``).

    Returns:
        ``{"status": "ok"}`` merged with the summary produced by
        ``_audit_trail.get_summary``; summary keys win on collision.
    """
    payload = {"status": "ok"}
    payload.update(_audit_trail.get_summary(target_date=date))
    return payload
|
||
|
||
|
||
@app.get("/api/v1/audit/export")
async def audit_export(
    date_from: str = "",
    date_to: str = "",
    user_id: str = "",
    session_id: str = "",
):
    """CSV export of the audit history.

    Query parameters (forwarded to ``_audit_trail.export_csv``; defaults as
    stated by the original docs):
        date_from  : start date (YYYY-MM-DD), defaults to today
        date_to    : end date (YYYY-MM-DD), defaults to date_from
        user_id    : filter by TIM identifier
        session_id : filter by session

    Returns:
        A ``text/csv`` response served as an attachment.

    Raises:
        HTTPException: 404 when the export is empty.
    """
    import re

    from fastapi.responses import Response

    csv_data = _audit_trail.export_csv(
        date_from=date_from,
        date_to=date_to,
        user_id=user_id,
        session_id=session_id,
    )

    if not csv_data:
        raise HTTPException(
            status_code=404,
            detail="Aucune entrée d'audit trouvée pour les filtres spécifiés.",
        )

    def _filename_part(value: str) -> str:
        # The dates come straight from the query string and end up inside a
        # quoted header value: keep only characters that cannot break it.
        # A well-formed YYYY-MM-DD date passes through unchanged.
        return re.sub(r"[^0-9A-Za-z_-]", "_", value)

    # Download file name.
    filename = f"audit_{_filename_part(date_from) if date_from else 'today'}"
    if date_to and date_to != date_from:
        filename += f"_to_{_filename_part(date_to)}"
    filename += ".csv"

    return Response(
        content=csv_data,
        media_type="text/csv; charset=utf-8",
        headers={
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )
|
||
|
||
|
||
# =========================================================================
|
||
# Task Planner — Comprendre et exécuter des ordres en langage naturel
|
||
# =========================================================================
|
||
|
||
from .task_planner import TaskPlanner
|
||
|
||
# Module-level planner instance shared by the task endpoints and the chat manager.
_task_planner = TaskPlanner()
|
||
|
||
|
||
class TaskRequest(BaseModel):
    """Natural-language task request."""

    # Instruction to carry out, e.g. "Traite les dossiers de janvier".
    instruction: str
    # Target machine.
    machine_id: str = "default"
    # When True, only the plan is returned; nothing is executed.
    dry_run: bool = False
|
||
|
||
|
||
@app.post("/api/v1/task")
async def execute_task(request: TaskRequest):
    """Execute a task described in natural language.

    Steps:
        1. List the available workflows.
        2. Ask the task planner to understand the instruction.
        3. Return early when the instruction is not understood, or when
           ``dry_run`` is set (plan only).
        4. Execute the plan; the planner launches replays through a callback
           that calls this server's own replay endpoints over HTTP.

    Example instructions:
        - "Ouvre le bloc-notes et écris bonjour"
        - "Traite les dossiers de janvier"
        - "Recherche voiture électrique sur Google"

    Returns:
        A dict whose ``status`` is one of ``not_understood``, ``planned``,
        ``executed`` or ``failed``, plus the instruction, the plan and (when
        executed) the result.
    """
    import asyncio

    # 1. List the available workflows.
    workflows = _list_available_workflows()

    # 2. Understand the instruction (blocking call, run off the event loop).
    loop = asyncio.get_running_loop()
    plan = await loop.run_in_executor(
        None,
        lambda: _task_planner.understand(
            instruction=request.instruction,
            available_workflows=workflows,
        ),
    )

    if not plan.understood:
        return {
            "status": "not_understood",
            "instruction": request.instruction,
            "error": plan.error or "Instruction non comprise",
            "plan": plan.to_dict(),
        }

    # 3. Dry run: return the plan without executing it.
    if request.dry_run:
        return {
            "status": "planned",
            "instruction": request.instruction,
            "plan": plan.to_dict(),
        }

    # 4. Execute.
    def replay_callback(session_id="", machine_id="", params=None, actions=None, task_description=""):
        """Launch a replay on behalf of the planner and return its replay id.

        Returns None when neither ``session_id`` nor ``actions`` is given.
        Raises RuntimeError when the replay endpoint answers with an error.
        """
        if not session_id and not actions:
            return None

        import requests as _req

        base_url = "http://localhost:5005/api/v1/traces/stream"
        auth_headers = {"Authorization": f"Bearer {API_TOKEN}"}

        if session_id:
            # Replay mode: re-run a known workflow. The query string is built
            # by `requests` so that special characters are URL-encoded.
            resp = _req.post(
                f"{base_url}/replay-session",
                params={"session_id": session_id, "machine_id": machine_id},
                headers=auth_headers,
                timeout=600,
            )
            if resp.ok:
                return resp.json().get("replay_id", "")
            raise RuntimeError(f"Replay échoué: {resp.text[:200]}")

        # Free mode: planned actions.
        resp = _req.post(
            f"{base_url}/replay/raw",
            json={
                "session_id": "",
                "actions": actions,
                "machine_id": machine_id,
                "task_description": task_description,
            },
            headers=auth_headers,
            timeout=30,
        )
        if resp.ok:
            return resp.json().get("replay_id", "")
        raise RuntimeError(f"Replay raw échoué: {resp.text[:200]}")

    result = await loop.run_in_executor(
        None,
        lambda: _task_planner.execute(
            plan=plan,
            replay_callback=replay_callback,
            machine_id=request.machine_id,
        ),
    )

    return {
        "status": "executed" if result.success else "failed",
        "instruction": request.instruction,
        "plan": plan.to_dict(),
        "result": result.to_dict(),
    }
|
||
|
||
|
||
@app.get("/api/v1/task/capabilities")
async def list_capabilities():
    """List what the assistant can do, based on the available workflows."""
    available = _list_available_workflows()
    capabilities = _task_planner.list_capabilities(available)
    return {
        "capabilities": capabilities,
        "workflows": available,
        "total": len(available),
    }
|
||
|
||
|
||
def _list_available_workflows() -> List[Dict[str, Any]]:
    """List the recorded sessions the planner can use as workflows.

    Walks ``LIVE_SESSIONS_DIR/<machine>/<sess_*>`` and keeps every session
    directory that contains a ``live_events.jsonl`` file. Machine
    directories whose name starts with ".", "embeddings" or "streaming" are
    ignored. Any error stops the scan; whatever was collected so far is
    returned and the error is logged at debug level.
    """
    found = []
    ignored_prefixes = (".", "embeddings", "streaming")

    try:
        for machine_dir in LIVE_SESSIONS_DIR.iterdir():
            if not machine_dir.is_dir() or machine_dir.name.startswith(ignored_prefixes):
                continue

            for session_dir in machine_dir.iterdir():
                if not session_dir.is_dir() or not session_dir.name.startswith("sess_"):
                    continue

                events_file = session_dir / "live_events.jsonl"
                if not events_file.is_file():
                    continue

                # Derive a human-readable description from the events.
                desc = _extract_session_description(events_file)
                entry = {
                    "session_id": session_dir.name,
                    "name": desc.get("name", session_dir.name),
                    "description": desc.get("description", ""),
                    "machine": machine_dir.name,
                    "event_count": desc.get("event_count", 0),
                }
                found.append(entry)
    except Exception as e:
        logger.debug(f"Erreur listage workflows: {e}")

    return found
|
||
|
||
|
||
def _extract_session_description(events_file) -> Dict[str, Any]:
    """Build a business-level description of a session from its event log.

    Reads up to the first 100 non-blank lines of a JSON-lines file and
    derives a short description (applications used, launcher, typed text,
    shortcuts, click count) intended to help the TaskPlanner match an
    instruction to a recorded session.

    Malformed lines are skipped individually: invalid JSON, a JSON value
    that is not an object, or an event whose ``window`` / ``to`` field is
    not an object no longer discards the whole description.

    Args:
        events_file: path of the ``live_events.jsonl`` file (anything
            accepted by ``open``).

    Returns:
        A dict with ``name``, ``description``, ``event_count``, ``apps`` and
        ``typed_text_preview``. ``event_count`` counts non-blank lines and
        stops at 101 when the file holds more than 100 of them. If the file
        cannot be read at all, the fallback
        ``{"name": "?", "description": "", "event_count": 0}`` is returned.
    """
    ignored_titles = ("unknown_window", "Program Manager")
    title_separators = (" – ", " - ", " — ")
    max_events = 100

    try:
        app_names = set()      # Application names (right-hand part of the title)
        typed_texts = []       # Text typed by the user
        key_combos = []        # Keyboard shortcuts used
        event_types = {}       # Counter per event type
        window_sequence = []   # Windows visited, consecutive duplicates collapsed
        event_count = 0

        # The log holds UTF-8 text (accented window titles); do not depend on
        # the platform default encoding.
        with open(events_file, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                event_count += 1
                if event_count > max_events:
                    break

                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(obj, dict):
                    continue
                evt = obj.get("event", obj)
                if not isinstance(evt, dict):
                    continue

                evt_type = evt.get("type", "")
                if not isinstance(evt_type, str):
                    evt_type = ""
                event_types[evt_type] = event_types.get(evt_type, 0) + 1

                # Collect windows and application names.
                window = evt.get("window")
                title = window.get("title", "") if isinstance(window, dict) else ""
                if isinstance(title, str) and title and title not in ignored_titles:
                    if title not in window_sequence[-1:]:
                        window_sequence.append(title)
                    # The application name is the part after the last separator.
                    for sep in title_separators:
                        if sep in title:
                            app_names.add(title.split(sep)[-1].strip())
                            break
                    else:
                        app_names.add(title[:30])

                # Collect typed text (single characters are ignored).
                if evt_type == "text_input":
                    text = evt.get("text", "")
                    if isinstance(text, str) and len(text) > 1:
                        typed_texts.append(text)

                # Collect keyboard shortcuts.
                if evt_type == "key_combo":
                    keys = evt.get("keys", [])
                    if isinstance(keys, (list, tuple)) and keys:
                        key_combos.append("+".join(str(k) for k in keys))

                # Window change: extend the visited-window flow.
                if evt_type == "window_focus_change":
                    target = evt.get("to")
                    to_title = target.get("title", "") if isinstance(target, dict) else ""
                    if (
                        isinstance(to_title, str)
                        and to_title
                        and to_title not in ignored_titles
                        and to_title not in window_sequence[-1:]
                    ):
                        window_sequence.append(to_title)

        # --- Build the semantic description ---
        apps_list = sorted(app_names)[:5]
        apps_str = ", ".join(apps_list)
        desc_parts = []

        # Detect common launch patterns.
        lowered_combos = {k.lower() for k in key_combos}
        has_run_dialog = any("Exécuter" in w for w in window_sequence)
        has_search = any("Rechercher" in w or "Recherche" in w for w in window_sequence)
        has_win_r = "win+r" in lowered_combos
        has_win_s = "win+s" in lowered_combos

        # Main applications, launchers excluded.
        main_apps = [a for a in apps_list if a not in ("Exécuter", "Rechercher")]
        launcher = ""
        if has_run_dialog or has_win_r:
            launcher = "via Exécuter (Win+R)"
        elif has_search or has_win_s:
            launcher = "via la recherche Windows"

        if main_apps:
            verb = "Ouvrir" if launcher else "Utiliser"
            desc_parts.append(f"{verb} {', '.join(main_apps)} {launcher}".strip())
        elif launcher:
            desc_parts.append(f"Lancer une application {launcher}")

        # Typed text.
        total_typed = "".join(typed_texts)
        if len(total_typed) > 5:
            desc_parts.append("écrire du texte")
        elif typed_texts:
            desc_parts.append(f"saisir '{total_typed[:30]}'")

        # Notable shortcuts (launcher shortcuts are already reported above).
        notable_combos = [k for k in key_combos if k.lower() not in ("win+r", "win+s")]
        if notable_combos:
            combos_str = ", ".join(sorted(set(notable_combos))[:3])
            desc_parts.append(f"raccourcis : {combos_str}")

        # Click count, mentioned only when significant.
        click_count = event_types.get("mouse_click", 0)
        if click_count > 5:
            desc_parts.append(f"{click_count} clics")

        description = " et ".join(desc_parts) if desc_parts else f"Workflow avec {apps_str}"
        name = apps_str or "Session sans nom"

        return {
            "name": name,
            "description": description,
            "event_count": event_count,
            "apps": apps_list,
            "typed_text_preview": total_typed[:50] if typed_texts else "",
        }
    except Exception:
        # Deliberate best effort: an unreadable session must not break the
        # workflow listing, so it is reported with a placeholder instead.
        return {"name": "?", "description": "", "event_count": 0}
|
||
|
||
|
||
# =========================================================================
|
||
# Chat conversationnel (Léa conversationnelle)
|
||
# =========================================================================
|
||
|
||
from .chat_interface import ChatManager # noqa: E402
|
||
|
||
|
||
def _chat_replay_callback(session_id="", machine_id="default", params=None, **kwargs):
    """Launch a replay on behalf of a chat session and return its replay id.

    Calls this server's own ``/replay-session`` endpoint over HTTP; per the
    original note this reuses the endpoint's existing auth / rate-limit /
    enqueue logic. ``params`` and extra keyword arguments are accepted for
    signature compatibility and ignored.

    Raises:
        ValueError: when ``session_id`` is empty.
        RuntimeError: when the endpoint answers with an error status.
    """
    # Validate first so a bad call fails before any import or network work.
    if not session_id:
        raise ValueError("session_id requis pour replay chat")

    import requests as _req

    # Let `requests` build the query string so both values are URL-encoded.
    resp = _req.post(
        "http://localhost:5005/api/v1/traces/stream/replay-session",
        params={"session_id": session_id, "machine_id": machine_id},
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        timeout=600,
    )
    if not resp.ok:
        raise RuntimeError(f"Replay échoué: {resp.text[:200]}")
    return resp.json().get("replay_id", "")
|
||
|
||
|
||
def _chat_status_provider(replay_id: str) -> Dict[str, Any]:
    """Return the public state of a replay for a chat session.

    Reads ``_replay_states`` directly in memory (no HTTP round trip). An
    empty dict is returned when ``replay_id`` is falsy or unknown; keys
    starting with ``_`` are internal and filtered out.
    """
    if replay_id:
        with _replay_lock:
            state = _replay_states.get(replay_id)
            if state:
                return {
                    key: value
                    for key, value in state.items()
                    if not key.startswith("_")
                }
    return {}
|
||
|
||
|
||
# Module-level chat manager, wired to the shared planner and to the
# module-level callbacks for listing workflows, launching replays and
# reading replay status.
_chat_manager = ChatManager(
    task_planner=_task_planner,
    workflows_provider=_list_available_workflows,
    replay_callback=_chat_replay_callback,
    status_provider=_chat_status_provider,
)
|
||
|
||
|
||
class ChatMessageRequest(BaseModel):
    """Message sent by the user in a chat session."""

    # Raw message text, forwarded to the session's send_message().
    message: str
|
||
|
||
|
||
class ChatConfirmRequest(BaseModel):
    """Confirmation (or refusal) of a pending plan."""

    # True confirms the plan, False refuses it.
    confirmed: bool = True
|
||
|
||
|
||
class ChatSessionCreateRequest(BaseModel):
    """Parameters for creating a chat session."""

    # Machine the session is attached to.
    machine_id: str = "default"
|
||
|
||
|
||
@app.post("/api/v1/chat/session")
async def create_chat_session(request: ChatSessionCreateRequest = None):
    """Create a new chat session.

    The request body is optional; without it the session is created for
    the ``"default"`` machine.
    """
    if request:
        machine_id = request.machine_id
    else:
        machine_id = "default"

    session = _chat_manager.create_session(machine_id=machine_id)

    payload = {"ok": True}
    payload["session_id"] = session.session_id
    payload["state"] = session.state
    payload["history"] = session.get_history()
    return payload
|
||
|
||
|
||
@app.post("/api/v1/chat/{session_id}/message")
async def post_chat_message(session_id: str, request: ChatMessageRequest):
    """Send a message in a chat session.

    The message is processed on the default thread pool. The response is
    the session's reply merged with the session id, the current state and
    the full history, so the client can refresh itself.

    Raises:
        HTTPException: 404 when the chat session does not exist.
    """
    import asyncio

    session = _chat_manager.get_session(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail=f"Session chat '{session_id}' non trouvée")

    def _deliver():
        return session.send_message(request.message)

    loop = asyncio.get_event_loop()
    reply = await loop.run_in_executor(None, _deliver)

    # Always include the history and the current state.
    return {
        **reply,
        "session_id": session_id,
        "state": session.state,
        "history": session.get_history(),
    }
|
||
|
||
|
||
@app.get("/api/v1/chat/{session_id}/history")
async def get_chat_history(session_id: str):
    """Return the snapshot of a chat session.

    While the session is in the ``executing`` state its progress is
    refreshed first; a failing refresh is only logged at debug level.

    Raises:
        HTTPException: 404 when the chat session does not exist.
    """
    session = _chat_manager.get_session(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail=f"Session chat '{session_id}' non trouvée")

    executing = session.state == "executing"
    if executing:
        try:
            session.refresh_progress()
        except Exception as e:
            logger.debug(f"chat refresh_progress erreur: {e}")

    snapshot = session.get_snapshot()
    return {"ok": True, "session_id": session_id, "snapshot": snapshot}
|
||
|
||
|
||
@app.post("/api/v1/chat/{session_id}/confirm")
async def confirm_chat_plan(session_id: str, request: ChatConfirmRequest = None):
    """Confirm (or refuse) execution of the pending plan.

    The request body is optional; without it the plan is confirmed. The
    confirmation is processed on the default thread pool and the reply is
    merged with the session id, the current state and the full history.

    Raises:
        HTTPException: 404 when the chat session does not exist.
    """
    import asyncio

    session = _chat_manager.get_session(session_id)
    if session is None:
        raise HTTPException(status_code=404, detail=f"Session chat '{session_id}' non trouvée")

    if request:
        confirmed = request.confirmed
    else:
        confirmed = True

    def _apply_decision():
        return session.confirm(confirmed=confirmed)

    loop = asyncio.get_event_loop()
    reply = await loop.run_in_executor(None, _apply_decision)

    return {
        **reply,
        "session_id": session_id,
        "state": session.state,
        "history": session.get_history(),
    }
|
||
|
||
|
||
@app.get("/api/v1/chat/sessions")
async def list_chat_sessions():
    """List the chat sessions known to the chat manager."""
    sessions = _chat_manager.list_sessions()
    return {"ok": True, "sessions": sessions}
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: configure logging, then serve the app with uvicorn
    # on every interface, port 5005.
    import uvicorn

    log_format = "%(asctime)s [API-STREAM] %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    uvicorn.run(app, host="0.0.0.0", port=5005)
|