# rpa_vision_v3/agent_v0/server_v1/api_stream.py

# agent_v0/server_v1/api_stream.py
"""
API de Streaming Temps Réel pour RPA Vision V3.

Connecte l'Agent V1 au core pipeline via StreamProcessor.
Tous les calculs GPU (ScreenAnalyzer, CLIP, FAISS) tournent ici sur le serveur.

Inclut les endpoints de replay pour renvoyer des ordres d'exécution à l'Agent V1.
"""

import json
import logging
import os
import threading
import time
import uuid
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any, Dict, List, Optional

from fastapi import BackgroundTasks, FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from .replay_verifier import ReplayVerifier, VerificationResult
from .session_worker import SessionWorker
from .stream_processor import StreamProcessor
from .worker_stream import StreamWorker

# Global replay verifier instance (compares screenshots before/after an action)
_replay_verifier = ReplayVerifier()

# Maximum number of retries per action before declaring a failure
MAX_RETRIES_PER_ACTION = 3

# Safety limits for the replay queues
MAX_ACTIONS_PER_REPLAY = 500       # Max actions per replay request
MAX_REPLAY_STATES = 1000           # Max entries kept in _replay_states
REPLAY_STATE_TTL_SECONDS = 3600    # Finished replays are cleaned up automatically after 1h

# Actions currently being retried: action_id -> {"action": ..., "retry_count": N, "replay_id": ...}
_retry_pending: Dict[str, Dict[str, Any]] = {}

# Error callbacks per replay: replay_id -> callback_url
_error_callbacks: Dict[str, str] = {}

# Optional optimisation of replay actions through primitive gestures.
# When the agent_chat package is not importable the catalog is simply disabled.
try:
    from agent_chat.gesture_catalog import get_gesture_catalog
    _gesture_catalog = get_gesture_catalog()
except ImportError:
    _gesture_catalog = None

logger = logging.getLogger("api_stream")
app = FastAPI(title="RPA Vision V3 - Streaming API v1")

# CORS — allowed origins (VWB frontend, Agent Chat, Dashboard)
# Configurable through the CORS_ORIGINS environment variable (comma-separated)
_DEFAULT_CORS_ORIGINS = (
    "http://localhost:3002,"       # VWB Frontend (Vite/React)
    "http://localhost:5002,"       # VWB Backend (Flask)
    "http://localhost:5004,"       # Agent Chat
    "http://localhost:5001,"       # Web Dashboard
    "http://192.168.1.40:3002,"   # VWB Frontend reached from the local network
    "http://192.168.1.40:5004"    # Agent Chat reached from the local network
)
CORS_ORIGINS = os.environ.get("CORS_ORIGINS", _DEFAULT_CORS_ORIGINS).split(",")
# Drop surrounding whitespace and empty entries (e.g. a trailing comma)
CORS_ORIGINS = [o.strip() for o in CORS_ORIGINS if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ORIGINS,
    allow_methods=["GET", "POST"],
    allow_headers=["Content-Type", "Authorization"],
)

# Directory holding the live sessions (created at import time if missing)
ROOT_DIR = Path(__file__).parent.parent.parent
LIVE_SESSIONS_DIR = ROOT_DIR / "data" / "training" / "live_sessions"
LIVE_SESSIONS_DIR.mkdir(parents=True, exist_ok=True)

# Shared global instances
processor = StreamProcessor(data_dir=str(LIVE_SESSIONS_DIR))
worker = StreamWorker(live_dir=str(LIVE_SESSIONS_DIR), processor=processor)

# Asynchronous worker processing finalized sessions
# (VLM analysis + workflow construction in the background)
session_worker = SessionWorker(processor=processor, poll_interval=10)

# =========================================================================
# Per-session counter of in-flight analyses (lets finalize wait for them)
# =========================================================================
_pending_analyses: Dict[str, int] = defaultdict(int)
_pending_lock = threading.Lock()

# =========================================================================
# Per-session replay queue
# Each session has a queue of actions to execute and a replay state
# =========================================================================
_replay_lock = threading.Lock()
# session_id -> list of pending actions (FIFO)
_replay_queues: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
# replay_id -> replay state (workflow_id, session_id, status, progress)
_replay_states: Dict[str, Dict[str, Any]] = {}
# machine_id -> session_id (mapping used to target a replay at a given machine)
_machine_replay_target: Dict[str, str] = {}


class StreamEvent(BaseModel):
    """Request body of the streaming event endpoint: one event of a session."""
    session_id: str
    timestamp: float
    event: Dict[str, Any]
    machine_id: str = "default"  # Machine identifier (multi-machine, backward compatible)


class ReplayRequest(BaseModel):
    """Request to start the replay of a workflow."""
    workflow_id: str
    session_id: str
    machine_id: Optional[str] = None  # Target machine for the replay (multi-machine)
    params: Optional[Dict[str, Any]] = None


class RawReplayRequest(BaseModel):
    """Replay request carrying raw actions ("Agent Libre" / free-agent mode)."""
    actions: List[Dict[str, Any]]
    session_id: str = ""
    machine_id: Optional[str] = None  # Target machine (multi-machine)
    task_description: str = ""


class SingleActionRequest(BaseModel):
    """Request to execute a single action (Copilot mode)."""
    action: Dict[str, Any]
    session_id: str = ""
    machine_id: Optional[str] = None  # Target machine (multi-machine)


class ReplayResultReport(BaseModel):
    """Report sent by Agent V1 with the outcome of one executed action."""
    session_id: str
    action_id: str
    success: bool
    error: Optional[str] = None
    screenshot: Optional[str] = None  # Path or base64 of the post-action screenshot
    screenshot_after: Optional[str] = None  # Path or base64 of the screenshot taken AFTER the action
    actual_position: Optional[Dict[str, float]] = None  # {"x": px, "y": py} actual click position


class ErrorCallbackConfig(BaseModel):
    """Error-callback configuration for a replay."""
    replay_id: str
    callback_url: str  # URL to call when a non-recoverable error occurs


# Background thread that periodically cleans finished replays and expired sessions.
# `_cleanup_running` is the flag polled by the thread's loop to know when to stop.
_cleanup_thread: Optional[threading.Thread] = None
_cleanup_running = False


def _cleanup_loop():
    """Background loop purging stale replay states and old in-memory sessions.

    Every 10 minutes, for as long as the module-level ``_cleanup_running``
    flag is set, the loop:
    - calls ``_cleanup_replay_states()``;
    - asks the session manager to drop sessions older than 24 hours.

    Any exception raised by a cycle is logged and the loop keeps running.
    """
    pause_seconds = 600  # 10 minutes
    while True:
        if not _cleanup_running:
            return
        time.sleep(pause_seconds)
        # The flag may have been cleared while sleeping (server shutdown).
        if not _cleanup_running:
            return
        try:
            _cleanup_replay_states()
            # In-memory session cleanup, executed on every cycle.
            processor.session_manager.cleanup_old_sessions(max_age_hours=24)
        except Exception as e:
            logger.error(f"Erreur dans la boucle de nettoyage : {e}")


def _replay_state_last_timestamp(state: Dict[str, Any]) -> float:
    """Return the most recent timestamp recorded in a replay state, or 0.

    The last entry of ``results`` is inspected first, then the last entry of
    ``error_log``; a missing, empty or ``None`` list is treated as "no data".
    """
    for key in ("results", "error_log"):
        entries = state.get(key) or []
        if entries:
            stamp = entries[-1].get("timestamp", 0)
            if stamp:
                return stamp
    return 0


def _cleanup_replay_states():
    """Purge finished replay states and enforce the ``MAX_REPLAY_STATES`` cap.

    Two passes are run while holding ``_replay_lock``:

    1. Finished states (status ``completed``/``error``/``failed``) are removed
       when their last recorded timestamp is older than
       ``REPLAY_STATE_TTL_SECONDS`` or when they carry no timestamp at all
       (orphans).
    2. If the table still holds more than ``MAX_REPLAY_STATES`` entries, the
       oldest finished states (by last recorded timestamp) are evicted until
       the cap is met or no finished state remains. States that are not
       finished are never evicted.

    The matching entries of ``_error_callbacks`` are dropped as well, and the
    total number of removed states (both passes) is logged.
    """
    finished_statuses = ("completed", "error", "failed")
    now = time.time()
    removed = 0

    with _replay_lock:
        # Pass 1: TTL-expired or timestamp-less finished states.
        expired = []
        for replay_id, state in _replay_states.items():
            # .get() so that a malformed state without "status" cannot abort the cleanup
            if state.get("status") not in finished_statuses:
                continue
            last_time = _replay_state_last_timestamp(state)
            if not last_time or now - last_time > REPLAY_STATE_TTL_SECONDS:
                expired.append(replay_id)

        for replay_id in expired:
            del _replay_states[replay_id]
            _error_callbacks.pop(replay_id, None)
        removed += len(expired)

        # Pass 2: safety cap — evict the oldest finished states first.
        excess = len(_replay_states) - MAX_REPLAY_STATES
        if excess > 0:
            finished_ids = [
                rid for rid, s in _replay_states.items()
                if s.get("status") in finished_statuses
            ]
            finished_ids.sort(
                key=lambda rid: _replay_state_last_timestamp(_replay_states[rid])
            )
            evicted = finished_ids[:excess]
            for rid in evicted:
                del _replay_states[rid]
                _error_callbacks.pop(rid, None)
            removed += len(evicted)

    if removed:
        logger.info(f"Nettoyage replay states : {removed} entrées supprimées")


@app.on_event("startup")
async def startup():
    """Start the stream worker and session worker, and load existing workflows.

    Also enqueues sessions that were finalized but never processed and starts
    the periodic cleanup thread.
    """
    global _cleanup_running, _cleanup_thread

    worker.start(blocking=False)

    # Load the workflows already stored on disk
    _load_existing_workflows()

    # Start the worker that processes finalized sessions asynchronously
    session_worker.start()

    # Scan for sessions that were finalized but never processed
    # and add them to the processing queue automatically
    _enqueue_pending_sessions()

    # Start the periodic cleanup thread (the flag must be set before the thread runs)
    _cleanup_running = True
    _cleanup_thread = threading.Thread(target=_cleanup_loop, daemon=True, name="replay_cleanup")
    _cleanup_thread.start()

    logger.info(
        "API Streaming démarrée — StreamProcessor, Worker, SessionWorker et Cleanup prêts."
    )


def _enqueue_pending_sessions():
    """Queue every pending finalized session on the SessionWorker.

    Called at server start-up: the session ids returned by
    ``processor.find_pending_sessions()`` are logged (at most five are shown)
    and handed to ``session_worker.enqueue``. Any error is logged and swallowed.
    """
    try:
        waiting = processor.find_pending_sessions()
        if not waiting:
            logger.info("Aucune session en attente au démarrage")
            return

        # Only the first five ids are listed; an ellipsis marks the truncation.
        preview = ', '.join(waiting[:5])
        ellipsis = '...' if len(waiting) > 5 else ''
        logger.info(
            f"Sessions en attente trouvées au démarrage : {len(waiting)} "
            f"({preview}{ellipsis})"
        )
        for pending_id in waiting:
            session_worker.enqueue(pending_id)
    except Exception as e:
        logger.error(f"Erreur scan sessions en attente : {e}")


def _load_existing_workflows():
    """Load the workflow JSON files found on disk into ``processor._workflows``.

    Each ``*.json`` file of the known workflow directories is tried in two ways:

    1. ``Workflow.load_from_file`` — kept when the result exposes a
       ``workflow_id`` attribute.
    2. Fallback: the file is parsed as raw JSON and the resulting object is
       stored as-is under its ``workflow_id`` key, or under the file stem when
       that key is absent.

    Files that fail both attempts are skipped. Failures are logged at debug
    level, including the reason the structured load failed, so that a broken
    workflow file can be diagnosed.
    """
    from core.models.workflow_graph import Workflow

    workflow_dirs = [
        ROOT_DIR / "data" / "workflows",
        ROOT_DIR / "data" / "training" / "workflows",
        LIVE_SESSIONS_DIR / "workflows",
    ]

    loaded = 0
    for wf_dir in workflow_dirs:
        if not wf_dir.exists():
            continue
        for wf_file in wf_dir.glob("*.json"):
            # Attempt 1: structured load through the Workflow model.
            try:
                wf = Workflow.load_from_file(str(wf_file))
                if wf and hasattr(wf, 'workflow_id'):
                    with processor._data_lock:
                        processor._workflows[wf.workflow_id] = wf
                    loaded += 1
                    continue
            except Exception as e:
                # Best effort: record why the structured load failed, then
                # fall through to the raw JSON fallback below.
                logger.debug(
                    f"Chargement structuré impossible pour {wf_file.name}: {e}"
                )

            # Attempt 2: raw JSON, using the file stem when no workflow_id is present.
            try:
                wf_data = json.loads(wf_file.read_text(encoding="utf-8"))
                wf_id = wf_data.get("workflow_id") or wf_file.stem
                with processor._data_lock:
                    processor._workflows[wf_id] = wf_data
                loaded += 1
            except Exception as e:
                logger.debug(f"Skip workflow {wf_file.name}: {e}")

    logger.info(f"Workflows chargés depuis disque: {loaded}")


@app.on_event("shutdown")
async def shutdown():
    """Stop the background workers and flush the session manager."""
    global _cleanup_running
    # Signal the cleanup loop to exit; the thread is not joined here.
    _cleanup_running = False
    session_worker.stop()
    worker.stop()
    processor.session_manager.flush()
    logger.info("API Streaming arrêtée.")


# =========================================================================
# Session management
# =========================================================================

@app.post("/api/v1/traces/stream/register")
async def register_session(session_id: str, machine_id: str = "default"):
    """Register a new streaming session and reset its per-session counters.

    Args:
        session_id: Unique identifier of the session.
        machine_id: Identifier of the source machine (defaults to "default").

    Returns:
        A dict with the status, the session id and the machine id.
    """
    processor.session_manager.register_session(session_id, machine_id=machine_id)

    # Start from a clean slate so leftovers of an earlier session with the
    # same id cannot leak into this one.
    with _pending_lock:
        _analyzed_shots[session_id] = set()
        _pending_analyses[session_id] = 0

    logger.info(f"Session {session_id} enregistrée (machine={machine_id}, compteurs réinitialisés)")

    response = {
        "status": "session_registered",
        "session_id": session_id,
        "machine_id": machine_id,
    }
    return response


def _ensure_session_registered(session_id: str, machine_id: str = "default"):
    """Register the session on the fly when the session manager does not know it.

    This keeps the server robust across restarts: an agent that keeps sending
    events/images for a session it registered earlier gets that session
    re-created on first contact.

    When the session already exists with the "default" machine id and the
    caller supplies a specific one, the stored machine id is updated.

    Args:
        session_id: Identifier of the session.
        machine_id: Machine identifier forwarded by the agent.
    """
    known = processor.session_manager.get_session(session_id)

    if known is not None:
        # Only upgrade a placeholder machine id, never overwrite a real one.
        if machine_id != "default" and known.machine_id == "default":
            known.machine_id = machine_id
        return

    logger.info(f"Auto-enregistrement de la session {session_id} (machine={machine_id})")
    processor.session_manager.register_session(session_id, machine_id=machine_id)
    with _pending_lock:
        _pending_analyses[session_id] = 0
        _analyzed_shots[session_id] = set()


# =========================================================================
# Événements
# =========================================================================

@app.post("/api/v1/traces/stream/event")
async def stream_event(data: StreamEvent):
    """Receive one event, journal it on disk and forward it to the worker.

    The event is appended to ``live_events.jsonl`` in the session directory
    (nested under a per-machine directory when ``machine_id`` is not
    "default"), then processed through ``worker.process_event_direct``.

    Raises:
        HTTPException: 400 when ``session_id``/``machine_id`` would make the
            session directory resolve outside ``LIVE_SESSIONS_DIR``.
    """
    session_id = data.session_id
    machine_id = data.machine_id or "default"

    # Journal location: one sub-directory per machine in multi-machine setups.
    if machine_id != "default":
        session_path = LIVE_SESSIONS_DIR / machine_id / session_id
    else:
        session_path = LIVE_SESSIONS_DIR / session_id

    # Both identifiers come from the client: refuse anything (e.g. "../" or an
    # absolute path) that would escape the live sessions directory.
    try:
        session_path.resolve().relative_to(LIVE_SESSIONS_DIR.resolve())
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="Identifiant de session ou de machine invalide",
        ) from None

    # Auto-register unknown sessions (robustness across server restarts)
    _ensure_session_registered(session_id, machine_id=machine_id)

    # Persist to disk (JSONL journal)
    session_path.mkdir(parents=True, exist_ok=True)
    event_file = session_path / "live_events.jsonl"
    with open(event_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(data.dict()) + "\n")

    # Direct processing through the StreamProcessor
    result = worker.process_event_direct(session_id, data.event)
    return {"status": "event_synced", "session_id": session_id, **result}


# =========================================================================
# Images
# =========================================================================

# Per-session set of screenshot ids already analyzed (avoids re-analysis on retries)
_analyzed_shots: Dict[str, set] = defaultdict(set)

# Hash of the last analyzed screenshot per session (duplicate detection)
_last_screenshot_hash: Dict[str, str] = {}

# Thread pool for GPU analysis (keeps the async event loop from blocking)
_gpu_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="gpu_analysis")


def _image_hash(file_path: str) -> str:
    """Hash rapide d'une image pour détecter les doublons (~identiques)."""
    try:
        from PIL import Image
        import hashlib
        img = Image.open(file_path)
        # Réduire à 16x16 et convertir en niveaux de gris pour un hash perceptuel
        thumb = img.resize((16, 16)).convert('L')
        return hashlib.md5(thumb.tobytes()).hexdigest()
    except Exception:
        return ""


@app.post("/api/v1/traces/stream/image")
async def stream_image(
    session_id: str,
    shot_id: str,
    machine_id: str = "default",
    file: UploadFile = File(...),
    background_tasks: BackgroundTasks = None,
):
    """Receive an image, store it, and queue GPU analysis when relevant.

    The upload is written to ``<session dir>/shots/<shot_id>.png``. Only
    ``shot_*_full`` screenshots are analyzed; crops get a lightweight
    treatment and every other kind is stored without analysis. Screenshots
    already analyzed, or identical to the previous one of the session, are
    skipped.

    Raises:
        HTTPException: 400 when ``session_id``/``machine_id``/``shot_id``
            would make the target path resolve outside ``LIVE_SESSIONS_DIR``.
    """
    # Target location (one sub-directory per machine in multi-machine setups)
    if machine_id and machine_id != "default":
        session_path = LIVE_SESSIONS_DIR / machine_id / session_id
    else:
        session_path = LIVE_SESSIONS_DIR / session_id
    shots_dir = session_path / "shots"
    file_path = shots_dir / f"{shot_id}.png"

    # All three identifiers come from the client: refuse anything (e.g. "../"
    # or an absolute path) that would create or write outside the live
    # sessions directory.
    live_root = LIVE_SESSIONS_DIR.resolve()
    try:
        for candidate in (shots_dir, file_path):
            candidate.resolve().relative_to(live_root)
    except ValueError:
        raise HTTPException(
            status_code=400,
            detail="Identifiant de session, de machine ou de screenshot invalide",
        ) from None

    # Auto-register unknown sessions (robustness across server restarts)
    _ensure_session_registered(session_id, machine_id=machine_id)

    # Save the upload to disk
    shots_dir.mkdir(parents=True, exist_ok=True)
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)

    file_path_str = str(file_path)

    # Crops: lightweight processing (no ScreenAnalyzer pass)
    if "_crop" in shot_id:
        result = worker.process_crop_direct(session_id, shot_id, file_path_str)
        return {"status": "crop_stored", "shot_id": shot_id, **result}

    # Filter out screenshots that do NOT need GPU analysis.
    # Only shot_XXXX_full (action screenshots) are analyzed. The others
    # (heartbeat, focus, res_shot) are kept on disk but not sent to the GPU —
    # otherwise the small GPU thread pool gets swamped and finalization times
    # out with 0 states.
    if shot_id.startswith("heartbeat_"):
        return {"status": "heartbeat_stored", "shot_id": shot_id}
    if shot_id.startswith("focus_"):
        return {"status": "focus_stored", "shot_id": shot_id}
    if shot_id.startswith("res_shot_"):
        return {"status": "res_stored", "shot_id": shot_id}
    if not shot_id.startswith("shot_") or "_full" not in shot_id:
        # Anything that is not shot_XXXX_full → store without analysis
        logger.debug(f"Screenshot {shot_id} stocké sans analyse GPU")
        return {"status": "stored_no_analysis", "shot_id": shot_id}

    # Deduplication by id: never re-analyze a screenshot already handled
    with _pending_lock:
        if shot_id in _analyzed_shots[session_id]:
            logger.debug(f"Screenshot {shot_id} déjà analysé, skip")
            return {"status": "already_analyzed", "shot_id": shot_id}

    # Deduplication by content: skip when identical to the previous screenshot
    img_hash = _image_hash(file_path_str)
    if img_hash and img_hash == _last_screenshot_hash.get(session_id):
        logger.info(f"Screenshot {shot_id} identique au précédent, skip analyse GPU")
        with _pending_lock:
            _analyzed_shots[session_id].add(shot_id)
        return {"status": "duplicate_skipped", "shot_id": shot_id}
    if img_hash:
        _last_screenshot_hash[session_id] = img_hash

    # Mark as handled and count the in-flight analysis in one critical section
    with _pending_lock:
        _analyzed_shots[session_id].add(shot_id)
        _pending_analyses[session_id] += 1

    # Full screenshots: GPU analysis in a separate thread (event loop stays free)
    _gpu_executor.submit(_process_screenshot_thread, session_id, shot_id, file_path_str)
    return {"status": "image_queued", "shot_id": shot_id}


def _process_screenshot_thread(session_id: str, shot_id: str, path: str):
    """Run the GPU analysis of one screenshot (executed in a worker thread).

    The outcome is logged; any exception is logged with its traceback and
    swallowed. In every case the session's pending-analysis counter is
    decremented, never going below zero.
    """
    import traceback

    try:
        logger.info(f"[GPU] Début analyse {shot_id} pour {session_id}")
        outcome = worker.process_screenshot_direct(session_id, shot_id, path)
        ui_count = outcome.get('ui_elements_count', 0)
        text_count = outcome.get('text_detected', 0)
        indexed = outcome.get('embedding_indexed', False)
        logger.info(
            f"[GPU] Screenshot {shot_id} analysé: "
            f"{ui_count} UI, "
            f"{text_count} textes, "
            f"indexed={indexed}"
        )
    except Exception as e:
        logger.error(f"[GPU] Erreur analyse {shot_id}: {e}\n{traceback.format_exc()}")
    finally:
        # Always release the slot so the counter cannot stay stuck above zero.
        with _pending_lock:
            remaining = _pending_analyses[session_id] - 1
            _pending_analyses[session_id] = max(0, remaining)


# =========================================================================
# Finalisation
# =========================================================================

@app.post("/api/v1/traces/stream/finalize")
async def finalize(session_id: str, machine_id: str = "default"):
    """Close a session and queue it for background processing.

    Non-blocking: the session is marked as finalized and handed to the
    SessionWorker. Progress can be followed through
    GET /api/v1/traces/stream/processing/status.

    Args:
        session_id: Identifier of the session to finalize.
        machine_id: Machine identifier; not used here, the response reports
            the machine id stored on the session.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    live_session = processor.session_manager.get_session(session_id)
    if not live_session:
        raise HTTPException(
            status_code=404,
            detail=f"Session {session_id} non trouvée",
        )

    # Mark as finalized, then hand over to the asynchronous worker.
    processor.session_manager.finalize(session_id)
    logger.info(f"Session {session_id} finalisée, ajout à la queue de traitement")
    session_worker.enqueue(session_id)

    # Count the full screenshots on disk to give the client an estimate.
    full_shots_count = 0
    session_dir = processor._find_session_dir(session_id)
    shots_dir = session_dir / "shots" if session_dir else None
    if shots_dir is not None and shots_dir.exists():
        full_shots_count = sum(1 for _ in shots_dir.glob("shot_*_full.png"))

    message = (
        f"Session finalisée. {full_shots_count} screenshots seront analysés "
        "en arrière-plan. Suivez la progression via "
        "GET /api/v1/traces/stream/processing/status"
    )
    return {
        "status": "queued_for_processing",
        "session_id": session_id,
        "machine_id": live_session.machine_id,
        "screenshots_to_analyze": full_shots_count,
        "message": message,
    }


# =========================================================================
# Traitement asynchrone — Suivi de la queue de processing
# =========================================================================

@app.get("/api/v1/traces/stream/processing/status")
async def get_processing_status():
    """Report the state of the worker that processes finalized sessions.

    The payload is whatever ``session_worker.get_status()`` returns; this
    endpoint passes it through unchanged.
    """
    worker_status = session_worker.get_status()
    return worker_status


@app.post("/api/v1/traces/stream/processing/requeue")
async def requeue_session(session_id: str):
    """Manually put a session back on the processing queue.

    Useful to retry a failed session after a fix, or to force an already
    processed session to be processed again.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    if not processor.session_manager.get_session(session_id):
        raise HTTPException(
            status_code=404,
            detail=f"Session {session_id} non trouvée",
        )

    session_worker.enqueue(session_id)

    # The status is read after enqueueing so it reflects the new queue.
    queue_status = session_worker.get_status()
    return {
        "status": "requeued",
        "session_id": session_id,
        "queue_status": queue_status,
    }


# =========================================================================
# Monitoring
# =========================================================================

@app.get("/api/v1/traces/stream/stats")
async def get_stats():
    """Return the streaming server statistics plus the known machine ids."""
    payload = worker.stats
    # NOTE(review): this writes into the object returned by worker.stats —
    # confirm it is a fresh dict and not shared worker state.
    known_machines = processor.session_manager.get_machine_ids()
    payload["machines"] = known_machines
    return payload


@app.get("/api/v1/traces/stream/machines")
async def list_machines():
    """List every known machine with a summary of its sessions.

    For each machine id the response gives the total number of sessions, the
    number of sessions not yet finalized, and the ISO-formatted most recent
    ``last_activity`` (``None`` when the machine has no session).
    """
    manager = processor.session_manager
    summaries = []

    for mid in manager.get_machine_ids():
        sessions = manager.get_sessions_by_machine(mid)
        open_count = sum(1 for s in sessions if not s.finalized)

        if sessions:
            latest = max(s.last_activity for s in sessions).isoformat()
        else:
            latest = None

        summaries.append({
            "machine_id": mid,
            "total_sessions": len(sessions),
            "active_sessions": open_count,
            "last_activity": latest,
        })

    return {"machines": summaries}


@app.get("/api/v1/traces/stream/sessions")
async def list_sessions(machine_id: Optional[str] = None):
    """List sessions, optionally restricted to one machine.

    Args:
        machine_id: Forwarded to ``processor.list_sessions`` as the filter;
            ``None`` means no filtering.

    Returns:
        A dict with the sessions and the list of known machine ids.
    """
    return {
        "sessions": processor.list_sessions(machine_id=machine_id),
        # Known machines, included for the UI
        "machines": processor.session_manager.get_machine_ids(),
    }


@app.get("/api/v1/traces/stream/workflows")
async def list_workflows(machine_id: Optional[str] = None):
    """List the built workflows, optionally restricted to one machine.

    Args:
        machine_id: Forwarded to ``processor.list_workflows`` as the filter;
            ``None`` means no filtering.

    Returns:
        A dict with the workflows and the list of known machine ids.
    """
    return {
        "workflows": processor.list_workflows(machine_id=machine_id),
        # Known machines, included for the UI
        "machines": processor.session_manager.get_machine_ids(),
    }


@app.get("/api/v1/traces/stream/session/{session_id}")
async def get_session(session_id: str):
    """Return a summary of one session.

    Raises:
        HTTPException: 404 when the session is unknown.
    """
    found = processor.session_manager.get_session(session_id)
    if not found:
        raise HTTPException(status_code=404, detail=f"Session {session_id} non trouvée")

    summary = {
        "session_id": found.session_id,
        "machine_id": found.machine_id,
        "events_count": len(found.events),
        "screenshots_count": len(found.shot_paths),
        "last_window": found.last_window_info,
        "created_at": found.created_at.isoformat(),
        "last_activity": found.last_activity.isoformat(),
        "finalized": found.finalized,
    }
    return summary


# =========================================================================
# Replay — Exécution de workflows sur l'Agent V1
# =========================================================================


def _find_active_agent_session(machine_id: Optional[str] = None) -> Optional[str]:
    """Pick the Agent V1 session a replay should target.

    Only sessions whose id starts with ``sess_`` are considered. They are
    ordered by session id, highest first, then:

    1. the first session that is not finalized wins;
    2. otherwise the first session of that ordering is used even though it is
       finalized.

    Args:
        machine_id: When given, only sessions of that machine are considered;
            ``None`` considers every machine.

    Returns:
        The chosen session id, or ``None`` when no candidate exists.
    """
    manager = processor.session_manager

    # Snapshot the candidates under the manager's lock.
    with manager._lock:
        candidates = []
        for sess in manager._sessions.values():
            if not sess.session_id.startswith("sess_"):
                continue
            if machine_id is not None and sess.machine_id != machine_id:
                continue
            candidates.append(sess)

    if not candidates:
        return None

    ordered = sorted(candidates, key=lambda s: s.session_id, reverse=True)

    # Prefer a session that is still open; fall back on the first one otherwise.
    still_open = next((s for s in ordered if not s.finalized), None)
    chosen = still_open if still_open is not None else ordered[0]
    return chosen.session_id


def _workflow_to_actions(workflow, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
    """Convert a workflow graph into a flat list of normalized actions.

    The graph is walked breadth-first from ``workflow.entry_nodes`` (or from
    the first node when there is no entry node). Each node is visited once and
    every outgoing edge contributes the actions returned by
    ``_edge_to_normalized_actions``. When a gesture catalog is available, the
    resulting list is passed through its ``optimize_replay_actions``.

    Args:
        workflow: Object exposing ``edges``, ``entry_nodes`` and ``nodes`` as
            attributes (a raw dict is not supported by this function).
        params: Replay parameters forwarded to the edge conversion.

    Returns:
        The ordered list of normalized actions (possibly empty).
    """
    # Local import: a deque gives O(1) pops from the left, where list.pop(0)
    # is O(n) on every iteration.
    from collections import deque

    actions: List[Dict[str, Any]] = []
    params = params or {}

    # Index the outgoing edges by source node
    outgoing: Dict[str, list] = defaultdict(list)
    for edge in workflow.edges:
        outgoing[edge.from_node].append(edge)

    visited = set()
    pending = deque(workflow.entry_nodes or ())

    # Fallback: without entry nodes, start from the first node
    if not pending and workflow.nodes:
        pending.append(workflow.nodes[0].node_id)

    while pending:
        node_id = pending.popleft()
        if node_id in visited:
            continue
        visited.add(node_id)

        for edge in outgoing.get(node_id, []):
            actions.extend(_edge_to_normalized_actions(edge, params))
            # Follow the graph to the next node
            if edge.to_node not in visited:
                pending.append(edge.to_node)

    # Optimisation: let the gesture catalog rewrite the action list when available
    if _gesture_catalog and actions:
        actions = _gesture_catalog.optimize_replay_actions(actions)

    return actions


def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Turn one workflow edge into the normalized action dicts sent to the agent.

    A simple edge yields a single action; a ``compound`` edge is delegated to
    ``_expand_compound_steps`` and yields one action per step. Edges without
    an action, or with an unrecognized action type, yield an empty list.

    Args:
        edge: Object exposing ``edge_id``, ``from_node``, ``to_node`` and
            ``action`` (itself exposing ``type``, ``target``, ``parameters``).
        params: User parameters used for ``${var}`` substitution in text.

    Returns:
        A list of normalized action dicts.
    """
    action = edge.action
    if action is None:
        logger.warning(f"Edge {edge.edge_id} sans action, skip")
        return []

    action_type = action.type
    target = action.target
    options = action.parameters or {}

    # Position is expressed as a fraction of the screen. Values where both
    # components are <= 1.0 are taken as already normalized; anything else is
    # divided by the reference resolution (1920x1080 unless overridden).
    x_pct, y_pct = 0.0, 0.0
    if target and target.by_position:
        raw_x, raw_y = target.by_position
        if not (raw_x <= 1.0 and raw_y <= 1.0):
            width = options.get("ref_width", 1920) or 1920
            height = options.get("ref_height", 1080) or 1080
            raw_x = round(raw_x / width, 6)
            raw_y = round(raw_y / height, 6)
        x_pct, y_pct = raw_x, raw_y

    origin = {"edge_id": edge.edge_id, "from_node": edge.from_node, "to_node": edge.to_node}

    # Compound edges are expanded step by step.
    if action_type == "compound":
        return _expand_compound_steps(options.get("steps", []), origin, params)

    # Type-specific payload of a simple action.
    if action_type == "mouse_click":
        specifics = {
            "type": "click",
            "x_pct": x_pct,
            "y_pct": y_pct,
            "button": options.get("button", "left"),
        }
    elif action_type == "text_input":
        specifics = {
            "type": "type",
            "text": _substitute_variables(
                options.get("text", ""), params, options.get("defaults", {})
            ),
            "x_pct": x_pct,
            "y_pct": y_pct,
        }
    elif action_type == "key_press":
        keys = options.get("keys", [])
        if not keys and options.get("key"):
            # A lone "key" parameter is promoted to a one-element combo.
            keys = [options["key"]]
        specifics = {"type": "key_combo", "keys": keys}
    else:
        logger.warning(f"Type d'action inconnu : {action_type}")
        return []

    normalized = {**origin, "action_id": f"act_{uuid.uuid4().hex[:8]}", **specifics}

    # Attach whatever target description is available so the agent can
    # resolve the element visually.
    target_spec = {}
    if target:
        if target.by_role:
            target_spec["by_role"] = target.by_role
            normalized["target_role"] = target.by_role  # kept for debugging/compat
        if target.by_text:
            target_spec["by_text"] = target.by_text
            normalized["target_text"] = target.by_text  # kept for debugging/compat
        hints = getattr(target, "context_hints", None)
        if hints:
            target_spec["context_hints"] = hints
    if target_spec:
        normalized["target_spec"] = target_spec
        normalized["visual_mode"] = True  # tells the agent to use visual resolution

    return [normalized]


def _substitute_variables(text: str, params: Dict[str, Any], defaults: Dict[str, Any]) -> str:
    """Replace ``${var}`` placeholders in *text*.

    Lookup priority for each placeholder: user ``params`` first, then the
    workflow ``defaults``; a placeholder found in neither is left untouched.
    Placeholders may appear anywhere inside a longer string
    (e.g. ``"${expression}="``). Substituted values are converted with ``str``.

    ``None`` is tolerated for every argument: a missing ``params`` or
    ``defaults`` mapping is treated as empty, and a missing/empty ``text``
    yields ``""``.

    Args:
        text: Template string, possibly containing ``${name}`` placeholders.
        params: User-supplied values.
        defaults: Fallback values.

    Returns:
        The text with every resolvable placeholder replaced.
    """
    import re

    if not text:
        return ""

    # Explicit nulls in a stored workflow arrive here as None rather than {}.
    params = params or {}
    defaults = defaults or {}

    def replacer(match):
        var_name = match.group(1)
        if var_name in params:
            return str(params[var_name])
        if var_name in defaults:
            return str(defaults[var_name])
        # Unknown variable: keep the placeholder verbatim.
        return match.group(0)

    return re.sub(r'\$\{(\w+)\}', replacer, text)


def _expand_compound_steps(
    steps: List[Dict[str, Any]], base: Dict[str, Any], params: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Expand the steps of a compound action into individual actions.

    Each recognized step (``key_press``, ``text_input``, ``wait``,
    ``mouse_click``) becomes one action dict that starts from a copy of
    *base* and receives a fresh ``action_id``. Steps of any other type are
    logged at debug level and dropped.

    Args:
        steps: Step dicts of the compound action.
        base: Fields copied into every produced action.
        params: User parameters for ``${var}`` substitution in typed text.

    Returns:
        The produced actions, in step order.
    """
    expanded = []
    for step in steps:
        step_type = step.get("type", "unknown")

        entry = dict(base)
        entry["action_id"] = f"act_{uuid.uuid4().hex[:8]}"

        if step_type == "key_press":
            combo = step.get("keys", [])
            if not combo and step.get("key"):
                # A lone "key" is promoted to a one-element combo.
                combo = [step["key"]]
            entry.update(type="key_combo", keys=combo)

        elif step_type == "text_input":
            # Compound steps carry no defaults: only user params are applied.
            typed = _substitute_variables(step.get("text", ""), params, {})
            entry.update(type="type", text=typed)

        elif step_type == "wait":
            entry.update(type="wait", duration_ms=step.get("duration_ms", 500))

        elif step_type == "mouse_click":
            entry.update(
                type="click",
                x_pct=step.get("x_pct", 0.0),
                y_pct=step.get("y_pct", 0.0),
                button=step.get("button", "left"),
            )

        else:
            logger.debug(f"Step compound inconnu : {step_type}")
            continue

        expanded.append(entry)

    return expanded


@app.post("/api/v1/traces/stream/replay")
async def start_replay(request: ReplayRequest):
    """
    Start replaying a stored workflow on an Agent V1 session.

    The workflow is converted into a list of normalized actions that replaces
    the session's replay queue; the agent then pulls them one at a time
    through GET /replay/next (pull model).

    When ``session_id`` is empty or starts with "chat_", the target session is
    auto-detected through ``_find_active_agent_session``, optionally
    restricted to ``machine_id``.

    Because the queue is replaced, any replay still marked "running" on the
    same session can no longer progress: it is flagged "cancelled" so later
    result reports are not attributed to it.

    Raises:
        HTTPException: 404 when no agent session or no such workflow exists;
            400 when the workflow yields no action or more than
            MAX_ACTIONS_PER_REPLAY actions.
    """
    workflow_id = request.workflow_id
    session_id = request.session_id
    target_machine_id = request.machine_id
    params = request.params or {}

    # Auto-detect the agent session (with an optional machine filter).
    if not session_id or session_id.startswith("chat_"):
        active_session = _find_active_agent_session(machine_id=target_machine_id)
        if active_session:
            logger.info(
                f"Auto-détection session Agent V1 : {active_session} "
                f"(demandé: {session_id}, machine={target_machine_id})"
            )
            session_id = active_session
        else:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 et démarrez une session d'abord."
            )

    # Look the workflow up; on a miss, snapshot the known ids while the data
    # lock is still held so the error message never reads a dict that another
    # thread is mutating.
    with processor._data_lock:
        workflow = processor._workflows.get(workflow_id)
        available_ids = [] if workflow else list(processor._workflows.keys())

    if not workflow:
        raise HTTPException(
            status_code=404,
            detail=f"Workflow '{workflow_id}' non trouvé. "
                   f"Workflows disponibles : {available_ids}"
        )

    # Convert the workflow into normalized actions.
    actions = _workflow_to_actions(workflow, params)
    if not actions:
        raise HTTPException(
            status_code=400,
            detail=f"Le workflow '{workflow_id}' ne contient aucune action exécutable."
        )

    # Safety cap on the number of actions per replay.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}). "
                   "Découpez le workflow en parties plus petites."
        )

    replay_id = f"replay_{uuid.uuid4().hex[:8]}"

    # Resolve the machine id: explicit request value first, then the session's.
    session_obj = processor.session_manager.get_session(session_id)
    resolved_machine_id = target_machine_id or (session_obj.machine_id if session_obj else "default")

    with _replay_lock:
        # The queue below is overwritten, so a replay still "running" on this
        # session is superseded. Leaving it "running" would make the result
        # handler match it instead of the replay being started here.
        for previous in _replay_states.values():
            if previous["session_id"] == session_id and previous["status"] == "running":
                previous["status"] = "cancelled"
                logger.info(
                    f"Replay {previous['replay_id']} annulé : remplacé par {replay_id}"
                )

        _replay_queues[session_id] = list(actions)  # replaces any existing queue
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=workflow_id,
            session_id=session_id,
            total_actions=len(actions),
            params=params,
            machine_id=resolved_machine_id,
        )
        # Remember which session serves this machine for targeted replays.
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = session_id

    logger.info(
        f"Replay démarré : {replay_id} | workflow={workflow_id} | "
        f"session={session_id} | machine={resolved_machine_id} | "
        f"{len(actions)} actions à exécuter"
    )

    return {
        "replay_id": replay_id,
        "status": "running",
        "workflow_id": workflow_id,
        "session_id": session_id,
        "machine_id": resolved_machine_id,
        "total_actions": len(actions),
    }


@app.post("/api/v1/traces/stream/replay/raw")
async def start_raw_replay(request: RawReplayRequest):
    """
    Start a replay from raw actions ("free agent" mode).

    Instead of loading a workflow, the request carries the list of normalized
    actions directly. They replace the replay queue of the target session,
    from which the agent pulls them.

    When ``session_id`` is empty or starts with "chat_", the target session is
    auto-detected through ``_find_active_agent_session``. Actions lacking an
    ``action_id`` receive a generated one (the request's dicts are updated in
    place).

    Because the queue is replaced, any replay still marked "running" on the
    same session is flagged "cancelled" so later result reports are not
    attributed to it.

    Raises:
        HTTPException: 400 when no action is supplied or there are more than
            MAX_ACTIONS_PER_REPLAY; 404 when no agent session is found.
    """
    session_id = request.session_id
    actions = request.actions
    target_machine_id = request.machine_id
    task = request.task_description or "Tâche libre"

    if not actions:
        raise HTTPException(status_code=400, detail="Aucune action fournie.")

    # Safety cap on the number of actions per replay.
    if len(actions) > MAX_ACTIONS_PER_REPLAY:
        raise HTTPException(
            status_code=400,
            detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}). "
                   "Réduisez le plan d'exécution."
        )

    # Auto-detect the agent session (with an optional machine filter).
    if not session_id or session_id.startswith("chat_"):
        active_session = _find_active_agent_session(machine_id=target_machine_id)
        if active_session:
            session_id = active_session
        else:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 sur le PC cible."
            )

    # Give every action an id so its result can be tracked.
    for action in actions:
        if "action_id" not in action:
            action["action_id"] = f"act_free_{uuid.uuid4().hex[:6]}"

    replay_id = f"replay_free_{uuid.uuid4().hex[:8]}"

    # Resolve the machine id: explicit request value first, then the session's.
    session_obj = processor.session_manager.get_session(session_id)
    resolved_machine_id = target_machine_id or (session_obj.machine_id if session_obj else "default")

    with _replay_lock:
        # The queue below is overwritten, so a replay still "running" on this
        # session is superseded. Leaving it "running" would make the result
        # handler match it instead of the replay being started here.
        for previous in _replay_states.values():
            if previous["session_id"] == session_id and previous["status"] == "running":
                previous["status"] = "cancelled"
                logger.info(
                    f"Replay {previous['replay_id']} annulé : remplacé par {replay_id}"
                )

        _replay_queues[session_id] = list(actions)
        _replay_states[replay_id] = _create_replay_state(
            replay_id=replay_id,
            workflow_id=f"free_task:{task[:50]}",
            session_id=session_id,
            total_actions=len(actions),
            params={},
            machine_id=resolved_machine_id,
        )
        # Remember which session serves this machine for targeted replays.
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = session_id

    logger.info(
        f"Replay libre démarré : {replay_id} | task='{task}' | "
        f"session={session_id} | machine={resolved_machine_id} | {len(actions)} actions"
    )

    return {
        "replay_id": replay_id,
        "status": "running",
        "task": task,
        "session_id": session_id,
        "machine_id": resolved_machine_id,
        "total_actions": len(actions),
    }


@app.post("/api/v1/traces/stream/replay/single")
async def enqueue_single_action(request: SingleActionRequest):
    """
    Append a single action to a session's replay queue (Copilot mode).

    Unlike /replay and /replay/raw, which install a whole list, this endpoint
    adds exactly one action to the end of the existing queue and does not
    create a replay state. The returned ``action_id`` identifies the action
    when its result is reported.

    When ``session_id`` is empty or starts with "chat_", the target session is
    auto-detected through ``_find_active_agent_session``.

    Raises:
        HTTPException: 404 when no agent session can be found.
    """
    target_machine_id = request.machine_id
    action = dict(request.action)  # work on a copy of the request payload
    session_id = request.session_id

    # Auto-detect the agent session (with an optional machine filter).
    needs_detection = not session_id or session_id.startswith("chat_")
    if needs_detection:
        session_id = _find_active_agent_session(machine_id=target_machine_id)
        if not session_id:
            machine_hint = f" sur la machine '{target_machine_id}'" if target_machine_id else ""
            raise HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active{machine_hint}. "
                       "Lancez l'Agent V1 sur le PC cible."
            )

    # Keep a caller-provided id; otherwise generate one.
    action.setdefault("action_id", f"act_copilot_{uuid.uuid4().hex[:8]}")
    action_id = action["action_id"]

    with _replay_lock:
        _replay_queues[session_id].append(action)

    logger.info(
        f"Action Copilot enqueued: {action_id} | type={action.get('type')} | "
        f"session={session_id} | machine={target_machine_id}"
    )

    response = {
        "action_id": action_id,
        "session_id": session_id,
        "machine_id": target_machine_id,
        "status": "enqueued",
    }
    return response


@app.get("/api/v1/traces/stream/replay/next")
async def get_next_action(session_id: str, machine_id: str = "default"):
    """
    Return the next queued action for the polling agent (pull model).

    The first action of the session's queue is removed and returned; when
    nothing is pending the response carries ``"action": None``.

    If the session's own queue is empty and ``machine_id`` is not "default",
    the explicit machine -> session mapping is consulted. When it points to a
    different session with pending actions, that whole queue is transferred
    to the calling session, running replay states are re-pointed to it, and
    the mapping is updated. No other queue is ever inspected.
    """
    with _replay_lock:
        queue = _replay_queues.get(session_id, [])

        # Only the explicit machine mapping is honoured; scanning other
        # sessions' queues was removed because it caused races between agents.
        if not queue and machine_id != "default":
            target_sid = _machine_replay_target.get(machine_id)
            if target_sid and target_sid != session_id:
                mapped_queue = _replay_queues.get(target_sid, [])
                if mapped_queue:
                    logger.info(
                        f"Replay machine-target: {machine_id} -> "
                        f"transfert queue {target_sid} -> {session_id}"
                    )
                    # Move the queue under the caller's session id.
                    queue = mapped_queue
                    _replay_queues[session_id] = mapped_queue
                    del _replay_queues[target_sid]
                    # Re-point running replays so results are matched later.
                    for state in _replay_states.values():
                        if state["status"] == "running" and state["session_id"] == target_sid:
                            state["session_id"] = session_id
                    _machine_replay_target[machine_id] = session_id

        if not queue:
            return {"action": None, "session_id": session_id, "machine_id": machine_id}

        # FIFO: hand out the oldest pending action.
        action = queue.pop(0)

    logger.info(
        f"Action envoyée à {session_id} (machine={machine_id}) : "
        f"{action.get('type')} (id={action.get('action_id')})"
    )

    return {"action": action, "session_id": session_id, "machine_id": machine_id}


@app.post("/api/v1/traces/stream/replay/result")
async def report_action_result(report: ReplayResultReport):
    """
    Record the execution result the agent reports for one action.

    The result is attached to the running replay of the reporting session.
    When several replays are marked "running" for that session, the most
    recently created one is used, since it is the one whose actions currently
    populate the session queue.

    The handler optionally verifies the action by comparing the previous and
    current screenshots, then either counts the action as completed,
    schedules a retry (up to MAX_RETRIES_PER_ACTION), or marks the replay as
    failed, clears the queue and triggers the error callback.

    Retry strategy (implemented by ``_schedule_retry``):
    - Retry 1: re-inject the action as is
    - Retry 2: inject a 2 s wait before the action
    - Retry 3: last direct attempt; a further failure is unrecoverable

    Returns:
        A dict describing the recorded result, or
        ``{"status": "no_active_replay", ...}`` when the session has no
        running replay.
    """
    session_id = report.session_id
    action_id = report.action_id

    # Find the running replay for this session, newest first
    # (_replay_states preserves creation order).
    with _replay_lock:
        replay_state = None
        for state in reversed(list(_replay_states.values())):
            if state["session_id"] == session_id and state["status"] == "running":
                replay_state = state
                break

        if not replay_state:
            logger.warning(
                f"Résultat reçu pour session {session_id} mais aucun replay actif"
            )
            return {"status": "no_active_replay", "session_id": session_id}

    # Retry bookkeeping exists only if this action id was produced by a retry.
    retry_info = _retry_pending.pop(action_id, None)
    retry_count = retry_info["retry_count"] if retry_info else 0
    original_action = retry_info["action"] if retry_info else None

    # Remember the latest screenshot received.
    screenshot_after = report.screenshot_after or report.screenshot
    if screenshot_after:
        with _replay_lock:
            replay_state["last_screenshot"] = screenshot_after

    # === Post-action verification ===
    verification = None
    if report.success and screenshot_after:
        # The "before" image is the screenshot stored by the previous report.
        screenshot_before = replay_state.get("_last_screenshot_before")
        if screenshot_before:
            try:
                action_dict = original_action or {"type": "unknown", "action_id": action_id}
                result_dict = {
                    "success": report.success,
                    "error": report.error,
                }
                verification = _replay_verifier.verify_action(
                    action=action_dict,
                    result=result_dict,
                    screenshot_before=screenshot_before,
                    screenshot_after=screenshot_after,
                )
            except Exception as e:
                # Verification is best-effort: a failure leaves it unset.
                logger.warning(f"Vérification post-action échouée: {e}")

    # The current screenshot becomes the "before" of the next action.
    if screenshot_after:
        with _replay_lock:
            replay_state["_last_screenshot_before"] = screenshot_after

    # === Record the result ===
    with _replay_lock:
        result_entry = {
            "action_id": action_id,
            "success": report.success,
            "error": report.error,
            "has_screenshot": bool(screenshot_after),
            "actual_position": report.actual_position,
            "retry_count": retry_count,
            "verification": verification.to_dict() if verification else None,
        }
        replay_state["results"].append(result_entry)

        # === Success / retry / failure decision ===
        if report.success and (verification is None or verification.verified):
            # Success, either verified or with no verification available.
            replay_state["completed_actions"] += 1
            replay_state["current_action_index"] += 1

        elif report.success and verification and not verification.verified:
            # The agent reported success but verification disagrees.
            replay_state["unverified_actions"] += 1
            logger.warning(
                f"Action {action_id} marquée success mais non vérifiée: "
                f"{verification.detail}"
            )
            if verification.suggestion == "retry" and retry_count < MAX_RETRIES_PER_ACTION:
                # NOTE(review): on a first attempt no original action is
                # recorded, so the retry carries only the id; confirm the
                # agent can execute such an entry.
                _schedule_retry(
                    session_id, replay_state, original_action or {"action_id": action_id},
                    retry_count, "verification_failed"
                )
            else:
                # Carry on despite the unverified action.
                replay_state["completed_actions"] += 1
                replay_state["current_action_index"] += 1

        elif not report.success and retry_count < MAX_RETRIES_PER_ACTION:
            # Failure with retries left.
            action_to_retry = original_action or {"action_id": action_id, "type": "unknown"}
            _schedule_retry(
                session_id, replay_state, action_to_retry,
                retry_count, report.error or "unknown_error"
            )

        else:
            # Definitive failure: retries exhausted.
            replay_state["failed_actions"] += 1
            error_entry = {
                "action_id": action_id,
                "error": report.error or "Retries épuisés",
                "retry_count": retry_count,
                "timestamp": time.time(),
            }
            replay_state["error_log"].append(error_entry)

            # Flag the replay as failed and drop whatever is still queued.
            replay_state["status"] = "error"
            _replay_queues[session_id] = []
            logger.error(
                f"Replay {replay_state['replay_id']} échoué à l'action {action_id} "
                f"après {retry_count} retries: {report.error}"
            )

            # Notify the registered callback, if any.
            _notify_error_callback(replay_state, action_id, report.error)

        # The replay is finished once the queue is empty and it is still running.
        remaining = len(_replay_queues.get(session_id, []))
        if remaining == 0 and replay_state["status"] == "running":
            replay_state["status"] = "completed"
            logger.info(
                f"Replay {replay_state['replay_id']} terminé avec succès : "
                f"{replay_state['completed_actions']}/{replay_state['total_actions']} actions"
                f" ({replay_state['retried_actions']} retries, "
                f"{replay_state['unverified_actions']} non vérifiées)"
            )

    return {
        "status": "recorded",
        "action_id": action_id,
        "success": report.success,
        "replay_status": replay_state["status"],
        "remaining_actions": remaining,
        "retry_count": retry_count,
        "verification": verification.to_dict() if verification else None,
    }


def _create_replay_state(
    replay_id: str,
    workflow_id: str,
    session_id: str,
    total_actions: int,
    params: Optional[Dict[str, Any]] = None,
    machine_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the initial tracking record of a replay.

    The record starts in the "running" status with all counters at zero.
    A falsy ``machine_id`` is stored as "default" and a falsy ``params`` as
    an empty dict. Keys prefixed with an underscore are internal.
    """
    state: Dict[str, Any] = {}

    # Identity and target of the replay.
    state["replay_id"] = replay_id
    state["workflow_id"] = workflow_id
    state["session_id"] = session_id
    state["machine_id"] = machine_id if machine_id else "default"

    # Progress tracking.
    state["status"] = "running"
    state["total_actions"] = total_actions
    state["completed_actions"] = 0
    state["failed_actions"] = 0
    state["current_action_index"] = 0
    state["params"] = params if params else {}
    state["results"] = []  # per-action result history

    # Error-tracking fields.
    state["retried_actions"] = 0
    state["unverified_actions"] = 0
    state["error_log"] = []
    state["last_screenshot"] = None  # latest screenshot received
    state["_last_screenshot_before"] = None  # internal: screenshot preceding the last action

    return state


def _schedule_retry(
    session_id: str,
    replay_state: Dict[str, Any],
    action: Dict[str, Any],
    current_retry: int,
    reason: str,
):
    """
    Queue another attempt of an action at the head of the session queue.

    A copy of *action* is re-injected under a new id of the form
    ``<original id>_retry<N>``, and the retry bookkeeping is stored in
    ``_retry_pending`` under that id. On the second attempt a 2-second
    ``wait`` action is placed in front of the retried action; the other
    attempts re-inject the action directly.

    The caller must already hold ``_replay_lock``.
    """
    next_retry = current_retry + 1
    replay_state["retried_actions"] += 1

    # Clone the action under a new id so its result can be told apart.
    retry_action_id = f"{action.get('action_id', 'unknown')}_retry{next_retry}"
    retry_action = {**action, "action_id": retry_action_id}

    # Bookkeeping consumed when the result of the retried action is reported.
    _retry_pending[retry_action_id] = {
        "action": action,
        "retry_count": next_retry,
        "replay_id": replay_state["replay_id"],
        "reason": reason,
    }

    head = []
    if next_retry == 2:
        # Second attempt: pause first, in case the UI was still loading.
        head.append({
            "action_id": f"wait_retry_{uuid.uuid4().hex[:6]}",
            "type": "wait",
            "duration_ms": 2000,
        })
    head.append(retry_action)

    # Prepend so the retry is the next thing the agent pulls.
    _replay_queues[session_id] = head + _replay_queues.get(session_id, [])

    logger.info(
        f"Retry {next_retry}/{MAX_RETRIES_PER_ACTION} programmé pour {action.get('action_id')} "
        f"(raison: {reason}) | nouveau id: {retry_action_id}"
    )


def _notify_error_callback(
    replay_state: Dict[str, Any],
    action_id: str,
    error: Optional[str],
):
    """
    POST the failure details of a replay to its registered callback URL.

    Does nothing when no callback is registered for the replay. The JSON
    payload is serialized immediately, in the calling thread, so it is a
    consistent snapshot of ``replay_state``; only the HTTP request runs in a
    background daemon thread. Delivery is best-effort: any failure is logged
    and never propagated to the caller.

    Only ``http`` and ``https`` callback URLs are contacted, because
    ``urllib.request.urlopen`` would otherwise also accept schemes such as
    ``file:`` or ``ftp:`` from a client-supplied URL.

    Args:
        replay_state: Tracking record of the failed replay.
        action_id: Identifier of the action that failed.
        error: Error message reported for the action, if any.
    """
    import urllib.parse
    import urllib.request

    replay_id = replay_state["replay_id"]
    callback_url = _error_callbacks.get(replay_id)
    if not callback_url:
        return

    # Refuse anything that is not a plain HTTP(S) endpoint.
    try:
        scheme = urllib.parse.urlsplit(callback_url).scheme.lower()
    except ValueError:
        scheme = ""
    if scheme not in ("http", "https"):
        logger.warning(
            f"Error callback ignoré pour {replay_id}: "
            f"schéma d'URL non supporté ({callback_url})"
        )
        return

    # Serialize now rather than in the worker thread: replay_state is shared
    # and may be modified by later requests.
    try:
        payload = json.dumps({
            "replay_id": replay_id,
            "workflow_id": replay_state.get("workflow_id"),
            "session_id": replay_state.get("session_id"),
            "action_id": action_id,
            "error": error or "Erreur inconnue",
            "retried_actions": replay_state.get("retried_actions", 0),
            "error_log": replay_state.get("error_log", []),
            "status": replay_state.get("status"),
        }).encode("utf-8")
    except (TypeError, ValueError) as e:
        logger.warning(
            f"Échec envoi error callback à {callback_url}: {e}"
        )
        return

    def _send_callback():
        try:
            req = urllib.request.Request(
                callback_url,
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=5) as resp:
                logger.info(
                    f"Error callback envoyé à {callback_url}: {resp.status}"
                )
        except Exception as e:
            # Best-effort notification: log and move on.
            logger.warning(
                f"Échec envoi error callback à {callback_url}: {e}"
            )

    # Send in the background so the caller is not blocked by the HTTP call.
    threading.Thread(target=_send_callback, daemon=True).start()


@app.post("/api/v1/traces/stream/replay/error_callback")
async def register_error_callback(config: ErrorCallbackConfig):
    """
    Register the URL to notify when a replay fails unrecoverably.

    The URL is stored per replay id; when the replay exhausts its retries the
    server POSTs the error details to it.

    Raises:
        HTTPException: 404 when the replay id is unknown.
    """
    replay_id, callback_url = config.replay_id, config.callback_url

    # Only replays that exist can receive a callback.
    with _replay_lock:
        known = replay_id in _replay_states
    if not known:
        raise HTTPException(
            status_code=404,
            detail=f"Replay '{replay_id}' non trouvé"
        )

    _error_callbacks[replay_id] = callback_url
    logger.info(f"Error callback enregistré pour {replay_id}: {callback_url}")

    confirmation = {
        "status": "callback_registered",
        "replay_id": replay_id,
        "callback_url": callback_url,
    }
    return confirmation


@app.get("/api/v1/traces/stream/replay/{replay_id}")
async def get_replay_status(replay_id: str):
    """Return the tracking record of a replay, without its internal fields.

    Raises:
        HTTPException: 404 when the replay id is unknown.
    """
    with _replay_lock:
        state = _replay_states.get(replay_id)

    if not state:
        raise HTTPException(
            status_code=404, detail=f"Replay '{replay_id}' non trouvé"
        )

    # Keys starting with an underscore are internal and not exposed.
    public_view = {}
    for key, value in state.items():
        if key.startswith("_"):
            continue
        public_view[key] = value
    return public_view


@app.get("/api/v1/traces/stream/replays")
async def list_replays():
    """List every known replay, whatever its status, without internal fields."""
    with _replay_lock:
        summaries = []
        for state in _replay_states.values():
            # Keys starting with an underscore are internal and not exposed.
            visible = {k: v for k, v in state.items() if not k.startswith("_")}
            summaries.append(visible)
        return {"replays": summaries}


# =========================================================================
# Visual Replay — Résolution visuelle des cibles
# =========================================================================


class ResolveTargetRequest(BaseModel):
    """Request body for the visual resolution of a UI target."""
    session_id: str
    screenshot_b64: str  # Base64-encoded screenshot image
    target_spec: Dict[str, Any]  # Target description: {by_role, by_text, by_position, ...}
    fallback_x_pct: float = 0.0  # Fallback coordinates supplied by the caller
    fallback_y_pct: float = 0.0
    screen_width: int = 1920  # Screen size; defaults to 1920x1080
    screen_height: int = 1080


@app.post("/api/v1/traces/stream/replay/resolve_target")
async def resolve_target(request: ResolveTargetRequest):
    """
    Visually resolve a UI target from a screenshot.

    The agent sends a screenshot and a ``target_spec`` before executing an
    action. The screenshot is decoded, written to a temporary JPEG file and
    analysed by ``_resolve_target_sync`` on the GPU executor; that function's
    result is returned as is.

    Matching strategy as documented for this endpoint (implemented in
    ``_resolve_target_sync``, not in this function):
    1. by_text: element whose label contains the text
    2. by_role: element with the requested semantic role
    3. by_text + by_role: intersection of both
    4. fallback: the static coordinates from the request

    Any decoding or analysis failure yields ``_fallback_response`` rather
    than an HTTP error, and the temporary file is always removed.
    """
    import asyncio
    import base64
    import io
    import tempfile

    from PIL import Image

    # Decode the screenshot. JPEG cannot store alpha or palette images, so
    # anything that is not RGB is converted here; convert() also forces the
    # pixel data to load, surfacing truncated images as decode errors.
    try:
        img_bytes = base64.b64decode(request.screenshot_b64)
        img = Image.open(io.BytesIO(img_bytes))
        if img.mode != "RGB":
            img = img.convert("RGB")
    except Exception as e:
        logger.error(f"Décodage screenshot échoué: {e}")
        return _fallback_response(request, "decode_error", str(e))

    tmp_path = None
    try:
        # The analysers are given a file path, so persist the image first.
        # The path is recorded before saving so a failed save is cleaned up.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name
            img.save(tmp, format="JPEG", quality=90)

        # Run the blocking analysis on the GPU executor.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            _gpu_executor,
            _resolve_target_sync,
            tmp_path,
            request.target_spec,
            request.screen_width,
            request.screen_height,
            request.fallback_x_pct,
            request.fallback_y_pct,
        )
        return result
    except Exception as e:
        logger.error(f"Résolution visuelle échouée: {e}")
        return _fallback_response(request, "analysis_error", str(e))
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


def _resolve_by_template_matching(
    screenshot_path: str,
    anchor_image_b64: str,
    screen_width: int,
    screen_height: int,
    confidence_threshold: float = 0.7,
) -> Optional[Dict[str, Any]]:
    """Locate an anchor crop inside a screenshot with OpenCV template matching.

    The anchor is decoded from base64, both images are converted to grayscale
    and ``cv2.matchTemplate`` (``TM_CCOEFF_NORMED``) is evaluated for several
    anchor scales. The best-scoring position over all tried scales is kept.

    Args:
        screenshot_path: Path of the current screenshot on disk.
        anchor_image_b64: Base64-encoded anchor image (expected to be a PNG).
        screen_width: Not used by this function; positions are normalised by
            the pixel size of the screenshot file itself.
        screen_height: Not used by this function (see ``screen_width``).
        confidence_threshold: Minimum score required to accept a match.

    Returns:
        A dict with ``resolved=True``, the normalised centre of the match
        (``x_pct``/``y_pct``), the score, the scale and the match box; or
        ``None`` when OpenCV is missing, an image cannot be read, the anchor
        is larger than the screenshot, the best score is below the threshold,
        or any error occurs.
    """
    import base64

    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV non disponible pour template matching")
        return None

    try:
        # Load the screenshot from disk.
        screen_bgr = cv2.imread(screenshot_path)
        if screen_bgr is None:
            logger.warning("Impossible de lire le screenshot : %s", screenshot_path)
            return None

        # Decode the anchor crop from its base64 payload.
        anchor_buffer = np.frombuffer(base64.b64decode(anchor_image_b64), dtype=np.uint8)
        anchor_bgr = cv2.imdecode(anchor_buffer, cv2.IMREAD_COLOR)
        if anchor_bgr is None:
            logger.warning("Impossible de décoder l'image de l'ancre")
            return None

        # Matching is done on grayscale images.
        screen_gray = cv2.cvtColor(screen_bgr, cv2.COLOR_BGR2GRAY)
        anchor_gray = cv2.cvtColor(anchor_bgr, cv2.COLOR_BGR2GRAY)

        sh, sw = screen_gray.shape[:2]
        ah, aw = anchor_gray.shape[:2]
        # The template must fit inside the searched image.
        if ah > sh or aw > sw:
            logger.warning(
                "Ancre (%dx%d) plus grande que le screenshot (%dx%d)",
                aw, ah, sw, sh,
            )
            return None

        # Native scale first, then a few variations in case the resolution
        # changed between recording and replay.
        top_score = -1.0
        top_loc = None
        top_scale = 1.0
        top_size = (aw, ah)

        for factor in (1.0, 0.9, 1.1, 0.8, 1.2, 0.75, 1.25):
            if factor == 1.0:
                cand_w, cand_h = aw, ah
                template = anchor_gray
            else:
                cand_w = int(aw * factor)
                cand_h = int(ah * factor)
                too_small = cand_w < 10 or cand_h < 10
                too_large = cand_w > sw or cand_h > sh
                if too_small or too_large:
                    continue
                template = cv2.resize(anchor_gray, (cand_w, cand_h))

            score_map = cv2.matchTemplate(screen_gray, template, cv2.TM_CCOEFF_NORMED)
            _min_val, peak_val, _min_loc, peak_loc = cv2.minMaxLoc(score_map)

            if peak_val > top_score:
                top_score = peak_val
                top_loc = peak_loc
                top_scale = factor
                top_size = (cand_w, cand_h)

            # A near-perfect match makes the remaining scales pointless.
            if top_score >= 0.95:
                break

        if top_score < confidence_threshold:
            logger.info(
                "Template matching : meilleur score=%.3f < seuil=%.3f (ancre %dx%d, écran %dx%d)",
                top_score, confidence_threshold, aw, ah, sw, sh,
            )
            return None

        # Centre of the matched rectangle, in screenshot pixels.
        box_w, box_h = top_size
        center_x = top_loc[0] + box_w / 2.0
        center_y = top_loc[1] + box_h / 2.0

        # Normalise to proportions of the screenshot size.
        x_pct = round(center_x / sw, 6) if sw > 0 else 0.0
        y_pct = round(center_y / sh, 6) if sh > 0 else 0.0

        logger.info(
            "Template matching OK : score=%.3f, échelle=%.2f, "
            "centre=(%d, %d) → (%.4f, %.4f) sur %dx%d",
            top_score, top_scale, int(center_x), int(center_y), x_pct, y_pct, sw, sh,
        )

        matched_element = {
            "label": "anchor_template",
            "type": "visual_anchor",
            "role": "anchor",
            "center": [int(center_x), int(center_y)],
            "confidence": top_score,
        }
        match_box = {
            "x": top_loc[0],
            "y": top_loc[1],
            "width": box_w,
            "height": box_h,
        }
        return {
            "resolved": True,
            "method": "template_matching",
            "x_pct": x_pct,
            "y_pct": y_pct,
            "matched_element": matched_element,
            "score": top_score,
            "scale": top_scale,
            "match_box": match_box,
        }

    except Exception as e:
        logger.error("Erreur template matching : %s", e)
        return None


def _resolve_target_sync(
    screenshot_path: str,
    target_spec: Dict[str, Any],
    screen_width: int,
    screen_height: int,
    fallback_x_pct: float,
    fallback_y_pct: float,
) -> Dict[str, Any]:
    """Resolve a target visually (blocking; meant to run in an executor).

    Strategies, in priority order:
    1. ``anchor_image_base64`` — OpenCV template matching. When an anchor is
       given and matching fails, the static fallback is returned immediately
       (no ScreenAnalyzer pass, which is too slow for interactive replay).
    2. ``by_text`` / ``by_role`` — each detected UI element is scored and the
       highest-scoring one wins.
    3. fallback — the static coordinates passed by the caller.

    Returns:
        A dict with ``resolved`` and ``method`` keys plus ``x_pct``/``y_pct``;
        fallback results also carry a ``reason``.
    """

    def _static_fallback(reason: str, **extra: Any) -> Dict[str, Any]:
        # Common shape of every "could not resolve" answer.
        payload = {
            "resolved": False,
            "method": "fallback",
            "reason": reason,
            "x_pct": fallback_x_pct,
            "y_pct": fallback_y_pct,
        }
        payload.update(extra)
        return payload

    # ---------------------------------------------------------------
    # Strategy 1: template matching against the anchor image
    # ---------------------------------------------------------------
    anchor_image_b64 = target_spec.get("anchor_image_base64", "")
    if anchor_image_b64:
        template_hit = _resolve_by_template_matching(
            screenshot_path=screenshot_path,
            anchor_image_b64=anchor_image_b64,
            screen_width=screen_width,
            screen_height=screen_height,
            confidence_threshold=0.7,
        )
        if template_hit:
            return template_hit
        logger.info(
            "Template matching échoué pour ancre '%s', fallback",
            target_spec.get("anchor_id", "?"),
        )
        # No ScreenAnalyzer fallback: too slow for interactive replay.
        return _static_fallback("template_matching_failed")

    # ---------------------------------------------------------------
    # Strategy 2: semantic matching through the ScreenAnalyzer
    # ---------------------------------------------------------------
    by_text = target_spec.get("by_text", "")
    by_role = target_spec.get("by_role", "")

    # Nothing to search for (the anchor case already returned above).
    if not (by_text or by_role):
        return _static_fallback("no_target_criteria")

    processor._ensure_initialized()

    if processor._screen_analyzer is None:
        return _static_fallback("screen_analyzer_unavailable")

    # Analyse the screenshot to obtain the detected UI elements.
    try:
        screen_state = processor._screen_analyzer.analyze(screenshot_path)
    except Exception as e:
        logger.warning(f"Analyse screenshot échouée: {e}")
        return _static_fallback(f"analysis_failed: {e}")

    ui_elements = screen_state.ui_elements or []
    if not ui_elements:
        logger.info("Aucun élément UI détecté, fallback coordonnées")
        return _static_fallback("no_ui_elements")

    def _score(elem) -> float:
        # Accumulate in the order text -> role -> type: float sums depend on it.
        total = 0.0

        # Text criterion: substring either way scores higher than a fuzzy hit.
        if by_text and elem.label:
            wanted_text = by_text.lower()
            label_text = elem.label.lower()
            if wanted_text in label_text or label_text in wanted_text:
                total += 0.6
            elif _fuzzy_match(wanted_text, label_text):
                total += 0.3

        # Role criterion: checked against both the role and the type.
        if by_role:
            wanted_role = by_role.lower()
            if elem.role and wanted_role in elem.role.lower():
                total += 0.3
            if elem.type and wanted_role in elem.type.lower():
                total += 0.2

        return total

    scored = [(elem, _score(elem)) for elem in ui_elements]
    candidates = [pair for pair in scored if pair[1] > 0]

    if not candidates:
        logger.info(
            f"Aucun match visuel pour target(text='{by_text}', role='{by_role}') "
            f"parmi {len(ui_elements)} éléments"
        )
        return _static_fallback("no_match", ui_elements_count=len(ui_elements))

    # Highest score wins; on ties the earliest detected element is kept.
    winner, winner_score = max(candidates, key=lambda pair: pair[1])

    # Convert the pixel centre into proportions of the screen size.
    cx, cy = winner.center
    x_pct = round(cx / screen_width, 6) if screen_width > 0 else 0.0
    y_pct = round(cy / screen_height, 6) if screen_height > 0 else 0.0

    logger.info(
        f"Cible résolue visuellement: '{winner.label}' ({winner.type}/{winner.role}) "
        f"score={winner_score:.2f} → ({x_pct:.4f}, {y_pct:.4f})"
    )

    matched_element = {
        "label": winner.label,
        "type": winner.type,
        "role": winner.role,
        "center": list(winner.center),
        "confidence": winner.label_confidence,
    }
    return {
        "resolved": True,
        "method": "visual",
        "x_pct": x_pct,
        "y_pct": y_pct,
        "matched_element": matched_element,
        "score": winner_score,
        "candidates_count": len(candidates),
        "ui_elements_count": len(ui_elements),
    }


def _fuzzy_match(a: str, b: str, threshold: float = 0.6) -> bool:
    """Approximate match based on the ratio of characters shared by both strings.

    Shared characters are counted as a multiset intersection, so a character
    only counts as many times as it occurs in BOTH strings. This keeps the
    measure symmetric and prevents a repeated character from inflating the
    score (e.g. ``"aaaa"`` no longer fully matches ``"a"``). Character order
    is not taken into account.

    Args:
        a: First string to compare.
        b: Second string to compare.
        threshold: Minimum ratio ``shared / max(len(a), len(b))`` for a match.

    Returns:
        ``True`` when the ratio reaches ``threshold``; ``False`` otherwise or
        when either string is empty.
    """
    from collections import Counter

    if not a or not b:
        return False
    # Counter & Counter keeps, per character, the minimum of the two counts.
    shared = sum((Counter(a) & Counter(b)).values())
    return (shared / max(len(a), len(b))) >= threshold


def _fallback_response(request: ResolveTargetRequest, reason: str, detail: str) -> Dict:
    """Build the answer returned when visual resolution could not be performed.

    Args:
        request: The resolve-target request; only its static fallback
            coordinates are read.
        reason: Short machine-readable failure code.
        detail: Free-form description of the failure.

    Returns:
        A dict with ``resolved=False``, ``method="fallback"``, the reason and
        detail, and the request's fallback coordinates as ``x_pct``/``y_pct``.
    """
    static_x = request.fallback_x_pct
    static_y = request.fallback_y_pct
    return dict(
        resolved=False,
        method="fallback",
        reason=reason,
        detail=detail,
        x_pct=static_x,
        y_pct=static_y,
    )


if __name__ == "__main__":
    # Standalone launch: uvicorn is only required when this module is run as
    # a script, so it is imported here rather than at module level.
    import uvicorn

    logging.basicConfig(
        format="%(asctime)s [API-STREAM] %(message)s",
        level=logging.INFO,
    )
    # Serve the FastAPI app on every interface, port 5005.
    uvicorn.run(app, port=5005, host="0.0.0.0")