Files
rpa_vision_v3/tools/session_cleaner.py
Dom 057c37131f fix: session_cleaner fallback — x_pct/y_pct + visual_mode=False
Deux bugs dans _simple_build_replay :

1. Mauvais noms de champs : x_percent/y_percent au lieu de x_pct/y_pct
   attendus par l'agent executor. Et valeurs en 0-100 au lieu de 0-1.
   Résultat : l'agent recevait x_pct=None → crash "cannot unpack
   non-iterable NoneType object".

2. Pas de visual_mode=False explicite. Sans enrichissement
   (target_spec vide, pas d'anchor), l'agent tentait une résolution
   visuelle sur du vide → crash.

Aussi : la condition de fallback empêchait le déclenchement quand
build_replay_from_raw_events crashait (error_message non vide bloquait
la branche). Corrigé : le fallback se déclenche sur `not replay_actions`
(couvre None, liste vide, et crash du build principal).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 15:51:40 +02:00

999 lines
35 KiB
Python

#!/usr/bin/env python3
"""
Session Cleaner -- Outil leger de nettoyage de sessions avant replay.
Petit serveur Flask standalone qui permet de :
- Lister les sessions enregistrees recentes
- Visualiser chaque session avec ses screenshots (crop + full)
- Marquer les clics parasites a supprimer (auto-detection des toasts,
clics droit, fenetres Lea/systray, derniers 3 evenements)
- Re-construire un replay nettoye et l'injecter dans la queue
Option A du rapport audit VWB.
Port : 5006
"""
import json
import logging
import os
import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from flask import (
Flask,
redirect,
render_template_string,
request,
send_from_directory,
url_for,
)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Base URL of the streaming server that receives the cleaned replay
# (overridable through the RPA_STREAMING_SERVER environment variable).
STREAMING_SERVER = os.environ.get("RPA_STREAMING_SERVER", "http://localhost:5005")
# Root directory of the recorded live sessions; defaults to
# ../data/training/live_sessions relative to this file.
LIVE_SESSIONS_DIR = os.environ.get(
    "RPA_LIVE_SESSIONS_DIR",
    os.path.join(os.path.dirname(__file__), "..", "data", "training", "live_sessions"),
)
# Default listening port; main() also exposes a --port command-line override.
PORT = int(os.environ.get("SESSION_CLEANER_PORT", "5006"))
# Load the API token from the environment, falling back to ../.env.local
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
if not API_TOKEN:
    env_local = os.path.join(os.path.dirname(__file__), "..", ".env.local")
    if os.path.isfile(env_local):
        try:
            with open(env_local, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line.startswith("RPA_API_TOKEN="):
                        # Keep everything after the first '=' and drop optional
                        # surrounding quotes; the first matching line wins.
                        API_TOKEN = line.split("=", 1)[1].strip().strip('"').strip("'")
                        break
        except OSError:
            # Best effort: an unreadable .env.local leaves the token empty.
            pass
# ---------------------------------------------------------------------------
# Import optionnel de build_replay_from_raw_events
# ---------------------------------------------------------------------------
# Optional dependency: when the import fails, _build_replay_fn stays None and
# clean_and_replay() relies on _simple_build_replay() instead.
_build_replay_fn = None
try:
    from agent_v0.server_v1.stream_processor import build_replay_from_raw_events
    _build_replay_fn = build_replay_from_raw_events
except ImportError:
    pass
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Configure the root logger at import time and create the module logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger("session_cleaner")
# ---------------------------------------------------------------------------
# Flask application
# ---------------------------------------------------------------------------
app = Flask(__name__)
# ---------------------------------------------------------------------------
# Utilitaires
# ---------------------------------------------------------------------------
# Lower-case substrings; a window whose lower-cased title contains any of
# them is considered parasitic (see _is_parasitic / _parse_actions).
_PARASITIC_WINDOW_PATTERNS = [
    "program manager",
    "fenetre de depassement",
    "fenêtre de dépassement",
    "léa",
    "lea",
    "assistant",
    "activer windows",
]
# Actionable event types (the only ones shown to the user and replayed)
_ACTIONABLE_TYPES = frozenset({"mouse_click", "text_input", "key_combo", "key_press", "type"})
def _resolve_sessions_dir() -> Path:
    """Return the resolved root directory of the live sessions."""
    sessions_root = Path(LIVE_SESSIONS_DIR)
    return sessions_root.resolve()
def _discover_sessions(limit: int = 50) -> List[Dict[str, Any]]:
    """List the most recently modified sessions, newest first.

    Two layouts are scanned under the sessions root:
    - <base>/<machine_id>/sess_*  (current layout)
    - <base>/sess_*               (legacy layout, sessions stored at the root)

    Only ``sess_*`` directories that contain a ``live_events.jsonl`` file are
    reported. At most ``limit`` entries are returned; an empty list is
    returned (and a warning logged) when the root directory does not exist.
    """
    ignored_dirs = ("embeddings", "streaming_sessions", "workflows", "test_gpu")

    root = _resolve_sessions_dir()
    if not root.is_dir():
        logger.warning("Repertoire live_sessions introuvable : %s", root)
        return []

    found: List[Dict[str, Any]] = []
    for entry in root.iterdir():
        if not entry.is_dir():
            continue
        entry_name = entry.name

        if entry_name.startswith("sess_"):
            # Legacy layout: the session sits directly under the root.
            events_file = entry / "live_events.jsonl"
            if events_file.is_file():
                found.append(_build_session_info("(racine)", entry_name, entry, events_file))
        elif not entry_name.startswith(".") and entry_name not in ignored_dirs:
            # Any other visible folder is treated as a machine_id folder.
            for child in entry.iterdir():
                if not (child.is_dir() and child.name.startswith("sess_")):
                    continue
                events_file = child / "live_events.jsonl"
                if events_file.is_file():
                    found.append(_build_session_info(entry_name, child.name, child, events_file))

    # Newest first, based on the modification time of the JSONL file.
    found.sort(key=lambda info: info["mtime"], reverse=True)
    return found[:limit]
def _build_session_info(machine_id: str, session_id: str, session_dir: Path, jsonl_path: Path) -> Dict[str, Any]:
    """Build the metadata dict describing one session.

    The returned dict holds ``machine_id``, ``session_id``, ``session_dir``
    (as a string), ``date_str`` (dd/mm/YYYY HH:MM:SS), ``event_count`` (number
    of non-blank lines of the JSONL file) and ``mtime`` (modification time of
    the JSONL file).

    The date is parsed from the second ``_``-separated token of the session
    id (``sess_YYYYMMDDTHHMMSS_...``). When that token is not a valid
    timestamp the file mtime is used; when the id has no second token the
    date is left empty.
    """
    display_format = "%d/%m/%Y %H:%M:%S"
    modified_at = jsonl_path.stat().st_mtime

    # Count non-blank lines; an unreadable file keeps whatever was counted.
    non_blank_lines = 0
    try:
        with open(jsonl_path, encoding="utf-8") as handle:
            for raw_line in handle:
                if raw_line.strip():
                    non_blank_lines += 1
    except OSError:
        pass

    tokens = session_id.split("_")
    if len(tokens) < 2:
        date_label = ""
    else:
        try:
            parsed = datetime.strptime(tokens[1], "%Y%m%dT%H%M%S")
            date_label = parsed.strftime(display_format)
        except (ValueError, IndexError):
            date_label = datetime.fromtimestamp(modified_at).strftime(display_format)

    info: Dict[str, Any] = {}
    info["machine_id"] = machine_id
    info["session_id"] = session_id
    info["session_dir"] = str(session_dir)
    info["date_str"] = date_label
    info["event_count"] = non_blank_lines
    info["mtime"] = modified_at
    return info
def _load_events(session_dir: Path) -> List[Dict[str, Any]]:
    """Load the raw events stored in ``<session_dir>/live_events.jsonl``.

    Events are returned in file order. Blank lines, lines that are not valid
    JSON, and JSON values that are not objects are skipped: every consumer
    accesses events with ``.get()``, so a bare number, ``null`` or list
    would crash them later.

    Returns an empty list when the file does not exist. When reading fails
    part-way (I/O error or invalid UTF-8), the error is logged and the events
    parsed so far are returned.
    """
    jsonl = session_dir / "live_events.jsonl"
    events: List[Dict[str, Any]] = []
    if not jsonl.is_file():
        return events
    try:
        with open(jsonl, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if isinstance(record, dict):
                    events.append(record)
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError; treat a corrupt file like an
        # unreadable one instead of failing the whole request.
        logger.error("Erreur lecture %s : %s", jsonl, e)
    return events
def _get_window_title(event: Dict[str, Any]) -> str:
    """Return the window title attached to an event, or ``""`` when absent.

    Recent events store the title in ``event.window.title``; older ones in
    ``event.active_window_title``. The recent location wins when it holds a
    non-empty value.

    A string is always returned, including when the payload or either title
    field is null: callers lower-case the result without checking it.
    """
    inner = event.get("event") or {}
    if not isinstance(inner, dict):
        return ""
    # Current format: inner.window.title
    window = inner.get("window") or {}
    if isinstance(window, dict) and window.get("title"):
        return str(window["title"])
    # Legacy format
    return str(inner.get("active_window_title") or "")
def _get_shot_filename(click_index: int, session_dir: Path) -> Optional[str]:
    """Return the screenshot file name for the given click, if one exists.

    ``click_index`` is 1-based (first click = 1). The ``shots`` sub-directory
    of the session is probed for, in this order:
    1. shot_XXXX_crop.png
    2. shot_XXXX_full.png
    3. res_shot_XXXX.png

    Returns the first name that exists, or ``None`` when the directory or all
    three files are missing.
    """
    shots_root = session_dir / "shots"
    if not shots_root.is_dir():
        return None

    stem = "shot_" + format(click_index, "04d")
    # The crop comes first: it is the most informative as a thumbnail.
    candidates = (stem + "_crop.png", stem + "_full.png", "res_" + stem + ".png")
    return next((name for name in candidates if (shots_root / name).is_file()), None)
def _is_parasitic(event: Dict[str, Any], index: int, total: int) -> bool:
    """Tell whether an event is probably parasitic.

    An event is flagged when any of the following holds:
    - its type is a non-actionable one (heartbeat, focus change, action
      result, screenshot, status, ping/pong);
    - it is a right click;
    - its window title contains one of the parasitic patterns;
    - it is actionable and ``index`` falls within the last three positions
      (``index >= total - 3``), i.e. the last three events by global index.
    """
    payload = event.get("event", {})
    kind = payload.get("type", "")

    noise_kinds = (
        "heartbeat", "focus_change", "window_focus_change", "action_result",
        "screenshot", "status", "ping", "pong",
    )
    if kind in noise_kinds:
        return True

    if kind == "mouse_click" and payload.get("button") == "right":
        return True

    title = _get_window_title(event).lower()
    if title and any(pattern in title for pattern in _PARASITIC_WINDOW_PATTERNS):
        return True

    # Only actionable events are flagged by position.
    return kind in _ACTIONABLE_TYPES and index >= total - 3
def _parse_actions(events: List[Dict[str, Any]], session_dir: Path) -> List[Dict[str, Any]]:
    """Convert raw events into the list of actions displayed to the user.

    Only actionable events are kept. Each returned dict contains
    ``global_index`` (position in ``events``), ``type``, ``position``,
    ``window_title``, ``text``, ``keys``, ``shot_file`` and ``is_parasitic``;
    mouse clicks additionally carry ``button`` and ``click_number``.

    An action is pre-flagged as parasitic when it is a right click, when its
    window title contains a parasitic pattern, or when it is one of the last
    three actionable events of the session.
    """
    actions: List[Dict[str, Any]] = []
    click_count = 0

    # Global indices of the last three actionable events. Slicing already
    # copes with sessions holding fewer than three of them.
    actionable_indices = [
        i for i, ev in enumerate(events)
        if ev.get("event", {}).get("type", "") in _ACTIONABLE_TYPES
    ]
    last_3_actionable = set(actionable_indices[-3:])

    for i, event in enumerate(events):
        inner = event.get("event", {})
        etype = inner.get("type", "")
        # Only actionable events are shown.
        if etype not in _ACTIONABLE_TYPES:
            continue

        action: Dict[str, Any] = {
            "global_index": i,
            "type": etype,
            "position": "",
            "window_title": _get_window_title(event),
            "text": "",
            "keys": "",
            "shot_file": None,
            "is_parasitic": False,
        }

        # Position (present on clicks)
        pos = inner.get("pos")
        if pos and isinstance(pos, (list, tuple)) and len(pos) >= 2:
            action["position"] = f"({pos[0]}, {pos[1]})"

        # Click button and matching screenshot (screenshots are numbered per
        # click, 1-based)
        if etype == "mouse_click":
            action["button"] = inner.get("button", "left")
            click_count += 1
            action["shot_file"] = _get_shot_filename(click_count, session_dir)
            action["click_number"] = click_count

        # Typed text
        if etype in ("text_input", "type"):
            action["text"] = inner.get("text", "")

        # Keys of key_combo / key_press. Same precedence as
        # _simple_build_replay: ``keys`` first, then the single ``key``
        # field, so the table shows what would actually be replayed.
        if etype in ("key_combo", "key_press"):
            keys = inner.get("keys") or []
            if isinstance(keys, (list, tuple)):
                label = " + ".join(str(k) for k in keys)
            else:
                label = str(keys)
            action["keys"] = label or str(inner.get("key") or "")

        # Parasitic pre-detection
        parasitic = etype == "mouse_click" and inner.get("button") == "right"
        win_lower = (action["window_title"] or "").lower()
        if win_lower and any(pattern in win_lower for pattern in _PARASITIC_WINDOW_PATTERNS):
            parasitic = True
        # The last events of a recording usually stop the recorder itself.
        if i in last_3_actionable:
            parasitic = True
        action["is_parasitic"] = parasitic

        actions.append(action)
    return actions
# ---------------------------------------------------------------------------
# Templates HTML
# ---------------------------------------------------------------------------
# Stylesheet shared by every page; each route passes it to its template as
# the ``css`` variable, where it is inserted inside a <style> element.
_BASE_CSS = """
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0; padding: 20px; background: #f5f5f5; color: #333; }
h1 { color: #2c3e50; border-bottom: 2px solid #3498db; padding-bottom: 10px; }
h2 { color: #34495e; }
a { color: #2980b9; text-decoration: none; }
a:hover { text-decoration: underline; }
table { border-collapse: collapse; width: 100%; background: white; border-radius: 6px;
overflow: hidden; box-shadow: 0 1px 3px rgba(0,0,0,0.12); }
th { background: #2c3e50; color: white; padding: 12px 15px; text-align: left; }
td { padding: 10px 15px; border-bottom: 1px solid #eee; }
tr:hover { background: #f0f7ff; }
.btn { display: inline-block; padding: 10px 20px; background: #e74c3c; color: white;
border: none; border-radius: 4px; cursor: pointer; font-size: 14px; }
.btn:hover { background: #c0392b; }
.btn-secondary { background: #3498db; }
.btn-secondary:hover { background: #2980b9; }
.info-box { background: #eaf4ff; border: 1px solid #b8d4f0; border-radius: 6px;
padding: 15px; margin: 15px 0; }
.warning-box { background: #fff3cd; border: 1px solid #ffc107; border-radius: 6px;
padding: 15px; margin: 15px 0; }
.success-box { background: #d4edda; border: 1px solid #28a745; border-radius: 6px;
padding: 15px; margin: 15px 0; }
.error-box { background: #f8d7da; border: 1px solid #dc3545; border-radius: 6px;
padding: 15px; margin: 15px 0; }
.parasitic { background: #ffe0e0; }
.normal { background: #e0ffe0; }
.counter { font-size: 18px; font-weight: bold; margin: 15px 0; }
.counter .remove { color: #e74c3c; }
.counter .total { color: #2c3e50; }
img.thumb { max-height: 80px; border: 1px solid #ccc; border-radius: 4px; cursor: pointer; }
img.thumb:hover { box-shadow: 0 2px 8px rgba(0,0,0,0.3); }
.nav { margin-bottom: 20px; }
.mono { font-family: 'Fira Code', 'Consolas', monospace; font-size: 13px; }
label { cursor: pointer; }
"""
# Home page template: table of the recent sessions.
# Variables: ``sessions`` (list of session info dicts), ``sessions_dir`` and
# ``css``. The stylesheet is a trusted server-side constant and is emitted
# with ``|safe``: render_template_string() autoescapes, which would turn the
# quotes of the font names into entities inside <style> and invalidate the
# font-family declarations.
_INDEX_TEMPLATE = """<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Session Cleaner -- Lea</title>
<style>{{ css|safe }}</style>
</head>
<body>
<h1>Session Cleaner</h1>
<p>Outil de nettoyage des sessions avant replay. Selectionnez une session pour voir ses actions.</p>
{% if sessions %}
<table>
<thead>
<tr>
<th>Date</th>
<th>Machine</th>
<th>Session ID</th>
<th>Evenements</th>
<th>Action</th>
</tr>
</thead>
<tbody>
{% for s in sessions %}
<tr>
<td>{{ s.date_str }}</td>
<td class="mono">{{ s.machine_id }}</td>
<td class="mono">{{ s.session_id }}</td>
<td>{{ s.event_count }}</td>
<td><a href="{{ url_for('view_session', machine_id=s.machine_id, session_id=s.session_id) }}"
class="btn btn-secondary">Voir</a></td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
<div class="warning-box">
<p>Aucune session trouvee dans <code>{{ sessions_dir }}</code>.</p>
<p>Lancez un enregistrement depuis l'Agent V1 pour creer des sessions.</p>
</div>
{% endif %}
</body>
</html>"""
# Session detail template: one table row per actionable event, with a
# checkbox (named ``remove_indices``, valued with the event's global index)
# that is pre-checked for auto-detected parasitic actions. The form posts to
# clean_and_replay.
# Variables: ``session_id``, ``machine_id``, ``date_str``, ``total_events``,
# ``actions``, ``parasitic_count``, ``parasitic_indices`` and ``css``.
# The stylesheet is a trusted server-side constant and is emitted with
# ``|safe``: render_template_string() autoescapes, which would turn the quotes
# of the font names into entities inside <style> and invalidate the
# font-family declarations.
_SESSION_TEMPLATE = """<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Session {{ session_id }} -- Session Cleaner</title>
<style>{{ css|safe }}</style>
<style>
.overlay { display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%;
background: rgba(0,0,0,0.8); z-index: 1000; justify-content: center;
align-items: center; }
.overlay.active { display: flex; }
.overlay img { max-width: 90%; max-height: 90%; border-radius: 8px; }
</style>
</head>
<body>
<div class="nav">
<a href="{{ url_for('index') }}">&larr; Retour a la liste</a>
</div>
<h1>Session : <span class="mono">{{ session_id }}</span></h1>
<div class="info-box">
<strong>Machine :</strong> {{ machine_id }} |
<strong>Date :</strong> {{ date_str }} |
<strong>Evenements bruts :</strong> {{ total_events }}
</div>
{% if actions %}
<div class="counter" id="counter">
<span class="remove" id="remove-count">{{ parasitic_count }}</span> actions a supprimer /
<span class="total">{{ actions|length }}</span> total
</div>
<form method="POST" action="{{ url_for('clean_and_replay') }}" id="clean-form">
<input type="hidden" name="session_id" value="{{ session_id }}">
<input type="hidden" name="machine_id" value="{{ machine_id }}">
<table>
<thead>
<tr>
<th>Supprimer</th>
<th>#</th>
<th>Type</th>
<th>Position</th>
<th>Fenetre</th>
<th>Texte / Touches</th>
<th>Screenshot</th>
</tr>
</thead>
<tbody>
{% for a in actions %}
<tr class="{{ 'parasitic' if a.is_parasitic else 'normal' }}">
<td>
<label>
<input type="checkbox" name="remove_indices"
value="{{ a.global_index }}"
{{ 'checked' if a.is_parasitic else '' }}
onchange="updateCounter()">
</label>
</td>
<td>{{ loop.index }}</td>
<td class="mono">
{{ a.type }}
{% if a.button is defined and a.button == 'right' %}
<span style="color:#e74c3c">(droit)</span>
{% endif %}
</td>
<td class="mono">{{ a.position }}</td>
<td>{{ a.window_title|truncate(40) }}</td>
<td class="mono">
{% if a.text %}{{ a.text|truncate(60) }}{% endif %}
{% if a.keys %}{{ a.keys }}{% endif %}
</td>
<td>
{% if a.shot_file %}
<img src="{{ url_for('serve_shot', machine_id=machine_id, session_id=session_id, filename=a.shot_file) }}"
class="thumb"
alt="Screenshot action {{ loop.index }}"
onclick="showOverlay(this.src)"
loading="lazy">
{% else %}
<span style="color:#aaa">--</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
<div style="margin-top: 20px; display: flex; gap: 10px; align-items: center;">
<button type="submit" class="btn" id="btn-replay">
Nettoyer et relancer le replay
</button>
<button type="button" class="btn btn-secondary" onclick="selectAll(true)">
Tout cocher
</button>
<button type="button" class="btn btn-secondary" onclick="selectAll(false)">
Tout decocher
</button>
<button type="button" class="btn btn-secondary" onclick="resetParasitic()">
Reinitialiser (auto-detection)
</button>
</div>
</form>
{% else %}
<div class="warning-box">
Aucune action exploitable dans cette session.
</div>
{% endif %}
<!-- Overlay pour zoom screenshot -->
<div class="overlay" id="overlay" onclick="this.classList.remove('active')">
<img id="overlay-img" src="" alt="Screenshot agrandi">
</div>
<script>
// Indices des evenements auto-detectes comme parasites (pour reset)
var parasiticIndices = {{ parasitic_indices|tojson }};
function updateCounter() {
var checked = document.querySelectorAll('input[name="remove_indices"]:checked').length;
document.getElementById('remove-count').textContent = checked;
// Mettre a jour les couleurs des lignes
document.querySelectorAll('input[name="remove_indices"]').forEach(function(cb) {
var tr = cb.closest('tr');
tr.className = cb.checked ? 'parasitic' : 'normal';
});
}
function selectAll(state) {
document.querySelectorAll('input[name="remove_indices"]').forEach(function(cb) {
cb.checked = state;
});
updateCounter();
}
function resetParasitic() {
document.querySelectorAll('input[name="remove_indices"]').forEach(function(cb) {
cb.checked = parasiticIndices.indexOf(parseInt(cb.value)) !== -1;
});
updateCounter();
}
function showOverlay(src) {
document.getElementById('overlay-img').src = src;
document.getElementById('overlay').classList.add('active');
}
</script>
</body>
</html>"""
# Result page template shown after a clean-and-replay request.
# Variables: ``success``, ``replay_id``, ``session_id``, ``machine_id``,
# ``action_count``, ``removed_count``, ``error_message`` and ``css``.
# The stylesheet is a trusted server-side constant and is emitted with
# ``|safe``: render_template_string() autoescapes, which would turn the quotes
# of the font names into entities inside <style> and invalidate the
# font-family declarations.
_RESULT_TEMPLATE = """<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Replay lance -- Session Cleaner</title>
<style>{{ css|safe }}</style>
</head>
<body>
<div class="nav">
<a href="{{ url_for('index') }}">&larr; Retour a la liste</a>
</div>
<h1>Replay lance</h1>
{% if success %}
<div class="success-box">
<p><strong>Replay demarre avec succes.</strong></p>
<p><strong>Replay ID :</strong> <code>{{ replay_id }}</code></p>
<p><strong>Session :</strong> <code>{{ session_id }}</code></p>
<p><strong>Machine cible :</strong> <code>{{ machine_id }}</code></p>
<p><strong>Actions injectees :</strong> {{ action_count }}</p>
<p><strong>Actions supprimees :</strong> {{ removed_count }}</p>
</div>
{% else %}
<div class="error-box">
<p><strong>Erreur lors du lancement du replay.</strong></p>
<p>{{ error_message }}</p>
</div>
{% endif %}
</body>
</html>"""
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.route("/")
def index():
    """Home page: table of the most recent sessions (50 at most)."""
    context = {
        "sessions": _discover_sessions(limit=50),
        "sessions_dir": str(_resolve_sessions_dir()),
        "css": _BASE_CSS,
    }
    return render_template_string(_INDEX_TEMPLATE, **context)
@app.route("/session/<machine_id>/<session_id>")
def view_session(machine_id: str, session_id: str):
    """Render the detail page of one session with its actionable events.

    Responds with a 404 page when the session directory cannot be found.
    Otherwise the events are loaded, converted to displayable actions, and
    rendered with the auto-detected parasitic actions pre-checked.
    """
    session_dir = _find_session_dir(machine_id, session_id)
    if session_dir is None:
        # The stylesheet is a trusted constant and is emitted with ``|safe``:
        # render_template_string() autoescapes, which would turn the quotes of
        # the font names into entities inside <style>.
        return render_template_string(
            """<!DOCTYPE html><html lang="fr"><head><meta charset="utf-8">
<title>Session introuvable</title><style>{{ css|safe }}</style></head>
<body><div class="error-box"><p>Session <code>{{ sid }}</code>
introuvable pour la machine <code>{{ mid }}</code>.</p></div>
<a href="{{ url_for('index') }}">Retour</a></body></html>""",
            sid=session_id, mid=machine_id, css=_BASE_CSS,
        ), 404

    events = _load_events(session_dir)
    actions = _parse_actions(events, session_dir)

    # Global indices of the pre-flagged actions; the page uses them to
    # restore the auto-detection state.
    parasitic_indices = [a["global_index"] for a in actions if a["is_parasitic"]]
    parasitic_count = len(parasitic_indices)

    # Date parsed from the session name (sess_YYYYMMDDTHHMMSS_...)
    date_str = ""
    try:
        parts = session_id.split("_")
        if len(parts) >= 2:
            dt = datetime.strptime(parts[1], "%Y%m%dT%H%M%S")
            date_str = dt.strftime("%d/%m/%Y %H:%M:%S")
    except (ValueError, IndexError):
        date_str = "?"

    return render_template_string(
        _SESSION_TEMPLATE,
        session_id=session_id,
        machine_id=machine_id,
        date_str=date_str,
        total_events=len(events),
        actions=actions,
        parasitic_count=parasitic_count,
        parasitic_indices=parasitic_indices,
        css=_BASE_CSS,
    )
@app.route("/shots/<machine_id>/<session_id>/<filename>")
def serve_shot(machine_id: str, session_id: str, filename: str):
    """Serve one screenshot file of a session as a PNG image.

    Responds 404 when the session, its ``shots`` directory or the file is
    missing, and 400 when ``filename`` is not a bare file name.
    """
    located = _find_session_dir(machine_id, session_id)
    if located is None:
        return "Session introuvable", 404

    shots_root = located / "shots"
    if not shots_root.is_dir():
        return "Repertoire shots introuvable", 404

    # Directory-traversal guard: only a bare file name is accepted.
    basename = Path(filename).name
    if basename != filename:
        return "Nom de fichier invalide", 400

    if not (shots_root / basename).is_file():
        return "Fichier introuvable", 404

    return send_from_directory(str(shots_root), basename, mimetype="image/png")
@app.route("/clean-and-replay", methods=["POST"])
def clean_and_replay():
    """Remove the selected events from a session and launch a replay.

    Form fields: ``session_id``, ``machine_id`` and any number of
    ``remove_indices`` values (global event indices to drop; non-integer
    values are ignored).

    The cleaned events are converted to replay actions with
    ``build_replay_from_raw_events`` when it is available. If that builder is
    missing, fails or yields nothing, ``_simple_build_replay`` is used
    instead. The actions are then posted to the streaming server and a result
    page (success or error) is rendered.
    """
    session_id = request.form.get("session_id", "")
    machine_id = request.form.get("machine_id", "")

    def _failure(message: str, removed: int = 0):
        """Render the result page in its error state."""
        return render_template_string(
            _RESULT_TEMPLATE,
            success=False, error_message=message,
            replay_id="", session_id=session_id, machine_id=machine_id,
            action_count=0, removed_count=removed, css=_BASE_CSS,
        )

    # Convert the submitted indices to integers
    remove_indices = set()
    for idx_str in request.form.getlist("remove_indices"):
        try:
            remove_indices.add(int(idx_str))
        except ValueError:
            continue

    # Locate the session directory
    session_dir = _find_session_dir(machine_id, session_id)
    if session_dir is None:
        return _failure(f"Session {session_id} introuvable pour la machine {machine_id}.")

    # Load the events and drop the selected ones
    all_events = _load_events(session_dir)
    cleaned_events = [
        ev for i, ev in enumerate(all_events)
        if i not in remove_indices
    ]
    removed_count = len(all_events) - len(cleaned_events)
    logger.info(
        "Nettoyage session %s : %d evenements -> %d (suppression de %d)",
        session_id, len(all_events), len(cleaned_events), removed_count,
    )

    # Build the replay actions
    replay_actions = None
    error_message = ""
    if _build_replay_fn is not None:
        # Main path: build_replay_from_raw_events
        try:
            replay_actions = _build_replay_fn(
                cleaned_events,
                session_id=session_id,
                session_dir=str(session_dir),
            )
            # ``or []``: a None result must not be reported as a builder crash.
            logger.info("build_replay_from_raw_events a produit %d actions", len(replay_actions or []))
        except Exception as e:
            logger.error("Erreur build_replay_from_raw_events : %s", e)
            error_message = f"Erreur lors de la construction du replay : {e}"

    if not replay_actions:
        # Fallback: simple filtering and direct conversion. Runs when the
        # main builder crashed, returned nothing, or is unavailable.
        try:
            replay_actions = _simple_build_replay(cleaned_events, session_dir)
            logger.info("Fallback simple_build_replay a produit %d actions", len(replay_actions))
            if replay_actions:
                # Forget the main builder's error only when the fallback
                # actually produced something; otherwise that error is the
                # most useful message to show.
                error_message = ""
        except Exception as e:
            logger.error("Erreur fallback simple_build_replay : %s", e)
            error_message = f"Erreur lors de la construction du replay (fallback) : {e}"

    if not replay_actions:
        return _failure(
            error_message or "Aucune action exploitable apres nettoyage.",
            removed_count,
        )

    # Send the actions to the streaming server
    replay_id = f"replay_clean_{uuid.uuid4().hex[:8]}"
    try:
        import requests as _requests
        headers = {"Content-Type": "application/json"}
        if API_TOKEN:
            headers["Authorization"] = f"Bearer {API_TOKEN}"
        payload = {
            "session_id": session_id,
            "actions": replay_actions,
            # "(racine)" is only a display label for legacy root-level sessions.
            "machine_id": machine_id if machine_id != "(racine)" else "",
            "task_description": f"Replay nettoye de {session_id} ({removed_count} actions supprimees)",
        }
        resp = _requests.post(
            f"{STREAMING_SERVER}/api/v1/traces/stream/replay/raw",
            json=payload,
            headers=headers,
            timeout=30,
        )
        if resp.status_code == 200:
            # The server accepted the replay. An unparsable body must not be
            # reported as a failure (the user would relaunch a replay that is
            # already running); keep the locally generated id instead.
            try:
                data = resp.json()
            except ValueError:
                data = {}
            if isinstance(data, dict):
                replay_id = data.get("replay_id", replay_id)
            logger.info("Replay lance : %s (%d actions)", replay_id, len(replay_actions))
            return render_template_string(
                _RESULT_TEMPLATE,
                success=True, replay_id=replay_id,
                session_id=session_id, machine_id=machine_id,
                action_count=len(replay_actions), removed_count=removed_count,
                error_message="", css=_BASE_CSS,
            )
        error_message = f"Serveur streaming a repondu {resp.status_code} : {resp.text[:300]}"
        logger.error("Erreur POST replay : %s", error_message)
    except ImportError:
        error_message = (
            "Module 'requests' non disponible. "
            "Installez-le avec : pip install requests"
        )
    except Exception as e:
        error_message = f"Erreur de connexion au serveur streaming ({STREAMING_SERVER}) : {e}"
        logger.error("Erreur connexion streaming : %s", e)

    return _failure(error_message, removed_count)
# ---------------------------------------------------------------------------
# Helpers internes
# ---------------------------------------------------------------------------
def _is_safe_path_segment(name: str) -> bool:
    """Return True when ``name`` is a single, non-empty path component."""
    if not name or name in (".", ".."):
        return False
    return not any(ch in name for ch in ("/", "\\", "\x00"))


def _find_session_dir(machine_id: str, session_id: str) -> Optional[Path]:
    """Locate the directory of a session, or return ``None``.

    Lookup order:
    1. <base>/<machine_id>/<session_id>/
    2. <base>/<session_id>/              (legacy layout, root level)
    3. <base>/*/<session_id>/            (in case the machine_id changed)

    Both identifiers come from URLs and form fields, so they are treated as
    untrusted: a ``session_id`` that is empty, ``.``/``..`` or contains a path
    separator is rejected, and a ``machine_id`` of that kind is ignored. This
    keeps the lookup inside the sessions root and stops an empty id from
    resolving to the root itself.
    """
    if not _is_safe_path_segment(session_id):
        return None

    base = _resolve_sessions_dir()
    if not base.is_dir():
        return None

    # Under the machine_id folder ("(racine)" is the label of root sessions)
    if machine_id != "(racine)" and _is_safe_path_segment(machine_id):
        candidate = base / machine_id / session_id
        if candidate.is_dir():
            return candidate

    # Directly at the root
    candidate = base / session_id
    if candidate.is_dir():
        return candidate

    # Exhaustive search through the visible first-level folders
    for item in base.iterdir():
        if item.is_dir() and not item.name.startswith("."):
            candidate = item / session_id
            if candidate.is_dir():
                return candidate
    return None
def _simple_build_replay(events: List[Dict[str, Any]], session_dir: Path) -> List[Dict[str, Any]]:
    """Build a simplified replay without depending on stream_processor.

    Raw events are converted to simple normalized actions:
    - mouse_click            -> ``click`` with ``x_pct`` / ``y_pct``
    - text_input / type      -> ``type`` (events with empty text are dropped)
    - key_combo / key_press  -> ``key_combo`` (``keys``, else the single ``key``)

    Click coordinates are emitted as fractions of the screen size (0-1),
    computed from the first ``screen_metadata.screen_resolution`` found in the
    events, or 1920x1080 when none is present. Clicks carry
    ``visual_mode=False`` because no visual enrichment is available here.

    Clicks without a usable ``pos`` are skipped and logged: no coordinate was
    recorded, so replaying them would click at the screen origin.

    ``session_dir`` is accepted for interface compatibility and is not used.
    """
    actions: List[Dict[str, Any]] = []

    # Screen resolution: first event that declares one, else a default.
    screen_w, screen_h = 1920, 1080
    for ev in events:
        meta = ev.get("event", {}).get("screen_metadata") or {}
        res = meta.get("screen_resolution")
        if res and isinstance(res, (list, tuple)) and len(res) >= 2:
            screen_w, screen_h = int(res[0]), int(res[1])
            break

    for ev in events:
        inner = ev.get("event", {})
        etype = inner.get("type", "")
        if etype not in _ACTIONABLE_TYPES:
            continue
        action_id = f"act_clean_{uuid.uuid4().hex[:6]}"

        if etype == "mouse_click":
            pos = inner.get("pos")
            if not (isinstance(pos, (list, tuple)) and len(pos) >= 2):
                logger.warning("Clic ignore (position absente ou invalide) : %r", pos)
                continue
            actions.append({
                "action_id": action_id,
                "type": "click",
                "x_pct": round(pos[0] / screen_w, 6) if screen_w else 0.0,
                "y_pct": round(pos[1] / screen_h, 6) if screen_h else 0.0,
                "button": inner.get("button", "left"),
                "visual_mode": False,  # no enrichment -> raw coordinates
                "wait_before": 0.5,
            })
        elif etype in ("text_input", "type"):
            text = inner.get("text", "")
            if text:
                actions.append({
                    "action_id": action_id,
                    "type": "type",
                    "text": text,
                    "wait_before": 0.3,
                })
        elif etype in ("key_combo", "key_press"):
            keys = inner.get("keys", [])
            if isinstance(keys, str):
                keys = [keys]
            key_single = inner.get("key", "")
            if not keys and key_single:
                keys = [key_single]
            if keys:
                actions.append({
                    "action_id": action_id,
                    "type": "key_combo",
                    "keys": keys,
                    "wait_before": 0.3,
                })
    return actions
# ---------------------------------------------------------------------------
# Point d'entree
# ---------------------------------------------------------------------------
def main():
    """Parse the command-line options and start the Session Cleaner server."""
    import argparse

    cli = argparse.ArgumentParser(
        description="Session Cleaner -- Nettoyage de sessions avant replay",
    )
    cli.add_argument("--port", type=int, default=PORT, help=f"Port du serveur (defaut: {PORT})")
    cli.add_argument("--host", default="0.0.0.0", help="Adresse d'ecoute (defaut: 0.0.0.0)")
    cli.add_argument("--debug", action="store_true", help="Mode debug Flask")
    options = cli.parse_args()

    # Only report whether a token is configured, never its value.
    token_state = "configure" if API_TOKEN else "non configure"
    logger.info("Session Cleaner demarre sur http://%s:%d", options.host, options.port)
    logger.info("Repertoire sessions : %s", _resolve_sessions_dir())
    logger.info("Serveur streaming : %s", STREAMING_SERVER)
    logger.info("Token API : %s", token_state)

    app.run(host=options.host, port=options.port, debug=options.debug)


if __name__ == "__main__":
    main()