feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay

Refonte majeure du système Agent Chat et ajout de nombreux modules :

- Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat
  avec résolution en 3 niveaux (workflow → geste → "montre-moi")
- GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique,
  substitution automatique dans les replays, et endpoint /api/gestures
- Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket
  (approve/skip/abort) avant chaque action
- Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent
  pour feedback visuel pendant le replay
- Data Extraction (core/extraction/) : moteur d'extraction visuelle de données
  (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel
- ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison
  de screenshots, avec logique de retry (max 3)
- IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés
- Dashboard : nouvelles pages gestures, streaming, extractions
- Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants
- Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410,
  suppression du code hardcodé _plan_to_replay_actions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-15 10:02:09 +01:00
parent 74a1cb4e03
commit cf495dd82f
93 changed files with 12463 additions and 1080 deletions

View File

@@ -1,4 +1,11 @@
"""Screen capture module"""
from .screen_capturer import ScreenCapturer

# Best-effort import: expose EventListener as None when its module cannot be
# imported, so that importing the package itself never fails because of it.
try:
    from .event_listener import EventListener
except ImportError:
    EventListener = None

from .session_recorder import SessionRecorder

# Public API of the capture package (single definition; the earlier
# one-element assignment was dead code, immediately overwritten).
__all__ = ['ScreenCapturer', 'EventListener', 'SessionRecorder']

View File

@@ -0,0 +1,258 @@
"""
EventListener - Capture d'événements clavier/souris pour RPA Vision V3
Couche 0 (RawSession) : capture en temps réel des interactions utilisateur
(clics souris, frappes clavier) avec horodatage précis et contexte de fenêtre.
Génère des objets Event compatibles avec RawSession.
"""
import logging
import threading
import time
from typing import Optional, Callable, List, Dict, Any
from datetime import datetime
logger = logging.getLogger(__name__)
try:
from pynput import mouse, keyboard
PYNPUT_AVAILABLE = True
except ImportError:
mouse = None # type: ignore
keyboard = None # type: ignore
PYNPUT_AVAILABLE = False
logger.warning("pynput non disponible — EventListener désactivé")
class EventListener:
    """
    Keyboard/mouse event listener built on pynput.

    Captures user interactions as plain dicts, stores them internally and
    optionally forwards each one to a callback.

    Every event dict carries:
        t: seconds elapsed since start(), rounded to the millisecond
        type: "mouse_click", "mouse_scroll", "mouse_move", "key_press"
            or "key_release"
        window: {"title": ..., "app_name": ...} for the active window
        screenshot_id: always None when emitted by this class
    plus the type-specific fields ("button"/"pos", "delta"/"pos", "pos"
    or "keys").

    Example:
        >>> listener = EventListener()
        >>> listener.start(callback=on_event)
        >>> # ... the user interacts ...
        >>> events = listener.stop()
    """

    # Result of the one-time lookup of the xdotool binary, shared by all
    # instances so hosts without xdotool do not pay for a failed process
    # spawn on every single event.
    _xdotool_path: Optional[str] = None
    _xdotool_checked: bool = False

    def __init__(self, capture_mouse_move: bool = False):
        """
        Args:
            capture_mouse_move: Also capture mouse moves (high volume,
                disabled by default).

        Raises:
            ImportError: If pynput is not available.
        """
        if not PYNPUT_AVAILABLE:
            raise ImportError(
                "pynput est requis pour EventListener. "
                "Installer avec: pip install pynput"
            )
        self.capture_mouse_move = capture_mouse_move
        self._running = False
        self._start_time: Optional[float] = None
        self._events: List[Dict[str, Any]] = []
        self._callback: Optional[Callable[[Dict[str, Any]], None]] = None
        self._lock = threading.Lock()
        self._mouse_listener = None
        self._keyboard_listener = None

    def start(self, callback: Optional[Callable[[Dict[str, Any]], None]] = None) -> None:
        """
        Start capturing events.

        Does nothing (besides logging a warning) if already running.
        Previously captured events are discarded.

        Args:
            callback: Function called with each captured event dict.
        """
        if self._running:
            logger.warning("EventListener déjà en cours")
            return
        self._callback = callback
        with self._lock:
            self._events = []
        # Monotonic clock: elapsed times stay correct even if the wall
        # clock is adjusted while recording.
        self._start_time = time.monotonic()
        # Handlers drop events while _running is False, so the flag must be
        # raised before the listener threads start.
        self._running = True
        try:
            self._mouse_listener = mouse.Listener(
                on_click=self._on_click,
                on_scroll=self._on_scroll,
                on_move=self._on_move if self.capture_mouse_move else None,
            )
            self._keyboard_listener = keyboard.Listener(
                on_press=self._on_key_press,
                on_release=self._on_key_release,
            )
            self._mouse_listener.start()
            self._keyboard_listener.start()
        except Exception:
            # Roll back so a failed start does not leave the instance stuck
            # in the "running" state with a half-started listener.
            self._running = False
            self._stop_listeners()
            raise
        logger.info("EventListener démarré")

    def stop(self) -> List[Dict[str, Any]]:
        """
        Stop capturing and return the captured events.

        Returns:
            A copy of the list of captured event dicts.
        """
        self._running = False
        self._stop_listeners()
        # Snapshot under the lock: a handler may still be appending.
        with self._lock:
            captured = list(self._events)
        logger.info(f"EventListener arrêté — {len(captured)} événements capturés")
        return captured

    def _stop_listeners(self) -> None:
        """Stop and forget both pynput listeners, if any."""
        if self._mouse_listener:
            self._mouse_listener.stop()
            self._mouse_listener = None
        if self._keyboard_listener:
            self._keyboard_listener.stop()
            self._keyboard_listener = None

    @property
    def is_running(self) -> bool:
        """True between start() and stop()."""
        return self._running

    @property
    def event_count(self) -> int:
        """Number of events captured since the last start()."""
        with self._lock:
            return len(self._events)

    def _relative_time(self) -> float:
        """Seconds elapsed since start(), or 0.0 if never started."""
        if self._start_time is None:
            return 0.0
        return round(time.monotonic() - self._start_time, 3)

    def _get_window_context(self) -> Dict[str, str]:
        """
        Describe the active window using xdotool and ps.

        Returns:
            {"title": ..., "app_name": ...}; the placeholders "Unknown" /
            "unknown" are used whenever the information cannot be obtained
            (xdotool missing, command failure or timeout).
        """
        import shutil
        import subprocess

        cls = type(self)
        if not cls._xdotool_checked:
            cls._xdotool_path = shutil.which("xdotool")
            cls._xdotool_checked = True
        if cls._xdotool_path is None:
            return {"title": "Unknown", "app_name": "unknown"}

        # Missing binary / OS error, timeout, or undecodable output.
        failures = (OSError, ValueError, subprocess.SubprocessError)

        def query(command):
            """Return the stripped stdout of command, or None on non-zero exit."""
            done = subprocess.run(
                command, capture_output=True, text=True, timeout=1
            )
            return done.stdout.strip() if done.returncode == 0 else None

        try:
            title = query(["xdotool", "getactivewindow", "getwindowname"])
            pid = query(["xdotool", "getactivewindow", "getwindowpid"])
        except failures:
            return {"title": "Unknown", "app_name": "unknown"}

        # Resolve the process name from the PID; a failure here only
        # degrades app_name and keeps the title.
        app_name = None
        if pid:
            try:
                app_name = query(["ps", "-p", pid, "-o", "comm="])
            except failures:
                app_name = None

        return {
            "title": "Unknown" if title is None else title,
            "app_name": "unknown" if app_name is None else app_name,
        }

    def _emit_event(self, event: Dict[str, Any]) -> None:
        """Store the event, then pass it to the callback (errors are logged)."""
        with self._lock:
            self._events.append(event)
        if self._callback:
            try:
                self._callback(event)
            except Exception as e:
                # A faulty callback must not kill the pynput listener thread.
                logger.error(f"Erreur callback événement: {e}")

    def _record(self, event_type: str, **payload: Any) -> None:
        """Build the common event envelope around payload and emit it."""
        event: Dict[str, Any] = {"t": self._relative_time(), "type": event_type}
        event.update(payload)
        event["window"] = self._get_window_context()
        event["screenshot_id"] = None
        self._emit_event(event)

    # === Mouse handlers ===

    def _on_click(self, x: int, y: int, button, pressed: bool) -> None:
        """Record a click on button press; releases are ignored."""
        if not self._running or not pressed:
            return
        self._record("mouse_click", button=button.name, pos=[x, y])

    def _on_scroll(self, x: int, y: int, dx: int, dy: int) -> None:
        """Record a scroll; only the vertical step (dy * 120) is kept."""
        if not self._running:
            return
        self._record("mouse_scroll", delta=dy * 120, pos=[x, y])

    def _on_move(self, x: int, y: int) -> None:
        """Record a mouse move (only wired when capture_mouse_move is set)."""
        if not self._running:
            return
        self._record("mouse_move", pos=[x, y])

    # === Keyboard handlers ===

    def _on_key_press(self, key) -> None:
        """Record a key press."""
        if not self._running:
            return
        self._record("key_press", keys=[self._key_to_string(key)])

    def _on_key_release(self, key) -> None:
        """Record a key release."""
        if not self._running:
            return
        self._record("key_release", keys=[self._key_to_string(key)])

    @staticmethod
    def _key_to_string(key) -> str:
        """
        Convert a pynput key to a readable string.

        Returns the key's character when it has a non-empty one, otherwise
        its upper-cased name, otherwise str(key).
        """
        if hasattr(key, 'char') and key.char:
            return key.char
        if hasattr(key, 'name'):
            return key.name.upper()
        return str(key)

View File

@@ -0,0 +1,344 @@
"""
SessionRecorder - Enregistrement de sessions RPA complètes
Orchestre EventListener + ScreenCapturer pour produire un RawSession :
- Capture les événements clavier/souris en continu
- Prend un screenshot à chaque clic (ou périodiquement)
- Sauvegarde les screenshots sur disque
- Produit un RawSession complet avec events + screenshots liés
Usage:
>>> recorder = SessionRecorder(output_dir="data/sessions")
>>> recorder.start(workflow_name="login_workflow")
>>> # ... l'utilisateur effectue ses actions ...
>>> session = recorder.stop()
>>> print(f"{len(session.events)} events, {len(session.screenshots)} screenshots")
"""
import logging
import os
import platform
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Dict, Any, List
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
logger = logging.getLogger(__name__)
class SessionRecorder:
    """
    Full RPA session recorder.

    Combines EventListener (keyboard/mouse) and ScreenCapturer (screenshots)
    to build a RawSession, which is saved to disk when stop() is called.
    """

    # Event types discarded when capture_keyboard is False.
    _KEYBOARD_EVENT_TYPES = ("key_press", "key_release")

    def __init__(
        self,
        output_dir: str = "data/training/sessions",
        screenshot_on_click: bool = True,
        screenshot_interval_ms: int = 0,
        capture_keyboard: bool = True,
    ):
        """
        Args:
            output_dir: Output directory for recorded sessions.
            screenshot_on_click: Take a screenshot on every mouse click.
            screenshot_interval_ms: Periodic capture interval (0 = disabled).
            capture_keyboard: Record key press/release events.
        """
        self.output_dir = Path(output_dir)
        self.screenshot_on_click = screenshot_on_click
        self.screenshot_interval_ms = screenshot_interval_ms
        self.capture_keyboard = capture_keyboard
        self._session: Optional["RawSession"] = None
        self._session_dir: Optional[Path] = None
        self._screenshots_dir: Optional[Path] = None
        self._running = False
        self._screenshot_counter = 0
        # Guards the session object and the screenshot counter, both of
        # which are touched from the listener and periodic threads.
        self._lock = threading.Lock()
        # Components (lazily initialised)
        self._event_listener = None
        self._screen_capturer = None
        self._periodic_thread: Optional[threading.Thread] = None
        # Wakes the periodic thread immediately when stop() is called.
        self._periodic_stop: Optional[threading.Event] = None
        # Optional user callbacks
        self._on_event: Optional[Callable[[Dict[str, Any]], None]] = None
        self._on_screenshot: Optional[Callable[[str], None]] = None

    def start(
        self,
        workflow_name: str = "",
        session_id: Optional[str] = None,
        on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
        on_screenshot: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Start recording a session.

        If a recording is already in progress, a warning is logged and the
        current session id is returned unchanged.

        Args:
            workflow_name: Workflow name stored in the session context.
            session_id: Session id (timestamp-based id generated if None).
            on_event: Callback invoked with each raw event dict.
            on_screenshot: Callback invoked with each screenshot file path.

        Returns:
            The id of the running session.
        """
        if self._running:
            logger.warning("SessionRecorder déjà en cours")
            return self._session.session_id if self._session else ""

        if session_id is None:
            session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        self._session_dir = self.output_dir / session_id
        # NOTE(review): session_id appears twice in this path
        # (<output_dir>/<id>/<id>/screenshots) while the session JSON is
        # written to <output_dir>/<id>/<id>.json and screenshots are recorded
        # as "screenshots/<file>". Kept as-is; confirm against the loader
        # whether the nesting is intentional.
        self._screenshots_dir = self._session_dir / session_id / "screenshots"
        self._screenshots_dir.mkdir(parents=True, exist_ok=True)

        self._session = RawSession(
            session_id=session_id,
            agent_version="rpa_vision_v3",
            environment=self._get_environment(),
            # USER is typically unset on Windows, where USERNAME is used.
            user={"id": os.getenv("USER") or os.getenv("USERNAME") or "unknown"},
            context={"workflow": workflow_name, "tags": []},
            started_at=datetime.now(),
        )
        self._screenshot_counter = 0
        self._on_event = on_event
        self._on_screenshot = on_screenshot
        self._running = True

        self._start_event_listener()
        if self.screenshot_interval_ms > 0:
            self._start_periodic_capture()

        logger.info(
            f"SessionRecorder démarré: {session_id} "
            f"(screenshots_dir={self._screenshots_dir})"
        )
        return session_id

    def stop(self) -> Optional["RawSession"]:
        """
        Stop recording, save the session and return it.

        Returns:
            The recorded RawSession. If the recorder is not running, the
            last session is returned (None if start() was never called).
        """
        if not self._running:
            logger.warning("SessionRecorder non démarré")
            return self._session
        self._running = False

        # Wake the periodic thread so it exits now instead of finishing its
        # current sleep; otherwise a long interval would outlive the join
        # timeout and the thread could survive into the next start().
        if self._periodic_stop is not None:
            self._periodic_stop.set()
        if self._periodic_thread and self._periodic_thread.is_alive():
            self._periodic_thread.join(timeout=2)
        self._periodic_thread = None
        self._periodic_stop = None

        if self._event_listener:
            self._event_listener.stop()
            self._event_listener = None

        self._session.ended_at = datetime.now()
        session_path = self._session_dir / f"{self._session.session_id}.json"
        self._session.save_to_file(session_path)
        logger.info(
            f"SessionRecorder arrêté: {self._session.session_id} "
            f"({len(self._session.events)} events, "
            f"{len(self._session.screenshots)} screenshots) "
            f"{session_path}"
        )
        return self._session

    @property
    def is_running(self) -> bool:
        """True between start() and stop()."""
        return self._running

    @property
    def event_count(self) -> int:
        """Number of events in the current session (0 without a session)."""
        return len(self._session.events) if self._session else 0

    @property
    def screenshot_count(self) -> int:
        """Number of screenshots in the current session (0 without a session)."""
        return len(self._session.screenshots) if self._session else 0

    # =========================================================================
    # Event capture
    # =========================================================================

    def _start_event_listener(self) -> None:
        """Create and start the EventListener; only warn if it is unavailable."""
        try:
            from core.capture.event_listener import EventListener
            self._event_listener = EventListener(capture_mouse_move=False)
            self._event_listener.start(callback=self._on_raw_event)
            logger.info("EventListener démarré")
        except ImportError:
            logger.warning(
                "EventListener non disponible (pynput manquant). "
                "Seuls les screenshots périodiques seront capturés."
            )

    def _on_raw_event(self, raw_event: Dict[str, Any]) -> None:
        """
        Callback invoked by EventListener for each raw event.

        Converts the dict into an Event, attaches a screenshot to mouse
        clicks when enabled, stores the event in the session and forwards
        the raw dict to the user callback.
        """
        if not self._running or not self._session:
            return

        event_type = raw_event.get("type", "unknown")
        # Honour capture_keyboard: keyboard events are neither stored nor
        # forwarded when the option is disabled.
        if not self.capture_keyboard and event_type in self._KEYBOARD_EVENT_TYPES:
            return

        window = raw_event.get("window") or {}
        event = Event(
            t=raw_event.get("t", 0.0),
            type=event_type,
            window=RawWindowContext(
                title=window.get("title", "Unknown"),
                app_name=window.get("app_name", "unknown"),
            ),
            screenshot_id=None,
            # Everything that is not part of the common envelope is payload.
            data={
                k: v
                for k, v in raw_event.items()
                if k not in ("t", "type", "window", "screenshot_id")
            },
        )

        if self.screenshot_on_click and event.type == "mouse_click":
            screenshot_id = self._take_screenshot()
            if screenshot_id:
                event.screenshot_id = screenshot_id

        with self._lock:
            self._session.add_event(event)

        if self._on_event:
            try:
                self._on_event(raw_event)
            except Exception as e:
                logger.warning(f"Erreur callback on_event: {e}")

    # =========================================================================
    # Screenshot capture
    # =========================================================================

    def _take_screenshot(self) -> Optional[str]:
        """
        Capture a frame, save it as PNG and register it in the session.

        Returns:
            The new screenshot id, or None if nothing was captured (recorder
            stopped, capturer unavailable, empty frame or any error).
        """
        if not self._running or not self._session:
            return None
        try:
            self._ensure_screen_capturer()
            if self._screen_capturer is None:
                return None
            frame = self._screen_capturer.capture_frame()
            if frame is None:
                return None

            # Reserve the sequence number under the lock: this method runs
            # concurrently from the listener and periodic threads, and an
            # unsynchronised increment could hand out the same id twice.
            with self._lock:
                self._screenshot_counter += 1
                index = self._screenshot_counter

            screenshot_id = f"ss_{index:04d}"
            filename = f"screen_{index:04d}.png"
            filepath = self._screenshots_dir / filename
            self._screen_capturer.save_frame(frame, str(filepath))

            screenshot = Screenshot(
                screenshot_id=screenshot_id,
                relative_path=f"screenshots/{filename}",
                captured_at=datetime.now().isoformat(),
            )
            with self._lock:
                self._session.add_screenshot(screenshot)

            if self._on_screenshot:
                try:
                    self._on_screenshot(str(filepath))
                except Exception as e:
                    logger.warning(f"Erreur callback on_screenshot: {e}")
            return screenshot_id
        except Exception as e:
            # Best effort: a failed capture must not break event recording.
            logger.warning(f"Erreur capture screenshot: {e}")
            return None

    def _ensure_screen_capturer(self) -> None:
        """Lazily create the ScreenCapturer; leave it None on failure."""
        if self._screen_capturer is not None:
            return
        try:
            from core.capture.screen_capturer import ScreenCapturer
            self._screen_capturer = ScreenCapturer(
                buffer_size=5,
                detect_changes=False,
            )
        except Exception as e:
            logger.warning(f"ScreenCapturer non disponible: {e}")

    def _start_periodic_capture(self) -> None:
        """Start the daemon thread taking a screenshot every interval."""
        interval_s = self.screenshot_interval_ms / 1000.0
        # Each run gets its own event so a thread left over from a previous
        # run can never be revived by a later start().
        stop_event = threading.Event()
        self._periodic_stop = stop_event

        def _periodic_loop():
            while self._running and not stop_event.is_set():
                self._take_screenshot()
                # Interruptible sleep: returns as soon as stop() sets the event.
                stop_event.wait(interval_s)

        self._periodic_thread = threading.Thread(
            target=_periodic_loop, daemon=True, name="periodic_capture"
        )
        self._periodic_thread.start()
        logger.info(
            f"Capture périodique démarrée (intervalle={self.screenshot_interval_ms}ms)"
        )

    # =========================================================================
    # Helpers
    # =========================================================================

    def _get_environment(self) -> Dict[str, Any]:
        """
        Collect environment information for the session.

        Returns:
            Dict with "os", "os_version", "hostname" and "screen". "screen"
            holds the primary resolution when the capturer reports one, is
            empty when no capturer is available, and falls back to
            1920x1080 if querying the resolution raises.
        """
        env = {
            "os": platform.system().lower(),
            "os_version": platform.version(),
            "hostname": platform.node(),
            "screen": {},
        }
        try:
            self._ensure_screen_capturer()
            if self._screen_capturer:
                w, h = self._screen_capturer.get_screen_resolution()
                env["screen"] = {
                    "primary_resolution": [w, h],
                }
        except Exception:
            env["screen"] = {"primary_resolution": [1920, 1080]}
        return env