feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay
Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,11 @@
|
||||
"""Screen capture module"""
|
||||
from .screen_capturer import ScreenCapturer
|
||||
|
||||
__all__ = ['ScreenCapturer']
|
||||
try:
|
||||
from .event_listener import EventListener
|
||||
except ImportError:
|
||||
EventListener = None
|
||||
|
||||
from .session_recorder import SessionRecorder
|
||||
|
||||
__all__ = ['ScreenCapturer', 'EventListener', 'SessionRecorder']
|
||||
|
||||
258
core/capture/event_listener.py
Normal file
258
core/capture/event_listener.py
Normal file
@@ -0,0 +1,258 @@
|
||||
"""
|
||||
EventListener - Capture d'événements clavier/souris pour RPA Vision V3
|
||||
|
||||
Couche 0 (RawSession) : capture en temps réel des interactions utilisateur
|
||||
(clics souris, frappes clavier) avec horodatage précis et contexte de fenêtre.
|
||||
|
||||
Génère des objets Event compatibles avec RawSession.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Optional, Callable, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from pynput import mouse, keyboard
|
||||
PYNPUT_AVAILABLE = True
|
||||
except ImportError:
|
||||
mouse = None # type: ignore
|
||||
keyboard = None # type: ignore
|
||||
PYNPUT_AVAILABLE = False
|
||||
logger.warning("pynput non disponible — EventListener désactivé")
|
||||
|
||||
|
||||
class EventListener:
|
||||
"""
|
||||
Listener d'événements clavier/souris basé sur pynput.
|
||||
|
||||
Capture les interactions utilisateur en temps réel et les transmet
|
||||
via un callback. Compatible avec le format Event de RawSession.
|
||||
|
||||
Example:
|
||||
>>> listener = EventListener()
|
||||
>>> listener.start(callback=on_event)
|
||||
>>> # ... l'utilisateur interagit ...
|
||||
>>> events = listener.stop()
|
||||
"""
|
||||
|
||||
def __init__(self, capture_mouse_move: bool = False):
    """
    Create an idle listener; nothing is captured until ``start()`` is called.

    Args:
        capture_mouse_move: Also record mouse-move events (high volume,
            disabled by default).

    Raises:
        ImportError: If pynput could not be imported when this module
            was loaded.
    """
    if not PYNPUT_AVAILABLE:
        message = (
            "pynput est requis pour EventListener. "
            "Installer avec: pip install pynput"
        )
        raise ImportError(message)

    # pynput listener objects; created by start() and cleared by stop().
    self._mouse_listener = None
    self._keyboard_listener = None

    # Capture configuration and state.
    self.capture_mouse_move = capture_mouse_move
    self._running = False
    self._start_time: Optional[float] = None
    self._callback: Optional[Callable[[Dict[str, Any]], None]] = None

    # Event buffer and the lock that guards it.
    self._lock = threading.Lock()
    self._events: List[Dict[str, Any]] = []
|
||||
|
||||
def start(self, callback: Optional[Callable[[Dict[str, Any]], None]] = None) -> None:
    """
    Begin capturing mouse and keyboard events.

    If a capture is already running, a warning is logged and nothing
    else happens. Otherwise the event buffer is reset, the reference
    time for relative timestamps is set, and one pynput mouse listener
    plus one pynput keyboard listener are created and started.

    Args:
        callback: Optional function invoked with each captured event dict.
    """
    if self._running:
        logger.warning("EventListener déjà en cours")
        return

    # Fresh capture state; event timestamps are relative to this instant.
    self._callback = callback
    self._events = []
    self._start_time = time.time()
    self._running = True

    # Mouse moves are only hooked when explicitly requested.
    move_handler = self._on_move if self.capture_mouse_move else None

    self._mouse_listener = mouse.Listener(
        on_click=self._on_click,
        on_scroll=self._on_scroll,
        on_move=move_handler,
    )
    self._keyboard_listener = keyboard.Listener(
        on_press=self._on_key_press,
        on_release=self._on_key_release,
    )

    for listener in (self._mouse_listener, self._keyboard_listener):
        listener.start()

    logger.info("EventListener démarré")
|
||||
|
||||
def stop(self) -> List[Dict[str, Any]]:
    """
    Stop capturing and return the events captured so far.

    Both pynput listeners (when present) are stopped and released. The
    event buffer is then copied under the lock, and that same copy is
    used for the log message and the return value so the two always
    agree, even if a handler is still appending.

    Returns:
        A copy of the list of captured event dicts.
    """
    self._running = False

    for attr in ("_mouse_listener", "_keyboard_listener"):
        listener = getattr(self, attr)
        if listener:
            listener.stop()
            setattr(self, attr, None)

    # Snapshot under the lock: handlers append from other threads.
    with self._lock:
        captured = list(self._events)

    logger.info(f"EventListener arrêté — {len(captured)} événements capturés")
    return captured
|
||||
|
||||
@property
def is_running(self) -> bool:
    """Whether a capture is currently active."""
    active = self._running
    return active
|
||||
|
||||
@property
def event_count(self) -> int:
    """Number of events currently buffered, read under the buffer lock."""
    with self._lock:
        buffered = len(self._events)
    return buffered
|
||||
|
||||
def _relative_time(self) -> float:
|
||||
"""Temps relatif depuis le début de la capture."""
|
||||
if self._start_time is None:
|
||||
return 0.0
|
||||
return round(time.time() - self._start_time, 3)
|
||||
|
||||
def _get_window_context(self) -> Dict[str, str]:
|
||||
"""Obtenir le contexte de la fenêtre active."""
|
||||
try:
|
||||
import subprocess
|
||||
# Utiliser xdotool sur Linux pour obtenir la fenêtre active
|
||||
result = subprocess.run(
|
||||
["xdotool", "getactivewindow", "getwindowname"],
|
||||
capture_output=True, text=True, timeout=1
|
||||
)
|
||||
title = result.stdout.strip() if result.returncode == 0 else "Unknown"
|
||||
|
||||
result2 = subprocess.run(
|
||||
["xdotool", "getactivewindow", "getwindowpid"],
|
||||
capture_output=True, text=True, timeout=1
|
||||
)
|
||||
pid = result2.stdout.strip() if result2.returncode == 0 else ""
|
||||
|
||||
# Essayer d'obtenir le nom du process
|
||||
app_name = "unknown"
|
||||
if pid:
|
||||
try:
|
||||
result3 = subprocess.run(
|
||||
["ps", "-p", pid, "-o", "comm="],
|
||||
capture_output=True, text=True, timeout=1
|
||||
)
|
||||
app_name = result3.stdout.strip() if result3.returncode == 0 else "unknown"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return {"title": title, "app_name": app_name}
|
||||
except Exception:
|
||||
return {"title": "Unknown", "app_name": "unknown"}
|
||||
|
||||
def _emit_event(self, event: Dict[str, Any]) -> None:
|
||||
"""Enregistrer et émettre un événement."""
|
||||
with self._lock:
|
||||
self._events.append(event)
|
||||
|
||||
if self._callback:
|
||||
try:
|
||||
self._callback(event)
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur callback événement: {e}")
|
||||
|
||||
# === Handlers souris ===
|
||||
|
||||
def _on_click(self, x: int, y: int, button, pressed: bool) -> None:
|
||||
if not self._running or not pressed:
|
||||
return
|
||||
|
||||
event = {
|
||||
"t": self._relative_time(),
|
||||
"type": "mouse_click",
|
||||
"button": button.name,
|
||||
"pos": [x, y],
|
||||
"window": self._get_window_context(),
|
||||
"screenshot_id": None,
|
||||
}
|
||||
self._emit_event(event)
|
||||
|
||||
def _on_scroll(self, x: int, y: int, dx: int, dy: int) -> None:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
event = {
|
||||
"t": self._relative_time(),
|
||||
"type": "mouse_scroll",
|
||||
"delta": dy * 120,
|
||||
"pos": [x, y],
|
||||
"window": self._get_window_context(),
|
||||
"screenshot_id": None,
|
||||
}
|
||||
self._emit_event(event)
|
||||
|
||||
def _on_move(self, x: int, y: int) -> None:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
event = {
|
||||
"t": self._relative_time(),
|
||||
"type": "mouse_move",
|
||||
"pos": [x, y],
|
||||
"window": self._get_window_context(),
|
||||
"screenshot_id": None,
|
||||
}
|
||||
self._emit_event(event)
|
||||
|
||||
# === Handlers clavier ===
|
||||
|
||||
def _on_key_press(self, key) -> None:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
key_name = self._key_to_string(key)
|
||||
|
||||
event = {
|
||||
"t": self._relative_time(),
|
||||
"type": "key_press",
|
||||
"keys": [key_name],
|
||||
"window": self._get_window_context(),
|
||||
"screenshot_id": None,
|
||||
}
|
||||
self._emit_event(event)
|
||||
|
||||
def _on_key_release(self, key) -> None:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
key_name = self._key_to_string(key)
|
||||
|
||||
event = {
|
||||
"t": self._relative_time(),
|
||||
"type": "key_release",
|
||||
"keys": [key_name],
|
||||
"window": self._get_window_context(),
|
||||
"screenshot_id": None,
|
||||
}
|
||||
self._emit_event(event)
|
||||
|
||||
@staticmethod
|
||||
def _key_to_string(key) -> str:
|
||||
"""Convertir une touche pynput en string lisible."""
|
||||
if hasattr(key, 'char') and key.char:
|
||||
return key.char
|
||||
if hasattr(key, 'name'):
|
||||
return key.name.upper()
|
||||
return str(key)
|
||||
344
core/capture/session_recorder.py
Normal file
344
core/capture/session_recorder.py
Normal file
@@ -0,0 +1,344 @@
|
||||
"""
|
||||
SessionRecorder - Enregistrement de sessions RPA complètes
|
||||
|
||||
Orchestre EventListener + ScreenCapturer pour produire un RawSession :
|
||||
- Capture les événements clavier/souris en continu
|
||||
- Prend un screenshot à chaque clic (ou périodiquement)
|
||||
- Sauvegarde les screenshots sur disque
|
||||
- Produit un RawSession complet avec events + screenshots liés
|
||||
|
||||
Usage:
|
||||
>>> recorder = SessionRecorder(output_dir="data/sessions")
|
||||
>>> recorder.start(workflow_name="login_workflow")
|
||||
>>> # ... l'utilisateur effectue ses actions ...
|
||||
>>> session = recorder.stop()
|
||||
>>> print(f"{len(session.events)} events, {len(session.screenshots)} screenshots")
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Callable, Dict, Any, List
|
||||
|
||||
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SessionRecorder:
|
||||
"""
|
||||
Enregistreur de sessions RPA complet.
|
||||
|
||||
Combine EventListener (clavier/souris) et ScreenCapturer (screenshots)
|
||||
pour produire une RawSession exploitable par le GraphBuilder.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir: str = "data/training/sessions",
|
||||
screenshot_on_click: bool = True,
|
||||
screenshot_interval_ms: int = 0,
|
||||
capture_keyboard: bool = True,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
output_dir: Répertoire de sortie pour les sessions
|
||||
screenshot_on_click: Prendre un screenshot à chaque clic
|
||||
screenshot_interval_ms: Intervalle de capture périodique (0 = désactivé)
|
||||
capture_keyboard: Capturer les frappes clavier
|
||||
"""
|
||||
self.output_dir = Path(output_dir)
|
||||
self.screenshot_on_click = screenshot_on_click
|
||||
self.screenshot_interval_ms = screenshot_interval_ms
|
||||
self.capture_keyboard = capture_keyboard
|
||||
|
||||
self._session: Optional[RawSession] = None
|
||||
self._session_dir: Optional[Path] = None
|
||||
self._screenshots_dir: Optional[Path] = None
|
||||
self._running = False
|
||||
self._screenshot_counter = 0
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# Composants (lazy init)
|
||||
self._event_listener = None
|
||||
self._screen_capturer = None
|
||||
self._periodic_thread: Optional[threading.Thread] = None
|
||||
|
||||
# Callbacks optionnels
|
||||
self._on_event: Optional[Callable[[Dict[str, Any]], None]] = None
|
||||
self._on_screenshot: Optional[Callable[[str], None]] = None
|
||||
|
||||
def start(
    self,
    workflow_name: str = "",
    session_id: Optional[str] = None,
    on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
    on_screenshot: Optional[Callable[[str], None]] = None,
) -> str:
    """
    Start recording a session.

    If a recording is already in progress, a warning is logged and the
    current session id is returned (or "" when there is no session
    object). Otherwise the output directories are created, a new
    RawSession is initialised, the event listener is started, and the
    periodic screenshot thread is started when an interval is set.

    Args:
        workflow_name: Workflow name stored in the session context.
        session_id: Session identifier; derived from the current
            date and time (second resolution) when None.
        on_event: Callback invoked for every recorded event.
        on_screenshot: Callback invoked with the path of every
            screenshot taken.

    Returns:
        The id of the session that is running after the call.
    """
    if self._running:
        logger.warning("SessionRecorder déjà en cours")
        current = self._session
        return current.session_id if current else ""

    # Derive a timestamp-based id when the caller did not provide one.
    if session_id is None:
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        session_id = f"session_{stamp}"

    # Output layout for this session.
    # NOTE(review): session_id appears twice in the screenshots path
    # (<output_dir>/<id>/<id>/screenshots), while Screenshot entries use
    # "screenshots/<file>" and the JSON is saved directly under
    # <output_dir>/<id>. Confirm whether this nesting is intentional.
    self._session_dir = self.output_dir / session_id
    self._screenshots_dir = self._session_dir / session_id / "screenshots"
    self._screenshots_dir.mkdir(parents=True, exist_ok=True)

    # Fresh session object describing this recording.
    session_fields = dict(
        session_id=session_id,
        agent_version="rpa_vision_v3",
        environment=self._get_environment(),
        user={"id": os.getenv("USER", "unknown")},
        context={"workflow": workflow_name, "tags": []},
        started_at=datetime.now(),
    )
    self._session = RawSession(**session_fields)

    self._screenshot_counter = 0
    self._on_event = on_event
    self._on_screenshot = on_screenshot
    self._running = True

    # Keyboard/mouse capture.
    self._start_event_listener()

    # Optional periodic screenshots.
    if self.screenshot_interval_ms > 0:
        self._start_periodic_capture()

    logger.info(
        f"SessionRecorder démarré: {session_id} "
        f"(screenshots_dir={self._screenshots_dir})"
    )
    return session_id
|
||||
|
||||
def stop(self) -> RawSession:
    """
    Stop recording, persist the session as JSON and return it.

    When no recording is running, a warning is logged and the current
    session attribute is returned unchanged (it is None if ``start()``
    was never called).

    Returns:
        The RawSession holding the recorded events and screenshots.
    """
    if not self._running:
        logger.warning("SessionRecorder non démarré")
        return self._session

    # Clearing the flag lets the periodic loop and the handlers wind down.
    self._running = False

    # Give the periodic capture thread up to two seconds to finish.
    worker = self._periodic_thread
    if worker and worker.is_alive():
        worker.join(timeout=2)

    # Stop keyboard/mouse capture.
    if self._event_listener:
        self._event_listener.stop()

    # Finalise and save the session next to its screenshots directory.
    session = self._session
    session.ended_at = datetime.now()
    session_path = self._session_dir / f"{session.session_id}.json"
    session.save_to_file(session_path)

    logger.info(
        f"SessionRecorder arrêté: {session.session_id} "
        f"({len(session.events)} events, "
        f"{len(session.screenshots)} screenshots) "
        f"→ {session_path}"
    )

    return session
|
||||
|
||||
@property
def is_running(self) -> bool:
    """Whether a recording is currently in progress."""
    active = self._running
    return active
|
||||
|
||||
@property
def event_count(self) -> int:
    """Number of events in the current session (0 when there is none)."""
    session = self._session
    if not session:
        return 0
    return len(session.events)
|
||||
|
||||
@property
def screenshot_count(self) -> int:
    """Number of screenshots in the current session (0 when there is none)."""
    session = self._session
    if not session:
        return 0
    return len(session.screenshots)
|
||||
|
||||
# =========================================================================
|
||||
# Capture d'événements
|
||||
# =========================================================================
|
||||
|
||||
def _start_event_listener(self) -> None:
    """
    Create and start the keyboard/mouse EventListener.

    The listener is imported lazily. An ImportError (raised by the
    import or by the EventListener constructor) is downgraded to a
    warning so recording can continue without input events.
    """
    try:
        from core.capture.event_listener import EventListener

        self._event_listener = EventListener(capture_mouse_move=False)
        # Every raw event is routed through _on_raw_event.
        self._event_listener.start(callback=self._on_raw_event)
    except ImportError:
        unavailable_msg = (
            "EventListener non disponible (pynput manquant). "
            "Seuls les screenshots périodiques seront capturés."
        )
        logger.warning(unavailable_msg)
    else:
        logger.info("EventListener démarré")
|
||||
|
||||
def _on_raw_event(self, raw_event: Dict[str, Any]) -> None:
|
||||
"""Callback appelé par EventListener pour chaque événement."""
|
||||
if not self._running or not self._session:
|
||||
return
|
||||
|
||||
# Convertir en Event
|
||||
event = Event(
|
||||
t=raw_event.get("t", 0.0),
|
||||
type=raw_event.get("type", "unknown"),
|
||||
window=RawWindowContext(
|
||||
title=raw_event.get("window", {}).get("title", "Unknown"),
|
||||
app_name=raw_event.get("window", {}).get("app_name", "unknown"),
|
||||
),
|
||||
screenshot_id=None,
|
||||
data={
|
||||
k: v
|
||||
for k, v in raw_event.items()
|
||||
if k not in ("t", "type", "window", "screenshot_id")
|
||||
},
|
||||
)
|
||||
|
||||
# Screenshot sur clic
|
||||
if self.screenshot_on_click and event.type == "mouse_click":
|
||||
screenshot_id = self._take_screenshot()
|
||||
if screenshot_id:
|
||||
event.screenshot_id = screenshot_id
|
||||
|
||||
with self._lock:
|
||||
self._session.add_event(event)
|
||||
|
||||
# Callback utilisateur
|
||||
if self._on_event:
|
||||
try:
|
||||
self._on_event(raw_event)
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur callback on_event: {e}")
|
||||
|
||||
# =========================================================================
|
||||
# Capture de screenshots
|
||||
# =========================================================================
|
||||
|
||||
def _take_screenshot(self) -> Optional[str]:
|
||||
"""Prendre un screenshot et le sauvegarder."""
|
||||
if not self._running or not self._session:
|
||||
return None
|
||||
|
||||
try:
|
||||
self._ensure_screen_capturer()
|
||||
if self._screen_capturer is None:
|
||||
return None
|
||||
|
||||
frame = self._screen_capturer.capture_frame()
|
||||
if frame is None:
|
||||
return None
|
||||
|
||||
# Sauvegarder
|
||||
self._screenshot_counter += 1
|
||||
screenshot_id = f"ss_{self._screenshot_counter:04d}"
|
||||
filename = f"screen_{self._screenshot_counter:04d}.png"
|
||||
filepath = self._screenshots_dir / filename
|
||||
|
||||
self._screen_capturer.save_frame(frame, str(filepath))
|
||||
|
||||
# Enregistrer dans la session
|
||||
screenshot = Screenshot(
|
||||
screenshot_id=screenshot_id,
|
||||
relative_path=f"screenshots/{filename}",
|
||||
captured_at=datetime.now().isoformat(),
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self._session.add_screenshot(screenshot)
|
||||
|
||||
# Callback utilisateur
|
||||
if self._on_screenshot:
|
||||
try:
|
||||
self._on_screenshot(str(filepath))
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur callback on_screenshot: {e}")
|
||||
|
||||
return screenshot_id
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur capture screenshot: {e}")
|
||||
return None
|
||||
|
||||
def _ensure_screen_capturer(self) -> None:
|
||||
"""Initialiser le ScreenCapturer (lazy)."""
|
||||
if self._screen_capturer is not None:
|
||||
return
|
||||
|
||||
try:
|
||||
from core.capture.screen_capturer import ScreenCapturer
|
||||
|
||||
self._screen_capturer = ScreenCapturer(
|
||||
buffer_size=5,
|
||||
detect_changes=False,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenCapturer non disponible: {e}")
|
||||
|
||||
def _start_periodic_capture(self) -> None:
    """
    Start the background thread that takes screenshots periodically.

    The worker takes a screenshot, then waits ``screenshot_interval_ms``
    before the next one. The wait is done in short slices that re-check
    the running flag, so the thread exits shortly after the recorder is
    stopped, even when the interval is longer than the join timeout
    used by ``stop()``.
    """
    interval_s = self.screenshot_interval_ms / 1000.0
    # Longest time the worker sleeps before re-checking the running flag.
    poll_s = 0.1

    def _periodic_loop():
        while self._running:
            self._take_screenshot()

            # Interruptible wait until the next capture is due.
            deadline = time.monotonic() + interval_s
            while self._running:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                time.sleep(min(poll_s, remaining))

    self._periodic_thread = threading.Thread(
        target=_periodic_loop, daemon=True, name="periodic_capture"
    )
    self._periodic_thread.start()
    logger.info(
        f"Capture périodique démarrée (intervalle={self.screenshot_interval_ms}ms)"
    )
|
||||
|
||||
# =========================================================================
|
||||
# Helpers
|
||||
# =========================================================================
|
||||
|
||||
def _get_environment(self) -> Dict[str, Any]:
|
||||
"""Collecter les informations d'environnement."""
|
||||
env = {
|
||||
"os": platform.system().lower(),
|
||||
"os_version": platform.version(),
|
||||
"hostname": platform.node(),
|
||||
"screen": {},
|
||||
}
|
||||
|
||||
# Résolution d'écran
|
||||
try:
|
||||
self._ensure_screen_capturer()
|
||||
if self._screen_capturer:
|
||||
w, h = self._screen_capturer.get_screen_resolution()
|
||||
env["screen"] = {
|
||||
"primary_resolution": [w, h],
|
||||
}
|
||||
except Exception:
|
||||
env["screen"] = {"primary_resolution": [1920, 1080]}
|
||||
|
||||
return env
|
||||
Reference in New Issue
Block a user