"""
|
||
StreamProcessor — Pont entre le streaming Agent V1 et le core pipeline RPA Vision V3.
|
||
|
||
Orchestre les composants core (ScreenAnalyzer, CLIP, FAISS, GraphBuilder)
|
||
pour traiter en temps réel les screenshots et événements reçus via fibre.
|
||
|
||
Tous les calculs GPU tournent ici (serveur RTX 5070).
|
||
"""
|
||
|
||
import hashlib
|
||
import logging
|
||
import os
|
||
import threading
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
import numpy as np
|
||
|
||
from .live_session_manager import LiveSessionManager
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class StreamProcessor:
|
||
"""
|
||
Processeur de streaming qui connecte les données Agent V1 au core pipeline.
|
||
|
||
Cycle de vie :
|
||
1. register_session() — crée l'état mémoire
|
||
2. process_event() — accumule événements, extrait contexte fenêtre
|
||
3. process_screenshot() — analyse via ScreenAnalyzer + CLIP embedding
|
||
4. finalize_session() — construit le Workflow via GraphBuilder (DBSCAN)
|
||
"""
|
||
|
||
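    # Minimal usage sketch of the lifecycle above (illustrative only; the
    # session id, payload, and paths are hypothetical):
    #
    #   proc = StreamProcessor(data_dir="data/training")
    #   proc.process_event("sess_01", {"t": 0.0, "type": "click",
    #                                  "window": {"title": "Chrome", "app_name": "chrome.exe"}})
    #   proc.process_screenshot("sess_01", "shot_0001_full", "shots/shot_0001_full.png")
    #   summary = proc.finalize_session("sess_01")  # -> {"status": "workflow_built", ...}
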
    def __init__(self, data_dir: str = "data/training"):
        self.data_dir = Path(data_dir)
        persist_dir = str(self.data_dir / "streaming_sessions")
        self.session_manager = LiveSessionManager(persist_dir=persist_dir)
        self._lock = threading.Lock()

        # Core components (lazy-loaded to avoid heavy imports at startup)
        self._screen_analyzer = None
        self._clip_embedder = None
        self._state_embedding_builder = None  # P0-3: unified embedding pipeline (multi-modal fusion)
        self._faiss_manager = None
        self._initialized = False

        # Lock for concurrent access to session data (screen_states, embeddings, workflows)
        self._data_lock = threading.Lock()

        # Analysis results per session
        self._screen_states: Dict[str, list] = {}  # session_id -> List[ScreenState]
        self._embeddings: Dict[str, list] = {}  # session_id -> List[np.ndarray]

        # Built workflows (used for matching)
        self._workflows: Dict[str, Any] = {}

        # Load existing workflows from disk
        self._load_persisted_workflows()

    def _load_persisted_workflows(self):
        """Load saved workflows from disk at startup.

        Scans the main workflows/ directory as well as the per-machine
        subdirectories (workflows/{machine_id}/) for backward compatibility.
        """
        workflows_dir = self.data_dir / "workflows"
        if not workflows_dir.exists():
            return

        try:
            from core.models.workflow_graph import Workflow

            count = 0
            # Load workflows from the root directory (backward compatibility)
            for wf_file in sorted(workflows_dir.glob("*.json")):
                try:
                    wf = Workflow.load_from_file(wf_file)
                    self._workflows[wf.workflow_id] = wf
                    count += 1
                except Exception as e:
                    logger.warning(f"Could not load {wf_file.name}: {e}")

            # Load workflows from the per-machine subdirectories
            for machine_dir in sorted(workflows_dir.iterdir()):
                if not machine_dir.is_dir():
                    continue
                for wf_file in sorted(machine_dir.glob("*.json")):
                    try:
                        wf = Workflow.load_from_file(wf_file)
                        # Store the machine_id in the workflow metadata
                        if not hasattr(wf, '_machine_id'):
                            wf._machine_id = machine_dir.name
                        self._workflows[wf.workflow_id] = wf
                        count += 1
                    except Exception as e:
                        logger.warning(f"Could not load {wf_file.name}: {e}")

            if count:
                logger.info(f"{count} workflow(s) loaded from {workflows_dir}")
        except ImportError:
            logger.debug("core.models.workflow_graph unavailable, skipping load")

    def _ensure_initialized(self):
        """Load the core GPU components if not done yet."""
        if self._initialized:
            return

        with self._lock:
            if self._initialized:
                return

            logger.info("Initializing core components (GPU)...")

            try:
                from core.pipeline.screen_analyzer import ScreenAnalyzer
                self._screen_analyzer = ScreenAnalyzer(session_id="stream_server")
                logger.info(" ScreenAnalyzer ready")
            except Exception as e:
                logger.error(f" ScreenAnalyzer init error: {e}")
                self._screen_analyzer = None

            try:
                from core.embedding.clip_embedder import CLIPEmbedder
                self._clip_embedder = CLIPEmbedder()
                logger.info(" CLIPEmbedder ready (singleton, will not be reloaded)")
            except Exception as e:
                logger.error(f" CLIPEmbedder init error: {e}")
                self._clip_embedder = None

            # P0-3: initialize the StateEmbeddingBuilder to unify the embedding space.
            # Reuses the same CLIPEmbedder (no model reload) + FusionEngine to
            # produce fused vectors (image+text+title+ui) identical to GraphBuilder's.
            try:
                from core.embedding.state_embedding_builder import StateEmbeddingBuilder
                if self._clip_embedder is not None:
                    # Inject the already-loaded CLIPEmbedder to avoid loading it twice
                    self._state_embedding_builder = StateEmbeddingBuilder(
                        embedders={
                            "image": self._clip_embedder,
                            "text": self._clip_embedder,
                            "title": self._clip_embedder,
                            "ui": self._clip_embedder,
                        },
                        output_dir=self.data_dir / "embeddings",
                        use_clip=False,  # Not needed, the embedders are provided directly
                    )
                else:
                    # Fallback: let the builder create its own CLIPEmbedder
                    self._state_embedding_builder = StateEmbeddingBuilder(
                        output_dir=self.data_dir / "embeddings",
                        use_clip=True,
                    )
                logger.info(" StateEmbeddingBuilder ready (unified multi-modal fusion)")
            except Exception as e:
                logger.warning(f" StateEmbeddingBuilder unavailable, falling back to pure CLIP: {e}")
                self._state_embedding_builder = None

            try:
                from core.embedding.faiss_manager import FAISSManager
                self._faiss_manager = FAISSManager(
                    dimensions=512,
                    index_type="Flat",
                    metric="cosine",
                )
                logger.info(" FAISSManager ready (512 dims, cosine)")
            except Exception as e:
                logger.error(f" FAISSManager init error: {e}")
                self._faiss_manager = None

            self._initialized = True
            logger.info("Core components initialized.")

    # =========================================================================
    # Events
    # =========================================================================

    def process_event(self, session_id: str, event_data: Dict[str, Any]) -> Dict[str, Any]:
        """Record an event in the live session."""
        self.session_manager.add_event(session_id, event_data)
        return {"status": "event_recorded", "session_id": session_id}

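    # Event payloads are free-form dicts; judging by how they are re-parsed in
    # _build_raw_session_fallback(), a typical one looks like (values and the
    # x/y fields are hypothetical):
    #
    #   {"t": 12.4, "type": "click",
    #    "window": {"title": "Chrome - Facturation", "app_name": "chrome.exe"},
    #    "screenshot_id": "shot_0003_full", "x": 512, "y": 300}
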
    # =========================================================================
    # Screenshots
    # =========================================================================

    def process_screenshot(self, session_id: str, shot_id: str, file_path: str) -> Dict[str, Any]:
        """
        Analyze a full screenshot through the core pipeline.

        1. ScreenAnalyzer -> ScreenState (OCR, UI detection)
        2. StateEmbeddingBuilder -> fused 512d vector (image+text+title+ui),
           in the same embedding space as GraphBuilder (P0-3).
           Fallback: CLIP embed_image() if the StateEmbeddingBuilder fails.
        3. FAISS indexing -> real-time matching
        """
        self._ensure_initialized()
        self.session_manager.add_screenshot(session_id, shot_id, file_path)

        result = {
            "shot_id": shot_id,
            "session_id": session_id,
            "state_id": None,
            "ui_elements_count": 0,
            "text_detected": 0,
            "embedding_indexed": False,
            "match": None,
        }

        # 1. Build the ScreenState
        if self._screen_analyzer is None:
            logger.warning("ScreenAnalyzer unavailable, skipping analysis")
            return result

        session = self.session_manager.get_session(session_id)
        window_info = session.last_window_info if session else {}

        try:
            screen_state = self._screen_analyzer.analyze(
                screenshot_path=file_path,
                window_info=window_info,
            )
            result["state_id"] = screen_state.screen_state_id
            result["ui_elements_count"] = len(screen_state.ui_elements)
            result["text_detected"] = len(
                getattr(screen_state.perception, "detected_text", [])
            )

            # Keep the ScreenState for the final build
            with self._data_lock:
                if session_id not in self._screen_states:
                    self._screen_states[session_id] = []
                self._screen_states[session_id].append(screen_state)

            logger.info(
                f"Screenshot analyzed: {shot_id} | "
                f"{result['ui_elements_count']} UI elements, "
                f"{result['text_detected']} texts"
            )
        except Exception as e:
            logger.error(f"Screenshot analysis error {shot_id}: {e}")
            return result

        # 2. Build the fused embedding via the StateEmbeddingBuilder (P0-3).
        # Uses the same pipeline as GraphBuilder (image+text+title+ui fusion)
        # to guarantee the FAISS vectors live in the same embedding space.
        embedding_vector = None

        if self._state_embedding_builder is not None:
            try:
                state_embedding = self._state_embedding_builder.build(screen_state)
                # Retrieve the fused vector from the StateEmbedding
                fused_vec = state_embedding.get_vector()
                if fused_vec is not None:
                    embedding_vector = fused_vec.astype(np.float32)
                    logger.debug(
                        f"Fused multi-modal embedding computed for {shot_id} "
                        f"(dim={embedding_vector.shape[0]})"
                    )
            except Exception as e:
                logger.warning(
                    f"StateEmbeddingBuilder failed for {shot_id}: {e}, "
                    f"falling back to pure CLIP"
                )

        # Fallback 1: embedding precomputed in the ScreenState (if available)
        if embedding_vector is None:
            if hasattr(screen_state, "perception") and screen_state.perception:
                emb_ref = getattr(screen_state.perception, "embedding", None)
                if emb_ref and hasattr(emb_ref, "vector") and emb_ref.vector is not None:
                    embedding_vector = np.array(emb_ref.vector, dtype=np.float32)

        # Fallback 2: use the CLIPEmbedder singleton (image-only embedding)
        if embedding_vector is None and self._clip_embedder is not None:
            try:
                from PIL import Image
                pil_image = Image.open(file_path)
                embedding_vector = self._clip_embedder.embed_image(pil_image)
            except Exception as e:
                logger.debug(f"CLIP embedding failed: {e}")

        if embedding_vector is not None:
            # Keep it for the final build
            with self._data_lock:
                if session_id not in self._embeddings:
                    self._embeddings[session_id] = []
                self._embeddings[session_id].append(embedding_vector)

            # 3. Index in FAISS
            if self._faiss_manager is not None:
                try:
                    self._faiss_manager.add_embedding(
                        embedding_id=screen_state.screen_state_id,
                        vector=embedding_vector,
                        metadata={
                            "session_id": session_id,
                            "shot_id": shot_id,
                            "window_title": window_info.get("title", ""),
                        },
                    )
                    result["embedding_indexed"] = True
                except Exception as e:
                    logger.error(f"FAISS indexing error: {e}")

        # 4. Real-time matching against the known workflows
        with self._data_lock:
            has_workflows = bool(self._workflows)
        if embedding_vector is not None and has_workflows:
            result["match"] = self._try_match(embedding_vector)

        return result

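    # Illustrative shape of a successful return value (all values hypothetical):
    #
    #   {"shot_id": "shot_0003_full", "session_id": "sess_01",
    #    "state_id": "state_ab12", "ui_elements_count": 14, "text_detected": 23,
    #    "embedding_indexed": True,
    #    "match": {"matched_id": "state_9f0e", "similarity": 0.9132, "metadata": {...}}}
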
    def process_crop(self, session_id: str, shot_id: str, file_path: str) -> Dict[str, Any]:
        """
        Record a crop (400x400). No ScreenAnalyzer analysis
        (a crop is a fragment, not a full screen).
        """
        self.session_manager.add_screenshot(session_id, shot_id, file_path)
        return {"status": "crop_stored", "shot_id": shot_id}

    # =========================================================================
    # Finalization
    # =========================================================================

    def finalize_session(self, session_id: str) -> Dict[str, Any]:
        """
        Build a Workflow from the accumulated data.

        Uses the core GraphBuilder with the ScreenStates and embeddings
        collected during streaming.
        """
        self._ensure_initialized()

        session = self.session_manager.finalize(session_id)
        if not session:
            return {"error": f"Session {session_id} not found"}

        with self._data_lock:
            states = list(self._screen_states.get(session_id, []))
            embeddings = list(self._embeddings.get(session_id, []))

        if len(states) < 2:
            logger.warning(
                f"Session {session_id}: only {len(states)} states, "
                f"not enough to build a workflow"
            )
            return {
                "session_id": session_id,
                "status": "insufficient_data",
                "states_count": len(states),
                "min_required": 2,
            }

        # Convert to a RawSession for the GraphBuilder
        raw_dict = self.session_manager.to_raw_session(session_id)
        if not raw_dict:
            return {"error": "RawSession conversion failed"}

        try:
            from core.models.raw_session import RawSession
            raw_session = RawSession.from_dict(raw_dict)
        except Exception as e:
            logger.error(f"RawSession construction error: {e}")
            # Fallback: build it manually
            try:
                raw_session = self._build_raw_session_fallback(session, raw_dict)
            except Exception as e2:
                return {"error": f"RawSession error: {e2}"}

        # Build the workflow via GraphBuilder
        try:
            from core.graph.graph_builder import GraphBuilder

            # Scale the pattern-repetition threshold with session size:
            # fewer than 10 states -> 2, 10 to 30 -> 3, above 30 -> n // 10 capped at 5
            n = len(states)
            min_reps = 2 if n < 10 else 3 if n <= 30 else min(5, n // 10)

            builder = GraphBuilder(
                min_pattern_repetitions=min_reps,
                clustering_eps=0.15,
                clustering_min_samples=2,
            )

            # Name the workflow intelligently from the window titles
            workflow_name = self._generate_workflow_name(session_id)

            # Inject the precomputed ScreenStates to avoid re-analyzing
            workflow = builder.build_from_session(
                raw_session,
                workflow_name=workflow_name,
                precomputed_states=states,
            )

            with self._data_lock:
                self._workflows[workflow.workflow_id] = workflow

            # Persist to disk (in the source machine's directory)
            machine_id = session.machine_id if hasattr(session, 'machine_id') else "default"
            saved_path = self._persist_workflow(workflow, session_id, machine_id=machine_id)
            # Store the machine_id on the workflow for filtering
            workflow._machine_id = machine_id

            # Collect the session's application metadata
            session_state = self.session_manager.get_session(session_id)
            app_context = {}
            if session_state:
                app_context = {
                    "window_titles": dict(session_state.window_titles_seen),
                    "app_names": dict(session_state.app_names_seen),
                    "primary_app": sorted(
                        session_state.app_names_seen.items(),
                        key=lambda x: -x[1]
                    )[0][0] if session_state.app_names_seen else None,
                    "multi_app": len(session_state.app_names_seen) >= 3,
                }

            result = {
                "session_id": session_id,
                "machine_id": machine_id,
                "status": "workflow_built",
                "workflow_id": workflow.workflow_id,
                "workflow_name": workflow_name,
                "nodes": len(workflow.nodes),
                "edges": len(workflow.edges),
                "states_analyzed": len(states),
                "embeddings_indexed": len(embeddings),
                "saved_path": str(saved_path) if saved_path else None,
                "app_context": app_context,
            }

            logger.info(
                f"Workflow built: '{workflow_name}' ({workflow.workflow_id}) | "
                f"{result['nodes']} nodes, {result['edges']} edges"
                + (f" | apps: {list(app_context.get('app_names', {}).keys())}" if app_context.get('app_names') else "")
            )

            # Free the session data memory (it can be heavy)
            self._cleanup_session_data(session_id)

            return result

        except Exception as e:
            logger.error(f"Workflow construction error: {e}")
            return {"error": f"GraphBuilder: {e}", "session_id": session_id}

    # =========================================================================
    # Matching
    # =========================================================================

    def _try_match(self, embedding_vector: np.ndarray) -> Optional[Dict[str, Any]]:
        """Match an embedding against the known workflows."""
        if self._faiss_manager is None or self._faiss_manager.index.ntotal == 0:
            return None

        try:
            results = self._faiss_manager.search_similar(
                query_vector=embedding_vector,
                k=1,
                min_similarity=0.85,
            )
            if results:
                best = results[0]
                return {
                    "matched_id": best.embedding_id,
                    "similarity": round(best.similarity, 4),
                    "metadata": best.metadata,
                }
        except Exception as e:
            logger.debug(f"Matching error: {e}")

        return None

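    # Illustrative shape of a successful match (values hypothetical):
    #
    #   {"matched_id": "state_9f0e", "similarity": 0.9132,
    #    "metadata": {"session_id": "sess_01", "shot_id": "shot_0003_full",
    #                 "window_title": "Chrome - Facturation"}}
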
    # =========================================================================
    # Reprocessing (called by the SessionWorker)
    # =========================================================================

    def reprocess_session(
        self,
        session_id: str,
        progress_callback=None,
    ) -> Dict[str, Any]:
        """Reprocess a finalized session: analyze every screenshot, then build the workflow.

        Used by the SessionWorker to process sessions in the background.
        Looks for shot_*_full.png files on disk, analyzes them one by one
        via process_screenshot(), then calls finalize_session() to build
        the workflow.

        Args:
            session_id: Identifier of the session to reprocess.
            progress_callback: Callable(session_id, current, total, shot_id) reporting progress.

        Returns:
            Dict with the result of finalize_session(), or an error dict.
        """
        logger.info(f"Reprocessing session {session_id}")

        # Locate the session directory on disk.
        # Screenshots may live in:
        # - data/training/live_sessions/{session_id}/shots/
        # - data/training/live_sessions/{machine_id}/{session_id}/shots/
        session_dir = self._find_session_dir(session_id)
        if not session_dir:
            return {"error": f"Session directory for {session_id} not found on disk"}

        shots_dir = session_dir / "shots"
        if not shots_dir.exists():
            return {"error": f"shots/ directory not found for {session_id}"}

        # List the full screenshots (shot_XXXX_full.png), sorted by name
        all_shots = sorted(shots_dir.glob("shot_*_full.png"))
        if not all_shots:
            return {
                "error": f"No shot_*_full.png screenshot found in {shots_dir}",
                "session_id": session_id,
            }

        # Smart selection: keep only the significant screenshots to avoid
        # analyzing redundant (near-identical) captures
        key_shots = self._select_key_screenshots(session_id, all_shots)
        total_all = len(all_shots)
        total = len(key_shots)
        logger.info(
            f"Screenshots selected: {total}/{total_all} "
            f"(perceptual deduplication) in {shots_dir}"
        )

        # Make sure the session is registered in the session_manager
        self.session_manager.get_or_create(session_id)

        # Clear the in-memory data (in case a previous run failed)
        with self._data_lock:
            self._screen_states.pop(session_id, None)
            self._embeddings.pop(session_id, None)

        # Analyze each selected screenshot
        errors = 0
        for i, shot_file in enumerate(key_shots):
            shot_id = shot_file.stem  # e.g. "shot_0001_full"
            file_path = str(shot_file)

            if progress_callback:
                try:
                    progress_callback(session_id, i + 1, total, shot_id)
                except Exception:
                    pass

            try:
                result = self.process_screenshot(session_id, shot_id, file_path)
                if result.get("state_id") is None:
                    logger.warning(
                        f"Screenshot {shot_id}: analysis failed (no state_id)"
                    )
                    errors += 1
            except Exception as e:
                logger.error(f"Screenshot analysis error {shot_id}: {e}")
                errors += 1

        # Check how many states were produced
        with self._data_lock:
            states_count = len(self._screen_states.get(session_id, []))

        logger.info(
            f"Session {session_id}: {states_count}/{total} screenshots analyzed "
            f"({errors} errors, {total_all - total} skipped by dedup)"
        )

        # Build the workflow via finalize_session().
        # Note: the session_manager's finalize() already ran when the session
        # was marked as finalized, so it does not need to run again.
        # finalize_session() uses the accumulated screen_states.
        result = self.finalize_session(session_id)
        return result

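    # Sketch of a progress_callback compatible with the signature documented
    # above (the function name is hypothetical):
    #
    #   def on_progress(session_id, current, total, shot_id):
    #       logger.info(f"[{session_id}] {current}/{total}: {shot_id}")
    #
    #   processor.reprocess_session("sess_01", progress_callback=on_progress)
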
    def _select_key_screenshots(self, session_id: str, shot_paths: List[Path]) -> List[Path]:
        """Select only the significant screenshots to avoid redundant analyses.

        Criteria:
        1. Always keep the first and last screenshots
        2. Compare each screenshot to the previous one via a perceptual hash (32x32 grayscale)
        3. If the image is identical to the previous one -> skip (same screen, no change)
        4. Favor action screenshots (shot_*_full) over heartbeats

        Typically reduces 12 screenshots to the 3-4 useful ones.
        """
        if len(shot_paths) <= 2:
            return list(shot_paths)

        from PIL import Image

        selected = []
        last_hash = None

        for path in shot_paths:
            basename = os.path.basename(str(path))

            # Action screenshots take priority
            is_action = 'shot_' in basename and '_full' in basename

            # Perceptual hash: resize to 32x32 in grayscale.
            # Discriminating enough to detect UI state changes.
            try:
                img = Image.open(str(path)).resize((32, 32)).convert('L')
                current_hash = hashlib.md5(img.tobytes()).hexdigest()
            except Exception as e:
                logger.debug(f"Could not hash {basename}: {e}")
                # On error, include the screenshot to be safe
                selected.append(path)
                continue

            # Include if this is the first screenshot or the hash changed
            if last_hash is None or current_hash != last_hash:
                selected.append(path)
                last_hash = current_hash
            elif is_action:
                # Action shot but visually identical -- skip it anyway,
                # since the screen state has not changed
                logger.debug(f"Action screenshot {basename} identical to previous, skipping")

        # Guarantee that the first and last screenshots are always included
        if shot_paths[0] not in selected:
            selected.insert(0, shot_paths[0])
        if shot_paths[-1] not in selected:
            selected.append(shot_paths[-1])

        return selected

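    # Note on the "perceptual hash" above: it is an exact MD5 of the 32x32
    # grayscale thumbnail, so only frames whose downscaled pixels match
    # byte-for-byte are deduplicated; collapsing near-duplicates (compression
    # noise, cursor blink) would require a true aHash/pHash instead.
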
    def _find_session_dir(self, session_id: str) -> Optional[Path]:
        """Find a session's directory on disk.

        Looks in:
        1. data/training/live_sessions/{session_id}/
        2. data/training/live_sessions/{machine_id}/{session_id}/ (multi-machine)
        """
        # Direct path
        direct = self.data_dir / session_id
        if direct.is_dir() and (direct / "shots").exists():
            return direct

        # Look in the subdirectories (machine_id)
        parent = self.data_dir
        if parent.exists():
            for subdir in parent.iterdir():
                if subdir.is_dir():
                    candidate = subdir / session_id
                    if candidate.is_dir() and (candidate / "shots").exists():
                        return candidate

        # Also look in the parent of data_dir (for the case data_dir = streaming_sessions)
        parent_parent = self.data_dir.parent
        if parent_parent.exists() and parent_parent != self.data_dir:
            direct2 = parent_parent / session_id
            if direct2.is_dir() and (direct2 / "shots").exists():
                return direct2
            for subdir in parent_parent.iterdir():
                if subdir.is_dir() and subdir.name != self.data_dir.name:
                    candidate = subdir / session_id
                    if candidate.is_dir() and (candidate / "shots").exists():
                        return candidate

        return None

    def find_pending_sessions(self) -> List[str]:
        """Find finalized sessions that have not been processed yet.

        A session is "pending" if:
        - It is marked as finalized in the session_manager
        - It has 0 ScreenStates in memory (never analyzed, or the analysis was lost)
        - It has full screenshots on disk

        Returns:
            List of session_ids to process.
        """
        pending = []
        for sid in self.session_manager.session_ids:
            session = self.session_manager.get_session(sid)
            if session is None:
                continue
            if not session.finalized:
                continue

            # Check whether states already exist
            with self._data_lock:
                states_count = len(self._screen_states.get(sid, []))
            if states_count > 0:
                continue

            # Check whether a workflow already exists for this session
            # (walk the workflows and check the session_id in the metadata)
            with self._data_lock:
                has_workflow = any(
                    getattr(wf, '_source_session', None) == sid
                    for wf in self._workflows.values()
                )
            if has_workflow:
                continue

            # Check that full screenshots exist on disk
            session_dir = self._find_session_dir(sid)
            if session_dir:
                shots_dir = session_dir / "shots"
                if shots_dir.exists():
                    full_shots = list(shots_dir.glob("shot_*_full.png"))
                    if full_shots:
                        logger.info(
                            f"Pending session found: {sid} "
                            f"({len(full_shots)} full screenshots)"
                        )
                        pending.append(sid)

        return pending

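    # How a background worker can pair this with reprocess_session() (sketch;
    # the loop is illustrative, not the actual SessionWorker code):
    #
    #   for sid in processor.find_pending_sessions():
    #       processor.reprocess_session(sid)
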
    def _cleanup_session_data(self, session_id: str):
        """Free the ScreenStates and embeddings memory after finalization."""
        with self._data_lock:
            states = self._screen_states.pop(session_id, [])
            embeddings = self._embeddings.pop(session_id, [])
        logger.info(
            f"Memory freed for {session_id}: "
            f"{len(states)} states, {len(embeddings)} embeddings"
        )

    # =========================================================================
    # Helpers
    # =========================================================================

    def _generate_workflow_name(self, session_id: str) -> str:
        """
        Generate a human-readable task name from the window titles.

        Analyzes the titles seen during the session to extract:
        - The primary application (the most frequent one)
        - The document context (after the dash in the title)
        - An action description inferred from the context

        Example results (generated names are user-facing French strings):
            "Chrome - Facturation DPI" → "Chrome — Facturation DPI"
            "Excel - Budget_2026.xlsx" → "Excel — Budget 2026"
            3 apps → "Chrome, Excel et Word"
            No context → "Tâche du 17 mars à 14h"
        """
        import re

        session = self.session_manager.get_session(session_id)
        if not session:
            return self._fallback_task_name()

        titles = session.window_titles_seen
        apps = session.app_names_seen

        if not titles and not apps:
            return self._fallback_task_name()

        # Sort by decreasing frequency
        sorted_titles = sorted(titles.items(), key=lambda x: -x[1])
        sorted_apps = sorted(apps.items(), key=lambda x: -x[1])

        # Extract the app name from the most frequent title
        primary_title = sorted_titles[0][0] if sorted_titles else ""
        primary_app = sorted_apps[0][0] if sorted_apps else ""

        # Clean the application name for human display
        app_display = self._humanize_app_name(primary_app) if primary_app else ""

        # Extract the contextual part of the title (around the separator)
        context_part = ""
        for sep in [" - ", " — ", " – ", " | ", ": "]:
            if sep in primary_title:
                parts = primary_title.split(sep)
                if len(parts) >= 2:
                    candidates = [p.strip() for p in parts]
                    app_lower = primary_app.lower()
                    context_candidates = [
                        c for c in candidates
                        if app_lower not in c.lower()
                        and c.lower() not in app_lower
                    ]
                    if context_candidates:
                        context_part = context_candidates[0]
                    else:
                        context_part = candidates[0]
                break

        # Build the readable name
        distinct_apps = [a for a, _ in sorted_apps if a.lower() not in ("unknown", "explorer")]

        if len(distinct_apps) >= 3:
            # Multi-app: "Chrome, Excel et Word"
            app_names = [self._humanize_app_name(a) for a in distinct_apps[:3]]
            if len(app_names) == 3:
                name = f"{app_names[0]}, {app_names[1]} et {app_names[2]}"
            else:
                name = " et ".join(app_names)
        elif context_part:
            # Clean up the context to make it readable
            clean_context = re.sub(r'[<>:"/\\|?*\[\]]', '', context_part)
            # Strip common file extensions
            clean_context = re.sub(r'\.(xlsx?|csv|docx?|pdf|txt)$', '', clean_context, flags=re.IGNORECASE)
            # Replace underscores with spaces
            clean_context = clean_context.replace('_', ' ').strip()[:40]
            if app_display:
                name = f"{app_display} \u2014 {clean_context}"
            else:
                name = clean_context
        elif app_display:
            name = f"{app_display} \u2014 session"
        else:
            name = self._fallback_task_name()

        # Deduplicate if a task with this name already exists
        base_name = name
        counter = 1
        with self._data_lock:
            existing_names = {
                getattr(w, 'name', '') for w in self._workflows.values()
            }
        while name in existing_names:
            counter += 1
            name = f"{base_name} ({counter})"

        return name

    @staticmethod
    def _fallback_task_name() -> str:
        """Generate a default task name based on the date and time."""
        now = datetime.now()
        # French month names (the generated names are user-facing French strings)
        mois = [
            "", "janvier", "février", "mars", "avril", "mai", "juin",
            "juillet", "août", "septembre", "octobre", "novembre", "décembre"
        ]
        return f"Tâche du {now.day} {mois[now.month]} à {now.hour}h{now.minute:02d}"

    @staticmethod
    def _humanize_app_name(app_name: str) -> str:
        """Convert a technical application name into a readable one.

        Examples:
            "notepad.exe" → "Bloc-notes"
            "chrome.exe" → "Chrome"
            "WindowsTerminal" → "Terminal"
        """
        import re
        # Strip the .exe extension and any path components
        name = app_name.split("\\")[-1].split("/")[-1]
        name = re.sub(r'\.exe$', '', name, flags=re.IGNORECASE).strip()

        # Human-readable names for common applications (French UI labels)
        app_human_names = {
            "notepad": "Bloc-notes",
            "notepad++": "Notepad++",
            "chrome": "Chrome",
            "msedge": "Edge",
            "firefox": "Firefox",
            "explorer": "Explorateur",
            "windowsterminal": "Terminal",
            "cmd": "Invite de commandes",
            "powershell": "PowerShell",
            "excel": "Excel",
            "winword": "Word",
            "powerpnt": "PowerPoint",
            "outlook": "Outlook",
            "teams": "Teams",
            "code": "VS Code",
            "searchhost": "Recherche",
            "applicationframehost": "Application",
            "calc": "Calculatrice",
            "mspaint": "Paint",
            "snippingtool": "Capture d'écran",
        }

        name_lower = name.lower()
        if name_lower in app_human_names:
            return app_human_names[name_lower]

        # Capitalize the name if it is not in the dictionary
        return name.capitalize() if name else "Application"

    @staticmethod
    def _clean_app_name(app_name: str) -> str:
        """Clean an application name for use in a workflow name."""
        import re
        # Strip the .exe extension and any path components
        name = app_name.split("\\")[-1].split("/")[-1]
        name = re.sub(r'\.exe$', '', name, flags=re.IGNORECASE)
        # Capitalize
        name = name.strip().capitalize()
        # Remove special characters
        name = re.sub(r'[^a-zA-Z0-9àâäéèêëïîôùûüÿçÀÂÄÉÈÊËÏÎÔÙÛÜŸÇ_]', '', name)
        return name or "App"

    def _persist_workflow(self, workflow, session_id: str, machine_id: str = "default") -> Optional[Path]:
        """Save the workflow JSON to disk.

        Workflows are saved in a per-machine subdirectory:
        data/training/workflows/{machine_id}/wf_xxx.json

        This keeps workflows learned on different machines distinguishable.
        """
        try:
            # Per-machine directory (or the root for "default")
            if machine_id and machine_id != "default":
                workflows_dir = self.data_dir / "workflows" / machine_id
            else:
                workflows_dir = self.data_dir / "workflows"
            workflows_dir.mkdir(parents=True, exist_ok=True)
            filepath = workflows_dir / f"{workflow.workflow_id}.json"
            workflow.save_to_file(filepath)
            # Store the machine_id on the workflow for reference
            if not hasattr(workflow, '_machine_id'):
                workflow._machine_id = machine_id
            logger.info(f"Workflow saved: {filepath} (machine={machine_id})")
            return filepath
        except Exception as e:
            logger.error(f"Workflow save error {session_id}: {e}")
            return None

    def _build_raw_session_fallback(self, session, raw_dict):
        """Build a RawSession manually when from_dict fails."""
        from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext

        events = []
        for evt_dict in raw_dict.get("events", []):
            window_data = evt_dict.get("window", {"title": "", "app_name": "unknown"})
            window = RawWindowContext(
                title=window_data.get("title", ""),
                app_name=window_data.get("app_name", "unknown"),
            )
            events.append(Event(
                t=evt_dict.get("t", 0.0),
                type=evt_dict.get("type", "unknown"),
                window=window,
                data={k: v for k, v in evt_dict.items()
                      if k not in ("t", "type", "window", "screenshot_id")},
                screenshot_id=evt_dict.get("screenshot_id"),
            ))

        screenshots = []
        for ss_dict in raw_dict.get("screenshots", []):
            screenshots.append(Screenshot(
                screenshot_id=ss_dict["screenshot_id"],
                relative_path=ss_dict.get("relative_path", ss_dict.get("path", "")),
                captured_at=ss_dict.get("captured_at", datetime.now().isoformat()),
            ))

        return RawSession(
            session_id=session.session_id,
            agent_version="agent_v1_stream",
            environment=raw_dict.get("environment", {}),
            user=raw_dict.get("user", {"id": "remote_agent"}),
            context=raw_dict.get("context", {}),
            started_at=session.created_at,
            ended_at=datetime.now(),
            events=events,
            screenshots=screenshots,
        )

    def list_sessions(self, machine_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """List the sessions with their state.

        Args:
            machine_id: If provided, filter by machine. If None, return all sessions.
        """
        sessions = []
        for sid in self.session_manager.session_ids:
            session = self.session_manager.get_session(sid)
            if session is None:
                continue
            # Filter by machine if requested
            if machine_id and session.machine_id != machine_id:
                continue
            with self._data_lock:
                states_count = len(self._screen_states.get(sid, []))
                embeddings_count = len(self._embeddings.get(sid, []))
            sessions.append({
                "session_id": session.session_id,
                "machine_id": session.machine_id,
                "events_count": len(session.events),
                "screenshots_count": len(session.shot_paths),
                "states_count": states_count,
                "embeddings_count": embeddings_count,
                "last_window": session.last_window_info,
                "created_at": session.created_at.isoformat(),
                "last_activity": session.last_activity.isoformat(),
                "finalized": session.finalized,
            })
        return sessions

    def list_workflows(self, machine_id: Optional[str] = None) -> List[Dict[str, Any]]:
        """List the built workflows.

        Args:
            machine_id: If provided, filter by machine. If None, return all workflows.
        """
        with self._data_lock:
            workflows_snapshot = list(self._workflows.items())
        result = []
        for wf_id, wf in workflows_snapshot:
            wf_machine = getattr(wf, '_machine_id', 'default')
            # Filter by machine if requested
            if machine_id and wf_machine != machine_id:
                continue
            result.append({
                "workflow_id": wf_id,
                "machine_id": wf_machine,
                "nodes": len(wf.nodes) if hasattr(wf, "nodes") else 0,
                "edges": len(wf.edges) if hasattr(wf, "edges") else 0,
                "name": getattr(wf, "name", wf_id),
            })
        return result

    def reload_workflows(self) -> int:
        """Reload the workflows from disk.

        Useful after a new workflow has been exported from the VWB
        or learned through streaming. Returns the number of workflows loaded.
        """
        with self._data_lock:
            self._workflows.clear()
        self._load_persisted_workflows()
        with self._data_lock:
            count = len(self._workflows)
        logger.info("Workflows reloaded from disk: %d", count)
        return count

    @property
    def stats(self) -> Dict[str, Any]:
        """Processor statistics."""
        with self._data_lock:
            total_workflows = len(self._workflows)
        return {
            "active_sessions": self.session_manager.active_session_count,
            "total_sessions": len(self.session_manager.session_ids),
            "total_workflows": total_workflows,
            "faiss_vectors": self._faiss_manager.index.ntotal if self._faiss_manager else 0,
            "initialized": self._initialized,
        }
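
# Minimal smoke-test sketch: prints the processor's current state. Assumes the
# core.* packages are importable and the default data/training layout exists;
# run via `python -m <package>.stream_processor` (the relative import above
# prevents running this file directly as a script).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    _processor = StreamProcessor(data_dir="data/training")
    print(_processor.stats)
    print(_processor.find_pending_sessions())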
|