#!/usr/bin/env python3
"""
Agent V0 session processing pipeline.

Transforms RawSession -> ScreenStates -> Embeddings -> Workflow.

Steps:
    1. Load RawSession
    2. Build ScreenStates (for each event with a screenshot)
    3. Generate embeddings (CLIP)
    4. Index into FAISS
    5. Detect UI (optional, if models are available)
    6. Build workflow
"""

import logging
import sys
import re
import os
from pathlib import Path
from datetime import datetime
from typing import Any, List, Optional, Dict
from collections import Counter

import numpy as np

# Add the parent directory to the path so `core` is importable
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.models import RawSession
from core.models.screen_state import (
    ScreenState,
    WindowContext as ScreenWindowContext,
    RawLevel,
    PerceptionLevel,
    ContextLevel,
    EmbeddingRef
)
from core.models.raw_session import WindowContext
from core.persistence import StorageManager

# Optional imports (only if the models are available)
try:
    from core.embedding import CLIPEmbedder, FusionEngine, FAISSManager
    EMBEDDINGS_AVAILABLE = True
except ImportError:
    logging.warning("Modules embedding non disponibles")
    EMBEDDINGS_AVAILABLE = False

try:
    from core.detection import UIDetector
    UI_DETECTION_AVAILABLE = True
except ImportError:
    logging.warning("Module UI detection non disponible")
    UI_DETECTION_AVAILABLE = False

try:
    from core.graph import GraphBuilder
    GRAPH_AVAILABLE = True
except ImportError:
    logging.warning("Module graph non disponible")
    GRAPH_AVAILABLE = False

logger = logging.getLogger("processing_pipeline")


class ProcessingPipeline:
    """
    Agent session processing pipeline.

    Transforms raw captured data into exploitable data for RPA Vision V3.
    """

    def __init__(self, base_path: str = "data/training"):
        """
        Initialize the pipeline.

        Args:
            base_path: Base path for storage.
        """
        self.storage = StorageManager(base_path=base_path)
        self.base_path = Path(base_path)

        # Flags for available components (may be downgraded below on init failure)
        self.embeddings_available = EMBEDDINGS_AVAILABLE
        self.ui_detection_available = UI_DETECTION_AVAILABLE
        self.graph_available = GRAPH_AVAILABLE

        # Paths of the persistent FAISS index
        self.faiss_index_path = self.base_path / "faiss_index" / "main.index"
        self.faiss_metadata_path = self.base_path / "faiss_index" / "main.metadata"

        # Initialize each component if its module imported successfully
        if self.embeddings_available:
            try:
                self.clip = CLIPEmbedder()
                self.fusion = FusionEngine()
                # Load the existing FAISS index, or create a fresh one
                self.faiss = self._load_or_create_faiss_index()
                logger.info(f"Embeddings initialisés (FAISS: {self.faiss.index.ntotal} vecteurs)")
            except Exception as e:
                logger.warning(f"Erreur init embeddings: {e}")
                self.embeddings_available = False

        if self.ui_detection_available:
            try:
                self.ui_detector = UIDetector()
                logger.info("UI Detector initialisé")
            except Exception as e:
                logger.warning(f"Erreur init UI detector: {e}")
                self.ui_detection_available = False

        if self.graph_available:
            try:
                self.graph_builder = GraphBuilder()
                logger.info("Graph Builder initialisé")
            except Exception as e:
                logger.warning(f"Erreur init graph builder: {e}")
                self.graph_available = False

    def _load_or_create_faiss_index(self) -> 'FAISSManager':
        """
        Load the existing FAISS index or create a new one.

        Returns:
            The loaded (or newly created) FAISSManager.
        """
        if self.faiss_index_path.exists() and self.faiss_metadata_path.exists():
            try:
                manager = FAISSManager.load(self.faiss_index_path, self.faiss_metadata_path)
                logger.info(f"Index FAISS chargé: {manager.index.ntotal} vecteurs")
                return manager
            except Exception as e:
                # Fall through and build a fresh index rather than failing hard
                logger.warning(f"Erreur chargement index FAISS: {e}, création d'un nouveau")

        # Create a new index (512 = CLIP embedding dimensionality used below)
        logger.info("Création d'un nouvel index FAISS")
        return FAISSManager(dimensions=512)

    def _save_faiss_index(self) -> None:
        """Save the FAISS index to disk (no-op if embeddings are unavailable)."""
        if not self.embeddings_available or self.faiss is None:
            return
        try:
            self.faiss_index_path.parent.mkdir(parents=True, exist_ok=True)
            self.faiss.save(self.faiss_index_path, self.faiss_metadata_path)
            logger.info(f"Index FAISS sauvegardé: {self.faiss.index.ntotal} vecteurs")
        except Exception as e:
            logger.error(f"Erreur sauvegarde index FAISS: {e}")

    def process_session(self, session_id: str) -> dict:
        """
        Process a complete session.

        Args:
            session_id: ID of the session to process.

        Returns:
            Dictionary with processing statistics (counts, status, errors).
        """
        logger.info(f"Début traitement session: {session_id}")

        stats = {
            "session_id": session_id,
            "started_at": datetime.now().isoformat(),
            "screen_states_created": 0,
            "embeddings_generated": 0,
            "ui_elements_detected": 0,
            "workflow_created": False,
            "errors": []
        }

        try:
            # 1. Load the RawSession
            session = self._load_session(session_id)
            logger.info(f"Session chargée: {len(session.events)} events, {len(session.screenshots)} screenshots")

            # 1.5 Enrich the workflow name (server-side analysis)
            enriched_name = self._enrich_workflow_name(session, stats)
            logger.info(f"Nom workflow: {enriched_name}")

            # 2. Build the ScreenStates
            screen_states = self._build_screen_states(session, stats)
            logger.info(f"ScreenStates créés: {len(screen_states)}")

            # 3. Generate embeddings and persist the FAISS index
            if self.embeddings_available:
                self._generate_embeddings(screen_states, session_id, stats)
                self._save_faiss_index()  # Persist the FAISS index
                logger.info(f"Embeddings générés: {stats['embeddings_generated']}")
            else:
                logger.warning("Embeddings non disponibles, étape sautée")

            # 4. Detect UI elements (optional)
            if self.ui_detection_available:
                self._detect_ui_elements(screen_states, session_id, stats)
                logger.info(f"UI éléments détectés: {stats['ui_elements_detected']}")
            else:
                logger.warning("UI detection non disponible, étape sautée")

            # 5. Build the workflow (optional)
            if self.graph_available and len(screen_states) > 0:
                self._build_workflow(screen_states, session, stats)
                logger.info(f"Workflow créé: {stats['workflow_created']}")
            else:
                logger.warning("Graph builder non disponible ou pas de states, étape sautée")

            stats["completed_at"] = datetime.now().isoformat()
            stats["status"] = "success"

            # 6. Clean up raw files after successful processing.
            # Only when screen_states were created (i.e. processed data was saved).
            if stats["screen_states_created"] > 0:
                self._cleanup_raw_files(session_id, stats)
                logger.info("Fichiers bruts nettoyés (embeddings, screen_states, workflows conservés)")
            else:
                logger.warning("Aucun screen_state créé, nettoyage annulé pour préserver les données")

            logger.info(f"Traitement terminé: {session_id}")
            return stats

        except Exception as e:
            logger.exception(f"Erreur traitement session {session_id}: {e}")
            stats["status"] = "error"
            stats["errors"].append(str(e))
            stats["completed_at"] = datetime.now().isoformat()
            return stats

    def _cleanup_raw_files(self, session_id: str, stats: dict) -> None:
        """
        Delete raw files after successful processing.

        Deletes:
        - The uploaded .enc/.zip file in uploads/
        - The extracted session folder in sessions/ (raw screenshots)
        - The raw JSON files in sessions/YYYY-MM-DD/ (no screenshots, unusable)

        Processed data (screen_states, embeddings, workflows) is kept.
        """
        import shutil

        uploads_dir = self.base_path / "uploads"
        sessions_dir = self.base_path / "sessions"
        cleaned_files = []

        try:
            # Delete the uploaded archive (.enc or .zip)
            for ext in ['.enc', '.zip']:
                upload_file = uploads_dir / f"{session_id}{ext}"
                if upload_file.exists():
                    os.remove(upload_file)
                    cleaned_files.append(str(upload_file))
                    logger.info(f"Fichier uploadé supprimé: {upload_file}")

            # Delete the extracted session folder (contains the raw screenshots)
            session_dir = sessions_dir / session_id
            if session_dir.exists() and session_dir.is_dir():
                shutil.rmtree(session_dir)
                cleaned_files.append(str(session_dir))
                logger.info(f"Dossier session supprimé: {session_dir}")

            # Also delete raw JSONs stored under per-date folders
            # (e.g. sessions/2026-01-07/session_sess_xxx.json).
            # These JSONs lack screenshots and are unusable: the screen_states
            # already hold all the useful data.
            for date_dir in sessions_dir.iterdir():
                if date_dir.is_dir():
                    # Look for this session's JSON inside the per-date folder
                    json_pattern = f"session_{session_id}.json"
                    json_file = date_dir / json_pattern
                    if json_file.exists():
                        os.remove(json_file)
                        cleaned_files.append(str(json_file))
                        logger.info(f"Fichier JSON brut supprimé: {json_file}")

            stats["cleaned_files"] = cleaned_files
            logger.info(f"Nettoyage terminé: {len(cleaned_files)} éléments supprimés")

        except Exception as e:
            # Cleanup is best-effort: record the failure but do not fail the session
            logger.warning(f"Erreur lors du nettoyage: {e}")
            stats["cleanup_error"] = str(e)

    def _load_session(self, session_id: str) -> RawSession:
        """Load the RawSession from storage.

        Raises:
            FileNotFoundError: If no JSON file exists for the session.
        """
        # Look for the JSON file one level down inside the session folder
        session_dir = self.base_path / "sessions" / session_id
        json_files = list(session_dir.glob("*/*.json"))

        if not json_files:
            raise FileNotFoundError(f"Aucun fichier JSON trouvé pour session {session_id}")

        json_path = json_files[0]
        return RawSession.load_from_file(json_path)

    def _enrich_workflow_name(self, session: RawSession, stats: dict) -> str:
        """
        Enrich the workflow name based on an analysis of the session.

        Server-side analysis that can use more context than the agent.

        Returns:
            The enriched workflow name (the original one on failure).
        """
        original_name = session.context.get("training_label", "")

        try:
            # 1. Collect the window titles.
            # event.window may be a dict or an object; handle both shapes.
            window_titles = []
            for event in session.events:
                if hasattr(event, 'window') and event.window:
                    title = event.window.get('title', '') if isinstance(event.window, dict) else getattr(event.window, 'title', '')
                    if title:
                        window_titles.append(title)

            # 2. Find the dominant application
            app_name = self._extract_dominant_app(window_titles)

            # 3. Analyze the action types
            action_summary = self._analyze_actions(session.events)

            # 4. Build the enriched name
            enriched_name = self._build_enriched_name(original_name, app_name, action_summary)

            # 5. Update the session context
            if enriched_name != original_name:
                session.context["training_label"] = enriched_name
                session.context["original_training_label"] = original_name  # Keep the original
                logger.info(f"Nom workflow enrichi: '{original_name}' -> '{enriched_name}'")
                stats["workflow_name_enriched"] = True
            else:
                stats["workflow_name_enriched"] = False

            return enriched_name

        except Exception as e:
            logger.warning(f"Erreur enrichissement nom workflow: {e}")
            stats["workflow_name_enriched"] = False
            return original_name

    def _extract_dominant_app(self, window_titles: List[str]) -> str:
        """Extract the dominant application name from window titles."""
        if not window_titles:
            return "Unknown"

        # Patterns used to extract the application name
        app_patterns = [
            r'^(.+?)\s*[-–—]\s*',   # "App - Document"
            r'^(.+?)\s*\|\s*',      # "App | Section"
            r'^(.+?)\s*:\s*',       # "App: Section"
        ]

        extracted_apps = []
        for title in window_titles:
            for pattern in app_patterns:
                match = re.match(pattern, title)
                if match:
                    app = match.group(1).strip()
                    # Strip common trailing suffixes (editions, version numbers)
                    app = re.sub(r'\s*(Pro|Professional|Enterprise|Standard|Basic|\d+)$', '', app)
                    if app and len(app) > 2:
                        extracted_apps.append(app)
                    break
            else:
                # No pattern matched: take the first significant word
                words = title.split()
                if words:
                    extracted_apps.append(words[0])

        if not extracted_apps:
            return "Unknown"

        # Return the most frequent app
        most_common = Counter(extracted_apps).most_common(1)
        return most_common[0][0] if most_common else "Unknown"

    def _analyze_actions(self, events) -> Dict[str, Any]:
        """Analyze the types of actions in the session.

        Returns:
            Summary dict with the workflow type, dominant action and counts.
        """
        action_counts = Counter()
        for event in events:
            event_type = getattr(event, 'type', '')
            action_counts[event_type] += 1

        total = sum(action_counts.values())

        # Determine the dominant type
        if total == 0:
            return {"type": "unknown", "dominant_action": "none"}

        most_common = action_counts.most_common(1)[0]
        dominant = most_common[0]

        # Classify the workflow from the mix of clicks / keys / scrolls
        click_count = action_counts.get('mouse_click', 0) + action_counts.get('click', 0)
        key_count = action_counts.get('key_combo', 0) + action_counts.get('key', 0)
        scroll_count = action_counts.get('scroll', 0)

        if key_count > click_count * 2:
            workflow_type = "Saisie"
        elif scroll_count > click_count:
            workflow_type = "Navigation"
        elif click_count > key_count * 2:
            workflow_type = "Interaction"
        else:
            workflow_type = "Mixte"

        return {
            "type": workflow_type,
            "dominant_action": dominant,
            "click_count": click_count,
            "key_count": key_count,
            "total_events": total
        }

    def _build_enriched_name(self, original_name: str, app_name: str, action_summary: Dict) -> str:
        """Build an enriched workflow name based on the analysis."""
        workflow_type = action_summary.get("type", "Workflow")

        # If the original name is generic or a default, replace it entirely
        generic_names = ["Facturation_T2A_demo", "Nouveau_Workflow", "Workflow_Unknown", ""]

        if original_name in generic_names or original_name.startswith("Workflow_"):
            # Build a brand-new name
            if app_name != "Unknown":
                enriched = f"{workflow_type}_{app_name}"
            else:
                enriched = f"{workflow_type}_{datetime.now().strftime('%Y%m%d_%H%M')}"
        else:
            # The name is custom: enrich it with the app name if possible
            if app_name != "Unknown" and app_name not in original_name:
                enriched = f"{original_name}_{app_name}"
            else:
                enriched = original_name

        # Sanitize the name (no filesystem-unsafe characters)
        enriched = re.sub(r'[<>:"/\\|?*]', '_', enriched)
        enriched = re.sub(r'_+', '_', enriched)
        enriched = enriched.strip('_')

        # Cap the length
        if len(enriched) > 60:
            enriched = enriched[:57] + "..."

        return enriched

    def _build_screen_states(self, session: RawSession, stats: dict) -> List[ScreenState]:
        """Build the ScreenStates from the RawSession (one per event with a screenshot)."""
        screen_states = []

        for i, event in enumerate(session.events):
            if not event.screenshot_id:
                continue

            # Find the matching screenshot
            screenshot = next(
                (s for s in session.screenshots if s.screenshot_id == event.screenshot_id),
                None
            )
            if not screenshot:
                logger.warning(f"Screenshot {event.screenshot_id} non trouvé")
                continue

            try:
                # Build the ScreenState
                state = self._create_screen_state(event, screenshot, session, i)

                # Persist it
                self.storage.save_screen_state(state)
                screen_states.append(state)
                stats["screen_states_created"] += 1

            except Exception as e:
                logger.error(f"Erreur création ScreenState pour event {i}: {e}")
                stats["errors"].append(f"ScreenState {i}: {str(e)}")

        return screen_states

    def _create_screen_state(
        self,
        event,
        screenshot,
        session: RawSession,
        index: int
    ) -> ScreenState:
        """Create a ScreenState from an Event and its screenshot."""
        # Compute the absolute timestamp from the event's relative offset
        timestamp = session.started_at
        if hasattr(event, 't'):
            from datetime import timedelta
            timestamp = session.started_at + timedelta(seconds=event.t)

        # Raw level.
        # Store only the path relative to the session folder;
        # _generate_embeddings() rebuilds the full path:
        # base_path/sessions/session_id/screenshot.relative_path
        raw = RawLevel(
            screenshot_path=screenshot.relative_path,
            capture_method="agent_v0",
            file_size_bytes=self._get_file_size(session.session_id, screenshot.relative_path)
        )

        # Perception level (filled in later with the embeddings).
        # For now, create a placeholder embedding ref.
        embedding_ref = EmbeddingRef(
            provider="pending",
            vector_id=f"emb_{screenshot.screenshot_id}",
            dimensions=512
        )

        perception = PerceptionLevel(
            embedding=embedding_ref,
            detected_text=[],
            text_detection_method="none",
            confidence_avg=1.0
        )

        # Context level
        context = ContextLevel(
            user_id=session.user.get("id", "unknown"),
            tags=[session.context.get("training_label", "")],
            business_variables=session.context
        )

        # Window context.
        # event.window can be a WindowContext or a dict.
        if isinstance(event.window, WindowContext):
            app_name = event.window.app_name
            window_title = event.window.title
        else:
            app_name = event.window.get("app_name", "unknown")
            window_title = event.window.get("title", "unknown")

        window = ScreenWindowContext(
            app_name=app_name,
            window_title=window_title,
            screen_resolution=session.environment.get("screen", {}).get("primary_resolution", [0, 0])
        )

        return ScreenState(
            screen_state_id=f"state_{session.session_id}_{index:04d}",
            timestamp=timestamp,
            session_id=session.session_id,
            window=window,
            raw=raw,
            perception=perception,
            context=context
        )

    def _get_file_size(self, session_id: str, relative_path: str) -> int:
        """Return a file's size in bytes (0 if missing or unreadable)."""
        try:
            # Structure: sessions/sess_xxx/sess_xxx/shots/shot_0001.png
            file_path = self.base_path / "sessions" / session_id / session_id / relative_path
            if file_path.exists():
                return file_path.stat().st_size
        except Exception:
            pass
        return 0

    def _generate_embeddings(self, screen_states: List[ScreenState], session_id: str, stats: dict):
        """Generate, persist and index the embeddings for the ScreenStates."""
        from PIL import Image

        for state in screen_states:
            try:
                # Screenshot path.
                # Structure: sessions/sess_xxx/sess_xxx/shots/shot_0001.png
                # state.raw.screenshot_path contains "shots/shot_0001.png"
                screenshot_path = self.base_path / "sessions" / session_id / session_id / state.raw.screenshot_path

                if not screenshot_path.exists():
                    logger.warning(f"Screenshot non trouvé: {screenshot_path}")
                    continue

                # Load the image with PIL
                image = Image.open(screenshot_path)

                # Generate the visual embedding with CLIP
                visual_emb = self.clip.embed_image(image)

                # For now, use only the visual embedding.
                # TODO: Add OCR + text embedding
                final_emb = visual_emb

                # Persist the embedding
                emb_id = f"emb_{state.screen_state_id}"
                self.storage.save_embedding(
                    final_emb,
                    embedding_id=emb_id,
                    embedding_type="state",
                    metadata={
                        "session_id": session_id,
                        "screen_state_id": state.screen_state_id,
                        "provider": "openclip"
                    }
                )

                # Index into FAISS
                self.faiss.add_embedding(
                    embedding_id=state.screen_state_id,
                    vector=final_emb,
                    metadata={
                        "session_id": session_id,
                        "screen_state_id": state.screen_state_id
                    }
                )

                stats["embeddings_generated"] += 1

            except Exception as e:
                logger.error(f"Erreur génération embedding pour {state.screen_state_id}: {e}")
                stats["errors"].append(f"Embedding {state.screen_state_id}: {str(e)}")

    def _detect_ui_elements(self, screen_states: List[ScreenState], session_id: str, stats: dict):
        """Detect the UI elements in the screenshots."""
        for state in screen_states:
            try:
                # Structure: sessions/sess_xxx/sess_xxx/shots/shot_0001.png
                screenshot_path = self.base_path / "sessions" / session_id / session_id / state.raw.screenshot_path

                if not screenshot_path.exists():
                    continue

                # Run UI detection
                elements = self.ui_detector.detect(str(screenshot_path))
                stats["ui_elements_detected"] += len(elements)

                # TODO: Attach the detected elements to the ScreenState

            except Exception as e:
                logger.error(f"Erreur détection UI pour {state.screen_state_id}: {e}")
                stats["errors"].append(f"UI detection {state.screen_state_id}: {str(e)}")

    def _build_workflow(self, screen_states: List[ScreenState], session: RawSession, stats: dict):
        """Build and persist the workflow from the session."""
        try:
            # Build the workflow graph
            workflow = self.graph_builder.build_from_session(session)

            # Persist it
            self.storage.save_workflow(workflow, workflow_name=session.context.get("training_label", "workflow"))
            stats["workflow_created"] = True

        except Exception as e:
            logger.error(f"Erreur construction workflow: {e}")
            stats["errors"].append(f"Workflow: {str(e)}")


# Standalone function for use from the API
def process_session_async(session_id: str, base_path: str = "data/training") -> dict:
    """
    Process a session asynchronously.

    Args:
        session_id: Session ID.
        base_path: Base path.

    Returns:
        Processing statistics.
    """
    pipeline = ProcessingPipeline(base_path=base_path)
    return pipeline.process_session(session_id)


if __name__ == "__main__":
    # Pipeline smoke test from the command line
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) < 2:
        print("Usage: python processing_pipeline.py <session_id>")
        sys.exit(1)

    session_id = sys.argv[1]
    print(f"Traitement de la session: {session_id}")

    stats = process_session_async(session_id)

    print("\nRésultats:")
    print(f"  ScreenStates créés: {stats['screen_states_created']}")
    print(f"  Embeddings générés: {stats['embeddings_generated']}")
    print(f"  UI éléments détectés: {stats['ui_elements_detected']}")
    print(f"  Workflow créé: {stats['workflow_created']}")

    if stats["errors"]:
        print(f"\nErreurs ({len(stats['errors'])}):")
        for error in stats["errors"]:
            print(f"  - {error}")