v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama running on GPU
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Dom
2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions


@@ -0,0 +1,5 @@
"""Persistence and storage management"""
from .storage_manager import StorageManager
__all__ = ["StorageManager"]


@@ -0,0 +1,721 @@
"""
StorageManager - Centralized persistence management
Organizes and saves all artifacts of the RPA Vision V3 system
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List
import numpy as np
from core.models import RawSession, ScreenState, get_workflow
logger = logging.getLogger(__name__)
class StorageManager:
"""
Persistence manager for all system artifacts.
File organization:
data/
├── sessions/YYYY-MM-DD/
│ └── session_<timestamp>_<id>.json
├── screen_states/YYYY-MM-DD/
│ └── state_<timestamp>_<id>.json
├── embeddings/YYYY-MM-DD/
│ ├── state_<id>.npy
│ └── ui_element_<id>.npy
├── faiss_index/
│ ├── index.faiss
│ └── metadata.json
└── workflows/
└── workflow_<name>_<id>.json
Validates: Requirements 12.1, 12.2, 12.4, 12.7
"""
def __init__(self, base_path: str = "data"):
"""
Initializes the StorageManager.
Args:
base_path: Base path for all stored files
"""
self.base_path = Path(base_path)
self._ensure_directories()
logger.info(f"StorageManager initialized with base_path: {self.base_path}")
def _ensure_directories(self):
"""Crée la structure de répertoires si elle n'existe pas."""
directories = [
self.base_path / "sessions",
self.base_path / "screen_states",
self.base_path / "embeddings",
self.base_path / "faiss_index",
self.base_path / "workflows",
]
for directory in directories:
directory.mkdir(parents=True, exist_ok=True)
logger.debug(f"Ensured directory exists: {directory}")
def _get_date_path(self, base_dir: str) -> Path:
"""
Returns the path with a date subdirectory (YYYY-MM-DD).
Args:
base_dir: Base directory (sessions, screen_states, embeddings)
Returns:
Path with a date subdirectory
"""
date_str = datetime.now().strftime("%Y-%m-%d")
path = self.base_path / base_dir / date_str
path.mkdir(parents=True, exist_ok=True)
return path
def save_raw_session(
self,
session: RawSession,
session_id: Optional[str] = None
) -> Path:
"""
Saves a RawSession as JSON.
Args:
session: RawSession to save
session_id: Optional ID (generated if not provided)
Returns:
Path of the saved file
Validates: Requirements 12.1, 12.7
"""
if session_id is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
session_id = f"{timestamp}_{id(session)}"
date_path = self._get_date_path("sessions")
filename = f"session_{session_id}.json"
filepath = date_path / filename
# Serialize to JSON
data = session.to_json()
# Add metadata
data["_metadata"] = {
"saved_at": datetime.now().isoformat(),
"schema_version": "rawsession_v1",
"session_id": session_id
}
# Save to disk
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
logger.info(f"Saved RawSession to {filepath}")
return filepath
def load_raw_session(self, filepath: Path) -> RawSession:
"""
Loads a RawSession from JSON.
Args:
filepath: Path of the JSON file
Returns:
The loaded RawSession
Raises:
ValueError: If the schema version is incompatible
"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
# Validate the schema
metadata = data.get("_metadata", {})
schema_version = metadata.get("schema_version")
if schema_version != "rawsession_v1":
raise ValueError(
f"Incompatible schema version: {schema_version} "
f"(expected rawsession_v1)"
)
# Strip the metadata before deserialization
data.pop("_metadata", None)
session = RawSession.from_json(data)
logger.info(f"Loaded RawSession from {filepath}")
return session
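# Round-trip sketch (illustrative; `storage` and `session` are assumed to exist elsewhere):
#   path = storage.save_raw_session(session)    # data/sessions/<YYYY-MM-DD>/session_<id>.json
#   restored = storage.load_raw_session(path)   # ValueError if schema_version != "rawsession_v1"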
def save_screen_state(
self,
state: ScreenState,
state_id: Optional[str] = None
) -> Path:
"""
Saves a ScreenState as JSON.
Args:
state: ScreenState to save
state_id: Optional ID (generated if not provided)
Returns:
Path of the saved file
Validates: Requirements 12.1, 12.7
"""
if state_id is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
state_id = f"{timestamp}_{id(state)}"
date_path = self._get_date_path("screen_states")
filename = f"state_{state_id}.json"
filepath = date_path / filename
# Serialize to JSON
data = state.to_json()
# Add metadata
data["_metadata"] = {
"saved_at": datetime.now().isoformat(),
"schema_version": "screenstate_v1",
"state_id": state_id
}
# Save to disk
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
logger.info(f"Saved ScreenState to {filepath}")
return filepath
def load_screen_state(self, filepath: Path) -> ScreenState:
"""
Loads a ScreenState from JSON.
Args:
filepath: Path of the JSON file
Returns:
The loaded ScreenState
Raises:
ValueError: If the schema version is incompatible
"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
# Validate the schema
metadata = data.get("_metadata", {})
schema_version = metadata.get("schema_version")
if schema_version != "screenstate_v1":
raise ValueError(
f"Incompatible schema version: {schema_version} "
f"(expected screenstate_v1)"
)
# Strip the metadata before deserialization
data.pop("_metadata", None)
state = ScreenState.from_json(data)
logger.info(f"Loaded ScreenState from {filepath}")
return state
def save_workflow(
self,
workflow,  # Type is resolved dynamically
workflow_name: Optional[str] = None
) -> Path:
"""
Saves a Workflow as JSON.
Args:
workflow: Workflow to save
workflow_name: Optional workflow name
Returns:
Path of the saved file
Validates: Requirements 12.4, 12.7
"""
if workflow_name is None:
workflow_name = workflow.workflow_id or "unnamed"
# Sanitize the name for the filesystem
safe_name = "".join(c if c.isalnum() or c in "-_" else "_" for c in workflow_name)
filename = f"workflow_{safe_name}_{workflow.workflow_id}.json"
filepath = self.base_path / "workflows" / filename
# Serialize to a dict (not to a JSON string!)
# FIX: workflow.to_json() returns a string, but a dict is needed here
data = workflow.to_dict()
# Add metadata
data["_metadata"] = {
"saved_at": datetime.now().isoformat(),
"schema_version": "workflow_v1",
"workflow_name": workflow_name
}
# Save to disk
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
logger.info(f"Saved Workflow to {filepath}")
return filepath
def load_workflow(self, filepath: Path):
"""
Loads a Workflow from JSON.
Args:
filepath: Path of the JSON file
Returns:
The loaded Workflow
Raises:
ValueError: If the schema version is incompatible
Validates: Requirements 12.5
"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
# Validate the schema
metadata = data.get("_metadata", {})
schema_version = metadata.get("schema_version")
if schema_version != "workflow_v1":
raise ValueError(
f"Incompatible schema version: {schema_version} "
f"(expected workflow_v1)"
)
# Strip the metadata before deserialization
data.pop("_metadata", None)
Workflow = get_workflow()
workflow = Workflow.from_json(data)
logger.info(f"Loaded Workflow from {filepath}")
return workflow
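# Workflow persistence sketch (illustrative; `wf` stands for a hypothetical Workflow instance):
#   path = storage.save_workflow(wf, workflow_name="my workflow")  # name is sanitized for the filesystem
#   wf2 = storage.load_workflow(path)                              # checks schema_version == "workflow_v1"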
def list_workflows(self) -> List[Dict[str, Any]]:
"""
Lists all saved workflows.
Returns:
List of dictionaries with information about each workflow
"""
workflows_dir = self.base_path / "workflows"
workflows = []
for filepath in workflows_dir.glob("workflow_*.json"):
try:
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
metadata = data.get("_metadata", {})
workflows.append({
"filepath": str(filepath),
"workflow_id": data.get("workflow_id"),
"workflow_name": metadata.get("workflow_name"),
"saved_at": metadata.get("saved_at"),
"num_nodes": len(data.get("nodes", [])),
"num_edges": len(data.get("edges", []))
})
except Exception as e:
logger.warning(f"Failed to read workflow {filepath}: {e}")
return workflows
def list_sessions(self, date: Optional[str] = None) -> List[Dict[str, Any]]:
"""
Lists saved sessions.
Args:
date: Date in YYYY-MM-DD format (today if None)
Returns:
List of dictionaries with information about each session
"""
if date is None:
date = datetime.now().strftime("%Y-%m-%d")
sessions_dir = self.base_path / "sessions" / date
sessions = []
if not sessions_dir.exists():
return sessions
for filepath in sessions_dir.glob("session_*.json"):
try:
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
metadata = data.get("_metadata", {})
sessions.append({
"filepath": str(filepath),
"session_id": metadata.get("session_id"),
"saved_at": metadata.get("saved_at"),
"num_events": len(data.get("events", [])),
"num_screenshots": len(data.get("screenshots", []))
})
except Exception as e:
logger.warning(f"Failed to read session {filepath}: {e}")
return sessions
def get_storage_stats(self) -> Dict[str, Any]:
"""
Returns storage statistics.
Returns:
Dictionary of statistics
"""
stats = {
"base_path": str(self.base_path),
"sessions": 0,
"screen_states": 0,
"embeddings": 0,
"workflows": 0,
"total_size_mb": 0.0
}
# Count sessions and screen_states (JSON files)
for category in ["sessions", "screen_states"]:
category_path = self.base_path / category
if category_path.exists():
stats[category] = len(list(category_path.rglob("*.json")))
# Count embeddings (.npy files)
embeddings_path = self.base_path / "embeddings"
if embeddings_path.exists():
stats["embeddings"] = len(list(embeddings_path.rglob("*.npy")))
workflows_path = self.base_path / "workflows"
if workflows_path.exists():
stats["workflows"] = len(list(workflows_path.glob("workflow_*.json")))
# Compute the total size
total_size = 0
for path in self.base_path.rglob("*"):
if path.is_file():
total_size += path.stat().st_size
stats["total_size_mb"] = round(total_size / (1024 * 1024), 2)
return stats
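# Example of the returned shape (values below are made up):
#   {"base_path": "data", "sessions": 12, "screen_states": 34,
#    "embeddings": 120, "workflows": 3, "total_size_mb": 45.7}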
def save_embedding(
self,
embedding_vector: np.ndarray,
embedding_id: str,
embedding_type: str = "state",
metadata: Optional[Dict[str, Any]] = None
) -> Path:
"""
Saves an embedding vector as .npy.
Args:
embedding_vector: Numpy vector to save
embedding_id: Unique ID of the embedding
embedding_type: Embedding type (state, ui_element, etc.)
metadata: Optional metadata
Returns:
Path of the saved .npy file
Validates: Requirements 12.2
"""
date_path = self._get_date_path("embeddings")
filename = f"{embedding_type}_{embedding_id}.npy"
filepath = date_path / filename
# Save the vector
np.save(filepath, embedding_vector)
# Save the metadata if provided
if metadata is not None:
metadata_file = filepath.with_suffix('.json')
metadata_data = {
"embedding_id": embedding_id,
"embedding_type": embedding_type,
"shape": list(embedding_vector.shape),
"dtype": str(embedding_vector.dtype),
"saved_at": datetime.now().isoformat(),
**metadata
}
with open(metadata_file, 'w', encoding='utf-8') as f:
json.dump(metadata_data, f, indent=2)
logger.info(f"Saved embedding to {filepath}")
return filepath
def load_embedding(
self,
embedding_id: str,
embedding_type: str = "state",
date: Optional[str] = None
) -> tuple[np.ndarray, Optional[Dict[str, Any]]]:
"""
Loads an embedding vector from .npy.
Args:
embedding_id: ID of the embedding
embedding_type: Embedding type
date: Date in YYYY-MM-DD format (today if None)
Returns:
Tuple (numpy vector, optional metadata)
Raises:
FileNotFoundError: If the file does not exist
"""
if date is None:
date = datetime.now().strftime("%Y-%m-%d")
embeddings_dir = self.base_path / "embeddings" / date
filename = f"{embedding_type}_{embedding_id}.npy"
filepath = embeddings_dir / filename
if not filepath.exists():
raise FileNotFoundError(f"Embedding not found: {filepath}")
# Load the vector
vector = np.load(filepath)
# Load the metadata if it exists
metadata_file = filepath.with_suffix('.json')
metadata = None
if metadata_file.exists():
with open(metadata_file, 'r', encoding='utf-8') as f:
metadata = json.load(f)
logger.info(f"Loaded embedding from {filepath}")
return vector, metadata
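# Embedding round-trip sketch (illustrative vector and ID):
#   vec = np.zeros(512, dtype=np.float32)
#   storage.save_embedding(vec, "abc123", embedding_type="state", metadata={"source": "demo"})
#   vec2, meta = storage.load_embedding("abc123", embedding_type="state")  # defaults to today's date folder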
def save_embeddings_batch(
self,
embeddings: Dict[str, np.ndarray],
embedding_type: str = "state",
metadata: Optional[Dict[str, Dict[str, Any]]] = None
) -> List[Path]:
"""
Saves a batch of embeddings.
Args:
embeddings: Dictionary {embedding_id: vector}
embedding_type: Embedding type
metadata: Optional dictionary {embedding_id: metadata}
Returns:
List of saved paths
"""
paths = []
for embedding_id, vector in embeddings.items():
meta = metadata.get(embedding_id) if metadata else None
path = self.save_embedding(vector, embedding_id, embedding_type, meta)
paths.append(path)
logger.info(f"Saved {len(paths)} embeddings in batch")
return paths
def list_embeddings(
self,
embedding_type: Optional[str] = None,
date: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Lists saved embeddings.
Args:
embedding_type: Filter by type (None = all)
date: Date in YYYY-MM-DD format (today if None)
Returns:
List of dictionaries with information about each embedding
"""
if date is None:
date = datetime.now().strftime("%Y-%m-%d")
embeddings_dir = self.base_path / "embeddings" / date
embeddings = []
if not embeddings_dir.exists():
return embeddings
pattern = f"{embedding_type}_*.npy" if embedding_type else "*.npy"
for filepath in embeddings_dir.glob(pattern):
try:
# Extract the ID and the type from the filename (assumes the type itself contains no underscore)
stem = filepath.stem  # e.g. "state_12345"
parts = stem.split("_", 1)
if len(parts) == 2:
emb_type, emb_id = parts
else:
emb_type, emb_id = "unknown", stem
# Load the metadata if it exists
metadata_file = filepath.with_suffix('.json')
metadata = {}
if metadata_file.exists():
with open(metadata_file, 'r', encoding='utf-8') as f:
metadata = json.load(f)
embeddings.append({
"filepath": str(filepath),
"embedding_id": emb_id,
"embedding_type": emb_type,
"size_kb": round(filepath.stat().st_size / 1024, 2),
**metadata
})
except Exception as e:
logger.warning(f"Failed to read embedding {filepath}: {e}")
return embeddings
def save_faiss_index(
self,
faiss_manager,
index_name: str = "main"
) -> Path:
"""
Saves a FAISS index and its metadata.
Args:
faiss_manager: FAISSManager instance
index_name: Name of the index
Returns:
Path of the saved index file
Validates: Requirements 12.3
"""
index_dir = self.base_path / "faiss_index"
index_path = index_dir / f"{index_name}.faiss"
metadata_path = index_dir / f"{index_name}_metadata.json"
# Save the FAISS index
faiss_manager.save_index(str(index_path))
# Save the metadata
metadata = {
"index_name": index_name,
"saved_at": datetime.now().isoformat(),
"num_vectors": faiss_manager.index.ntotal if faiss_manager.index else 0,
"dimension": faiss_manager.dimension,
"metadata_store": faiss_manager.metadata_store
}
with open(metadata_path, 'w', encoding='utf-8') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Saved FAISS index to {index_path}")
return index_path
def load_faiss_index(
self,
faiss_manager,
index_name: str = "main"
) -> Dict[str, Any]:
"""
Loads a FAISS index and its metadata.
Args:
faiss_manager: FAISSManager instance
index_name: Name of the index
Returns:
Metadata of the index
Raises:
FileNotFoundError: If the index does not exist
Validates: Requirements 12.6
"""
index_dir = self.base_path / "faiss_index"
index_path = index_dir / f"{index_name}.faiss"
metadata_path = index_dir / f"{index_name}_metadata.json"
if not index_path.exists():
raise FileNotFoundError(f"FAISS index not found: {index_path}")
# Load the FAISS index
faiss_manager.load_index(str(index_path))
# Load the metadata
metadata = {}
if metadata_path.exists():
with open(metadata_path, 'r', encoding='utf-8') as f:
metadata = json.load(f)
# Restore the metadata_store
if "metadata_store" in metadata:
faiss_manager.metadata_store = metadata["metadata_store"]
logger.info(f"Loaded FAISS index from {index_path}")
return metadata
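# FAISS persistence sketch (assumes a FAISSManager exposing save_index/load_index,
# index, dimension and metadata_store, as used above):
#   storage.save_faiss_index(faiss_manager, index_name="main")   # -> main.faiss + main_metadata.json
#   meta = storage.load_faiss_index(faiss_manager, index_name="main")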
def cleanup_old_files(self, days_to_keep: int = 30) -> Dict[str, int]:
"""
Removes files older than the specified number of days.
Args:
days_to_keep: Number of days to keep
Returns:
Dictionary with the number of files deleted per category
"""
from datetime import timedelta
cutoff_date = datetime.now() - timedelta(days=days_to_keep)
deleted = {
"sessions": 0,
"screen_states": 0,
"embeddings": 0
}
for category in ["sessions", "screen_states", "embeddings"]:
category_path = self.base_path / category
if not category_path.exists():
continue
# Walk the date subdirectories
for date_dir in category_path.iterdir():
if not date_dir.is_dir():
continue
try:
# Parse the date from the directory name
dir_date = datetime.strptime(date_dir.name, "%Y-%m-%d")
if dir_date < cutoff_date:
# Delete all files in the directory
for file in date_dir.iterdir():
if file.is_file():
file.unlink()
deleted[category] += 1
# Remove the directory if it is empty
if not any(date_dir.iterdir()):
date_dir.rmdir()
logger.info(f"Removed empty directory: {date_dir}")
except ValueError:
# Invalid directory name, skip it
logger.warning(f"Invalid date directory name: {date_dir.name}")
logger.info(f"Cleanup completed: {deleted}")
return deleted
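# Minimal end-to-end sketch (assumption: run from the project root so that
# core.models is importable; the IDs and data below are illustrative only).
if __name__ == "__main__":
    storage = StorageManager(base_path="data")
    storage.save_embedding(np.zeros(8, dtype=np.float32), "demo", embedding_type="state")
    print(storage.get_storage_stats())
    print(storage.cleanup_old_files(days_to_keep=30))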