v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
5
core/persistence/__init__.py
Normal file
@@ -0,0 +1,5 @@
"""Persistence and storage management"""

from .storage_manager import StorageManager

__all__ = ["StorageManager"]
721
core/persistence/storage_manager.py
Normal file
@@ -0,0 +1,721 @@
"""
StorageManager - Centralized persistence management.

Organizes and saves every artifact of the RPA Vision V3 system.
"""

import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, Any, List
import numpy as np

from core.models import RawSession, ScreenState, get_workflow

logger = logging.getLogger(__name__)

class StorageManager:
    """
    Persistence manager for all system artifacts.

    File layout:
        data/
        ├── sessions/YYYY-MM-DD/
        │   └── session_<timestamp>_<id>.json
        ├── screen_states/YYYY-MM-DD/
        │   └── state_<timestamp>_<id>.json
        ├── embeddings/YYYY-MM-DD/
        │   ├── state_<id>.npy
        │   └── ui_element_<id>.npy
        ├── faiss_index/
        │   ├── index.faiss
        │   └── metadata.json
        └── workflows/
            └── workflow_<name>_<id>.json

    Validates: Requirements 12.1, 12.2, 12.4, 12.7
    """

    def __init__(self, base_path: str = "data"):
        """
        Initialize the StorageManager.

        Args:
            base_path: Base path for all stored files
        """
        self.base_path = Path(base_path)
        self._ensure_directories()
        logger.info(f"StorageManager initialized with base_path: {self.base_path}")

    def _ensure_directories(self):
        """Create the directory structure if it does not already exist."""
        directories = [
            self.base_path / "sessions",
            self.base_path / "screen_states",
            self.base_path / "embeddings",
            self.base_path / "faiss_index",
            self.base_path / "workflows",
        ]

        for directory in directories:
            directory.mkdir(parents=True, exist_ok=True)
            logger.debug(f"Ensured directory exists: {directory}")

    def _get_date_path(self, base_dir: str) -> Path:
        """
        Return the path with a date subdirectory (YYYY-MM-DD).

        Args:
            base_dir: Base directory (sessions, screen_states, embeddings)

        Returns:
            Path including the date subdirectory
        """
        date_str = datetime.now().strftime("%Y-%m-%d")
        path = self.base_path / base_dir / date_str
        path.mkdir(parents=True, exist_ok=True)
        return path
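    # Note (sketch): the date folder is always "today" at write time, e.g.
    # _get_date_path("sessions") -> Path("data/sessions/2025-01-15") on that
    # day; that is why the read-side methods (list_sessions, load_embedding)
    # accept an explicit `date` argument for anything written on another day.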

    def save_raw_session(
        self,
        session: RawSession,
        session_id: Optional[str] = None
    ) -> Path:
        """
        Save a RawSession to JSON.

        Args:
            session: RawSession to save
            session_id: Optional ID (generated if not provided)

        Returns:
            Path of the saved file

        Validates: Requirements 12.1, 12.7
        """
        if session_id is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            session_id = f"{timestamp}_{id(session)}"

        date_path = self._get_date_path("sessions")
        filename = f"session_{session_id}.json"
        filepath = date_path / filename

        # Serialize to JSON
        data = session.to_json()

        # Add metadata
        data["_metadata"] = {
            "saved_at": datetime.now().isoformat(),
            "schema_version": "rawsession_v1",
            "session_id": session_id
        }

        # Write to disk
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"Saved RawSession to {filepath}")
        return filepath
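    # Sketch (assuming `session` is a RawSession built elsewhere in the
    # pipeline): the generated ID combines a second-resolution timestamp with
    # id(session), so re-saving the same object within the same second reuses
    # the same filename and overwrites the previous file.
    #
    #   path = manager.save_raw_session(session)
    #   path = manager.save_raw_session(session, session_id="demo")
    #   # -> data/sessions/<YYYY-MM-DD>/session_demo.json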

    def load_raw_session(self, filepath: Path) -> RawSession:
        """
        Load a RawSession from JSON.

        Args:
            filepath: Path to the JSON file

        Returns:
            The loaded RawSession

        Raises:
            ValueError: If the schema is incompatible
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Validate the schema
        metadata = data.get("_metadata", {})
        schema_version = metadata.get("schema_version")

        if schema_version != "rawsession_v1":
            raise ValueError(
                f"Incompatible schema version: {schema_version} "
                f"(expected rawsession_v1)"
            )

        # Remove the metadata before deserialization
        data.pop("_metadata", None)

        session = RawSession.from_json(data)
        logger.info(f"Loaded RawSession from {filepath}")
        return session
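    # Round-trip sketch: a file written by save_raw_session loads back as a
    # RawSession, while foreign JSON lacking the expected "_metadata" block
    # raises ValueError rather than deserializing silently.
    #
    #   path = manager.save_raw_session(session)
    #   same_session = manager.load_raw_session(path)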

    def save_screen_state(
        self,
        state: ScreenState,
        state_id: Optional[str] = None
    ) -> Path:
        """
        Save a ScreenState to JSON.

        Args:
            state: ScreenState to save
            state_id: Optional ID (generated if not provided)

        Returns:
            Path of the saved file

        Validates: Requirements 12.1, 12.7
        """
        if state_id is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            state_id = f"{timestamp}_{id(state)}"

        date_path = self._get_date_path("screen_states")
        filename = f"state_{state_id}.json"
        filepath = date_path / filename

        # Serialize to JSON
        data = state.to_json()

        # Add metadata
        data["_metadata"] = {
            "saved_at": datetime.now().isoformat(),
            "schema_version": "screenstate_v1",
            "state_id": state_id
        }

        # Write to disk
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"Saved ScreenState to {filepath}")
        return filepath

    def load_screen_state(self, filepath: Path) -> ScreenState:
        """
        Load a ScreenState from JSON.

        Args:
            filepath: Path to the JSON file

        Returns:
            The loaded ScreenState

        Raises:
            ValueError: If the schema is incompatible
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Validate the schema
        metadata = data.get("_metadata", {})
        schema_version = metadata.get("schema_version")

        if schema_version != "screenstate_v1":
            raise ValueError(
                f"Incompatible schema version: {schema_version} "
                f"(expected screenstate_v1)"
            )

        # Remove the metadata before deserialization
        data.pop("_metadata", None)

        state = ScreenState.from_json(data)
        logger.info(f"Loaded ScreenState from {filepath}")
        return state

    def save_workflow(
        self,
        workflow,  # type resolved dynamically
        workflow_name: Optional[str] = None
    ) -> Path:
        """
        Save a Workflow to JSON.

        Args:
            workflow: Workflow to save
            workflow_name: Optional workflow name

        Returns:
            Path of the saved file

        Validates: Requirements 12.4, 12.7
        """
        if workflow_name is None:
            workflow_name = workflow.workflow_id or "unnamed"

        # Sanitize the name for the filesystem
        safe_name = "".join(c if c.isalnum() or c in "-_" else "_" for c in workflow_name)

        filename = f"workflow_{safe_name}_{workflow.workflow_id}.json"
        filepath = self.base_path / "workflows" / filename

        # Serialize to a dict (not to a JSON string!)
        # FIX: workflow.to_json() returns a string, but a dict is needed here
        data = workflow.to_dict()

        # Add metadata
        data["_metadata"] = {
            "saved_at": datetime.now().isoformat(),
            "schema_version": "workflow_v1",
            "workflow_name": workflow_name
        }

        # Write to disk
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        logger.info(f"Saved Workflow to {filepath}")
        return filepath
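    # Sketch (assuming `wf` is a Workflow instance): a name like "invoice/v2"
    # is sanitized to "invoice_v2" for the filename, while the original name
    # is preserved in the JSON under _metadata["workflow_name"].
    #
    #   path = manager.save_workflow(wf, workflow_name="invoice/v2")
    #   # -> data/workflows/workflow_invoice_v2_<workflow_id>.json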

    def load_workflow(self, filepath: Path):
        """
        Load a Workflow from JSON.

        Args:
            filepath: Path to the JSON file

        Returns:
            The loaded Workflow

        Raises:
            ValueError: If the schema is incompatible

        Validates: Requirements 12.5
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Validate the schema
        metadata = data.get("_metadata", {})
        schema_version = metadata.get("schema_version")

        if schema_version != "workflow_v1":
            raise ValueError(
                f"Incompatible schema version: {schema_version} "
                f"(expected workflow_v1)"
            )

        # Remove the metadata before deserialization
        data.pop("_metadata", None)

        Workflow = get_workflow()
        workflow = Workflow.from_json(data)
        logger.info(f"Loaded Workflow from {filepath}")
        return workflow
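    # Sketch: get_workflow() resolves the Workflow class only at load time
    # (the module imports just RawSession and ScreenState at the top level),
    # so loading stays decoupled from the Workflow definition.
    #
    #   wf = manager.load_workflow(Path("data/workflows/workflow_demo_1.json"))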

    def list_workflows(self) -> List[Dict[str, Any]]:
        """
        List all saved workflows.

        Returns:
            List of dictionaries with information about each workflow
        """
        workflows_dir = self.base_path / "workflows"
        workflows = []

        for filepath in workflows_dir.glob("workflow_*.json"):
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                metadata = data.get("_metadata", {})
                workflows.append({
                    "filepath": str(filepath),
                    "workflow_id": data.get("workflow_id"),
                    "workflow_name": metadata.get("workflow_name"),
                    "saved_at": metadata.get("saved_at"),
                    "num_nodes": len(data.get("nodes", [])),
                    "num_edges": len(data.get("edges", []))
                })
            except Exception as e:
                logger.warning(f"Failed to read workflow {filepath}: {e}")

        return workflows

    def list_sessions(self, date: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        List the saved sessions.

        Args:
            date: Date in YYYY-MM-DD format (today if None)

        Returns:
            List of dictionaries with information about each session
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        sessions_dir = self.base_path / "sessions" / date
        sessions = []

        if not sessions_dir.exists():
            return sessions

        for filepath in sessions_dir.glob("session_*.json"):
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                metadata = data.get("_metadata", {})
                sessions.append({
                    "filepath": str(filepath),
                    "session_id": metadata.get("session_id"),
                    "saved_at": metadata.get("saved_at"),
                    "num_events": len(data.get("events", [])),
                    "num_screenshots": len(data.get("screenshots", []))
                })
            except Exception as e:
                logger.warning(f"Failed to read session {filepath}: {e}")

        return sessions
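    # Sketch: listing today's sessions, then an explicit earlier date.
    #
    #   for info in manager.list_sessions():
    #       print(info["session_id"], info["num_events"])
    #   older = manager.list_sessions(date="2025-01-01")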

    def get_storage_stats(self) -> Dict[str, Any]:
        """
        Return statistics about the storage.

        Returns:
            Dictionary of statistics
        """
        stats = {
            "base_path": str(self.base_path),
            "sessions": 0,
            "screen_states": 0,
            "embeddings": 0,
            "workflows": 0,
            "total_size_mb": 0.0
        }

        # Count sessions and screen_states (JSON files)
        for category in ["sessions", "screen_states"]:
            category_path = self.base_path / category
            if category_path.exists():
                stats[category] = len(list(category_path.rglob("*.json")))

        # Count embeddings (.npy files)
        embeddings_path = self.base_path / "embeddings"
        if embeddings_path.exists():
            stats["embeddings"] = len(list(embeddings_path.rglob("*.npy")))

        workflows_path = self.base_path / "workflows"
        if workflows_path.exists():
            stats["workflows"] = len(list(workflows_path.glob("workflow_*.json")))

        # Compute the total size on disk
        total_size = 0
        for path in self.base_path.rglob("*"):
            if path.is_file():
                total_size += path.stat().st_size

        stats["total_size_mb"] = round(total_size / (1024 * 1024), 2)

        return stats
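    # Sketch of the returned shape (values are made up for illustration):
    #
    #   {"base_path": "data", "sessions": 12, "screen_states": 30,
    #    "embeddings": 58, "workflows": 4, "total_size_mb": 17.42}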

    def save_embedding(
        self,
        embedding_vector: np.ndarray,
        embedding_id: str,
        embedding_type: str = "state",
        metadata: Optional[Dict[str, Any]] = None
    ) -> Path:
        """
        Save an embedding vector as .npy.

        Args:
            embedding_vector: Numpy vector to save
            embedding_id: Unique ID of the embedding
            embedding_type: Embedding type (state, ui_element, etc.)
            metadata: Optional metadata

        Returns:
            Path of the saved .npy file

        Validates: Requirements 12.2
        """
        date_path = self._get_date_path("embeddings")
        filename = f"{embedding_type}_{embedding_id}.npy"
        filepath = date_path / filename

        # Save the vector
        np.save(filepath, embedding_vector)

        # Save the metadata if provided
        if metadata is not None:
            metadata_file = filepath.with_suffix('.json')
            metadata_data = {
                "embedding_id": embedding_id,
                "embedding_type": embedding_type,
                "shape": list(embedding_vector.shape),
                "dtype": str(embedding_vector.dtype),
                "saved_at": datetime.now().isoformat(),
                **metadata
            }

            with open(metadata_file, 'w', encoding='utf-8') as f:
                json.dump(metadata_data, f, indent=2)

        logger.info(f"Saved embedding to {filepath}")
        return filepath
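    # Sketch: persisting a 512-d float32 vector with optional metadata; the
    # metadata lands next to the .npy file as <type>_<id>.json.
    #
    #   vec = np.random.rand(512).astype(np.float32)
    #   manager.save_embedding(vec, "abc123", "state", {"source": "screen_42"})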

    def load_embedding(
        self,
        embedding_id: str,
        embedding_type: str = "state",
        date: Optional[str] = None
    ) -> tuple[np.ndarray, Optional[Dict[str, Any]]]:
        """
        Load an embedding vector from .npy.

        Args:
            embedding_id: ID of the embedding
            embedding_type: Embedding type
            date: Date in YYYY-MM-DD format (today if None)

        Returns:
            Tuple (numpy vector, optional metadata)

        Raises:
            FileNotFoundError: If the file does not exist
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        embeddings_dir = self.base_path / "embeddings" / date
        filename = f"{embedding_type}_{embedding_id}.npy"
        filepath = embeddings_dir / filename

        if not filepath.exists():
            raise FileNotFoundError(f"Embedding not found: {filepath}")

        # Load the vector
        vector = np.load(filepath)

        # Load the metadata if present
        metadata_file = filepath.with_suffix('.json')
        metadata = None
        if metadata_file.exists():
            with open(metadata_file, 'r', encoding='utf-8') as f:
                metadata = json.load(f)

        logger.info(f"Loaded embedding from {filepath}")
        return vector, metadata
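    # Round-trip sketch: lookups only scan the given date folder, so loading
    # an embedding saved on another day requires an explicit `date`.
    #
    #   vec, meta = manager.load_embedding("abc123", "state")
    #   vec, meta = manager.load_embedding("abc123", "state", date="2025-01-01")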

    def save_embeddings_batch(
        self,
        embeddings: Dict[str, np.ndarray],
        embedding_type: str = "state",
        metadata: Optional[Dict[str, Dict[str, Any]]] = None
    ) -> List[Path]:
        """
        Save a batch of embeddings.

        Args:
            embeddings: Dictionary {embedding_id: vector}
            embedding_type: Embedding type
            metadata: Optional dictionary {embedding_id: metadata}

        Returns:
            List of saved paths
        """
        paths = []

        for embedding_id, vector in embeddings.items():
            meta = metadata.get(embedding_id) if metadata else None
            path = self.save_embedding(vector, embedding_id, embedding_type, meta)
            paths.append(path)

        logger.info(f"Saved {len(paths)} embeddings in batch")
        return paths
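    # Sketch: the batch path simply loops over save_embedding, pairing each
    # vector with its per-id metadata when provided.
    #
    #   vectors = {"a1": vec_a, "b2": vec_b}
    #   manager.save_embeddings_batch(vectors, "ui_element",
    #                                 metadata={"a1": {"label": "button"}})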

    def list_embeddings(
        self,
        embedding_type: Optional[str] = None,
        date: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        List the saved embeddings.

        Args:
            embedding_type: Filter by type (None = all)
            date: Date in YYYY-MM-DD format (today if None)

        Returns:
            List of dictionaries with information about each embedding
        """
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")

        embeddings_dir = self.base_path / "embeddings" / date
        embeddings = []

        if not embeddings_dir.exists():
            return embeddings

        pattern = f"{embedding_type}_*.npy" if embedding_type else "*.npy"

        for filepath in embeddings_dir.glob(pattern):
            try:
                # Extract the ID and type from the filename
                stem = filepath.stem  # e.g. "state_12345"
                parts = stem.split("_", 1)
                # Caution: a type containing "_" (e.g. "ui_element") is split
                # as ("ui", "element_<id>") here; the filepath stays correct
                if len(parts) == 2:
                    emb_type, emb_id = parts
                else:
                    emb_type, emb_id = "unknown", stem

                # Load the metadata if present
                metadata_file = filepath.with_suffix('.json')
                metadata = {}
                if metadata_file.exists():
                    with open(metadata_file, 'r', encoding='utf-8') as f:
                        metadata = json.load(f)

                embeddings.append({
                    "filepath": str(filepath),
                    "embedding_id": emb_id,
                    "embedding_type": emb_type,
                    "size_kb": round(filepath.stat().st_size / 1024, 2),
                    **metadata
                })
            except Exception as e:
                logger.warning(f"Failed to read embedding {filepath}: {e}")

        return embeddings

    def save_faiss_index(
        self,
        faiss_manager,
        index_name: str = "main"
    ) -> Path:
        """
        Save a FAISS index and its metadata.

        Args:
            faiss_manager: FAISSManager instance
            index_name: Name of the index

        Returns:
            Path of the saved index file

        Validates: Requirements 12.3
        """
        index_dir = self.base_path / "faiss_index"
        index_path = index_dir / f"{index_name}.faiss"
        metadata_path = index_dir / f"{index_name}_metadata.json"

        # Save the FAISS index
        faiss_manager.save_index(str(index_path))

        # Save the metadata
        metadata = {
            "index_name": index_name,
            "saved_at": datetime.now().isoformat(),
            "num_vectors": faiss_manager.index.ntotal if faiss_manager.index else 0,
            "dimension": faiss_manager.dimension,
            "metadata_store": faiss_manager.metadata_store
        }

        with open(metadata_path, 'w', encoding='utf-8') as f:
            json.dump(metadata, f, indent=2)

        logger.info(f"Saved FAISS index to {index_path}")
        return index_path

    def load_faiss_index(
        self,
        faiss_manager,
        index_name: str = "main"
    ) -> Dict[str, Any]:
        """
        Load a FAISS index and its metadata.

        Args:
            faiss_manager: FAISSManager instance
            index_name: Name of the index

        Returns:
            Metadata of the index

        Raises:
            FileNotFoundError: If the index does not exist

        Validates: Requirements 12.6
        """
        index_dir = self.base_path / "faiss_index"
        index_path = index_dir / f"{index_name}.faiss"
        metadata_path = index_dir / f"{index_name}_metadata.json"

        if not index_path.exists():
            raise FileNotFoundError(f"FAISS index not found: {index_path}")

        # Load the FAISS index
        faiss_manager.load_index(str(index_path))

        # Load the metadata
        metadata = {}
        if metadata_path.exists():
            with open(metadata_path, 'r', encoding='utf-8') as f:
                metadata = json.load(f)

            # Restore the metadata_store
            if "metadata_store" in metadata:
                faiss_manager.metadata_store = metadata["metadata_store"]

        logger.info(f"Loaded FAISS index from {index_path}")
        return metadata
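    # Round-trip sketch (FAISSManager's save_index/load_index methods and its
    # index/dimension/metadata_store attributes are assumed from core code not
    # shown in this commit):
    #
    #   manager.save_faiss_index(faiss_mgr, index_name="main")
    #   meta = manager.load_faiss_index(faiss_mgr, index_name="main")
    #   # faiss_mgr.metadata_store is restored from main_metadata.json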

    def cleanup_old_files(self, days_to_keep: int = 30) -> Dict[str, int]:
        """
        Delete files older than the given number of days.

        Args:
            days_to_keep: Number of days to keep

        Returns:
            Dictionary with the number of deleted files per category
        """
        from datetime import timedelta

        cutoff_date = datetime.now() - timedelta(days=days_to_keep)
        deleted = {
            "sessions": 0,
            "screen_states": 0,
            "embeddings": 0
        }

        for category in ["sessions", "screen_states", "embeddings"]:
            category_path = self.base_path / category

            if not category_path.exists():
                continue

            # Walk the date subdirectories
            for date_dir in category_path.iterdir():
                if not date_dir.is_dir():
                    continue

                try:
                    # Parse the date from the directory name
                    dir_date = datetime.strptime(date_dir.name, "%Y-%m-%d")

                    if dir_date < cutoff_date:
                        # Delete every file in the directory
                        for file in date_dir.iterdir():
                            if file.is_file():
                                file.unlink()
                                deleted[category] += 1

                        # Remove the directory if it is empty
                        if not any(date_dir.iterdir()):
                            date_dir.rmdir()
                            logger.info(f"Removed empty directory: {date_dir}")

                except ValueError:
                    # Invalid directory name; skip it
                    logger.warning(f"Invalid date directory name: {date_dir.name}")

        logger.info(f"Cleanup completed: {deleted}")
        return deleted
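    # Sketch: with days_to_keep=30, any data/<category>/YYYY-MM-DD folder older
    # than 30 days is emptied and removed; workflows and the FAISS index are
    # never touched by cleanup.
    #
    #   removed = manager.cleanup_old_files(days_to_keep=30)
    #   # e.g. {"sessions": 5, "screen_states": 12, "embeddings": 40}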