Files
Geniusia_v2/archive/old_scripts/rebuild_faiss_index.py
2026-03-05 00:20:25 +01:00

150 lines
5.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Script pour reconstruire l'index FAISS à partir des tâches existantes.
"""
import sys
import pickle
import json
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent / "geniusia2"))
from core.embeddings_manager import EmbeddingsManager
from core.logger import Logger
from core.config import get_config
def rebuild_index():
    """Rebuild the FAISS index from the per-task signature files on disk.

    Scans ``geniusia2/data/user_profiles`` for ``task_*`` directories, loads
    each task's ``signatures.pkl`` (and its optional ``metadata.json``), adds
    every stored embedding to the EmbeddingsManager index, saves the index,
    verifies the output files exist, and finally runs a one-shot similarity
    search as a smoke test.
    """
    print("="*60)
    print("🔨 RECONSTRUCTION DE L'INDEX FAISS")
    print("="*60)

    # Initialize the components.  The config value itself is not used in this
    # function; get_config() is still called in case it performs one-time
    # initialization side effects — TODO confirm against core.config.
    get_config()
    logger = Logger()
    embeddings_manager = EmbeddingsManager(logger=logger)

    profiles_dir = Path("geniusia2/data/user_profiles")
    if not profiles_dir.is_dir():
        # Robustness: iterdir() would raise FileNotFoundError otherwise.
        print(f"\n❌ Erreur: répertoire introuvable: {profiles_dir}")
        return
    task_dirs = [d for d in profiles_dir.iterdir()
                 if d.is_dir() and d.name.startswith("task_")]
    print(f"\n📁 Nombre de tâches trouvées: {len(task_dirs)}")

    total_embeddings = 0
    total_actions = 0

    # Walk every recorded task and push its embeddings into the index.
    for i, task_dir in enumerate(task_dirs, 1):
        signatures_file = task_dir / "signatures.pkl"
        metadata_file = task_dir / "metadata.json"
        if not signatures_file.exists():
            continue

        # Load the task's display name from its metadata, when present.
        task_name = "Unknown"
        if metadata_file.exists():
            with open(metadata_file, 'r') as f:
                metadata = json.load(f)
            task_name = metadata.get('task_name', 'Unknown')

        # NOTE(review): pickle is only acceptable because these files are
        # produced locally by the recorder — never load untrusted pickles.
        with open(signatures_file, 'rb') as f:
            signatures = pickle.load(f)
        total_actions += len(signatures)

        # Add each action's embedding (when one exists) with its metadata.
        for j, signature in enumerate(signatures):
            embedding = signature.get('embedding')
            if embedding is None:
                continue
            meta = {
                "task_id": task_dir.name,
                "task_name": task_name,
                "action_index": j,
                "action_type": signature.get('action_type', 'unknown'),
                "description": signature.get('description', ''),
                "window": signature.get('window', ''),
                "timestamp": signature.get('timestamp', '')
            }
            embeddings_manager.add_to_index(embedding, meta)
            total_embeddings += 1

        # Periodic progress report (per task, not per action).
        if i % 10 == 0:
            print(f" Traité {i}/{len(task_dirs)} tâches...")

    print(f"\n✅ Traitement terminé:")
    print(f" - Tâches traitées: {len(task_dirs)}")
    print(f" - Actions totales: {total_actions}")
    print(f" - Embeddings ajoutés: {total_embeddings}")

    # Persist the index.
    print(f"\n💾 Sauvegarde de l'index FAISS...")
    embeddings_manager.save_index()

    # Verify the index files were actually written to disk.
    faiss_dir = Path("geniusia2/data/faiss_index")
    index_file = faiss_dir / "embeddings.index"
    metadata_file = faiss_dir / "metadata.pkl"
    if not (index_file.exists() and metadata_file.exists()):
        print(f"\n❌ Erreur: Index non créé")
        return

    index_size = index_file.stat().st_size
    meta_size = metadata_file.stat().st_size
    print(f"\n✅ Index FAISS créé avec succès!")
    print(f" - embeddings.index: {index_size:,} bytes")
    print(f" - metadata.pkl: {meta_size:,} bytes")

    # Smoke test: run one similarity search with the first embedding found.
    print(f"\n🔍 Test de recherche...")
    if total_embeddings > 0:
        tested = False
        for task_dir in task_dirs:
            signatures_file = task_dir / "signatures.pkl"
            if not signatures_file.exists():
                continue
            with open(signatures_file, 'rb') as f:
                signatures = pickle.load(f)
            for sig in signatures:
                if sig.get('embedding') is not None:
                    test_embedding = sig['embedding']
                    results = embeddings_manager.search_similar(test_embedding, k=3)
                    print(f" Résultats de recherche: {len(results)} trouvés")
                    for r in results[:3]:
                        print(f" - Similarité: {r['similarity']:.3f} | {r['metadata'].get('description', 'N/A')}")
                    tested = True
                    break
            # Bug fix: only stop once a search was actually performed —
            # previously we broke after the first signatures file even if it
            # contained no embeddings, skipping the test entirely.
            if tested:
                break
def main():
    """Script entry point: rebuild the FAISS index and report the outcome."""
    print("\n🔧 RECONSTRUCTION DE L'INDEX FAISS\n")
    try:
        rebuild_index()
    except Exception as exc:
        # Best-effort script: report the failure and its traceback instead
        # of crashing with a raw stack trace.
        print(f"\n❌ Erreur: {exc}")
        import traceback
        traceback.print_exc()
    else:
        # Success banner and a short reminder of what the index enables.
        print("\n" + "="*60)
        print("✅ RECONSTRUCTION TERMINÉE")
        print("="*60)
        print("\n💡 L'index FAISS est maintenant disponible pour:")
        print(" - Recherche de similarité")
        print(" - Suggestions d'actions")
        print(" - Rejeu intelligent")
# Entry guard: run only when executed as a script, not on import.
if __name__ == "__main__":
    main()