Initial commit
This commit is contained in:
183
rebuild_faiss_simple.py
Executable file
183
rebuild_faiss_simple.py
Executable file
@@ -0,0 +1,183 @@
|
||||
#!/usr/bin/env python3
"""
Simplified script to rebuild the FAISS index without heavy dependencies.

IMPORTANT: use the Python interpreter from the venv!
Usage: geniusia2/venv/bin/python rebuild_faiss_simple.py
"""

import pickle
import json
import numpy as np
from pathlib import Path
|
||||
|
||||
|
||||
def rebuild_index(profiles_dir="geniusia2/data/user_profiles",
                  faiss_dir="geniusia2/data/faiss_index"):
    """Rebuild the FAISS index from the per-task signature files.

    Walks every ``task_*`` sub-directory of *profiles_dir*, collects the
    stored action embeddings, builds an exact-search ``IndexFlatL2`` index
    and writes it (plus a pickled metadata list) into *faiss_dir*.

    Args:
        profiles_dir: Directory containing the ``task_*`` sub-directories,
            each expected to hold ``signatures.pkl`` and, optionally,
            ``metadata.json``. Defaults to the historical hard-coded path.
        faiss_dir: Destination directory for ``embeddings.index`` and
            ``metadata.pkl``. Created if missing.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    print("="*60)
    print("🔨 RECONSTRUCTION DE L'INDEX FAISS")
    print("="*60)

    # faiss is an optional heavy dependency; bail out with an install hint.
    try:
        import faiss
    except ImportError:
        print("\n❌ FAISS n'est pas installé")
        print(" Installation: pip install faiss-cpu")
        return

    profiles_dir = Path(profiles_dir)
    task_dirs = [d for d in profiles_dir.iterdir()
                 if d.is_dir() and d.name.startswith("task_")]

    print(f"\n📁 Nombre de tâches trouvées: {len(task_dirs)}")

    all_embeddings, all_metadata, total_actions = _collect_embeddings(task_dirs)

    print(f"\n✅ Traitement terminé:")
    print(f" - Tâches traitées: {len(task_dirs)}")
    print(f" - Actions totales: {total_actions}")
    print(f" - Embeddings collectés: {len(all_embeddings)}")

    if not all_embeddings:
        print("\n❌ Aucun embedding trouvé")
        return

    print(f"\n🔨 Création de l'index FAISS...")

    # Stack the individual vectors into one (n, dim) float32 matrix.
    embeddings_matrix = np.vstack(all_embeddings)
    dimension = embeddings_matrix.shape[1]

    print(f" Dimension des embeddings: {dimension}")
    print(f" Nombre d'embeddings: {embeddings_matrix.shape[0]}")

    # IndexFlatL2 = exact (brute-force) L2 search; fine at this corpus size.
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings_matrix)

    print(f" Index créé avec {index.ntotal} vecteurs")

    faiss_dir = Path(faiss_dir)
    faiss_dir.mkdir(parents=True, exist_ok=True)

    index_file = faiss_dir / "embeddings.index"
    metadata_file = faiss_dir / "metadata.pkl"

    print(f"\n💾 Sauvegarde de l'index...")

    # Persist the FAISS index and the parallel metadata list.
    faiss.write_index(index, str(index_file))
    with open(metadata_file, 'wb') as f:
        pickle.dump(all_metadata, f)

    # Verify both artifacts landed on disk before declaring success.
    if index_file.exists() and metadata_file.exists():
        index_size = index_file.stat().st_size
        meta_size = metadata_file.stat().st_size

        print(f"\n✅ Index FAISS créé avec succès!")
        print(f" - embeddings.index: {index_size:,} bytes ({index_size/1024:.1f} KB)")
        print(f" - metadata.pkl: {meta_size:,} bytes ({meta_size/1024:.1f} KB)")

        _test_search(index, embeddings_matrix, all_metadata)
    else:
        print(f"\n❌ Erreur: Index non créé")


def _collect_embeddings(task_dirs):
    """Load embeddings and metadata from every task directory.

    Args:
        task_dirs: Iterable of ``Path`` objects, one per ``task_*`` directory.

    Returns:
        Tuple ``(all_embeddings, all_metadata, total_actions)`` where the
        first two lists are parallel (one metadata dict per embedding) and
        ``total_actions`` counts every signature seen, embedding or not.
    """
    all_embeddings = []
    all_metadata = []
    total_actions = 0

    for i, task_dir in enumerate(task_dirs, 1):
        signatures_file = task_dir / "signatures.pkl"
        metadata_file = task_dir / "metadata.json"

        if not signatures_file.exists():
            continue

        # Task display name is optional; fall back to "Unknown".
        task_name = "Unknown"
        if metadata_file.exists():
            with open(metadata_file, 'r') as f:
                metadata = json.load(f)
            task_name = metadata.get('task_name', 'Unknown')

        # NOTE(review): pickle.load is only safe on locally produced files —
        # never feed this untrusted data. A corrupt file should not abort
        # the whole rebuild, so skip it instead of crashing.
        try:
            with open(signatures_file, 'rb') as f:
                signatures = pickle.load(f)
        except (pickle.UnpicklingError, EOFError, OSError):
            continue

        total_actions += len(signatures)

        for j, signature in enumerate(signatures):
            embedding = signature.get('embedding')
            if embedding is None:
                continue

            # FAISS requires float32 vectors; asarray converts only if needed.
            all_embeddings.append(np.asarray(embedding, dtype=np.float32))
            all_metadata.append({
                "task_id": task_dir.name,
                "task_name": task_name,
                "action_index": j,
                "action_type": signature.get('action_type', 'unknown'),
                "element_type": signature.get('element_type', ''),
                "window": signature.get('window', ''),
                "position": signature.get('position', None),
            })

        if i % 10 == 0:
            print(f" Traité {i}/{len(task_dirs)} tâches...")

    return all_embeddings, all_metadata, total_actions


def _test_search(index, embeddings_matrix, all_metadata):
    """Sanity-check the freshly built index with a self-query.

    Queries the index with its own first vector and prints the three
    nearest neighbours with an L2-distance-derived similarity score.
    """
    print(f"\n🔍 Test de recherche...")

    # Query with the first stored embedding (kept 2-D for faiss.search).
    test_embedding = embeddings_matrix[0:1]
    distances, indices = index.search(test_embedding, 3)

    print(f" Résultats de recherche:")
    for rank, (dist, idx) in enumerate(zip(distances[0], indices[0])):
        meta = all_metadata[idx]
        similarity = 1 / (1 + dist)  # map L2 distance into a (0, 1] score
        print(f" {rank+1}. Similarité: {similarity:.3f} | {meta.get('task_name', 'N/A')} - {meta.get('action_type', 'N/A')}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: run the index rebuild and report the outcome."""
    print("\n🔧 RECONSTRUCTION DE L'INDEX FAISS\n")

    try:
        rebuild_index()

        separator = "="*60
        print("\n" + separator)
        print("✅ RECONSTRUCTION TERMINÉE")
        print(separator)

        print("\n💡 L'index FAISS est maintenant disponible pour:")
        for usage_line in (" - Recherche de similarité",
                           " - Suggestions d'actions",
                           " - Rejeu intelligent"):
            print(usage_line)

    except Exception as exc:
        # Top-level boundary: surface the failure with a full traceback.
        print(f"\n❌ Erreur: {exc}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user