Initial commit
This commit is contained in:
176
test_faiss_index_builder.py
Executable file
176
test_faiss_index_builder.py
Executable file
@@ -0,0 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test du FAISSIndexBuilder pour reconstruire l'index à partir des tâches existantes.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'geniusia2'))
|
||||
|
||||
from core.logger import Logger
|
||||
from core.embeddings_manager import EmbeddingsManager
|
||||
from core.faiss_index_builder import FAISSIndexBuilder
|
||||
from core.config import get_config, ensure_directories
|
||||
|
||||
|
||||
def test_scan_tasks():
    """Scan the existing tasks and print a short preview of them.

    Ensures the project directories exist, builds a ``FAISSIndexBuilder``
    and calls its ``scan_tasks()`` method. The number of tasks found is
    printed, followed by the ``task_id`` of at most the first five entries.

    Returns:
        Whatever ``builder.scan_tasks()`` returned, unmodified.
    """
    preview_limit = 5  # only the first few task ids are listed

    print("🔍 Test 1: Scanner les tâches existantes")
    print("=" * 60)

    ensure_directories()
    log = Logger()
    index_builder = FAISSIndexBuilder(EmbeddingsManager(logger=log), log)

    found = index_builder.scan_tasks()
    print(f"✅ Tâches trouvées: {len(found)}")

    for position, entry in enumerate(found[:preview_limit], start=1):
        print(f" {position}. {entry['task_id']}")

    # Tell the user how many entries were left out of the preview.
    hidden = len(found) - preview_limit
    if hidden > 0:
        print(f" ... et {hidden} autres")

    return found
|
||||
|
||||
|
||||
def test_load_embeddings(tasks):
    """Load the embeddings of the first three tasks and print a summary.

    Args:
        tasks: Sliceable sequence of task mappings; each entry is passed to
            ``builder.load_task_embeddings`` and must expose a ``task_id`` key.

    Returns:
        ``True`` when at least one embedding was loaded across the sampled
        tasks, ``False`` otherwise.
    """
    print("\n🧠 Test 2: Charger les embeddings des tâches")
    print("=" * 60)

    log = Logger()
    index_builder = FAISSIndexBuilder(EmbeddingsManager(logger=log), log)

    sample = tasks[:3]  # only the first three tasks are probed
    counts = []  # one entry per task that actually had embeddings

    for entry in sample:
        vectors, _metadata = index_builder.load_task_embeddings(entry)
        if not vectors:
            print(f"⚠️ {entry['task_id']}: Aucun embedding")
            continue
        counts.append(len(vectors))
        print(f"✅ {entry['task_id']}: {len(vectors)} embeddings")

    loaded_total = sum(counts)

    print("\n📊 Résumé:")
    print(f" - Tâches avec embeddings: {len(counts)}/{len(sample)}")
    print(f" - Total embeddings: {loaded_total}")

    return loaded_total > 0
|
||||
|
||||
|
||||
def test_verify_integrity():
    """Run the index integrity check and print its report.

    Builds a ``FAISSIndexBuilder``, calls ``verify_index_integrity()`` and
    prints the counters and flags found in the returned report.

    Returns:
        The report mapping returned by ``builder.verify_index_integrity()``.
    """
    print("\n🔍 Test 3: Vérifier l'intégrité de l'index")
    print("=" * 60)

    log = Logger()
    index_builder = FAISSIndexBuilder(EmbeddingsManager(logger=log), log)
    integrity = index_builder.verify_index_integrity()

    # (label, report key) pairs printed verbatim, in this order.
    counter_rows = (
        ("Tâches scannées", 'tasks_scanned'),
        ("Tâches avec embeddings", 'tasks_with_embeddings'),
        ("Embeddings attendus", 'expected_embeddings'),
        ("Embeddings dans l'index", 'actual_embeddings'),
        ("Embeddings manquants", 'missing_embeddings'),
    )

    print("📊 Rapport d'intégrité:")
    for label, key in counter_rows:
        print(f" - {label}: {integrity[key]}")

    if integrity['is_consistent']:
        consistent_mark = '✅ Oui'
    else:
        consistent_mark = '❌ Non'
    print(f" - Cohérent: {consistent_mark}")

    if integrity['needs_rebuild']:
        rebuild_mark = '⚠️ Oui'
    else:
        rebuild_mark = '✅ Non'
    print(f" - Reconstruction nécessaire: {rebuild_mark}")

    return integrity
|
||||
|
||||
|
||||
def test_rebuild_index():
    """Ask for confirmation, then rebuild the whole FAISS index.

    The rebuild is invoked with ``force=True``, so it only proceeds after an
    explicit positive answer (``oui``, ``o``, ``yes`` or ``y``). When stdin
    cannot be read (closed pipe, CI job, captured input) the prompt is
    treated as declined instead of raising, so a forced rebuild never starts
    without consent and a non-interactive run does not crash here.

    Returns:
        The statistics mapping returned by ``builder.rebuild_index``, or
        ``None`` when the rebuild was declined or could not be confirmed.
    """
    print("\n🔨 Test 4: Reconstruire l'index FAISS")
    print("=" * 60)
    print("⚠️ ATTENTION: Ceci va reconstruire l'index complet")

    try:
        response = input("Continuer? (oui/non): ").strip().lower()
    except (EOFError, OSError):
        # No usable stdin: fall back to the safe answer ("no").
        response = ""

    if response not in ('oui', 'o', 'yes', 'y'):
        print("❌ Reconstruction annulée")
        return None

    logger = Logger()
    embeddings_manager = EmbeddingsManager(logger=logger)
    builder = FAISSIndexBuilder(embeddings_manager, logger)

    print("\n🔄 Reconstruction en cours...")
    stats = builder.rebuild_index(force=True)

    print("\n📊 Résultats de la reconstruction:")
    print(f" - Succès: {'✅ Oui' if stats['success'] else '❌ Non'}")
    print(f" - Tâches scannées: {stats['tasks_scanned']}")
    print(f" - Tâches traitées: {stats['tasks_processed']}")
    print(f" - Tâches échouées: {stats['tasks_failed']}")
    print(f" - Embeddings ajoutés: {stats['embeddings_added']}")
    print(f" - Taille finale de l'index: {stats['final_index_size']}")
    print(f" - Durée: {stats['duration_seconds']:.2f}s")

    errors = stats['errors']
    if errors:
        print(f"\n⚠️ Erreurs ({len(errors)}):")
        for error in errors[:5]:  # show only the first five errors
            print(f" - {error}")

    return stats
|
||||
|
||||
|
||||
def main():
    """Run the four checks in sequence and print a final status.

    Steps: scan the tasks, load a sample of embeddings, verify the index
    integrity and, only when the report says so, offer to rebuild the index.
    The run stops early when no task or no embedding is found.

    A declined rebuild (``test_rebuild_index()`` returning ``None``) is not
    reported as a failure: the callee already printed the cancellation, so
    the failure message is reserved for a rebuild that actually ran and did
    not succeed.

    Any exception raised by a step is caught here, reported and its
    traceback printed; nothing is re-raised.
    """
    print("🧪 TEST DU FAISS INDEX BUILDER")
    print("=" * 60)
    print("Ce script teste la reconstruction de l'index FAISS")
    print("à partir des tâches existantes (19+ tâches attendues)")
    print("=" * 60)

    try:
        # Test 1: scan the tasks
        tasks = test_scan_tasks()
        if not tasks:
            print("\n❌ Aucune tâche trouvée. Impossible de continuer.")
            return

        # Test 2: load the embeddings
        has_embeddings = test_load_embeddings(tasks)
        if not has_embeddings:
            print("\n❌ Aucun embedding trouvé. Impossible de continuer.")
            return

        # Test 3: verify the integrity
        report = test_verify_integrity()

        # Test 4: rebuild only when the report asks for it
        if report['needs_rebuild']:
            print("\n⚠️ L'index nécessite une reconstruction")
            stats = test_rebuild_index()

            if stats is None:
                # Rebuild declined by the user; the cancellation message was
                # already printed, and a cancellation is not a failure.
                pass
            elif stats['success']:
                print("\n✅ RECONSTRUCTION RÉUSSIE!")
                print(f" L'index contient maintenant {stats['final_index_size']} embeddings")
                print(" Les suggestions devraient maintenant fonctionner!")
            else:
                print("\n❌ La reconstruction a échoué")
        else:
            print("\n✅ L'index est cohérent, pas de reconstruction nécessaire")

        print("\n" + "=" * 60)
        print("✅ TESTS TERMINÉS")
        print("=" * 60)

    except Exception as e:
        # Top-level boundary of the script: report the error with its
        # traceback instead of letting the interpreter abort silently.
        print(f"\n❌ Erreur lors des tests: {e}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user