#!/usr/bin/env python3
"""Diagnostic script that reports the state of the on-disk data.

It inspects three locations (relative to the current working directory by
default) and prints a human-readable report for each:

* the FAISS index directory (``embeddings.index`` and ``metadata.pkl``),
* the user-profile directory containing ``task_*`` sub-directories,
* the log directory containing ``*.enc`` files.
"""

import json
import pickle
import sys
from pathlib import Path

# Put the sibling "geniusia2" directory first on the import path.
# NOTE(review): presumably required so that unpickling can resolve project
# classes stored in the .pkl files -- confirm.
sys.path.insert(0, str(Path(__file__).parent / "geniusia2"))

# Default data locations, relative to the current working directory.
_DEFAULT_FAISS_DIR = "geniusia2/data/faiss_index"
_DEFAULT_PROFILES_DIR = "geniusia2/data/user_profiles"
_DEFAULT_LOGS_DIR = "geniusia2/data/logs"

# How many tasks / log files are listed in detail.
_SAMPLE_SIZE = 5


def check_faiss_index(faiss_dir=None):
    """Print a report about the FAISS index files.

    Args:
        faiss_dir: Directory expected to hold ``embeddings.index`` and
            ``metadata.pkl`` (``str`` or ``Path``). Defaults to
            ``geniusia2/data/faiss_index``.

    Returns:
        None. Missing files are reported, not raised.
    """
    faiss_dir = Path(_DEFAULT_FAISS_DIR if faiss_dir is None else faiss_dir)
    index_file = faiss_dir / "embeddings.index"
    metadata_file = faiss_dir / "metadata.pkl"
    # Evaluate each existence check once so the status line and the detail
    # line below cannot disagree.
    has_index = index_file.exists()
    has_metadata = metadata_file.exists()

    print("=" * 60)
    print("📊 VÉRIFICATION INDEX FAISS")
    print("=" * 60)

    print(f"\n📁 Répertoire: {faiss_dir}")
    print(f" Index FAISS: {'✅ Existe' if has_index else '❌ Manquant'}")
    print(f" Métadonnées: {'✅ Existe' if has_metadata else '❌ Manquant'}")

    if has_index:
        size = index_file.stat().st_size
        print(f" Taille index: {size:,} bytes")

    if has_metadata:
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # run this script against data produced by a trusted source.
        with open(metadata_file, "rb") as f:
            metadata = pickle.load(f)
        print(f" Nombre d'embeddings: {len(metadata)}")


def _describe_task(position, task_dir):
    """Print the per-task report for one ``task_*`` directory.

    Args:
        position: 1-based number shown in front of the task name.
        task_dir: ``Path`` of the task directory.
    """
    print(f"\n{position}. {task_dir.name}")

    # Check which of the expected files are present.
    metadata_file = task_dir / "metadata.json"
    signatures_file = task_dir / "signatures.pkl"
    screenshots_dir = task_dir / "screenshots"
    has_metadata = metadata_file.exists()
    has_signatures = signatures_file.exists()

    print(f" metadata.json: {'✅' if has_metadata else '❌'}")
    print(f" signatures.pkl: {'✅' if has_signatures else '❌'}")
    print(f" screenshots/: {'✅' if screenshots_dir.exists() else '❌'}")

    if has_metadata:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(metadata_file, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        print(f" Nom: {metadata.get('task_name', 'N/A')}")
        print(f" Observations: {metadata.get('observation_count', 0)}")

    if has_signatures:
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # run this script against data produced by a trusted source.
        with open(signatures_file, "rb") as f:
            signatures = pickle.load(f)
        print(f" Signatures: {len(signatures)} actions")

        # Report whether at least one signature carries each key.
        # NOTE(review): assumes each signature supports ``in`` with a string
        # key (e.g. a dict) -- confirm against the writer of signatures.pkl.
        has_embeddings = any("embedding" in sig for sig in signatures)
        has_screenshots = any("screenshot" in sig for sig in signatures)
        print(f" Embeddings: {'✅' if has_embeddings else '❌'}")
        print(f" Screenshots: {'✅' if has_screenshots else '❌'}")


def check_tasks(profiles_dir=None):
    """Print a report about the saved tasks.

    Counts the ``task_*`` sub-directories of *profiles_dir* and prints
    details for the first few of them (in sorted name order).

    Args:
        profiles_dir: Directory containing the ``task_*`` directories
            (``str`` or ``Path``). Defaults to
            ``geniusia2/data/user_profiles``.

    Returns:
        None. A missing directory is reported instead of raising
        ``FileNotFoundError``, so the remaining checks can still run.
    """
    profiles_dir = Path(
        _DEFAULT_PROFILES_DIR if profiles_dir is None else profiles_dir
    )

    print("\n" + "=" * 60)
    print("📋 VÉRIFICATION TÂCHES")
    print("=" * 60)

    # iterdir() raises on a missing directory; report it and stop here.
    if not profiles_dir.is_dir():
        print(f"\n❌ Répertoire manquant: {profiles_dir}")
        return

    # Sorted so the sample below is the same on every run.
    task_dirs = sorted(
        d
        for d in profiles_dir.iterdir()
        if d.is_dir() and d.name.startswith("task_")
    )

    print(f"\n📁 Nombre de tâches: {len(task_dirs)}")

    # Detail only a small sample of the tasks.
    print("\n📝 Échantillon de tâches:")
    for position, task_dir in enumerate(task_dirs[:_SAMPLE_SIZE], start=1):
        _describe_task(position, task_dir)


def check_logs(logs_dir=None):
    """Print a report about the ``*.enc`` log files.

    Args:
        logs_dir: Directory containing the log files (``str`` or ``Path``).
            Defaults to ``geniusia2/data/logs``.

    Returns:
        None.
    """
    logs_dir = Path(_DEFAULT_LOGS_DIR if logs_dir is None else logs_dir)

    print("\n" + "=" * 60)
    print("📝 VÉRIFICATION LOGS")
    print("=" * 60)

    log_files = list(logs_dir.glob("*.enc"))

    print(f"\n📁 Nombre de fichiers de logs: {len(log_files)}")

    if log_files:
        total_size = sum(f.stat().st_size for f in log_files)
        print(
            f" Taille totale: {total_size:,} bytes "
            f"({total_size/1024/1024:.2f} MB)"
        )

        # List the last few files in sorted (path) order.
        print("\n📅 Logs par date:")
        for log_file in sorted(log_files)[-_SAMPLE_SIZE:]:
            size = log_file.stat().st_size
            print(f" {log_file.name}: {size:,} bytes")


def main():
    """Run every check in order, then print a fixed summary.

    The summary lines are static reminders; they are not derived from the
    results of the checks above.
    """
    print("\n🔍 DIAGNOSTIC DES DONNÉES RPA VISION V2\n")

    check_faiss_index()
    check_tasks()
    check_logs()

    print("\n" + "=" * 60)
    print("✅ DIAGNOSTIC TERMINÉ")
    print("=" * 60)

    print("\n💡 RÉSUMÉ:")
    print(" - Les tâches sont créées et sauvegardées")
    print(" - Les signatures.pkl contiennent les actions")
    print(" - ⚠️ Vérifier si les embeddings et screenshots sont dans les signatures")
    print(" - ⚠️ Vérifier si l'index FAISS est créé")


if __name__ == "__main__":
    main()