v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
356
rebuild_faiss_simple.py
Normal file
356
rebuild_faiss_simple.py
Normal file
@@ -0,0 +1,356 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script utilitaire FAISS Rebuild Propre
|
||||
|
||||
Auteur : Dom, Alice Kiro - 22 décembre 2025
|
||||
|
||||
Script pour déclencher un rebuild complet de l'index FAISS depuis les prototypes
|
||||
stockés dans les workflows. Utilise la stratégie "clear + reindex complet".
|
||||
|
||||
Usage:
|
||||
python3 rebuild_faiss_simple.py [options]
|
||||
|
||||
Options:
|
||||
--dry-run Afficher ce qui serait fait sans exécuter
|
||||
--verbose Affichage détaillé
|
||||
--index-type Type d'index FAISS (Flat, IVF) [défaut: Flat]
|
||||
--data-dir Répertoire de données [défaut: data]
|
||||
--help Afficher cette aide
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import json
|
||||
|
||||
# Logging configuration: timestamped, level-tagged messages on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by every helper in this script.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def setup_logging(verbose: bool = False):
    """Adjust the root logger level: DEBUG when *verbose*, INFO otherwise."""
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.getLogger().setLevel(chosen_level)
|
||||
|
||||
|
||||
def load_workflows_from_directory(workflows_dir: Path) -> List[Dict[str, Any]]:
    """Load every workflow JSON file found in *workflows_dir*.

    Args:
        workflows_dir: Directory containing workflow ``.json`` files.

    Returns:
        One record per successfully parsed file, each holding the file
        path, workflow id, display name, node count and the raw data.
        Unreadable or malformed files are logged and skipped.
    """
    loaded: List[Dict[str, Any]] = []

    if not workflows_dir.exists():
        logger.warning(f"Répertoire workflows non trouvé: {workflows_dir}")
        return loaded

    for json_path in workflows_dir.glob("*.json"):
        try:
            with open(json_path, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)
        except Exception as exc:  # best effort: one bad file must not stop the scan
            logger.error(f"Erreur chargement workflow {json_path}: {exc}")
            continue

        loaded.append({
            "file_path": json_path,
            "workflow_id": payload.get("workflow_id", json_path.stem),
            "name": payload.get("name", "Unknown"),
            "nodes_count": len(payload.get("nodes", [])),
            "data": payload,
        })
        logger.debug(
            f"Chargé workflow: {payload.get('name', 'Unknown')} "
            f"({len(payload.get('nodes', []))} nodes)"
        )

    logger.info(f"Chargé {len(loaded)} workflows depuis {workflows_dir}")
    return loaded
|
||||
|
||||
|
||||
def extract_prototypes_from_workflows(workflows: List[Dict[str, Any]]) -> List[Tuple[str, Any, Dict[str, Any]]]:
    """Extract every embedding prototype vector stored in the given workflows.

    Three storage formats are supported, tried in order for each node:
      * v1:     ``template.embedding_prototype`` (inline list of floats)
      * v2:     ``template.embedding.vector_id`` (path to a ``.npy`` file)
      * legacy: ``screen_template.embedding_prototype_path`` (path to ``.npy``)

    Args:
        workflows: Workflow records as produced by
            ``load_workflows_from_directory`` (each carrying ``workflow_id``,
            ``name`` and ``data`` keys).

    Returns:
        List of ``(embedding_id, vector, metadata)`` tuples, one per node
        for which a prototype could be loaded.
    """
    import numpy as np

    prototypes: List[Tuple[str, Any, Dict[str, Any]]] = []

    for workflow in workflows:
        workflow_id = workflow["workflow_id"]
        workflow_name = workflow["name"]
        nodes = workflow["data"].get("nodes", [])

        logger.debug(f"Extraction prototypes workflow {workflow_name} ({len(nodes)} nodes)")

        for node in nodes:
            node_id = node.get("node_id", "unknown")
            node_name = node.get("name", "")

            # Try the known prototype storage formats in order of preference.
            vector = None

            # Format v1: template.embedding_prototype (inline list).
            template = node.get("template")
            if template and isinstance(template, dict):
                embedding_prototype = template.get("embedding_prototype")
                if isinstance(embedding_prototype, list):
                    try:
                        vector = np.array(embedding_prototype, dtype=np.float32)
                        logger.debug(f"Prototype v1 trouvé pour {node_id}: {len(vector)} dimensions")
                    except Exception as e:
                        logger.debug(f"Erreur conversion prototype v1 {node_id}: {e}")

            # Format v2: template.embedding.vector_id (path to an .npy file).
            # BUG FIX: guard on `template` actually being a dict — a node with
            # no "template" key previously hit AttributeError on None.get().
            if vector is None and isinstance(template, dict):
                embedding = template.get("embedding")
                if embedding and isinstance(embedding, dict):
                    vector_id = embedding.get("vector_id")
                    if vector_id and Path(vector_id).exists():
                        try:
                            vector = np.load(vector_id).astype(np.float32)
                            logger.debug(f"Prototype v2 trouvé pour {node_id}: {len(vector)} dimensions")
                        except Exception as e:
                            logger.debug(f"Erreur chargement prototype v2 {node_id}: {e}")

            # Legacy format: screen_template.embedding_prototype_path.
            if vector is None:
                screen_template = node.get("screen_template")
                if screen_template and isinstance(screen_template, dict):
                    prototype_path = screen_template.get("embedding_prototype_path")
                    if prototype_path and Path(prototype_path).exists():
                        try:
                            vector = np.load(prototype_path).astype(np.float32)
                            logger.debug(f"Prototype legacy trouvé pour {node_id}: {len(vector)} dimensions")
                        except Exception as e:
                            logger.debug(f"Erreur chargement prototype legacy {node_id}: {e}")

            # Record the node's vector when one of the formats matched.
            if vector is not None:
                prototypes.append((
                    node_id,
                    vector,
                    {
                        "workflow_id": workflow_id,
                        "workflow_name": workflow_name,
                        "node_id": node_id,
                        "node_name": node_name,
                        "vector_dimensions": len(vector)
                    }
                ))
            else:
                logger.debug(f"Aucun prototype trouvé pour node {node_id} (workflow {workflow_name})")

    logger.info(f"Extrait {len(prototypes)} prototypes depuis {len(workflows)} workflows")
    return prototypes
|
||||
|
||||
|
||||
def rebuild_faiss_index(
    prototypes: List[Tuple[str, Any, Dict[str, Any]]],
    index_type: str = "Flat",
    dimensions: Optional[int] = None,
    dry_run: bool = False
) -> Dict[str, Any]:
    """Rebuild the FAISS index from scratch with the given prototypes.

    Args:
        prototypes: ``(embedding_id, vector, metadata)`` tuples to index.
        index_type: FAISS index flavour ("Flat" or "IVF").
        dimensions: Vector dimensionality; inferred from the first
            prototype when None.
        dry_run: When True, only log what would be done.

    Returns:
        Result dictionary with at least ``success``, ``message`` and
        ``count`` keys (plus timing and stats after a real rebuild).
    """
    if not prototypes:
        return {
            "success": False,
            "message": "Aucun prototype à indexer",
            "count": 0
        }

    # Infer the dimensionality from the first vector when not supplied.
    if dimensions is None:
        dimensions = len(prototypes[0][1])
        logger.info(f"Dimensions auto-détectées: {dimensions}")

    # Report (without aborting) any vector whose dimensionality differs.
    for proto_id, proto_vec, _meta in prototypes:
        if len(proto_vec) != dimensions:
            logger.warning(f"Dimension incohérente pour {proto_id}: {len(proto_vec)} != {dimensions}")

    if dry_run:
        logger.info("=== MODE DRY-RUN ===")
        logger.info(f"Créerait index FAISS {index_type} avec {dimensions} dimensions")
        logger.info(f"Indexerait {len(prototypes)} prototypes:")

        # Preview only the first five entries to keep the output short.
        for proto_id, _vec, meta in prototypes[:5]:
            logger.info(f"  - {proto_id}: {meta.get('workflow_name', 'Unknown')} / {meta.get('node_name', 'Unknown')}")
        remaining = len(prototypes) - 5
        if remaining > 0:
            logger.info(f"  ... et {remaining} autres")

        return {
            "success": True,
            "message": "Simulation réussie",
            "count": len(prototypes),
            "dry_run": True
        }

    # Real rebuild.
    try:
        from core.embedding.faiss_manager import FAISSManager

        logger.info(f"Création index FAISS {index_type} avec {dimensions} dimensions")
        manager = FAISSManager(
            dimensions=dimensions,
            index_type=index_type,
            metric="cosine"
        )

        logger.info(f"Rebuild FAISS avec {len(prototypes)} prototypes...")
        started_at = datetime.now()
        indexed_count = manager.reindex(prototypes, force_train_ivf=True)
        elapsed = (datetime.now() - started_at).total_seconds()

        logger.info(f"Rebuild terminé: {indexed_count} prototypes indexés en {elapsed:.2f}s")

        # Final index statistics.
        stats = manager.get_stats()
        logger.info(f"Index final: {stats['total_vectors']} vecteurs, trained={stats['is_trained']}")

        return {
            "success": True,
            "message": f"Rebuild réussi: {indexed_count} prototypes indexés",
            "count": indexed_count,
            "duration_seconds": elapsed,
            "stats": stats
        }

    except ImportError as e:
        return {
            "success": False,
            "message": f"FAISS non disponible: {e}",
            "count": 0
        }
    except Exception as e:
        logger.error(f"Erreur rebuild FAISS: {e}", exc_info=True)
        return {
            "success": False,
            "message": f"Erreur rebuild: {e}",
            "count": 0
        }
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the FAISS rebuild utility.

    Parses options, loads workflows, extracts prototypes and triggers
    the rebuild (or a dry-run simulation).

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    cli = argparse.ArgumentParser(
        description="Script utilitaire FAISS Rebuild Propre",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Exemples:
  python3 rebuild_faiss_simple.py --dry-run
  python3 rebuild_faiss_simple.py --verbose --index-type IVF
  python3 rebuild_faiss_simple.py --data-dir /path/to/data
"""
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Mode simulation - afficher ce qui serait fait sans exécuter"
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Affichage détaillé"
    )
    cli.add_argument(
        "--index-type",
        choices=["Flat", "IVF"],
        default="Flat",
        help="Type d'index FAISS (défaut: Flat)"
    )
    cli.add_argument(
        "--data-dir",
        type=Path,
        default=Path("data"),
        help="Répertoire de données (défaut: data)"
    )
    opts = cli.parse_args()

    # Apply verbosity before anything is logged.
    setup_logging(opts.verbose)

    logger.info("🔧 FAISS Rebuild Propre - Script utilitaire")
    logger.info("=" * 60)
    logger.info(f"Mode: {'DRY-RUN' if opts.dry_run else 'EXECUTION'}")
    logger.info(f"Index type: {opts.index_type}")
    logger.info(f"Data dir: {opts.data_dir}")

    # Step 1: load workflows from disk.
    logger.info("\n1. Chargement des workflows...")
    workflows = load_workflows_from_directory(opts.data_dir / "workflows")
    if not workflows:
        logger.error("Aucun workflow trouvé. Vérifiez le répertoire de données.")
        return 1

    # Step 2: pull prototype vectors out of the workflows.
    logger.info("\n2. Extraction des prototypes...")
    prototypes = extract_prototypes_from_workflows(workflows)
    if not prototypes:
        logger.error("Aucun prototype trouvé dans les workflows.")
        return 1

    # Step 3: rebuild the FAISS index (or simulate it in dry-run mode).
    logger.info("\n3. Rebuild index FAISS...")
    outcome = rebuild_faiss_index(
        prototypes=prototypes,
        index_type=opts.index_type,
        dry_run=opts.dry_run
    )

    # Report the outcome.
    logger.info("\n" + "=" * 60)
    if not outcome["success"]:
        logger.error(f"❌ {outcome['message']}")
        return 1

    logger.info(f"✅ {outcome['message']}")
    if not opts.dry_run:
        logger.info(f"📊 Statistiques: {outcome.get('stats', {})}")

    logger.info("🎉 FAISS Rebuild Propre terminé avec succès")
    return 0
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user