v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
136
core/embedding/base_embedder.py
Normal file
136
core/embedding/base_embedder.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Abstract base class for embedding models.
|
||||
|
||||
This module defines the interface that all embedding models must implement,
|
||||
ensuring consistency across different model implementations (CLIP, etc.).
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
|
||||
class EmbedderBase(ABC):
    """
    Abstract base class for image and text embedding models.

    All embedding models must implement this interface to ensure
    compatibility with the state embedding system. Concrete subclasses
    (e.g. a CLIP-based embedder) implement the four abstract methods;
    the batch helpers and ``__repr__`` are provided for free and may be
    overridden for optimized batch inference.
    """

    @abstractmethod
    def embed_image(self, image: Image.Image) -> np.ndarray:
        """
        Generate an embedding vector for a single image.

        Args:
            image: PIL Image to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If image is invalid or cannot be processed
            RuntimeError: If model inference fails
        """

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """
        Generate an embedding vector for text.

        Args:
            text: Text string to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If text is invalid
            RuntimeError: If model inference fails
        """

    @abstractmethod
    def get_dimension(self) -> int:
        """
        Get the dimensionality of embeddings produced by this model.

        Returns:
            int: Embedding dimension (e.g., 512 for CLIP ViT-B/32)
        """

    @abstractmethod
    def get_model_name(self) -> str:
        """
        Get a unique identifier for this model.

        Returns:
            str: Model name (e.g., "clip-vit-b32")
        """

    def embed_image_batch(self, images: List[Image.Image]) -> np.ndarray:
        """
        Generate embeddings for multiple images.

        Default implementation processes images one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            images: List of PIL Images to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(images), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any image is invalid
            RuntimeError: If model inference fails
        """
        if not images:
            # Preserve the (0, dimension) shape so callers always get a
            # consistent 2-D array, even for empty input.
            return np.empty((0, self.get_dimension()))
        return np.stack([self.embed_image(img) for img in images])

    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for multiple texts.

        Default implementation processes texts one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            texts: List of text strings to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(texts), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any text is invalid
            RuntimeError: If model inference fails
        """
        if not texts:
            # Preserve the (0, dimension) shape so callers always get a
            # consistent 2-D array, even for empty input.
            return np.empty((0, self.get_dimension()))
        return np.stack([self.embed_text(text) for text in texts])

    def __repr__(self) -> str:
        """String representation of the embedder."""
        return f"{self.__class__.__name__}(model={self.get_model_name()}, dim={self.get_dimension()})"
|
||||
Reference in New Issue
Block a user