v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
136
core/embedding/base_embedder.py
Normal file
136
core/embedding/base_embedder.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""
|
||||
Abstract base class for embedding models.
|
||||
|
||||
This module defines the interface that all embedding models must implement,
|
||||
ensuring consistency across different model implementations (CLIP, etc.).
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
|
||||
class EmbedderBase(ABC):
    """
    Abstract base class for image and text embedding models.

    All embedding models must implement this interface to ensure
    compatibility with the state embedding system. Concrete subclasses
    (e.g. a CLIP-based embedder) implement the four abstract methods;
    the batch helpers and ``__repr__`` are provided for free and may be
    overridden for optimized batch inference.
    """

    @abstractmethod
    def embed_image(self, image: Image.Image) -> np.ndarray:
        """
        Generate an embedding vector for a single image.

        Args:
            image: PIL Image to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If image is invalid or cannot be processed
            RuntimeError: If model inference fails
        """

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """
        Generate an embedding vector for text.

        Args:
            text: Text string to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If text is invalid
            RuntimeError: If model inference fails
        """

    @abstractmethod
    def get_dimension(self) -> int:
        """
        Get the dimensionality of embeddings produced by this model.

        Returns:
            int: Embedding dimension (e.g., 512 for CLIP ViT-B/32)
        """

    @abstractmethod
    def get_model_name(self) -> str:
        """
        Get a unique identifier for this model.

        Returns:
            str: Model name (e.g., "clip-vit-b32")
        """

    def embed_image_batch(self, images: List[Image.Image]) -> np.ndarray:
        """
        Generate embeddings for multiple images.

        Default implementation processes images one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            images: List of PIL Images to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(images), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any image is invalid
            RuntimeError: If model inference fails
        """
        if not images:
            # Preserve the (0, dimension) shape so callers always get a
            # consistent 2-D array, even for empty input.
            return np.empty((0, self.get_dimension()))
        return np.stack([self.embed_image(img) for img in images])

    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for multiple texts.

        Default implementation processes texts one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            texts: List of text strings to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(texts), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any text is invalid
            RuntimeError: If model inference fails
        """
        if not texts:
            # Preserve the (0, dimension) shape so callers always get a
            # consistent 2-D array, even for empty input.
            return np.empty((0, self.get_dimension()))
        return np.stack([self.embed_text(text) for text in texts])

    def __repr__(self) -> str:
        """String representation of the embedder."""
        return f"{self.__class__.__name__}(model={self.get_model_name()}, dim={self.get_dimension()})"
|
||||
Reference in New Issue
Block a user