# NOTE(review): extraction artifact — the following leaked commit/viewer metadata
# preceded the module and is preserved here as a comment:
# "Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002
#  (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel -
#  Self-healing interactif - Dashboard confiance
#  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>"
# (viewer chrome: "137 lines / 4.1 KiB / Python")
"""
|
|
Abstract base class for embedding models.
|
|
|
|
This module defines the interface that all embedding models must implement,
|
|
ensuring consistency across different model implementations (CLIP, etc.).
|
|
"""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import List
|
|
from PIL import Image
|
|
import numpy as np
|
|
|
|
|
|
class EmbedderBase(ABC):
    """
    Abstract base class for image and text embedding models.

    Concrete embedders implement this interface so the state embedding
    system can use any model (CLIP, etc.) interchangeably.
    """

    @abstractmethod
    def embed_image(self, image: Image.Image) -> np.ndarray:
        """
        Generate an embedding vector for a single image.

        Args:
            image: PIL Image to embed.

        Returns:
            np.ndarray: L2-normalized embedding vector of shape
                (dimension,), suitable for cosine-similarity comparison.

        Raises:
            ValueError: If image is invalid or cannot be processed.
            RuntimeError: If model inference fails.
        """

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """
        Generate an embedding vector for text.

        Args:
            text: Text string to embed.

        Returns:
            np.ndarray: L2-normalized embedding vector of shape
                (dimension,), suitable for cosine-similarity comparison.

        Raises:
            ValueError: If text is invalid.
            RuntimeError: If model inference fails.
        """

    @abstractmethod
    def get_dimension(self) -> int:
        """
        Get the dimensionality of embeddings produced by this model.

        Returns:
            int: Embedding dimension (e.g., 512 for CLIP ViT-B/32).
        """

    @abstractmethod
    def get_model_name(self) -> str:
        """
        Get a unique identifier for this model.

        Returns:
            str: Model name (e.g., "clip-vit-b32").
        """

    def embed_image_batch(self, images: List[Image.Image]) -> np.ndarray:
        """
        Generate embeddings for multiple images.

        Default implementation embeds each image individually via
        ``embed_image``; subclasses may override with true batched
        inference for speed.

        Args:
            images: List of PIL Images to embed.

        Returns:
            np.ndarray: Array of shape (len(images), dimension) where
                each row is a normalized embedding vector.

        Raises:
            ValueError: If any image is invalid.
            RuntimeError: If model inference fails.
        """
        if not images:
            # Preserve the 2-D (0, dim) shape so callers can always index axis 1.
            return np.empty((0, self.get_dimension()))
        return np.array([self.embed_image(item) for item in images])

    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for multiple texts.

        Default implementation embeds each string individually via
        ``embed_text``; subclasses may override with true batched
        inference for speed.

        Args:
            texts: List of text strings to embed.

        Returns:
            np.ndarray: Array of shape (len(texts), dimension) where
                each row is a normalized embedding vector.

        Raises:
            ValueError: If any text is invalid.
            RuntimeError: If model inference fails.
        """
        if not texts:
            # Preserve the 2-D (0, dim) shape so callers can always index axis 1.
            return np.empty((0, self.get_dimension()))
        return np.array([self.embed_text(item) for item in texts])

    def __repr__(self) -> str:
        """String representation of the embedder."""
        return f"{type(self).__name__}(model={self.get_model_name()}, dim={self.get_dimension()})"