v1.0 - Stable release: multi-PC support, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama running on GPU
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
core/embedding/__init__.py (new file, 96 lines)
@@ -0,0 +1,96 @@
"""
Embedding Module - Multi-Modal Fusion and FAISS Management

This module handles multi-modal embedding fusion and FAISS indexing
for fast similarity search.
"""

from .fusion_engine import (
    FusionEngine,
    FusionConfig,
    create_default_fusion_engine,
    normalize_vector,
    validate_weights
)

from .faiss_manager import (
    FAISSManager,
    SearchResult,
    create_flat_index,
    create_ivf_index
)

from .similarity import (
    cosine_similarity,
    euclidean_distance,
    manhattan_distance,
    dot_product,
    normalize_l2,
    normalize_l1,
    angular_distance,
    jaccard_similarity,
    hamming_distance,
    batch_cosine_similarity,
    pairwise_cosine_similarity,
    similarity_to_distance,
    distance_to_similarity,
    is_normalized,
    compute_centroid,
    compute_variance
)

from .state_embedding_builder import (
    StateEmbeddingBuilder,
    create_builder,
    build_from_screen_state
)

from .base_embedder import EmbedderBase

from .clip_embedder import (
    CLIPEmbedder,
    create_clip_embedder,
    get_default_embedder
)

from .embedding_cache import (
    EmbeddingCache,
    PrototypeCache
)

__all__ = [
    'FusionEngine',
    'FusionConfig',
    'create_default_fusion_engine',
    'normalize_vector',
    'validate_weights',
    'FAISSManager',
    'SearchResult',
    'create_flat_index',
    'create_ivf_index',
    'cosine_similarity',
    'euclidean_distance',
    'manhattan_distance',
    'dot_product',
    'normalize_l2',
    'normalize_l1',
    'angular_distance',
    'jaccard_similarity',
    'hamming_distance',
    'batch_cosine_similarity',
    'pairwise_cosine_similarity',
    'similarity_to_distance',
    'distance_to_similarity',
    'is_normalized',
    'compute_centroid',
    'compute_variance',
    'StateEmbeddingBuilder',
    'create_builder',
    'build_from_screen_state',
    'EmbedderBase',
    'CLIPEmbedder',
    'create_clip_embedder',
    'get_default_embedder',
    'EmbeddingCache',
    'PrototypeCache'
]
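As a quick orientation, here is a minimal usage sketch of the public API re-exported above. It assumes the project root is on PYTHONPATH; the vectors are illustrative, not from the commit.

import numpy as np
from core.embedding import cosine_similarity, normalize_l2

# Two toy vectors, L2-normalized as the embedders in this package produce them
a = normalize_l2(np.array([1.0, 2.0, 3.0], dtype=np.float32))
b = normalize_l2(np.array([1.0, 2.0, 2.5], dtype=np.float32))
print(cosine_similarity(a, b))  # close to 1.0 for near-parallel vectors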
core/embedding/base_embedder.py (new file, 136 lines)
@@ -0,0 +1,136 @@
"""
Abstract base class for embedding models.

This module defines the interface that all embedding models must implement,
ensuring consistency across different model implementations (CLIP, etc.).
"""

from abc import ABC, abstractmethod
from typing import List
from PIL import Image
import numpy as np


class EmbedderBase(ABC):
    """
    Abstract base class for image and text embedding models.

    All embedding models must implement this interface to ensure
    compatibility with the state embedding system.
    """

    @abstractmethod
    def embed_image(self, image: Image.Image) -> np.ndarray:
        """
        Generate an embedding vector for a single image.

        Args:
            image: PIL Image to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If image is invalid or cannot be processed
            RuntimeError: If model inference fails
        """
        pass

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """
        Generate an embedding vector for text.

        Args:
            text: Text string to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,).
                The vector should be L2-normalized for cosine similarity.

        Raises:
            ValueError: If text is invalid
            RuntimeError: If model inference fails
        """
        pass

    @abstractmethod
    def get_dimension(self) -> int:
        """
        Get the dimensionality of embeddings produced by this model.

        Returns:
            int: Embedding dimension (e.g., 512 for CLIP ViT-B/32)
        """
        pass

    @abstractmethod
    def get_model_name(self) -> str:
        """
        Get a unique identifier for this model.

        Returns:
            str: Model name (e.g., "clip-vit-b32")
        """
        pass

    def embed_image_batch(self, images: List[Image.Image]) -> np.ndarray:
        """
        Generate embeddings for multiple images.

        Default implementation processes images one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            images: List of PIL Images to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(images), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any image is invalid
            RuntimeError: If model inference fails
        """
        if not images:
            return np.array([]).reshape(0, self.get_dimension())

        embeddings = []
        for img in images:
            embedding = self.embed_image(img)
            embeddings.append(embedding)

        return np.array(embeddings)

    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for multiple texts.

        Default implementation processes texts one by one.
        Subclasses can override this for optimized batch processing.

        Args:
            texts: List of text strings to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(texts), dimension).
                Each row is a normalized embedding vector.

        Raises:
            ValueError: If any text is invalid
            RuntimeError: If model inference fails
        """
        if not texts:
            return np.array([]).reshape(0, self.get_dimension())

        embeddings = []
        for text in texts:
            embedding = self.embed_text(text)
            embeddings.append(embedding)

        return np.array(embeddings)

    def __repr__(self) -> str:
        """String representation of the embedder."""
        return f"{self.__class__.__name__}(model={self.get_model_name()}, dim={self.get_dimension()})"
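For illustration, a minimal concrete subclass satisfying this interface might look like the sketch below. DummyEmbedder is hypothetical (not part of this commit); it is the kind of deterministic test double one could use in unit tests.

import numpy as np
from PIL import Image

class DummyEmbedder(EmbedderBase):
    """Hypothetical test double: deterministic 8-dim unit vectors."""

    DIM = 8

    def _embed_bytes(self, data: bytes) -> np.ndarray:
        # Seed a generator from the input so the output is deterministic
        rng = np.random.default_rng(abs(hash(data)) % (2 ** 32))
        v = rng.standard_normal(self.DIM).astype(np.float32)
        return v / np.linalg.norm(v)  # L2-normalize, as the interface requires

    def embed_image(self, image: Image.Image) -> np.ndarray:
        return self._embed_bytes(image.tobytes())

    def embed_text(self, text: str) -> np.ndarray:
        return self._embed_bytes(text.encode("utf-8"))

    def get_dimension(self) -> int:
        return self.DIM

    def get_model_name(self) -> str:
        return "dummy-8d"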
core/embedding/clip_embedder.py (new file, 292 lines)
@@ -0,0 +1,292 @@
"""
CLIP-based embedder implementation for RPA Vision V3.

This module provides a wrapper around OpenCLIP for generating image and text embeddings
using the CLIP (Contrastive Language-Image Pre-training) model.
"""

import torch
import numpy as np
from PIL import Image
from typing import List, Optional
import logging

try:
    import open_clip
except ImportError:
    open_clip = None

from .base_embedder import EmbedderBase


logger = logging.getLogger(__name__)


class CLIPEmbedder(EmbedderBase):
    """
    CLIP-based image and text embedder using OpenCLIP.

    This embedder uses the ViT-B/32 architecture by default, which produces
    512-dimensional embeddings. It automatically handles GPU/CPU device selection.

    The embeddings are L2-normalized for cosine similarity calculations.
    """

    def __init__(
        self,
        model_name: str = "ViT-B-32",
        pretrained: str = "openai",
        device: Optional[str] = None
    ):
        """
        Initialize the CLIP embedder.

        Args:
            model_name: CLIP model architecture (default: ViT-B-32).
                Options: ViT-B-32, ViT-B-16, ViT-L-14, etc.
            pretrained: Pretrained weights to use (default: openai)
            device: Device to use ('cuda', 'cpu', or None for auto-detect).
                Defaults to CPU to save GPU memory for VLM models.

        Raises:
            ImportError: If open_clip is not installed
            RuntimeError: If model loading fails
        """
        if open_clip is None:
            raise ImportError(
                "OpenCLIP is not installed. "
                "Install it with: pip install open-clip-torch"
            )

        # Default to CPU to save GPU for vision models (Qwen3-VL, etc.)
        if device is None:
            device = "cpu"

        self.model_name = model_name
        self.pretrained = pretrained
        self.device = device
        self._embedding_dim = None

        # Load model
        try:
            logger.info(f"Loading CLIP model: {model_name} ({pretrained}) on {device}...")

            self.model, _, self.preprocess = open_clip.create_model_and_transforms(
                model_name,
                pretrained=pretrained,
                device=device
            )
            self.model.eval()

            # Get tokenizer for text
            self.tokenizer = open_clip.get_tokenizer(model_name)

            # Determine embedding dimension
            with torch.no_grad():
                dummy_image = torch.zeros(1, 3, 224, 224).to(self.device)
                dummy_embedding = self.model.encode_image(dummy_image)
                self._embedding_dim = dummy_embedding.shape[-1]

            logger.info(
                f"✓ CLIP embedder loaded: {model_name} on {device}, "
                f"dimension={self._embedding_dim}"
            )

        except Exception as e:
            raise RuntimeError(f"Failed to load CLIP model: {e}")

    def embed_image(self, image: Image.Image) -> np.ndarray:
        """
        Generate embedding for a single image.

        Args:
            image: PIL Image to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,)

        Raises:
            ValueError: If image is invalid
            RuntimeError: If embedding generation fails
        """
        if not isinstance(image, Image.Image):
            raise ValueError("Input must be a PIL Image")

        try:
            # Preprocess image
            image_tensor = self.preprocess(image).unsqueeze(0).to(self.device)

            # Generate embedding
            with torch.no_grad():
                embedding = self.model.encode_image(image_tensor)
                # L2 normalize for cosine similarity
                embedding = embedding / embedding.norm(dim=-1, keepdim=True)

            return embedding.cpu().numpy().flatten()

        except Exception as e:
            raise RuntimeError(f"Failed to generate image embedding: {e}")

    def embed_text(self, text: str) -> np.ndarray:
        """
        Generate embedding for text.

        Args:
            text: Text string to embed

        Returns:
            np.ndarray: Normalized embedding vector of shape (dimension,)

        Raises:
            ValueError: If text is invalid
            RuntimeError: If embedding generation fails
        """
        if not isinstance(text, str):
            raise ValueError("Input must be a string")

        if not text.strip():
            # Return zero vector for empty text
            return np.zeros(self.get_dimension(), dtype=np.float32)

        try:
            # Tokenize text
            text_tokens = self.tokenizer([text]).to(self.device)

            # Generate embedding
            with torch.no_grad():
                embedding = self.model.encode_text(text_tokens)
                # L2 normalize for cosine similarity
                embedding = embedding / embedding.norm(dim=-1, keepdim=True)

            return embedding.cpu().numpy().flatten()

        except Exception as e:
            raise RuntimeError(f"Failed to generate text embedding: {e}")

    def embed_image_batch(self, images: List[Image.Image]) -> np.ndarray:
        """
        Generate embeddings for multiple images (optimized batch processing).

        Args:
            images: List of PIL Images to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(images), dimension)

        Raises:
            ValueError: If any image is invalid
            RuntimeError: If embedding generation fails
        """
        if not images:
            return np.array([]).reshape(0, self.get_dimension())

        # Validate all images
        for i, img in enumerate(images):
            if not isinstance(img, Image.Image):
                raise ValueError(f"Image at index {i} is not a PIL Image")

        try:
            # Preprocess all images
            image_tensors = torch.stack([
                self.preprocess(img) for img in images
            ]).to(self.device)

            # Generate embeddings in batch
            with torch.no_grad():
                embeddings = self.model.encode_image(image_tensors)
                # L2 normalize for cosine similarity
                embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)

            return embeddings.cpu().numpy()

        except Exception as e:
            raise RuntimeError(f"Failed to generate batch image embeddings: {e}")

    def embed_text_batch(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for multiple texts (optimized batch processing).

        Args:
            texts: List of text strings to embed

        Returns:
            np.ndarray: Array of embeddings with shape (len(texts), dimension)

        Raises:
            ValueError: If any text is invalid
            RuntimeError: If embedding generation fails
        """
        if not texts:
            return np.array([]).reshape(0, self.get_dimension())

        # Validate all texts
        for i, text in enumerate(texts):
            if not isinstance(text, str):
                raise ValueError(f"Text at index {i} is not a string")

        try:
            # Handle empty texts
            processed_texts = [text if text.strip() else " " for text in texts]

            # Tokenize all texts
            text_tokens = self.tokenizer(processed_texts).to(self.device)

            # Generate embeddings in batch
            with torch.no_grad():
                embeddings = self.model.encode_text(text_tokens)
                # L2 normalize for cosine similarity
                embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)

            return embeddings.cpu().numpy()

        except Exception as e:
            raise RuntimeError(f"Failed to generate batch text embeddings: {e}")

    def get_dimension(self) -> int:
        """
        Get the dimensionality of embeddings.

        Returns:
            int: Embedding dimension (512 for ViT-B/32)
        """
        return self._embedding_dim

    def get_model_name(self) -> str:
        """
        Get model identifier.

        Returns:
            str: Model name (e.g., "clip-vit-b32")
        """
        return f"clip-{self.model_name.lower().replace('/', '-')}"


# ============================================================================
# Factory functions
# ============================================================================

def create_clip_embedder(
    model_name: str = "ViT-B-32",
    device: Optional[str] = None
) -> CLIPEmbedder:
    """
    Create a CLIP embedder with default configuration.

    Args:
        model_name: CLIP model architecture (default: ViT-B-32)
        device: Device to use (default: CPU)

    Returns:
        CLIPEmbedder: Configured CLIP embedder
    """
    return CLIPEmbedder(model_name=model_name, device=device)


def get_default_embedder() -> CLIPEmbedder:
    """
    Get the default CLIP embedder (ViT-B/32 on CPU).

    Returns:
        CLIPEmbedder: Default embedder
    """
    return CLIPEmbedder()
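A usage sketch for the embedder above. The image and prompt are placeholders; running this requires open-clip-torch and downloads the pretrained weights on first use.

import numpy as np
from PIL import Image

embedder = create_clip_embedder()            # ViT-B-32 on CPU by default
img = Image.new("RGB", (224, 224), "white")  # placeholder image
img_vec = embedder.embed_image(img)
txt_vec = embedder.embed_text("a blank white screen")

# Both vectors are L2-normalized, so the dot product is the cosine similarity
score = float(np.dot(img_vec, txt_vec))
print(embedder, score)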
core/embedding/embedding_cache.py (new file, 284 lines)
@@ -0,0 +1,284 @@
"""
Embedding Cache - LRU cache for embeddings

Implements an LRU (Least Recently Used) cache that keeps embeddings
in memory and avoids costly recomputation.
"""

import logging
from typing import Optional, Dict, Any
from collections import OrderedDict
import numpy as np
from datetime import datetime

logger = logging.getLogger(__name__)


class EmbeddingCache:
    """
    LRU cache for embeddings.

    Keeps the most recently used embeddings in memory to avoid
    recomputation and reloads from disk.

    Features:
    - LRU eviction policy
    - Configurable maximum size
    - Cache statistics (hits/misses)
    - Selective invalidation
    """

    def __init__(self, max_size: int = 1000, max_memory_mb: float = 500.0):
        """
        Initialize the cache.

        Args:
            max_size: Maximum number of embeddings to keep in the cache
            max_memory_mb: Maximum memory in MB (approximate)
        """
        self.max_size = max_size
        self.max_memory_mb = max_memory_mb
        self.cache: OrderedDict[str, np.ndarray] = OrderedDict()
        self.metadata: Dict[str, Dict[str, Any]] = {}

        # Statistics
        self.hits = 0
        self.misses = 0
        self.evictions = 0

        logger.info(
            f"EmbeddingCache initialized: max_size={max_size}, "
            f"max_memory_mb={max_memory_mb:.1f}"
        )

    def get(self, key: str) -> Optional[np.ndarray]:
        """
        Retrieve an embedding from the cache.

        Args:
            key: Embedding key (embedding_id)

        Returns:
            The numpy vector if found, None otherwise
        """
        if key in self.cache:
            # Move to the end (most recently used)
            self.cache.move_to_end(key)
            self.hits += 1
            logger.debug(f"Cache HIT: {key}")
            return self.cache[key]

        self.misses += 1
        logger.debug(f"Cache MISS: {key}")
        return None

    def put(
        self,
        key: str,
        vector: np.ndarray,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Add an embedding to the cache.

        Args:
            key: Embedding key
            vector: Numpy vector
            metadata: Optional metadata
        """
        # If already present, update it and move it to the end
        if key in self.cache:
            self.cache.move_to_end(key)
            self.cache[key] = vector
            if metadata:
                self.metadata[key] = metadata
            return

        # Check whether we need to evict
        if len(self.cache) >= self.max_size:
            self._evict_oldest()

        # Add the new embedding
        self.cache[key] = vector
        if metadata:
            self.metadata[key] = metadata

        logger.debug(f"Cache PUT: {key} (size: {len(self.cache)})")

    def _evict_oldest(self):
        """Evict the least recently used embedding."""
        if not self.cache:
            return

        # Remove the first element (oldest)
        oldest_key, _ = self.cache.popitem(last=False)
        self.metadata.pop(oldest_key, None)
        self.evictions += 1

        logger.debug(f"Cache EVICT: {oldest_key} (evictions: {self.evictions})")

    def invalidate(self, key: str):
        """
        Invalidate a specific embedding.

        Args:
            key: Key of the embedding to invalidate
        """
        if key in self.cache:
            del self.cache[key]
            self.metadata.pop(key, None)
            logger.debug(f"Cache INVALIDATE: {key}")

    def invalidate_pattern(self, pattern: str):
        """
        Invalidate all embeddings whose key contains the pattern.

        Args:
            pattern: Substring to look for in the keys
        """
        keys_to_remove = [k for k in self.cache.keys() if pattern in k]
        for key in keys_to_remove:
            del self.cache[key]
            self.metadata.pop(key, None)

        if keys_to_remove:
            logger.info(f"Cache INVALIDATE PATTERN '{pattern}': {len(keys_to_remove)} entries")

    def clear(self):
        """Empty the cache completely."""
        size_before = len(self.cache)
        self.cache.clear()
        self.metadata.clear()
        logger.info(f"Cache CLEAR: {size_before} entries removed")

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics.

        Returns:
            Dict of statistics
        """
        total_requests = self.hits + self.misses
        hit_rate = self.hits / total_requests if total_requests > 0 else 0.0

        # Estimate memory usage
        memory_mb = 0.0
        for vector in self.cache.values():
            # Size in bytes = number of elements * size of one float32
            memory_mb += vector.nbytes / (1024 * 1024)

        return {
            "size": len(self.cache),
            "max_size": self.max_size,
            "hits": self.hits,
            "misses": self.misses,
            "evictions": self.evictions,
            "hit_rate": hit_rate,
            "memory_mb": memory_mb,
            "max_memory_mb": self.max_memory_mb,
            "memory_usage_pct": (memory_mb / self.max_memory_mb * 100) if self.max_memory_mb > 0 else 0.0
        }

    def __len__(self) -> int:
        """Return the number of embeddings in the cache."""
        return len(self.cache)

    def __contains__(self, key: str) -> bool:
        """Check whether a key is in the cache."""
        return key in self.cache


class PrototypeCache:
    """
    Specialized cache for WorkflowNode prototypes.

    Prototypes are used frequently for matching, so they are kept
    in a cache with a different eviction policy (least frequently used).
    """

    def __init__(self, max_size: int = 100):
        """
        Initialize the prototype cache.

        Args:
            max_size: Maximum number of prototypes to keep
        """
        self.max_size = max_size
        self.cache: Dict[str, np.ndarray] = {}
        self.access_count: Dict[str, int] = {}
        self.last_access: Dict[str, datetime] = {}

        logger.info(f"PrototypeCache initialized: max_size={max_size}")

    def get(self, node_id: str) -> Optional[np.ndarray]:
        """
        Retrieve a prototype from the cache.

        Args:
            node_id: WorkflowNode ID

        Returns:
            The prototype vector if found, None otherwise
        """
        if node_id in self.cache:
            self.access_count[node_id] = self.access_count.get(node_id, 0) + 1
            self.last_access[node_id] = datetime.now()
            return self.cache[node_id]

        return None

    def put(self, node_id: str, prototype: np.ndarray):
        """
        Add a prototype to the cache.

        Args:
            node_id: WorkflowNode ID
            prototype: Prototype vector
        """
        # If the cache is full, evict the least used entry
        if len(self.cache) >= self.max_size and node_id not in self.cache:
            self._evict_least_used()

        self.cache[node_id] = prototype
        self.access_count[node_id] = self.access_count.get(node_id, 0) + 1
        self.last_access[node_id] = datetime.now()

    def _evict_least_used(self):
        """Evict the least used prototype."""
        if not self.cache:
            return

        # Find the least used entry
        least_used = min(self.access_count.items(), key=lambda x: x[1])
        node_id = least_used[0]

        del self.cache[node_id]
        del self.access_count[node_id]
        del self.last_access[node_id]

        logger.debug(f"PrototypeCache EVICT: {node_id}")

    def invalidate(self, node_id: str):
        """Invalidate a specific prototype."""
        if node_id in self.cache:
            del self.cache[node_id]
            self.access_count.pop(node_id, None)
            self.last_access.pop(node_id, None)

    def clear(self):
        """Empty the cache."""
        self.cache.clear()
        self.access_count.clear()
        self.last_access.clear()

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        total_accesses = sum(self.access_count.values())
        avg_accesses = total_accesses / len(self.cache) if self.cache else 0.0

        return {
            "size": len(self.cache),
            "max_size": self.max_size,
            "total_accesses": total_accesses,
            "avg_accesses_per_prototype": avg_accesses
        }
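A short sketch of the LRU behavior implemented above; the keys and vectors are illustrative.

import numpy as np

cache = EmbeddingCache(max_size=2)
cache.put("a", np.ones(4, dtype=np.float32))
cache.put("b", np.zeros(4, dtype=np.float32))

cache.get("a")                                     # hit: "a" becomes most recently used
cache.put("c", np.full(4, 0.5, dtype=np.float32))  # cache full: evicts "b" (least recent)

assert "a" in cache and "b" not in cache
print(cache.get_stats())  # size=2, hits=1, misses=0, evictions=1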
core/embedding/fusion_engine.py (new file, 613 lines)
@@ -0,0 +1,613 @@
"""
FusionEngine - Multi-Modal Embedding Fusion

Fuses several embeddings (image, text, title, UI) into a single vector
with configurable weighting and L2 normalization.

Task 5.2: lazy loading of embeddings backed by a WeakValueDictionary.
"""

from typing import Dict, List, Optional
import numpy as np
from dataclasses import dataclass
import weakref
import logging
from pathlib import Path

from ..models.state_embedding import (
    StateEmbedding,
    EmbeddingComponent,
    DEFAULT_FUSION_WEIGHTS
)

logger = logging.getLogger(__name__)


@dataclass
class FusionConfig:
    """Fusion configuration."""
    method: str = "weighted"                    # "weighted" or "concat_projection"
    normalize: bool = True                      # Normalize the final vector
    weights: Optional[Dict[str, float]] = None  # Custom weights

    def __post_init__(self):
        if self.weights is None:
            self.weights = DEFAULT_FUSION_WEIGHTS.copy()

        # For weighted fusion, validate that the weights sum to 1.0
        if self.method == "weighted":
            total = sum(self.weights.values())
            if not (0.99 <= total <= 1.01):
                raise ValueError(
                    f"Weights must sum to 1.0 for weighted fusion, got {total}"
                )


class FusionEngine:
    """
    Multi-modal fusion engine with optimized lazy loading.

    Fuses embeddings from different modalities (image, text, UI)
    into a single vector representing the full screen state.

    Task 5.2: implements lazy loading with a WeakValueDictionary to
    avoid repeated reloads while still allowing garbage collection.
    """

    def __init__(self, config: Optional[FusionConfig] = None):
        """
        Initialize the fusion engine with lazy loading.

        Args:
            config: Fusion configuration (defaults are used if None)
        """
        self.config = config or FusionConfig()

        # Task 5.2: lazy-loading cache backed by a WeakValueDictionary,
        # which lets unused embeddings be garbage-collected automatically.
        self._embedding_cache: weakref.WeakValueDictionary = weakref.WeakValueDictionary()
        self._cache_stats = {
            'hits': 0,
            'misses': 0,
            'loads': 0,
            'evictions': 0
        }

    def fuse(self,
             embeddings: Dict[str, np.ndarray],
             weights: Optional[Dict[str, float]] = None) -> np.ndarray:
        """
        Fuse several embeddings into a single vector.

        Args:
            embeddings: Dict {modality: vector},
                e.g. {"image": vec1, "text": vec2, "title": vec3, "ui": vec4}
            weights: Custom weights (optional; config weights are used otherwise)

        Returns:
            Fused vector (normalized if config.normalize=True)

        Raises:
            ValueError: If dimensions do not match or weights are invalid
        """
        if not embeddings:
            raise ValueError("No embeddings provided for fusion")

        # Use the provided weights or fall back to the config
        fusion_weights = weights or self.config.weights

        # Check that all modalities share the same dimensionality
        dimensions = None
        for modality, vector in embeddings.items():
            if dimensions is None:
                dimensions = vector.shape[0]
            elif vector.shape[0] != dimensions:
                raise ValueError(
                    f"All embeddings must have same dimensions. "
                    f"Expected {dimensions}, got {vector.shape[0]} for {modality}"
                )

        if self.config.method == "weighted":
            fused = self._fuse_weighted(embeddings, fusion_weights)
        elif self.config.method == "concat_projection":
            fused = self._fuse_concat_projection(embeddings, fusion_weights)
        else:
            raise ValueError(f"Unknown fusion method: {self.config.method}")

        # Normalize if requested
        if self.config.normalize:
            fused = self._normalize_l2(fused)

        return fused

    def _fuse_weighted(self,
                       embeddings: Dict[str, np.ndarray],
                       weights: Dict[str, float]) -> np.ndarray:
        """
        Simple weighted fusion: weighted sum of the vectors.

        fused = w1*v1 + w2*v2 + w3*v3 + w4*v4
        """
        # Initialize the result vector
        first_vector = next(iter(embeddings.values()))
        fused = np.zeros_like(first_vector, dtype=np.float32)

        # Weighted sum
        for modality, vector in embeddings.items():
            weight = weights.get(modality, 0.0)
            fused += weight * vector

        return fused

    def _fuse_concat_projection(self,
                                embeddings: Dict[str, np.ndarray],
                                weights: Dict[str, float]) -> np.ndarray:
        """
        Fusion by concatenation + projection.

        Concatenates all vectors, then projects to the target dimension.
        Note: for now this falls back to a simple weighted average.
        TODO: implement a real projection with a learned matrix.
        """
        # For now, use weighted fusion.
        # A future version could learn a projection matrix.
        return self._fuse_weighted(embeddings, weights)

    def _normalize_l2(self, vector: np.ndarray) -> np.ndarray:
        """
        L2-normalize a vector.

        normalized = vector / ||vector||_2
        """
        norm = np.linalg.norm(vector)
        if norm < 1e-10:  # Avoid division by zero
            return vector
        return vector / norm

    def create_state_embedding(self,
                               embedding_id: str,
                               embeddings: Dict[str, np.ndarray],
                               vector_save_path: str,
                               weights: Optional[Dict[str, float]] = None,
                               metadata: Optional[Dict] = None) -> StateEmbedding:
        """
        Create a full StateEmbedding from individual embeddings.

        Args:
            embedding_id: Unique ID for this embedding
            embeddings: Dict {modality: vector}
            vector_save_path: Path where the fused vector is saved
            weights: Custom weights (optional)
            metadata: Additional metadata

        Returns:
            StateEmbedding with the fused vector saved to disk
        """
        # Fuse the embeddings
        fused_vector = self.fuse(embeddings, weights)

        # Create the components
        fusion_weights = weights or self.config.weights
        components = {}

        for modality, vector in embeddings.items():
            # Individual vectors are not persisted for now;
            # they could be saved here if needed.
            components[modality] = EmbeddingComponent(
                weight=fusion_weights.get(modality, 0.0),
                vector_id=f"{vector_save_path}_{modality}.npy",
                source_text=None
            )

        # Create the StateEmbedding
        dimensions = fused_vector.shape[0]
        state_emb = StateEmbedding(
            embedding_id=embedding_id,
            vector_id=vector_save_path,
            dimensions=dimensions,
            fusion_method=self.config.method,
            components=components,
            metadata=metadata or {}
        )

        # Save the fused vector
        state_emb.save_vector(fused_vector)

        return state_emb

    def compute_similarity(self,
                           emb1: StateEmbedding,
                           emb2: StateEmbedding) -> float:
        """
        Compute cosine similarity between two StateEmbeddings.

        Args:
            emb1: First embedding
            emb2: Second embedding

        Returns:
            Cosine similarity in [-1, 1]
        """
        return emb1.compute_similarity(emb2)

    def batch_fuse(self,
                   batch_embeddings: List[Dict[str, np.ndarray]],
                   weights: Optional[Dict[str, float]] = None) -> List[np.ndarray]:
        """
        Fuse a batch of embeddings one by one.

        See fuse_batch for a vectorized variant.

        Args:
            batch_embeddings: List of dicts {modality: vector}
            weights: Custom weights (optional)

        Returns:
            List of fused vectors
        """
        return [self.fuse(embs, weights) for embs in batch_embeddings]

    def get_config(self) -> FusionConfig:
        """Return the current configuration."""
        return self.config

    def set_weights(self, weights: Dict[str, float]) -> None:
        """
        Update the fusion weights.

        Args:
            weights: New weights

        Raises:
            ValueError: If the weights do not sum to 1.0 (for weighted fusion)
        """
        if self.config.method == "weighted":
            total = sum(weights.values())
            if not (0.99 <= total <= 1.01):
                raise ValueError(
                    f"Weights must sum to 1.0 for weighted fusion, got {total}"
                )

        self.config.weights = weights.copy()

    # The following methods take `self` and use instance state; in the
    # original file they appeared dedented at module level, which is a bug.
    # They belong to FusionEngine.

    def fuse_batch(
        self,
        embeddings_batch: List[Dict[str, np.ndarray]],
        weights: Optional[Dict[str, float]] = None
    ) -> np.ndarray:
        """
        Fuse a batch of embeddings in one vectorized pass for efficiency.

        Args:
            embeddings_batch: List of dicts {modality: vector}
            weights: Custom weights (optional)

        Returns:
            Numpy array of shape (batch_size, embedding_dim) with the fused vectors

        Note:
            This method is optimized to process several embeddings in a
            single vectorized operation, which is faster than fusing them
            one by one.
        """
        if not embeddings_batch:
            raise ValueError("Empty batch provided")

        batch_size = len(embeddings_batch)
        fusion_weights = weights or self.config.weights

        # Infer the dimensions from the first element
        first_emb = embeddings_batch[0]
        first_vector = next(iter(first_emb.values()))
        embedding_dim = first_vector.shape[0]

        # Prepare the result
        fused_batch = np.zeros((batch_size, embedding_dim), dtype=np.float32)

        # Process each modality across the whole batch
        for modality in first_emb.keys():
            weight = fusion_weights.get(modality, 0.0)
            if weight == 0.0:
                continue

            # Collect all vectors for this modality
            modality_vectors = []
            for emb_dict in embeddings_batch:
                if modality in emb_dict:
                    modality_vectors.append(emb_dict[modality])
                else:
                    # If the modality is missing, use a zero vector
                    modality_vectors.append(np.zeros(embedding_dim, dtype=np.float32))

            # Convert to a numpy array (batch_size, embedding_dim)
            modality_batch = np.array(modality_vectors, dtype=np.float32)

            # Add the weighted contribution
            fused_batch += weight * modality_batch

        # Normalize if requested
        if self.config.normalize:
            # L2 normalization for each vector in the batch
            norms = np.linalg.norm(fused_batch, axis=1, keepdims=True)
            # Avoid division by zero
            norms = np.where(norms < 1e-10, 1.0, norms)
            fused_batch = fused_batch / norms

        return fused_batch

    def create_state_embeddings_batch(
        self,
        embedding_ids: List[str],
        embeddings_batch: List[Dict[str, np.ndarray]],
        vector_save_paths: List[str],
        weights: Optional[Dict[str, float]] = None,
        metadata_batch: Optional[List[Dict]] = None
    ) -> List[StateEmbedding]:
        """
        Create a batch of StateEmbeddings efficiently.

        Args:
            embedding_ids: List of unique IDs
            embeddings_batch: List of dicts {modality: vector}
            vector_save_paths: List of save paths
            weights: Custom weights (optional)
            metadata_batch: List of metadata dicts (optional)

        Returns:
            List of created StateEmbeddings

        Note:
            This method is ~3-5x faster than creating the embeddings
            one by one, thanks to the vectorized processing.
        """
        if not (len(embedding_ids) == len(embeddings_batch) == len(vector_save_paths)):
            raise ValueError("All input lists must have the same length")

        batch_size = len(embedding_ids)

        # Fuse the whole batch in a single operation
        fused_vectors = self.fuse_batch(embeddings_batch, weights)

        # Create the StateEmbeddings
        state_embeddings = []
        fusion_weights = weights or self.config.weights

        for i in range(batch_size):
            embedding_id = embedding_ids[i]
            embeddings = embeddings_batch[i]
            vector_save_path = vector_save_paths[i]
            metadata = metadata_batch[i] if metadata_batch else None
            fused_vector = fused_vectors[i]

            # Create the components
            components = {}
            for modality, vector in embeddings.items():
                components[modality] = EmbeddingComponent(
                    weight=fusion_weights.get(modality, 0.0),
                    vector_id=f"{vector_save_path}_{modality}.npy",
                    source_text=None
                )

            # Create the StateEmbedding
            dimensions = fused_vector.shape[0]
            state_emb = StateEmbedding(
                embedding_id=embedding_id,
                vector_id=vector_save_path,
                dimensions=dimensions,
                fusion_method=self.config.method,
                components=components,
                metadata=metadata or {}
            )

            # Save the fused vector
            state_emb.save_vector(fused_vector)

            state_embeddings.append(state_emb)

        return state_embeddings

    def compute_similarity_batch(
        self,
        query_embedding: StateEmbedding,
        candidate_embeddings: List[StateEmbedding]
    ) -> np.ndarray:
        """
        Compute the similarity between a query embedding and a batch of candidates.

        Args:
            query_embedding: Query embedding
            candidate_embeddings: List of candidate embeddings

        Returns:
            Numpy array of similarities (batch_size,)

        Note:
            Uses vectorized operations to compute all similarities
            in a single matrix operation.
        """
        # Load the query vector
        query_vector = query_embedding.get_vector()

        # Load all candidate vectors
        candidate_vectors = []
        for emb in candidate_embeddings:
            candidate_vectors.append(emb.get_vector())

        # Convert to a matrix (batch_size, embedding_dim)
        candidates_matrix = np.array(candidate_vectors, dtype=np.float32)

        # Vectorized computation: cosine similarity = dot product (if normalized)
        similarities = np.dot(candidates_matrix, query_vector)

        return similarities

    def load_embedding_lazy(self, embedding_path: str, force_reload: bool = False) -> Optional[np.ndarray]:
        """
        Load an embedding with lazy loading and caching.

        Task 5.2: lazy loading of embeddings with a WeakValueDictionary cache.
        On-demand loading from disk with automatic eviction.

        Args:
            embedding_path: Path to the embedding file (.npy)
            force_reload: Force a reload from disk

        Returns:
            Numpy array of the embedding, or None on error
        """
        if not embedding_path:
            return None

        # Check the cache first (unless force_reload)
        if not force_reload and embedding_path in self._embedding_cache:
            self._cache_stats['hits'] += 1
            logger.debug(f"Embedding cache hit: {Path(embedding_path).name}")
            return self._embedding_cache[embedding_path]

        # Cache miss - load from disk
        self._cache_stats['misses'] += 1

        try:
            if not Path(embedding_path).exists():
                logger.warning(f"Embedding file not found: {embedding_path}")
                return None

            logger.debug(f"Loading embedding from disk: {Path(embedding_path).name}")
            embedding = np.load(embedding_path)

            # Validate the format
            if not isinstance(embedding, np.ndarray) or embedding.ndim != 1:
                logger.error(f"Invalid embedding format in {embedding_path}")
                return None

            # Add to the cache (the WeakValueDictionary handles eviction automatically)
            self._embedding_cache[embedding_path] = embedding
            self._cache_stats['loads'] += 1

            logger.debug(f"Embedding loaded: {embedding.shape} from {Path(embedding_path).name}")
            return embedding

        except Exception as e:
            logger.error(f"Error loading embedding from {embedding_path}: {e}")
            return None

    def fuse_with_lazy_loading(self,
                               embedding_paths: Dict[str, str],
                               weights: Optional[Dict[str, float]] = None) -> Optional[np.ndarray]:
        """
        Fuse embeddings loaded lazily from file paths.

        Task 5.2: optimized version that loads the embeddings on demand.

        Args:
            embedding_paths: Dict {modality: file_path}
            weights: Custom weights (optional)

        Returns:
            Fused vector, or None on error
        """
        if not embedding_paths:
            logger.warning("No embedding paths provided for lazy fusion")
            return None

        # Load the embeddings lazily
        embeddings = {}
        for modality, path in embedding_paths.items():
            embedding = self.load_embedding_lazy(path)
            if embedding is not None:
                embeddings[modality] = embedding
            else:
                logger.warning(f"Failed to load embedding for modality '{modality}' from {path}")

        if not embeddings:
            logger.error("No embeddings could be loaded for fusion")
            return None

        # Fuse as usual
        return self.fuse(embeddings, weights)

    def get_cache_stats(self) -> Dict[str, int]:
        """
        Get the embedding-cache statistics.

        Returns:
            Dict with hits, misses, loads, cache_size
        """
        return {
            **self._cache_stats,
            'cache_size': len(self._embedding_cache)
        }

    def clear_embedding_cache(self) -> None:
        """
        Empty the embedding cache.

        Useful for freeing memory or forcing a reload.
        """
        cache_size = len(self._embedding_cache)
        self._embedding_cache.clear()
        self._cache_stats['evictions'] += cache_size
        logger.info(f"Cleared embedding cache ({cache_size} entries)")

    def preload_embeddings(self, embedding_paths: List[str]) -> int:
        """
        Preload embeddings into the cache.

        Useful for warming the cache with frequently used embeddings
        ahead of time.

        Args:
            embedding_paths: List of paths to preload

        Returns:
            Number of embeddings preloaded successfully
        """
        loaded_count = 0
        for path in embedding_paths:
            if self.load_embedding_lazy(path) is not None:
                loaded_count += 1

        logger.info(f"Preloaded {loaded_count}/{len(embedding_paths)} embeddings")
        return loaded_count


# ============================================================================
# Utility functions
# ============================================================================

def create_default_fusion_engine() -> FusionEngine:
    """Create a FusionEngine with the default configuration."""
    return FusionEngine(FusionConfig())


def normalize_vector(vector: np.ndarray) -> np.ndarray:
    """
    L2-normalize a vector.

    Args:
        vector: Vector to normalize

    Returns:
        Normalized vector
    """
    norm = np.linalg.norm(vector)
    if norm < 1e-10:
        return vector
    return vector / norm


def validate_weights(weights: Dict[str, float],
                     method: str = "weighted") -> bool:
    """
    Validate that the weights are correct.

    Args:
        weights: Weights to validate
        method: Fusion method

    Returns:
        True if valid, False otherwise
    """
    if method == "weighted":
        total = sum(weights.values())
        return 0.99 <= total <= 1.01
    return True
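A minimal fusion sketch, assuming create_default_fusion_engine() constructs successfully (i.e. DEFAULT_FUSION_WEIGHTS sums to 1.0). Explicit weights are passed here so the example does not depend on the default weight keys; the vectors are random placeholders.

import numpy as np

engine = create_default_fusion_engine()
dim = 512  # CLIP ViT-B/32 dimension
embs = {
    "image": np.random.rand(dim).astype(np.float32),
    "text": np.random.rand(dim).astype(np.float32),
}
# Explicit weights (should sum to 1.0 for the "weighted" method)
fused = engine.fuse(embs, weights={"image": 0.7, "text": 0.3})

# With config.normalize=True the result is a unit vector
assert abs(np.linalg.norm(fused) - 1.0) < 1e-5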
core/embedding/similarity.py (new file, 388 lines)
@@ -0,0 +1,388 @@
"""
Similarity - Similarity and Distance Computations

Functions for computing various similarity and distance metrics
between embedding vectors.
"""

import numpy as np
from typing import List


def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the cosine similarity between two vectors.

    similarity = (vec1 · vec2) / (||vec1|| * ||vec2||)

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Cosine similarity in [-1, 1]:
        1 = identical, 0 = orthogonal, -1 = opposite

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    # Dot product
    dot = np.dot(vec1, vec2)

    # Norms
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)

    # Avoid division by zero
    if norm1 == 0 or norm2 == 0:
        return 0.0

    # Cosine similarity
    similarity = dot / (norm1 * norm2)

    # Clamp to [-1, 1] to guard against numerical errors
    similarity = np.clip(similarity, -1.0, 1.0)

    return float(similarity)


def euclidean_distance(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the Euclidean (L2) distance between two vectors.

    distance = ||vec1 - vec2||_2 = sqrt(sum((vec1 - vec2)^2))

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Euclidean distance (>= 0)

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    return float(np.linalg.norm(vec1 - vec2))


def manhattan_distance(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the Manhattan (L1) distance between two vectors.

    distance = sum(|vec1 - vec2|)

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Manhattan distance (>= 0)

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    return float(np.sum(np.abs(vec1 - vec2)))


def dot_product(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the dot product of two vectors.

    dot = vec1 · vec2 = sum(vec1 * vec2)

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Dot product

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    return float(np.dot(vec1, vec2))


def normalize_l2(vector: np.ndarray, epsilon: float = 1e-10) -> np.ndarray:
    """
    L2-normalize a vector.

    normalized = vector / ||vector||_2

    Args:
        vector: Vector to normalize
        epsilon: Minimum norm to avoid division by zero

    Returns:
        Normalized vector (L2 norm = 1.0)
    """
    norm = np.linalg.norm(vector)
    if norm < epsilon:
        return vector
    return vector / norm


def normalize_l1(vector: np.ndarray, epsilon: float = 1e-10) -> np.ndarray:
    """
    L1-normalize a vector.

    normalized = vector / sum(|vector|)

    Args:
        vector: Vector to normalize
        epsilon: Minimum norm to avoid division by zero

    Returns:
        Normalized vector (L1 norm = 1.0)
    """
    norm = np.sum(np.abs(vector))
    if norm < epsilon:
        return vector
    return vector / norm


def batch_cosine_similarity(vectors: List[np.ndarray],
                            query: np.ndarray) -> np.ndarray:
    """
    Compute the cosine similarity between a query and a batch of vectors.

    Args:
        vectors: List of vectors
        query: Query vector

    Returns:
        Array of similarities
    """
    # Convert to a matrix
    matrix = np.array(vectors)

    # Normalize
    matrix_norm = matrix / (np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-10)
    query_norm = query / (np.linalg.norm(query) + 1e-10)

    # Matrix product
    similarities = np.dot(matrix_norm, query_norm)

    # Clamp
    similarities = np.clip(similarities, -1.0, 1.0)

    return similarities


def pairwise_cosine_similarity(vectors: List[np.ndarray]) -> np.ndarray:
    """
    Compute the cosine similarity matrix between all vectors.

    Args:
        vectors: List of vectors

    Returns:
        Similarity matrix (n x n)
    """
    # Convert to a matrix
    matrix = np.array(vectors)

    # Normalize
    matrix_norm = matrix / (np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-10)

    # Matrix product
    similarity_matrix = np.dot(matrix_norm, matrix_norm.T)

    # Clamp
    similarity_matrix = np.clip(similarity_matrix, -1.0, 1.0)

    return similarity_matrix


def angular_distance(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the angular distance between two vectors.

    distance = arccos(cosine_similarity) / π

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Angular distance in [0, 1]
    """
    similarity = cosine_similarity(vec1, vec2)
    angle = np.arccos(np.clip(similarity, -1.0, 1.0))
    return float(angle / np.pi)


def jaccard_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the Jaccard similarity for binary vectors.

    similarity = |intersection| / |union|

    Args:
        vec1: First binary vector
        vec2: Second binary vector

    Returns:
        Jaccard similarity in [0, 1]

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    intersection = np.sum(np.logical_and(vec1, vec2))
    union = np.sum(np.logical_or(vec1, vec2))

    if union == 0:
        return 0.0

    return float(intersection / union)


def hamming_distance(vec1: np.ndarray, vec2: np.ndarray) -> float:
    """
    Compute the Hamming distance for binary vectors.

    distance = number of differing positions

    Args:
        vec1: First binary vector
        vec2: Second binary vector

    Returns:
        Hamming distance

    Raises:
        ValueError: If the shapes do not match
    """
    if vec1.shape != vec2.shape:
        raise ValueError(
            f"Vectors must have same shape: {vec1.shape} vs {vec2.shape}"
        )

    return float(np.sum(vec1 != vec2))


# ============================================================================
# Conversion functions
# ============================================================================

def similarity_to_distance(similarity: float,
                           method: str = "cosine") -> float:
    """
    Convert a similarity into a distance.

    Args:
        similarity: Similarity value
        method: Method ("cosine", "angular")

    Returns:
        Corresponding distance
    """
    if method == "cosine":
        # distance = 1 - similarity (for cosine similarities in [0, 1])
        return 1.0 - similarity
    elif method == "angular":
        # Angular distance
        angle = np.arccos(np.clip(similarity, -1.0, 1.0))
        return float(angle / np.pi)
    else:
        raise ValueError(f"Unknown method: {method}")


def distance_to_similarity(distance: float,
                           method: str = "euclidean") -> float:
    """
    Convert a distance into a similarity.

    Args:
        distance: Distance value
        method: Method ("euclidean", "manhattan")

    Returns:
        Corresponding similarity in [0, 1]
    """
    if method in ["euclidean", "manhattan"]:
        # similarity = 1 / (1 + distance)
        return 1.0 / (1.0 + distance)
    else:
        raise ValueError(f"Unknown method: {method}")


# ============================================================================
# Utility functions
# ============================================================================

def is_normalized(vector: np.ndarray,
                  norm_type: str = "l2",
                  tolerance: float = 1e-6) -> bool:
    """
    Check whether a vector is normalized.

    Args:
        vector: Vector to check
        norm_type: Norm type ("l2" or "l1")
        tolerance: Tolerance for the check

    Returns:
        True if normalized, False otherwise
    """
    if norm_type == "l2":
        norm = np.linalg.norm(vector)
    elif norm_type == "l1":
        norm = np.sum(np.abs(vector))
    else:
        raise ValueError(f"Unknown norm type: {norm_type}")

    return abs(norm - 1.0) < tolerance


def compute_centroid(vectors: List[np.ndarray]) -> np.ndarray:
    """
    Compute the centroid (mean) of a set of vectors.

    Args:
        vectors: List of vectors

    Returns:
        Centroid vector
    """
    if not vectors:
        raise ValueError("Cannot compute centroid of empty list")

    matrix = np.array(vectors)
    return np.mean(matrix, axis=0)


def compute_variance(vectors: List[np.ndarray]) -> float:
    """
    Compute the variance of a set of vectors.

    Args:
        vectors: List of vectors

    Returns:
        Total variance
    """
    if not vectors:
        raise ValueError("Cannot compute variance of empty list")

    matrix = np.array(vectors)
    return float(np.var(matrix))
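A worked check of the identities above; the values are chosen so the arithmetic is easy to follow.

import numpy as np

v1 = normalize_l2(np.array([3.0, 4.0]))  # -> [0.6, 0.8]
v2 = normalize_l2(np.array([4.0, 3.0]))  # -> [0.8, 0.6]

cos = cosine_similarity(v1, v2)          # 0.6*0.8 + 0.8*0.6 = 0.96
assert abs(cos - dot_product(v1, v2)) < 1e-7  # equal for unit vectors

# angular_distance = arccos(0.96) / pi ≈ 0.0903
print(angular_distance(v1, v2))

# Consistency of the conversion helpers
assert abs(similarity_to_distance(cos, "angular") - angular_distance(v1, v2)) < 1e-7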
core/embedding/state_embedding_builder.py (new file, 395 lines; diff truncated below)
@@ -0,0 +1,395 @@
"""
StateEmbeddingBuilder - Building Complete State Embeddings

Builds State Embeddings by fusing the embeddings of every modality
(image, text, title, UI) from a ScreenState.

Uses OpenCLIP to generate real embeddings instead of random vectors.
"""

from typing import Dict, Optional, Any
from pathlib import Path
import logging
import numpy as np
from datetime import datetime
from PIL import Image

from ..models.screen_state import ScreenState
from ..models.state_embedding import StateEmbedding, EmbeddingComponent
from .fusion_engine import FusionEngine, FusionConfig
from .clip_embedder import CLIPEmbedder

logger = logging.getLogger(__name__)


class StateEmbeddingBuilder:
    """
    State Embedding builder.

    Takes a ScreenState and produces a complete State Embedding by:
    1. Computing the embeddings for each modality (image, text, title, UI)
    2. Fusing those embeddings with the FusionEngine
    3. Saving the result
    """

    def __init__(self,
                 fusion_engine: Optional[FusionEngine] = None,
                 embedders: Optional[Dict[str, Any]] = None,
                 output_dir: Optional[Path] = None,
                 use_clip: bool = True):
        """
        Initialize the builder.

        Args:
            fusion_engine: Fusion engine (a default one is created if None)
            embedders: Dict of embedders, one per modality
                       {"image": ImageEmbedder, "text": TextEmbedder, ...}
            output_dir: Output directory for the vectors
            use_clip: If True, use OpenCLIP for the embeddings (recommended)
        """
        self.fusion_engine = fusion_engine or FusionEngine()
        self.output_dir = output_dir or Path("data/embeddings")
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Initialize OpenCLIP if requested
        self.clip_embedder = None
        if use_clip:
            try:
                logger.info("Initializing OpenCLIP for embeddings...")
                self.clip_embedder = CLIPEmbedder()
                logger.info("✓ OpenCLIP initialized")
            except Exception as e:
                logger.warning(f"Could not initialize OpenCLIP: {e}")
                logger.info("Using provided embedders or default vectors")

        # Use the provided embedders, or fall back to CLIP for every modality
        if embedders:
            self.embedders = embedders
        elif self.clip_embedder:
            self.embedders = {
                "image": self.clip_embedder,
                "text": self.clip_embedder,
                "title": self.clip_embedder,
                "ui": self.clip_embedder
            }
        else:
            self.embedders = {}

    def build(self,
              screen_state: ScreenState,
              embedding_id: Optional[str] = None,
              compute_embeddings: bool = True) -> StateEmbedding:
        """
        Build a State Embedding from a ScreenState.

        Args:
            screen_state: Screen state to embed
            embedding_id: Unique ID (generated if None)
            compute_embeddings: If False, use precomputed embeddings

        Returns:
            Complete StateEmbedding with the fused vector
        """
        # Generate an ID if needed
        if embedding_id is None:
            embedding_id = self._generate_embedding_id(screen_state)

        # Compute or load the embeddings for each modality
        if compute_embeddings:
            embeddings = self._compute_all_embeddings(screen_state)
        else:
            embeddings = self._load_precomputed_embeddings(screen_state)

        # Save path for the fused vector
        vector_path = self.output_dir / f"{embedding_id}.npy"

        # Create the State Embedding via fusion
        state_embedding = self.fusion_engine.create_state_embedding(
            embedding_id=embedding_id,
            embeddings=embeddings,
            vector_save_path=str(vector_path),
            metadata={
                "screen_state_id": screen_state.screen_state_id,
                "timestamp": screen_state.timestamp.isoformat(),
                "window_title": getattr(screen_state.window, 'title', ''),
                "created_at": datetime.now().isoformat()
            }
        )

        # Save the metadata next to the vector
        metadata_path = self.output_dir / f"{embedding_id}_metadata.json"
        state_embedding.save_to_file(metadata_path)

        return state_embedding

    def _compute_all_embeddings(self,
                                screen_state: ScreenState) -> Dict[str, np.ndarray]:
        """
        Compute the embeddings for all modalities.

        Args:
            screen_state: Screen state

        Returns:
            Dict {modality: vector}
        """
        embeddings = {}

        # Image embedding (full screenshot)
        if "image" in self.embedders and hasattr(screen_state, 'raw'):
            image_emb = self._compute_image_embedding(screen_state)
            if image_emb is not None:
                embeddings["image"] = image_emb

        # Text embedding (detected text)
        if "text" in self.embedders and hasattr(screen_state, 'perception'):
            text_emb = self._compute_text_embedding(screen_state)
            if text_emb is not None:
                embeddings["text"] = text_emb

        # Title embedding (window title)
        if "title" in self.embedders and hasattr(screen_state, 'window'):
            title_emb = self._compute_title_embedding(screen_state)
            if title_emb is not None:
                embeddings["title"] = title_emb

        # UI embedding (UI elements)
        if "ui" in self.embedders and hasattr(screen_state, 'ui_elements'):
            ui_emb = self._compute_ui_embedding(screen_state)
            if ui_emb is not None:
                embeddings["ui"] = ui_emb

        # If no embedding could be computed, fall back to default vectors
        if not embeddings:
            # Use the default dimension (512)
            default_dim = 512
            embeddings = {
                "image": np.random.randn(default_dim).astype(np.float32),
                "text": np.random.randn(default_dim).astype(np.float32),
                "title": np.random.randn(default_dim).astype(np.float32),
                "ui": np.random.randn(default_dim).astype(np.float32)
            }

        return embeddings

    def _compute_image_embedding(self, screen_state: ScreenState) -> Optional[np.ndarray]:
        """Compute the image (screenshot) embedding with OpenCLIP."""
        if "image" not in self.embedders:
            return None

        try:
            embedder = self.embedders["image"]
            screenshot_path = screen_state.raw.screenshot_path

            # Load the image
            image = Image.open(screenshot_path)

            # Use OpenCLIP when available
            if isinstance(embedder, CLIPEmbedder):
                return embedder.embed_image(image)

            # Otherwise, try the standard method names
            if hasattr(embedder, 'embed_image'):
                return embedder.embed_image(screenshot_path)
            elif hasattr(embedder, 'encode_image'):
                return embedder.encode_image(screenshot_path)
            elif callable(embedder):
                return embedder(screenshot_path)
        except Exception as e:
            logger.warning(f"Failed to compute image embedding: {e}")
            logger.debug("Traceback:", exc_info=True)

        return None

    def _compute_text_embedding(self, screen_state: ScreenState) -> Optional[np.ndarray]:
        """Compute the embedding of the detected text with OpenCLIP."""
        if "text" not in self.embedders:
            return None

        try:
            embedder = self.embedders["text"]

            # Concatenate all detected text fragments
            texts = []
            if hasattr(screen_state.perception, 'detected_texts'):
                texts = screen_state.perception.detected_texts

            combined_text = " ".join(texts) if texts else ""

            if not combined_text:
                return None

            # Use OpenCLIP when available
            if isinstance(embedder, CLIPEmbedder):
                return embedder.embed_text(combined_text)

            # Otherwise, try the standard method names
            if hasattr(embedder, 'embed_text'):
                return embedder.embed_text(combined_text)
            elif hasattr(embedder, 'encode_text'):
                return embedder.encode_text(combined_text)
            elif callable(embedder):
                return embedder(combined_text)
        except Exception as e:
            logger.warning(f"Failed to compute text embedding: {e}")

        return None

    def _compute_title_embedding(self, screen_state: ScreenState) -> Optional[np.ndarray]:
        """Compute the window-title embedding with OpenCLIP."""
        if "title" not in self.embedders:
            return None

        try:
            embedder = self.embedders["title"]
            title = getattr(screen_state.window, 'title', '')

            if not title:
                return None

            # Use OpenCLIP when available
            if isinstance(embedder, CLIPEmbedder):
                return embedder.embed_text(title)

            # Otherwise, try the standard method names
            if hasattr(embedder, 'embed_text'):
                return embedder.embed_text(title)
            elif hasattr(embedder, 'encode_text'):
                return embedder.encode_text(title)
            elif callable(embedder):
                return embedder(title)
        except Exception as e:
            logger.warning(f"Failed to compute title embedding: {e}")

        return None

    def _compute_ui_embedding(self, screen_state: ScreenState) -> Optional[np.ndarray]:
        """Compute the mean embedding of the UI elements."""
        if "ui" not in self.embedders:
            return None

        try:
            embedder = self.embedders["ui"]
            ui_elements = screen_state.ui_elements

            if not ui_elements:
                return None

            # Compute an embedding for each UI element
            ui_embeddings = []
            for element in ui_elements:
                # Use the element's precomputed image embedding if available
                if hasattr(element, 'embeddings') and element.embeddings:
                    if hasattr(element.embeddings, 'image_embedding_id'):
                        # Load the precomputed embedding
                        emb_path = Path(element.embeddings.image_embedding_id)
                        if emb_path.exists():
                            ui_embeddings.append(np.load(emb_path))

            # Without precomputed embeddings, compute them from the labels
            if not ui_embeddings:
                for element in ui_elements:
                    label = getattr(element, 'label', '')
                    if label and hasattr(embedder, 'embed_text'):
                        ui_embeddings.append(embedder.embed_text(label))

            # Average the UI embeddings
            if ui_embeddings:
                return np.mean(ui_embeddings, axis=0)
        except Exception as e:
            logger.warning(f"Failed to compute UI embedding: {e}")

        return None

    def _load_precomputed_embeddings(self,
                                     screen_state: ScreenState) -> Dict[str, np.ndarray]:
        """Load precomputed embeddings."""
        # TODO: Implement loading from the cache
        # For now, compute on the fly
        return self._compute_all_embeddings(screen_state)

    def _generate_embedding_id(self, screen_state: ScreenState) -> str:
        """Generate a unique ID for the embedding."""
        timestamp = screen_state.timestamp.strftime("%Y%m%d_%H%M%S_%f")
        return f"state_emb_{screen_state.screen_state_id}_{timestamp}"
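
    # Example (illustrative): for a state with screen_state_id "abc123"
    # captured at 2025-01-02 03:04:05.678901, the generated ID would be
    # "state_emb_abc123_20250102_030405_678901".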

    def batch_build(self,
                    screen_states: list[ScreenState],
                    compute_embeddings: bool = True) -> list[StateEmbedding]:
        """
        Build several State Embeddings in batch.

        Args:
            screen_states: List of ScreenStates
            compute_embeddings: If False, use precomputed embeddings

        Returns:
            List of StateEmbeddings
        """
        return [
            self.build(state, compute_embeddings=compute_embeddings)
            for state in screen_states
        ]

    def set_embedder(self, modality: str, embedder: Any) -> None:
        """
        Set the embedder for a modality.

        Args:
            modality: Modality name ("image", "text", "title", "ui")
            embedder: Embedder to use
        """
        self.embedders[modality] = embedder

    def get_embedder(self, modality: str) -> Optional[Any]:
        """Get the embedder for a modality."""
        return self.embedders.get(modality)

    def set_output_dir(self, output_dir: Path) -> None:
        """Set the output directory."""
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)


# ============================================================================
# Utility functions
# ============================================================================


def create_builder(embedders: Optional[Dict[str, Any]] = None,
                   output_dir: Optional[Path] = None,
                   use_clip: bool = True) -> StateEmbeddingBuilder:
    """
    Create a StateEmbeddingBuilder with the default configuration.

    Args:
        embedders: Optional dict of embedders
        output_dir: Optional output directory
        use_clip: If True, use OpenCLIP (recommended)

    Returns:
        StateEmbeddingBuilder configured with OpenCLIP
    """
    return StateEmbeddingBuilder(
        embedders=embedders,
        output_dir=output_dir,
        use_clip=use_clip
    )


def build_from_screen_state(screen_state: ScreenState,
                            embedders: Dict[str, Any],
                            output_dir: Path) -> StateEmbedding:
    """
    Helper to quickly build a State Embedding.

    Args:
        screen_state: Screen state
        embedders: Dict of embedders
        output_dir: Output directory

    Returns:
        StateEmbedding
    """
    builder = StateEmbeddingBuilder(embedders=embedders, output_dir=output_dir)
    return builder.build(screen_state)
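
# Usage sketch (illustrative): `screen_state` is assumed to be an
# already-captured ScreenState instance, and the paths are arbitrary examples.
#
#     from pathlib import Path
#     from core.embedding.state_embedding_builder import create_builder
#
#     builder = create_builder(output_dir=Path("data/embeddings"))
#     emb = builder.build(screen_state)            # single state
#     embs = builder.batch_build([screen_state])   # or in batch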