feat: GPU (CUDA) embeddings for RAG indexing and search

Automatic GPU/CPU detection with fallback. The FAISS index now rebuilds in 1 min on GPU instead of 16 min on CPU.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
@@ -423,9 +423,11 @@ def build_index(force: bool = False) -> None:
     logger.info("Total: %d chunks to index", len(all_chunks))

-    # Embeddings - force CPU to avoid CUDA bugs with this model
-    logger.info("Loading embedding model dangvantuan/sentence-camembert-large (CPU)...")
-    model = SentenceTransformer("dangvantuan/sentence-camembert-large", device="cpu")
+    # Embeddings - GPU if available
+    import torch
+    _device = "cuda" if torch.cuda.is_available() else "cpu"
+    logger.info("Loading embedding model dangvantuan/sentence-camembert-large (%s)...", _device)
+    model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
     model.max_seq_length = 512  # CamemBERT max position embeddings

     texts = [c.text[:2000] for c in all_chunks]  # Truncate overly long chunks

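This hunk only switches model loading to CUDA; the encode step and the FAISS rebuild behind the quoted 1 min (GPU) vs 16 min (CPU) timing are outside the diff. A minimal sketch of what that step could look like, assuming a flat inner-product index over normalized vectors; the batch_size, faiss/numpy usage, and "rag.index" path are illustrative assumptions, not taken from the repository:

import faiss
import numpy as np

# Batched encode on whatever device the model was loaded on (CUDA or CPU).
embeddings = model.encode(
    texts,
    batch_size=64,              # larger batches amortize host<->GPU transfers
    show_progress_bar=True,
    normalize_embeddings=True,  # unit vectors make inner product == cosine
)
embeddings = np.asarray(embeddings, dtype="float32")  # FAISS expects float32

index = faiss.IndexFlatIP(embeddings.shape[1])  # dimension from model output
index.add(embeddings)
faiss.write_index(index, "rag.index")           # hypothetical index path

The speedup comes almost entirely from the encode call: embedding thousands of chunks is batched matrix work that a GPU handles in parallel, while the FAISS add itself is cheap either way.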
@@ -29,7 +29,9 @@ def _get_embed_model():
     if _embed_model is None:
         from sentence_transformers import SentenceTransformer
         logger.info("Loading embedding model for search...")
-        _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device="cpu")
+        import torch
+        _device = "cuda" if torch.cuda.is_available() else "cpu"
+        _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
         _embed_model.max_seq_length = 512
     return _embed_model

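On the search side, _get_embed_model() lazily loads the model once and reuses it for every query. A sketch of a typical call site, assuming the same normalized-vector FAISS index as at build time; the search function, index, chunks, and top_k are illustrative, not part of this commit:

import numpy as np

def search(query: str, index, chunks, top_k: int = 5):
    # Encoding runs on CUDA when available, thanks to the device fallback above.
    model = _get_embed_model()
    q = model.encode([query], normalize_embeddings=True)
    scores, ids = index.search(np.asarray(q, dtype="float32"), top_k)
    return [(chunks[i], float(s)) for i, s in zip(ids[0], scores[0])]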