fix: fallback CPU embedding + protection CPAM contre crash OOM
- SentenceTransformer : fallback CPU si CUDA OOM (Ollama peut occuper la VRAM)
- Bloc CPAM dans main.py : try/except pour éviter crash fatal du pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -25,14 +25,25 @@ _MIN_SCORE = 0.3
|
||||
|
||||
|
||||
def _get_embed_model():
    """Load the embedding model (lazy singleton).

    Tries CUDA first and falls back to CPU on an out-of-memory error
    (Ollama may already occupy the VRAM).

    Returns:
        SentenceTransformer: the shared, fully initialized embedding model.

    Raises:
        torch.cuda.OutOfMemoryError: if OOM occurs while already loading
            on CPU (i.e. not a recoverable CUDA-VRAM situation).
    """
    global _embed_model
    if _embed_model is None:
        # Imported lazily so the module stays importable without the
        # heavy ML stack when search is never used.
        from sentence_transformers import SentenceTransformer
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
        try:
            logger.info("Chargement du modèle d'embedding (%s)...", device)
            _embed_model = SentenceTransformer(
                "dangvantuan/sentence-camembert-large", device=device
            )
        # NOTE: torch.cuda.OutOfMemoryError exists since torch 1.13 (even on
        # CPU-only builds); the top-level torch.OutOfMemoryError alias only
        # appeared in torch 2.5, so catching it on older installs would raise
        # AttributeError inside the except clause and break the fallback.
        except torch.cuda.OutOfMemoryError:
            if device != "cuda":
                # OOM without CUDA involved: nothing to fall back to.
                raise
            logger.warning("CUDA OOM pour l'embedding — fallback CPU")
            torch.cuda.empty_cache()  # free whatever the failed load reserved
            _embed_model = SentenceTransformer(
                "dangvantuan/sentence-camembert-large", device="cpu"
            )
        # camembert-large accepts up to 512 tokens; cap the sequence length.
        _embed_model.max_seq_length = 512
    return _embed_model
|
||||
|
||||
|
||||
Reference in New Issue
Block a user