fix: fallback CPU embedding + protection CPAM contre crash OOM
- SentenceTransformer : fallback CPU si CUDA OOM (Ollama peut occuper la VRAM)
- Bloc CPAM dans main.py : try/except pour éviter crash fatal du pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
27
src/main.py
27
src/main.py
@@ -355,18 +355,21 @@ def main(input_path: str | None = None) -> None:
         # Contrôle CPAM : enrichir le dossier principal (fusionné ou dernier)
         if cpam_data and subdir:
-            from .control.cpam_parser import match_dossier_ogc
-            controles = match_dossier_ogc(subdir, cpam_data)
-            if controles:
-                from .control.cpam_response import generate_cpam_response
-                target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
-                if target:
-                    logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
-                    for ctrl in controles:
-                        text, sources = generate_cpam_response(target, ctrl)
-                        ctrl.contre_argumentation = text
-                        ctrl.sources_reponse = sources
-                    target.controles_cpam = controles
+            try:
+                from .control.cpam_parser import match_dossier_ogc
+                controles = match_dossier_ogc(subdir, cpam_data)
+                if controles:
+                    from .control.cpam_response import generate_cpam_response
+                    target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
+                    if target:
+                        logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
+                        for ctrl in controles:
+                            text, sources = generate_cpam_response(target, ctrl)
+                            ctrl.contre_argumentation = text
+                            ctrl.sources_reponse = sources
+                        target.controles_cpam = controles
+            except Exception:
+                logger.exception("Erreur CPAM pour %s", subdir)

         # Écrire le dossier fusionné (après enrichissement CPAM éventuel)
         if merged is not None and subdir:
|||||||
@@ -25,14 +25,25 @@ _MIN_SCORE = 0.3


 def _get_embed_model():
-    """Charge le modèle d'embedding (singleton)."""
+    """Charge le modèle d'embedding (singleton).
+
+    Tente CUDA d'abord, fallback CPU si OOM (Ollama peut occuper la VRAM).
+    """
     global _embed_model
     if _embed_model is None:
         from sentence_transformers import SentenceTransformer
-        logger.info("Chargement du modèle d'embedding pour la recherche...")
         import torch
         _device = "cuda" if torch.cuda.is_available() else "cpu"
-        _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+        try:
+            logger.info("Chargement du modèle d'embedding (%s)...", _device)
+            _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+        except torch.OutOfMemoryError:
+            if _device == "cuda":
+                logger.warning("CUDA OOM pour l'embedding — fallback CPU")
+                torch.cuda.empty_cache()
+                _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device="cpu")
+            else:
+                raise
         _embed_model.max_seq_length = 512
     return _embed_model
|||||||
Reference in New Issue
Block a user