fix: fallback CPU embedding + protection CPAM contre crash OOM

- SentenceTransformer : fallback CPU si CUDA OOM (Ollama peut occuper la VRAM)
- Bloc CPAM dans main.py : try/except pour éviter crash fatal du pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-13 06:11:38 +01:00
parent e90450903e
commit 0d3cb83f12
2 changed files with 29 additions and 15 deletions

View File

@@ -355,6 +355,7 @@ def main(input_path: str | None = None) -> None:
# Contrôle CPAM : enrichir le dossier principal (fusionné ou dernier)
if cpam_data and subdir:
try:
    from .control.cpam_parser import match_dossier_ogc
    controles = match_dossier_ogc(subdir, cpam_data)
    if controles:
@@ -367,6 +368,8 @@ def main(input_path: str | None = None) -> None:
            ctrl.contre_argumentation = text
            ctrl.sources_reponse = sources
        target.controles_cpam = controles
except Exception:
logger.exception("Erreur CPAM pour %s", subdir)
# Écrire le dossier fusionné (après enrichissement CPAM éventuel)
if merged is not None and subdir:

View File

@@ -25,14 +25,25 @@ _MIN_SCORE = 0.3
def _get_embed_model():
    """Load the embedding model (lazily-initialized module singleton).

    Tries CUDA first and falls back to CPU on an out-of-memory error
    (another process such as Ollama may already occupy the VRAM).

    Returns:
        The shared SentenceTransformer instance, with ``max_seq_length``
        capped at 512 tokens.

    Raises:
        torch.cuda.OutOfMemoryError: if loading on CPU itself runs out
            of memory (re-raised unchanged).
    """
    global _embed_model
    if _embed_model is None:
        from sentence_transformers import SentenceTransformer
        import torch

        _model_name = "dangvantuan/sentence-camembert-large"
        _device = "cuda" if torch.cuda.is_available() else "cpu"
        # torch.OutOfMemoryError only exists on recent torch releases;
        # older versions expose it as torch.cuda.OutOfMemoryError.
        _oom_error = getattr(torch, "OutOfMemoryError", torch.cuda.OutOfMemoryError)
        try:
            logger.info("Chargement du modèle d'embedding (%s)...", _device)
            _embed_model = SentenceTransformer(_model_name, device=_device)
        except _oom_error:
            if _device != "cuda":
                raise
            logger.warning("CUDA OOM pour l'embedding — fallback CPU")
            # Free cached CUDA allocations before retrying on CPU.
            torch.cuda.empty_cache()
            _embed_model = SentenceTransformer(_model_name, device="cpu")
        _embed_model.max_seq_length = 512
    return _embed_model