From 0d3cb83f124eb3c6eb0d56cf2d060412d0146b6c Mon Sep 17 00:00:00 2001
From: dom
Date: Fri, 13 Feb 2026 06:11:38 +0100
Subject: [PATCH] fix: fallback CPU embedding + protection CPAM contre crash
 OOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- SentenceTransformer : fallback CPU si CUDA OOM (Ollama peut occuper la VRAM)
- Bloc CPAM dans main.py : try/except pour éviter crash fatal du pipeline

Co-Authored-By: Claude Opus 4.6
---
 src/main.py               | 27 +++++++++++++++------------
 src/medical/rag_search.py | 17 ++++++++++++++---
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/src/main.py b/src/main.py
index ebbd420..6c9fca2 100644
--- a/src/main.py
+++ b/src/main.py
@@ -355,18 +355,21 @@ def main(input_path: str | None = None) -> None:
 
         # Contrôle CPAM : enrichir le dossier principal (fusionné ou dernier)
         if cpam_data and subdir:
-            from .control.cpam_parser import match_dossier_ogc
-            controles = match_dossier_ogc(subdir, cpam_data)
-            if controles:
-                from .control.cpam_response import generate_cpam_response
-                target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
-                if target:
-                    logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
-                    for ctrl in controles:
-                        text, sources = generate_cpam_response(target, ctrl)
-                        ctrl.contre_argumentation = text
-                        ctrl.sources_reponse = sources
-                    target.controles_cpam = controles
+            try:
+                from .control.cpam_parser import match_dossier_ogc
+                controles = match_dossier_ogc(subdir, cpam_data)
+                if controles:
+                    from .control.cpam_response import generate_cpam_response
+                    target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
+                    if target:
+                        logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
+                        for ctrl in controles:
+                            text, sources = generate_cpam_response(target, ctrl)
+                            ctrl.contre_argumentation = text
+                            ctrl.sources_reponse = sources
+                        target.controles_cpam = controles
+            except Exception:
+                logger.exception("Erreur CPAM pour %s", subdir)
 
         # Écrire le dossier fusionné (après enrichissement CPAM éventuel)
         if merged is not None and subdir:
diff --git a/src/medical/rag_search.py b/src/medical/rag_search.py
index 358bb8e..9d8bea5 100644
--- a/src/medical/rag_search.py
+++ b/src/medical/rag_search.py
@@ -25,14 +25,25 @@ _MIN_SCORE = 0.3
 
 
 def _get_embed_model():
-    """Charge le modèle d'embedding (singleton)."""
+    """Charge le modèle d'embedding (singleton).
+
+    Tente CUDA d'abord, fallback CPU si OOM (Ollama peut occuper la VRAM).
+    """
     global _embed_model
     if _embed_model is None:
         from sentence_transformers import SentenceTransformer
-        logger.info("Chargement du modèle d'embedding pour la recherche...")
         import torch
         _device = "cuda" if torch.cuda.is_available() else "cpu"
-        _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+        try:
+            logger.info("Chargement du modèle d'embedding (%s)...", _device)
+            _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+        except torch.OutOfMemoryError:
+            if _device == "cuda":
+                logger.warning("CUDA OOM pour l'embedding — fallback CPU")
+                torch.cuda.empty_cache()
+                _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device="cpu")
+            else:
+                raise
         _embed_model.max_seq_length = 512
     return _embed_model