feat: configuration externalisée via .env + audit requirements
- Externalise 13 variables de config via python-dotenv (chemins PDF, modèles Ollama/embedding/NER, FINESS, seuils) avec défauts identiques
- Centralise EMBEDDING_MODEL dans config.py (était hardcodé en 3 endroits)
- Ajoute .env.example documenté et .env au .gitignore
- Ajoute openpyxl et pandas manquants au requirements.txt
- Ajoute data/referentiels au mkdir de run.sh

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ from typing import Optional
|
||||
|
||||
import pdfplumber
|
||||
|
||||
-from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH, REFERENTIELS_DIR
+from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH, REFERENTIELS_DIR, EMBEDDING_MODEL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -426,8 +426,8 @@ def build_index(force: bool = False) -> None:
|
||||
# Embeddings — GPU si disponible
|
||||
import torch
|
||||
_device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
-    logger.info("Chargement du modèle d'embedding dangvantuan/sentence-camembert-large (%s)...", _device)
-    model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+    logger.info("Chargement du modèle d'embedding %s (%s)...", EMBEDDING_MODEL, _device)
+    model = SentenceTransformer(EMBEDDING_MODEL, device=_device)
|
||||
model.max_seq_length = 512 # CamemBERT max position embeddings
|
||||
|
||||
texts = [c.text[:2000] for c in all_chunks] # Tronquer les chunks trop longs
|
||||
|
||||
@@ -8,6 +8,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from ..config import (
|
||||
ActeCCAM, Diagnostic, DossierMedical, RAGSource,
|
||||
OLLAMA_CACHE_PATH, OLLAMA_MAX_PARALLEL, OLLAMA_MODEL,
|
||||
EMBEDDING_MODEL,
|
||||
)
|
||||
from .cim10_dict import normalize_code, validate_code as cim10_validate
|
||||
from .cim10_extractor import BIO_NORMALS
|
||||
@@ -36,12 +37,12 @@ def _get_embed_model():
|
||||
_device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
try:
|
||||
logger.info("Chargement du modèle d'embedding (%s)...", _device)
|
||||
-        _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device=_device)
+        _embed_model = SentenceTransformer(EMBEDDING_MODEL, device=_device)
|
||||
except torch.OutOfMemoryError:
|
||||
if _device == "cuda":
|
||||
logger.warning("CUDA OOM pour l'embedding — fallback CPU")
|
||||
torch.cuda.empty_cache()
|
||||
-            _embed_model = SentenceTransformer("dangvantuan/sentence-camembert-large", device="cpu")
+            _embed_model = SentenceTransformer(EMBEDDING_MODEL, device="cpu")
|
||||
else:
|
||||
raise
|
||||
_embed_model.max_seq_length = 512
|
||||
|
||||
Reference in New Issue
Block a user