feat: configuration externalisée via .env + audit requirements
- Externalise 13 variables de config via python-dotenv (chemins PDF, modèles Ollama/embedding/NER, FINESS, seuils) avec défauts identiques - Centralise EMBEDDING_MODEL dans config.py (était hardcodé en 3 endroits) - Ajoute .env.example documenté et .env au .gitignore - Ajoute openpyxl et pandas manquants au requirements.txt - Ajoute data/referentiels au mkdir de run.sh Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,11 +2,15 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# --- Chemins ---
|
||||
|
||||
@@ -23,24 +27,24 @@ for d in (INPUT_DIR, ANONYMIZED_DIR, STRUCTURED_DIR, REPORTS_DIR):
|
||||
|
||||
# --- Configuration anonymisation ---
|
||||
|
||||
KEEP_ESTABLISHMENT_NAME = True
|
||||
NER_MODEL = "Jean-Baptiste/camembert-ner"
|
||||
NER_CONFIDENCE_THRESHOLD = 0.80
|
||||
KEEP_ESTABLISHMENT_NAME = os.environ.get("T2A_KEEP_ESTABLISHMENT", "True").lower() in ("true", "1", "yes")
|
||||
NER_MODEL = os.environ.get("T2A_NER_MODEL", "Jean-Baptiste/camembert-ner")
|
||||
NER_CONFIDENCE_THRESHOLD = float(os.environ.get("T2A_NER_THRESHOLD", "0.80"))
|
||||
|
||||
|
||||
# --- Configuration Ollama ---
|
||||
|
||||
OLLAMA_URL = "http://localhost:11434"
|
||||
OLLAMA_MODEL = "gemma3:12b"
|
||||
OLLAMA_TIMEOUT = 120
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma3:12b")
|
||||
OLLAMA_TIMEOUT = int(os.environ.get("OLLAMA_TIMEOUT", "120"))
|
||||
OLLAMA_CACHE_PATH = BASE_DIR / "data" / "ollama_cache.json"
|
||||
OLLAMA_MAX_PARALLEL = 2
|
||||
OLLAMA_MAX_PARALLEL = int(os.environ.get("OLLAMA_MAX_PARALLEL", "2"))
|
||||
|
||||
|
||||
# --- Configuration RUM / établissement ---
|
||||
|
||||
FINESS = "000000000"
|
||||
NUM_UM = "0000"
|
||||
FINESS = os.environ.get("T2A_FINESS", "000000000")
|
||||
NUM_UM = os.environ.get("T2A_NUM_UM", "0000")
|
||||
|
||||
|
||||
# --- Configuration RAG ---
|
||||
@@ -52,9 +56,13 @@ ALLOWED_EXTENSIONS = {".pdf", ".csv", ".xlsx", ".xls", ".txt"}
|
||||
CIM10_DICT_PATH = BASE_DIR / "data" / "cim10_dict.json"
|
||||
CIM10_SUPPLEMENTS_PATH = BASE_DIR / "data" / "cim10_supplements.json"
|
||||
CCAM_DICT_PATH = BASE_DIR / "data" / "ccam_dict.json"
|
||||
CIM10_PDF = Path("/home/dom/ai/aivanov_CIM/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf")
|
||||
GUIDE_METHODO_PDF = Path("/home/dom/ai/aivanov_CIM/guide_methodo_mco_2026_version_provisoire.pdf")
|
||||
CCAM_PDF = Path("/home/dom/ai/aivanov_CIM/actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf")
|
||||
CIM10_PDF = Path(os.environ.get("T2A_CIM10_PDF", "/home/dom/ai/aivanov_CIM/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"))
|
||||
GUIDE_METHODO_PDF = Path(os.environ.get("T2A_GUIDE_METHODO_PDF", "/home/dom/ai/aivanov_CIM/guide_methodo_mco_2026_version_provisoire.pdf"))
|
||||
CCAM_PDF = Path(os.environ.get("T2A_CCAM_PDF", "/home/dom/ai/aivanov_CIM/actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf"))
|
||||
|
||||
# --- Modèle d'embedding ---
|
||||
|
||||
EMBEDDING_MODEL = os.environ.get("T2A_EMBEDDING_MODEL", "dangvantuan/sentence-camembert-large")
|
||||
|
||||
|
||||
# --- Modèles de données CIM-10 ---
|
||||
|
||||
Reference in New Issue
Block a user