""" Configuration T2A Extractor """ from pathlib import Path # === Ollama === OLLAMA_BASE_URL = "http://localhost:11434" OLLAMA_MODEL = "gemma3:27b-cloud" # À adapter selon le tag exact OLLAMA_TIMEOUT = 120 # secondes par requête OLLAMA_MAX_RETRIES = 2 # === OCR (docTR) === DOCTR_DET_ARCH = "db_resnet50" DOCTR_RECO_ARCH = "crnn_vgg16_bn" OCR_DPI = 200 # résolution pour conversion page → image OCR_MIN_CONFIDENCE = 0.5 # seuil de confiance minimum docTR # === Extraction PDF === # Seuil de caractères pour considérer une page comme "native" # (certaines pages scannées ont quelques caractères parasites) NATIVE_TEXT_MIN_CHARS = 50 # === Schéma de sortie === OUTPUT_COLUMNS = [ "champ", "num_ogc", "type_desaccord", "codes_etablissement", "libelle_etablissement", "codes_controleurs", "libelle_controleurs", "decision_ucr", "codes_retenus", "ghm_ghs", "texte_decision", ] # Valeurs autorisées pour les enums DECISION_VALUES = ["Favorable", "Défavorable"] TYPE_DESACCORD_VALUES = ["DP", "DAS", "DP+DAS", "Actes"] # === Chemins === PROJECT_ROOT = Path(__file__).parent DEFAULT_OUTPUT_DIR = PROJECT_ROOT / "output"