---
# Example configuration for the MCO PMSI pipeline.
# Copy this file to config/config.yaml and adjust the values.
#
# NOTE(review): this file was recovered from a copy whose line breaks and
# indentation had been lost. Keys and values are unchanged, but the nesting
# was reconstructed from the section comments. Confirm against the config
# loader, in particular the placement of: llm.inference, llm.prompts, rules,
# source_priorities, encryption, gold_set and regression_thresholds.

# Database
database:
  # Placeholder credentials only; do not commit real secrets to version control.
  url: "postgresql://user:password@localhost:5432/pmsi_db"
  # For SQLite in development:
  # url: "sqlite:///data/pmsi_dev.db"
  echo: false
  pool_size: 10
  max_overflow: 20

# LLM model
llm:
  provider: "ollama"  # ollama, vllm, llamacpp
  base_url: "http://localhost:11434"
  model_name: "mistral"
  model_tag: "7b-instruct-v0.2"

  # Inference parameters
  inference:
    temperature: 0.1  # Low, for reproducibility
    top_p: 0.9
    max_tokens: 2048
    context_window: 8192

  # Prompts
  prompts:
    codeur_version: "v1.0"
    verificateur_version: "v1.1"  # Must differ from the codeur version

# Embeddings
embeddings:
  model: "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
  device: "cpu"  # cpu or cuda
  batch_size: 32
  normalize: true

# Vector store
vector_store:
  type: "faiss"  # faiss or qdrant
  dimension: 768
  index_type: "HNSW"
  # For Qdrant
  # url: "http://localhost:6333"
  # collection_name: "pmsi_referentiels"

# RAG retrieval
rag:
  # Hybrid search
  bm25_weight: 0.3
  vector_weight: 0.7
  top_k_retrieval: 50
  top_k_reranking: 10

  # Reranking
  # NOTE(review): confirm this model ID is available from your model hub.
  reranker_model: "cross-encoder/ms-marco-multilingual-MiniLM-L-12-v2"
  alphabetic_index_boost: 1.2  # Boost for results from the alphabetical index

# ATIH reference documents
referentiels:
  cim10:
    version: "2026"
    file_path: "data/referentiels/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"
    chunk_size: 500
    chunk_overlap: 100

  ccam:
    version: "2025"
    file_path: "data/referentiels/actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf"
    chunk_size: 600
    chunk_overlap: 100

  guide_mco:
    version: "2026"
    file_path: "data/referentiels/guide_methodo_mco_2026_version_provisoire.pdf"
    chunk_size: 800
    chunk_overlap: 150

# Grouping function
groupage:
  version: "2026"
  # Path to the ATIH grouping library
  library_path: "/opt/atih/groupage/libgroupage.so"

# PII protection (DIP)
pii:
  enabled: true
  detection_method: "hybrid"  # hybrid, regex, ner
  anonymize_exports: true
  recall_threshold: 0.95  # Prefer false positives

# PMSI validation
validation:
  max_questions: 5
  confidence_threshold_low: 0.5
  confidence_threshold_high: 0.8

  # Conservative vs aggressive mode
  mode: "conservative"  # conservative or aggressive

# Coding rules
rules:
  version: "v1.0"
  file_path: "config/rules.yaml"

# Source priorities
source_priorities:
  cr_operatoire: 1
  cr_medical: 2
  imagerie: 3
  biologie: 4
  courrier: 5

# Audit and logging
audit:
  enabled: true
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR
  log_file: "logs/pipeline.log"
  log_format: "json"  # json or text

# Export encryption
encryption:
  enabled: true
  algorithm: "AES-256-GCM"
  # Keep the key file out of version control.
  key_file: "config/encryption.key"

# Performance
performance:
  # Timeouts (in seconds)
  timeout_mono_document_p50: 20
  timeout_mono_document_p95: 45
  timeout_multi_documents_p50: 35
  timeout_multi_documents_p95: 75

  # Partial-results mode
  partial_results_enabled: true

  # Cache
  cache_embeddings: true
  cache_ttl: 3600  # 1 hour

# Metrics and monitoring
monitoring:
  enabled: true
  metrics_interval: 60  # seconds

  # Alert thresholds
  thresholds:
    codes_sans_preuve_max: 0.05  # 5%
    diagnostics_nies_codes_max: 0.01  # 1%
    das_fantomes_max: 0.10  # 10%
    actes_sans_preuve_max: 0.02  # 2%

# Gold set for validation
gold_set:
  path: "data/gold_set/"
  min_size: 200
  specialties: ["chirurgie", "medecine"]

# Non-regression thresholds
regression_thresholds:
  dp_accuracy_min: 0.70
  das_precision_min: 0.60
  das_recall_min: 0.65
  tim_acceptance_min: 0.50

# Security
security:
  # Access control
  rbac_enabled: true

  # Roles
  roles:
    - name: "tim"
      permissions: ["view", "correct", "validate"]
    - name: "responsable_dim"
      permissions: ["view", "correct", "validate", "export", "configure"]
    - name: "admin"
      permissions: ["all"]

  # Authentication
  auth:
    method: "local"  # local, ldap, oauth
    session_timeout: 3600  # 1 hour