Files
aivanov_CIM/config/config.example.yaml
2026-03-05 01:20:14 +01:00

187 lines
4.2 KiB
YAML

# Configuration exemple pour le Pipeline MCO PMSI
# Copier ce fichier vers config/config.yaml et adapter les valeurs
# Database
database:
  # Placeholder credentials — replace them in your own config/config.yaml.
  url: "postgresql://user:password@localhost:5432/pmsi_db"
  # For SQLite in development:
  # url: "sqlite:///data/pmsi_dev.db"
  echo: false
  pool_size: 10
  max_overflow: 20
# LLM model
# NOTE(review): nesting reconstructed from a copy that had lost its
# indentation; `inference` and `prompts` are assumed to live under `llm` —
# confirm against the code that loads this file.
llm:
  provider: "ollama"  # ollama, vllm, llamacpp
  base_url: "http://localhost:11434"
  model_name: "mistral"
  model_tag: "7b-instruct-v0.2"

  # Inference parameters
  inference:
    temperature: 0.1  # Low, for reproducibility
    top_p: 0.9
    max_tokens: 2048
    context_window: 8192

  # Prompts
  prompts:
    codeur_version: "v1.0"
    verificateur_version: "v1.1"  # Must be different from the codeur version
# Embeddings
embeddings:
  model: "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
  device: "cpu"  # cpu or cuda
  batch_size: 32
  normalize: true
# Vector store
# NOTE(review): kept as a top-level key; confirm it is not meant to be nested
# under `embeddings`.
vector_store:
  type: "faiss"  # faiss or qdrant
  # NOTE(review): presumably must equal the output dimension of the
  # embedding model — confirm.
  dimension: 768
  index_type: "HNSW"
  # For Qdrant:
  # url: "http://localhost:6333"
  # collection_name: "pmsi_referentiels"
# RAG retrieval
rag:
  # Hybrid search
  bm25_weight: 0.3
  vector_weight: 0.7
  top_k_retrieval: 50
  top_k_reranking: 10

  # Reranking
  # NOTE(review): verify that this model id exists on the model hub before
  # first use; it does not match the usual naming of the published MS MARCO
  # cross-encoders.
  reranker_model: "cross-encoder/ms-marco-multilingual-MiniLM-L-12-v2"
  alphabetic_index_boost: 1.2  # Boost for results from the alphabetical index
# ATIH reference documents
# Each entry carries its own version, source PDF path and chunking settings.
referentiels:
  cim10:
    version: "2026"
    file_path: "data/referentiels/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"
    chunk_size: 500
    chunk_overlap: 100

  ccam:
    version: "2025"
    file_path: "data/referentiels/actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf"
    chunk_size: 600
    chunk_overlap: 100

  guide_mco:
    version: "2026"
    file_path: "data/referentiels/guide_methodo_mco_2026_version_provisoire.pdf"
    chunk_size: 800
    chunk_overlap: 150
# Grouping function
groupage:
  version: "2026"
  # Path to the ATIH grouping library
  library_path: "/opt/atih/groupage/libgroupage.so"
# PII protection
pii:
  enabled: true
  detection_method: "hybrid"  # hybrid, regex, ner
  anonymize_exports: true
  recall_threshold: 0.95  # Prefer false positives
# PMSI validation
validation:
  max_questions: 5
  confidence_threshold_low: 0.5
  confidence_threshold_high: 0.8

  # Conservative vs aggressive mode
  mode: "conservative"  # conservative or aggressive
# Coding rules
# NOTE(review): `source_priorities` is assumed to belong under `rules`
# (nesting reconstructed from a copy without indentation) — confirm.
rules:
  version: "v1.0"
  file_path: "config/rules.yaml"

  # Source priorities
  # NOTE(review): presumably a lower number means a higher priority — confirm.
  source_priorities:
    cr_operatoire: 1
    cr_medical: 2
    imagerie: 3
    biologie: 4
    courrier: 5
# Audit and logging
audit:
  enabled: true
  log_level: "INFO"  # DEBUG, INFO, WARNING, ERROR
  log_file: "logs/pipeline.log"
  log_format: "json"  # json or text
# Export encryption
# NOTE(review): kept as a top-level key; confirm it is not meant to be nested
# under another section (e.g. `audit`).
encryption:
  enabled: true
  algorithm: "AES-256-GCM"
  key_file: "config/encryption.key"
# Performance
performance:
  # Timeouts (in seconds)
  timeout_mono_document_p50: 20
  timeout_mono_document_p95: 45
  timeout_multi_documents_p50: 35
  timeout_multi_documents_p95: 75

  # Partial-results mode
  partial_results_enabled: true

  # Cache
  cache_embeddings: true
  cache_ttl: 3600  # 1 hour
# Metrics and monitoring
# NOTE(review): `thresholds` is assumed to belong under `monitoring`
# (nesting reconstructed from a copy without indentation) — confirm.
monitoring:
  enabled: true
  metrics_interval: 60  # seconds

  # Alert thresholds (maximum ratios)
  thresholds:
    codes_sans_preuve_max: 0.05  # 5%
    diagnostics_nies_codes_max: 0.01  # 1%
    das_fantomes_max: 0.10  # 10%
    actes_sans_preuve_max: 0.02  # 2%
# Gold set for validation
# NOTE(review): `regression_thresholds` is assumed to belong under `gold_set`
# (nesting reconstructed from a copy without indentation) — confirm.
gold_set:
  path: "data/gold_set/"
  min_size: 200
  specialties: ["chirurgie", "medecine"]

  # Non-regression thresholds (minimum scores)
  regression_thresholds:
    dp_accuracy_min: 0.70
    das_precision_min: 0.60
    das_recall_min: 0.65
    tim_acceptance_min: 0.50
# Security
# NOTE(review): `roles` and `auth` are assumed to belong under `security`
# (nesting reconstructed from a copy without indentation) — confirm.
security:
  # Access control
  rbac_enabled: true

  # Roles: each entry pairs a role name with its list of permissions.
  roles:
    - name: "tim"
      permissions: ["view", "correct", "validate"]
    - name: "responsable_dim"
      permissions: ["view", "correct", "validate", "export", "configure"]
    - name: "admin"
      permissions: ["all"]

  # Authentication
  auth:
    method: "local"  # local, ldap, oauth
    session_timeout: 3600  # 1 hour