Files
t2a_v2/src/config.py
dom 037d255aa0 feat: ajout viewer Flask CIM-10 avec config Ollama centralisée et chronométrage
Ajoute une interface web Flask pour visualiser les dossiers médicaux CIM-10,
avec temps de traitement par PDF, sélecteur de modèle Ollama, et centralisation
de la config Ollama dans src/config.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 20:11:07 +01:00

126 lines
3.4 KiB
Python

"""Configuration globale et modèles de données pour le pipeline T2A."""
from __future__ import annotations

import os
from pathlib import Path
from typing import Optional

from pydantic import BaseModel, Field
# --- Paths ---
# Directory containing this file; the project root is its parent.
_SRC_DIR = Path(__file__).resolve().parent
BASE_DIR = _SRC_DIR.parent

# Input documents and the output tree (one sub-directory per artefact kind).
INPUT_DIR = BASE_DIR.joinpath("input")
OUTPUT_DIR = BASE_DIR.joinpath("output")
ANONYMIZED_DIR = OUTPUT_DIR.joinpath("anonymized")
STRUCTURED_DIR = OUTPUT_DIR.joinpath("structured")
REPORTS_DIR = OUTPUT_DIR.joinpath("reports")

# Create the working directories as soon as this module is imported.
# OUTPUT_DIR is not listed explicitly: parents=True creates it together with
# its sub-directories, and exist_ok=True makes repeated imports harmless.
for d in (INPUT_DIR, ANONYMIZED_DIR, STRUCTURED_DIR, REPORTS_DIR):
    d.mkdir(parents=True, exist_ok=True)
# --- Anonymisation settings ---
# Going by its name, this flag keeps the establishment name in the anonymised
# output -- NOTE(review): confirm against the code that reads it.
KEEP_ESTABLISHMENT_NAME = True

# Identifier of the NER model and the confidence threshold paired with it
# (presumably a lower bound on entity scores -- verify in the caller).
NER_MODEL = "Jean-Baptiste/camembert-ner"
NER_CONFIDENCE_THRESHOLD = 0.8
# --- Ollama settings ---
# Central place for the Ollama connection parameters used by the project.
OLLAMA_URL = "http://localhost:11434"  # base URL of the Ollama server
OLLAMA_MODEL = "mistral-large-3:675b-cloud"  # default model tag
OLLAMA_TIMEOUT = 120  # request timeout; presumably seconds -- TODO confirm
# --- RAG settings ---
# Location of the RAG index: <BASE_DIR>/data/rag_index.
# Unlike the input/output directories, it is not created by this module.
RAG_INDEX_DIR = BASE_DIR.joinpath("data", "rag_index")
# Reference PDFs (CIM-10, MCO methodology guide, CCAM) listed under the RAG
# configuration. They all live in one directory, which defaults to the
# machine-specific location that was previously hard-coded. Set the
# T2A_REFERENCE_DIR environment variable to relocate them without editing
# this file; an unset or empty variable falls back to the default.
_REFERENCE_DIR = Path(
    os.environ.get("T2A_REFERENCE_DIR") or "/home/dom/ai/aivanov_CIM"
)
CIM10_PDF = _REFERENCE_DIR / "cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"
GUIDE_METHODO_PDF = _REFERENCE_DIR / "guide_methodo_mco_2026_version_provisoire.pdf"
CCAM_PDF = _REFERENCE_DIR / "actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf"
# --- CIM-10 data models ---
class RAGSource(BaseModel):
    """Pointer to a passage in a reference document.

    Instances are attached to a ``Diagnostic`` through its ``sources_rag``
    list. Only ``document`` is required; every other field defaults to
    ``None``.
    """

    # Name or identifier of the source document (required).
    document: str
    # Page number inside the document, when available.
    page: Optional[int] = None
    # Code tied to the passage (presumably a CIM-10 code -- confirm with the producer).
    code: Optional[str] = None
    # Text excerpt ("extrait") taken from the document.
    extrait: Optional[str] = None
class Sejour(BaseModel):
    """Administrative and biometric data for one hospital stay ("sejour").

    Every field is optional and defaults to ``None``, so an empty ``Sejour()``
    is valid. Dates are stored as plain strings; no format is enforced here.
    """

    # --- Patient ---
    sexe: Optional[str] = None  # sex
    age: Optional[int] = None

    # --- Stay timeline ---
    date_entree: Optional[str] = None  # admission date
    date_sortie: Optional[str] = None  # discharge date
    duree_sejour: Optional[int] = None  # length of stay (unit not stated here; presumably days)
    mode_entree: Optional[str] = None  # admission mode
    mode_sortie: Optional[str] = None  # discharge mode

    # --- Biometrics (units are not specified by this model) ---
    imc: Optional[float] = None  # body-mass index ("IMC")
    poids: Optional[float] = None  # weight
    taille: Optional[float] = None  # height
class Diagnostic(BaseModel):
    """A diagnosis as written in the document, plus an optional CIM-10 suggestion.

    ``texte`` is the only required field. ``sources_rag`` gets a fresh empty
    list per instance.
    """

    # Diagnosis wording (required).
    texte: str
    # Suggested CIM-10 code, if any.
    cim10_suggestion: Optional[str] = None
    # Confidence attached to the suggestion; stored as a string, not a number.
    cim10_confidence: Optional[str] = None
    # Free-text rationale for the suggestion.
    justification: Optional[str] = None
    # Reference passages associated with this diagnosis.
    sources_rag: list[RAGSource] = Field(default_factory=list)
class ActeCCAM(BaseModel):
    """A medical procedure ("acte") with an optional CCAM code suggestion.

    ``texte`` is required; the other fields default to ``None``.
    """

    # Procedure wording (required).
    texte: str
    # Suggested CCAM code, if any.
    code_ccam_suggestion: Optional[str] = None
    # Date of the procedure, kept as a string (no format enforced here).
    date: Optional[str] = None
class Traitement(BaseModel):
    """A medication entry ("traitement").

    ``medicament`` is required; dosage and ATC code are optional.
    """

    # Medication name (required).
    medicament: str
    # Dosage / posology as free text.
    posologie: Optional[str] = None
    # ATC classification code, when known.
    code_atc: Optional[str] = None
class BiologieCle(BaseModel):
    """A key laboratory result ("biologie cle").

    ``test`` is required; value and abnormality flag are optional.
    """

    # Name of the lab test (required).
    test: str
    # Result kept as a string, so the model does not constrain its format.
    valeur: Optional[str] = None
    # Abnormality flag; None means it was not determined.
    anomalie: Optional[bool] = None
class Imagerie(BaseModel):
    """An imaging examination ("imagerie") and its outcome.

    ``type`` is required; conclusion and score are optional.
    """

    # Kind of imaging exam (required). The field name shadows the builtin
    # ``type`` inside the class body only; it is kept because it is part of
    # the model's public field names.
    type: str
    # Conclusion of the exam as free text.
    conclusion: Optional[str] = None
    # Score attached to the exam, stored as a string.
    score: Optional[str] = None
class DossierMedical(BaseModel):
    """Structured medical record ("dossier medical") built from one source file.

    Every field has a default, so ``DossierMedical()`` is valid: strings
    default to ``""``, list fields to a fresh empty list, ``sejour`` to an
    empty ``Sejour`` and the remaining fields to ``None``.
    """

    # --- Provenance ---
    source_file: str = ""
    document_type: str = ""

    # --- Stay and coding ---
    sejour: Sejour = Field(default_factory=Sejour)
    diagnostic_principal: Optional[Diagnostic] = None  # principal diagnosis
    diagnostics_associes: list[Diagnostic] = Field(default_factory=list)  # associated diagnoses
    actes_ccam: list[ActeCCAM] = Field(default_factory=list)  # CCAM procedures

    # --- Clinical content ---
    antecedents: list[str] = Field(default_factory=list)  # medical history
    traitements_sortie: list[Traitement] = Field(default_factory=list)  # discharge treatments
    biologie_cle: list[BiologieCle] = Field(default_factory=list)  # key lab results
    imagerie: list[Imagerie] = Field(default_factory=list)  # imaging exams
    complications: list[str] = Field(default_factory=list)

    # --- Processing metadata ---
    # Processing time for this record (seconds, going by the ``_s`` suffix).
    # Written as Optional[float] like every other optional field in this file:
    # pydantic evaluates annotations at class creation, and ``float | None``
    # cannot be evaluated on Python 3.9.
    processing_time_s: Optional[float] = None
# --- Anonymisation report ---
class AnonymizationReport(BaseModel):
    """Summary of the replacements made while anonymising one file.

    ``source_file`` is required. All counters start at 0 and
    ``entities_found`` starts as a fresh empty list. The model does not
    enforce any relationship between the total and the per-method counters.
    """

    # File the report refers to (required).
    source_file: str

    # Replacement counters: overall, then per detection method.
    total_replacements: int = 0
    regex_replacements: int = 0
    ner_replacements: int = 0
    sweep_replacements: int = 0

    # Detected entities as plain dicts; their keys are not constrained here.
    entities_found: list[dict] = Field(default_factory=list)