Chantier 1 — Extraction DAS par LLM : - Nouveau prompt expert DIM dans rag_search.py (extract_das_llm) - Phase 4 dans cim10_extractor.py : détection DAS supplémentaires avant enrichissement RAG - Cache persistant (clé hash du texte), validation CIM-10, déduplication - Activé uniquement avec use_rag=True (--no-rag le désactive) Chantier 2 — Admin référentiels : - Config : REFERENTIELS_DIR, UPLOAD_MAX_SIZE_MB, ALLOWED_EXTENSIONS - Chunking générique (PDF/CSV/Excel/TXT) + ajout incrémental FAISS dans rag_index.py - ReferentielManager CRUD dans viewer/referentiels.py - 5 routes Flask (listing, upload, indexation, suppression, rebuild) - Template admin avec tableau interactif + lien sidebar Fix : if cache → if cache is not None (OllamaCache vide évaluait à False) 410 tests passent (27 nouveaux, 0 régression). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
177 lines
5.2 KiB
Python
177 lines
5.2 KiB
Python
"""Configuration globale et modèles de données pour le pipeline T2A."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# --- Paths ---

# Directory two levels above this file's resolved location.
BASE_DIR = Path(__file__).resolve().parent.parent

INPUT_DIR = BASE_DIR.joinpath("input")
OUTPUT_DIR = BASE_DIR.joinpath("output")
ANONYMIZED_DIR = OUTPUT_DIR.joinpath("anonymized")
STRUCTURED_DIR = OUTPUT_DIR.joinpath("structured")
REPORTS_DIR = OUTPUT_DIR.joinpath("reports")

# Import-time side effect: make sure the working directories exist.
# OUTPUT_DIR is not listed explicitly; it is created through parents=True
# when its sub-directories are created.
for d in (INPUT_DIR, ANONYMIZED_DIR, STRUCTURED_DIR, REPORTS_DIR):
    d.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
# --- Anonymization settings ---

# Flag name suggests establishment names are kept during anonymization;
# NOTE(review): confirm against the anonymizer that reads it.
KEEP_ESTABLISHMENT_NAME = True

# Model identifier string for the NER step.
NER_MODEL = "Jean-Baptiste/camembert-ner"

# Presumably the minimum NER score for an entity to be accepted —
# TODO confirm against the consumer of this constant.
NER_CONFIDENCE_THRESHOLD = 0.80
|
|
|
|
|
|
# --- Ollama settings ---

OLLAMA_URL = "http://localhost:11434"
OLLAMA_MODEL = "gemma3:12b"

# NOTE(review): unit is not stated here (presumably seconds) — confirm.
OLLAMA_TIMEOUT = 120

# JSON cache file stored under <BASE_DIR>/data.
OLLAMA_CACHE_PATH = BASE_DIR.joinpath("data", "ollama_cache.json")

# Presumably the maximum number of concurrent Ollama requests —
# TODO confirm against the caller.
OLLAMA_MAX_PARALLEL = 2
|
|
|
|
|
|
# --- RUM / establishment settings ---

# Both values are all-zero strings (9 and 4 characters respectively).
# NOTE(review): they look like placeholders to be replaced with the real
# establishment identifiers — confirm before producing real output.
FINESS = "000000000"
NUM_UM = "0000"
|
|
|
|
|
|
# --- RAG settings ---

# Locations under <BASE_DIR>/data.
RAG_INDEX_DIR = BASE_DIR.joinpath("data", "rag_index")
REFERENTIELS_DIR = BASE_DIR.joinpath("data", "referentiels")

# Upload limits: size cap in megabytes (per the constant's name) and the
# accepted file suffixes, written lowercase with the leading dot.
UPLOAD_MAX_SIZE_MB = 50
ALLOWED_EXTENSIONS = {
    ".pdf",
    ".csv",
    ".xlsx",
    ".xls",
    ".txt",
}

# JSON dictionaries under <BASE_DIR>/data.
CIM10_DICT_PATH = BASE_DIR.joinpath("data", "cim10_dict.json")
CCAM_DICT_PATH = BASE_DIR.joinpath("data", "ccam_dict.json")

# NOTE(review): the three PDF paths below are absolute and specific to one
# machine (/home/dom/...); they will not exist on other hosts.
CIM10_PDF = Path(
    "/home/dom/ai/aivanov_CIM/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"
)
GUIDE_METHODO_PDF = Path(
    "/home/dom/ai/aivanov_CIM/guide_methodo_mco_2026_version_provisoire.pdf"
)
CCAM_PDF = Path(
    "/home/dom/ai/aivanov_CIM/actualisation_ccam_descriptive_a_usage_pmsi_v4_2025.pdf"
)
|
|
|
|
|
|
# --- CIM-10 data models ---


class RAGSource(BaseModel):
    """Reference to a source passage attached to a coding suggestion.

    Only ``document`` is required; every other field defaults to ``None``.
    Used as the element type of the ``sources_rag`` lists on ``Diagnostic``
    and ``ActeCCAM`` and of ``ControleCPAM.sources_reponse``.
    """

    # Required identifier of the source document.
    document: str

    # Optional location / content details of the cited passage.
    page: Optional[int] = None
    code: Optional[str] = None
    extrait: Optional[str] = None
|
|
|
|
|
|
class Sejour(BaseModel):
    """Stay-level data for one patient; every field is optional.

    All fields default to ``None``, so ``Sejour()`` is a valid empty record
    (``DossierMedical.sejour`` relies on this through its default factory).
    """

    # Patient demographics.
    sexe: Optional[str] = None
    age: Optional[int] = None

    # Stay dates are kept as strings; the expected format is not fixed here.
    date_entree: Optional[str] = None
    date_sortie: Optional[str] = None
    duree_sejour: Optional[int] = None

    # Admission and discharge modes.
    mode_entree: Optional[str] = None
    mode_sortie: Optional[str] = None

    # Anthropometric values; NOTE(review): units are not stated in this
    # file — confirm against the extractor that fills them.
    imc: Optional[float] = None
    poids: Optional[float] = None
    taille: Optional[float] = None
|
|
|
|
|
|
class Diagnostic(BaseModel):
    """One diagnosis with its optional CIM-10 coding suggestion.

    ``texte`` is the only required field. ``sources_rag`` defaults to a
    fresh empty list per instance.
    """

    # Required diagnosis text.
    texte: str

    # Suggested CIM-10 code and the explanation that comes with it.
    cim10_suggestion: Optional[str] = None
    cim10_confidence: Optional[str] = None
    justification: Optional[str] = None
    raisonnement: Optional[str] = None
    sources_rag: list[RAGSource] = Field(default_factory=list)

    # Tri-state flags: None means "not set".
    est_cma: Optional[bool] = None
    est_cms: Optional[bool] = None

    # Documented values: "leger" | "modere" | "severe" | "non_evalue"
    # (not enforced by the type, which is a plain optional string).
    niveau_severite: Optional[str] = None
|
|
|
|
|
|
class ActeCCAM(BaseModel):
    """One procedure with its optional CCAM coding suggestion.

    ``texte`` is the only required field. ``sources_rag`` and ``alertes``
    each default to a fresh empty list per instance.
    """

    # Required procedure text.
    texte: str

    # Suggested CCAM code and the explanation that comes with it.
    code_ccam_suggestion: Optional[str] = None
    ccam_confidence: Optional[str] = None
    justification: Optional[str] = None
    raisonnement: Optional[str] = None
    sources_rag: list[RAGSource] = Field(default_factory=list)

    # Date is kept as a string; the expected format is not fixed here.
    date: Optional[str] = None

    # Documented values: "valide" | "obsolete" | "non_verifie"
    # (not enforced by the type, which is a plain optional string).
    validite: Optional[str] = None

    # Free-text alerts attached to this procedure.
    alertes: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class Traitement(BaseModel):
    """One medication entry; only ``medicament`` is required."""

    # Required medication name.
    medicament: str

    # Optional dosage text and ATC code, both plain strings.
    posologie: Optional[str] = None
    code_atc: Optional[str] = None
|
|
|
|
|
|
class BiologieCle(BaseModel):
    """One laboratory result; only ``test`` is required."""

    # Required test name.
    test: str

    # Result is kept as a string rather than a number.
    valeur: Optional[str] = None

    # Tri-state flag: None means "not set".
    anomalie: Optional[bool] = None
|
|
|
|
|
|
class Imagerie(BaseModel):
    """One imaging exam; only ``type`` is required."""

    # Required exam type. The field name shadows the ``type`` builtin inside
    # the class body only; it is part of the model's public schema.
    type: str

    # Optional conclusion text and score, both plain strings.
    conclusion: Optional[str] = None
    score: Optional[str] = None
|
|
|
|
|
|
# --- GHM estimation, CPAM controls and medical record ---
#
# GHMEstimation and ControleCPAM are defined before DossierMedical because
# DossierMedical uses them as field types; defining them first lets the
# annotations resolve when the class is created instead of relying on
# deferred forward-reference resolution.


class GHMEstimation(BaseModel):
    """Approximate GHM grouping result; every field has a default."""

    cmd: Optional[str] = None
    cmd_libelle: Optional[str] = None
    type_ghm: Optional[str] = None  # "C" / "M" / "K"
    severite: int = 1  # 1-4 (documented range, not enforced by the type)
    ghm_approx: Optional[str] = None  # e.g. "07C??2"

    # Counters, both starting at zero.
    cma_count: int = 0
    cms_count: int = 0

    # Free-text alerts; a fresh empty list per instance.
    alertes: list[str] = Field(default_factory=list)


class ControleCPAM(BaseModel):
    """One CPAM control entry; only ``numero_ogc`` is required."""

    # Required integer identifier of the control.
    numero_ogc: int

    # Text fields defaulting to the empty string.
    titre: str = ""
    arg_ucr: str = ""
    decision_ucr: str = ""

    # Optional ``*_ucr`` values, all plain strings.
    dp_ucr: Optional[str] = None
    da_ucr: Optional[str] = None
    dr_ucr: Optional[str] = None
    actes_ucr: Optional[str] = None

    # Optional counter-argument text and the sources backing it.
    contre_argumentation: Optional[str] = None
    sources_reponse: list[RAGSource] = Field(default_factory=list)


class DossierMedical(BaseModel):
    """Top-level record aggregating everything extracted for one file.

    Every field has a default, so ``DossierMedical()`` is valid: strings
    default to ``""``, list fields to a fresh empty list, ``sejour`` to an
    empty ``Sejour`` and the remaining optional fields to ``None``.
    """

    # Provenance.
    source_file: str = ""
    document_type: str = ""

    # Stay data and coding content.
    sejour: Sejour = Field(default_factory=Sejour)
    diagnostic_principal: Optional[Diagnostic] = None
    diagnostics_associes: list[Diagnostic] = Field(default_factory=list)
    actes_ccam: list[ActeCCAM] = Field(default_factory=list)

    # Clinical context.
    antecedents: list[str] = Field(default_factory=list)
    traitements_sortie: list[Traitement] = Field(default_factory=list)
    biologie_cle: list[BiologieCle] = Field(default_factory=list)
    imagerie: list[Imagerie] = Field(default_factory=list)
    complications: list[str] = Field(default_factory=list)

    # Coding alerts and the list of contributing source files.
    alertes_codage: list[str] = Field(default_factory=list)
    source_files: list[str] = Field(default_factory=list)

    # Grouping estimate and CPAM controls.
    ghm_estimation: Optional[GHMEstimation] = None
    controles_cpam: list[ControleCPAM] = Field(default_factory=list)

    # Spelled Optional[float] like every other optional field in this file;
    # the ``float | None`` form cannot be evaluated at runtime on
    # Python < 3.10 when pydantic resolves the annotation.
    processing_time_s: Optional[float] = None


# --- Anonymization report ---
|
|
|
|
|
|
class AnonymizationReport(BaseModel):
    """Replacement statistics for one anonymized file.

    ``source_file`` is required; the counters default to zero and
    ``entities_found`` to a fresh empty list per instance.
    """

    # Required name of the processed file.
    source_file: str

    # Replacement counters: an overall total plus one counter each for the
    # regex, NER and sweep categories. NOTE(review): whether the total is
    # the sum of the three is not established in this file.
    total_replacements: int = 0
    regex_replacements: int = 0
    ner_replacements: int = 0
    sweep_replacements: int = 0

    # Untyped dict entries; their schema is defined by the producer.
    entities_found: list[dict] = Field(default_factory=list)
|