feat: cache Ollama + parallélisation ThreadPool + filtrage DAS renforcé + modules GHM/CPAM/export RUM
- Cache persistant JSON thread-safe pour les résultats Ollama (invalidation par modèle) - Parallélisation des appels Ollama (ThreadPoolExecutor, 2 workers) - 6 nouvelles règles de filtrage DAS parasites (doublons, ponctuation, OCR, labo, fragments) - Client Ollama centralisé (mode JSON natif + retry) - Module GHM (estimation CMD/sévérité) - Module contrôle CPAM (parser + contre-argumentation RAG) - Export RUM (format RSS) - Viewer enrichi (détail dossier) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
45
run.sh
Executable file
45
run.sh
Executable file
@@ -0,0 +1,45 @@
|
|||||||
|
#!/bin/bash
# Bootstrap and launch the T2A application:
# creates/activates a virtualenv, installs deps when needed, then starts the viewer.
set -e

# Always run from the directory containing this script
cd "$(dirname "$0")"

echo "🚀 Démarrage de l'application T2A..."

# Create the virtual environment on first run
if [ ! -d ".venv" ]; then
    echo "📦 Création de l'environnement virtuel..."
    python3 -m venv .venv
fi

# Activate the virtual environment
echo "🔧 Activation de l'environnement virtuel..."
source .venv/bin/activate

# Install/update dependencies only when the marker file is missing
# or requirements.txt is newer than it
if [ ! -f ".venv/.deps_installed" ] || [ "requirements.txt" -nt ".venv/.deps_installed" ]; then
    echo "📥 Installation des dépendances..."
    pip install -q --upgrade pip
    pip install -q -r requirements.txt
    touch .venv/.deps_installed
else
    echo "✅ Dépendances déjà installées"
fi

# Create the directories expected by the pipeline
mkdir -p input output/anonymized output/structured output/reports data/rag_index

echo ""
echo "✨ Application prête !"
echo ""
echo "📂 Répertoires :"
echo " - input/ : Placez vos PDFs ici"
echo " - output/ : Résultats du traitement"
echo ""
echo "🌐 Lancement du viewer sur http://localhost:5000"
echo ""
echo " Appuyez sur Ctrl+C pour arrêter"
echo ""

# Launch the viewer (blocks until Ctrl+C)
python3 -m src.viewer
@@ -33,6 +33,14 @@ NER_CONFIDENCE_THRESHOLD = 0.80
|
|||||||
OLLAMA_URL = "http://localhost:11434"
|
OLLAMA_URL = "http://localhost:11434"
|
||||||
OLLAMA_MODEL = "gemma3:12b"
|
OLLAMA_MODEL = "gemma3:12b"
|
||||||
OLLAMA_TIMEOUT = 120
|
OLLAMA_TIMEOUT = 120
|
||||||
|
OLLAMA_CACHE_PATH = BASE_DIR / "data" / "ollama_cache.json"
|
||||||
|
OLLAMA_MAX_PARALLEL = 2
|
||||||
|
|
||||||
|
|
||||||
|
# --- Configuration RUM / établissement ---
|
||||||
|
|
||||||
|
FINESS = "000000000"
|
||||||
|
NUM_UM = "0000"
|
||||||
|
|
||||||
|
|
||||||
# --- Configuration RAG ---
|
# --- Configuration RAG ---
|
||||||
@@ -83,6 +91,10 @@ class Diagnostic(BaseModel):
|
|||||||
class ActeCCAM(BaseModel):
|
class ActeCCAM(BaseModel):
|
||||||
texte: str
|
texte: str
|
||||||
code_ccam_suggestion: Optional[str] = None
|
code_ccam_suggestion: Optional[str] = None
|
||||||
|
ccam_confidence: Optional[str] = None
|
||||||
|
justification: Optional[str] = None
|
||||||
|
raisonnement: Optional[str] = None
|
||||||
|
sources_rag: list[RAGSource] = Field(default_factory=list)
|
||||||
date: Optional[str] = None
|
date: Optional[str] = None
|
||||||
validite: Optional[str] = None # "valide" | "obsolete" | "non_verifie"
|
validite: Optional[str] = None # "valide" | "obsolete" | "non_verifie"
|
||||||
alertes: list[str] = Field(default_factory=list)
|
alertes: list[str] = Field(default_factory=list)
|
||||||
@@ -120,12 +132,38 @@ class DossierMedical(BaseModel):
|
|||||||
complications: list[str] = Field(default_factory=list)
|
complications: list[str] = Field(default_factory=list)
|
||||||
alertes_codage: list[str] = Field(default_factory=list)
|
alertes_codage: list[str] = Field(default_factory=list)
|
||||||
source_files: list[str] = Field(default_factory=list)
|
source_files: list[str] = Field(default_factory=list)
|
||||||
|
ghm_estimation: Optional[GHMEstimation] = None
|
||||||
|
controles_cpam: list[ControleCPAM] = Field(default_factory=list)
|
||||||
processing_time_s: float | None = None
|
processing_time_s: float | None = None
|
||||||
|
|
||||||
|
|
||||||
# --- Rapport d'anonymisation ---
|
# --- Rapport d'anonymisation ---
|
||||||
|
|
||||||
|
|
||||||
|
class GHMEstimation(BaseModel):
    """Approximate GHM estimate computed for a stay (filled by the GHM module)."""

    # Major Diagnostic Category code (CMD), e.g. "07"
    cmd: Optional[str] = None
    # Human-readable label of the CMD
    cmd_libelle: Optional[str] = None
    # GHM type letter — values "C" / "M" / "K" per the comment; exact semantics
    # defined by the estimator (NOTE(review): presumably surgery/medicine/other — confirm)
    type_ghm: Optional[str] = None  # "C" / "M" / "K"
    # Severity level, 1 (lowest) to 4 (highest)
    severite: int = 1  # 1-4
    # Approximate GHM code with unknown positions masked, e.g. "07C??2"
    ghm_approx: Optional[str] = None  # ex: "07C??2"
    # Count of CMA found for the stay
    cma_count: int = 0
    # Count of CMS found for the stay
    cms_count: int = 0
    # Warnings raised during estimation
    alertes: list[str] = Field(default_factory=list)
||||||
|
|
||||||
|
|
||||||
|
class ControleCPAM(BaseModel):
    """One CPAM (UCR) control row parsed from the control Excel file."""

    # OGC number, used to match a control against a dossier sub-folder prefix
    numero_ogc: int
    # Subject of the disagreement
    titre: str = ""
    # Argumentation written by the UCR reviewer
    arg_ucr: str = ""
    # Decision taken by the UCR
    decision_ucr: str = ""
    # Codes proposed by the UCR (principal / associated / related diagnoses)
    dp_ucr: Optional[str] = None
    da_ucr: Optional[str] = None
    dr_ucr: Optional[str] = None
    # Acts proposed by the UCR
    actes_ucr: Optional[str] = None
    # Counter-argumentation generated later by the RAG + LLM pipeline
    contre_argumentation: Optional[str] = None
    # RAG sources used to build the counter-argumentation
    sources_reponse: list[RAGSource] = Field(default_factory=list)
||||||
|
|
||||||
|
|
||||||
class AnonymizationReport(BaseModel):
|
class AnonymizationReport(BaseModel):
|
||||||
source_file: str
|
source_file: str
|
||||||
total_replacements: int = 0
|
total_replacements: int = 0
|
||||||
|
|||||||
0
src/control/__init__.py
Normal file
0
src/control/__init__.py
Normal file
115
src/control/cpam_parser.py
Normal file
115
src/control/cpam_parser.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
"""Parsing du fichier Excel de contrôle CPAM (UCR) et matching OGC."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import openpyxl
|
||||||
|
|
||||||
|
from ..config import ControleCPAM
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Colonnes attendues dans le fichier Excel
|
||||||
|
_EXPECTED_COLUMNS = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_cpam_excel(path: str | Path) -> dict[int, list[ControleCPAM]]:
    """Read the CPAM control Excel file and return a dict OGC -> list of controls.

    Args:
        path: Path to the CPAM .xlsx file.

    Returns:
        Dict keyed by OGC number, with the list of associated controls.
        Empty dict when the file is missing, empty, or missing required columns.
    """
    path = Path(path)
    if not path.exists():
        logger.error("Fichier CPAM introuvable : %s", path)
        return {}

    wb = openpyxl.load_workbook(path, read_only=True)
    # Fix: read-only workbooks keep the underlying file handle open until
    # explicitly closed — ensure wb.close() runs on every exit path.
    try:
        ws = wb[wb.sheetnames[0]]

        # Read the header row
        rows = ws.iter_rows(values_only=True)
        header = next(rows, None)
        if header is None:
            logger.error("Fichier CPAM vide : %s", path)
            return {}

        # Build the column-name -> index mapping
        col_map = {}
        for i, col_name in enumerate(header):
            if col_name:
                col_map[col_name.strip()] = i

        # Only the first four columns are mandatory; the rest are optional
        missing = [c for c in _EXPECTED_COLUMNS[:4] if c not in col_map]
        if missing:
            logger.error("Colonnes manquantes dans le fichier CPAM : %s", missing)
            return {}

        result: dict[int, list[ControleCPAM]] = {}
        count = 0

        for row in rows:
            ogc_val = row[col_map["N° OGC"]]
            if ogc_val is None:
                # Blank row (or missing OGC) — skip silently
                continue

            try:
                numero_ogc = int(ogc_val)
            except (ValueError, TypeError):
                logger.warning("N° OGC invalide ignoré : %s", ogc_val)
                continue

            # Positional fallbacks (1..3) mirror the expected column order
            controle = ControleCPAM(
                numero_ogc=numero_ogc,
                titre=str(row[col_map.get("Titre", 1)] or "").strip(),
                arg_ucr=str(row[col_map.get("Arg_UCR", 2)] or "").strip(),
                decision_ucr=str(row[col_map.get("Décision_UCR", 3)] or "").strip(),
                dp_ucr=_clean_optional(row, col_map.get("DP_UCR")),
                da_ucr=_clean_optional(row, col_map.get("DA_UCR")),
                dr_ucr=_clean_optional(row, col_map.get("DR_UCR")),
                actes_ucr=_clean_optional(row, col_map.get("Actes_UCR")),
            )

            result.setdefault(numero_ogc, []).append(controle)
            count += 1

        logger.info("CPAM : %d contrôles chargés pour %d OGC distincts", count, len(result))
        return result
    finally:
        wb.close()
||||||
|
|
||||||
|
|
||||||
|
def _clean_optional(row: tuple, idx: int | None) -> str | None:
|
||||||
|
"""Extrait une valeur optionnelle depuis une ligne Excel."""
|
||||||
|
if idx is None or idx >= len(row):
|
||||||
|
return None
|
||||||
|
val = row[idx]
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
val = str(val).strip()
|
||||||
|
return val if val else None
|
||||||
|
|
||||||
|
|
||||||
|
def match_dossier_ogc(source_name: str, cpam_data: dict[int, list[ControleCPAM]]) -> list[ControleCPAM]:
    """Look up the CPAM controls matching a dossier by its OGC prefix.

    Sub-folder names follow the "17_23100690" pattern, where 17 is the OGC number.

    Args:
        source_name: Name of the sub-folder (e.g. "17_23100690").
        cpam_data: OGC -> controls dict as returned by parse_cpam_excel().

    Returns:
        The list of CPAM controls for that OGC, or an empty list when the
        name has no numeric prefix or the OGC is unknown.
    """
    prefix = re.match(r"^(\d+)_", source_name)
    if prefix is None:
        return []
    return cpam_data.get(int(prefix.group(1)), [])
||||||
228
src/control/cpam_response.py
Normal file
228
src/control/cpam_response.py
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
"""Génération de contre-argumentation pour les contrôles CPAM via RAG + Ollama."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from ..config import ControleCPAM, DossierMedical, RAGSource
|
||||||
|
from ..medical.ollama_client import call_ollama
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _search_rag_for_control(controle: ControleCPAM, dossier: DossierMedical) -> list[dict]:
    """Targeted RAG search on the subject of the disagreement."""
    try:
        from ..medical.rag_search import search_similar
    except Exception:
        logger.warning("Index RAG non disponible pour la contre-argumentation")
        return []

    # Combine the control title, the contested codes and the (truncated)
    # CPAM argument into one focused query.
    pieces: list[str] = []

    if controle.titre:
        pieces.append(controle.titre)

    if controle.dp_ucr:
        pieces.append(f"diagnostic principal {controle.dp_ucr}")
    if controle.da_ucr:
        pieces.append(f"diagnostic associé {controle.da_ucr}")

    if controle.arg_ucr:
        # Keep only the start of the CPAM argument to stay on-topic
        pieces.append(controle.arg_ucr[:300])

    query = " ".join(pieces)
    if not query.strip():
        return []

    return search_similar(query, top_k=8)
||||||
|
|
||||||
|
|
||||||
|
def _build_cpam_prompt(
    dossier: DossierMedical,
    controle: ControleCPAM,
    sources: list[dict],
) -> str:
    """Build the LLM prompt for the CPAM counter-argumentation.

    Args:
        dossier: Structured medical dossier of the establishment.
        controle: The CPAM (UCR) control being contested.
        sources: RAG hits as dicts (keys: "document", "page", "code", "extrait").

    Returns:
        Full prompt string; the model is instructed to answer JSON-only.
    """
    # Summary of the medical dossier, one "- label : value" line per item
    dossier_lines = []

    if dossier.diagnostic_principal:
        dp = dossier.diagnostic_principal
        dp_code = f" ({dp.cim10_suggestion})" if dp.cim10_suggestion else ""
        dossier_lines.append(f"- DP : {dp.texte}{dp_code}")

    if dossier.diagnostics_associes:
        das_parts = []
        for das in dossier.diagnostics_associes:
            code = f" ({das.cim10_suggestion})" if das.cim10_suggestion else ""
            das_parts.append(f"{das.texte}{code}")
        dossier_lines.append(f"- DAS : {', '.join(das_parts)}")

    if dossier.actes_ccam:
        actes = [f"{a.texte} ({a.code_ccam_suggestion})" if a.code_ccam_suggestion else a.texte
                 for a in dossier.actes_ccam]
        dossier_lines.append(f"- Actes CCAM : {', '.join(actes)}")

    sejour = dossier.sejour
    if sejour.duree_sejour is not None:
        dossier_lines.append(f"- Durée séjour : {sejour.duree_sejour} jours")
    if sejour.sexe or sejour.age is not None:
        patient_info = []
        if sejour.sexe:
            patient_info.append(sejour.sexe)
        if sejour.age is not None:
            patient_info.append(f"{sejour.age} ans")
        dossier_lines.append(f"- Patient : {', '.join(patient_info)}")

    if dossier.biologie_cle:
        # Only the first 5 lab entries that actually carry a value
        bio = [f"{b.test}: {b.valeur}" for b in dossier.biologie_cle[:5] if b.valeur]
        if bio:
            dossier_lines.append(f"- Biologie clé : {', '.join(bio)}")

    if dossier.complications:
        dossier_lines.append(f"- Complications : {', '.join(dossier.complications)}")

    dossier_str = "\n".join(dossier_lines) if dossier_lines else "Non disponible"

    # Codes contested by the CPAM (only those actually present on the control)
    codes_contestes = []
    if controle.dp_ucr:
        codes_contestes.append(f"DP proposé par UCR : {controle.dp_ucr}")
    if controle.da_ucr:
        codes_contestes.append(f"DA proposés par UCR : {controle.da_ucr}")
    if controle.dr_ucr:
        codes_contestes.append(f"DR proposé par UCR : {controle.dr_ucr}")
    if controle.actes_ucr:
        codes_contestes.append(f"Actes proposés par UCR : {controle.actes_ucr}")
    codes_str = "\n".join(codes_contestes) if codes_contestes else "Aucun code spécifique proposé"

    # RAG sources rendered as numbered excerpts (each excerpt capped at 800 chars)
    sources_text = ""
    for i, src in enumerate(sources, 1):
        # Map internal document ids to display names; fall back to the raw id
        doc_name = {
            "cim10": "CIM-10 FR 2026",
            "cim10_alpha": "CIM-10 Index Alphabétique 2026",
            "guide_methodo": "Guide Méthodologique MCO 2026",
            "ccam": "CCAM PMSI V4 2025",
        }.get(src.get("document", ""), src.get("document", ""))

        code_info = f" (code: {src['code']})" if src.get("code") else ""
        page_info = f" [page {src['page']}]" if src.get("page") else ""

        sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n"
        sources_text += (src.get("extrait", "")[:800]) + "\n\n"

    return f"""Tu es un médecin DIM (Département d'Information Médicale) expert en contentieux T2A.
Tu dois contre-argumenter la décision de la CPAM (UCR) point par point, en t'appuyant sur le guide méthodologique et la CIM-10.

DOSSIER MÉDICAL DE L'ÉTABLISSEMENT :
{dossier_str}

OBJET DU DÉSACCORD : {controle.titre}

ARGUMENTATION DE LA CPAM (UCR) :
{controle.arg_ucr}

DÉCISION UCR : {controle.decision_ucr}

CODES CONTESTÉS :
{codes_str}

SOURCES RÉGLEMENTAIRES (Guide méthodologique, CIM-10) :
{sources_text}

CONSIGNES :
- Analyse objectivement l'argument de la CPAM
- Identifie les points où la CPAM a raison (le cas échéant)
- Contre-argumente point par point en citant le guide méthodologique et la CIM-10
- Cite les références précises (pages, articles, fascicules)
- Propose une conclusion et la position recommandée

Réponds UNIQUEMENT avec un objet JSON au format suivant :
{{
"analyse_contestation": "Résumé de ce que conteste la CPAM",
"points_accord": "Points où la CPAM a raison (ou 'Aucun' si non applicable)",
"contre_arguments": "Arguments point par point en faveur de l'établissement",
"references": "Références guide méthodologique / CIM-10 citées",
"conclusion": "Synthèse et position recommandée"
}}"""
||||||
|
|
||||||
|
|
||||||
|
def _format_response(parsed: dict) -> str:
|
||||||
|
"""Formate la réponse LLM en texte lisible."""
|
||||||
|
sections = []
|
||||||
|
|
||||||
|
analyse = parsed.get("analyse_contestation")
|
||||||
|
if analyse:
|
||||||
|
sections.append(f"ANALYSE DE LA CONTESTATION\n{analyse}")
|
||||||
|
|
||||||
|
accord = parsed.get("points_accord")
|
||||||
|
if accord and accord.lower() not in ("aucun", "non applicable", "n/a", ""):
|
||||||
|
sections.append(f"POINTS D'ACCORD\n{accord}")
|
||||||
|
|
||||||
|
contre = parsed.get("contre_arguments")
|
||||||
|
if contre:
|
||||||
|
sections.append(f"CONTRE-ARGUMENTS\n{contre}")
|
||||||
|
|
||||||
|
refs = parsed.get("references")
|
||||||
|
if refs:
|
||||||
|
sections.append(f"REFERENCES\n{refs}")
|
||||||
|
|
||||||
|
conclusion = parsed.get("conclusion")
|
||||||
|
if conclusion:
|
||||||
|
sections.append(f"CONCLUSION\n{conclusion}")
|
||||||
|
|
||||||
|
return "\n\n".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cpam_response(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> tuple[str, list[RAGSource]]:
    """Generate a counter-argumentation for one CPAM control.

    Pipeline: targeted RAG search -> prompt construction -> Ollama call
    -> formatting of the JSON answer into readable text.

    Args:
        dossier: The analyzed medical dossier.
        controle: The CPAM control to contest.

    Returns:
        Tuple (counter-argumentation text, RAG sources used). The text is
        empty when Ollama is unavailable; the sources are returned regardless.
    """
    logger.info("CPAM : génération contre-argumentation pour OGC %d — %s",
                controle.numero_ogc, controle.titre)

    # 1. Targeted RAG search
    sources = _search_rag_for_control(controle, dossier)
    logger.info(" RAG : %d sources trouvées", len(sources))

    # 2. Prompt construction
    prompt = _build_cpam_prompt(dossier, controle, sources)

    # 3. Ollama call (low temperature for a factual, reproducible answer)
    result = call_ollama(prompt, temperature=0.1, max_tokens=3000)

    # 4. Convert raw RAG hits into RAGSource models (excerpt capped at 200 chars)
    rag_sources = [
        RAGSource(
            document=s.get("document", ""),
            page=s.get("page"),
            code=s.get("code"),
            extrait=s.get("extrait", "")[:200],
        )
        for s in sources
    ]

    if result is None:
        # Best-effort: degrade gracefully instead of failing the whole run
        logger.warning(" Ollama non disponible — contre-argumentation non générée")
        return "", rag_sources

    # 5. Format the parsed JSON answer as readable text
    text = _format_response(result)
    logger.info(" Contre-argumentation générée (%d caractères)", len(text))

    return text, rag_sources
||||||
0
src/export/__init__.py
Normal file
0
src/export/__init__.py
Normal file
190
src/export/rum_export.py
Normal file
190
src/export/rum_export.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
"""Export au format RUM (Résumé d'Unité Médicale) V016 pour le groupeur ATIH.
|
||||||
|
|
||||||
|
Génère une ligne fixe de 165 caractères suivie de zones variables
|
||||||
|
(DAS en 8 chars, actes CCAM en 29 chars chacun).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..config import FINESS, NUM_UM, DossierMedical
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RUMConfig:
    """Establishment identifiers written into the RUM fixed zone."""

    # FINESS number of the establishment (padded/truncated to 9 chars on export)
    finess: str = FINESS
    # Medical unit number (padded/truncated to 4 chars on export)
    num_um: str = NUM_UM
||||||
|
|
||||||
|
|
||||||
|
def _format_cim10(code: str | None) -> str:
|
||||||
|
"""Formate un code CIM-10 sur 8 caractères (sans point, padded)."""
|
||||||
|
if not code:
|
||||||
|
return " " * 8
|
||||||
|
clean = code.upper().replace(".", "").strip()
|
||||||
|
return clean.ljust(8)[:8]
|
||||||
|
|
||||||
|
|
||||||
|
def _format_date(date_str: str | None) -> str:
|
||||||
|
"""Convertit une date DD/MM/YYYY ou YYYY-MM-DD en DDMMYYYY (8 chars)."""
|
||||||
|
if not date_str:
|
||||||
|
return " " * 8
|
||||||
|
date_str = date_str.strip()
|
||||||
|
# Format DD/MM/YYYY
|
||||||
|
m = re.match(r"(\d{2})/(\d{2})/(\d{4})", date_str)
|
||||||
|
if m:
|
||||||
|
return f"{m.group(1)}{m.group(2)}{m.group(3)}"
|
||||||
|
# Format YYYY-MM-DD
|
||||||
|
m = re.match(r"(\d{4})-(\d{2})-(\d{2})", date_str)
|
||||||
|
if m:
|
||||||
|
return f"{m.group(3)}{m.group(2)}{m.group(1)}"
|
||||||
|
return " " * 8
|
||||||
|
|
||||||
|
|
||||||
|
def _format_sex(sexe: str | None) -> str:
|
||||||
|
"""Convertit le sexe en code RUM (1=M, 2=F)."""
|
||||||
|
if not sexe:
|
||||||
|
return " "
|
||||||
|
s = sexe.strip().upper()
|
||||||
|
if s in ("M", "MASCULIN", "HOMME", "H"):
|
||||||
|
return "1"
|
||||||
|
if s in ("F", "FEMININ", "FÉMININ", "FEMME"):
|
||||||
|
return "2"
|
||||||
|
return " "
|
||||||
|
|
||||||
|
|
||||||
|
def _map_mode_entree(text: str | None) -> str:
|
||||||
|
"""Convertit le mode d'entrée textuel en code RUM (1 char)."""
|
||||||
|
if not text:
|
||||||
|
return " "
|
||||||
|
t = text.strip().lower()
|
||||||
|
mapping = {
|
||||||
|
"domicile": "8",
|
||||||
|
"mutation": "6",
|
||||||
|
"transfert": "7",
|
||||||
|
"urgences": "8",
|
||||||
|
"urgence": "8",
|
||||||
|
}
|
||||||
|
for key, code in mapping.items():
|
||||||
|
if key in t:
|
||||||
|
return code
|
||||||
|
return " "
|
||||||
|
|
||||||
|
|
||||||
|
def _map_mode_sortie(text: str | None) -> str:
|
||||||
|
"""Convertit le mode de sortie textuel en code RUM (1 char)."""
|
||||||
|
if not text:
|
||||||
|
return " "
|
||||||
|
t = text.strip().lower()
|
||||||
|
mapping = {
|
||||||
|
"domicile": "8",
|
||||||
|
"mutation": "6",
|
||||||
|
"transfert": "7",
|
||||||
|
"deces": "9",
|
||||||
|
"décès": "9",
|
||||||
|
"décédé": "9",
|
||||||
|
"decede": "9",
|
||||||
|
}
|
||||||
|
for key, code in mapping.items():
|
||||||
|
if key in t:
|
||||||
|
return code
|
||||||
|
return " "
|
||||||
|
|
||||||
|
|
||||||
|
def _format_ccam_act(acte) -> str:
    """Format one CCAM act as a 29-character RUM field.

    Layout: code(7) + phase(1) + activity(1) + date(8) + doc/extension(12).
    Phase and activity are hard-coded to "1" here — NOTE(review): confirm
    against the RUM V016 specification if other values must be supported.
    """
    raw_code = (acte.code_ccam_suggestion or "").upper().replace(" ", "")
    padded_code = raw_code.ljust(7)[:7]
    act_date = _format_date(acte.date)
    # "1" (phase) + "1" (activity) then 12 blank extension chars
    return padded_code + "11" + act_date + " " * 12
||||||
|
|
||||||
|
|
||||||
|
def export_rum(dossier: DossierMedical, config: RUMConfig | None = None) -> str:
    """Generate the complete RUM text for a medical dossier.

    Args:
        dossier: Structured medical dossier to export.
        config: Establishment identifiers; defaults to ``RUMConfig()`` built
            from the module-level FINESS / NUM_UM constants.

    Returns:
        String in RUM V016 format: a 165-char fixed zone followed by the
        variable zones (8 chars per DAS, 29 chars per CCAM act).
    """
    if config is None:
        config = RUMConfig()

    sejour = dossier.sejour
    dp = dossier.diagnostic_principal

    # Counters written into the fixed zone and driving the variable zones
    das_list = dossier.diagnostics_associes
    actes_list = dossier.actes_ccam
    nb_das = len(das_list)
    nb_actes = len(actes_list)

    # Identifiers derived from the source file name (no real RSS numbering here)
    source = dossier.source_file or "UNKNOWN"
    num_rss = source.replace(".pdf", "").replace(" ", "_").ljust(20)[:20]
    num_admin = num_rss
    num_rum = source[:10].ljust(10)[:10]

    # Build the fixed zone (165 characters); comments give 1-based positions
    parts = [
        " " * 2,  # 1-2 : Classification version (blank)
        " " * 6,  # 3-8 : GHM (blank, filled in by the grouper)
        " ",  # 9 : Filler
        "016",  # 10-12 : Format version
        " " * 3,  # 13-15 : Return code
        config.finess.ljust(9)[:9],  # 16-24 : FINESS
        "016",  # 25-27 : RUM version
        num_rss,  # 28-47 : RSS number
        num_admin,  # 48-67 : Administrative number
        num_rum,  # 68-77 : RUM number
        _format_date(None),  # 78-85 : Birth date (not available)
        _format_sex(sejour.sexe),  # 86 : Sex
        config.num_um.ljust(4)[:4],  # 87-90 : UM number
        " " * 2,  # 91-92 : Authorization type
        _format_date(sejour.date_entree),  # 93-100: UM entry date
        _map_mode_entree(sejour.mode_entree),  # 101 : Entry mode
        " ",  # 102 : Origin
        _format_date(sejour.date_sortie),  # 103-110: UM discharge date
        _map_mode_sortie(sejour.mode_sortie),  # 111 : Discharge mode
        " ",  # 112 : Destination
        " " * 5,  # 113-117: Residence postcode
        " " * 4,  # 118-121: Newborn weight
        " " * 2,  # 122-123: Gestational age
        "00",  # 124-125: Number of sessions
        str(nb_das).zfill(2)[-2:],  # 126-127: Number of DAS
        "00",  # 128-129: Number of DAD
        str(nb_actes).zfill(2)[-2:],  # 130-131: Number of acts
        _format_cim10(dp.cim10_suggestion if dp else None),  # 132-139: DP
        " " * 8,  # 140-147: DR
        " " * 3,  # 148-150: IGS2
        " " * 15,  # 151-165: Reserved
    ]

    fixed = "".join(parts)
    # Internal invariant: the fixed zone must be exactly 165 chars
    assert len(fixed) == 165, f"Zone fixe RUM: attendu 165, obtenu {len(fixed)}"

    # Variable zones
    variable_parts: list[str] = []

    # DAS (8 chars each)
    for das in das_list:
        variable_parts.append(_format_cim10(das.cim10_suggestion))

    # CCAM acts (29 chars each)
    for acte in actes_list:
        variable_parts.append(_format_ccam_act(acte))

    return fixed + "".join(variable_parts)
||||||
|
|
||||||
|
|
||||||
|
def save_rum(dossier: DossierMedical, path: Path, config: RUMConfig | None = None) -> None:
    """Serialize *dossier* to RUM format and write it to *path* (UTF-8).

    Parent directories are created as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(export_rum(dossier, config), encoding="utf-8")
||||||
86
src/main.py
86
src/main.py
@@ -10,13 +10,14 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .anonymization.anonymizer import Anonymizer
|
from .anonymization.anonymizer import Anonymizer
|
||||||
from .config import ANONYMIZED_DIR, REPORTS_DIR, STRUCTURED_DIR, AnonymizationReport, DossierMedical
|
from .config import ANONYMIZED_DIR, OUTPUT_DIR, REPORTS_DIR, STRUCTURED_DIR, AnonymizationReport, DossierMedical
|
||||||
from .extraction.document_classifier import classify
|
from .extraction.document_classifier import classify
|
||||||
from .extraction.crh_parser import parse_crh
|
from .extraction.crh_parser import parse_crh
|
||||||
from .extraction.document_splitter import split_documents
|
from .extraction.document_splitter import split_documents
|
||||||
from .extraction.pdf_extractor import extract_text
|
from .extraction.pdf_extractor import extract_text
|
||||||
from .extraction.trackare_parser import parse_trackare
|
from .extraction.trackare_parser import parse_trackare
|
||||||
from .medical.cim10_extractor import extract_medical_info
|
from .medical.cim10_extractor import extract_medical_info
|
||||||
|
from .medical.ghm import estimate_ghm
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
@@ -84,10 +85,20 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
|||||||
dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag)
|
dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag)
|
||||||
dossier.source_file = pdf_path.name
|
dossier.source_file = pdf_path.name
|
||||||
dossier.document_type = doc_type
|
dossier.document_type = doc_type
|
||||||
dossier.processing_time_s = round(time.time() - t0, 2)
|
|
||||||
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
|
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
|
||||||
logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam))
|
logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam))
|
||||||
|
|
||||||
|
# 8. Estimation GHM
|
||||||
|
try:
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
dossier.ghm_estimation = ghm
|
||||||
|
logger.info(" GHM : CMD=%s, Type=%s, Sévérité=%d → %s",
|
||||||
|
ghm.cmd or "?", ghm.type_ghm or "?",
|
||||||
|
ghm.severite, ghm.ghm_approx or "?")
|
||||||
|
except Exception:
|
||||||
|
logger.warning(" Erreur estimation GHM", exc_info=True)
|
||||||
|
|
||||||
|
dossier.processing_time_s = round(time.time() - t0, 2)
|
||||||
results.append((anonymized_text, dossier, report))
|
results.append((anonymized_text, dossier, report))
|
||||||
|
|
||||||
logger.info(" Temps total : %.2fs", time.time() - t0)
|
logger.info(" Temps total : %.2fs", time.time() - t0)
|
||||||
@@ -120,6 +131,7 @@ def write_outputs(
|
|||||||
dossier: DossierMedical,
|
dossier: DossierMedical,
|
||||||
report: AnonymizationReport,
|
report: AnonymizationReport,
|
||||||
subdir: str | None = None,
|
subdir: str | None = None,
|
||||||
|
export_rum_flag: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Écrit les fichiers de sortie."""
|
"""Écrit les fichiers de sortie."""
|
||||||
anon_dir = ANONYMIZED_DIR / subdir if subdir else ANONYMIZED_DIR
|
anon_dir = ANONYMIZED_DIR / subdir if subdir else ANONYMIZED_DIR
|
||||||
@@ -151,6 +163,17 @@ def write_outputs(
|
|||||||
)
|
)
|
||||||
logger.info(" → %s", report_path)
|
logger.info(" → %s", report_path)
|
||||||
|
|
||||||
|
# Export RUM
|
||||||
|
if export_rum_flag:
|
||||||
|
from .export.rum_export import save_rum
|
||||||
|
rum_dir = OUTPUT_DIR / "rum"
|
||||||
|
if subdir:
|
||||||
|
rum_dir = rum_dir / subdir
|
||||||
|
rum_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
rum_path = rum_dir / f"{stem}_rum.txt"
|
||||||
|
save_rum(dossier, rum_path)
|
||||||
|
logger.info(" → %s", rum_path)
|
||||||
|
|
||||||
|
|
||||||
def main(input_path: str | None = None) -> None:
|
def main(input_path: str | None = None) -> None:
|
||||||
"""Point d'entrée principal."""
|
"""Point d'entrée principal."""
|
||||||
@@ -197,6 +220,16 @@ def main(input_path: str | None = None) -> None:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Forcer la reconstruction de l'index FAISS",
|
help="Forcer la reconstruction de l'index FAISS",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--export-rum",
|
||||||
|
action="store_true",
|
||||||
|
help="Exporter les dossiers au format RUM V016 (pour groupeur ATIH)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--control-cpam",
|
||||||
|
metavar="PATH",
|
||||||
|
help="Fichier Excel de contrôle CPAM (enrichit les dossiers avec contre-argumentation)",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.build_dict:
|
if args.build_dict:
|
||||||
@@ -226,6 +259,16 @@ def main(input_path: str | None = None) -> None:
|
|||||||
if args.no_rag:
|
if args.no_rag:
|
||||||
_use_rag = False
|
_use_rag = False
|
||||||
|
|
||||||
|
export_rum_flag = args.export_rum
|
||||||
|
|
||||||
|
# Chargement contrôle CPAM
|
||||||
|
cpam_data = None
|
||||||
|
if args.control_cpam:
|
||||||
|
from .control.cpam_parser import parse_cpam_excel
|
||||||
|
cpam_data = parse_cpam_excel(args.control_cpam)
|
||||||
|
if not cpam_data:
|
||||||
|
logger.warning("Aucun contrôle CPAM chargé depuis %s", args.control_cpam)
|
||||||
|
|
||||||
input_paths = args.input
|
input_paths = args.input
|
||||||
|
|
||||||
# Collecte des groupes (pdfs, subdir) à traiter
|
# Collecte des groupes (pdfs, subdir) à traiter
|
||||||
@@ -274,12 +317,13 @@ def main(input_path: str | None = None) -> None:
|
|||||||
multi = len(pdf_results) > 1
|
multi = len(pdf_results) > 1
|
||||||
for part_idx, (anonymized_text, dossier, report) in enumerate(pdf_results):
|
for part_idx, (anonymized_text, dossier, report) in enumerate(pdf_results):
|
||||||
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
|
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
|
||||||
write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir)
|
write_outputs(part_stem, anonymized_text, dossier, report, subdir=subdir, export_rum_flag=export_rum_flag)
|
||||||
group_dossiers.append(dossier)
|
group_dossiers.append(dossier)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Erreur lors du traitement de %s", pdf_path.name)
|
logger.exception("Erreur lors du traitement de %s", pdf_path.name)
|
||||||
|
|
||||||
# Fusion multi-PDFs si plusieurs documents dans le même groupe
|
# Fusion multi-PDFs si plusieurs documents dans le même groupe
|
||||||
|
merged = None
|
||||||
if len(group_dossiers) > 1 and subdir:
|
if len(group_dossiers) > 1 and subdir:
|
||||||
try:
|
try:
|
||||||
from .medical.fusion import merge_dossiers
|
from .medical.fusion import merge_dossiers
|
||||||
@@ -287,13 +331,47 @@ def main(input_path: str | None = None) -> None:
|
|||||||
struct_dir = STRUCTURED_DIR / subdir
|
struct_dir = STRUCTURED_DIR / subdir
|
||||||
struct_dir.mkdir(parents=True, exist_ok=True)
|
struct_dir.mkdir(parents=True, exist_ok=True)
|
||||||
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
|
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
|
||||||
|
|
||||||
|
# Export RUM du dossier fusionné
|
||||||
|
if export_rum_flag:
|
||||||
|
from .export.rum_export import save_rum
|
||||||
|
rum_dir = OUTPUT_DIR / "rum" / subdir
|
||||||
|
rum_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
rum_path = rum_dir / f"{subdir}_fusionne_rum.txt"
|
||||||
|
save_rum(merged, rum_path)
|
||||||
|
logger.info(" → RUM fusionné : %s", rum_path)
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Erreur lors de la fusion du groupe %s", subdir)
|
||||||
|
merged = None
|
||||||
|
|
||||||
|
# Contrôle CPAM : enrichir le dossier principal (fusionné ou dernier)
|
||||||
|
if cpam_data and subdir:
|
||||||
|
from .control.cpam_parser import match_dossier_ogc
|
||||||
|
controles = match_dossier_ogc(subdir, cpam_data)
|
||||||
|
if controles:
|
||||||
|
from .control.cpam_response import generate_cpam_response
|
||||||
|
target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
|
||||||
|
if target:
|
||||||
|
logger.info(" CPAM : %d contrôle(s) pour %s", len(controles), subdir)
|
||||||
|
for ctrl in controles:
|
||||||
|
text, sources = generate_cpam_response(target, ctrl)
|
||||||
|
ctrl.contre_argumentation = text
|
||||||
|
ctrl.sources_reponse = sources
|
||||||
|
target.controles_cpam = controles
|
||||||
|
|
||||||
|
# Écrire le dossier fusionné (après enrichissement CPAM éventuel)
|
||||||
|
if merged is not None and subdir:
|
||||||
|
try:
|
||||||
|
struct_dir = STRUCTURED_DIR / subdir
|
||||||
|
struct_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
|
||||||
merged_path.write_text(
|
merged_path.write_text(
|
||||||
merged.model_dump_json(indent=2, exclude_none=True),
|
merged.model_dump_json(indent=2, exclude_none=True),
|
||||||
encoding="utf-8",
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
logger.info(" → Dossier fusionné : %s", merged_path)
|
logger.info(" → Dossier fusionné : %s", merged_path)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Erreur lors de la fusion du groupe %s", subdir)
|
logger.exception("Erreur écriture dossier fusionné %s", subdir)
|
||||||
|
|
||||||
logger.info("Terminé.")
|
logger.info("Terminé.")
|
||||||
|
|
||||||
|
|||||||
@@ -173,6 +173,32 @@ def lookup(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_code(code: str) -> str:
|
||||||
|
"""Normalise un code CIM-10 : K810 → K81.0, k85.1 → K85.1."""
|
||||||
|
code = code.strip().upper()
|
||||||
|
# Insérer le point si absent : K810 → K81.0
|
||||||
|
if len(code) > 3 and "." not in code:
|
||||||
|
code = code[:3] + "." + code[3:]
|
||||||
|
return code
|
||||||
|
|
||||||
|
|
||||||
|
def validate_code(code: str) -> tuple[bool, str]:
|
||||||
|
"""Vérifie si un code CIM-10 existe dans le dictionnaire.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(is_valid, label) — label vide si invalide.
|
||||||
|
"""
|
||||||
|
d = load_dict()
|
||||||
|
normalized = normalize_code(code)
|
||||||
|
if normalized in d:
|
||||||
|
return True, d[normalized]
|
||||||
|
# Tenter aussi le code brut (3 caractères sans point)
|
||||||
|
raw = code.upper().strip()
|
||||||
|
if raw in d:
|
||||||
|
return True, d[raw]
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
|
||||||
def reset_cache() -> None:
|
def reset_cache() -> None:
|
||||||
"""Réinitialise les caches (utile pour les tests)."""
|
"""Réinitialise les caches (utile pour les tests)."""
|
||||||
global _dict_cache, _normalized_cache
|
global _dict_cache, _normalized_cache
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from typing import Optional
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from .cim10_dict import lookup as dict_lookup, normalize_text
|
from .cim10_dict import lookup as dict_lookup, normalize_text, normalize_code, validate_code as cim10_validate
|
||||||
from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
|
from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
|
||||||
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text
|
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text
|
||||||
from ..config import (
|
from ..config import (
|
||||||
@@ -118,6 +118,9 @@ def extract_medical_info(
|
|||||||
# Post-processing : validation des codes CCAM contre le dictionnaire
|
# Post-processing : validation des codes CCAM contre le dictionnaire
|
||||||
_validate_ccam(dossier)
|
_validate_ccam(dossier)
|
||||||
|
|
||||||
|
# Post-processing : validation des codes CIM-10 contre le dictionnaire
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
|
||||||
# Post-processing : exclusions symptôme vs diagnostic précis
|
# Post-processing : exclusions symptôme vs diagnostic précis
|
||||||
_apply_exclusion_rules(dossier)
|
_apply_exclusion_rules(dossier)
|
||||||
|
|
||||||
@@ -663,6 +666,68 @@ def _validate_ccam(dossier: DossierMedical) -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_INVALID_CODE_PATTERNS = {"aucun", "none", "n/a", "non_codable", "aucun_code_valide", "inconnu"}
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback_cim10(texte: str) -> str | None:
|
||||||
|
"""Tente de trouver un code CIM-10 via le dictionnaire à partir du texte diagnostic."""
|
||||||
|
code = dict_lookup(texte, domain_overrides=CIM10_MAP)
|
||||||
|
if code:
|
||||||
|
is_valid, _ = cim10_validate(code)
|
||||||
|
if is_valid:
|
||||||
|
return code
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_cim10(dossier: DossierMedical) -> None:
|
||||||
|
"""Valide les codes CIM-10 suggérés par Ollama contre le dictionnaire."""
|
||||||
|
diags: list[tuple[str, Diagnostic]] = []
|
||||||
|
if dossier.diagnostic_principal:
|
||||||
|
diags.append(("DP", dossier.diagnostic_principal))
|
||||||
|
for das in dossier.diagnostics_associes:
|
||||||
|
diags.append(("DAS", das))
|
||||||
|
|
||||||
|
for type_diag, diag in diags:
|
||||||
|
if not diag.cim10_suggestion:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Rejeter les hallucinations
|
||||||
|
if diag.cim10_suggestion.lower().strip() in _INVALID_CODE_PATTERNS:
|
||||||
|
fallback = _fallback_cim10(diag.texte)
|
||||||
|
if fallback:
|
||||||
|
dossier.alertes_codage.append(
|
||||||
|
f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} » → fallback {fallback}"
|
||||||
|
)
|
||||||
|
diag.cim10_suggestion = fallback
|
||||||
|
diag.cim10_confidence = "medium"
|
||||||
|
else:
|
||||||
|
dossier.alertes_codage.append(
|
||||||
|
f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} »"
|
||||||
|
)
|
||||||
|
diag.cim10_suggestion = None
|
||||||
|
diag.cim10_confidence = None
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Normaliser le format (K810 → K81.0)
|
||||||
|
diag.cim10_suggestion = normalize_code(diag.cim10_suggestion)
|
||||||
|
|
||||||
|
# Valider contre le dictionnaire
|
||||||
|
is_valid, label = cim10_validate(diag.cim10_suggestion)
|
||||||
|
if not is_valid:
|
||||||
|
fallback = _fallback_cim10(diag.texte)
|
||||||
|
if fallback:
|
||||||
|
dossier.alertes_codage.append(
|
||||||
|
f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code invalide → fallback {fallback}"
|
||||||
|
)
|
||||||
|
diag.cim10_suggestion = fallback
|
||||||
|
diag.cim10_confidence = "medium"
|
||||||
|
else:
|
||||||
|
dossier.alertes_codage.append(
|
||||||
|
f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code absent du dictionnaire CIM-10"
|
||||||
|
)
|
||||||
|
diag.cim10_confidence = "low"
|
||||||
|
|
||||||
|
|
||||||
def _find_act_date(text: str, act_pattern: str) -> str | None:
|
def _find_act_date(text: str, act_pattern: str) -> str | None:
|
||||||
"""Trouve la date associée à un acte."""
|
"""Trouve la date associée à un acte."""
|
||||||
# Chercher "acte le DD/MM" ou "acte le DD/MM/YYYY"
|
# Chercher "acte le DD/MM" ou "acte le DD/MM/YYYY"
|
||||||
@@ -705,7 +770,7 @@ def _apply_severity_rules(dossier: DossierMedical) -> None:
|
|||||||
"""Enrichit les diagnostics avec les informations de sévérité heuristique."""
|
"""Enrichit les diagnostics avec les informations de sévérité heuristique."""
|
||||||
try:
|
try:
|
||||||
from .severity import enrich_dossier_severity
|
from .severity import enrich_dossier_severity
|
||||||
alertes = enrich_dossier_severity(
|
alertes, _cma_count, _cms_count = enrich_dossier_severity(
|
||||||
dossier.diagnostic_principal, dossier.diagnostics_associes,
|
dossier.diagnostic_principal, dossier.diagnostics_associes,
|
||||||
)
|
)
|
||||||
dossier.alertes_codage.extend(alertes)
|
dossier.alertes_codage.extend(alertes)
|
||||||
|
|||||||
@@ -33,9 +33,12 @@ def is_valid_diagnostic_text(text: str) -> bool:
|
|||||||
if re.match(r"^([a-zà-ÿ]{3,})\1+[a-zà-ÿ]*$", t, re.IGNORECASE):
|
if re.match(r"^([a-zà-ÿ]{3,})\1+[a-zà-ÿ]*$", t, re.IGNORECASE):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 5. Mots répétés ≥ 3 fois : "Spontanée spontanée spontanée spontanée"
|
# 5. Mots répétés : tous identiques ("Absence absence", "Anticoagulant anticoagulant")
|
||||||
|
# ou ≥ 3 occurrences du même mot
|
||||||
words = t.lower().split()
|
words = t.lower().split()
|
||||||
if words:
|
if len(words) >= 2:
|
||||||
|
if len(set(words)) == 1:
|
||||||
|
return False
|
||||||
from collections import Counter
|
from collections import Counter
|
||||||
counts = Counter(words)
|
counts = Counter(words)
|
||||||
if counts.most_common(1)[0][1] >= 3:
|
if counts.most_common(1)[0][1] >= 3:
|
||||||
@@ -47,4 +50,27 @@ def is_valid_diagnostic_text(text: str) -> bool:
|
|||||||
if t in {"Isolement", "Pp 500"}:
|
if t in {"Isolement", "Pp 500"}:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# 7. Ponctuation initiale (artefacts OCR) : ", sans précision"
|
||||||
|
if re.match(r'^[,.\-;:!)\]]\s', t):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 8. Pattern "À X.X" / "A X.X" (valeurs numériques OCR)
|
||||||
|
if re.match(r'^[ÀA]\s+\d+([.,]\d+)?$', t):
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 9. Crochets (artefacts OCR) : "Episode [episode"
|
||||||
|
if '[' in t or ']' in t:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 10. Termes de laboratoire isolés (un seul mot ≠ diagnostic)
|
||||||
|
_LAB_TERMS = {"hémoglobine", "créatinine", "plaquettes", "leucocytes", "glycémie",
|
||||||
|
"natrémie", "kaliémie", "calcémie", "bilirubine", "albumine",
|
||||||
|
"fibrinogène", "hématocrite", "cétonurie", "glycosurie"}
|
||||||
|
if t.lower() in _LAB_TERMS:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 11. Fragments anatomiques courts sans pathologie : "Dans la vessie", "Le rein"
|
||||||
|
if re.match(r'^(Dans |La |Le |Les |Au |Aux )', t) and len(t) < 30:
|
||||||
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|||||||
215
src/medical/ghm.py
Normal file
215
src/medical/ghm.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
"""Estimation heuristique du GHM (Groupe Homogène de Malades).
|
||||||
|
|
||||||
|
L'algorithme officiel (ATIH FG-MCO) est propriétaire. Ce module fournit une
|
||||||
|
estimation approximative utile comme pré-codage / aide au DIM :
|
||||||
|
1. CMD depuis le DP (table de plages CIM-10)
|
||||||
|
2. Type de prise en charge depuis les actes CCAM
|
||||||
|
3. Sévérité depuis les CMA/CMS
|
||||||
|
4. Construction du code GHM approximatif
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from ..config import DossierMedical, GHMEstimation
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Table CIM-10 → CMD (Catégorie Majeure de Diagnostic)
|
||||||
|
# Triée par borne inférieure pour lookup par bisect.
|
||||||
|
# Format : (debut, fin, cmd, libelle)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_CMD_RANGES: list[tuple[str, str, str, str]] = [
|
||||||
|
("A00", "A99", "18", "Maladies infectieuses et parasitaires"),
|
||||||
|
("B00", "B19", "18", "Maladies infectieuses et parasitaires"),
|
||||||
|
("B20", "B24", "25", "Maladies dues au VIH"),
|
||||||
|
("B25", "B99", "18", "Maladies infectieuses et parasitaires"),
|
||||||
|
("C00", "C97", "17", "Tumeurs malignes"),
|
||||||
|
("D00", "D09", "17", "Tumeurs malignes"),
|
||||||
|
("D10", "D48", "16", "Tumeurs bénignes, hémopathies"),
|
||||||
|
("D50", "D89", "16", "Tumeurs bénignes, hémopathies"),
|
||||||
|
("E00", "E07", "10", "Maladies endocriniennes"),
|
||||||
|
("E10", "E14", "10", "Maladies endocriniennes"),
|
||||||
|
("E15", "E46", "10", "Maladies endocriniennes"),
|
||||||
|
("E47", "E90", "10", "Maladies endocriniennes"),
|
||||||
|
("F00", "F09", "19", "Maladies mentales"),
|
||||||
|
("F10", "F19", "20", "Troubles mentaux liés à l'alcool et aux toxiques"),
|
||||||
|
("F20", "F99", "19", "Maladies mentales"),
|
||||||
|
("G00", "G99", "01", "Affections du système nerveux"),
|
||||||
|
("H00", "H59", "02", "Affections de l'oeil"),
|
||||||
|
("H60", "H95", "03", "Affections ORL"),
|
||||||
|
("I00", "I99", "05", "Affections de l'appareil circulatoire"),
|
||||||
|
("J00", "J99", "04", "Affections de l'appareil respiratoire"),
|
||||||
|
("K00", "K67", "06", "Affections du tube digestif"),
|
||||||
|
("K70", "K87", "07", "Affections hépatobiliaires et pancréatiques"),
|
||||||
|
("K90", "K93", "06", "Affections du tube digestif"),
|
||||||
|
("L00", "L99", "09", "Affections de la peau"),
|
||||||
|
("M00", "M99", "08", "Affections du système ostéo-articulaire"),
|
||||||
|
("N00", "N39", "11", "Affections du rein et des voies urinaires"),
|
||||||
|
("N40", "N51", "12", "Affections de l'appareil génital masculin"),
|
||||||
|
("N60", "N98", "13", "Affections de l'appareil génital féminin"),
|
||||||
|
("N99", "N99", "11", "Affections du rein et des voies urinaires"),
|
||||||
|
("O00", "O99", "14", "Grossesses, accouchements, post-partum"),
|
||||||
|
("P00", "P96", "15", "Nouveau-nés, période périnatale"),
|
||||||
|
("Q00", "Q99", "15", "Nouveau-nés, période périnatale"),
|
||||||
|
("R00", "R99", "23", "Facteurs influençant l'état de santé (symptômes)"),
|
||||||
|
("S00", "S99", "21", "Traumatismes"),
|
||||||
|
("T00", "T19", "21", "Traumatismes"),
|
||||||
|
("T20", "T32", "22", "Brûlures"),
|
||||||
|
("T33", "T98", "21", "Traumatismes"),
|
||||||
|
("U00", "U99", "26", "Catégories spéciales"),
|
||||||
|
("V00", "Y98", "24", "Causes externes"),
|
||||||
|
("Z00", "Z99", "23", "Facteurs influençant l'état de santé"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Pré-calcul : liste triée des bornes inférieures pour bisect
|
||||||
|
_CMD_STARTS = [r[0] for r in _CMD_RANGES]
|
||||||
|
|
||||||
|
|
||||||
|
def find_cmd(code_cim10: str) -> tuple[Optional[str], Optional[str]]:
|
||||||
|
"""Trouve la CMD correspondant à un code CIM-10.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(cmd, libelle) ou (None, None) si non trouvé.
|
||||||
|
"""
|
||||||
|
if not code_cim10:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Normaliser : majuscules, retirer le point
|
||||||
|
code = code_cim10.upper().replace(".", "").strip()
|
||||||
|
if len(code) < 3:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Prendre les 3 premiers caractères pour le lookup
|
||||||
|
code3 = code[:3]
|
||||||
|
|
||||||
|
# bisect pour trouver la plage candidate
|
||||||
|
idx = bisect.bisect_right(_CMD_STARTS, code3) - 1
|
||||||
|
if idx < 0:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
debut, fin, cmd, libelle = _CMD_RANGES[idx]
|
||||||
|
if debut <= code3 <= fin:
|
||||||
|
return cmd, libelle
|
||||||
|
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Préfixes CCAM classants (chirurgicaux)
|
||||||
|
# Les codes CCAM commençant par ces lettres correspondent à des organes
|
||||||
|
# et sont considérés chirurgicaux quand ils désignent un acte opératoire.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_CCAM_CHIRURGICAL_PREFIXES = {"H", "J", "K", "L", "N", "P", "Q"}
|
||||||
|
|
||||||
|
# Préfixes interventionnels (imagerie, endoscopie)
|
||||||
|
_CCAM_INTERVENTIONNEL_PREFIXES = {"Z", "Y"}
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_type_ghm(actes_ccam: list) -> str:
|
||||||
|
"""Détermine le type de prise en charge depuis les actes CCAM.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
"C" (chirurgical), "K" (interventionnel) ou "M" (médical).
|
||||||
|
"""
|
||||||
|
has_chirurgical = False
|
||||||
|
has_interventionnel = False
|
||||||
|
|
||||||
|
for acte in actes_ccam:
|
||||||
|
code = acte.code_ccam_suggestion
|
||||||
|
if not code or len(code) < 4:
|
||||||
|
continue
|
||||||
|
|
||||||
|
prefix = code[0].upper()
|
||||||
|
if prefix in _CCAM_CHIRURGICAL_PREFIXES:
|
||||||
|
has_chirurgical = True
|
||||||
|
break
|
||||||
|
if prefix in _CCAM_INTERVENTIONNEL_PREFIXES:
|
||||||
|
has_interventionnel = True
|
||||||
|
|
||||||
|
if has_chirurgical:
|
||||||
|
return "C"
|
||||||
|
if has_interventionnel:
|
||||||
|
return "K"
|
||||||
|
return "M"
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_severity(das_list: list) -> tuple[int, int, int]:
|
||||||
|
"""Calcule le niveau de sévérité à partir des DAS.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(niveau, cma_count, cms_count)
|
||||||
|
"""
|
||||||
|
cma_count = 0
|
||||||
|
cms_count = 0
|
||||||
|
|
||||||
|
for das in das_list:
|
||||||
|
if getattr(das, "est_cma", False):
|
||||||
|
cma_count += 1
|
||||||
|
if getattr(das, "est_cms", False):
|
||||||
|
cms_count += 1
|
||||||
|
|
||||||
|
if cms_count >= 2:
|
||||||
|
niveau = 4
|
||||||
|
elif cms_count >= 1 or cma_count >= 3:
|
||||||
|
niveau = 3
|
||||||
|
elif cma_count >= 2:
|
||||||
|
niveau = 2
|
||||||
|
else:
|
||||||
|
niveau = 1
|
||||||
|
|
||||||
|
return niveau, cma_count, cms_count
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_ghm(dossier: DossierMedical) -> GHMEstimation:
|
||||||
|
"""Estime le GHM d'un dossier médical.
|
||||||
|
|
||||||
|
Heuristique en 4 étapes :
|
||||||
|
1. CMD depuis le DP
|
||||||
|
2. Type de prise en charge depuis les actes CCAM
|
||||||
|
3. Sévérité depuis les CMA/CMS
|
||||||
|
4. Construction du code approximatif
|
||||||
|
"""
|
||||||
|
estimation = GHMEstimation()
|
||||||
|
|
||||||
|
# 1. CMD depuis le DP
|
||||||
|
dp = dossier.diagnostic_principal
|
||||||
|
dp_code = dp.cim10_suggestion if dp else None
|
||||||
|
|
||||||
|
if not dp:
|
||||||
|
estimation.alertes.append("DP absent — CMD non déterminable")
|
||||||
|
elif not dp_code:
|
||||||
|
estimation.alertes.append("DP sans code CIM-10 — CMD non déterminable")
|
||||||
|
else:
|
||||||
|
cmd, libelle = find_cmd(dp_code)
|
||||||
|
if cmd:
|
||||||
|
estimation.cmd = cmd
|
||||||
|
estimation.cmd_libelle = libelle
|
||||||
|
else:
|
||||||
|
estimation.alertes.append(f"CMD inconnue pour le code {dp_code}")
|
||||||
|
|
||||||
|
# Alerte DP symptomatique
|
||||||
|
code_letter = dp_code.upper().replace(".", "").strip()[:1]
|
||||||
|
if code_letter in ("R", "Z"):
|
||||||
|
estimation.alertes.append(
|
||||||
|
f"DP symptomatique ({dp_code}) — risque de CMD 23, impact tarif"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Type de prise en charge
|
||||||
|
estimation.type_ghm = _detect_type_ghm(dossier.actes_ccam)
|
||||||
|
|
||||||
|
# 3. Sévérité
|
||||||
|
niveau, cma_count, cms_count = _compute_severity(dossier.diagnostics_associes)
|
||||||
|
estimation.severite = niveau
|
||||||
|
estimation.cma_count = cma_count
|
||||||
|
estimation.cms_count = cms_count
|
||||||
|
|
||||||
|
# 4. Code approximatif
|
||||||
|
if estimation.cmd and estimation.type_ghm:
|
||||||
|
estimation.ghm_approx = f"{estimation.cmd}{estimation.type_ghm}??{estimation.severite}"
|
||||||
|
|
||||||
|
return estimation
|
||||||
85
src/medical/ollama_cache.py
Normal file
85
src/medical/ollama_cache.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
"""Cache persistant thread-safe pour les résultats Ollama."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class OllamaCache:
|
||||||
|
"""Cache JSON persistant pour éviter les appels Ollama redondants.
|
||||||
|
|
||||||
|
Clé = (texte_diagnostic_normalisé, type).
|
||||||
|
Le modèle Ollama est stocké dans les métadonnées : si le modèle change,
|
||||||
|
le cache est automatiquement invalidé.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, cache_path: Path, model: str):
|
||||||
|
self._path = cache_path
|
||||||
|
self._model = model
|
||||||
|
self._lock = threading.Lock()
|
||||||
|
self._data: dict[str, dict] = {}
|
||||||
|
self._dirty = False
|
||||||
|
self._load()
|
||||||
|
|
||||||
|
def _load(self) -> None:
|
||||||
|
"""Charge le cache depuis le disque."""
|
||||||
|
if not self._path.exists():
|
||||||
|
logger.info("Cache Ollama : nouveau cache (%s)", self._path)
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
raw = json.loads(self._path.read_text(encoding="utf-8"))
|
||||||
|
if raw.get("model") != self._model:
|
||||||
|
logger.info(
|
||||||
|
"Cache Ollama : modèle changé (%s → %s), cache invalidé",
|
||||||
|
raw.get("model"), self._model,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
self._data = raw.get("entries", {})
|
||||||
|
logger.info("Cache Ollama : %d entrées chargées", len(self._data))
|
||||||
|
except (json.JSONDecodeError, KeyError) as e:
|
||||||
|
logger.warning("Cache Ollama : fichier corrompu (%s), réinitialisé", e)
|
||||||
|
self._data = {}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _make_key(texte: str, diag_type: str) -> str:
|
||||||
|
"""Construit une clé normalisée."""
|
||||||
|
return f"{diag_type}::{texte.strip().lower()}"
|
||||||
|
|
||||||
|
def get(self, texte: str, diag_type: str) -> dict | None:
|
||||||
|
"""Récupère un résultat caché, ou None si absent."""
|
||||||
|
key = self._make_key(texte, diag_type)
|
||||||
|
with self._lock:
|
||||||
|
return self._data.get(key)
|
||||||
|
|
||||||
|
def put(self, texte: str, diag_type: str, result: dict) -> None:
|
||||||
|
"""Stocke un résultat dans le cache."""
|
||||||
|
key = self._make_key(texte, diag_type)
|
||||||
|
with self._lock:
|
||||||
|
self._data[key] = result
|
||||||
|
self._dirty = True
|
||||||
|
|
||||||
|
def save(self) -> None:
|
||||||
|
"""Persiste le cache sur disque si modifié."""
|
||||||
|
with self._lock:
|
||||||
|
if not self._dirty:
|
||||||
|
return
|
||||||
|
self._path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
payload = {
|
||||||
|
"model": self._model,
|
||||||
|
"entries": self._data,
|
||||||
|
}
|
||||||
|
self._path.write_text(
|
||||||
|
json.dumps(payload, ensure_ascii=False, indent=2),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
self._dirty = False
|
||||||
|
logger.info("Cache Ollama : %d entrées sauvegardées", len(self._data))
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
with self._lock:
|
||||||
|
return len(self._data)
|
||||||
80
src/medical/ollama_client.py
Normal file
80
src/medical/ollama_client.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""Client Ollama partagé — appel LLM en mode JSON natif."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_json_response(raw: str) -> dict | None:
|
||||||
|
"""Parse une réponse JSON d'Ollama, en gérant les blocs markdown."""
|
||||||
|
text = raw.strip()
|
||||||
|
if text.startswith("```"):
|
||||||
|
first_nl = text.find("\n")
|
||||||
|
if first_nl != -1:
|
||||||
|
text = text[first_nl + 1:]
|
||||||
|
if text.rstrip().endswith("```"):
|
||||||
|
text = text.rstrip()[:-3]
|
||||||
|
text = text.strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
return json.loads(text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("Ollama : JSON invalide : %s", raw[:200])
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def call_ollama(
|
||||||
|
prompt: str,
|
||||||
|
temperature: float = 0.1,
|
||||||
|
max_tokens: int = 2500,
|
||||||
|
) -> dict | None:
|
||||||
|
"""Appelle Ollama en mode JSON natif avec retry.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: Le prompt à envoyer.
|
||||||
|
temperature: Température de génération (défaut: 0.1).
|
||||||
|
max_tokens: Nombre max de tokens (défaut: 2500).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Le dict JSON parsé, ou None en cas d'erreur.
|
||||||
|
"""
|
||||||
|
for attempt in range(2):
|
||||||
|
try:
|
||||||
|
response = requests.post(
|
||||||
|
f"{OLLAMA_URL}/api/generate",
|
||||||
|
json={
|
||||||
|
"model": OLLAMA_MODEL,
|
||||||
|
"prompt": prompt,
|
||||||
|
"stream": False,
|
||||||
|
"format": "json",
|
||||||
|
"options": {
|
||||||
|
"temperature": temperature,
|
||||||
|
"num_predict": max_tokens,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
timeout=OLLAMA_TIMEOUT,
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
raw = response.json().get("response", "")
|
||||||
|
result = parse_json_response(raw)
|
||||||
|
if result is not None:
|
||||||
|
return result
|
||||||
|
if attempt == 0:
|
||||||
|
logger.info("Ollama : retry après échec de parsing")
|
||||||
|
except requests.ConnectionError:
|
||||||
|
logger.warning("Ollama non disponible (connexion refusée)")
|
||||||
|
return None
|
||||||
|
except requests.Timeout:
|
||||||
|
logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
|
||||||
|
return None
|
||||||
|
except (requests.RequestException, json.JSONDecodeError) as e:
|
||||||
|
logger.warning("Ollama erreur : %s", e)
|
||||||
|
return None
|
||||||
|
return None
|
||||||
@@ -2,12 +2,17 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
import requests
|
from ..config import (
|
||||||
|
ActeCCAM, Diagnostic, DossierMedical, RAGSource,
|
||||||
from ..config import Diagnostic, DossierMedical, RAGSource, OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
|
OLLAMA_CACHE_PATH, OLLAMA_MAX_PARALLEL, OLLAMA_MODEL,
|
||||||
|
)
|
||||||
|
from .cim10_dict import normalize_code, validate_code as cim10_validate
|
||||||
|
from .ccam_dict import validate_code as ccam_validate
|
||||||
|
from .ollama_client import call_ollama, parse_json_response
|
||||||
|
from .ollama_cache import OllamaCache
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -85,6 +90,52 @@ def search_similar(query: str, top_k: int = 10) -> list[dict]:
|
|||||||
return final
|
return final
|
||||||
|
|
||||||
|
|
||||||
|
def search_similar_ccam(query: str, top_k: int = 8) -> list[dict]:
|
||||||
|
"""Recherche les passages CCAM les plus similaires dans l'index FAISS.
|
||||||
|
|
||||||
|
Même logique que search_similar() mais priorise les sources CCAM.
|
||||||
|
"""
|
||||||
|
from .rag_index import get_index
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
result = get_index()
|
||||||
|
if result is None:
|
||||||
|
logger.warning("Index FAISS non disponible")
|
||||||
|
return []
|
||||||
|
|
||||||
|
faiss_index, metadata = result
|
||||||
|
|
||||||
|
model = _get_embed_model()
|
||||||
|
query_vec = model.encode([query], normalize_embeddings=True)
|
||||||
|
query_vec = np.array(query_vec, dtype=np.float32)
|
||||||
|
|
||||||
|
fetch_k = min(top_k * 2, faiss_index.ntotal)
|
||||||
|
scores, indices = faiss_index.search(query_vec, fetch_k)
|
||||||
|
|
||||||
|
raw_results = []
|
||||||
|
for score, idx in zip(scores[0], indices[0]):
|
||||||
|
if idx < 0:
|
||||||
|
continue
|
||||||
|
if float(score) < _MIN_SCORE:
|
||||||
|
continue
|
||||||
|
meta = metadata[idx].copy()
|
||||||
|
meta["score"] = float(score)
|
||||||
|
raw_results.append(meta)
|
||||||
|
|
||||||
|
# Prioriser les sources CCAM (au moins 5 sur top_k)
|
||||||
|
ccam_results = [r for r in raw_results if r["document"] == "ccam"]
|
||||||
|
other_results = [r for r in raw_results if r["document"] != "ccam"]
|
||||||
|
|
||||||
|
min_ccam = min(5, len(ccam_results))
|
||||||
|
final = ccam_results[:min_ccam]
|
||||||
|
remaining_slots = top_k - len(final)
|
||||||
|
remaining = ccam_results[min_ccam:] + other_results
|
||||||
|
remaining.sort(key=lambda r: r["score"], reverse=True)
|
||||||
|
final.extend(remaining[:remaining_slots])
|
||||||
|
|
||||||
|
return final
|
||||||
|
|
||||||
|
|
||||||
def _format_contexte(contexte: dict) -> str:
|
def _format_contexte(contexte: dict) -> str:
|
||||||
"""Formate le contexte patient de manière structurée pour le prompt."""
|
"""Formate le contexte patient de manière structurée pour le prompt."""
|
||||||
lines = []
|
lines = []
|
||||||
@@ -193,31 +244,63 @@ Réponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant
|
|||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
|
def _build_prompt_ccam(texte: str, sources: list[dict], contexte: dict) -> str:
|
||||||
|
"""Construit le prompt expert DIM pour le codage CCAM avec raisonnement structuré."""
|
||||||
|
sources_text = ""
|
||||||
|
for i, src in enumerate(sources, 1):
|
||||||
|
doc_name = {
|
||||||
|
"cim10": "CIM-10 FR 2026",
|
||||||
|
"cim10_alpha": "CIM-10 Index Alphabétique 2026",
|
||||||
|
"guide_methodo": "Guide Méthodologique MCO 2026",
|
||||||
|
"ccam": "CCAM PMSI V4 2025",
|
||||||
|
}.get(src["document"], src["document"])
|
||||||
|
|
||||||
|
code_info = f" (code: {src['code']})" if src.get("code") else ""
|
||||||
|
page_info = f" [page {src['page']}]" if src.get("page") else ""
|
||||||
|
|
||||||
|
sources_text += f"--- Source {i}: {doc_name}{code_info}{page_info} ---\n"
|
||||||
|
sources_text += (src.get("extrait", "")[:800]) + "\n\n"
|
||||||
|
|
||||||
|
ctx_str = _format_contexte(contexte)
|
||||||
|
|
||||||
|
return f"""Tu es un médecin DIM (Département d'Information Médicale) expert en codage CCAM PMSI.
|
||||||
|
Tu dois coder l'acte chirurgical/médical suivant en respectant STRICTEMENT la nomenclature CCAM.
|
||||||
|
|
||||||
|
RÈGLES IMPÉRATIVES :
|
||||||
|
- Le code doit provenir UNIQUEMENT des sources CCAM fournies
|
||||||
|
- Un code CCAM est composé de 4 lettres + 3 chiffres (ex: HMFC004)
|
||||||
|
- Vérifie l'activité (1=acte technique, 4=anesthésie) et le regroupement
|
||||||
|
- Tiens compte du tarif secteur 1 pour valider la cohérence
|
||||||
|
- Si plusieurs codes sont possibles, choisis le plus spécifique à l'acte décrit
|
||||||
|
- En cas de doute, indique confidence "low" plutôt que de proposer un code inadapté
|
||||||
|
|
||||||
|
ACTE À CODER : "{texte}"
|
||||||
|
|
||||||
|
CONTEXTE CLINIQUE :
|
||||||
|
{ctx_str}
|
||||||
|
|
||||||
|
SOURCES CCAM :
|
||||||
|
{sources_text}
|
||||||
|
Réponds UNIQUEMENT avec un objet JSON au format suivant, sans aucun texte avant ou après :
|
||||||
|
{{
|
||||||
|
"analyse_acte": "que décrit cet acte sur le plan technique/chirurgical",
|
||||||
|
"codes_candidats": "quels codes CCAM des sources sont compatibles",
|
||||||
|
"discrimination": "pourquoi choisir ce code plutôt qu'un autre (activité, regroupement, tarif)",
|
||||||
|
"code": "ABCD123",
|
||||||
|
"confidence": "high ou medium ou low",
|
||||||
|
"justification": "explication courte en français"
|
||||||
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
def _parse_ollama_response(raw: str) -> dict | None:
|
def _parse_ollama_response(raw: str) -> dict | None:
|
||||||
"""Parse la réponse JSON d'Ollama (mode JSON).
|
"""Parse la réponse JSON d'Ollama et reconstitue le raisonnement structuré."""
|
||||||
|
parsed = parse_json_response(raw)
|
||||||
Reconstitue le raisonnement à partir des champs structurés.
|
if parsed is None:
|
||||||
"""
|
|
||||||
# Stripper les blocs markdown ```json ... ``` que certains modèles ajoutent
|
|
||||||
text = raw.strip()
|
|
||||||
if text.startswith("```"):
|
|
||||||
first_nl = text.find("\n")
|
|
||||||
if first_nl != -1:
|
|
||||||
text = text[first_nl + 1:]
|
|
||||||
# Retirer la fence fermante seulement si elle existe en fin de texte
|
|
||||||
if text.rstrip().endswith("```"):
|
|
||||||
text = text.rstrip()[:-3]
|
|
||||||
text = text.strip()
|
|
||||||
|
|
||||||
try:
|
|
||||||
parsed = json.loads(text)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
logger.warning("Ollama : JSON invalide : %s", raw[:200])
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Reconstituer le raisonnement à partir des champs structurés
|
# Reconstituer le raisonnement à partir des champs structurés
|
||||||
reasoning_parts = []
|
reasoning_parts = []
|
||||||
for key in ("analyse_clinique", "codes_candidats", "discrimination", "regle_pmsi"):
|
for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"):
|
||||||
val = parsed.pop(key, None)
|
val = parsed.pop(key, None)
|
||||||
if val:
|
if val:
|
||||||
titre = key.replace("_", " ").upper()
|
titre = key.replace("_", " ").upper()
|
||||||
@@ -229,59 +312,70 @@ def _parse_ollama_response(raw: str) -> dict | None:
|
|||||||
|
|
||||||
|
|
||||||
def _call_ollama(prompt: str) -> dict | None:
|
def _call_ollama(prompt: str) -> dict | None:
|
||||||
"""Appelle Ollama (mode JSON) et parse la réponse. Retry une fois si parsing échoue."""
|
"""Appelle Ollama (mode JSON) et parse la réponse avec reconstitution du raisonnement."""
|
||||||
for attempt in range(2):
|
result = call_ollama(prompt, temperature=0.1, max_tokens=2500)
|
||||||
try:
|
if result is None:
|
||||||
response = requests.post(
|
return None
|
||||||
f"{OLLAMA_URL}/api/generate",
|
# Reconstituer le raisonnement structuré
|
||||||
json={
|
reasoning_parts = []
|
||||||
"model": OLLAMA_MODEL,
|
for key in ("analyse_clinique", "analyse_acte", "codes_candidats", "discrimination", "regle_pmsi"):
|
||||||
"prompt": prompt,
|
val = result.pop(key, None)
|
||||||
"stream": False,
|
if val:
|
||||||
"format": "json",
|
titre = key.replace("_", " ").upper()
|
||||||
"options": {
|
reasoning_parts.append(f"{titre} :\n{val}")
|
||||||
"temperature": 0.1,
|
if reasoning_parts:
|
||||||
"num_predict": 2500,
|
result["raisonnement"] = "\n\n".join(reasoning_parts)
|
||||||
},
|
return result
|
||||||
},
|
|
||||||
timeout=OLLAMA_TIMEOUT,
|
|
||||||
|
def _apply_llm_result_diagnostic(diagnostic: Diagnostic, llm_result: dict) -> None:
|
||||||
|
"""Applique un résultat LLM (frais ou caché) à un Diagnostic."""
|
||||||
|
code = llm_result.get("code")
|
||||||
|
confidence = llm_result.get("confidence")
|
||||||
|
justification = llm_result.get("justification")
|
||||||
|
raisonnement = llm_result.get("raisonnement")
|
||||||
|
|
||||||
|
if code:
|
||||||
|
code = normalize_code(code)
|
||||||
|
is_valid, _ = cim10_validate(code)
|
||||||
|
if is_valid:
|
||||||
|
diagnostic.cim10_suggestion = code
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"RAG : code Ollama %s invalide pour « %s », code ignoré",
|
||||||
|
code, diagnostic.texte,
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
if confidence in ("high", "medium", "low"):
|
||||||
raw = response.json().get("response", "")
|
diagnostic.cim10_confidence = confidence
|
||||||
result = _parse_ollama_response(raw)
|
if justification:
|
||||||
if result is not None:
|
diagnostic.justification = justification
|
||||||
return result
|
if raisonnement:
|
||||||
if attempt == 0:
|
diagnostic.raisonnement = raisonnement
|
||||||
logger.info("Ollama : retry après échec de parsing")
|
|
||||||
except requests.ConnectionError:
|
|
||||||
logger.warning("Ollama non disponible (connexion refusée)")
|
|
||||||
return None
|
|
||||||
except requests.Timeout:
|
|
||||||
logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
|
|
||||||
return None
|
|
||||||
except (requests.RequestException, json.JSONDecodeError) as e:
|
|
||||||
logger.warning("Ollama erreur : %s", e)
|
|
||||||
return None
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def enrich_diagnostic(
|
def enrich_diagnostic(
|
||||||
diagnostic: Diagnostic,
|
diagnostic: Diagnostic,
|
||||||
contexte: dict,
|
contexte: dict,
|
||||||
est_dp: bool = True,
|
est_dp: bool = True,
|
||||||
|
cache: OllamaCache | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Enrichit un Diagnostic avec le RAG (FAISS + Ollama).
|
"""Enrichit un Diagnostic avec le RAG (FAISS + Ollama).
|
||||||
|
|
||||||
Modifie le diagnostic en place. Fallback gracieux si FAISS ou Ollama échouent.
|
Modifie le diagnostic en place. Fallback gracieux si FAISS ou Ollama échouent.
|
||||||
"""
|
"""
|
||||||
# 1. Recherche FAISS
|
diag_type = "dp" if est_dp else "das"
|
||||||
|
|
||||||
|
# 1. Vérifier le cache
|
||||||
|
cached = cache.get(diagnostic.texte, diag_type) if cache else None
|
||||||
|
|
||||||
|
# 2. Recherche FAISS (toujours, pour les sources_rag fraîches)
|
||||||
sources = search_similar(diagnostic.texte, top_k=10)
|
sources = search_similar(diagnostic.texte, top_k=10)
|
||||||
|
|
||||||
if not sources:
|
if not sources:
|
||||||
logger.debug("Aucune source RAG trouvée pour : %s", diagnostic.texte)
|
logger.debug("Aucune source RAG trouvée pour : %s", diagnostic.texte)
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. Stocker les sources RAG
|
# 3. Stocker les sources RAG
|
||||||
diagnostic.sources_rag = [
|
diagnostic.sources_rag = [
|
||||||
RAGSource(
|
RAGSource(
|
||||||
document=s["document"],
|
document=s["document"],
|
||||||
@@ -292,30 +386,101 @@ def enrich_diagnostic(
|
|||||||
for s in sources
|
for s in sources
|
||||||
]
|
]
|
||||||
|
|
||||||
# 3. Appel Ollama pour justification avec raisonnement structuré
|
# 4. Si cache hit, appliquer et court-circuiter Ollama
|
||||||
|
if cached is not None:
|
||||||
|
logger.info("Cache hit pour %s : « %s »", diag_type.upper(), diagnostic.texte)
|
||||||
|
_apply_llm_result_diagnostic(diagnostic, cached)
|
||||||
|
return
|
||||||
|
|
||||||
|
# 5. Appel Ollama pour justification avec raisonnement structuré
|
||||||
prompt = _build_prompt(diagnostic.texte, sources, contexte, est_dp=est_dp)
|
prompt = _build_prompt(diagnostic.texte, sources, contexte, est_dp=est_dp)
|
||||||
llm_result = _call_ollama(prompt)
|
llm_result = _call_ollama(prompt)
|
||||||
|
|
||||||
if llm_result:
|
if llm_result:
|
||||||
code = llm_result.get("code")
|
_apply_llm_result_diagnostic(diagnostic, llm_result)
|
||||||
confidence = llm_result.get("confidence")
|
if cache:
|
||||||
justification = llm_result.get("justification")
|
cache.put(diagnostic.texte, diag_type, llm_result)
|
||||||
raisonnement = llm_result.get("raisonnement")
|
|
||||||
|
|
||||||
if code:
|
|
||||||
diagnostic.cim10_suggestion = code
|
|
||||||
if confidence in ("high", "medium", "low"):
|
|
||||||
diagnostic.cim10_confidence = confidence
|
|
||||||
if justification:
|
|
||||||
diagnostic.justification = justification
|
|
||||||
if raisonnement:
|
|
||||||
diagnostic.raisonnement = raisonnement
|
|
||||||
else:
|
else:
|
||||||
logger.info("Ollama non disponible — sources FAISS conservées sans justification LLM")
|
logger.info("Ollama non disponible — sources FAISS conservées sans justification LLM")
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_llm_result_acte(acte: ActeCCAM, llm_result: dict) -> None:
|
||||||
|
"""Applique un résultat LLM (frais ou caché) à un ActeCCAM."""
|
||||||
|
code = llm_result.get("code")
|
||||||
|
confidence = llm_result.get("confidence")
|
||||||
|
justification = llm_result.get("justification")
|
||||||
|
raisonnement = llm_result.get("raisonnement")
|
||||||
|
|
||||||
|
if code:
|
||||||
|
code = code.strip().upper()
|
||||||
|
is_valid, _ = ccam_validate(code)
|
||||||
|
if is_valid:
|
||||||
|
acte.code_ccam_suggestion = code
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"RAG : code CCAM Ollama %s invalide pour « %s », code ignoré",
|
||||||
|
code, acte.texte,
|
||||||
|
)
|
||||||
|
if confidence in ("high", "medium", "low"):
|
||||||
|
acte.ccam_confidence = confidence
|
||||||
|
if justification:
|
||||||
|
acte.justification = justification
|
||||||
|
if raisonnement:
|
||||||
|
acte.raisonnement = raisonnement
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_acte(acte: ActeCCAM, contexte: dict, cache: OllamaCache | None = None) -> None:
|
||||||
|
"""Enrichit un ActeCCAM avec le RAG (FAISS + Ollama).
|
||||||
|
|
||||||
|
Modifie l'acte en place. Fallback gracieux si FAISS ou Ollama échouent.
|
||||||
|
"""
|
||||||
|
# 1. Vérifier le cache
|
||||||
|
cached = cache.get(acte.texte, "ccam") if cache else None
|
||||||
|
|
||||||
|
# 2. Recherche FAISS (sources CCAM priorisées)
|
||||||
|
sources = search_similar_ccam(acte.texte, top_k=8)
|
||||||
|
|
||||||
|
if not sources:
|
||||||
|
logger.debug("Aucune source RAG CCAM trouvée pour : %s", acte.texte)
|
||||||
|
return
|
||||||
|
|
||||||
|
# 3. Stocker les sources RAG
|
||||||
|
acte.sources_rag = [
|
||||||
|
RAGSource(
|
||||||
|
document=s["document"],
|
||||||
|
page=s.get("page"),
|
||||||
|
code=s.get("code"),
|
||||||
|
extrait=s.get("extrait", "")[:200],
|
||||||
|
)
|
||||||
|
for s in sources
|
||||||
|
]
|
||||||
|
|
||||||
|
# 4. Si cache hit, appliquer et court-circuiter Ollama
|
||||||
|
if cached is not None:
|
||||||
|
logger.info("Cache hit pour CCAM : « %s »", acte.texte)
|
||||||
|
_apply_llm_result_acte(acte, cached)
|
||||||
|
return
|
||||||
|
|
||||||
|
# 5. Appel Ollama pour justification avec raisonnement structuré
|
||||||
|
prompt = _build_prompt_ccam(acte.texte, sources, contexte)
|
||||||
|
llm_result = _call_ollama(prompt)
|
||||||
|
|
||||||
|
if llm_result:
|
||||||
|
_apply_llm_result_acte(acte, llm_result)
|
||||||
|
if cache:
|
||||||
|
cache.put(acte.texte, "ccam", llm_result)
|
||||||
|
else:
|
||||||
|
logger.info("Ollama non disponible — sources FAISS CCAM conservées sans justification LLM")
|
||||||
|
|
||||||
|
|
||||||
def enrich_dossier(dossier: DossierMedical) -> None:
|
def enrich_dossier(dossier: DossierMedical) -> None:
|
||||||
"""Enrichit le DP et tous les DAS d'un dossier via le RAG."""
|
"""Enrichit le DP et tous les DAS d'un dossier via le RAG.
|
||||||
|
|
||||||
|
Utilise un cache persistant et parallélise les appels Ollama
|
||||||
|
pour les DAS et actes CCAM (max_workers = OLLAMA_MAX_PARALLEL).
|
||||||
|
"""
|
||||||
|
cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL)
|
||||||
|
|
||||||
contexte = {
|
contexte = {
|
||||||
"sexe": dossier.sejour.sexe,
|
"sexe": dossier.sejour.sexe,
|
||||||
"age": dossier.sejour.age,
|
"age": dossier.sejour.age,
|
||||||
@@ -327,11 +492,12 @@ def enrich_dossier(dossier: DossierMedical) -> None:
|
|||||||
"complications": dossier.complications,
|
"complications": dossier.complications,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Phase 1 : DP seul (le contexte DAS en dépend)
|
||||||
if dossier.diagnostic_principal:
|
if dossier.diagnostic_principal:
|
||||||
logger.info("RAG enrichissement DP : %s", dossier.diagnostic_principal.texte)
|
logger.info("RAG enrichissement DP : %s", dossier.diagnostic_principal.texte)
|
||||||
enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True)
|
enrich_diagnostic(dossier.diagnostic_principal, contexte, est_dp=True, cache=cache)
|
||||||
|
|
||||||
# Pour les DAS, ajouter le DP et les DAS existants au contexte pour cohérence
|
# Mettre à jour le contexte avec le DP pour les DAS
|
||||||
if dossier.diagnostic_principal:
|
if dossier.diagnostic_principal:
|
||||||
contexte["dp_texte"] = dossier.diagnostic_principal.texte
|
contexte["dp_texte"] = dossier.diagnostic_principal.texte
|
||||||
contexte["das_codes_existants"] = [
|
contexte["das_codes_existants"] = [
|
||||||
@@ -340,6 +506,20 @@ def enrich_dossier(dossier: DossierMedical) -> None:
|
|||||||
if d.cim10_suggestion
|
if d.cim10_suggestion
|
||||||
]
|
]
|
||||||
|
|
||||||
for das in dossier.diagnostics_associes:
|
# Phase 2 : DAS + Actes en parallèle
|
||||||
logger.info("RAG enrichissement DAS : %s", das.texte)
|
das_list = dossier.diagnostics_associes
|
||||||
enrich_diagnostic(das, contexte, est_dp=False)
|
actes_list = dossier.actes_ccam
|
||||||
|
|
||||||
|
if das_list or actes_list:
|
||||||
|
with ThreadPoolExecutor(max_workers=OLLAMA_MAX_PARALLEL) as executor:
|
||||||
|
futures = []
|
||||||
|
for das in das_list:
|
||||||
|
logger.info("RAG enrichissement DAS : %s", das.texte)
|
||||||
|
futures.append(executor.submit(enrich_diagnostic, das, contexte, False, cache))
|
||||||
|
for acte in actes_list:
|
||||||
|
logger.info("RAG enrichissement CCAM : %s", acte.texte)
|
||||||
|
futures.append(executor.submit(enrich_acte, acte, contexte, cache))
|
||||||
|
for f in as_completed(futures):
|
||||||
|
f.result() # propage les exceptions
|
||||||
|
|
||||||
|
cache.save()
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ def evaluate_severity(diagnostic) -> SeverityInfo:
|
|||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
def enrich_dossier_severity(dp, das_list: list) -> list[str]:
|
def enrich_dossier_severity(dp, das_list: list) -> tuple[list[str], int, int]:
|
||||||
"""Enrichit les diagnostics d'un dossier avec les informations de sévérité.
|
"""Enrichit les diagnostics d'un dossier avec les informations de sévérité.
|
||||||
|
|
||||||
Modifie les diagnostics en place (attributs est_cma, est_cms, niveau_severite).
|
Modifie les diagnostics en place (attributs est_cma, est_cms, niveau_severite).
|
||||||
@@ -168,7 +168,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
|
|||||||
das_list: Liste des diagnostics associés.
|
das_list: Liste des diagnostics associés.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Liste d'alertes de sévérité générées.
|
(alertes, cma_count, cms_count).
|
||||||
"""
|
"""
|
||||||
alertes = []
|
alertes = []
|
||||||
|
|
||||||
@@ -181,6 +181,7 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
|
|||||||
|
|
||||||
# Évaluer chaque DAS
|
# Évaluer chaque DAS
|
||||||
cma_count = 0
|
cma_count = 0
|
||||||
|
cms_count = 0
|
||||||
for das in das_list:
|
for das in das_list:
|
||||||
if not das.cim10_suggestion:
|
if not das.cim10_suggestion:
|
||||||
continue
|
continue
|
||||||
@@ -189,6 +190,10 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
|
|||||||
if info.est_cma_probable:
|
if info.est_cma_probable:
|
||||||
das.est_cma = True
|
das.est_cma = True
|
||||||
cma_count += 1
|
cma_count += 1
|
||||||
|
# CMS = CMA sévère
|
||||||
|
if info.niveau_severite == "severe":
|
||||||
|
das.est_cms = True
|
||||||
|
cms_count += 1
|
||||||
alertes.append(
|
alertes.append(
|
||||||
f"CMA probable : '{das.texte}' ({das.cim10_suggestion}) — "
|
f"CMA probable : '{das.texte}' ({das.cim10_suggestion}) — "
|
||||||
f"sévérité {info.niveau_severite}"
|
f"sévérité {info.niveau_severite}"
|
||||||
@@ -198,4 +203,4 @@ def enrich_dossier_severity(dp, das_list: list) -> list[str]:
|
|||||||
if cma_count >= 2:
|
if cma_count >= 2:
|
||||||
alertes.insert(0, f"{cma_count} CMA probables détectées — impact potentiel sur le niveau de sévérité GHM")
|
alertes.insert(0, f"{cma_count} CMA probables détectées — impact potentiel sur le niveau de sévérité GHM")
|
||||||
|
|
||||||
return alertes
|
return alertes, cma_count, cms_count
|
||||||
|
|||||||
@@ -69,6 +69,125 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{# ---- Estimation GHM ---- #}
|
||||||
|
{% if dossier.ghm_estimation %}
|
||||||
|
{% set ghm = dossier.ghm_estimation %}
|
||||||
|
<div class="card section" style="border-left:4px solid #8b5cf6;">
|
||||||
|
<h3 style="color:#6d28d9;">Estimation GHM</h3>
|
||||||
|
<div class="info-grid">
|
||||||
|
{% if ghm.cmd %}
|
||||||
|
<div class="info-item">
|
||||||
|
<label>CMD</label>
|
||||||
|
<span><strong>{{ ghm.cmd }}</strong>{% if ghm.cmd_libelle %} — {{ ghm.cmd_libelle }}{% endif %}</span>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
<div class="info-item">
|
||||||
|
<label>Type</label>
|
||||||
|
{% if ghm.type_ghm == 'C' %}
|
||||||
|
<span class="badge" style="background:#fee2e2;color:#dc2626;">C — Chirurgical</span>
|
||||||
|
{% elif ghm.type_ghm == 'K' %}
|
||||||
|
<span class="badge" style="background:#fef3c7;color:#92400e;">K — Interventionnel</span>
|
||||||
|
{% elif ghm.type_ghm == 'M' %}
|
||||||
|
<span class="badge" style="background:#dbeafe;color:#1d4ed8;">M — Médical</span>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
<div class="info-item">
|
||||||
|
<label>Sévérité</label>
|
||||||
|
{% if ghm.severite <= 1 %}
|
||||||
|
<span class="badge" style="background:#d1fae5;color:#065f46;">Niveau {{ ghm.severite }}</span>
|
||||||
|
{% elif ghm.severite == 2 %}
|
||||||
|
<span class="badge" style="background:#fef3c7;color:#92400e;">Niveau {{ ghm.severite }}</span>
|
||||||
|
{% elif ghm.severite == 3 %}
|
||||||
|
<span class="badge" style="background:#fed7aa;color:#9a3412;">Niveau {{ ghm.severite }}</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="badge" style="background:#fee2e2;color:#dc2626;">Niveau {{ ghm.severite }}</span>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% if ghm.ghm_approx %}
|
||||||
|
<div class="info-item">
|
||||||
|
<label>Code GHM approx.</label>
|
||||||
|
<code style="font-size:1.1rem;font-weight:700;letter-spacing:0.05em;">{{ ghm.ghm_approx }}</code>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
<div class="info-item">
|
||||||
|
<label>CMA / CMS</label>
|
||||||
|
<span>{{ ghm.cma_count }} CMA, {{ ghm.cms_count }} CMS</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% if ghm.alertes %}
|
||||||
|
<div style="margin-top:0.75rem;">
|
||||||
|
{% for alerte in ghm.alertes %}
|
||||||
|
<div style="font-size:0.8rem;color:#c2410c;margin-bottom:0.2rem;">{{ alerte }}</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
<div style="margin-top:0.75rem;font-size:0.7rem;color:#94a3b8;font-style:italic;">
|
||||||
|
Estimation heuristique — le GHM définitif nécessite le groupeur officiel ATIH
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{# ---- Contrôle CPAM ---- #}
|
||||||
|
{% if dossier.controles_cpam %}
|
||||||
|
<div class="card section" style="border-left:4px solid #f59e0b;">
|
||||||
|
<h3 style="color:#b45309;">Contrôle CPAM ({{ dossier.controles_cpam|length }})</h3>
|
||||||
|
{% for ctrl in dossier.controles_cpam %}
|
||||||
|
<div style="margin-bottom:1.5rem;{% if not loop.last %}border-bottom:1px solid #e2e8f0;padding-bottom:1rem;{% endif %}">
|
||||||
|
<div style="display:flex;align-items:center;gap:0.5rem;margin-bottom:0.5rem;">
|
||||||
|
<strong>OGC {{ ctrl.numero_ogc }} — {{ ctrl.titre }}</strong>
|
||||||
|
{% if 'retient' in ctrl.decision_ucr|lower %}
|
||||||
|
<span class="badge" style="background:#d1fae5;color:#065f46;">{{ ctrl.decision_ucr }}</span>
|
||||||
|
{% elif 'confirme' in ctrl.decision_ucr|lower %}
|
||||||
|
<span class="badge" style="background:#fee2e2;color:#dc2626;">{{ ctrl.decision_ucr }}</span>
|
||||||
|
{% else %}
|
||||||
|
<span class="badge" style="background:#e0e7ff;color:#3730a3;">{{ ctrl.decision_ucr }}</span>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{# Argument CPAM #}
|
||||||
|
{% if ctrl.arg_ucr %}
|
||||||
|
<div style="border-left:3px solid #f59e0b;padding:0.5rem 0.75rem;background:#fffbeb;margin-bottom:0.75rem;font-size:0.85rem;color:#78350f;">
|
||||||
|
<div style="font-size:0.7rem;color:#92400e;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Argument CPAM</div>
|
||||||
|
{{ ctrl.arg_ucr }}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{# Codes contestés #}
|
||||||
|
{% if ctrl.dp_ucr or ctrl.da_ucr or ctrl.dr_ucr or ctrl.actes_ucr %}
|
||||||
|
<div style="margin-bottom:0.75rem;">
|
||||||
|
<div style="font-size:0.7rem;color:#64748b;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Codes contestés</div>
|
||||||
|
<div style="display:flex;gap:0.5rem;flex-wrap:wrap;">
|
||||||
|
{% if ctrl.dp_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DP: {{ ctrl.dp_ucr }}</span>{% endif %}
|
||||||
|
{% if ctrl.da_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DA: {{ ctrl.da_ucr }}</span>{% endif %}
|
||||||
|
{% if ctrl.dr_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">DR: {{ ctrl.dr_ucr }}</span>{% endif %}
|
||||||
|
{% if ctrl.actes_ucr %}<span class="badge" style="background:#fef3c7;color:#92400e;">Actes: {{ ctrl.actes_ucr }}</span>{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{# Contre-argumentation #}
|
||||||
|
{% if ctrl.contre_argumentation %}
|
||||||
|
<div style="border-left:3px solid #3b82f6;padding:0.5rem 0.75rem;background:#eff6ff;margin-bottom:0.75rem;font-size:0.85rem;color:#1e3a5f;">
|
||||||
|
<div style="font-size:0.7rem;color:#1d4ed8;text-transform:uppercase;font-weight:600;margin-bottom:0.25rem;">Contre-argumentation</div>
|
||||||
|
<pre style="white-space:pre-wrap;font-family:inherit;margin:0;">{{ ctrl.contre_argumentation }}</pre>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{# Sources RAG #}
|
||||||
|
{% if ctrl.sources_reponse %}
|
||||||
|
<details>
|
||||||
|
<summary style="font-size:0.8rem;color:#64748b;">Sources RAG ({{ ctrl.sources_reponse|length }})</summary>
|
||||||
|
{% for src in ctrl.sources_reponse %}
|
||||||
|
<pre style="font-size:0.75rem;">{{ src.document }}{% if src.code %} — {{ src.code }}{% endif %}{% if src.page %} [p.{{ src.page }}]{% endif %}
|
||||||
|
{{ src.extrait or '' }}</pre>
|
||||||
|
{% endfor %}
|
||||||
|
</details>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{# ---- Alertes de codage ---- #}
|
{# ---- Alertes de codage ---- #}
|
||||||
{% if dossier.alertes_codage %}
|
{% if dossier.alertes_codage %}
|
||||||
<div class="card section" style="border-left:4px solid #f97316;background:#fff7ed;">
|
<div class="card section" style="border-left:4px solid #f97316;background:#fff7ed;">
|
||||||
|
|||||||
130
tests/test_cpam_parser.py
Normal file
130
tests/test_cpam_parser.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Tests pour le parser de contrôle CPAM."""
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import openpyxl
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.config import ControleCPAM
|
||||||
|
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
|
||||||
|
|
||||||
|
|
||||||
|
def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
|
||||||
|
"""Crée un fichier xlsx de test avec les lignes données."""
|
||||||
|
wb = openpyxl.Workbook()
|
||||||
|
ws = wb.active
|
||||||
|
ws.title = "OGC Contrôle T2A"
|
||||||
|
ws.append(("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR"))
|
||||||
|
for row in rows:
|
||||||
|
ws.append(row)
|
||||||
|
wb.save(path)
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseCpamExcel:
|
||||||
|
def test_parse_basic(self, tmp_path):
|
||||||
|
xlsx = tmp_path / "test.xlsx"
|
||||||
|
_create_test_xlsx([
|
||||||
|
(17, "Désaccord sur les DAS", "Argument UCR...", "UCR retient", None, None, None, None),
|
||||||
|
(21, "Désaccord sur le DP", "Autre argument", "UCR confirme avis", "K85.1", None, None, None),
|
||||||
|
], xlsx)
|
||||||
|
|
||||||
|
result = parse_cpam_excel(xlsx)
|
||||||
|
|
||||||
|
assert 17 in result
|
||||||
|
assert 21 in result
|
||||||
|
assert len(result[17]) == 1
|
||||||
|
assert len(result[21]) == 1
|
||||||
|
assert result[17][0].titre == "Désaccord sur les DAS"
|
||||||
|
assert result[17][0].decision_ucr == "UCR retient"
|
||||||
|
assert result[21][0].dp_ucr == "K85.1"
|
||||||
|
|
||||||
|
def test_parse_multiple_same_ogc(self, tmp_path):
|
||||||
|
xlsx = tmp_path / "test.xlsx"
|
||||||
|
_create_test_xlsx([
|
||||||
|
(17, "Titre 1", "Arg 1", "Décision 1", None, None, None, None),
|
||||||
|
(17, "Titre 2", "Arg 2", "Décision 2", None, None, None, None),
|
||||||
|
], xlsx)
|
||||||
|
|
||||||
|
result = parse_cpam_excel(xlsx)
|
||||||
|
|
||||||
|
assert len(result[17]) == 2
|
||||||
|
|
||||||
|
def test_parse_empty_file(self, tmp_path):
|
||||||
|
xlsx = tmp_path / "empty.xlsx"
|
||||||
|
_create_test_xlsx([], xlsx)
|
||||||
|
|
||||||
|
result = parse_cpam_excel(xlsx)
|
||||||
|
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
def test_parse_nonexistent_file(self):
|
||||||
|
result = parse_cpam_excel("/nonexistent/path.xlsx")
|
||||||
|
assert result == {}
|
||||||
|
|
||||||
|
def test_parse_optional_fields(self, tmp_path):
|
||||||
|
xlsx = tmp_path / "test.xlsx"
|
||||||
|
_create_test_xlsx([
|
||||||
|
(42, "Titre", "Arg", "Décision", "E11.40", "G63.2", "E11.9", "ABCD123"),
|
||||||
|
], xlsx)
|
||||||
|
|
||||||
|
result = parse_cpam_excel(xlsx)
|
||||||
|
|
||||||
|
ctrl = result[42][0]
|
||||||
|
assert ctrl.dp_ucr == "E11.40"
|
||||||
|
assert ctrl.da_ucr == "G63.2"
|
||||||
|
assert ctrl.dr_ucr == "E11.9"
|
||||||
|
assert ctrl.actes_ucr == "ABCD123"
|
||||||
|
|
||||||
|
|
||||||
|
class TestMatchDossierOGC:
|
||||||
|
def setup_method(self):
|
||||||
|
self.cpam_data = {
|
||||||
|
17: [ControleCPAM(numero_ogc=17, titre="Test 17")],
|
||||||
|
21: [ControleCPAM(numero_ogc=21, titre="Test 21")],
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_match_found(self):
|
||||||
|
result = match_dossier_ogc("17_23100690", self.cpam_data)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0].numero_ogc == 17
|
||||||
|
|
||||||
|
def test_match_not_found(self):
|
||||||
|
result = match_dossier_ogc("15_23096332", self.cpam_data)
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
def test_match_no_prefix(self):
|
||||||
|
result = match_dossier_ogc("nodash", self.cpam_data)
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
def test_match_empty_data(self):
|
||||||
|
result = match_dossier_ogc("17_23100690", {})
|
||||||
|
assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
class TestControleCPAMModel:
|
||||||
|
def test_serialization(self):
|
||||||
|
ctrl = ControleCPAM(
|
||||||
|
numero_ogc=17,
|
||||||
|
titre="Désaccord sur les DAS",
|
||||||
|
arg_ucr="Argument...",
|
||||||
|
decision_ucr="UCR retient",
|
||||||
|
dp_ucr="K85.1",
|
||||||
|
)
|
||||||
|
data = ctrl.model_dump()
|
||||||
|
assert data["numero_ogc"] == 17
|
||||||
|
assert data["dp_ucr"] == "K85.1"
|
||||||
|
assert data["contre_argumentation"] is None
|
||||||
|
|
||||||
|
def test_deserialization(self):
|
||||||
|
data = {
|
||||||
|
"numero_ogc": 21,
|
||||||
|
"titre": "Test",
|
||||||
|
"arg_ucr": "Arg",
|
||||||
|
"decision_ucr": "Décision",
|
||||||
|
"contre_argumentation": "Ma réponse",
|
||||||
|
}
|
||||||
|
ctrl = ControleCPAM(**data)
|
||||||
|
assert ctrl.numero_ogc == 21
|
||||||
|
assert ctrl.contre_argumentation == "Ma réponse"
|
||||||
|
assert ctrl.sources_reponse == []
|
||||||
146
tests/test_cpam_response.py
Normal file
146
tests/test_cpam_response.py
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
"""Tests pour la génération de contre-argumentation CPAM."""
|
||||||
|
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.config import ControleCPAM, Diagnostic, DossierMedical, RAGSource, Sejour
|
||||||
|
from src.control.cpam_response import _build_cpam_prompt, _format_response, generate_cpam_response
|
||||||
|
|
||||||
|
|
||||||
|
def _make_dossier() -> DossierMedical:
|
||||||
|
"""Crée un dossier médical de test."""
|
||||||
|
return DossierMedical(
|
||||||
|
source_file="test.pdf",
|
||||||
|
document_type="crh",
|
||||||
|
sejour=Sejour(sexe="M", age=65, duree_sejour=5),
|
||||||
|
diagnostic_principal=Diagnostic(
|
||||||
|
texte="Cholécystite aiguë",
|
||||||
|
cim10_suggestion="K81.0",
|
||||||
|
),
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Iléus réflexe", cim10_suggestion="K56.0"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_controle() -> ControleCPAM:
|
||||||
|
"""Crée un contrôle CPAM de test."""
|
||||||
|
return ControleCPAM(
|
||||||
|
numero_ogc=17,
|
||||||
|
titre="Désaccord sur les DAS",
|
||||||
|
arg_ucr="L'UCR confirme l'avis des médecins contrôleurs au motif que le DAS K56.0 n'est pas justifié.",
|
||||||
|
decision_ucr="UCR confirme avis médecins contrôleurs",
|
||||||
|
dp_ucr=None,
|
||||||
|
da_ucr="K56.0",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildPrompt:
|
||||||
|
def test_prompt_contains_dossier_info(self):
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||||
|
|
||||||
|
assert "Cholécystite aiguë" in prompt
|
||||||
|
assert "K81.0" in prompt
|
||||||
|
assert "Iléus réflexe" in prompt
|
||||||
|
assert "65 ans" in prompt
|
||||||
|
|
||||||
|
def test_prompt_contains_cpam_argument(self):
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||||
|
|
||||||
|
assert controle.arg_ucr in prompt
|
||||||
|
assert controle.decision_ucr in prompt
|
||||||
|
|
||||||
|
def test_prompt_contains_codes_contestes(self):
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||||
|
|
||||||
|
assert "DA proposés par UCR : K56.0" in prompt
|
||||||
|
|
||||||
|
def test_prompt_contains_rag_sources(self):
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
sources = [
|
||||||
|
{"document": "guide_methodo", "page": 64, "extrait": "Texte du guide..."},
|
||||||
|
{"document": "cim10", "code": "K56.0", "extrait": "Iléus paralytique..."},
|
||||||
|
]
|
||||||
|
prompt = _build_cpam_prompt(dossier, controle, sources)
|
||||||
|
|
||||||
|
assert "Guide Méthodologique MCO 2026" in prompt
|
||||||
|
assert "CIM-10 FR 2026" in prompt
|
||||||
|
assert "page 64" in prompt
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatResponse:
|
||||||
|
def test_full_response(self):
|
||||||
|
parsed = {
|
||||||
|
"analyse_contestation": "La CPAM conteste le DAS K56.0",
|
||||||
|
"points_accord": "Aucun",
|
||||||
|
"contre_arguments": "Le guide méthodologique précise...",
|
||||||
|
"references": "Guide métho p.64",
|
||||||
|
"conclusion": "Le DAS est justifié",
|
||||||
|
}
|
||||||
|
text = _format_response(parsed)
|
||||||
|
|
||||||
|
assert "ANALYSE DE LA CONTESTATION" in text
|
||||||
|
assert "CONTRE-ARGUMENTS" in text
|
||||||
|
assert "CONCLUSION" in text
|
||||||
|
# "Aucun" ne doit pas générer la section points d'accord
|
||||||
|
assert "POINTS D'ACCORD" not in text
|
||||||
|
|
||||||
|
def test_partial_response(self):
|
||||||
|
parsed = {
|
||||||
|
"contre_arguments": "Arguments...",
|
||||||
|
"conclusion": "Conclusion...",
|
||||||
|
}
|
||||||
|
text = _format_response(parsed)
|
||||||
|
|
||||||
|
assert "CONTRE-ARGUMENTS" in text
|
||||||
|
assert "CONCLUSION" in text
|
||||||
|
|
||||||
|
def test_empty_response(self):
|
||||||
|
text = _format_response({})
|
||||||
|
assert text == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestGenerateResponse:
|
||||||
|
@patch("src.control.cpam_response.call_ollama")
|
||||||
|
@patch("src.control.cpam_response._search_rag_for_control")
|
||||||
|
def test_generate_success(self, mock_rag, mock_ollama):
|
||||||
|
mock_rag.return_value = [
|
||||||
|
{"document": "guide_methodo", "page": 64, "extrait": "Texte guide"},
|
||||||
|
]
|
||||||
|
mock_ollama.return_value = {
|
||||||
|
"analyse_contestation": "Analyse...",
|
||||||
|
"contre_arguments": "Contre-arguments...",
|
||||||
|
"conclusion": "Conclusion...",
|
||||||
|
}
|
||||||
|
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
|
||||||
|
text, sources = generate_cpam_response(dossier, controle)
|
||||||
|
|
||||||
|
assert "Contre-arguments..." in text
|
||||||
|
assert len(sources) == 1
|
||||||
|
assert sources[0].document == "guide_methodo"
|
||||||
|
mock_ollama.assert_called_once()
|
||||||
|
|
||||||
|
@patch("src.control.cpam_response.call_ollama")
|
||||||
|
@patch("src.control.cpam_response._search_rag_for_control")
|
||||||
|
def test_generate_ollama_unavailable(self, mock_rag, mock_ollama):
|
||||||
|
mock_rag.return_value = []
|
||||||
|
mock_ollama.return_value = None
|
||||||
|
|
||||||
|
dossier = _make_dossier()
|
||||||
|
controle = _make_controle()
|
||||||
|
|
||||||
|
text, sources = generate_cpam_response(dossier, controle)
|
||||||
|
|
||||||
|
assert text == ""
|
||||||
|
assert sources == []
|
||||||
@@ -104,3 +104,59 @@ class TestIsValidDiagnosticText:
|
|||||||
|
|
||||||
def test_accept_sepsis(self):
|
def test_accept_sepsis(self):
|
||||||
assert is_valid_diagnostic_text("Sepsis sévère")
|
assert is_valid_diagnostic_text("Sepsis sévère")
|
||||||
|
|
||||||
|
# --- Règle 5 modifiée : mots dupliqués (2 mots identiques) ---
|
||||||
|
def test_reject_absence_absence(self):
|
||||||
|
assert not is_valid_diagnostic_text("Absence absence")
|
||||||
|
|
||||||
|
def test_reject_anticoagulant_anticoagulant(self):
|
||||||
|
assert not is_valid_diagnostic_text("Anticoagulant anticoagulant")
|
||||||
|
|
||||||
|
def test_reject_ventilation_ventilation(self):
|
||||||
|
assert not is_valid_diagnostic_text("Ventilation ventilation")
|
||||||
|
|
||||||
|
# --- Règle 7 : ponctuation initiale ---
|
||||||
|
def test_reject_comma_prefix(self):
|
||||||
|
assert not is_valid_diagnostic_text(", sans précision")
|
||||||
|
|
||||||
|
def test_reject_dash_prefix(self):
|
||||||
|
assert not is_valid_diagnostic_text("- masse musculaire")
|
||||||
|
|
||||||
|
# --- Règle 8 : valeurs numériques OCR "À X.X" ---
|
||||||
|
def test_reject_a_accent_value(self):
|
||||||
|
assert not is_valid_diagnostic_text("À 0.1")
|
||||||
|
|
||||||
|
def test_reject_a_accent_value_3(self):
|
||||||
|
assert not is_valid_diagnostic_text("À 3.0")
|
||||||
|
|
||||||
|
def test_reject_a_value(self):
|
||||||
|
assert not is_valid_diagnostic_text("A 12,5")
|
||||||
|
|
||||||
|
# --- Règle 9 : crochets (artefacts OCR) ---
|
||||||
|
def test_reject_bracket_fragment(self):
|
||||||
|
assert not is_valid_diagnostic_text("Episode [episode")
|
||||||
|
|
||||||
|
def test_reject_closing_bracket(self):
|
||||||
|
assert not is_valid_diagnostic_text("valeur]")
|
||||||
|
|
||||||
|
# --- Règle 10 : termes de laboratoire isolés ---
|
||||||
|
def test_reject_hemoglobine(self):
|
||||||
|
assert not is_valid_diagnostic_text("Hémoglobine")
|
||||||
|
|
||||||
|
def test_reject_creatinine(self):
|
||||||
|
assert not is_valid_diagnostic_text("Créatinine")
|
||||||
|
|
||||||
|
def test_accept_hemoglobine_in_phrase(self):
|
||||||
|
"""Un terme labo dans un contexte clinique est accepté."""
|
||||||
|
assert is_valid_diagnostic_text("Hémoglobine basse avec anémie")
|
||||||
|
|
||||||
|
# --- Règle 11 : fragments anatomiques courts ---
|
||||||
|
def test_reject_dans_la_vessie(self):
|
||||||
|
assert not is_valid_diagnostic_text("Dans la vessie")
|
||||||
|
|
||||||
|
def test_reject_le_rein(self):
|
||||||
|
assert not is_valid_diagnostic_text("Le rein")
|
||||||
|
|
||||||
|
def test_accept_long_fragment(self):
|
||||||
|
"""Un fragment long commençant par 'Dans' peut être légitime."""
|
||||||
|
assert is_valid_diagnostic_text("Dans le cadre d'une insuffisance rénale chronique terminale")
|
||||||
|
|||||||
189
tests/test_ghm.py
Normal file
189
tests/test_ghm.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
"""Tests pour le module d'estimation GHM."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.config import ActeCCAM, Diagnostic, DossierMedical
|
||||||
|
from src.medical.ghm import estimate_ghm, find_cmd, _detect_type_ghm, _compute_severity
|
||||||
|
|
||||||
|
|
||||||
|
class TestFindCMD:
|
||||||
|
def test_k85_hepatobilaire(self):
|
||||||
|
cmd, libelle = find_cmd("K85.1")
|
||||||
|
assert cmd == "07"
|
||||||
|
assert "hépatobiliaire" in libelle.lower() or "pancréat" in libelle.lower()
|
||||||
|
|
||||||
|
def test_j18_respiratoire(self):
|
||||||
|
cmd, _ = find_cmd("J18")
|
||||||
|
assert cmd == "04"
|
||||||
|
|
||||||
|
def test_n17_renal(self):
|
||||||
|
cmd, _ = find_cmd("N17")
|
||||||
|
assert cmd == "11"
|
||||||
|
|
||||||
|
def test_n40_genital_masculin(self):
|
||||||
|
cmd, _ = find_cmd("N40")
|
||||||
|
assert cmd == "12"
|
||||||
|
|
||||||
|
def test_f10_toxicomanie(self):
|
||||||
|
cmd, _ = find_cmd("F10")
|
||||||
|
assert cmd == "20"
|
||||||
|
|
||||||
|
def test_z00_facteurs(self):
|
||||||
|
cmd, _ = find_cmd("Z00")
|
||||||
|
assert cmd == "23"
|
||||||
|
|
||||||
|
def test_k40_digestif(self):
|
||||||
|
cmd, _ = find_cmd("K40")
|
||||||
|
assert cmd == "06"
|
||||||
|
|
||||||
|
def test_b20_vih(self):
|
||||||
|
cmd, _ = find_cmd("B20")
|
||||||
|
assert cmd == "25"
|
||||||
|
|
||||||
|
def test_t25_brulures(self):
|
||||||
|
cmd, _ = find_cmd("T25")
|
||||||
|
assert cmd == "22"
|
||||||
|
|
||||||
|
def test_s72_traumatismes(self):
|
||||||
|
cmd, _ = find_cmd("S72")
|
||||||
|
assert cmd == "21"
|
||||||
|
|
||||||
|
def test_code_with_dot(self):
|
||||||
|
cmd, _ = find_cmd("K85.1")
|
||||||
|
assert cmd == "07"
|
||||||
|
|
||||||
|
def test_code_lowercase(self):
|
||||||
|
cmd, _ = find_cmd("k85.1")
|
||||||
|
assert cmd == "07"
|
||||||
|
|
||||||
|
def test_empty_code(self):
|
||||||
|
cmd, libelle = find_cmd("")
|
||||||
|
assert cmd is None
|
||||||
|
assert libelle is None
|
||||||
|
|
||||||
|
def test_none_code(self):
|
||||||
|
cmd, libelle = find_cmd(None)
|
||||||
|
assert cmd is None
|
||||||
|
assert libelle is None
|
||||||
|
|
||||||
|
def test_short_code(self):
|
||||||
|
cmd, libelle = find_cmd("K8")
|
||||||
|
assert cmd is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestDetectTypeGHM:
|
||||||
|
def test_chirurgical(self):
|
||||||
|
actes = [ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")]
|
||||||
|
assert _detect_type_ghm(actes) == "C"
|
||||||
|
|
||||||
|
def test_interventionnel(self):
|
||||||
|
actes = [ActeCCAM(texte="Échographie", code_ccam_suggestion="ZCQM001")]
|
||||||
|
assert _detect_type_ghm(actes) == "K"
|
||||||
|
|
||||||
|
def test_medical_no_actes(self):
|
||||||
|
assert _detect_type_ghm([]) == "M"
|
||||||
|
|
||||||
|
def test_medical_no_code(self):
|
||||||
|
actes = [ActeCCAM(texte="Biopsie", code_ccam_suggestion=None)]
|
||||||
|
assert _detect_type_ghm(actes) == "M"
|
||||||
|
|
||||||
|
def test_chirurgical_overrides_interventionnel(self):
|
||||||
|
actes = [
|
||||||
|
ActeCCAM(texte="Écho", code_ccam_suggestion="ZCQM001"),
|
||||||
|
ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
|
||||||
|
]
|
||||||
|
assert _detect_type_ghm(actes) == "C"
|
||||||
|
|
||||||
|
|
||||||
|
class TestSeverityLevels:
|
||||||
|
def test_no_cma_level_1(self):
|
||||||
|
das = [Diagnostic(texte="HTA", cim10_suggestion="I10")]
|
||||||
|
niveau, cma, cms = _compute_severity(das)
|
||||||
|
assert niveau == 1
|
||||||
|
|
||||||
|
def test_two_cma_level_2(self):
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
|
||||||
|
Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
|
||||||
|
]
|
||||||
|
niveau, cma, cms = _compute_severity(das)
|
||||||
|
assert niveau == 2
|
||||||
|
assert cma == 2
|
||||||
|
|
||||||
|
def test_one_cms_level_3(self):
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9", est_cma=True, est_cms=True),
|
||||||
|
]
|
||||||
|
niveau, cma, cms = _compute_severity(das)
|
||||||
|
assert niveau == 3
|
||||||
|
assert cms == 1
|
||||||
|
|
||||||
|
def test_two_cms_level_4(self):
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Sepsis", cim10_suggestion="A41.9", est_cma=True, est_cms=True),
|
||||||
|
Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True, est_cms=True),
|
||||||
|
]
|
||||||
|
niveau, cma, cms = _compute_severity(das)
|
||||||
|
assert niveau == 4
|
||||||
|
assert cms == 2
|
||||||
|
|
||||||
|
def test_three_cma_level_3(self):
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
|
||||||
|
Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
|
||||||
|
Diagnostic(texte="Diabète", cim10_suggestion="E11.9", est_cma=True),
|
||||||
|
]
|
||||||
|
niveau, cma, cms = _compute_severity(das)
|
||||||
|
assert niveau == 3
|
||||||
|
assert cma == 3
|
||||||
|
|
||||||
|
|
||||||
|
class TestEstimateGHM:
|
||||||
|
def test_chirurgical_with_cma(self):
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K80.1"),
|
||||||
|
actes_ccam=[ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")],
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
|
||||||
|
Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
assert ghm.cmd == "07"
|
||||||
|
assert ghm.type_ghm == "C"
|
||||||
|
assert ghm.severite == 2
|
||||||
|
assert ghm.ghm_approx == "07C??2"
|
||||||
|
assert ghm.cma_count == 2
|
||||||
|
|
||||||
|
def test_medical_sans_actes(self):
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"),
|
||||||
|
)
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
assert ghm.cmd == "04"
|
||||||
|
assert ghm.type_ghm == "M"
|
||||||
|
assert ghm.severite == 1
|
||||||
|
assert ghm.ghm_approx == "04M??1"
|
||||||
|
|
||||||
|
def test_dp_absent(self):
|
||||||
|
dossier = DossierMedical()
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
assert ghm.cmd is None
|
||||||
|
assert ghm.ghm_approx is None
|
||||||
|
assert any("DP absent" in a for a in ghm.alertes)
|
||||||
|
|
||||||
|
def test_dp_sans_code(self):
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Douleur thoracique"),
|
||||||
|
)
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
assert ghm.cmd is None
|
||||||
|
assert any("sans code" in a for a in ghm.alertes)
|
||||||
|
|
||||||
|
def test_dp_symptomatique(self):
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Douleur thoracique", cim10_suggestion="R07.4"),
|
||||||
|
)
|
||||||
|
ghm = estimate_ghm(dossier)
|
||||||
|
assert ghm.cmd == "23"
|
||||||
|
assert any("symptomatique" in a for a in ghm.alertes)
|
||||||
108
tests/test_ollama_cache.py
Normal file
108
tests/test_ollama_cache.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
"""Tests unitaires pour le cache Ollama persistant."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.medical.ollama_cache import OllamaCache
|
||||||
|
|
||||||
|
|
||||||
|
class TestOllamaCache:
|
||||||
|
def test_get_miss(self, tmp_path):
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
assert cache.get("HTA", "das") is None
|
||||||
|
|
||||||
|
def test_put_and_get(self, tmp_path):
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
result = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"}
|
||||||
|
cache.put("HTA", "das", result)
|
||||||
|
assert cache.get("HTA", "das") == result
|
||||||
|
|
||||||
|
def test_key_normalization(self, tmp_path):
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
result = {"code": "I10", "confidence": "high"}
|
||||||
|
cache.put(" HTA ", "das", result)
|
||||||
|
assert cache.get("hta", "das") == result
|
||||||
|
|
||||||
|
def test_different_types_different_keys(self, tmp_path):
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
cache.put("Diabète", "dp", {"code": "E11.9"})
|
||||||
|
cache.put("Diabète", "das", {"code": "E11.8"})
|
||||||
|
assert cache.get("Diabète", "dp")["code"] == "E11.9"
|
||||||
|
assert cache.get("Diabète", "das")["code"] == "E11.8"
|
||||||
|
|
||||||
|
def test_save_and_reload(self, tmp_path):
|
||||||
|
path = tmp_path / "cache.json"
|
||||||
|
cache = OllamaCache(path, "gemma3:12b")
|
||||||
|
cache.put("HTA", "das", {"code": "I10"})
|
||||||
|
cache.save()
|
||||||
|
|
||||||
|
assert path.exists()
|
||||||
|
|
||||||
|
cache2 = OllamaCache(path, "gemma3:12b")
|
||||||
|
assert cache2.get("HTA", "das") == {"code": "I10"}
|
||||||
|
|
||||||
|
def test_save_no_write_if_clean(self, tmp_path):
|
||||||
|
path = tmp_path / "cache.json"
|
||||||
|
cache = OllamaCache(path, "gemma3:12b")
|
||||||
|
cache.save()
|
||||||
|
assert not path.exists()
|
||||||
|
|
||||||
|
def test_model_change_invalidates(self, tmp_path):
|
||||||
|
path = tmp_path / "cache.json"
|
||||||
|
cache = OllamaCache(path, "gemma3:12b")
|
||||||
|
cache.put("HTA", "das", {"code": "I10"})
|
||||||
|
cache.save()
|
||||||
|
|
||||||
|
cache2 = OllamaCache(path, "llama3:8b")
|
||||||
|
assert cache2.get("HTA", "das") is None
|
||||||
|
assert len(cache2) == 0
|
||||||
|
|
||||||
|
def test_corrupted_file(self, tmp_path):
|
||||||
|
path = tmp_path / "cache.json"
|
||||||
|
path.write_text("not valid json", encoding="utf-8")
|
||||||
|
|
||||||
|
cache = OllamaCache(path, "gemma3:12b")
|
||||||
|
assert len(cache) == 0
|
||||||
|
assert cache.get("HTA", "das") is None
|
||||||
|
|
||||||
|
def test_len(self, tmp_path):
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
assert len(cache) == 0
|
||||||
|
cache.put("HTA", "das", {"code": "I10"})
|
||||||
|
assert len(cache) == 1
|
||||||
|
cache.put("Diabète", "dp", {"code": "E11.9"})
|
||||||
|
assert len(cache) == 2
|
||||||
|
|
||||||
|
def test_thread_safety(self, tmp_path):
|
||||||
|
"""Écriture concurrente depuis plusieurs threads."""
|
||||||
|
cache = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
|
||||||
|
errors = []
|
||||||
|
|
||||||
|
def writer(i):
|
||||||
|
try:
|
||||||
|
cache.put(f"diag_{i}", "das", {"code": f"X{i:02d}"})
|
||||||
|
except Exception as e:
|
||||||
|
errors.append(e)
|
||||||
|
|
||||||
|
threads = [threading.Thread(target=writer, args=(i,)) for i in range(20)]
|
||||||
|
for t in threads:
|
||||||
|
t.start()
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
|
||||||
|
assert not errors
|
||||||
|
assert len(cache) == 20
|
||||||
|
|
||||||
|
def test_json_format(self, tmp_path):
|
||||||
|
"""Le fichier JSON contient le modèle et les entrées."""
|
||||||
|
path = tmp_path / "cache.json"
|
||||||
|
cache = OllamaCache(path, "gemma3:12b")
|
||||||
|
cache.put("HTA", "das", {"code": "I10"})
|
||||||
|
cache.save()
|
||||||
|
|
||||||
|
raw = json.loads(path.read_text(encoding="utf-8"))
|
||||||
|
assert raw["model"] == "gemma3:12b"
|
||||||
|
assert "entries" in raw
|
||||||
|
assert len(raw["entries"]) == 1
|
||||||
@@ -7,7 +7,8 @@ from unittest.mock import patch, MagicMock
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
|
from src.config import RAGSource, Diagnostic, ActeCCAM, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
|
||||||
|
from src.medical.ollama_cache import OllamaCache
|
||||||
|
|
||||||
|
|
||||||
class TestRAGSource:
|
class TestRAGSource:
|
||||||
@@ -494,6 +495,47 @@ class TestRAGSearchMocked:
|
|||||||
assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancréatite..."
|
assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancréatite..."
|
||||||
assert len(diag.sources_rag) == 1
|
assert len(diag.sources_rag) == 1
|
||||||
|
|
||||||
|
def test_enrich_diagnostic_invalid_code_ignored(self):
|
||||||
|
"""Un code Ollama invalide ne remplace pas le code existant."""
|
||||||
|
from src.medical.rag_search import enrich_diagnostic
|
||||||
|
|
||||||
|
diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
|
||||||
|
mock_sources = [
|
||||||
|
{"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
|
||||||
|
]
|
||||||
|
mock_llm = {
|
||||||
|
"code": "X99.99", # code invalide
|
||||||
|
"confidence": "high",
|
||||||
|
"justification": "Hallucination",
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \
|
||||||
|
patch("src.medical.rag_search._call_ollama", return_value=mock_llm):
|
||||||
|
enrich_diagnostic(diag, {"sexe": "M", "age": 50})
|
||||||
|
|
||||||
|
# Le code original est conservé (pas remplacé par le code invalide)
|
||||||
|
assert diag.cim10_suggestion == "K85.9"
|
||||||
|
|
||||||
|
def test_enrich_diagnostic_normalizes_code(self):
|
||||||
|
"""Un code Ollama sans point est normalisé (K851 → K85.1)."""
|
||||||
|
from src.medical.rag_search import enrich_diagnostic
|
||||||
|
|
||||||
|
diag = Diagnostic(texte="Pancréatite aiguë biliaire")
|
||||||
|
mock_sources = [
|
||||||
|
{"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
|
||||||
|
]
|
||||||
|
mock_llm = {
|
||||||
|
"code": "K851", # sans point
|
||||||
|
"confidence": "high",
|
||||||
|
"justification": "Pancréatite biliaire",
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \
|
||||||
|
patch("src.medical.rag_search._call_ollama", return_value=mock_llm):
|
||||||
|
enrich_diagnostic(diag, {"sexe": "F", "age": 43})
|
||||||
|
|
||||||
|
assert diag.cim10_suggestion == "K85.1"
|
||||||
|
|
||||||
def test_enrich_diagnostic_est_dp_flag(self):
|
def test_enrich_diagnostic_est_dp_flag(self):
|
||||||
"""Le flag est_dp est bien passé à _build_prompt."""
|
"""Le flag est_dp est bien passé à _build_prompt."""
|
||||||
from src.medical.rag_search import enrich_diagnostic
|
from src.medical.rag_search import enrich_diagnostic
|
||||||
@@ -533,10 +575,12 @@ class TestEnrichDossier:
|
|||||||
|
|
||||||
captured_contexts = []
|
captured_contexts = []
|
||||||
|
|
||||||
def mock_enrich(diag, contexte, est_dp=True):
|
def mock_enrich(diag, contexte, est_dp=True, cache=None):
|
||||||
captured_contexts.append(contexte.copy())
|
captured_contexts.append(contexte.copy())
|
||||||
|
|
||||||
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
|
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
|
||||||
|
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
|
||||||
|
mock_cache_cls.return_value = MagicMock()
|
||||||
enrich_dossier(dossier)
|
enrich_dossier(dossier)
|
||||||
|
|
||||||
assert len(captured_contexts) == 1 # DP seulement (pas de DAS)
|
assert len(captured_contexts) == 1 # DP seulement (pas de DAS)
|
||||||
@@ -563,10 +607,12 @@ class TestEnrichDossier:
|
|||||||
|
|
||||||
captured = []
|
captured = []
|
||||||
|
|
||||||
def mock_enrich(diag, contexte, est_dp=True):
|
def mock_enrich(diag, contexte, est_dp=True, cache=None):
|
||||||
captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")})
|
captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")})
|
||||||
|
|
||||||
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
|
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
|
||||||
|
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
|
||||||
|
mock_cache_cls.return_value = MagicMock()
|
||||||
enrich_dossier(dossier)
|
enrich_dossier(dossier)
|
||||||
|
|
||||||
assert len(captured) == 2
|
assert len(captured) == 2
|
||||||
@@ -578,6 +624,149 @@ class TestEnrichDossier:
|
|||||||
assert captured[1]["dp_texte"] == "Pancréatite aiguë biliaire"
|
assert captured[1]["dp_texte"] == "Pancréatite aiguë biliaire"
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeCode:
|
||||||
|
def test_insert_dot(self):
|
||||||
|
from src.medical.cim10_dict import normalize_code
|
||||||
|
assert normalize_code("K810") == "K81.0"
|
||||||
|
|
||||||
|
def test_already_dotted(self):
|
||||||
|
from src.medical.cim10_dict import normalize_code
|
||||||
|
assert normalize_code("k85.1") == "K85.1"
|
||||||
|
|
||||||
|
def test_three_chars(self):
|
||||||
|
from src.medical.cim10_dict import normalize_code
|
||||||
|
assert normalize_code("K85") == "K85"
|
||||||
|
|
||||||
|
def test_strip_spaces(self):
|
||||||
|
from src.medical.cim10_dict import normalize_code
|
||||||
|
assert normalize_code(" E660 ") == "E66.0"
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidateCodeCIM10:
|
||||||
|
def test_known_code(self):
|
||||||
|
from src.medical.cim10_dict import validate_code
|
||||||
|
is_valid, label = validate_code("K81.9")
|
||||||
|
assert is_valid is True
|
||||||
|
assert label # non vide
|
||||||
|
|
||||||
|
def test_unknown_code(self):
|
||||||
|
from src.medical.cim10_dict import validate_code
|
||||||
|
is_valid, label = validate_code("Z99.99")
|
||||||
|
assert is_valid is False
|
||||||
|
assert label == ""
|
||||||
|
|
||||||
|
def test_normalize_before_validate(self):
|
||||||
|
"""K810 doit être normalisé en K81.0 et trouvé."""
|
||||||
|
from src.medical.cim10_dict import validate_code
|
||||||
|
is_valid, label = validate_code("K810")
|
||||||
|
assert is_valid is True
|
||||||
|
|
||||||
|
def test_three_char_code(self):
|
||||||
|
"""Code parent sans point (K85) doit être validé."""
|
||||||
|
from src.medical.cim10_dict import validate_code
|
||||||
|
is_valid, label = validate_code("K85")
|
||||||
|
assert is_valid is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidateCIM10PostProcessing:
|
||||||
|
def test_hallucination_rejected(self):
|
||||||
|
"""Les codes hallucination (Aucun, N/A...) sont rejetés."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Aucun"),
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostic_principal.cim10_suggestion is None
|
||||||
|
assert any("rejeté" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
def test_normalizes_format(self):
|
||||||
|
"""K810 est normalisé en K81.0."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K810"),
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"
|
||||||
|
|
||||||
|
def test_invalid_code_gets_low_confidence(self):
|
||||||
|
"""Un code inexistant reçoit confidence=low et une alerte."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Chose bizarre", cim10_suggestion="Z99.99"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_confidence == "low"
|
||||||
|
assert any("absent du dictionnaire" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
def test_valid_code_unchanged(self):
|
||||||
|
"""Un code valide n'est pas modifié et pas d'alerte."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1"),
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
|
||||||
|
assert not any("CIM-10" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
def test_non_codable_rejected(self):
|
||||||
|
"""'non_codable' est rejeté comme hallucination."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Truc", cim10_suggestion="non_codable"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_suggestion is None
|
||||||
|
|
||||||
|
def test_hallucination_fallback_found(self):
|
||||||
|
"""Hallucination rejetée mais fallback dictionnaire trouve un code."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="Aucun"),
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"
|
||||||
|
assert dossier.diagnostic_principal.cim10_confidence == "medium"
|
||||||
|
assert any("fallback" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
def test_invalid_code_fallback_found(self):
|
||||||
|
"""Code invalide remplacé par fallback dictionnaire."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I99.99"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_suggestion == "I10"
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_confidence == "medium"
|
||||||
|
assert any("fallback" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
def test_invalid_code_no_fallback(self):
|
||||||
|
"""Code invalide sans fallback possible → low confidence."""
|
||||||
|
from src.medical.cim10_extractor import _validate_cim10
|
||||||
|
|
||||||
|
dossier = DossierMedical(
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Chose bizarre inconnue", cim10_suggestion="Z99.99"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
_validate_cim10(dossier)
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_suggestion == "Z99.99"
|
||||||
|
assert dossier.diagnostics_associes[0].cim10_confidence == "low"
|
||||||
|
assert any("absent du dictionnaire" in a for a in dossier.alertes_codage)
|
||||||
|
|
||||||
|
|
||||||
class TestFormatContexte:
|
class TestFormatContexte:
|
||||||
"""Tests pour _format_contexte."""
|
"""Tests pour _format_contexte."""
|
||||||
|
|
||||||
@@ -610,3 +799,241 @@ class TestFormatContexte:
|
|||||||
assert "TDM abdominal" in result
|
assert "TDM abdominal" in result
|
||||||
assert "éruption cutanée" in result
|
assert "éruption cutanée" in result
|
||||||
assert "Pancréatite aiguë biliaire" in result
|
assert "Pancréatite aiguë biliaire" in result
|
||||||
|
|
||||||
|
|
||||||
|
class TestActeCCAMExtended:
|
||||||
|
def test_backward_compatible(self):
|
||||||
|
"""Les nouveaux champs RAG sont optionnels — rétrocompatible."""
|
||||||
|
a = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
|
||||||
|
assert a.texte == "Cholécystectomie"
|
||||||
|
assert a.code_ccam_suggestion == "HMFC004"
|
||||||
|
assert a.ccam_confidence is None
|
||||||
|
assert a.justification is None
|
||||||
|
assert a.raisonnement is None
|
||||||
|
assert a.sources_rag == []
|
||||||
|
|
||||||
|
def test_with_rag_fields(self):
|
||||||
|
a = ActeCCAM(
|
||||||
|
texte="Cholécystectomie par coelioscopie",
|
||||||
|
code_ccam_suggestion="HMFC004",
|
||||||
|
ccam_confidence="high",
|
||||||
|
justification="HMFC004 correspond à la cholécystectomie par coelioscopie",
|
||||||
|
raisonnement="ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
|
||||||
|
sources_rag=[
|
||||||
|
RAGSource(document="ccam", page=10, code="HMFC004"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
assert a.ccam_confidence == "high"
|
||||||
|
assert a.justification is not None
|
||||||
|
assert len(a.sources_rag) == 1
|
||||||
|
assert a.sources_rag[0].code == "HMFC004"
|
||||||
|
|
||||||
|
def test_serialization_exclude_none(self):
|
||||||
|
a = ActeCCAM(texte="Test", code_ccam_suggestion="HMFC004")
|
||||||
|
data = a.model_dump(exclude_none=True)
|
||||||
|
assert "ccam_confidence" not in data
|
||||||
|
assert "justification" not in data
|
||||||
|
assert "raisonnement" not in data
|
||||||
|
assert "sources_rag" in data
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchSimilarCCAM:
    """Behaviour of the FAISS-backed CCAM similarity search."""

    def test_prioritizes_ccam(self):
        """CCAM sources are prioritised (at least 5 out of the 8 results)."""
        from src.medical.rag_search import search_similar_ccam
        import numpy as np

        # Index metadata: 6 CCAM entries followed by 6 methodology-guide entries.
        mock_metadata = [
            {"document": "ccam", "code": f"HMFC00{i}", "page": i, "extrait": f"CCAM {i}"}
            for i in range(6)
        ]
        mock_metadata += [
            {"document": "guide_methodo", "page": i + 10, "extrait": f"Guide {i}"}
            for i in range(6)
        ]

        # Fake FAISS index returning all 12 entries with decreasing scores,
        # so guide entries are interleaved close behind the CCAM ones.
        mock_index = MagicMock()
        mock_index.ntotal = 12
        scores = np.array([[0.9 - i * 0.03 for i in range(12)]], dtype=np.float32)
        indices = np.array([list(range(12))], dtype=np.int64)
        mock_index.search.return_value = (scores, indices)

        with patch("src.medical.rag_index.get_index", return_value=(mock_index, mock_metadata)), \
             patch("src.medical.rag_search._get_embed_model") as mock_model:
            mock_model.return_value.encode.return_value = np.array([[0.1] * 768], dtype=np.float32)
            results = search_similar_ccam("cholécystectomie", top_k=8)

        ccam_count = sum(1 for r in results if r["document"] == "ccam")
        assert ccam_count >= 5, f"Seulement {ccam_count} sources CCAM sur {len(results)}"

    def test_no_index(self):
        """search_similar_ccam returns an empty list when no index exists."""
        from src.medical.rag_search import search_similar_ccam

        with patch("src.medical.rag_index.get_index", return_value=None):
            results = search_similar_ccam("cholécystectomie")

        assert results == []
class TestEnrichActe:
    """RAG + LLM enrichment of a single CCAM act."""

    @staticmethod
    def _single_source():
        """One minimal FAISS hit pointing at HMFC004."""
        return [{"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9}]

    def test_enrich_with_ollama(self):
        """Full path: FAISS sources + Ollama answer + code validated against the CCAM."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie par coelioscopie")
        sources = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004 Cholécystectomie par coelioscopie...",
                "score": 0.92,
            },
        ]
        llm_answer = {
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie par coelioscopie = HMFC004",
            "raisonnement": "ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
        }

        with patch("src.medical.rag_search.search_similar_ccam", return_value=sources), \
             patch("src.medical.rag_search._call_ollama", return_value=llm_answer), \
             patch("src.medical.rag_search.ccam_validate", return_value=(True, "Cholécystectomie")):
            enrich_acte(acte, {"sexe": "F", "age": 43})

        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence == "high"
        assert acte.justification == "Cholécystectomie par coelioscopie = HMFC004"
        assert acte.raisonnement is not None
        assert len(acte.sources_rag) == 1

    def test_enrich_no_sources(self):
        """enrich_acte must not crash (and must not enrich) without FAISS sources."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Acte inconnu", code_ccam_suggestion="ABCD123")

        with patch("src.medical.rag_search.search_similar_ccam", return_value=[]):
            enrich_acte(acte, {"sexe": "M", "age": 50})

        assert acte.sources_rag == []
        assert acte.justification is None

    def test_enrich_no_ollama(self):
        """FAISS sources are still attached when Ollama returns nothing."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")

        with patch("src.medical.rag_search.search_similar_ccam", return_value=self._single_source()), \
             patch("src.medical.rag_search._call_ollama", return_value=None):
            enrich_acte(acte, {"sexe": "M", "age": 50})

        assert len(acte.sources_rag) == 1
        assert acte.justification is None
        assert acte.raisonnement is None

    def test_enrich_invalid_code(self):
        """A hallucinated CCAM code from Ollama must not replace the existing one."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        llm_answer = {
            "code": "ZZZZ999",
            "confidence": "high",
            "justification": "Hallucination",
        }

        with patch("src.medical.rag_search.search_similar_ccam", return_value=self._single_source()), \
             patch("src.medical.rag_search._call_ollama", return_value=llm_answer), \
             patch("src.medical.rag_search.ccam_validate", return_value=(False, "")):
            enrich_acte(acte, {"sexe": "M", "age": 50})

        # The original code is kept ...
        assert acte.code_ccam_suggestion == "HMFC004"
        # ... but the confidence reported by the LLM is still applied.
        assert acte.ccam_confidence == "high"
class TestEnrichDossierCCAM:
    """enrich_dossier must dispatch enrichment to every CCAM act."""

    def test_enriches_actes(self):
        """Both acts of the dossier go through enrich_acte exactly once."""
        from src.medical.rag_search import enrich_dossier

        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Lithiase vésiculaire"),
            actes_ccam=[
                ActeCCAM(texte="Cholécystectomie par coelioscopie"),
                ActeCCAM(texte="Anesthésie générale"),
            ],
        )

        enriched = []

        # Stand-ins matching the real enrichment signatures (incl. cache kwarg).
        def fake_enrich_diag(diag, contexte, est_dp=True, cache=None):
            pass

        def fake_enrich_acte(acte, contexte, cache=None):
            enriched.append(acte.texte)

        with patch("src.medical.rag_search.enrich_diagnostic", side_effect=fake_enrich_diag), \
             patch("src.medical.rag_search.enrich_acte", side_effect=fake_enrich_acte), \
             patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
            mock_cache_cls.return_value = MagicMock()
            enrich_dossier(dossier)

        assert len(enriched) == 2
        assert "Cholécystectomie par coelioscopie" in enriched
        assert "Anesthésie générale" in enriched
class TestBuildPromptCCAM:
    """Content checks on the CCAM-coding prompt sent to Ollama."""

    def test_prompt_contains_acte(self):
        """The prompt embeds the act wording and the expected JSON contract."""
        from src.medical.rag_search import _build_prompt_ccam

        sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie"}]
        prompt = _build_prompt_ccam("Cholécystectomie par coelioscopie", sources, {"sexe": "F", "age": 43})

        assert "Cholécystectomie par coelioscopie" in prompt
        assert "CCAM" in prompt
        assert "analyse_acte" in prompt
        assert "objet JSON" in prompt

    def test_prompt_contains_source_info(self):
        """The prompt cites the RAG source (document label and CCAM code)."""
        from src.medical.rag_search import _build_prompt_ccam

        sources = [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie par coelioscopie"}]
        prompt = _build_prompt_ccam("Cholécystectomie", sources, {})

        assert "CCAM PMSI V4 2025" in prompt
        assert "HMFC004" in prompt
class TestParseOllamaResponseCCAM:
    """Parsing of the structured CCAM JSON returned by Ollama."""

    def test_parse_ccam_structured_json(self):
        """Intermediate analysis keys are folded into a single 'raisonnement' field."""
        from src.medical.rag_search import _parse_ollama_response
        import json

        payload = {
            "analyse_acte": "Cholécystectomie par voie coelioscopique",
            "codes_candidats": "HMFC004, HMFC003",
            "discrimination": "HMFC004 est le code spécifique à la coelioscopie",
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie coelioscopique = HMFC004",
        }

        result = _parse_ollama_response(json.dumps(payload))

        assert result is not None
        assert result["code"] == "HMFC004"
        # The analysis steps are merged into one human-readable field ...
        assert "raisonnement" in result
        assert "ANALYSE ACTE" in result["raisonnement"]
        assert "CODES CANDIDATS" in result["raisonnement"]
        # ... and the raw intermediate key is removed from the result.
        assert "analyse_acte" not in result
|||||||
212
tests/test_rum_export.py
Normal file
212
tests/test_rum_export.py
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
"""Tests pour le module d'export RUM V016."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.config import ActeCCAM, Diagnostic, DossierMedical, Sejour
|
||||||
|
from src.export.rum_export import (
|
||||||
|
RUMConfig,
|
||||||
|
export_rum,
|
||||||
|
_format_cim10,
|
||||||
|
_format_date,
|
||||||
|
_format_sex,
|
||||||
|
_format_ccam_act,
|
||||||
|
_map_mode_entree,
|
||||||
|
_map_mode_sortie,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatHelpers:
    """Fixed-width field formatters used by the RUM V016 writer."""

    def test_format_cim10_normal(self):
        # Dot stripped, right-padded to the 8-char CIM-10 field.
        assert _format_cim10("K85.1") == "K851    "
        assert len(_format_cim10("K85.1")) == 8

    def test_format_cim10_short(self):
        padded = _format_cim10("J18")
        assert padded == "J18     "
        assert len(padded) == 8

    def test_format_cim10_none(self):
        assert _format_cim10(None) == "        "
        assert len(_format_cim10(None)) == 8

    def test_format_cim10_empty(self):
        assert _format_cim10("") == "        "

    def test_format_date_ddmmyyyy(self):
        assert _format_date("15/03/2025") == "15032025"

    def test_format_date_iso(self):
        # ISO input is normalised to DDMMYYYY as well.
        assert _format_date("2025-03-15") == "15032025"

    def test_format_date_none(self):
        assert _format_date(None) == "        "
        assert len(_format_date(None)) == 8

    def test_format_sex_masculin(self):
        for label in ("M", "Masculin", "H"):
            assert _format_sex(label) == "1"

    def test_format_sex_feminin(self):
        for label in ("F", "Féminin"):
            assert _format_sex(label) == "2"

    def test_format_sex_none(self):
        assert _format_sex(None) == " "

    def test_map_mode_entree(self):
        assert _map_mode_entree("Domicile") == "8"
        assert _map_mode_entree("Mutation") == "6"
        assert _map_mode_entree("Transfert") == "7"
        assert _map_mode_entree(None) == " "

    def test_map_mode_sortie(self):
        assert _map_mode_sortie("Domicile") == "8"
        assert _map_mode_sortie("Décès") == "9"
        assert _map_mode_sortie("Transfert") == "7"
        assert _map_mode_sortie(None) == " "

    def test_format_ccam_act(self):
        # 29-char act record: code(7) + phase(1) + activité(1) + date(8) + padding.
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="15/03/2025")
        record = _format_ccam_act(acte)

        assert len(record) == 29
        assert record[:7] == "HMFC004"
        assert record[7] == "1"  # phase
        assert record[8] == "1"  # activité
        assert record[9:17] == "15032025"  # date
class TestExportRUM:
    """Positional checks on the generated RUM V016 record."""

    def _make_dossier(self, **overrides):
        """A complete dossier (1 DP, 1 DAS, 1 act); any field overridable per test."""
        base = dict(
            source_file="test.pdf",
            sejour=Sejour(
                sexe="M",
                date_entree="01/01/2025",
                date_sortie="05/01/2025",
                mode_entree="Domicile",
                mode_sortie="Domicile",
            ),
            diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"),
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(texte="Radio thorax", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        base.update(overrides)
        return DossierMedical(**base)

    def test_fixed_zone_length(self):
        # Fixed zone is 165 chars; DAS and act zones extend it.
        rum = export_rum(self._make_dossier())
        assert len(rum) >= 165

    def test_fixed_zone_exact_165(self):
        # Without DAS and acts, only the fixed zone remains.
        rum = export_rum(self._make_dossier(diagnostics_associes=[], actes_ccam=[]))
        assert len(rum) == 165

    def test_version_format(self):
        rum = export_rum(self._make_dossier())
        assert rum[9:12] == "016"  # version format
        assert rum[24:27] == "016"  # version RUM

    def test_finess(self):
        rum = export_rum(self._make_dossier(), RUMConfig(finess="123456789"))
        assert rum[15:24] == "123456789"

    def test_sexe(self):
        rum = export_rum(self._make_dossier())
        assert rum[85] == "1"  # M

    def test_dates(self):
        rum = export_rum(self._make_dossier())
        assert rum[92:100] == "01012025"  # date entrée
        assert rum[102:110] == "05012025"  # date sortie

    def test_modes(self):
        rum = export_rum(self._make_dossier())
        assert rum[100] == "8"  # mode entrée domicile
        assert rum[110] == "8"  # mode sortie domicile

    def test_dp_field(self):
        rum = export_rum(self._make_dossier())
        assert rum[131:139] == "J189    "

    def test_nb_das(self):
        rum = export_rum(self._make_dossier())
        assert rum[125:127] == "01"

    def test_nb_actes(self):
        rum = export_rum(self._make_dossier())
        assert rum[129:131] == "01"

    def test_das_variable_zone(self):
        rum = export_rum(self._make_dossier())
        # The DAS zone starts at offset 165, 8 chars per code.
        assert rum[165:173] == "I10     "

    def test_acte_variable_zone(self):
        rum = export_rum(self._make_dossier())
        # After 1 DAS (8 chars), the 29-char act record starts at offset 173.
        acte_zone = rum[173:202]
        assert len(acte_zone) == 29
        assert acte_zone[:7] == "ZBQK002"

    def test_total_length(self):
        rum = export_rum(self._make_dossier())
        # 165 fixed + 1*8 (DAS) + 1*29 (act) = 202
        assert len(rum) == 202
class TestEdgeCases:
    """Degenerate dossiers must still yield a well-formed RUM record."""

    def test_no_dp(self):
        """A dossier without any diagnosis produces a blank DP field."""
        rum = export_rum(DossierMedical(source_file="test.pdf"))
        assert len(rum) == 165
        assert rum[131:139] == "        "

    def test_no_sejour_data(self):
        """Missing stay data leaves the corresponding fields blank."""
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="J18.9"),
        )
        rum = export_rum(dossier)
        assert len(rum) == 165
        assert rum[85] == " "  # sex field left blank

    def test_multiple_das_and_actes(self):
        """Counters and total length follow the number of DAS and acts."""
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="K85.1"),
            diagnostics_associes=[
                Diagnostic(texte="D1", cim10_suggestion="I10"),
                Diagnostic(texte="D2", cim10_suggestion="E11.9"),
                Diagnostic(texte="D3", cim10_suggestion="I48.9"),
            ],
            actes_ccam=[
                ActeCCAM(texte="A1", code_ccam_suggestion="HMFC004", date="01/01/2025"),
                ActeCCAM(texte="A2", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        rum = export_rum(dossier)
        # 165 + 3*8 + 2*29 = 165 + 24 + 58 = 247
        assert len(rum) == 247
        assert rum[125:127] == "03"  # DAS count
        assert rum[129:131] == "02"  # act count
@@ -90,7 +90,7 @@ class TestEnrichDossierSeverity:
|
|||||||
Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"),
|
Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"),
|
||||||
Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
|
Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
|
||||||
]
|
]
|
||||||
alertes = enrich_dossier_severity(dp, das)
|
alertes, cma_count, cms_count = enrich_dossier_severity(dp, das)
|
||||||
|
|
||||||
# I48.9 = CMA probable
|
# I48.9 = CMA probable
|
||||||
assert das[0].est_cma is True
|
assert das[0].est_cma is True
|
||||||
@@ -101,9 +101,21 @@ class TestEnrichDossierSeverity:
|
|||||||
|
|
||||||
# Au moins une alerte CMA
|
# Au moins une alerte CMA
|
||||||
assert any("CMA" in a for a in alertes)
|
assert any("CMA" in a for a in alertes)
|
||||||
|
assert cma_count >= 1
|
||||||
|
|
||||||
def test_dp_severity_set(self):
|
def test_dp_severity_set(self):
|
||||||
dp = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
|
dp = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
|
||||||
alertes = enrich_dossier_severity(dp, [])
|
alertes, cma_count, cms_count = enrich_dossier_severity(dp, [])
|
||||||
assert dp.niveau_severite == "severe"
|
assert dp.niveau_severite == "severe"
|
||||||
assert dp.est_cma is True
|
assert dp.est_cma is True
|
||||||
|
|
||||||
|
def test_cms_detection(self):
|
||||||
|
"""CMS détecté quand CMA + sévérité severe."""
|
||||||
|
dp = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9"),
|
||||||
|
]
|
||||||
|
alertes, cma_count, cms_count = enrich_dossier_severity(dp, das)
|
||||||
|
assert das[0].est_cma is True
|
||||||
|
assert das[0].est_cms is True
|
||||||
|
assert cms_count == 1
|
||||||
|
|||||||
Reference in New Issue
Block a user