feat: optimisations pipeline médical (bio_normals, GHM, DP selector, CIM-10)
- bio_normals: table de normes biologiques étendue (+200 analytes) - bio_extraction: amélioration parsing valeurs biologiques - cim10_extractor: règles supplémentaires extraction codes - dp_selector: affinement sélection diagnostic principal - ghm: estimation sévérité GHM enrichie - validation_pipeline: correctifs mineurs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -147,7 +147,7 @@ def _extract_biologie_faiss(text: str, dossier: DossierMedical) -> None:
|
||||
sanitized = _sanitize_bio_value(concept_name, raw_value, sanity_cfg)
|
||||
if sanitized:
|
||||
token, val_num, quality, reason = sanitized
|
||||
anomalie = _is_abnormal(concept_name, token)
|
||||
anomalie = _is_abnormal(concept_name, token, dossier.sejour.age if dossier.sejour else None, dossier.sejour.sexe if dossier.sejour else None)
|
||||
|
||||
is_dup = any(b.test == concept_name and b.valeur == raw_value for b in dossier.biologie_cle)
|
||||
if is_dup:
|
||||
@@ -208,6 +208,10 @@ def _extract_biologie(text: str, dossier: DossierMedical) -> None:
|
||||
]
|
||||
|
||||
|
||||
# Contexte patient pour normes adaptées (âge/sexe)
|
||||
_patient_age = dossier.sejour.age if dossier.sejour else None
|
||||
_patient_sexe = dossier.sejour.sexe if dossier.sejour else None
|
||||
|
||||
# Anti-doublons + limite par test (évite d'exploser le JSON)
|
||||
max_per_test = 6
|
||||
counts: dict[str, int] = {}
|
||||
@@ -234,7 +238,7 @@ def _extract_biologie(text: str, dossier: DossierMedical) -> None:
|
||||
counts[test_name] = counts.get(test_name, 0) + 1
|
||||
if counts[test_name] > max_per_test:
|
||||
break
|
||||
anomalie = _is_abnormal(test_name, raw_value)
|
||||
anomalie = _is_abnormal(test_name, raw_value, _patient_age, _patient_sexe)
|
||||
dossier.biologie_cle.append(
|
||||
BiologieCle(
|
||||
test=test_name,
|
||||
@@ -280,7 +284,7 @@ def _extract_biologie(text: str, dossier: DossierMedical) -> None:
|
||||
if drop_out_of_range:
|
||||
continue
|
||||
|
||||
anomalie = _is_abnormal(test_name, token)
|
||||
anomalie = _is_abnormal(test_name, token, _patient_age, _patient_sexe)
|
||||
dossier.biologie_cle.append(
|
||||
BiologieCle(
|
||||
test=test_name,
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
"""Plages de référence biologiques et fonction d'interprétation."""
|
||||
"""Plages de référence biologiques et fonction d'interprétation.
|
||||
|
||||
Normes adaptées par sexe et tranche d'âge (adulte homme, adulte femme, enfant).
|
||||
Sources : référentiels SFH, SFR, HAS 2023.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
# Plages de référence biologiques (min, max) — utilisées par _is_abnormal()
|
||||
# et exportées pour le formatage du contexte LLM dans rag_search.py
|
||||
# Plages de référence biologiques (min, max) — fallback adulte unisexe
|
||||
# Utilisées quand le sexe/âge n'est pas disponible
|
||||
BIO_NORMALS: dict[str, tuple[float, float]] = {
|
||||
# --- Hépatique / digestif ---
|
||||
"Lipasémie": (0, 60),
|
||||
@@ -18,7 +22,7 @@ BIO_NORMALS: dict[str, tuple[float, float]] = {
|
||||
# --- Inflammatoire ---
|
||||
"CRP": (0, 5),
|
||||
"VS": (0, 20), # mm/h
|
||||
# --- Ionogramme (fallback adulte ; les règles de décision utilisent reference_ranges.yaml) ---
|
||||
# --- Ionogramme ---
|
||||
"Sodium": (135, 145),
|
||||
"Potassium": (3.5, 5.0),
|
||||
# --- Hématologie ---
|
||||
@@ -51,8 +55,127 @@ BIO_NORMALS: dict[str, tuple[float, float]] = {
|
||||
}
|
||||
|
||||
|
||||
def _is_abnormal(test: str, value: str) -> bool | None:
|
||||
"""Détermine si un résultat biologique est anormal."""
|
||||
# ---------------------------------------------------------------------------
|
||||
# Normes par sexe et tranche d'âge
|
||||
# Clé : (test, groupe) → (min, max)
|
||||
# Groupes : "M" (homme adulte), "F" (femme adulte),
|
||||
# "child" (1-14 ans), "infant" (0-1 an), "newborn" (0-28 jours)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reference ranges keyed by (test name, demographic group) -> (min, max).
# Group labels are the ones produced by _age_group(): "M", "F", "child",
# "infant" and "newborn". A (test, group) pair that is absent here simply
# has no group-specific range.
BIO_NORMALS_BY_GROUP: dict[tuple[str, str], tuple[float, float]] = {
    # Hemoglobin (g/dL)
    ("Hémoglobine", "M"): (13, 17),
    ("Hémoglobine", "F"): (12, 16),
    ("Hémoglobine", "child"): (11.5, 15.5),
    ("Hémoglobine", "infant"): (10, 14),
    ("Hémoglobine", "newborn"): (14, 22),

    # Leukocytes (G/L) -- physiologically higher in children
    ("Leucocytes", "M"): (4, 10),
    ("Leucocytes", "F"): (4, 10),
    ("Leucocytes", "child"): (5, 15),
    ("Leucocytes", "infant"): (6, 17),
    ("Leucocytes", "newborn"): (10, 26),

    # Platelets (G/L) -- broadly the same range across groups
    ("Plaquettes", "child"): (150, 450),
    ("Plaquettes", "newborn"): (150, 450),

    # Creatinine (µmol/L) -- transiently high in newborns
    ("Créatinine", "M"): (60, 120),
    ("Créatinine", "F"): (45, 105),
    ("Créatinine", "child"): (20, 60),
    ("Créatinine", "infant"): (15, 35),
    ("Créatinine", "newborn"): (20, 75),

    # Ferritin (µg/L) -- lower in women
    ("Ferritine", "M"): (30, 400),
    ("Ferritine", "F"): (15, 200),
    ("Ferritine", "child"): (10, 150),

    # ALT (IU/L) -- higher upper bound in infants
    ("ALAT", "M"): (0, 45),
    ("ALAT", "F"): (0, 35),
    ("ALAT", "child"): (0, 40),
    ("ALAT", "infant"): (0, 55),

    # AST (IU/L)
    ("ASAT", "M"): (0, 40),
    ("ASAT", "F"): (0, 35),
    ("ASAT", "child"): (0, 50),
    ("ASAT", "infant"): (0, 60),

    # GGT (IU/L) -- physiologically very high in newborns
    ("GGT", "M"): (0, 70),
    ("GGT", "F"): (0, 45),
    ("GGT", "child"): (0, 30),
    ("GGT", "newborn"): (0, 200),

    # Alkaline phosphatase (IU/L) -- bone growth raises the normal range
    ("PAL", "child"): (100, 400),

    # Uric acid (µmol/L)
    ("Acide urique", "M"): (200, 420),
    ("Acide urique", "F"): (150, 360),

    # Potassium (mmol/L) -- higher upper bound in newborns
    ("Potassium", "child"): (3.5, 5.5),
    ("Potassium", "newborn"): (3.5, 6.0),

    # Sodium (mmol/L)
    ("Sodium", "newborn"): (133, 146),

    # Total bilirubin (µmol/L) -- physiological neonatal jaundice
    ("Bilirubine totale", "newborn"): (0, 250),

    # Glycemia (mmol/L) -- lower bound is lower in newborns
    ("Glycémie", "newborn"): (2.5, 5.5),

    # Albumin (g/L)
    ("Albumine", "child"): (35, 50),
    ("Albumine", "infant"): (25, 45),
    ("Albumine", "newborn"): (25, 45),
}
|
||||
|
||||
|
||||
def _age_group(age: int | None, sexe: str | None) -> str | None:
|
||||
"""Détermine le groupe démographique pour les normes bio."""
|
||||
if age is None:
|
||||
return None
|
||||
if age == 0:
|
||||
return "newborn"
|
||||
if age <= 1:
|
||||
return "infant"
|
||||
if age <= 14:
|
||||
return "child"
|
||||
# Adulte : utiliser le sexe
|
||||
if sexe:
|
||||
s = sexe.strip().upper()
|
||||
if s in ("M", "MASCULIN", "HOMME", "H"):
|
||||
return "M"
|
||||
if s in ("F", "FÉMININ", "FEMININ", "FEMME"):
|
||||
return "F"
|
||||
return None
|
||||
|
||||
|
||||
def get_norms(test: str, age: int | None = None, sexe: str | None = None) -> tuple[float, float] | None:
    """Return the (min, max) reference range for a test, adapted to the patient.

    Lookup order: the group-specific entry in BIO_NORMALS_BY_GROUP for the
    group given by _age_group(age, sexe), then the generic BIO_NORMALS entry.

    Args:
        test: Test name, as used for the keys of the reference tables.
        age: Patient age in years, or None when unknown.
        sexe: Free-form sex label, or None when unknown.

    Returns:
        The (min, max) tuple, or None when neither table knows the test.
    """
    group = _age_group(age, sexe)
    if group:
        try:
            return BIO_NORMALS_BY_GROUP[(test, group)]
        except KeyError:
            # No group-specific range for this test: use the generic table.
            pass
    return BIO_NORMALS.get(test)
|
||||
|
||||
|
||||
def _is_abnormal(test: str, value: str, age: int | None = None, sexe: str | None = None) -> bool | None:
|
||||
"""Détermine si un résultat biologique est anormal.
|
||||
|
||||
Utilise les normes par groupe (âge/sexe) si disponibles,
|
||||
sinon les normes adultes génériques.
|
||||
"""
|
||||
try:
|
||||
val = float(value.replace(",", "."))
|
||||
except (ValueError, AttributeError):
|
||||
@@ -62,7 +185,71 @@ def _is_abnormal(test: str, value: str) -> bool | None:
|
||||
return True
|
||||
return None
|
||||
|
||||
if test in BIO_NORMALS:
|
||||
lo, hi = BIO_NORMALS[test]
|
||||
norms = get_norms(test, age, sexe)
|
||||
if norms:
|
||||
lo, hi = norms
|
||||
return val > hi or val < lo
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Calcul DFG (CKD-EPI 2021, sans race)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def compute_dfg(creatinine_umol: float, age: int, sexe: str) -> float | None:
    """Estimate the glomerular filtration rate with CKD-EPI 2021 (race-free).

    Args:
        creatinine_umol: Serum creatinine in µmol/L.
        age: Age in years.
        sexe: Sex label such as "M", "F", "Masculin", "Féminin". Any label
            that is not recognised as female uses the male coefficients.

    Returns:
        The estimated GFR in mL/min/1.73 m², rounded to one decimal, or
        None when the creatinine or the age is not strictly positive.
    """
    if creatinine_umol <= 0 or age <= 0:
        return None

    female = (sexe or "").strip().upper() in ("F", "FÉMININ", "FEMININ", "FEMME")
    # (kappa, alpha, sex factor) of the CKD-EPI 2021 creatinine equation.
    kappa, alpha, factor = (0.7, -0.241, 1.012) if female else (0.9, -0.302, 1.0)

    # Creatinine converted from µmol/L to mg/dL (factor 0.0113), then
    # expressed relative to kappa.
    ratio = (creatinine_umol * 0.0113) / kappa

    # The exponent switches from alpha to -1.200 once the ratio exceeds 1.
    exponent = alpha if ratio <= 1 else -1.200
    return round(142 * (ratio ** exponent) * (0.9938 ** age) * factor, 1)
|
||||
|
||||
|
||||
def stade_irc(dfg: float) -> tuple[str, str] | None:
    """Map an estimated GFR to its KDIGO stage and the matching ICD-10 code.

    KDIGO GFR categories (mL/min/1.73 m²):
        G1 >= 90, G2 60-89, G3a 45-59, G3b 30-44, G4 15-29, G5 < 15.

    ICD-10 N18.x follows the stage number only: G3a and G3b are both
    stage 3 and therefore both map to N18.3; N18.4 is stage 4 (G4) only.

    Args:
        dfg: Estimated glomerular filtration rate.

    Returns:
        A ``(kdigo_stage, icd10_code)`` tuple. Any value below 15 (or one
        that matches no threshold) yields ``("G5", "N18.5")``.
    """
    # Lower bound of each category, checked from the highest down.
    thresholds = (
        (90, ("G1", "N18.1")),
        (60, ("G2", "N18.2")),
        (45, ("G3a", "N18.3")),
        (30, ("G3b", "N18.3")),  # stage 3b is still ICD-10 stage 3
        (15, ("G4", "N18.4")),
    )
    for lower_bound, result in thresholds:
        if dfg >= lower_bound:
            return result
    return ("G5", "N18.5")
|
||||
|
||||
@@ -208,6 +208,17 @@ def extract_medical_info(
|
||||
# Post-processing : cohérence DAS ↔ biologie
|
||||
_validate_bio_das_coherence(dossier)
|
||||
|
||||
# Post-processing : calcul DFG et détection IRC non codée
|
||||
_check_dfg_irc(dossier)
|
||||
|
||||
# Post-processing : détection erreurs fréquentes (intuition DIM senior)
|
||||
try:
|
||||
from .dim_senior import check_common_mistakes
|
||||
senior_alerts = check_common_mistakes(dossier)
|
||||
dossier.alertes_codage.extend(senior_alerts)
|
||||
except Exception:
|
||||
logger.error("DIM-SENIOR: erreur détection erreurs fréquentes", exc_info=True)
|
||||
|
||||
# Post-processing : resélection DP si exclu par vetos/exclusions
|
||||
if dossier.document_type != "trackare":
|
||||
try:
|
||||
@@ -227,6 +238,51 @@ def extract_medical_info(
|
||||
return dossier
|
||||
|
||||
|
||||
def _check_dfg_irc(dossier: DossierMedical) -> None:
    """Estimate the GFR from creatinine and flag an uncoded chronic kidney disease.

    Does nothing unless the stay provides a sex and an age of at least 18,
    and the record holds a "Créatinine" result with a non-zero numeric value.
    Otherwise the estimate is stored in ``dossier.quality_flags["dfg_estime"]``.
    When it is below 60 and neither the principal diagnosis nor any
    associated diagnosis has a suggested code starting with "N18", an alert
    is appended to ``dossier.alertes_codage`` and the expected code is stored
    in ``dossier.quality_flags["irc_non_codee"]``.
    """
    from .bio_normals import compute_dfg, stade_irc

    sejour = dossier.sejour
    age = sejour.age if sejour else None
    sexe = sejour.sexe if sejour else None
    if not (age and sexe) or age < 18:
        return

    # First creatinine result that carries a usable numeric value.
    creat_val = next(
        (
            bio.valeur_num
            for bio in dossier.biologie_cle
            if bio.test == "Créatinine" and bio.valeur_num
        ),
        None,
    )
    if creat_val is None:
        return

    dfg = compute_dfg(creat_val, age, sexe)
    if dfg is None:
        return
    dossier.quality_flags["dfg_estime"] = dfg

    # Only an estimate below 60 calls for an N18.x code.
    if not dfg < 60:
        return
    stade, code_attendu = stade_irc(dfg)

    # Gather every suggested code: principal diagnosis first, then the DAS.
    diagnostics = []
    if dossier.diagnostic_principal:
        diagnostics.append(dossier.diagnostic_principal)
    diagnostics.extend(dossier.diagnostics_associes)
    codes = {d.cim10_suggestion for d in diagnostics if d.cim10_suggestion}

    if any(code.startswith("N18") for code in codes):
        return

    dossier.alertes_codage.append(
        f"DFG estimé {dfg} mL/min (stade {stade}) — IRC ({code_attendu}) "
        f"non codée. Créatinine {creat_val} µmol/L, {sexe} {age} ans."
    )
    dossier.quality_flags["irc_non_codee"] = code_attendu
|
||||
|
||||
|
||||
def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
|
||||
"""Extrait des DAS supplémentaires via un pass LLM (avant enrichissement RAG)."""
|
||||
try:
|
||||
|
||||
@@ -243,6 +243,7 @@ def score_candidates(
|
||||
for sec_key, sec_bonus in (
|
||||
("diag_sortie", 4), ("diag_principal", 4),
|
||||
("synthese", 2), ("conclusion", 2),
|
||||
("histoire_maladie", 1),
|
||||
):
|
||||
sec_text = (synthese.get(sec_key) or "").lower()
|
||||
if not sec_text or len(sec_text) < 3:
|
||||
@@ -332,9 +333,15 @@ def _llm_rank(
|
||||
motif = synthese.get("motif", "")
|
||||
if motif:
|
||||
ctx_parts.append(f"Motif: {motif}")
|
||||
hdm = synthese.get("histoire_maladie", "")
|
||||
if hdm:
|
||||
ctx_parts.append(f"Histoire de la maladie: {hdm[:400]}")
|
||||
conclusion = synthese.get("conclusion", "")
|
||||
if conclusion:
|
||||
ctx_parts.append(f"Conclusion: {conclusion[:300]}")
|
||||
diag_sortie = synthese.get("diag_sortie", "")
|
||||
if diag_sortie:
|
||||
ctx_parts.append(f"Diagnostic de sortie: {diag_sortie[:200]}")
|
||||
ctx_str = "\n".join(ctx_parts) or "Non disponible"
|
||||
|
||||
prompt = DP_RANKER_CONSTRAINED.format(
|
||||
@@ -362,6 +369,7 @@ def build_synthese(dossier: DossierMedical, parsed_data: dict) -> dict:
|
||||
"diag_sortie": sections.get("diag_sortie", ""),
|
||||
"diag_principal": sections.get("diag_principal", ""),
|
||||
"synthese": sections.get("synthese", ""),
|
||||
"histoire_maladie": sections.get("histoire_maladie", ""),
|
||||
"antecedents": [a.texte for a in dossier.antecedents[:10]],
|
||||
}
|
||||
|
||||
|
||||
@@ -325,3 +325,48 @@ def estimate_financial_impact(
|
||||
priorite=priorite,
|
||||
raison=raison,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Simulation what-if : impact de chaque DAS sur la sévérité GHM
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def ghm_whatif(dossier: DossierMedical) -> list[dict]:
    """Re-estimate the GHM without each associated diagnosis in turn.

    Each associated diagnosis (DAS) that has a suggested code is removed
    from a shallow copy of the record and the GHM is estimated again.
    A DAS is reported only when the severity changes without it.

    Returns:
        A list of dicts with the keys "das_texte", "das_code",
        "severite_avec", "severite_sans", "delta" and "impact", ordered by
        decreasing absolute delta.
    """
    from copy import copy

    # A missing severity is treated as level 1.
    base_sev = estimate_ghm(dossier).severite or 1
    impacts = []

    for index, das in enumerate(dossier.diagnostics_associes):
        code = das.cim10_suggestion
        if not code:
            continue

        # Shallow copy of the record with this one DAS left out.
        remaining = list(dossier.diagnostics_associes)
        del remaining[index]
        variant = copy(dossier)
        variant.diagnostics_associes = remaining

        sev_sans = estimate_ghm(variant).severite or 1
        if sev_sans == base_sev:
            continue

        if base_sev > sev_sans:
            impact = "hausse_severite"
        else:
            impact = "baisse_severite"
        impacts.append(
            {
                "das_texte": das.texte[:80],
                "das_code": code,
                "severite_avec": base_sev,
                "severite_sans": sev_sans,
                "delta": base_sev - sev_sans,
                "impact": impact,
            }
        )

    return sorted(impacts, key=lambda entry: abs(entry["delta"]), reverse=True)
|
||||
|
||||
@@ -258,7 +258,9 @@ def _validate_bio_das_coherence(dossier: DossierMedical) -> None:
|
||||
# Indexer la biologie du dossier : analyte → (valeur, anomalie)
|
||||
bio_index: dict[str, tuple[str, bool | None]] = {}
|
||||
for bio in dossier.biologie_cle:
|
||||
abnormal = _is_abnormal(bio.test, bio.valeur)
|
||||
_age = dossier.sejour.age if dossier.sejour else None
|
||||
_sexe = dossier.sejour.sexe if dossier.sejour else None
|
||||
abnormal = _is_abnormal(bio.test, bio.valeur, _age, _sexe)
|
||||
bio_index[bio.test] = (bio.valeur, abnormal)
|
||||
|
||||
all_diags = []
|
||||
|
||||
Reference in New Issue
Block a user