- Filtre DAS identique au DP (violation règle PMSI) dans extracteur et fusion - Correction automatique D55.9 → D64.9 pour "Anémie" non qualifiée (70 cas) - 17 codes ajoutés aux supplements (K59.0, Z93.1, H92.0, A87.0, D64.9, etc.) - 436 tests OK (+14 nouveaux) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
944 lines
35 KiB
Python
944 lines
35 KiB
Python
"""Extraction d'informations médicales structurées pour le codage CIM-10."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import re
|
||
from datetime import datetime
|
||
from typing import Optional
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
from .cim10_dict import lookup as dict_lookup, normalize_text, normalize_code, validate_code as cim10_validate
|
||
from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
|
||
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes
|
||
from ..config import (
|
||
ActeCCAM,
|
||
BiologieCle,
|
||
Diagnostic,
|
||
DossierMedical,
|
||
Imagerie,
|
||
Sejour,
|
||
Traitement,
|
||
)
|
||
|
||
try:
|
||
from .edsnlp_pipeline import EdsnlpResult
|
||
except ImportError:
|
||
EdsnlpResult = None # type: ignore[assignment,misc]
|
||
|
||
# Frequent diagnosis wordings (lower-case French) -> suggested ICD-10 (CIM-10) code.
# Both accented and unaccented spellings are listed for several entries.
# NOTE: insertion order is significant. Code that scans this mapping stops at
# the first key found in the text, so specific wordings must stay ahead of
# the generic ones (e.g. "pancréatite aiguë biliaire" before "pancréatite").
CIM10_MAP: dict[str, str] = {
    # Pancreatitis
    "pancréatite aiguë biliaire": "K85.1",
    "pancréatite aigue biliaire": "K85.1",
    "pancréatite aiguë lithiasique": "K85.1",
    "pancréatite aigue lithiasique": "K85.1",
    "pancréatite aiguë": "K85.9",
    "pancréatite aigue": "K85.9",
    "pancréatite": "K85.9",
    # Biliary stones
    "lithiase cholédoque": "K80.5",
    "lithiase du cholédoque": "K80.5",
    "calcul des canaux biliaires": "K80.5",
    "lithiase vésiculaire": "K80.2",
    "lithiases vésiculaires": "K80.2",
    "vésicule lithiasique": "K80.2",
    "colique hépatique": "K80.2",
    # Cholecystitis / cholangitis
    "cholécystite aiguë": "K81.0",
    "cholecystite aigue": "K81.0",
    "angiocholite": "K83.0",
    # Obesity
    "obésité": "E66.0",
    "obesite": "E66.0",
    "surpoids": "E66.0",
    # Drug reactions
    "éruption médicamenteuse": "L27.0",
    "eruption medicamenteuse": "L27.0",
    "éruption cutanée médicamenteuse": "L27.0",
    "toxidermie": "L27.0",
    "réaction au tramadol": "L27.0",
    "allergie médicamenteuse": "T88.7",
    # Pain
    "douleur abdominale": "R10.4",
    "douleur hypochondre droit": "R10.1",
    # Jaundice
    "ictère": "R17",
    "jaunisse": "R17",
    # Arterial hypertension
    "hypertension artérielle": "I10",
    "hta": "I10",
    # Diabetes
    "diabète type 2": "E11.9",
    "diabète de type 2": "E11.9",
    "diabète type 1": "E10.9",
}
|
||
|
||
# Procedure wordings (lower-case French) -> suggested CCAM procedure code.
# Passed as ``domain_overrides`` to the CCAM dictionary lookup in this module.
CCAM_MAP: dict[str, str] = {
    # Cholecystectomy
    "cholécystectomie": "HMFC004",
    "cholecystectomie": "HMFC004",
    "cholécystectomie par cœlioscopie": "HMFC004",
    "cholecystectomie par coelioscopie": "HMFC004",
    # Cholangiography / endoscopy
    "cholangiographie": "HHHE002",
    "cholangiographie peropératoire": "HHHE002",
    "cpre": "HHHE002",
    "sphinctérotomie endoscopique": "HHHE003",
    # Imaging
    "scanner abdominal": "ZCQK002",
    "tdm abdominal": "ZCQK002",
    "échographie abdominale": "ZCQJ001",
    "echo abdominale": "ZCQJ001",
    "irm abdominale": "ZCQN001",
}
|
||
|
||
|
||
def extract_medical_info(
    parsed_data: dict,
    anonymized_text: str,
    edsnlp_result: Optional[EdsnlpResult] = None,
    use_rag: bool = False,
) -> DossierMedical:
    """Build a structured ``DossierMedical`` from parsed data and free text.

    Args:
        parsed_data: Output of the document parser; its ``"type"`` entry is
            copied to ``dossier.document_type``.
        anonymized_text: Anonymized report text scanned by the extractors.
        edsnlp_result: Optional edsnlp pipeline result, forwarded to the
            extractors that accept it.
        use_rag: When true, runs the LLM pass for extra associated diagnoses
            followed by the RAG enrichment.

    Returns:
        The populated dossier, after all post-processing steps have run.
    """
    dossier = DossierMedical()
    dossier.document_type = parsed_data.get("type", "")

    # Extraction phase (order preserved: later steps read earlier results).
    _extract_sejour(parsed_data, dossier)
    _extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result)
    _extract_actes(anonymized_text, dossier)
    _extract_antecedents(anonymized_text, dossier)
    _extract_traitements(parsed_data, anonymized_text, dossier, edsnlp_result)
    _extract_biologie(anonymized_text, dossier)
    _extract_imagerie(anonymized_text, dossier)
    _extract_complications(anonymized_text, dossier, edsnlp_result)

    if use_rag:
        # LLM pass for additional associated diagnoses, then RAG enrichment.
        _extract_das_llm(anonymized_text, dossier)
        _enrich_with_rag(dossier)

    # Post-processing, applied strictly in this order.
    post_processing_steps = (
        _validate_ccam,           # CCAM codes checked against the dictionary
        _validate_cim10,          # CIM-10 codes checked against the dictionary
        _apply_code_corrections,  # known systematic miscodes
        _apply_exclusion_rules,   # symptom vs. precise diagnosis exclusions
        _apply_severity_rules,    # heuristic severity enrichment
        _apply_noncumul_rules,    # CCAM non-cumulation detection
        _remove_das_equal_dp,     # drop associated diagnoses equal to the principal one
    )
    for step in post_processing_steps:
        step(dossier)

    return dossier
|
||
|
||
|
||
def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
    """Append LLM-suggested associated diagnoses (DAS) to ``dossier``.

    Best effort: if the optional RAG modules cannot be imported, or if
    anything fails during the pass, a warning is logged and the function
    returns without raising.

    Args:
        text: Anonymized report text handed to the LLM.
        dossier: Dossier read for context and updated in place.
    """
    try:
        from .rag_search import extract_das_llm
        from .ollama_cache import OllamaCache
        from ..config import OLLAMA_CACHE_PATH, OLLAMA_MODEL
    except ImportError:
        logger.warning("Module RAG non disponible pour l'extraction DAS LLM")
        return

    try:
        llm_cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL)

        # Clinical context passed alongside the text.
        stay = dossier.sejour
        contexte = {
            "sexe": stay.sexe,
            "age": stay.age,
            "duree_sejour": stay.duree_sejour,
            "imc": stay.imc,
            "antecedents": dossier.antecedents[:5],
            "biologie_cle": [(bio.test, bio.valeur, bio.anomalie) for bio in dossier.biologie_cle],
            "imagerie": [(img.type, (img.conclusion or "")[:200]) for img in dossier.imagerie],
            "complications": dossier.complications,
        }

        # Codes already present (principal + associated), used for de-duplication,
        # and human-readable labels of the existing associated diagnoses.
        principal = dossier.diagnostic_principal
        known_codes = set()
        if principal and principal.cim10_suggestion:
            known_codes.add(principal.cim10_suggestion)

        known_labels = []
        for known in dossier.diagnostics_associes:
            if known.cim10_suggestion:
                known_codes.add(known.cim10_suggestion)
                known_labels.append(known.texte + f" ({known.cim10_suggestion})")
            else:
                known_labels.append(known.texte)

        dp_label = principal.texte if principal else ""

        suggestions = extract_das_llm(text, contexte, known_labels, dp_label, cache=llm_cache)

        added = 0
        for suggestion in suggestions:
            label = clean_diagnostic_text(suggestion.get("texte", ""))
            if not (label and is_valid_diagnostic_text(label)):
                continue

            code = suggestion.get("code_cim10")
            if code:
                code = normalize_code(code)
                code_ok, _ = cim10_validate(code)
                if not code_ok:
                    logger.info("DAS LLM : code %s invalide pour « %s », ignoré", code, label)
                    continue
                if code in known_codes:
                    continue
                known_codes.add(code)

            # Suggestions without a code are kept, with cim10_suggestion unset.
            dossier.diagnostics_associes.append(Diagnostic(
                texte=label,
                cim10_suggestion=code,
                justification=suggestion.get("justification"),
            ))
            added += 1

        if added:
            logger.info("DAS LLM : %d diagnostics supplémentaires ajoutés", added)

        llm_cache.save()
    except Exception:
        logger.warning("Erreur lors de l'extraction DAS LLM", exc_info=True)
|
||
|
||
|
||
def _enrich_with_rag(dossier: DossierMedical) -> None:
    """Run the RAG enrichment on ``dossier``, logging instead of raising.

    An ``ImportError`` (raised by the import or by the call itself) is
    reported as missing optional dependencies; any other exception is logged
    with its traceback.
    """
    try:
        from .rag_search import enrich_dossier as rag_enrich

        rag_enrich(dossier)
    except ImportError:
        logger.warning("Module RAG non disponible (faiss-cpu ou sentence-transformers manquant)")
    except Exception:
        logger.warning("Erreur lors de l'enrichissement RAG", exc_info=True)
|
||
|
||
|
||
def _extract_sejour(parsed: dict, dossier: DossierMedical) -> None:
    """Fill ``dossier.sejour`` from the parsed patient / stay data.

    Sets sex, admission and discharge dates and admission mode, then derives
    the age at admission and the length of stay when the dates parse as
    ``DD/MM/YYYY``. BMI, weight and height come from ``signes_vitaux`` first,
    falling back to the ``patient`` entry.
    """
    patient = parsed.get("patient", {})
    sejour_data = parsed.get("sejour", {})
    date_format = "%d/%m/%Y"

    admission_raw = sejour_data.get("date_entree")
    discharge_raw = sejour_data.get("date_sortie")

    dossier.sejour = Sejour(
        sexe=patient.get("sexe"),
        date_entree=admission_raw,
        date_sortie=discharge_raw,
        mode_entree=parsed.get("urgences", {}).get("mode_entree"),
    )

    # Age at admission: year difference, minus one if the birthday has not
    # yet occurred in the admission year.
    birth_raw = patient.get("date_naissance")
    if birth_raw and admission_raw:
        try:
            born = datetime.strptime(birth_raw, date_format)
            admitted = datetime.strptime(admission_raw, date_format)
        except ValueError:
            pass  # unparseable date: age left unset
        else:
            birthday_pending = (admitted.month, admitted.day) < (born.month, born.day)
            dossier.sejour.age = admitted.year - born.year - (1 if birthday_pending else 0)

    # Length of stay in days.
    if admission_raw and discharge_raw:
        try:
            start = datetime.strptime(admission_raw, date_format)
            end = datetime.strptime(discharge_raw, date_format)
        except ValueError:
            pass  # unparseable date: duration left unset
        else:
            dossier.sejour.duree_sejour = (end - start).days

    # BMI, weight, height: vital signs take precedence over patient data.
    vitals = parsed.get("signes_vitaux", {})
    for attribute, key in (("imc", "imc"), ("poids", "poids_kg"), ("taille", "taille_cm")):
        measured = vitals.get(key) or patient.get(key)
        if measured:
            setattr(dossier.sejour, attribute, measured)
|
||
|
||
|
||
def _extract_diagnostics(
    parsed: dict,
    text: str,
    dossier: DossierMedical,
    edsnlp_result: Optional[EdsnlpResult] = None,
) -> None:
    """Populate the principal diagnosis (DP) and associated diagnoses (DAS).

    Sources, in order: coded diagnoses from ``parsed["diagnostics"]``, the
    regex-based text fallbacks, then non-negated, non-hypothetical edsnlp
    CIM-10 entities.
    """
    lowered = text.lower()

    # 1. Diagnoses already coded upstream take priority.
    for coded in parsed.get("diagnostics", []):
        label = clean_diagnostic_text(coded.get("libelle", ""))
        if not is_valid_diagnostic_text(label):
            continue
        diagnostic = Diagnostic(texte=label, cim10_suggestion=coded.get("code_cim10"))
        if coded.get("type", "").lower() == "principal":
            dossier.diagnostic_principal = diagnostic
        else:
            dossier.diagnostics_associes.append(diagnostic)

    # 2. Text following "Au total", used as the conclusion.
    summary = re.search(
        r"Au total\s*[::]?\s*(.*?)(?=\n\s*(?:Devenir|TTT|Sortie|$))",
        text,
        re.DOTALL | re.IGNORECASE,
    )
    conclusion = summary.group(1).strip() if summary else ""

    # 3. Affirmed edsnlp entities, keyed by code.
    edsnlp_codes: dict[str, str] = {}
    if edsnlp_result:
        edsnlp_codes = {
            ent.code: ent.texte
            for ent in edsnlp_result.cim10_entities
            if not ent.negation and not ent.hypothese
        }

    # 4. No coded DP: try the regex fallback, then the first edsnlp entity.
    if not dossier.diagnostic_principal:
        fallback_dp = _find_diagnostic_principal(lowered, conclusion)
        if fallback_dp:
            dossier.diagnostic_principal = fallback_dp
        elif edsnlp_codes:
            first_code, first_label = next(iter(edsnlp_codes.items()))
            dossier.diagnostic_principal = Diagnostic(
                texte=first_label.capitalize(), cim10_suggestion=first_code,
            )

    # 5. Associated diagnoses detected by regex in the text.
    regex_das = [
        candidate
        for candidate in _find_diagnostics_associes(lowered, conclusion, dossier)
        if is_valid_diagnostic_text(candidate.texte)
    ]
    dossier.diagnostics_associes.extend(regex_das)

    # 6. Associated diagnoses from edsnlp, skipping codes already present.
    if edsnlp_result:
        seen_codes = set()
        if dossier.diagnostic_principal:
            seen_codes.add(dossier.diagnostic_principal.cim10_suggestion)
        seen_codes.update(d.cim10_suggestion for d in dossier.diagnostics_associes)

        for ent in edsnlp_result.cim10_entities:
            if ent.negation or ent.hypothese:
                continue
            label = clean_diagnostic_text(ent.texte.capitalize())
            if not is_valid_diagnostic_text(label) or ent.code in seen_codes:
                continue
            dossier.diagnostics_associes.append(Diagnostic(
                texte=label,
                cim10_suggestion=ent.code,
            ))
            seen_codes.add(ent.code)
|
||
|
||
|
||
def _find_diagnostic_principal(text_lower: str, conclusion: str) -> Diagnostic | None:
    """Look for a principal diagnosis in the conclusion, then in the full text.

    Both inputs are passed through ``normalize_text`` before matching. The
    conclusion is scanned for ``CIM10_MAP`` terms in mapping order; the first
    term found wins. Otherwise a short ordered list of pancreatitis patterns
    is tried against the whole text.

    Returns:
        A ``Diagnostic`` for the first hit, or ``None`` if nothing matches.
    """
    normalized_conclusion = normalize_text(conclusion)

    # Conclusion first, using CIM10_MAP terms.
    for term, code in CIM10_MAP.items():
        if normalize_text(term) not in normalized_conclusion:
            continue
        return Diagnostic(texte=term.capitalize(), cim10_suggestion=code)

    normalized_text = normalize_text(text_lower)

    # Unaccented patterns, most specific first.
    principal_patterns = (
        r"pancreatite\s+aigue\s+(?:d'origine\s+)?lithiasique",
        r"pancreatite\s+aigue\s+biliaire",
        r"pancreatite\s+aigue",
    )
    for pattern in principal_patterns:
        hit = re.search(pattern, normalized_text)
        if hit is None:
            continue
        wording = hit.group(0)
        return Diagnostic(texte=wording.capitalize(), cim10_suggestion=_lookup_cim10(wording))

    return None
|
||
|
||
|
||
# Patterns DAS : (pattern_normalisé, label, code_fallback)
|
||
# Les patterns sont appliqués sur du texte normalisé (sans accents, lowercase)
|
||
_DAS_PATTERNS: list[tuple[str, str, str]] = [
|
||
# Lithiases biliaires
|
||
(r"lithiase\s+(?:du\s+)?(?:bas\s+)?choledoque", "Lithiase du cholédoque", "K80.5"),
|
||
(r"vesicule\s+lithiasique|lithiases?\s+vesiculaire", "Lithiase vésiculaire", "K80.2"),
|
||
# Inflammation biliaire
|
||
(r"cholecystite\s+aigue", "Cholécystite aiguë", "K81.0"),
|
||
(r"angiocholite|cholangite", "Angiocholite", "K83.0"),
|
||
# Réactions médicamenteuses
|
||
(r"eruption\s+cutanee|toxidermie|reaction\s+au\s+tramadol", "Éruption cutanée médicamenteuse", "L27.0"),
|
||
# Cardiovasculaire
|
||
(r"hypertension\s+arterielle|\bhta\b", "Hypertension artérielle", "I10"),
|
||
(r"fibrillation\s+auriculaire|\bfa\b(?:\s+paroxystique)?|\bacfa\b", "Fibrillation auriculaire", "I48.9"),
|
||
(r"embolie\s+pulmonaire", "Embolie pulmonaire", "I26.9"),
|
||
(r"thrombose\s+veineuse\s+profonde|\btvp\b", "Thrombose veineuse profonde", "I80.2"),
|
||
# Métabolique
|
||
(r"diabete\s+(?:sucre\s+)?(?:de\s+)?type\s+2|diabete\s+type\s*2", "Diabète de type 2", "E11.9"),
|
||
(r"diabete\s+(?:sucre\s+)?(?:de\s+)?type\s+1|diabete\s+type\s*1", "Diabète de type 1", "E10.9"),
|
||
(r"dyslipidemie|hypercholesterolemie", "Dyslipidémie", "E78.5"),
|
||
(r"denutrition|malnutrition", "Dénutrition", "E46"),
|
||
# Infectieux
|
||
(r"pneumopathie|pneumonie", "Pneumopathie", "J18.9"),
|
||
(r"infection\s+urinaire|pyelonephrite", "Infection urinaire", "N39.0"),
|
||
(r"\bsepsis\b|septicemie|choc\s+septique", "Sepsis", "A41.9"),
|
||
# Rénal
|
||
(r"insuffisance\s+renale", "Insuffisance rénale", "N19"),
|
||
# Hématologique
|
||
(r"anemie", "Anémie", "D64.9"),
|
||
# Addictions
|
||
(r"tabagisme|tabac\s+actif", "Tabagisme", "F17.2"),
|
||
(r"ethylisme|alcoolisme|intoxication\s+ethylique", "Éthylisme", "F10.1"),
|
||
]
|
||
|
||
|
||
def _find_diagnostics_associes(
    text_lower: str, conclusion: str, dossier: DossierMedical
) -> list[Diagnostic]:
    """Detect associated diagnoses in the text with ``_DAS_PATTERNS``.

    Codes already carried by the dossier (principal or associated) are
    skipped, and each code is produced at most once. Obesity is handled
    separately from a numeric BMI ("imc") value of 30 or more.

    Args:
        text_lower: Lower-cased report text (normalized again here).
        conclusion: Not used by this function.
        dossier: Read only, to collect the codes already present.

    Returns:
        The newly detected diagnoses; the dossier itself is not modified.
    """
    known_codes = {d.cim10_suggestion for d in dossier.diagnostics_associes}
    if dossier.diagnostic_principal:
        known_codes.add(dossier.diagnostic_principal.cim10_suggestion)

    normalized = normalize_text(text_lower)
    found: list[Diagnostic] = []

    def register(label: str, code: str) -> None:
        # Record a detection and remember its code to avoid duplicates.
        found.append(Diagnostic(texte=label, cim10_suggestion=code))
        known_codes.add(code)

    for pattern, label, code in _DAS_PATTERNS:
        if code in known_codes or not re.search(pattern, normalized):
            continue
        register(label, code)

    # Obesity: needs a BMI written with a decimal part, value >= 30.
    bmi_match = re.search(r"imc\s*[:=]?\s*(\d{2,3}[.,]\d+)", normalized)
    if bmi_match:
        imc_val = float(bmi_match.group(1).replace(",", "."))
        if imc_val >= 30 and "E66.0" not in known_codes:
            register(f"Obésité (IMC {imc_val})", "E66.0")

    return found
|
||
|
||
|
||
def _extract_actes(text: str, dossier: DossierMedical) -> None:
    """Detect procedures mentioned in the text and append them as CCAM acts.

    Covers cholecystectomy (laparoscopic wording takes precedence over the
    plain one), cholangiography and CT scan. Each detected act gets a date
    from ``_find_act_date``. Acts that end up without a code are then looked
    up in the CCAM dictionary with ``CCAM_MAP`` as overrides.
    """
    lowered = text.lower()

    def record(label: str, code: str, date_pattern: str) -> None:
        # Date lookup runs on the original text, not the lower-cased one.
        performed_on = _find_act_date(text, date_pattern)
        dossier.actes_ccam.append(ActeCCAM(
            texte=label,
            code_ccam_suggestion=code,
            date=performed_on,
        ))

    # Cholecystectomy: laparoscopic wording first, plain wording otherwise.
    if re.search(r"chol[ée]cystectomie\s+par\s+c[oœ][ea]lioscopie", lowered):
        record("Cholécystectomie par cœlioscopie", "HMFC004", r"chol[ée]cystectomie")
    elif re.search(r"chol[ée]cystectomie|cholecystectomie", lowered):
        record("Cholécystectomie", "HMFC004", r"chol[ée]cystectomie|cholecystectomie")

    # Cholangiography
    if re.search(r"cholangiographie", lowered):
        record("Cholangiographie peropératoire", "HHHE002", r"cholangiographie")

    # CT scan
    if re.search(r"(?:tdm|scanner|tomodensitométrie)", lowered):
        record("TDM abdominal", "ZCQK002", r"(?:TDM|scanner)")

    # Fallback: dictionary lookup for any act still lacking a code.
    for acte in dossier.actes_ccam:
        if acte.code_ccam_suggestion:
            continue
        looked_up = ccam_lookup(acte.texte, domain_overrides=CCAM_MAP)
        if looked_up:
            acte.code_ccam_suggestion = looked_up
|
||
|
||
|
||
def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
|
||
"""Extrait les antécédents."""
|
||
m = re.search(
|
||
r"Antécédents?\s*[::]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[::]|Allergie|Histoire de la maladie|Examen clinique|\n\n))",
|
||
text,
|
||
re.DOTALL | re.IGNORECASE,
|
||
)
|
||
if m:
|
||
block = m.group(1).strip()
|
||
for line in block.split("\n"):
|
||
line = line.strip().lstrip("- •")
|
||
# Filtrer les lignes non pertinentes
|
||
if (line and len(line) > 5 and line != "0"
|
||
and not re.match(r"^\d", line)
|
||
and "Item de" not in line
|
||
and "surveillance" not in line.lower()
|
||
and "Température" not in line
|
||
and "Signes Vitaux" not in line
|
||
and "Pouls" not in line
|
||
and "Type de note" not in line
|
||
and "Aucune donnée" not in line
|
||
and "renseignée" not in line
|
||
and "habitudes de vie" not in line
|
||
and "Systolique" not in line
|
||
and "Diastolique" not in line
|
||
and "Saturation" not in line):
|
||
dossier.antecedents.append(line)
|
||
|
||
|
||
def _extract_traitements(
    parsed: dict,
    text: str,
    dossier: DossierMedical,
    edsnlp_result: Optional[EdsnlpResult] = None,
) -> None:
    """Extract discharge treatments into ``dossier.traitements_sortie``.

    Primary source is the "TTT/Traitement de sortie" section, read line by
    line until a footer/signature line is met. If that yields nothing, lines
    tagged "Presc. de Sortie" are used instead. ATC codes are attached when
    an edsnlp drug entity matches the medication name.

    Args:
        parsed: Not used by this function.
        text: Report text to scan.
        dossier: Dossier updated in place.
        edsnlp_result: Optional edsnlp result providing drug entities.
    """
    # Lower-cased drug text -> ATC code, from non-negated edsnlp entities.
    drug_atc: dict[str, str] = {}
    if edsnlp_result:
        drug_atc = {
            drug.texte.lower(): drug.code_atc
            for drug in edsnlp_result.drug_entities
            if not drug.negation and drug.code_atc
        }

    section_re = re.compile(
        r"(?:TTT|Traitement)\s+de\s+sortie\s*[::]?\s*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement|Patient:|Episode|Le \d{2}/\d{2}|\n\n)|$)",
        re.DOTALL | re.IGNORECASE,
    )
    # Footers, signatures and metadata that end the treatment list.
    stop_re = re.compile(
        r"^(Patient|Episode|Le \d|Page\s+\d|V\d|Rédigé|Cordialement|Dr\s|Docteur|Signature|Date|Fait\s+le)",
        re.IGNORECASE,
    )
    # Start of the dosage part of a line.
    dosage_re = re.compile(
        r"\s+(si besoin|matin|soir|midi|"
        r"\d+\s*(?:mg|cp|gel|sachet|comprim[ée]|g[ée]lule).*|"
        r"\d+\s*(?:x|fois)\s*/?\s*(?:j(?:our)?|semaine)|"
        r"pendant\s+\d+\s*jours?)",
        re.IGNORECASE,
    )

    section = section_re.search(text)
    if section:
        for raw_line in section.group(1).strip().split("\n"):
            entry = raw_line.strip().lstrip("- •")
            if len(entry) <= 2:
                continue
            if stop_re.match(entry):
                break

            dosage = dosage_re.search(entry)
            if dosage:
                med = entry[:dosage.start()].strip()
                poso = dosage.group(1).strip()
            else:
                med, poso = entry, None

            dossier.traitements_sortie.append(Traitement(
                medicament=med,
                posologie=poso,
                code_atc=_match_drug_atc(med, drug_atc),
            ))

    # Fallback: prescriptions tagged "Presc. de Sortie".
    if not dossier.traitements_sortie:
        prescription_re = (
            r"([A-ZÉÈÊËÀÂ][A-ZÉÈÊËÀÂ0-9\s\-/%.]+?)(?:\s+\d+\s*(?:mg|G|CPR|GEL))?.*?Presc\.\s*de\s*Sortie"
        )
        for prescription in re.finditer(prescription_re, text):
            med = prescription.group(1).strip()
            if len(med) <= 3:
                continue
            dossier.traitements_sortie.append(Traitement(
                medicament=med, code_atc=_match_drug_atc(med, drug_atc),
            ))
|
||
|
||
|
||
def _match_drug_atc(med_name: str, drug_atc: dict[str, str]) -> Optional[str]:
|
||
"""Cherche un code ATC correspondant au médicament dans les résultats edsnlp."""
|
||
if not drug_atc:
|
||
return None
|
||
med_lower = med_name.lower().strip()
|
||
# Correspondance exacte
|
||
if med_lower in drug_atc:
|
||
return drug_atc[med_lower]
|
||
# Correspondance partielle : le nom edsnlp est contenu dans le nom du médicament
|
||
for drug_text, atc in drug_atc.items():
|
||
if drug_text in med_lower or med_lower in drug_text:
|
||
return atc
|
||
return None
|
||
|
||
|
||
def _extract_biologie(text: str, dossier: DossierMedical) -> None:
    """Extract key lab results into ``dossier.biologie_cle``.

    For each test, only the first occurrence found in ``text`` is kept. The
    captured value is stored as a string and flagged via ``_is_abnormal``.
    Patterns accept aliases (TGO/TGP, Hb) and optional unit suffixes.
    """
    # (test name, regex) — group 1 of each regex is the result value.
    lab_patterns = (
        ("Lipasémie", r"[Ll]ipas[ée]mie\s*(?:[àa=:])?\s*(\d+)\s*(?:UI/L|U/L)?"),
        ("CRP", r"CRP\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mg/[Ll])?"),
        ("ASAT", r"(?:ASAT|TGO)\s*[=:àa]?\s*([\d.,]+)\s*(?:N|U(?:I)?/L)?"),
        ("ALAT", r"(?:ALAT|TGP)\s*[=:àa]?\s*([\d.,]+)\s*(?:N|U(?:I)?/L)?"),
        ("GGT", r"GGT\s*[=:àa]?\s*(\d+)\s*(?:U(?:I)?/L)?"),
        ("PAL", r"PAL\s*[=:àa]?\s*(\d+)\s*(?:U(?:I)?/L)?"),
        (
            "Bilirubine totale",
            r"[Bb]ilirubine\s+(?:totale\s+)?[àa=:]\s*(\d+(?:[.,]\d+)?)\s*(?:µmol/L|mg/dL)?",
        ),
        ("Troponine", r"[Tt]roponine\s+(?:us\s+)?(n[ée]gative|positive|normale)"),
        (
            "Hémoglobine",
            r"(?:[Hh][ée]moglobine|Hb)\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:g/dL|g/L)?",
        ),
        ("Plaquettes", r"[Pp]laquettes?\s*[=:àa]?\s*(\d+(?:\s*000)?)\s*(?:/mm3|G/L)?"),
        ("Leucocytes", r"[Ll]eucocytes?\s*[=:àa]?\s*(\d+(?:\s*000)?)\s*(?:/mm3|G/L)?"),
        (
            "Créatinine",
            r"[Cc]r[ée]atinine?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:µmol/L|mg/dL)?",
        ),
    )

    for test_name, pattern in lab_patterns:
        hit = re.search(pattern, text)
        if hit is None:
            continue
        raw_value = hit.group(1)
        dossier.biologie_cle.append(BiologieCle(
            test=test_name,
            valeur=raw_value,
            anomalie=_is_abnormal(test_name, raw_value),
        ))
|
||
|
||
|
||
def _extract_imagerie(text: str, dossier: DossierMedical) -> None:
    """Extract CT and ultrasound findings into ``dossier.imagerie``.

    For each modality, the text following a "retrouve / montre / objective"
    verb is taken as the conclusion, truncated to 500 characters. For CT, a
    Balthazar score found anywhere in ``text`` is attached.
    """
    flags = re.DOTALL | re.IGNORECASE

    # CT scan
    ct_hit = re.search(
        r"(?:TDM|[Ss]canner|tomodensitométrie).*?(?:retrouve|montre|objective)\s*[::]?\s*(.*?)(?=\n\s*(?:Cholécystectomie|Au total|Devenir|\n\n))",
        text,
        flags,
    )
    if ct_hit:
        ct_conclusion = ct_hit.group(1).strip()
        # Balthazar score: searched in the whole text, case-sensitive grade.
        balthazar = re.search(r"[Bb]althazar\s*(?:[àa=:])?\s*(\d+|[A-E])", text)
        dossier.imagerie.append(Imagerie(
            type="TDM abdominal",
            conclusion=ct_conclusion[:500],
            score=f"Balthazar {balthazar.group(1)}" if balthazar else None,
        ))

    # Ultrasound
    us_hit = re.search(
        r"(?:[ée]cho(?:graphie)?)\s*.*?(?:retrouve|montre|objective)\s*[::]?\s*(.*?)(?=\n\n)",
        text,
        flags,
    )
    if us_hit:
        us_conclusion = us_hit.group(1).strip()
        dossier.imagerie.append(Imagerie(
            type="Échographie",
            conclusion=us_conclusion[:500],
        ))
|
||
|
||
|
||
def _extract_complications(
|
||
text: str,
|
||
dossier: DossierMedical,
|
||
edsnlp_result: Optional[EdsnlpResult] = None,
|
||
) -> None:
|
||
"""Extrait les complications mentionnées."""
|
||
text_lower = text.lower()
|
||
|
||
# Termes de négation détectés par edsnlp pour chaque entité
|
||
edsnlp_negated_terms: set[str] = set()
|
||
if edsnlp_result:
|
||
for ent in edsnlp_result.cim10_entities:
|
||
if ent.negation:
|
||
edsnlp_negated_terms.add(ent.texte.lower())
|
||
|
||
complication_terms = [
|
||
"éruption cutanée",
|
||
"eruption cutanée",
|
||
"fièvre",
|
||
"infection",
|
||
"hémorragie",
|
||
"hématome",
|
||
"abcès",
|
||
"fistule",
|
||
"iléus",
|
||
"occlusion",
|
||
]
|
||
|
||
for term in complication_terms:
|
||
if term in text_lower:
|
||
# Vérifier la négation via edsnlp d'abord
|
||
if edsnlp_result and _is_negated_by_edsnlp(term, edsnlp_negated_terms):
|
||
continue
|
||
# Fallback regex pour la négation
|
||
pattern = rf"(?:pas de|sans|absence de|aucun[e]?)\s+{re.escape(term)}"
|
||
if not re.search(pattern, text_lower):
|
||
dossier.complications.append(term.capitalize())
|
||
|
||
|
||
def _is_negated_by_edsnlp(term: str, negated_terms: set[str]) -> bool:
|
||
"""Vérifie si un terme est nié selon edsnlp."""
|
||
term_lower = term.lower()
|
||
for neg_term in negated_terms:
|
||
if term_lower in neg_term or neg_term in term_lower:
|
||
return True
|
||
return False
|
||
|
||
|
||
def _validate_ccam(dossier: DossierMedical) -> None:
|
||
"""Valide les codes CCAM suggérés contre le dictionnaire officiel."""
|
||
for acte in dossier.actes_ccam:
|
||
if not acte.code_ccam_suggestion:
|
||
acte.validite = "non_verifie"
|
||
continue
|
||
is_valid, desc = ccam_validate(acte.code_ccam_suggestion)
|
||
if is_valid:
|
||
acte.validite = "valide"
|
||
else:
|
||
acte.validite = "non_verifie"
|
||
dossier.alertes_codage.append(
|
||
f"CCAM {acte.code_ccam_suggestion} ({acte.texte}) : code absent du dictionnaire CCAM V81"
|
||
)
|
||
|
||
|
||
_INVALID_CODE_PATTERNS = {"aucun", "none", "n/a", "non_codable", "aucun_code_valide", "inconnu"}
|
||
|
||
|
||
def _fallback_cim10(texte: str) -> str | None:
    """Look up a CIM-10 code from the diagnosis text and keep it only if valid.

    Returns:
        The code found by the dictionary lookup (``CIM10_MAP`` passed as
        overrides) when ``cim10_validate`` accepts it, otherwise ``None``.
    """
    candidate = dict_lookup(texte, domain_overrides=CIM10_MAP)
    if not candidate:
        return None
    accepted, _ = cim10_validate(candidate)
    return candidate if accepted else None
|
||
|
||
|
||
def _validate_cim10(dossier: DossierMedical) -> None:
|
||
"""Valide les codes CIM-10 suggérés par Ollama contre le dictionnaire."""
|
||
diags: list[tuple[str, Diagnostic]] = []
|
||
if dossier.diagnostic_principal:
|
||
diags.append(("DP", dossier.diagnostic_principal))
|
||
for das in dossier.diagnostics_associes:
|
||
diags.append(("DAS", das))
|
||
|
||
for type_diag, diag in diags:
|
||
if not diag.cim10_suggestion:
|
||
continue
|
||
|
||
# Rejeter les hallucinations
|
||
if diag.cim10_suggestion.lower().strip() in _INVALID_CODE_PATTERNS:
|
||
fallback = _fallback_cim10(diag.texte)
|
||
if fallback:
|
||
dossier.alertes_codage.append(
|
||
f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} » → fallback {fallback}"
|
||
)
|
||
diag.cim10_suggestion = fallback
|
||
diag.cim10_confidence = "medium"
|
||
else:
|
||
dossier.alertes_codage.append(
|
||
f"CIM-10 {type_diag} ({diag.texte}) : code rejeté « {diag.cim10_suggestion} »"
|
||
)
|
||
diag.cim10_suggestion = None
|
||
diag.cim10_confidence = None
|
||
continue
|
||
|
||
# Normaliser le format (K810 → K81.0)
|
||
diag.cim10_suggestion = normalize_code(diag.cim10_suggestion)
|
||
|
||
# Valider contre le dictionnaire
|
||
is_valid, label = cim10_validate(diag.cim10_suggestion)
|
||
if not is_valid:
|
||
fallback = _fallback_cim10(diag.texte)
|
||
if fallback:
|
||
dossier.alertes_codage.append(
|
||
f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code invalide → fallback {fallback}"
|
||
)
|
||
diag.cim10_suggestion = fallback
|
||
diag.cim10_confidence = "medium"
|
||
else:
|
||
dossier.alertes_codage.append(
|
||
f"CIM-10 {type_diag} {diag.cim10_suggestion} ({diag.texte}) : code absent du dictionnaire CIM-10"
|
||
)
|
||
diag.cim10_confidence = "low"
|
||
|
||
|
||
def _find_act_date(text: str, act_pattern: str) -> str | None:
|
||
"""Trouve la date associée à un acte."""
|
||
# Chercher "acte le DD/MM" ou "acte le DD/MM/YYYY"
|
||
m = re.search(
|
||
rf"{act_pattern}.*?(?:le\s+)?(\d{{2}}/\d{{2}}(?:/\d{{4}})?)",
|
||
text,
|
||
re.IGNORECASE,
|
||
)
|
||
if m:
|
||
return m.group(1)
|
||
|
||
# Chercher dans la ligne d'observation juste avant
|
||
m = re.search(
|
||
rf"(\d{{2}}/\d{{2}}/\d{{4}}).*?{act_pattern}",
|
||
text,
|
||
re.IGNORECASE,
|
||
)
|
||
if m:
|
||
return m.group(1)
|
||
return None
|
||
|
||
|
||
def _apply_exclusion_rules(dossier: DossierMedical) -> None:
    """Apply the symptom-vs-precise-diagnosis exclusion rules to the dossier.

    Replaces the associated diagnoses with the cleaned list returned by
    ``check_exclusions`` and records its warnings. Any failure is logged
    with its traceback and leaves the function without raising.
    """
    try:
        from .exclusion_rules import check_exclusions

        outcome = check_exclusions(dossier.diagnostic_principal, dossier.diagnostics_associes)
        dossier.diagnostics_associes = outcome.cleaned_das
        dossier.alertes_codage.extend(outcome.warnings)
        if outcome.excluded:
            excluded_count = len(outcome.excluded)
            logger.info(" Exclusions : %d DAS symptomatiques exclus", excluded_count)
    except Exception:
        logger.warning("Erreur lors de l'application des règles d'exclusion", exc_info=True)
|
||
|
||
|
||
def _apply_severity_rules(dossier: DossierMedical) -> None:
    """Run the heuristic severity enrichment and record its alerts.

    Only the alerts returned by ``enrich_dossier_severity`` are used here;
    the two counters it also returns are ignored. Any failure is logged with
    its traceback and leaves the function without raising.
    """
    try:
        from .severity import enrich_dossier_severity

        severity_alerts, _unused_cma, _unused_cms = enrich_dossier_severity(
            dossier.diagnostic_principal,
            dossier.diagnostics_associes,
        )
        dossier.alertes_codage.extend(severity_alerts)
    except Exception:
        logger.warning("Erreur lors de l'évaluation de sévérité", exc_info=True)
|
||
|
||
|
||
def _apply_code_corrections(dossier: DossierMedical) -> None:
    """Replace CIM-10 codes flagged by ``correct_known_miscodes``.

    Every diagnosis with a code (principal first, then associated) is passed
    to ``correct_known_miscodes`` together with its text; when a replacement
    is returned, it is logged and written back to the diagnosis.
    """
    principal = dossier.diagnostic_principal
    candidates = ([principal] if principal else []) + list(dossier.diagnostics_associes)

    for diag in candidates:
        current = diag.cim10_suggestion
        if not current:
            continue
        replacement = correct_known_miscodes(current, diag.texte)
        if not replacement:
            continue
        logger.info(" Code corrigé : %s → %s pour « %s »", current, replacement, diag.texte)
        diag.cim10_suggestion = replacement
|
||
|
||
|
||
def _remove_das_equal_dp(dossier: DossierMedical) -> None:
|
||
"""Retire les DAS dont le code CIM-10 est identique au DP (violation règle PMSI)."""
|
||
dp_code = dossier.diagnostic_principal.cim10_suggestion if dossier.diagnostic_principal else None
|
||
if not dp_code:
|
||
return
|
||
before = len(dossier.diagnostics_associes)
|
||
dossier.diagnostics_associes = [
|
||
d for d in dossier.diagnostics_associes if d.cim10_suggestion != dp_code
|
||
]
|
||
removed = before - len(dossier.diagnostics_associes)
|
||
if removed:
|
||
logger.info(" DAS=DP : %d DAS retiré(s) (code %s identique au DP)", removed, dp_code)
|
||
|
||
|
||
def _apply_noncumul_rules(dossier: DossierMedical) -> None:
    """Record the non-cumulation alerts reported for the dossier's CCAM acts.

    The alerts returned by ``check_noncumul`` are appended to
    ``dossier.alertes_codage``. Any failure is logged with its traceback and
    leaves the function without raising.
    """
    try:
        from .ccam_noncumul import check_noncumul

        dossier.alertes_codage.extend(check_noncumul(dossier.actes_ccam))
    except Exception:
        logger.warning("Erreur lors de la vérification du non-cumul CCAM", exc_info=True)
|
||
|
||
|
||
def _lookup_cim10(text: str) -> str | None:
    """Return the CIM-10 code found for ``text`` by the dictionary lookup.

    ``CIM10_MAP`` is passed to the lookup as ``domain_overrides``. The result
    is returned as is, without further validation.
    """
    looked_up = dict_lookup(text, domain_overrides=CIM10_MAP)
    return looked_up
|
||
|
||
|
||
# Plages de référence biologiques (min, max) — utilisées par _is_abnormal()
|
||
# et exportées pour le formatage du contexte LLM dans rag_search.py
|
||
BIO_NORMALS: dict[str, tuple[float, float]] = {
|
||
"Lipasémie": (0, 60),
|
||
"CRP": (0, 5),
|
||
"ASAT": (0, 40),
|
||
"ALAT": (0, 40),
|
||
"GGT": (0, 60),
|
||
"PAL": (0, 150),
|
||
"Bilirubine totale": (0, 17),
|
||
"Hémoglobine": (12, 17),
|
||
"Plaquettes": (150, 400),
|
||
"Leucocytes": (4, 10),
|
||
"Créatinine": (50, 120),
|
||
}
|
||
|
||
|
||
def _is_abnormal(test: str, value: str) -> bool | None:
|
||
"""Détermine si un résultat biologique est anormal."""
|
||
try:
|
||
val = float(value.replace(",", "."))
|
||
except (ValueError, AttributeError):
|
||
if value.lower() in ("négative", "negative", "normale", "normal"):
|
||
return False
|
||
if value.lower() in ("positive", "positif", "élevée", "elevee"):
|
||
return True
|
||
return None
|
||
|
||
if test in BIO_NORMALS:
|
||
lo, hi = BIO_NORMALS[test]
|
||
return val > hi or val < lo
|
||
return None
|