feat: dictionnaire de codage + détection anomalies statistiques
- Script build_coding_dict.py génère le dictionnaire depuis le batch (240 dossiers) - coding_dictionary.json : co-occurrences DP→DAS, fréquences, associations bio - anomaly_stats.py : 8 checks (DP/DAS rare, DAS manquant, bio-DAS, âge atypique) - Intégré dans le pipeline cim10_extractor post-DIM-senior Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
4521
config/coding_dictionary.json
Normal file
4521
config/coding_dictionary.json
Normal file
File diff suppressed because it is too large
Load Diff
314
scripts/build_coding_dict.py
Normal file
314
scripts/build_coding_dict.py
Normal file
@@ -0,0 +1,314 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Construit le dictionnaire de codage a partir des resultats du batch.
|
||||||
|
|
||||||
|
Parcourt output/structured/ et genere config/coding_dictionary.json
|
||||||
|
avec les co-occurrences, frequences et associations observees.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 scripts/build_coding_dict.py [--output config/coding_dictionary.json]
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Heuristique : filtrer les vrais medicaments dans les traitements
|
||||||
|
_MED_SUFFIXES = re.compile(
|
||||||
|
r"(ine|ide|ol|one|ate|ase|mab|nib|zol|pam|lam|zide|pine|pril|tan|"
|
||||||
|
r"oxine|xone|dine|mide|fene|phene|mine|sone|lone|done|cine|il|"
|
||||||
|
r"lin|ril|mox|tine|zine|vir|cin)$",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_MED_KNOWN = {
|
||||||
|
"insuline", "heparine", "paracetamol", "doliprane", "aspirine",
|
||||||
|
"augmentin", "ceftriaxone", "amoxicilline", "metformine", "amlodipine",
|
||||||
|
"ramipril", "bisoprolol", "furosemide", "lasilix", "kardegic",
|
||||||
|
"lovenox", "spasfon", "perfalgan", "morphine", "tramadol",
|
||||||
|
"ketoprofene", "profenid", "omeprazole", "pantoprazole", "lanzor",
|
||||||
|
"atorvastatine", "simvastatine", "levothyrox", "cordarone",
|
||||||
|
"amiodarone", "digoxine", "warfarine", "coumadine", "xarelto",
|
||||||
|
"eliquis", "pradaxa", "dabigatran", "rivaroxaban", "apixaban",
|
||||||
|
"methotrexate", "salbutamol", "ventoline", "seretide", "spiriva",
|
||||||
|
"cortancyl", "prednisone", "prednisolone", "solupred", "celestene",
|
||||||
|
"dexamethasone", "hydrocortisone", "zymad", "uvedose", "calcidose",
|
||||||
|
"diffu-k", "potassium", "magnesium", "fer", "tardyferon", "speciafoldine",
|
||||||
|
"acide folique", "vitamine", "enoxaparine", "tinzaparine", "fondaparinux",
|
||||||
|
"arixtra", "clopidogrel", "plavix", "ticagrelor", "brilique",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _is_medication(text: str) -> str | None:
|
||||||
|
"""Extrait le nom du medicament si c'est un vrai traitement."""
|
||||||
|
if not text or len(text) < 3:
|
||||||
|
return None
|
||||||
|
# Nettoyer
|
||||||
|
words = text.strip().lower().split()
|
||||||
|
if not words:
|
||||||
|
return None
|
||||||
|
first = words[0].rstrip(".,;:")
|
||||||
|
|
||||||
|
# Rejeter les phrases (>4 mots sans chiffre de posologie)
|
||||||
|
if len(words) > 6 and not any(c.isdigit() for c in text):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Rejeter les patterns evidents de non-medicament
|
||||||
|
reject_starts = (
|
||||||
|
"ce document", "parents", "il pourra", "document",
|
||||||
|
"prévoir", "réévaluation", "evènement", "transfusion",
|
||||||
|
"note", "consultation", "histoire", "pas de", "suite",
|
||||||
|
"dr.", "mme", "mr.", "bilan", "a revoir", "rdv",
|
||||||
|
)
|
||||||
|
text_lower = text.lower().strip()
|
||||||
|
if any(text_lower.startswith(r) for r in reject_starts):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Check connu
|
||||||
|
if first in _MED_KNOWN:
|
||||||
|
return first
|
||||||
|
for known in _MED_KNOWN:
|
||||||
|
if known in text_lower[:40]:
|
||||||
|
return known
|
||||||
|
|
||||||
|
# Check suffixe
|
||||||
|
if _MED_SUFFIXES.search(first) and len(first) >= 4:
|
||||||
|
return first
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def load_dossiers(structured_dir: str) -> list[dict]:
    """Load every unique dossier from ``output/structured/``.

    Expected layout: ``<structured_dir>/<prefix>_<NDA>/*_cim10.json``.
    Only the first successfully parsed file per NDA is kept (directories
    may duplicate a stay); unreadable or malformed files are skipped so
    loading stays best-effort over a large batch.

    Args:
        structured_dir: path of the directory containing per-stay folders.

    Returns:
        List of parsed dossier dicts, in sorted directory order.
    """
    dossiers: list[dict] = []
    seen_nda: set[str] = set()

    for entry in sorted(os.listdir(structured_dir)):
        full = os.path.join(structured_dir, entry)
        # Skip plain files, the pseudonymised mirror, and folders without
        # the "<prefix>_<NDA>" naming scheme.
        if not os.path.isdir(full) or entry == "pseudonymise" or "_" not in entry:
            continue

        nda = entry.split("_", 1)[1]
        for fname in os.listdir(full):
            if not fname.endswith("_cim10.json"):
                continue
            if nda in seen_nda:
                break  # already have this stay; no point parsing more files
            try:
                # `with` guarantees the handle is closed — the previous
                # `json.load(open(...))` leaked file descriptors.
                with open(os.path.join(full, fname), encoding="utf-8") as fh:
                    data = json.load(fh)
            except (OSError, ValueError):
                # Unreadable or malformed JSON: best-effort, try next file.
                continue
            seen_nda.add(nda)
            dossiers.append(data)

    return dossiers
|
||||||
|
|
||||||
|
|
||||||
|
def build_dictionary(dossiers: list[dict]) -> dict:
    """Build the coding dictionary from a list of structured dossiers.

    Aggregates, over the whole batch:
      - DP and DAS frequencies plus their most common wording,
      - DP -> DAS and DP -> CCAM-act co-occurrences,
      - abnormal-biology -> DAS (3-char prefix) associations,
      - discharge-treatment -> DAS (3-char prefix) associations,
      - observed patient-age range per DP.

    Args:
        dossiers: structured dossier dicts as produced by the pipeline
            (keys used: diagnostic_principal, diagnostics_associes,
            actes_ccam, biologie_cle, traitements_sortie, sejour).

    Returns:
        JSON-serialisable dictionary; see the "metadata" key for counts.
    """
    dp_freq = Counter()
    das_freq = Counter()
    dp_das = defaultdict(Counter)
    dp_acte = defaultdict(Counter)
    das_bio = defaultdict(Counter)
    das_treatment = defaultdict(Counter)
    dp_texte_counter = defaultdict(Counter)
    das_texte_counter = defaultdict(Counter)
    age_dp = defaultdict(list)  # dp_code -> list of observed patient ages

    for data in dossiers:
        dp = data.get("diagnostic_principal", {})
        # Prefer the validated code, fall back on the model suggestion.
        dp_code = (dp.get("cim10_final") or dp.get("cim10_suggestion") or "").strip()
        dp_text = (dp.get("texte") or "").strip()

        das_codes = []
        for das in data.get("diagnostics_associes", []):
            c = (das.get("cim10_final") or das.get("cim10_suggestion") or "").strip()
            t = (das.get("texte") or "").strip()
            if c:
                das_codes.append(c)
                das_freq[c] += 1
                if t:
                    das_texte_counter[c][t] += 1

        if dp_code:
            dp_freq[dp_code] += 1
            if dp_text:
                dp_texte_counter[dp_code][dp_text] += 1
            for c in das_codes:
                dp_das[dp_code][c] += 1

        # CCAM acts observed together with this DP.
        for a in data.get("actes_ccam", []):
            code = (
                a.get("code_ccam")
                or a.get("ccam_suggestion")
                or a.get("code_ccam_suggestion")
                or ""
            ).strip()
            if code and dp_code:
                dp_acte[dp_code][code] += 1

        # Abnormal biology -> DAS, keyed on the 3-char CIM-10 prefix.
        abnormal = [
            b.get("test", "")
            for b in data.get("biologie_cle", [])
            if b.get("anomalie")
        ]
        for c in das_codes:
            c3 = c[:3]
            for bt in abnormal:
                if bt:
                    das_bio[c3][bt] += 1

        # Discharge treatments -> DAS (3-char prefix); only lines that the
        # heuristic recognises as a real medication are counted.
        for t in data.get("traitements_sortie", []):
            med = _is_medication(t.get("medicament", ""))
            if med:
                for c in das_codes:
                    das_treatment[c[:3]][med] += 1

        # Patient age per DP (used downstream for atypical-age detection).
        # NOTE: the previous version also accumulated (duree_sejour, nb_das)
        # pairs that were never used — that dead code has been removed.
        sejour = data.get("sejour", {})
        age = sejour.get("age")
        if age is not None and dp_code:
            age_dp[dp_code].append(age)

    # Most frequent wording per code.
    dp_texte = {code: cnt.most_common(1)[0][0] for code, cnt in dp_texte_counter.items()}
    das_texte = {code: cnt.most_common(1)[0][0] for code, cnt in das_texte_counter.items()}

    n_total = len(dossiers)
    dictionary = {
        "metadata": {
            "n_dossiers": n_total,
            "n_dp_distinct": len(dp_freq),
            "n_das_distinct": len(das_freq),
            "version": 1,
        },
        "dp": {},
        "das": {},
        "dp_das_cooccurrence": {},
        "dp_acte_cooccurrence": {},
        "das_bio_association": {},
        "das_treatment_association": {},
    }

    # DP entries, with the observed age range when available.
    for code, n in dp_freq.most_common():
        entry = {"freq": n, "texte": dp_texte.get(code, "")}
        ages = age_dp.get(code, [])
        if ages:
            entry["age_moy"] = round(sum(ages) / len(ages), 1)
            entry["age_min"] = min(ages)
            entry["age_max"] = max(ages)
        dictionary["dp"][code] = entry

    # DAS entries. das_freq can only be non-empty when n_total > 0, but the
    # explicit guard makes the division safe regardless.
    for code, n in das_freq.most_common():
        dictionary["das"][code] = {
            "freq": n,
            "texte": das_texte.get(code, ""),
            "pct": round(100 * n / max(n_total, 1), 1),
        }

    # DP -> DAS co-occurrences (top 30 per DP, threshold >= 2).
    for dp_code, das_counter in dp_das.items():
        pairs = {c: k for c, k in das_counter.most_common(30) if k >= 2}
        if pairs:
            dictionary["dp_das_cooccurrence"][dp_code] = pairs

    # DP -> act co-occurrences (top 10 per DP, no threshold).
    for dp_code, acte_counter in dp_acte.items():
        pairs = dict(acte_counter.most_common(10))
        if pairs:
            dictionary["dp_acte_cooccurrence"][dp_code] = pairs

    # DAS -> abnormal biology (top 5 per DAS prefix, threshold >= 3).
    for das3, bio_counter in das_bio.items():
        top = {test: k for test, k in bio_counter.most_common(5) if k >= 3}
        if top:
            dictionary["das_bio_association"][das3] = top

    # DAS -> treatments (top 5 per DAS prefix, threshold >= 3).
    for das3, trt_counter in das_treatment.items():
        top = {med: k for med, k in trt_counter.most_common(5) if k >= 3}
        if top:
            dictionary["das_treatment_association"][das3] = top

    return dictionary
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: scan structured outputs and write the coding dictionary."""
    parser = argparse.ArgumentParser(description="Build coding dictionary from batch results")
    parser.add_argument(
        "--input",
        default="output/structured",
        help="Directory containing structured outputs",
    )
    parser.add_argument(
        "--output",
        default="config/coding_dictionary.json",
        help="Output dictionary JSON path",
    )
    args = parser.parse_args()

    # Both paths are resolved relative to the project root (scripts/..).
    root = Path(__file__).resolve().parent.parent
    input_dir = root / args.input
    output_path = root / args.output

    print(f"Loading dossiers from {input_dir}...")
    dossiers = load_dossiers(str(input_dir))
    print(f"Loaded {len(dossiers)} dossiers")

    print("Building dictionary...")
    dictionary = build_dictionary(dossiers)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(dictionary, ensure_ascii=False, indent=2)
    output_path.write_text(payload, encoding="utf-8")

    # Short summary on stdout.
    meta = dictionary["metadata"]
    print(f"\nDictionary written to {output_path}")
    print(f" {meta['n_dossiers']} dossiers")
    print(f" {meta['n_dp_distinct']} DP distincts")
    print(f" {meta['n_das_distinct']} DAS distincts")
    print(f" {len(dictionary['dp_das_cooccurrence'])} DP avec co-occurrences")
    print(f" {len(dictionary['das_bio_association'])} DAS3 avec associations bio")
    print(f" {len(dictionary['das_treatment_association'])} DAS3 avec associations traitement")


if __name__ == "__main__":
    main()
|
||||||
206
src/medical/anomaly_stats.py
Normal file
206
src/medical/anomaly_stats.py
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
"""Detection d'anomalies statistiques dans le codage PMSI.
|
||||||
|
|
||||||
|
Compare le codage d'un dossier au dictionnaire de codage construit
|
||||||
|
a partir du batch pour detecter les ecarts significatifs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..config import DossierMedical
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_DICT_PATH = Path(__file__).resolve().parent.parent.parent / "config" / "coding_dictionary.json"
|
||||||
|
_dict_cache: dict | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _load_dict() -> dict:
|
||||||
|
global _dict_cache
|
||||||
|
if _dict_cache is not None:
|
||||||
|
return _dict_cache
|
||||||
|
try:
|
||||||
|
_dict_cache = json.loads(_DICT_PATH.read_text(encoding="utf-8"))
|
||||||
|
except Exception:
|
||||||
|
logger.warning("coding_dictionary.json introuvable — anomalies stats desactivees")
|
||||||
|
_dict_cache = {}
|
||||||
|
return _dict_cache
|
||||||
|
|
||||||
|
|
||||||
|
def check_statistical_anomalies(dossier: DossierMedical) -> list[str]:
    """Detect statistical anomalies relative to the batch coding dictionary.

    Runs 8 checks comparing this dossier's DP, DAS, biology and patient age
    against the frequencies and co-occurrences observed in the batch
    (config/coding_dictionary.json). As a side effect, stores the alert
    count in ``dossier.quality_flags["stats_anomaly_count"]``.

    Returns:
        List of human-readable alert strings (prefixed "STATS [...]").
    """
    cd = _load_dict()
    if not cd:
        # Dictionary missing or unreadable: statistical checks disabled.
        return []

    alerts: list[str] = []
    # Batch size, used only in alert wording below.
    n_dossiers = cd.get("metadata", {}).get("n_dossiers", 1)

    # --- Extract the dossier's codes ---
    # Only the model suggestions are read here (not cim10_final), whereas the
    # dictionary was built preferring final codes — assumed intentional;
    # TODO(review): confirm.
    dp_code = ""
    if dossier.diagnostic_principal and dossier.diagnostic_principal.cim10_suggestion:
        dp_code = dossier.diagnostic_principal.cim10_suggestion

    das_codes = set()
    for das in dossier.diagnostics_associes:
        if das.cim10_suggestion:
            das_codes.add(das.cim10_suggestion)

    # NOTE(review): acte_codes is collected but not used by any check below.
    acte_codes = set()
    for acte in dossier.actes_ccam:
        code = getattr(acte, "code_ccam", None) or getattr(acte, "ccam_suggestion", None) or ""
        if code:
            acte_codes.add(code.upper())

    bio_abnormal = set()
    for bio in dossier.biologie_cle:
        if bio.anomalie:
            bio_abnormal.add(bio.test)

    duree = dossier.sejour.duree_sejour if dossier.sejour else None
    nb_das = len(dossier.diagnostics_associes)

    dp_dict = cd.get("dp", {})
    das_dict = cd.get("das", {})
    cooc = cd.get("dp_das_cooccurrence", {})
    das_bio = cd.get("das_bio_association", {})

    # --- 1. DP never seen in the batch ---
    if dp_code and dp_code not in dp_dict:
        alerts.append(
            f"STATS [DP rare]: {dp_code} jamais observe dans le batch "
            f"({n_dossiers} dossiers) — verifier le codage"
        )

    # --- 2. DAS never seen in the batch ---
    for code in das_codes:
        if code not in das_dict:
            alerts.append(
                f"STATS [DAS rare]: {code} jamais observe dans le batch — "
                f"code potentiellement errone"
            )

    # --- 3. Singleton DAS ---
    # Disabled for small batches (< 500 dossiers): too many false positives.
    # Becomes useful once the dictionary is built on 1000+ dossiers.
    if n_dossiers >= 500:
        for code in das_codes:
            entry = das_dict.get(code, {})
            if entry.get("freq", 0) == 1:
                alerts.append(
                    f"STATS [DAS singleton]: {code} ({entry.get('texte', '?')}) "
                    f"n'apparait qu'une seule fois dans le batch — a verifier"
                )

    # --- 4. Expected DAS missing for this DP ---
    if dp_code and dp_code in cooc:
        expected = cooc[dp_code]
        dp_freq = dp_dict.get(dp_code, {}).get("freq", 1)

        for das_code, count in expected.items():
            ratio = count / dp_freq
            # Alert if this DAS co-occurs with the DP in >= 60% of batch
            # dossiers but is absent here.
            if ratio >= 0.6 and das_code not in das_codes:
                das_texte = das_dict.get(das_code, {}).get("texte", "")
                alerts.append(
                    f"STATS [DAS attendu manquant]: {das_code} ({das_texte}) "
                    f"present dans {count}/{dp_freq} dossiers avec DP {dp_code} "
                    f"mais absent ici"
                )

    # --- 5. DP+DAS combination never seen together ---
    if dp_code and dp_code in cooc:
        known_das = set(cooc[dp_code].keys())
        # DAS known anywhere in the batch (under any DP).
        all_known_das = set(das_dict.keys())

        for code in das_codes:
            if code not in all_known_das:
                continue  # already reported by check 2 ("never seen")
            # Only meaningful when the DP itself is frequent (>= 10 dossiers).
            if code not in known_das and dp_dict.get(dp_code, {}).get("freq", 0) >= 10:
                das_texte = das_dict.get(code, {}).get("texte", "")
                alerts.append(
                    f"STATS [combinaison inedite]: {dp_code} + {code} ({das_texte}) "
                    f"jamais observe ensemble dans le batch"
                )

    # --- 6. Atypical DAS-count / length-of-stay ratio ---
    if duree is not None and duree >= 3:
        # Heuristic: roughly 1-2 DAS expected per day of stay.
        expected_min = max(1, duree // 4)
        expected_max = max(10, duree * 3)

        if nb_das < expected_min and duree >= 7:
            alerts.append(
                f"STATS [sous-codage]: {nb_das} DAS pour {duree} jours de sejour "
                f"(attendu >= {expected_min})"
            )
        elif nb_das > expected_max:
            alerts.append(
                f"STATS [sur-codage]: {nb_das} DAS pour {duree} jours de sejour "
                f"(attendu <= {expected_max}) — exces de codes"
            )

    # --- 7. Abnormal biology without a matching diagnosis code ---
    # Map: abnormal lab test -> CIM-10 3-char prefixes that usually code it.
    bio_das_expected = {
        "Créatinine": ["N17", "N18", "N19"],
        "Hémoglobine": ["D50", "D62", "D64"],
        "Plaquettes": ["D69"],
        "Troponine": ["I21", "I20", "I25"],
        "BNP": ["I50", "I11"],
        "ALAT": ["K71", "K72", "K73", "K74", "K75", "K76"],
        "ASAT": ["K71", "K72", "K73", "K74", "K75", "K76"],
        "TSH": ["E03", "E05"],
        "Lipasémie": ["K85"],
        "CRP": [],  # too non-specific
        "Leucocytes": [],  # too non-specific
    }

    for bio_test in bio_abnormal:
        expected_prefixes = bio_das_expected.get(bio_test, [])
        if not expected_prefixes:
            continue
        # NOTE(review): all_codes is loop-invariant and could be hoisted.
        all_codes = das_codes | ({dp_code} if dp_code else set())
        has_match = any(
            any(c.startswith(p) for p in expected_prefixes)
            for c in all_codes
        )
        if not has_match:
            # Only alert when the dictionary confirms this bio/DAS association
            # is frequent in the batch (checks the first prefix only).
            for das3 in expected_prefixes[:1]:
                bio_assoc = das_bio.get(das3, {})
                if bio_test in bio_assoc and bio_assoc[bio_test] >= 5:
                    alerts.append(
                        f"STATS [bio-DAS incoherent]: {bio_test} anormal mais aucun code "
                        f"{'/'.join(expected_prefixes[:3])} dans le codage "
                        f"(observe {bio_assoc[bio_test]}x dans le batch)"
                    )
                    break

    # --- 8. Atypical patient age for this DP ---
    if dp_code and dp_code in dp_dict:
        dp_entry = dp_dict[dp_code]
        age = dossier.sejour.age if dossier.sejour else None
        age_min = dp_entry.get("age_min")
        age_max = dp_entry.get("age_max")
        age_moy = dp_entry.get("age_moy")
        if age is not None and age_min is not None and age_max is not None:
            # Alert only when well outside the observed range: margin is 30%
            # of the observed spread, with a 10-year floor.
            margin = max(10, (age_max - age_min) * 0.3)
            if age < age_min - margin or age > age_max + margin:
                alerts.append(
                    f"STATS [age atypique]: Patient {age} ans pour DP {dp_code} "
                    f"(observe {age_min}-{age_max} ans, moy {age_moy})"
                )

    # Side effect: expose the alert count as a quality flag.
    dossier.quality_flags["stats_anomaly_count"] = len(alerts)

    return alerts
|
||||||
@@ -219,6 +219,14 @@ def extract_medical_info(
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.error("DIM-SENIOR: erreur détection erreurs fréquentes", exc_info=True)
|
logger.error("DIM-SENIOR: erreur détection erreurs fréquentes", exc_info=True)
|
||||||
|
|
||||||
|
# Post-processing : anomalies statistiques (dictionnaire de codage)
|
||||||
|
try:
|
||||||
|
from .anomaly_stats import check_statistical_anomalies
|
||||||
|
stats_alerts = check_statistical_anomalies(dossier)
|
||||||
|
dossier.alertes_codage.extend(stats_alerts)
|
||||||
|
except Exception:
|
||||||
|
logger.error("STATS: erreur détection anomalies statistiques", exc_info=True)
|
||||||
|
|
||||||
# Post-processing : resélection DP si exclu par vetos/exclusions
|
# Post-processing : resélection DP si exclu par vetos/exclusions
|
||||||
if dossier.document_type != "trackare":
|
if dossier.document_type != "trackare":
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user