"""Moteur de décisions (post-traitement qualité). But: conserver la proposition du modèle (cim10_suggestion) tout en produisant une *sortie finale* plus défendable (cim10_final + cim10_decision). Ce module est déterministe, court, et auditable. """ from __future__ import annotations import re import unicodedata from typing import Optional import logging from ..config import ( CodeDecision, Diagnostic, DossierMedical, VetoIssue, load_reference_ranges, load_bio_rules, rule_enabled, ) from ..medical.cim10_dict import validate_code as cim10_validate logger = logging.getLogger(__name__) # --- Règles "étiologiques" : ne pas affirmer sans preuve spécifique --- IRON_MARKERS = ( "ferrit", # ferritine "transferr", # transferrine "saturation", # saturation transferrine "cst", # coefficient de saturation "carence mart", "martiale", "ferripr", # ferriprive "fer intraveineux", "fer iv", "traitement martial", ) def _norm(s: str) -> str: s = s.replace("’", "'") s = unicodedata.normalize("NFKD", s) s = "".join(ch for ch in s if not unicodedata.combining(ch)) s = s.lower() return re.sub(r"\s+", " ", s).strip() def _first_float(text: str) -> Optional[float]: m = re.search(r"(-?\d+(?:[\.,]\d+)?)", text) if not m: return None return float(m.group(1).replace(",", ".")) def _parse_normal_range(text: str) -> tuple[Optional[float], Optional[float]]: # Ex: "[N: 12-17]" / "[N: 12 - 17]" m = re.search(r"\[\s*N\s*:\s*([0-9]+(?:[\.,][0-9]+)?)\s*-\s*([0-9]+(?:[\.,][0-9]+)?)\s*\]", text) if not m: return None, None lo = float(m.group(1).replace(",", ".")) hi = float(m.group(2).replace(",", ".")) return lo, hi def _parse_float(v: str | None) -> float | None: if v is None: return None s = str(v).strip().replace(",", ".") m = re.search(r"(-?\d+(?:\.\d+)?)", s) if not m: return None try: return float(m.group(1)) except ValueError: return None def _age_band(dossier: DossierMedical, cfg: dict) -> str: age = getattr(dossier.sejour, "age", None) adult_min = (cfg.get("age_bands") or {}).get("adult_min_years", 18) if age is None: return "unknown" return "adult" if age >= adult_min else "child" def _threshold(cfg: dict, test: str, age_band: str, doc_lo: float | None) -> float: """Retourne un seuil 'normal' conservateur pour déclencher un RULED_OUT. Priorité: - doc_lo si présent (norme du document = vérité du dossier) - safe zone si âge inconnu ou enfant (conservateur) - fallback YAML sinon (adult) """ if doc_lo is not None: return float(doc_lo) safe = cfg.get("safe_zones_unknown_age") or {} fallback = cfg.get("fallback_ranges") or {} if age_band in ("unknown", "child"): # Seuils safe si dispo, sinon fallback adult key_map = { "platelets": "platelets_ruled_out_low", "sodium": "sodium_ruled_out_low", "potassium_high": "potassium_ruled_out_high", "potassium_low": "potassium_ruled_out_low", } k = key_map.get(test) if k and k in safe: return float(safe[k]) band = "adult" if age_band == "unknown" else age_band band_cfg = fallback.get(band) or fallback.get("adult") or {} test_cfg = band_cfg.get(test.replace("_high", "").replace("_low", "")) or {} lo = test_cfg.get("low") if lo is None: # dernier recours return 0.0 return float(lo) def _threshold_high(cfg: dict, test: str, age_band: str, doc_hi: float | None) -> float: """Retourne un seuil 'normal haut' conservateur. Utilisé pour écarter des diagnostics de type "hyper-" quand la valeur est clairement ≤ la borne haute normale. Priorité: - doc_hi si présent (norme du document) - safe zone si âge inconnu/enfant (conservateur) - fallback YAML sinon (adult) """ if doc_hi is not None: return float(doc_hi) safe = cfg.get("safe_zones_unknown_age") or {} fallback = cfg.get("fallback_ranges") or {} if age_band in ("unknown", "child"): # safe zone dédiée si dispo if test == "potassium" and "potassium_ruled_out_high" in safe: return float(safe["potassium_ruled_out_high"]) band = "adult" if age_band == "unknown" else age_band band_cfg = fallback.get(band) or fallback.get("adult") or {} test_cfg = band_cfg.get(test) or {} hi = test_cfg.get("high") if hi is None: # dernier recours return 0.0 return float(hi) def _is_sodium_test(test: str) -> bool: t = (test or "").lower().strip() if "sodium" in t or "natr" in t: return True return bool(re.fullmatch(r"na\+?", t)) def _is_potassium_test(test: str) -> bool: t = (test or "").lower().strip() if "potassium" in t or "kali" in t: return True return bool(re.fullmatch(r"k\+?", t)) def _get_bio_matcher(analyte: str): """Retourne une fonction de matching pour l'analyte demandé.""" a = analyte.lower() if a == "sodium": return _is_sodium_test if a == "potassium": return _is_potassium_test if a == "hemoglobin": return lambda t: "hemoglob" in t.lower() or "hb" in t.lower().split() if a == "platelets": return lambda t: "plaquette" in t.lower() or "platelet" in t.lower() if a == "creatinine": return lambda t: "creatinine" in t.lower() if a == "glucose": return lambda t: "glucose" in t.lower() or "glycemie" in t.lower() if a == "hba1c": return lambda t: "hba1c" in t.lower() if a == "tsh": return lambda t: "tsh" in t.lower() # Fallback: simple inclusion return lambda t: a in t.lower() def _apply_bio_rules_gen(dossier: DossierMedical, cfg_ranges: dict) -> None: """Applique les règles de validation biologique définies dans config/bio_rules.yaml.""" bio_cfg = load_bio_rules() or {} rules = (bio_cfg.get("rules") or {}) if isinstance(bio_cfg, dict) else {} missing_cfg = (bio_cfg.get("missing_evidence") or {}) if isinstance(bio_cfg, dict) else {} age_band = _age_band(dossier, cfg_ranges) def _push_need_info_veto(where: str, message: str) -> None: if dossier.veto_report is None: return vr = dossier.veto_report veto = str(missing_cfg.get("veto") or "VETO-17") if not rule_enabled(veto): return severity = str(missing_cfg.get("severity") or "LOW") penalty = int(missing_cfg.get("score_penalty") or 0) if any((it.veto == veto and it.where == where and (it.message or "") == message) for it in (vr.issues or [])): return vr.issues.append(VetoIssue(veto=veto, severity=severity, where=where, message=message)) if (vr.verdict or "") == "PASS": vr.verdict = "NEED_INFO" if penalty: vr.score_contestabilite = max(0, int(vr.score_contestabilite or 0) - penalty) for rule_id, r in rules.items(): if not r.get("enabled", True): continue analyte = r.get("analyte") if not analyte: continue codes = set(r.get("codes") or []) matcher = _get_bio_matcher(analyte) values, lo_doc, hi_doc = _bio_values(dossier, matcher) t_type = r.get("threshold_type", "low") # 'low' pour hypo/anémie, 'high' pour hyper/insuffisance # 1) PREUVE MANQUANTE if not values and bool(missing_cfg.get("enabled", False)): for i, das in enumerate(dossier.diagnostics_associes or []): if (das.cim10_suggestion or "") not in codes: continue if das.cim10_decision and (das.cim10_decision.action or "") in ("RULED_OUT", "REMOVE"): continue rule_key = f"RULE-{rule_id.upper()}-MISSING" if not rule_enabled(rule_key): continue reason = f"Preuve manquante: {analyte} non extrait — impossible de valider {das.cim10_suggestion} de façon défendable." das.status = "needs_info" das.cim10_final = None das.cim10_decision = CodeDecision( action="NEED_INFO", final_code=None, downgraded_from=das.cim10_suggestion, reason=reason, needs_info=[f"Valeur(s) de {analyte} + date(s) ?", "Normes du laboratoire si disponibles ?"], applied_rules=[rule_key], ) _push_need_info_veto(f"diagnostics_associes[{i}]", f"{das.cim10_suggestion} suggéré mais aucune preuve de {analyte} n'a été extraite.") # 2) CONTRADICTION (RULED_OUT) if values: is_conflict = False found_val = 0.0 threshold = 0.0 if t_type == "low": # Pour un diagnostic de type "Bas" (hypo, anémie), on écarte si la valeur est >= seuil bas normal threshold = _threshold(cfg_ranges, analyte, age_band, lo_doc) if min(values) >= threshold: is_conflict = True found_val = min(values) else: # Pour un diagnostic de type "Haut" (hyper, insuff), on écarte si la valeur est <= seuil haut normal threshold = _threshold_high(cfg_ranges, analyte, age_band, hi_doc) if max(values) <= threshold: is_conflict = True found_val = max(values) # Cas particulier : seuil fixe dans le YAML (ex: HbA1c > 9) if r.get("threshold_value") is not None: fixed_t = float(r["threshold_value"]) if t_type == "high" and max(values) < fixed_t: is_conflict = True found_val = max(values) threshold = fixed_t elif t_type == "low" and min(values) > fixed_t: is_conflict = True found_val = min(values) threshold = fixed_t if is_conflict: rule_key = f"RULE-{rule_id.upper()}-NORMAL" if not rule_enabled(rule_key): continue op = "≥" if t_type == "low" else "≤" reason = f"Contradiction biologique: {analyte}={found_val} ({op}{threshold}, valeur normale) — {r.get('message', 'diagnostic non retenu')}." for das in dossier.diagnostics_associes or []: if (das.cim10_suggestion or "") not in codes: continue das.status = "ruled_out" das.ruled_out_reason = reason das.cim10_final = None das.cim10_decision = CodeDecision( action="RULED_OUT", final_code=None, downgraded_from=das.cim10_suggestion, reason=reason, needs_info=[ f"Valeurs de {analyte} sur d'autres dates (trend) ?", f"Mention explicite de {das.cim10_suggestion} confirmée malgré valeurs normales ?", ], applied_rules=[rule_key], ) def _bio_values( dossier: DossierMedical, matcher, ) -> tuple[list[float], float | None, float | None]: """Collecte des valeurs biologiques et une éventuelle norme [N: lo-hi]. - Les entrées BiologieCle peuvent être marquées quality=ok|suspect|discarded. - Par défaut, on **privilégie** les valeurs 'ok'. Si aucune valeur ok n'existe, on retombe sur les valeurs 'suspect' (audit), afin de ne pas perdre l'info. Retour: - liste de valeurs (float) - norme basse (si trouvée) - norme haute (si trouvée) """ ok_values: list[float] = [] suspect_values: list[float] = [] lo_doc: float | None = None hi_doc: float | None = None for b in dossier.biologie_cle or []: if not matcher(getattr(b, "test", "") or ""): continue q = getattr(b, "quality", None) or "ok" if q == "discarded": continue # Priorité: valeur_num si disponible (plus fiable que reparsing) val = getattr(b, "valeur_num", None) if val is None: raw = str(getattr(b, "valeur", "") or "") val = _parse_float(raw) if val is None: continue if q == "suspect": suspect_values.append(val) else: ok_values.append(val) # Normes éventuelles dans la chaîne if lo_doc is None and hi_doc is None: raw = str(getattr(b, "valeur", "") or "") lo, hi = _parse_normal_range(raw) if lo is not None or hi is not None: lo_doc, hi_doc = lo, hi values = ok_values if ok_values else suspect_values return values, lo_doc, hi_doc def _get_platelets_context(dossier: DossierMedical) -> tuple[float | None, float | None, float | None]: """Retourne (valeur_plaquettes, norme_basse, norme_haute) si disponible. Politique: - privilégie une valeur qualité=ok - sinon retombe sur une valeur qualité=suspect - ignore discarded """ best_val: float | None = None best_q: str | None = None best_raw: str | None = None best_lo: float | None = None best_hi: float | None = None for b in dossier.biologie_cle or []: test = (b.test or "").lower() if "plaquette" not in test and "platelet" not in test: continue q = getattr(b, "quality", None) or "ok" if q == "discarded": continue raw = str(b.valeur or "") val = getattr(b, "valeur_num", None) if val is None: val = _parse_float(raw) if val is None: continue lo, hi = _parse_normal_range(raw) if best_val is None: best_val, best_q, best_raw, best_lo, best_hi = val, q, raw, lo, hi continue # Remplacer un suspect par un ok if best_q == "suspect" and q != "suspect": best_val, best_q, best_raw, best_lo, best_hi = val, q, raw, lo, hi return best_val, best_lo, best_hi def _anemia_bio(diag: Diagnostic) -> bool: # 1) via preuves_cliniques (souvent déjà interprétées) for p in diag.preuves_cliniques or []: blob = f"{p.element} {p.interpretation}".lower() if "hemoglob" in blob or "hémoglob" in blob or blob.strip().startswith("hb"): val = _first_float(p.element) or _first_float(p.interpretation) lo, _ = _parse_normal_range(p.element) lo = lo if lo is not None else 12.0 if val is not None and val < lo: return True if "confirm" in blob and "anemie" in blob: return True # 2) fallback : le texte mentionne une anémie chiffrée ex = _norm(diag.source_excerpt or "") if "hemoglob" in ex or "hémoglob" in ex: return True return False def _iron_evidence_blob(dossier: DossierMedical, diag: Diagnostic) -> str: parts: list[str] = [] # Preuves patient (extraits + éléments structurés) if diag.source_excerpt: parts.append(str(diag.source_excerpt)) for p in diag.preuves_cliniques or []: parts.append(f"{p.element} {p.interpretation}") # Biologie clé globale (si ferritine/fer a été capté ailleurs) for b in dossier.biologie_cle or []: parts.append(f"{b.test} {b.valeur or ''}") # Traitements (si supplémentation martiale documentée) for t in dossier.traitements_sortie or []: parts.append(f"{t.medicament} {t.posologie or ''}") return _norm("\n".join(parts)) def _das_promotion_score(das: Diagnostic) -> tuple[int, int, int]: """Score de pertinence pour la promotion DAS→DP. Retourne (pertinence_clinique, confiance, spécificité) : - Pertinence : pathologie (2) > symptôme R (1) > Z-code (0) - Confiance : high (3) > medium (2) > low (1) - Spécificité : longueur du code (sans point) — plus long = plus spécifique """ code = das.cim10_final or "" letter = code[0] if code else "" # Pertinence clinique if letter == "Z": pertinence = 0 elif letter == "R": pertinence = 1 else: pertinence = 2 # Confiance conf = (das.cim10_confidence or "").lower() confiance = {"high": 3, "medium": 2, "low": 1}.get(conf, 1) # Spécificité (longueur du code) specificite = len(code.replace(".", "")) return (pertinence, confiance, specificite) def apply_decisions(dossier: DossierMedical) -> None: """Applique des décisions finales sur DP/DAS. - Ne supprime pas la suggestion du modèle. - Remplit cim10_final systématiquement quand une suggestion existe. - Remplit cim10_decision uniquement si action != KEEP (pour garder le JSON lisible). """ def _set_default_final(diag: Diagnostic): if diag.cim10_suggestion and diag.cim10_final is None: is_valid, _ = cim10_validate(diag.cim10_suggestion) if is_valid: diag.cim10_final = diag.cim10_suggestion else: logger.warning( "Code %s absent du dictionnaire CIM-10 pour « %s » — cim10_final non rempli", diag.cim10_suggestion, diag.texte, ) diag.cim10_final = None # DP if dossier.diagnostic_principal: _set_default_final(dossier.diagnostic_principal) # DAS for das in dossier.diagnostics_associes or []: _set_default_final(das) # --- Règle: nettoyage hiérarchique (VETO-22bis) --- # Si un code spécifique (ex: K81.0) est présent, on retire le code générique (K81.9) all_final_codes = set() if dossier.diagnostic_principal and dossier.diagnostic_principal.cim10_final: all_final_codes.add(dossier.diagnostic_principal.cim10_final) for das in dossier.diagnostics_associes or []: if das.cim10_final: all_final_codes.add(das.cim10_final) for das in dossier.diagnostics_associes or []: if das.cim10_final and das.cim10_final.endswith(".9"): cat3 = das.cim10_final[:3] # Chercher s'il existe un autre code plus spécifique dans la même catégorie if any(c.startswith(cat3) and c != das.cim10_final for c in all_final_codes): das.status = "removed" das.cim10_decision = CodeDecision( action="REMOVE", final_code=None, downgraded_from=das.cim10_final, reason=f"Code générique {das.cim10_final} retiré car un code plus spécifique de la catégorie {cat3} est présent.", applied_rules=["RULE-HIERARCHY-CLEANUP"], ) das.cim10_final = None # --- Règle: D50 sans preuve martiale -> downgrade D64.9 + needs_info --- if rule_enabled("RULE-D50-NEEDS-IRON"): for das in dossier.diagnostics_associes or []: if das.cim10_suggestion != "D50": continue blob = _iron_evidence_blob(dossier, das) has_iron = any(m in blob for m in IRON_MARKERS) has_anemia = _anemia_bio(das) # Si on n'a même pas d'anémie biologique, on n'automatise pas. if not has_anemia: continue if not has_iron: das.cim10_final = "D64.9" das.cim10_decision = CodeDecision( action="DOWNGRADE", final_code="D64.9", downgraded_from="D50", reason="Anémie biologique sans preuve d'étiologie ferriprive (bilan martial absent/insuffisant).", needs_info=[ "Bilan martial disponible ? (ferritine, fer, CST/transferrine)", "Mention explicite 'anémie ferriprive' ou carence martiale ?", "Traitement martial (fer per os/IV) documenté ?", ], applied_rules=["RULE-D50-NEEDS-IRON"], ) # --- Règle: thrombopénie (D69.6) incompatible avec plaquettes normales -> ruled_out (visible mais barré) # Objectif: éviter un FAIL "dur" sur incohérence biologique quand la biologie contredit clairement. if rule_enabled("RULE-D69.6-PLT-NORMAL"): cfg_ranges = load_reference_ranges() plaquettes, plt_lo_doc, _plt_hi_doc = _get_platelets_context(dossier) age_band = _age_band(dossier, cfg_ranges) plt_threshold = _threshold(cfg_ranges, "platelets", age_band, plt_lo_doc) if plaquettes is not None and plaquettes >= plt_threshold: for das in dossier.diagnostics_associes or []: if das.cim10_suggestion != "D69.6": continue # Visible mais barré : on conserve la suggestion, mais on retire le code final das.status = "ruled_out" das.ruled_out_reason = f"Contradiction biologique: plaquettes={plaquettes} (≥{plt_threshold}, valeur normale)" \ " — thrombopénie non retenue sans preuve explicite." das.cim10_final = None das.cim10_decision = CodeDecision( action="RULED_OUT", final_code=None, downgraded_from="D69.6", reason=das.ruled_out_reason, needs_info=[ "Mention explicite de thrombopénie confirmée dans le CR (malgré plaquettes normales) ?", "Valeurs de plaquettes sur d'autres dates (trend) ?", "Cause/iatrogénie documentée (héparine, hémopathie, etc.) ?", ], applied_rules=["RULE-D69.6-PLT-NORMAL"], ) # --- Pack "bio": contradictions pilotées par config/bio_rules.yaml cfg_ranges = load_reference_ranges() _apply_bio_rules_gen(dossier, cfg_ranges) # --- Règle: promotion DAS→DP quand aucun DP n'a été extrait --- if rule_enabled("RULE-DAS-TO-DP"): if dossier.diagnostic_principal is None and dossier.diagnostics_associes: candidates = [ das for das in dossier.diagnostics_associes if das.cim10_final and das.status not in ("ruled_out", "needs_info") ] if candidates: best = max(candidates, key=_das_promotion_score) dossier.diagnostic_principal = Diagnostic( texte=best.texte, cim10_suggestion=best.cim10_suggestion, cim10_confidence=best.cim10_confidence, cim10_final=best.cim10_final, justification=best.justification, raisonnement=best.raisonnement, source=best.source, source_page=best.source_page, source_excerpt=best.source_excerpt, preuves_cliniques=best.preuves_cliniques, sources_rag=best.sources_rag, cim10_decision=CodeDecision( action="PROMOTE_DP", final_code=best.cim10_final, applied_rules=["RULE-DAS-TO-DP"], reason=f"DAS promu en DP (score {_das_promotion_score(best)})", ), ) dossier.diagnostics_associes.remove(best) # Traçabilité : alerte DIM lisible pour audit dossier.alertes_codage.append( f"RULE-DAS-TO-DP: DP absent → DAS {best.cim10_final} ({best.texte}) promu en DP" ) logger.warning( "PROMOTE_DP: DAS %s (%s) promu en DP — aucun DP extrait", best.cim10_final, best.texte, ) def decision_summaries(dossier: DossierMedical) -> list[str]: """Retourne une liste de lignes lisibles à injecter dans alertes_codage.""" lines: list[str] = [] def _summ(where: str, d: Diagnostic): dec = d.cim10_decision if not dec or dec.action == "KEEP": return if dec.action == "DOWNGRADE": lines.append(f"DECISION: {where} {dec.downgraded_from}→{dec.final_code} ({', '.join(dec.applied_rules)})") for ni in dec.needs_info[:3]: lines.append(f"DECISION: besoin_info: {ni}") elif dec.action == "REMOVE": lines.append(f"DECISION: {where} {d.cim10_suggestion} supprimé ({', '.join(dec.applied_rules)})") elif dec.action == "RULED_OUT": lines.append( f"DECISION: {where} {d.cim10_suggestion} écarté (ruled_out) ({', '.join(dec.applied_rules)})" ) if dec.reason: lines.append(f"DECISION: raison: {dec.reason}") elif dec.action == "NEED_INFO": lines.append( f"DECISION: {where} {d.cim10_suggestion} non retenu (NEED_INFO) ({', '.join(dec.applied_rules)})" ) if dec.reason: lines.append(f"DECISION: raison: {dec.reason}") if dec.needs_info: for q in dec.needs_info: lines.append(f"DECISION: besoin_info: {q}") elif dec.action == "PROMOTE_DP": lines.append(f"DECISION: {where} {dec.final_code} promu en DP ({', '.join(dec.applied_rules)})") if dossier.diagnostic_principal: _summ("diagnostic_principal", dossier.diagnostic_principal) for i, das in enumerate(dossier.diagnostics_associes or []): _summ(f"diagnostics_associes[{i}]", das) return lines