chore: add .gitignore

This commit is contained in:
dom
2026-03-05 00:37:41 +01:00
parent 542797a124
commit 2578afb6ff
1716 changed files with 1905609 additions and 18 deletions

View File

@@ -0,0 +1 @@
"""Contrôles qualité (vetos) pour réduire la contestabilité CPAM."""

View File

@@ -0,0 +1,170 @@
"""Moteur de décisions (post-traitement qualité).
But: conserver la proposition du modèle (cim10_suggestion) tout en produisant une
*sortie finale* plus défendable (cim10_final + cim10_decision).
Ce module est déterministe, court, et auditable.
"""
from __future__ import annotations
import re
import unicodedata
from typing import Optional
from ..config import CodeDecision, Diagnostic, DossierMedical
# --- "Etiological" rules: do not assert a cause without specific evidence ---
# Lowercase, accent-free substrings searched (with ``in``) in the evidence text
# to decide whether iron status is documented.
# NOTE(review): "saturation" is a generic word (it also appears in oxygen
# saturation reports) — confirm it is specific enough for this rule.
IRON_MARKERS = (
    "ferrit",  # ferritin ("ferritine")
    "transferr",  # transferrin ("transferrine")
    "saturation",  # transferrin saturation
    "cst",  # saturation coefficient ("coefficient de saturation")
    "carence mart",
    "martiale",
    "ferripr",  # iron-deficient ("ferriprive")
    "fer intraveineux",
    "fer iv",
    "traitement martial",
)
def _norm(s: str) -> str:
s = s.replace("", "'")
s = unicodedata.normalize("NFKD", s)
s = "".join(ch for ch in s if not unicodedata.combining(ch))
s = s.lower()
return re.sub(r"\s+", " ", s).strip()
def _first_float(text: str) -> Optional[float]:
m = re.search(r"(-?\d+(?:[\.,]\d+)?)", text)
if not m:
return None
return float(m.group(1).replace(",", "."))
def _parse_normal_range(text: str) -> tuple[Optional[float], Optional[float]]:
# Ex: "[N: 12-17]" / "[N: 12 - 17]"
m = re.search(r"\[\s*N\s*:\s*([0-9]+(?:[\.,][0-9]+)?)\s*-\s*([0-9]+(?:[\.,][0-9]+)?)\s*\]", text)
if not m:
return None, None
lo = float(m.group(1).replace(",", "."))
hi = float(m.group(2).replace(",", "."))
return lo, hi
def _anemia_bio(diag: Diagnostic) -> bool:
    """Tell whether *diag* carries biological evidence of anaemia.

    Two sources are inspected, in this order:

    1. Each entry of ``diag.preuves_cliniques``. An entry counts when it is
       about haemoglobin (text contains "hemoglob" or starts with "hb") and
       its first number is below the lower bound of its "[N: lo-hi]" range
       (12.0 when no range is given), or when its text contains both
       "confirm" and "anemie".
    2. Fallback: ``diag.source_excerpt`` mentions haemoglobin.

    Returns:
        True as soon as one source matches, False otherwise.
    """
    # 1) via preuves_cliniques (often already interpreted)
    for p in diag.preuves_cliniques or []:
        element = str(p.element or "")
        interpretation = str(p.interpretation or "")
        # Fold case AND accents, so that "Anémie"/"Hémoglobine" match the
        # accent-free markers below. Done locally with unicodedata to keep
        # the markers and the folding side by side.
        decomposed = unicodedata.normalize(
            "NFKD", f"{element} {interpretation}".lower()
        )
        blob = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

        if "hemoglob" in blob or blob.strip().startswith("hb"):
            # Explicit None checks: ``a or b`` would discard a parsed 0.0.
            val = _first_float(element)
            if val is None:
                val = _first_float(interpretation)
            lo, _ = _parse_normal_range(element)
            if lo is None:
                lo = 12.0  # default lower bound when no range is provided
            if val is not None and val < lo:
                return True
        if "confirm" in blob and "anemie" in blob:
            return True

    # 2) fallback on the source excerpt
    # NOTE(review): this accepts any mention of haemoglobin, whatever the
    # value — confirm that a numeric check is not expected here.
    ex = _norm(diag.source_excerpt or "")
    return "hemoglob" in ex
def _iron_evidence_blob(dossier: DossierMedical, diag: Diagnostic) -> str:
    """Build one normalised text gathering everything that may show iron status.

    The text concatenates, one item per line and in this order: the
    diagnosis source excerpt, its clinical proofs (element + interpretation),
    the key biology of the record (test + value) and the discharge treatments
    (drug + dosage). The result is passed through ``_norm``.
    """
    chunks: list[str] = []

    # Patient-level evidence attached to the diagnosis itself
    excerpt = diag.source_excerpt
    if excerpt:
        chunks.append(str(excerpt))
    chunks.extend(
        f"{proof.element} {proof.interpretation}"
        for proof in diag.preuves_cliniques or []
    )

    # Record-wide key biology (ferritin/iron may have been captured there)
    chunks.extend(
        f"{bio.test} {bio.valeur or ''}" for bio in dossier.biologie_cle or []
    )

    # Discharge treatments (documented iron supplementation)
    chunks.extend(
        f"{drug.medicament} {drug.posologie or ''}"
        for drug in dossier.traitements_sortie or []
    )

    return _norm("\n".join(chunks))
def apply_decisions(dossier: DossierMedical) -> None:
    """Apply final coding decisions to the principal and associated diagnoses.

    The record is modified in place:

    - the model suggestion (``cim10_suggestion``) is never removed;
    - ``cim10_final`` is filled from the suggestion whenever a suggestion
      exists and no final code was set yet;
    - ``cim10_decision`` is filled only when a rule changes the code, which
      keeps the serialised record readable.

    Rule RULE-D50-NEEDS-IRON (associated diagnoses only): a suggested code in
    the D50 family with biological anaemia but without any iron-status marker
    in the evidence is downgraded to D64.9, with a list of missing
    information.
    """

    def _set_default_final(diag: Diagnostic) -> None:
        if diag.cim10_suggestion and diag.cim10_final is None:
            diag.cim10_final = diag.cim10_suggestion

    # Principal diagnosis
    if dossier.diagnostic_principal:
        _set_default_final(dossier.diagnostic_principal)

    # Associated diagnoses
    for das in dossier.diagnostics_associes or []:
        _set_default_final(das)

    # --- Rule: D50 without iron evidence -> downgrade to D64.9 + needs_info ---
    for das in dossier.diagnostics_associes or []:
        suggestion = das.cim10_suggestion
        # Match the whole D50 family (D50, D50.0, D50.9, ...) by prefix, as the
        # other code checks of this package do; an exact comparison with "D50"
        # lets every subcode through.
        if not suggestion or not str(suggestion).startswith("D50"):
            continue

        blob = _iron_evidence_blob(dossier, das)
        has_iron = any(m in blob for m in IRON_MARKERS)
        has_anemia = _anemia_bio(das)

        # Without biological anaemia nothing is automated.
        if not has_anemia:
            continue

        if not has_iron:
            das.cim10_final = "D64.9"
            das.cim10_decision = CodeDecision(
                action="DOWNGRADE",
                final_code="D64.9",
                # Record the code that was actually suggested.
                downgraded_from=suggestion,
                reason="Anémie biologique sans preuve d'étiologie ferriprive (bilan martial absent/insuffisant).",
                needs_info=[
                    "Bilan martial disponible ? (ferritine, fer, CST/transferrine)",
                    "Mention explicite 'anémie ferriprive' ou carence martiale ?",
                    "Traitement martial (fer per os/IV) documenté ?",
                ],
                applied_rules=["RULE-D50-NEEDS-IRON"],
            )
def decision_summaries(dossier: DossierMedical) -> list[str]:
    """Return human-readable lines describing the decisions taken on *dossier*.

    One "DECISION: ..." line is produced per diagnosis whose decision is a
    DOWNGRADE or a REMOVE; a DOWNGRADE is followed by at most three
    "DECISION: besoin_info: ..." lines. Diagnoses without decision, or with a
    KEEP decision, produce nothing. The principal diagnosis comes first, then
    the associated diagnoses in list order.
    """
    lines: list[str] = []

    def _summ(where: str, d: Diagnostic) -> None:
        dec = d.cim10_decision
        if not dec or dec.action == "KEEP":
            return
        # Tolerate decisions built without rules / missing information.
        rules = ", ".join(dec.applied_rules or [])
        if dec.action == "DOWNGRADE":
            # The two codes need a separator: without it the line reads
            # "D50D64.9", which is neither readable nor parseable.
            lines.append(
                f"DECISION: {where} {dec.downgraded_from} -> {dec.final_code} ({rules})"
            )
            for ni in (dec.needs_info or [])[:3]:
                lines.append(f"DECISION: besoin_info: {ni}")
        elif dec.action == "REMOVE":
            lines.append(f"DECISION: {where} {d.cim10_suggestion} supprimé ({rules})")

    if dossier.diagnostic_principal:
        _summ("diagnostic_principal", dossier.diagnostic_principal)
    for i, das in enumerate(dossier.diagnostics_associes or []):
        _summ(f"diagnostics_associes[{i}]", das)
    return lines

View File

@@ -0,0 +1,380 @@
"""Moteur de vetos (contrôle de contestabilité).
Objectif : bloquer automatiquement les propositions CIM-10/CCAM contestables
(absence de preuve, négation/conditionnel, doublons incohérents, etc.).
Ce module est volontairement simple et déterministe : il doit être stable,
audit-able, et indépendant des modèles.
"""
from __future__ import annotations
import re
import unicodedata
from typing import Iterable
from ..config import (
ActeCCAM,
BiologieCle,
Diagnostic,
DossierMedical,
VetoIssue,
VetoReport,
)
# NOTE: vetoes are deterministic and auditable.
# The model's "reasoning" is never interpreted as evidence.

# Wordings that announce a negation; accented and unaccented spellings are
# both listed.
_NEGATION_CUES = (
    "pas de",
    "pas d",
    "absence de",
    "non retenu",
    "exclu",
    "a eliminer",
    "a éliminer",
    "negatif",
    "négatif",
)
# Wordings that announce a hypothesis or a condition rather than a fact.
_CONDITIONAL_CUES = (
    "si",
    "s il",  # frequent OCR rendering of "s'il"
    "eventuel",
    "éventuel",
    "suspect",
    "probable",
    "hypothese",
    "hypothèse",
    "?",
)
# Wordings showing that the evidence comes from a score or a screening tool.
_EVIDENCE_TEMPLATE_CUES = (
    "score",
    "fib4",
    "fibrosis-4",
    "test de depistage",
    "test de dépistage",
    "outil de depistage",
    "outil de dépistage",
)
def _norm(s: str) -> str:
"""Normalisation légère (lower + sans accents) pour matcher OCR."""
s = s.replace("", "'")
s = unicodedata.normalize("NFKD", s)
s = "".join(ch for ch in s if not unicodedata.combining(ch))
s = s.lower()
# simplifier ponctuation en espaces
s = re.sub(r"[^a-z0-9]+", " ", s)
return re.sub(r"\s+", " ", s).strip()
def _split_sentences(text: str) -> list[str]:
# volontairement simple : robuste sur OCR
text = text.replace("\r", "\n")
parts = re.split(r"[\n\.\;\:]+", text)
return [p.strip() for p in parts if p.strip()]
def _concept_keywords(label: str) -> list[str]:
    """Extract up to five discriminating keywords from a diagnosis label.

    The label is normalised with ``_norm`` and split on spaces; tokens shorter
    than four characters or listed as stop words are dropped. Remaining tokens
    are returned in order of first appearance, without duplicates.
    """
    ignored = {
        "de", "du", "des", "la", "le", "les", "un", "une", "et", "a", "au", "aux",
        "gauche", "droite", "bilaterale", "bilat", "chronique", "aigue", "aigu",
        "sans", "avec",
    }
    candidates = (
        token
        for token in _norm(label).split()
        if len(token) >= 4 and token not in ignored
    )
    # dict.fromkeys keeps the first occurrence of each token, in order
    return list(dict.fromkeys(candidates))[:5]
def _analyze_neg_cond(excerpts: Iterable[str], label: str) -> tuple[bool, bool, bool, bool]:
    """Return ``(negated, conditional, contradictory, positive)`` for *label*.

    Every excerpt is cut into sentences; only sentences mentioning one of the
    keywords of *label* are considered.

    - *negated*: such a sentence has a negation cue in the 40 normalised
      characters preceding the keyword.
    - *conditional*: such a sentence contains a conditional cue.
    - *positive*: such a sentence has no negation cue before the keyword.
    - *contradictory*: both *negated* and *positive* were observed.

    All four are False when *label* yields no keyword.
    """
    kws = _concept_keywords(label)
    if not kws:
        return False, False, False, False

    # Conditional cues are compared with normalised sentences, so they are
    # normalised the same way. They must start on a word boundary; short cues
    # ("si", "s il") must also END on one, otherwise "si" fires inside
    # ordinary words such as "insuffisance" or "lesion". Longer cues act as
    # word prefixes to tolerate inflections ("eventuelle", "probablement").
    word_patterns: list[str] = []
    # Cues made only of punctuation (e.g. "?") vanish during normalisation and
    # are therefore looked up in the raw sentence instead.
    punct_cues: list[str] = []
    for cue in _CONDITIONAL_CUES:
        norm_cue = _norm(cue)
        if not norm_cue:
            if cue.strip():
                punct_cues.append(cue)
            continue
        tail = r"\b" if len(norm_cue) <= 4 else ""
        pattern = r"\b" + re.escape(norm_cue) + tail
        if pattern not in word_patterns:
            word_patterns.append(pattern)
    cond_re = re.compile("|".join(word_patterns)) if word_patterns else None

    negated = False
    conditional = False
    positive = False
    for ex in excerpts:
        if not ex or not str(ex).strip():
            continue
        for sent in _split_sentences(str(ex)):
            ns = _norm(sent)
            if not ns:
                continue

            # is the concept mentioned? (first keyword found wins)
            hit_pos = None
            for kw in kws:
                pos = ns.find(kw)
                if pos != -1:
                    hit_pos = pos
                    break
            if hit_pos is None:
                continue

            # a negation only counts when it closely precedes the concept
            pre = ns[max(0, hit_pos - 40):hit_pos]
            has_neg = any(cue in pre for cue in _NEGATION_CUES)
            has_cond = bool(cond_re and cond_re.search(ns)) or any(
                cue in sent for cue in punct_cues
            )

            if has_neg:
                negated = True
            else:
                positive = True
            if has_cond:
                conditional = True

    contradictory = negated and positive
    return negated, conditional, contradictory, positive
def _evidence_excerpts(d: Diagnostic | ActeCCAM) -> list[str]:
"""Ne retourne que des preuves (extraits), pas le raisonnement du modèle."""
texts: list[str] = []
if getattr(d, "source_excerpt", None):
texts.append(str(getattr(d, "source_excerpt")))
# Sources RAG (extraits)
for s in getattr(d, "sources_rag", []) or []:
if getattr(s, "extrait", None):
texts.append(str(s.extrait))
return [t for t in texts if t.strip()]
def _has_evidence(d: Diagnostic | ActeCCAM) -> bool:
if getattr(d, "source_excerpt", None):
return True
if getattr(d, "sources_rag", None):
# un extrait RAG suffit
for s in d.sources_rag:
if s.extrait and str(s.extrait).strip():
return True
if isinstance(d, Diagnostic) and getattr(d, "preuves_cliniques", None):
return len(d.preuves_cliniques) > 0
return False
def _has_template_evidence(excerpts: Iterable[str]) -> bool:
    """Tell whether the excerpts mention a score or a screening tool.

    Non-empty excerpts are joined and normalised with ``_norm``; the result is
    True when any normalised entry of ``_EVIDENCE_TEMPLATE_CUES`` occurs in it.
    """
    haystack = _norm("\n".join(str(item) for item in excerpts if item))
    for raw_cue in _EVIDENCE_TEMPLATE_CUES:
        if _norm(raw_cue) in haystack:
            return True
    return False
def _parse_float(v: str | None) -> float | None:
if v is None:
return None
s = str(v).strip().replace(",", ".")
# extraire le premier nombre
m = re.search(r"(-?\d+(?:\.\d+)?)", s)
if not m:
return None
try:
return float(m.group(1))
except ValueError:
return None
def _get_bio_value(bios: list[BiologieCle], keywords: tuple[str, ...]) -> float | None:
    """Return the numeric value of the first biology entry matching *keywords*.

    An entry matches when its lowercased test name contains one of the
    keywords. Matching entries whose value holds no number are skipped, so a
    later entry of the same test can still provide the value.

    Returns:
        The parsed value, or None when *bios* is empty/None or when no
        matching entry has a numeric value.
    """
    for b in bios or []:
        name = (b.test or "").lower()
        if not any(k in name for k in keywords):
            continue
        value = _parse_float(b.valeur)
        if value is not None:
            return value
    return None
def apply_vetos(dossier: DossierMedical) -> VetoReport:
    """Apply the deterministic vetoes to *dossier* and return a report.

    Verdicts:
    - FAIL: at least one HARD veto.
    - NEED_INFO: no HARD veto, at least one MEDIUM veto.
    - PASS: neither HARD nor MEDIUM veto.

    The score starts at 100 and loses 30 per HARD issue, 10 per MEDIUM issue
    and 3 per other issue; it is clamped to the 0-100 range. Identical issues
    (same veto, location and message) are reported once.
    """
    issues: list[VetoIssue] = []
    seen_issue_keys: set[tuple[str, str, str]] = set()  # (veto, where, message)

    def add(veto: str, severity: str, where: str, message: str) -> None:
        key = (veto, where, message)
        if key in seen_issue_keys:
            return
        seen_issue_keys.add(key)
        issues.append(VetoIssue(veto=veto, severity=severity, where=where, message=message))

    dp = dossier.diagnostic_principal
    # The collections may be unset on a record: treat None as empty rather
    # than failing on iteration.
    das_list = dossier.diagnostics_associes or []
    actes = dossier.actes_ccam or []
    bios = dossier.biologie_cle or []

    # -----------------------------
    # VETO-02: code without evidence
    # -----------------------------
    if dp and dp.cim10_suggestion:
        if not _has_evidence(dp):
            add("VETO-02", "HARD", "diagnostic_principal", f"DP {dp.cim10_suggestion} sans preuve exploitable")
    for i, das in enumerate(das_list):
        if das.cim10_suggestion and not _has_evidence(das):
            add("VETO-02", "MEDIUM", f"diagnostics_associes[{i}]", f"DAS {das.cim10_suggestion} sans preuve exploitable")
    for i, acte in enumerate(actes):
        if acte.code_ccam_suggestion and not _has_evidence(acte):
            add("VETO-02", "HARD", f"actes_ccam[{i}]", f"Acte {acte.code_ccam_suggestion} sans preuve exploitable")

    # -------------------------------------------------
    # VETO-03: negation / conditional IN THE EVIDENCE
    # (not in the model's reasoning)
    # -------------------------------------------------
    if dp and dp.cim10_suggestion:
        excerpts = _evidence_excerpts(dp)
        neg, cond, contra, pos = _analyze_neg_cond(excerpts, dp.texte or dp.cim10_suggestion)
        if neg and not pos:
            add("VETO-03", "HARD", "diagnostic_principal", f"DP {dp.cim10_suggestion} contredit par la preuve (négation)")
        elif contra:
            add("VETO-03", "MEDIUM", "diagnostic_principal", f"DP {dp.cim10_suggestion} preuves contradictoires (positif vs négatif)")
        elif cond and dp.cim10_confidence == "high":
            add("VETO-03", "MEDIUM", "diagnostic_principal", f"DP {dp.cim10_suggestion} basé sur du conditionnel")
    for i, das in enumerate(das_list):
        if not das.cim10_suggestion:
            continue
        excerpts = _evidence_excerpts(das)
        neg, cond, contra, pos = _analyze_neg_cond(excerpts, das.texte or das.cim10_suggestion)
        where = f"diagnostics_associes[{i}]"
        if neg and not pos:
            # An explicit negation is blocking when the model claims "high".
            severity = "HARD" if das.cim10_confidence == "high" else "MEDIUM"
            add("VETO-03", severity, where, f"DAS {das.cim10_suggestion} contredit par la preuve (négation)")
        elif contra:
            add("VETO-03", "MEDIUM", where, f"DAS {das.cim10_suggestion} preuves contradictoires")
        elif cond and das.cim10_confidence == "high":
            add("VETO-03", "LOW", where, f"DAS {das.cim10_suggestion} potentiellement conditionnel")

    # -------------------------------------------------
    # VETO-15: evidence of the "score/test" kind (high over-coding risk)
    # -------------------------------------------------
    for i, das in enumerate(das_list):
        if not das.cim10_suggestion:
            continue
        excerpts = _evidence_excerpts(das)
        if _has_template_evidence(excerpts) and ("fibrose" in _norm(das.texte or "") or str(das.cim10_suggestion).startswith("K74")):
            add("VETO-15", "MEDIUM", f"diagnostics_associes[{i}]", f"{das.cim10_suggestion}: preuve issue d'un score/test (à confirmer par diagnostic explicite)")

    # -------------------------------------------------
    # VETO-16: label -> code inconsistency (heuristic)
    # -------------------------------------------------
    for i, das in enumerate(das_list):
        if not das.cim10_suggestion:
            continue
        label_n = _norm(das.texte or "")
        if "sacroili" in label_n and str(das.cim10_suggestion) == "M53.3":
            add("VETO-16", "MEDIUM", f"diagnostics_associes[{i}]", "Sacro-iliite : M53.3 semble hors-sujet (à revalider via candidats, ex. M46.1)")

    # -------------------------------------------------
    # VETO-06: principal diagnosis duplicated among the associated ones
    # -------------------------------------------------
    if dp and dp.cim10_suggestion:
        dp_code = dp.cim10_suggestion
        for i, das in enumerate(das_list):
            if das.cim10_suggestion == dp_code:
                add("VETO-06", "HARD", "diagnostics_associes", f"Code DP {dp_code} dupliqué dans les DAS (index {i})")
                break  # only the first duplicate is reported

    # -------------------------------------------------
    # VETO-07: duplicated associated diagnoses (to be merged)
    # -------------------------------------------------
    seen: dict[str, int] = {}  # code -> index of its first occurrence
    for i, das in enumerate(das_list):
        c = das.cim10_suggestion
        if not c:
            continue
        if c in seen:
            add("VETO-07", "MEDIUM", "diagnostics_associes", f"Doublon DAS {c} (index {seen[c]} et {i})")
        else:
            seen[c] = i

    # -------------------------------------------------
    # VETO-09: simple biological contradiction (platelets / creatinine)
    # -------------------------------------------------
    # Platelets: a D69* code with a platelet value of 150 or more
    plaquettes = _get_bio_value(bios, ("plaquette", "platelet"))
    if plaquettes is not None:
        # threshold deliberately wide to avoid false positives
        if dp and dp.cim10_suggestion and dp.cim10_suggestion.startswith("D69") and plaquettes >= 150:
            add("VETO-09", "HARD", "diagnostic_principal", f"DP {dp.cim10_suggestion} incompatible avec plaquettes={plaquettes} (sans preuve explicite)")
        for i, das in enumerate(das_list):
            if das.cim10_suggestion and das.cim10_suggestion.startswith("D69") and plaquettes >= 150:
                # HARD when the evidence explicitly negates the diagnosis or
                # when the model is highly confident; MEDIUM otherwise.
                excerpts = _evidence_excerpts(das)
                neg, _, _, _ = _analyze_neg_cond(excerpts, das.texte or das.cim10_suggestion)
                severity = "HARD" if (das.cim10_confidence == "high" or neg) else "MEDIUM"
                add("VETO-09", severity, f"diagnostics_associes[{i}]", f"DAS {das.cim10_suggestion} incompatible avec plaquettes={plaquettes}")

    creat = _get_bio_value(bios, ("créat", "creat", "creatin"))
    if creat is not None:
        # extra caution: renal failure never hard-fails, it only raises an alert
        for i, das in enumerate(das_list):
            if das.cim10_suggestion and das.cim10_suggestion.startswith(("N17", "N18", "N19")) and creat < 110 and das.cim10_confidence == "high":
                add("VETO-09", "LOW", f"diagnostics_associes[{i}]", f"IR {das.cim10_suggestion} à confirmer (créat={creat})")

    # -------------------------------------------------
    # VETO-12: over-confidence
    # -------------------------------------------------
    def _overconf(d: Diagnostic | ActeCCAM) -> bool:
        conf = getattr(d, "cim10_confidence", None) or getattr(d, "ccam_confidence", None)
        return conf == "high" and not _has_evidence(d)

    if dp and dp.cim10_suggestion and _overconf(dp):
        add("VETO-12", "HARD", "diagnostic_principal", f"DP {dp.cim10_suggestion} en high sans preuve")

    # -------------------------------------------------
    # Post-processing: when a HARD veto exists for a location, weaker
    # VETO-03 / VETO-15 issues on the same location are dropped as redundant.
    # -------------------------------------------------
    hard_where = {it.where for it in issues if it.severity == "HARD"}
    if hard_where:
        issues = [
            it for it in issues
            if not (it.where in hard_where and it.severity in ("LOW", "MEDIUM") and it.veto in ("VETO-03", "VETO-15"))
        ]

    # -----------------------------
    # Verdict + score
    # -----------------------------
    hard = any(i.severity == "HARD" for i in issues)
    medium = any(i.severity == "MEDIUM" for i in issues)
    if hard:
        verdict = "FAIL"
    elif medium:
        verdict = "NEED_INFO"
    else:
        verdict = "PASS"

    score = 100
    for it in issues:
        if it.severity == "HARD":
            score -= 30
        elif it.severity == "MEDIUM":
            score -= 10
        else:
            score -= 3
    score = max(0, min(100, score))
    return VetoReport(verdict=verdict, score_contestabilite=score, issues=issues)