# Changelog note (recovered from a pasted commit message; kept as a comment):
# - New module dp_scoring.py: shortlist, multi-criteria scoring, select_dp,
#   one-shot LLM fallback with guardrails (negation, comorbidity, Z/R-codes).
# - CPAM parser: auto-detection of legacy/ucr_extract formats, 6 new
#   ControleCPAM fields (codes_etablissement, libelle, codes_retenus, ghm_ghs).
# - CRH parser: 3 new sections (diag_sortie, diag_principal, synthese).
# - DP_LLM_ONESHOT prompt externalized in templates.py.
# - dp_selection propagated in fusion.py.
# - 808 tests pass (21 new CPAM + 77 dp_scoring + 8 CRH).
"""Scoring déterministe du Diagnostic Principal (DP) pour les CRH.
|
|
|
|
Collecte les candidats DP depuis les sections CRH parsées, les entités edsnlp
|
|
et les regex, puis applique un scoring multi-critères pour sélectionner le
|
|
meilleur candidat ou signaler une ambiguïté (verdict REVIEW).
|
|
|
|
Fallback LLM one-shot : si use_llm=True et verdict REVIEW, un appel unique
|
|
au LLM voit les sections fortes et propose dp_code + evidence en un seul pass.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from typing import Optional
|
|
|
|
from ..config import (
|
|
DossierMedical,
|
|
DPCandidate,
|
|
DPSelection,
|
|
DP_REVIEW_THRESHOLD,
|
|
DP_SCORING_WEIGHTS,
|
|
DP_Z_CODE_WHITELIST,
|
|
)
|
|
from .cim10_dict import normalize_code, normalize_text, validate_code as cim10_validate
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
# Negation and conditional patterns
# ---------------------------------------------------------------------------

# Negation cues that may immediately precede a diagnosis label in French
# clinical text ("pas de", "absence de", "non retenu", "exclu", "éliminé",
# "négatif", "aucun(e)", "sans").
_NEGATION_PATTERNS = re.compile(
    r"(?:pas\s+de|absence\s+d[e']|non\s+retenu|exclu[es]?|"
    r"[ée]limin[ée]|n[ée]gatif|aucun[e]?\s|sans\s)",
    re.IGNORECASE,
)

# Uncertainty / conditional cues around a diagnosis ("suspecté", "probable",
# "hypothèse", a trailing "?", "à confirmer", "éventuel", "possiblement",
# "ne peut (pas) être exclu").
_CONDITIONAL_PATTERNS = re.compile(
    r"(?:suspect[ée]?|probable|hypothèse|hypothese|\?\s*$|"
    r"[àa]\s+confirmer|[ée]ventuel(?:le)?|possiblement|"
    r"ne\s+peut\s+(?:pas\s+)?[êe]tre\s+exclu)",
    re.IGNORECASE,
)
|
|
|
|
# Regex extracting explicit ICD-10 (CIM-10) codes from free text.
# Requires the dotted format (X##.#) to avoid 3-char false positives such as:
# P02 (dietetics), N34 (mutation N34S), T36 (T36.7°C = a temperature).
# Bare 3-char codes are too ambiguous in free text.
# CIM10_MAP handles the term→code mapping for common diagnoses.
_CIM10_CODE_RE = re.compile(r"\b([A-Z]\d{2}\.\d{1,2})(?![A-Za-z°])")

# Trivial comorbidity codes: penalized as DP (in every section).
# Almost always DAS (associated diagnoses), even when they appear in the
# conclusion or admission-reason sections.
_COMORBIDITY_PREFIXES = ("I10", "E66.", "E78.", "E11.", "D64.9")
|
|
|
|
# Patterns giving explicit proof of the MAIN reason for care
# (exception to the comorbidity penalty).
# E.g. "hospitalisé pour HTA maligne", "prise en charge de l'obésité morbide".
_PEC_PROOF_RE = re.compile(
    r"(?:hospitalis[ée]e?\s+pour"
    r"|prise\s+en\s+charge"
    r"|admission\s+pour"
    r"|adress[ée]e?\s+pour)",
    re.IGNORECASE,
)
|
|
|
|
|
|
# Sections carrying a strong DP signal.
# NB: in this CRH corpus "diag_sortie"/"diag_principal" almost never exist;
# "conclusion" and "synthese" ARE the de-facto diagnostic sections.
_STRONG_SECTIONS = frozenset({
    "motif", "motif_hospitalisation",
    "diag_sortie", "diagnostics_retenus", "diag_principal",
    "conclusion", "synthese",
})
|
|
|
|
# Normalization mapping: free-form section names returned by the LLM
# → canonical section keys used by the rest of the pipeline.
_SECTION_NORMALIZE_MAP = {
    # conclusion
    "conclusion": "conclusion",
    "conclusions": "conclusion",
    "au total": "conclusion",
    # synthese
    "synthèse": "synthese",
    "synthese": "synthese",
    "synthèse du séjour": "synthese",
    "synthese du sejour": "synthese",
    "synthèse du dossier": "synthese",
    "synthese du dossier": "synthese",
    "synthèse clinique": "synthese",
    "synthese clinique": "synthese",
    "en résumé": "synthese",
    "en resume": "synthese",
    "en synthèse": "synthese",
    "en synthese": "synthese",
    "résumé": "synthese",
    "resume": "synthese",
    # motif_hospitalisation
    "motif": "motif_hospitalisation",
    "motif d'hospitalisation": "motif_hospitalisation",
    "motif d'admission": "motif_hospitalisation",
    "motif de consultation": "motif_hospitalisation",
    "motif_hospitalisation": "motif_hospitalisation",
    "motif hospitalisation": "motif_hospitalisation",
    "admission": "motif_hospitalisation",
    "motif d'entrée": "motif_hospitalisation",
    "motif d'entree": "motif_hospitalisation",
    # diag_sortie
    "diagnostic de sortie": "diag_sortie",
    "diagnostics de sortie": "diag_sortie",
    "diag_sortie": "diag_sortie",
    "diag sortie": "diag_sortie",
    # diagnostics_retenus
    "diagnostic retenu": "diagnostics_retenus",
    "diagnostics retenus": "diagnostics_retenus",
    "diagnostic retenu à la sortie": "diagnostics_retenus",
    "diagnostics retenus à la sortie": "diagnostics_retenus",
    "diagnostics_retenus": "diagnostics_retenus",
    # diag_principal
    "diagnostic principal": "diag_principal",
    "diag_principal": "diag_principal",
    "diag principal": "diag_principal",
    "problème principal": "diag_principal",
    "probleme principal": "diag_principal",
    # histoire_maladie
    "histoire de la maladie": "histoire_maladie",
    "histoire_maladie": "histoire_maladie",
    "histoire maladie": "histoire_maladie",
    "hdm": "histoire_maladie",
    # evolution
    "evolution dans le service": "evolution",
    "évolution dans le service": "evolution",
    "evolution": "evolution",
    "évolution": "evolution",
    # clinical exam
    "examen clinique": "examen_clinique",
    "examen_clinique": "examen_clinique",
    # procedures
    "indication opératoire": "indication_operatoire",
    "indication operatoire": "indication_operatoire",
    "prise en charge chirurgicale": "indication_operatoire",
    "actes réalisés": "actes",
    "actes realises": "actes",
    "actes": "actes",
    # administrative / noise → "autres"
    "sections cliniques": "autres",
    "sections_cliniques": "autres",
    "sections fortes du dossier": "autres",
    "secrétariat": "autres",
    "secretariat": "autres",
    "médecine interne": "autres",
    "medecine interne": "autres",
    "médecin": "autres",
    "medecin": "autres",
    "courrier": "autres",
    "courrier de sortie": "autres",
    "compte rendu": "autres",
    "compte-rendu": "autres",
    "dossier médical": "autres",
    "dossier medical": "autres",
    "observations": "autres",
}
|
|
|
|
# Keyword fallback used when the exact-match lookup fails.
# Pairs (keyword(s), normalized_section) tested in order — first match wins.
_SECTION_KEYWORD_FALLBACKS: list[tuple[tuple[str, ...], str]] = [
    # diagnostic + sortie/retenu → diag_sortie / diagnostics_retenus
    (("diagnostic", "sortie"), "diag_sortie"),
    (("diagnostic", "retenu"), "diagnostics_retenus"),
    # conclusion / synthese
    (("conclusion",), "conclusion"),
    (("synthese",), "synthese"),
    (("synthèse",), "synthese"),
    (("au total",), "synthese"),
    (("en résumé",), "synthese"),
    # motif / admission
    (("motif",), "motif_hospitalisation"),
    (("admission",), "motif_hospitalisation"),
]
|
|
|
|
|
|
def _normalize_evidence_section(raw_section: str) -> str:
    """Map a free-form section name returned by the LLM to a standard key.

    Steps: lowercase/strip and drop brackets, quotes and colons; try an exact
    lookup in _SECTION_NORMALIZE_MAP (also with underscores turned into
    spaces); finally fall back to the keyword pairs in
    _SECTION_KEYWORD_FALLBACKS. Unrecognized names come back cleaned as-is.
    """
    if not raw_section:
        return ""

    # Aggressive cleanup: brackets, quotes and colons are stripped out.
    cleaned = re.sub(r"[\[\]\"':]+", "", raw_section.lower().strip()).strip()

    # Exact lookup, then a retry with underscores replaced by spaces.
    for probe in (cleaned, cleaned.replace("_", " ")):
        mapped = _SECTION_NORMALIZE_MAP.get(probe)
        if mapped:
            return mapped

    # Keyword fallback: first entry whose keywords all appear wins.
    for keywords, section in _SECTION_KEYWORD_FALLBACKS:
        if all(kw in cleaned for kw in keywords):
            return section

    return cleaned
|
|
|
|
|
|
def _is_comorbidity_code(code: str) -> bool:
    """Return True for trivial comorbidity codes (I10, E66.x, E78.x, E11.x, D64.9)."""
    # str.startswith accepts a tuple of prefixes: one C-level call.
    return code.startswith(_COMORBIDITY_PREFIXES)
|
|
|
|
|
|
def _has_explicit_pec_proof(label: str, full_text: str) -> bool:
    """Tell whether the text explicitly proves this comorbidity is the MAIN
    reason for care.

    Looks for cues such as "hospitalisé pour", "prise en charge de",
    "admission pour" followed by the comorbidity label within a window of
    100 characters.
    """
    if not label or not full_text:
        return False
    haystack = full_text.lower()
    needle = label.lower()
    return any(
        needle in haystack[m.end():m.end() + 100]
        for m in _PEC_PROOF_RE.finditer(haystack)
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. Construction de la shortlist
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def build_dp_shortlist(
    parsed: dict,
    text: str,
    edsnlp_result,
    dossier: DossierMedical,
) -> list[DPCandidate]:
    """Collect DP candidates from parsed CRH sections, edsnlp entities and regex.

    Args:
        parsed: Parsed CRH document; candidates are read from parsed["sections"].
        text: Full document text (used by the regex fallback).
        edsnlp_result: Optional edsnlp output exposing .cim10_entities.
        dossier: Medical record (not referenced here; kept for interface parity).

    Returns:
        Candidates deduplicated by ICD-10 code, keeping the strongest section.
    """
    from .cim10_extractor import CIM10_MAP, _find_diagnostic_principal
    from .das_filter import is_valid_diagnostic_text, clean_diagnostic_text

    candidates: list[DPCandidate] = []
    sections = parsed.get("sections", {})

    # Section priority order (decreasing strength)
    section_priority = [
        "diag_sortie", "diag_principal", "motif_hospitalisation",
        "conclusion", "synthese",
    ]

    # 1. High-signal CRH sections
    for section_key in section_priority:
        section_text = sections.get(section_key, "")
        if not section_text:
            continue

        section_norm = normalize_text(section_text)

        # 1a. Explicit ICD-10 codes present in the section text
        for m in _CIM10_CODE_RE.finditer(section_text):
            code = normalize_code(m.group(1))
            is_valid, label = cim10_validate(code)
            if is_valid:
                excerpt = _extract_excerpt(section_text, m.start())
                candidates.append(DPCandidate(
                    code=code,
                    label=label,
                    source_section=section_key,
                    source_excerpt=excerpt,
                ))

        # 1b. CIM10_MAP only (curated for common DPs).
        # dict_lookup is deliberately NOT used: the full dictionary
        # (10K+ entries) yields substring-match false positives on free text.
        for terme, code in CIM10_MAP.items():
            if normalize_text(terme) in section_norm:
                candidates.append(DPCandidate(
                    code=code,
                    label=terme.capitalize(),
                    source_section=section_key,
                    source_excerpt=section_text[:200].strip(),
                ))
                break  # longest-match: CIM10_MAP is ordered specific→generic

    # 2. edsnlp entities (negated or hypothetical mentions are skipped)
    if edsnlp_result:
        for ent in edsnlp_result.cim10_entities:
            if ent.negation or ent.hypothese:
                continue
            texte = clean_diagnostic_text(ent.texte.capitalize())
            if not is_valid_diagnostic_text(texte):
                continue
            candidates.append(DPCandidate(
                code=ent.code,
                label=texte,
                source_section="edsnlp",
            ))

    # 3. Regex fallback (_find_diagnostic_principal over the full text)
    text_lower = text.lower()
    conclusion = sections.get("conclusion", "")
    dp_regex = _find_diagnostic_principal(text_lower, conclusion)
    if dp_regex:
        candidates.append(DPCandidate(
            code=dp_regex.cim10_suggestion,
            label=dp_regex.texte,
            source_section="regex",
            source_excerpt=dp_regex.source_excerpt,
        ))

    # 4. Dedup by ICD-10 code: keep the strongest section
    candidates = _dedup_by_code(candidates, section_priority)

    return candidates
|
|
|
|
|
|
def _extract_excerpt(text: str, pos: int, window: int = 100) -> str:
|
|
"""Extrait ~200 chars autour d'une position dans le texte."""
|
|
start = max(0, pos - window)
|
|
end = min(len(text), pos + window)
|
|
return text[start:end].strip()
|
|
|
|
|
|
def _dedup_by_code(
|
|
candidates: list[DPCandidate],
|
|
section_priority: list[str],
|
|
) -> list[DPCandidate]:
|
|
"""Déduplique par code CIM-10, garde la section la plus forte."""
|
|
priority_map = {s: i for i, s in enumerate(section_priority)}
|
|
# Ajouter edsnlp et regex en bas de priorité
|
|
priority_map.setdefault("edsnlp", len(section_priority))
|
|
priority_map.setdefault("regex", len(section_priority) + 1)
|
|
|
|
seen: dict[str, DPCandidate] = {}
|
|
for c in candidates:
|
|
key = c.code or c.label.lower()
|
|
if key not in seen:
|
|
seen[key] = c
|
|
else:
|
|
existing = seen[key]
|
|
existing_prio = priority_map.get(existing.source_section, 99)
|
|
new_prio = priority_map.get(c.source_section, 99)
|
|
if new_prio < existing_prio:
|
|
seen[key] = c
|
|
|
|
return list(seen.values())
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2. Scoring des candidats
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def score_candidates(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    full_text: str = "",
) -> list[DPCandidate]:
    """Apply deterministic multi-criteria scoring to each candidate, in place.

    Each candidate gets ``score_details`` (per-criterion contributions, keyed
    by DP_SCORING_WEIGHTS entry names) and ``score`` (their sum); the list is
    then sorted by decreasing score and returned.

    Args:
        candidates: DP candidates to score (mutated in place).
        dossier: Medical record (not referenced here; kept for interface parity).
        full_text: Full document text for negation/conditional detection.
    """
    for c in candidates:
        details: dict[str, int] = {}

        # 1. Section bonus (weight looked up as "section_<source_section>")
        section_key = f"section_{c.source_section}"
        section_bonus = DP_SCORING_WEIGHTS.get(section_key, 0)
        if section_bonus:
            details["section"] = section_bonus

        # 2. Proof bonus (an excerpt backs the candidate)
        if c.source_excerpt:
            proof = DP_SCORING_WEIGHTS.get("proof_excerpt", 0)
            if proof:
                details["proof_excerpt"] = proof

        # 3. Negation penalty (narrow window BEFORE the label)
        if full_text and c.label:
            prefix = _get_prefix_window(full_text, c.label, chars_before=60)
            if prefix and _NEGATION_PATTERNS.search(prefix):
                c.is_negated = True
                penalty = DP_SCORING_WEIGHTS.get("negation", 0)
                if penalty:
                    details["negation"] = penalty

        # 4. Conditional penalty (narrow window BEFORE + AFTER the label)
        if full_text and c.label:
            window = _get_context_window(full_text, c.label, radius=80)
            if window and _CONDITIONAL_PATTERNS.search(window):
                c.is_conditional = True
                penalty = DP_SCORING_WEIGHTS.get("conditional", 0)
                if penalty:
                    details["conditional"] = penalty

        # 5. Z-code-as-DP penalty (unless whitelisted)
        if c.code and c.code.startswith("Z"):
            if not _is_z_code_whitelisted(c.code):
                penalty = DP_SCORING_WEIGHTS.get("z_code_dp", 0)
                if penalty:
                    details["z_code_dp"] = penalty

        # 6. R-code (symptom) as DP penalty
        if c.code and c.code.startswith("R"):
            penalty = DP_SCORING_WEIGHTS.get("r_code_dp", 0)
            if penalty:
                details["r_code_dp"] = penalty

        # 7. Trivial-comorbidity penalty (applies in every section)
        if c.code and _is_comorbidity_code(c.code):
            penalty = DP_SCORING_WEIGHTS.get("comorbidity_weak", 0)
            if penalty:
                details["comorbidity_weak"] = penalty
            # Exception: explicit proof it is the main reason for care
            # compensates the penalty exactly (falls back to +3).
            if full_text and _has_explicit_pec_proof(c.label, full_text):
                details["comorbidity_pec_proof"] = abs(penalty) if penalty else 3

        c.score_details = details
        c.score = sum(details.values())

    # Sort by decreasing score
    candidates.sort(key=lambda c: -c.score)

    return candidates
|
|
|
|
|
|
def _get_prefix_window(text: str, label: str, chars_before: int = 60) -> str:
|
|
"""Retourne les N caractères AVANT la première occurrence du label.
|
|
|
|
Sert à détecter les négations qui précèdent directement le diagnostic
|
|
("pas de pancréatite" vs "pancréatite ... pas de complication").
|
|
"""
|
|
text_lower = text.lower()
|
|
label_lower = label.lower()
|
|
pos = text_lower.find(label_lower)
|
|
if pos < 0:
|
|
text_norm = normalize_text(text)
|
|
label_norm = normalize_text(label)
|
|
pos = text_norm.find(label_norm)
|
|
if pos < 0:
|
|
return ""
|
|
start = max(0, pos - chars_before)
|
|
return text_norm[start:pos]
|
|
start = max(0, pos - chars_before)
|
|
return text_lower[start:pos]
|
|
|
|
|
|
def _get_context_window(text: str, label: str, radius: int = 200) -> str:
|
|
"""Retourne une fenêtre de texte autour de la première occurrence du label."""
|
|
text_lower = text.lower()
|
|
label_lower = label.lower()
|
|
pos = text_lower.find(label_lower)
|
|
if pos < 0:
|
|
# Essayer avec le texte normalisé
|
|
text_norm = normalize_text(text)
|
|
label_norm = normalize_text(label)
|
|
pos = text_norm.find(label_norm)
|
|
if pos < 0:
|
|
return ""
|
|
start = max(0, pos - radius)
|
|
end = min(len(text_norm), pos + len(label_norm) + radius)
|
|
return text_norm[start:end]
|
|
start = max(0, pos - radius)
|
|
end = min(len(text), pos + len(label) + radius)
|
|
return text[start:end].lower()
|
|
|
|
|
|
def _is_z_code_whitelisted(code: str) -> bool:
    """Return True when the Z-code matches any whitelist prefix."""
    return any(code.startswith(prefix) for prefix in DP_Z_CODE_WHITELIST)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 3. Sélection du DP
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def select_dp(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    use_llm: bool = False,
) -> DPSelection:
    """Select the DP among scored candidates.

    Returns verdict="confirmed" when the top-1/top-2 score gap reaches
    DP_REVIEW_THRESHOLD, "review" otherwise (top-3 candidates attached).

    Args:
        candidates: Candidates already sorted by decreasing score.
        dossier: Medical record, forwarded to the LLM tiebreaker.
        use_llm: Allow a single LLM call to break an EXACT score tie.
    """
    if not candidates:
        return DPSelection(verdict="review", winner_reason="aucun candidat DP trouvé")

    # Universal anti-comorbidity rule: a trivial comorbidity ranked first
    # goes to REVIEW unless there is explicit proof it is the main reason
    # for care ("hospitalisé pour", "prise en charge de", ...).
    top = candidates[0]
    if top.code and _is_comorbidity_code(top.code):
        has_pec = "comorbidity_pec_proof" in top.score_details
        if not has_pec:
            logger.info(
                "Comorbidité-banale DP : %s (%s, section=%s) → REVIEW + fallback LLM",
                top.code, top.label, top.source_section,
            )
            return DPSelection(
                verdict="review",
                candidates=candidates[:3],
                winner_reason=f"comorbidité banale {top.code} sans preuve PEC ({top.source_section})",
            )

    if len(candidates) == 1:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason="candidat unique",
        )

    top1 = candidates[0]
    top2 = candidates[1]
    delta = top1.score - top2.score

    if delta >= DP_REVIEW_THRESHOLD:
        return DPSelection(
            verdict="confirmed",
            candidates=candidates,
            winner_reason=f"score {top1.score} vs {top2.score} (delta {delta})",
        )

    # Gap too small — try the LLM tiebreaker (exact ties only) when allowed.
    if use_llm and top1.score == top2.score:
        tiebreak = _llm_tiebreak(top1, top2, dossier)
        if tiebreak and tiebreak.get("winner") in ("A", "B"):
            if tiebreak["winner"] == "B":
                # Swap so the winner comes first
                candidates[0], candidates[1] = candidates[1], candidates[0]
            return DPSelection(
                verdict="confirmed",
                candidates=candidates,
                winner_reason=f"LLM tiebreak: {tiebreak.get('reason', '')}",
                llm_tiebreak=tiebreak,
            )

    return DPSelection(
        verdict="review",
        candidates=candidates[:3],
        winner_reason=f"delta insuffisant: {top1.score} vs {top2.score} (delta {delta} < seuil {DP_REVIEW_THRESHOLD})",
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 4. Tiebreaker LLM (optionnel)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _llm_tiebreak(
    candidate_a: DPCandidate,
    candidate_b: DPCandidate,
    dossier: DossierMedical,
) -> dict | None:
    """Ask the LLM to break a tie between two DP candidates with equal scores.

    Returns {"winner": "A"|"B", "reason": str}, or None when the LLM client
    is unavailable, the call fails, or the answer is unusable.
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_TIEBREAK
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le tiebreaker DP")
        return None

    # NOTE(review): mode_entree is used as a proxy for the admission reason —
    # the parsed sections are not available in this helper; confirm this proxy.
    motif = ""
    if dossier.sejour and dossier.sejour.mode_entree:
        motif = dossier.sejour.mode_entree

    def _format_candidate(c: DPCandidate) -> str:
        # One-line summary: label (code) [section, score] — excerpt (≤150 chars)
        parts = [c.label]
        if c.code:
            parts.append(f"({c.code})")
        parts.append(f"[section: {c.source_section}, score: {c.score}]")
        if c.source_excerpt:
            parts.append(f'extrait: "{c.source_excerpt[:150]}"')
        return " — ".join(parts)

    candidat_a_str = _format_candidate(candidate_a)
    candidat_b_str = _format_candidate(candidate_b)

    # Strong sections are not wired into this helper (no `parsed` argument).
    sections_fortes = "Non disponible"

    prompt = DP_TIEBREAK.format(
        motif=motif or "Non renseigné",
        candidat_a=candidat_a_str,
        candidat_b=candidat_b_str,
        sections_fortes=sections_fortes,
    )

    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=500, role="coding")
    except Exception:
        logger.warning("Erreur LLM tiebreaker DP", exc_info=True)
        return None

    if not result or not isinstance(result, dict):
        return None

    winner = result.get("winner")
    if winner not in ("A", "B"):
        return None

    return {"winner": winner, "reason": result.get("reason", "")}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 5. LLM Fallback one-shot — proposition DP quand le scoring déterministe échoue
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _build_strong_sections_text(parsed: dict) -> str:
|
|
"""Construit le texte des sections fortes pour le prompt LLM one-shot.
|
|
|
|
Sections fortes : motif, diag_sortie, diag_principal, diagnostics_retenus,
|
|
conclusion, synthese. PAS histoire_maladie ni examen_clinique (= bruit).
|
|
"""
|
|
sections = parsed.get("sections", {})
|
|
_STRONG_ORDER = [
|
|
("motif_hospitalisation", 500),
|
|
("diag_sortie", 600), ("diagnostics_retenus", 600),
|
|
("diag_principal", 600),
|
|
("conclusion", 600), ("synthese", 600),
|
|
]
|
|
parts = []
|
|
for key, max_len in _STRONG_ORDER:
|
|
val = sections.get(key, "")
|
|
if val:
|
|
parts.append(f"[{key}] {val[:max_len]}")
|
|
return "\n".join(parts) or "Aucune section forte"
|
|
|
|
|
|
def _build_motif(parsed: dict, dossier: DossierMedical) -> str:
|
|
"""Extrait le motif d'hospitalisation pour le prompt LLM."""
|
|
motif = ""
|
|
if dossier.sejour and dossier.sejour.mode_entree:
|
|
motif = dossier.sejour.mode_entree
|
|
if not motif:
|
|
motif = parsed.get("sections", {}).get("motif_hospitalisation", "")[:300] or "Non renseigné"
|
|
return motif
|
|
|
|
|
|
def _build_actes(dossier: DossierMedical) -> str:
|
|
"""Construit la liste des actes pour le prompt LLM."""
|
|
parts = []
|
|
for a in dossier.actes_ccam[:5]:
|
|
label = a.texte
|
|
if a.code_ccam_suggestion:
|
|
label += f" ({a.code_ccam_suggestion})"
|
|
parts.append(label)
|
|
return ", ".join(parts) or "Non renseignés"
|
|
|
|
|
|
def _validate_and_normalize_code(dp_code: str, pool_codes: set[str] | None = None) -> tuple[str, str | None, bool]:
    """Validate and normalize an ICD-10 code.

    Tries, in order: exact membership in *pool_codes* (when given), the
    3-char parent and its ".9" variant within the pool, then direct ICD-10
    dictionary validation of the code, its parent, and the ".9" variant.

    Returns:
        (final_code, original_code_if_normalized_else_None, is_valid)
    """
    dp_code = normalize_code(dp_code)

    # Exact pool membership wins immediately.
    if pool_codes is not None and dp_code in pool_codes:
        return dp_code, None, True

    parent3 = dp_code[:3]
    parent9 = f"{parent3}.9"

    # Pool match via parent codes.
    if pool_codes is not None:
        if parent3 in pool_codes:
            return parent3, dp_code, True
        if parent9 in pool_codes:
            return parent9, dp_code, True

    # Direct ICD-10 validation, then parent fallbacks (same order as above).
    for attempt, original in ((dp_code, None), (parent3, dp_code), (parent9, dp_code)):
        ok, _ = cim10_validate(attempt)
        if ok:
            return attempt, original, True

    return dp_code, None, False
|
|
|
|
|
|
def _apply_guardrails(
    dp_code: str,
    candidate: DPCandidate,
    evidence_section: str,
    evidence_excerpt: str,
    confidence: str,
) -> DPSelection:
    """Run the deterministic guardrails over an LLM-proposed DP candidate.

    Sets a synthetic confidence-based score on the candidate, then returns a
    DPSelection whose verdict is "confirmed" only when every guardrail passes:
    a proof excerpt is present, the code is not a bare comorbidity outside a
    strong section, and the evidence comes from a strong section with high
    confidence.
    """
    strong = evidence_section in _STRONG_SECTIONS
    evidenced = bool(evidence_excerpt and evidence_excerpt.strip())
    high_conf = confidence == "high"

    # Synthetic score derived from the LLM confidence level.
    candidate.score = {"high": 3, "medium": 2, "low": 1}.get(confidence, 1)
    candidate.score_details = {"llm_confidence": candidate.score}

    # GF-1: empty evidence excerpt → REVIEW
    if not evidenced:
        logger.info("LLM fallback DP : pas d'extrait preuve pour %s, REVIEW", dp_code)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason="LLM fallback: evidence_excerpt vide",
        )

    # GF-2: trivial comorbidity AND weak section → REVIEW
    if _is_comorbidity_code(dp_code) and not strong:
        logger.info("LLM fallback DP : comorbidité %s hors section forte (%s), REVIEW", dp_code, evidence_section)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: comorbidité {dp_code} hors section forte",
        )

    # GF-3: CONFIRMED requires strong section + high confidence
    if not strong or not high_conf:
        reasons = []
        if not strong:
            reasons.append(f"section faible ({evidence_section})")
        if not high_conf:
            reasons.append(f"confidence {confidence}")
        reason_str = " + ".join(reasons)
        logger.info("LLM fallback DP : %s pour %s, REVIEW", reason_str, dp_code)
        return DPSelection(
            verdict="review", candidates=[candidate],
            winner_reason=f"LLM fallback: {dp_code} — {reason_str}",
        )

    # Every guardrail passed → CONFIRMED
    return DPSelection(
        verdict="confirmed", candidates=[candidate],
        winner_reason=f"LLM fallback: {dp_code} ({confidence}, {evidence_section})",
    )
|
|
|
|
|
|
|
|
def llm_dp_fallback(
    parsed: dict,
    text: str,
    dossier: DossierMedical,
    dp_candidates: list[DPCandidate] | None = None,
    edsnlp_result=None,
) -> DPSelection:
    """One-shot LLM call to identify and code the DP.

    The LLM sees the strong CRH sections directly and must return in a single
    call: dp_code, dp_label, evidence_section, evidence_excerpt, confidence.
    The answer is then validated (ICD-10 normalization) and filtered through
    the deterministic guardrails (_apply_guardrails).

    Should only be called when use_llm=True AND verdict="review".

    Args:
        parsed: Parsed CRH document (its sections feed the prompt).
        text: Full document text (not referenced in this function body).
        dossier: Medical record (admission reason + acts for the prompt).
        dp_candidates: Previously scored candidates (not referenced here).
        edsnlp_result: edsnlp output (not referenced here).
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_LLM_ONESHOT
    except ImportError:
        logger.warning("Module ollama_client non disponible pour le fallback DP LLM")
        return DPSelection(verdict="review", winner_reason="LLM non disponible")

    # Prompt context
    motif = _build_motif(parsed, dossier)
    sections_fortes = _build_strong_sections_text(parsed)
    actes = _build_actes(dossier)

    prompt = DP_LLM_ONESHOT.format(
        motif=motif, sections_fortes=sections_fortes, actes=actes,
    )

    try:
        result = call_ollama(prompt, temperature=0.0, max_tokens=800, role="coding")
    except Exception:
        logger.warning("Erreur LLM fallback DP", exc_info=True)
        return DPSelection(verdict="review", winner_reason="erreur LLM fallback DP")

    if not result or not isinstance(result, dict):
        return DPSelection(verdict="review", winner_reason="réponse LLM invalide")

    dp_code = result.get("dp_code", "")
    dp_label = result.get("dp_label", "")
    confidence = result.get("confidence", "low")
    evidence_section_raw = result.get("evidence_section", "")
    evidence_excerpt = result.get("evidence_excerpt", "")

    # Map the free-form section name to a canonical key
    evidence_section = _normalize_evidence_section(evidence_section_raw)

    logger.info(
        "LLM oneshot: code=%s label='%s' section=%s confidence=%s",
        dp_code, dp_label[:60], evidence_section, confidence,
    )

    if not dp_code:
        return DPSelection(
            verdict="review",
            winner_reason="LLM: aucun code DP proposé",
        )

    # ICD-10 validation and normalization
    dp_code, dp_code_original, is_valid = _validate_and_normalize_code(dp_code)
    if not is_valid:
        return DPSelection(
            verdict="review",
            winner_reason=f"code invalide {dp_code}",
        )
    if dp_code_original:
        logger.info("LLM oneshot: normalisation %s → %s", dp_code_original, dp_code)

    # Resolve the final label (LLM label wins, dictionary label as fallback)
    _, dict_label = cim10_validate(dp_code)

    # Build the candidate, carrying the full provenance of the LLM answer
    source_tag = f"llm_oneshot ({evidence_section})" if evidence_section else "llm_oneshot"

    candidate = DPCandidate(
        code=dp_code,
        label=dp_label or dict_label or "",
        source_section=source_tag,
        source_excerpt=evidence_excerpt,
        confidence_raw=confidence,
        dp_code_original_llm=dp_code_original,
        dp_code_normalized=dp_code_original is not None,
    )

    # Apply the deterministic guardrails
    return _apply_guardrails(dp_code, candidate, evidence_section, evidence_excerpt, confidence)
|