feat: CRH diag sections + DP scoring bonus + evidence by code
- crh_parser: 3 nouvelles sections (diag_sortie, diag_principal, synthese) avec garde-fou début de ligne pour éviter faux positifs mid-sentence
- dp_selector: NUKE-3 sélecteur DP déterministe (548 lignes)
- build_candidates/score_candidates/select_dp
- bonus +4 pour mention dans diag_sortie/diag_principal
- bonus +2 pour mention dans synthese
- hardening DIM : A1 evidence, A2 mono-fragile, A3 confidence cap
- _collect_evidence match par terme OU code CIM-10
- LLM tiebreaker optionnel (DP_RANKER_CONSTRAINED)
- fusion: propagation dp_selection depuis le dossier source du DP retenu

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -112,13 +112,25 @@ def _extract_medical_content(text: str, result: dict) -> None:
|
|||||||
if m:
|
if m:
|
||||||
result["contenu_medical"] = m.group(1).strip()
|
result["contenu_medical"] = m.group(1).strip()
|
||||||
|
|
||||||
# Sections spécifiques
|
# Terminaisons communes : en-têtes de section connus (y compris diagnostics)
|
||||||
|
_TERM_DIAG = r"Diagnostic(?:s)?\s+(?:de\s+sortie|retenu|principal)|Problème\s+principal|Synthèse|En\s+résumé|En\s+synthèse"
|
||||||
|
_TERM_BASE = r"Antécédents|Histoire|Examen|Au total|Conclusion|Devenir|TTT|Traitement"
|
||||||
|
_TERM_ALL = rf"{_TERM_BASE}|{_TERM_DIAG}"
|
||||||
|
|
||||||
|
# Sections spécifiques — ordre : sections fortes (diagnostic) AVANT conclusion
|
||||||
section_patterns = [
|
section_patterns = [
|
||||||
("motif_hospitalisation", r"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:Antécédents|Histoire|Examen|Au total|Devenir|TTT)|$)"),
|
("motif_hospitalisation", rf"(?:motif\s+(?:d'hospitalisation|suivant))\s*[:\s]*\n?(.*?)(?=\n\s*(?:{_TERM_ALL})|$)"),
|
||||||
("antecedents", r"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir)|$)"),
|
("antecedents", rf"(?:Antécédents?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Histoire|Examen|Traitement|Au total|Devenir|{_TERM_DIAG})|$)"),
|
||||||
("histoire_maladie", r"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir)|$)"),
|
("histoire_maladie", rf"(?:Histoire de la maladie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Examen|Biologie|Au total|Devenir|{_TERM_DIAG})|$)"),
|
||||||
("examen_clinique", r"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir)|$)"),
|
("examen_clinique", rf"(?:Examen clinique)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Biologie|Imagerie|Au total|Devenir|{_TERM_DIAG})|$)"),
|
||||||
("conclusion", r"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement)|$)"),
|
# Sections diagnostiques à fort signal DP
|
||||||
|
# (?:^|\n)\s* exige un début de ligne pour éviter les faux positifs
|
||||||
|
# ex: "pas de diagnostic retenu" ne doit PAS déclencher la section
|
||||||
|
("diag_sortie", rf"(?:^|\n)\s*(?:Diagnostic(?:s)?\s+de\s+sortie|Diagnostic(?:s)?\s+retenu(?:s)?(?:\s+(?:à\s+la\s+sortie|en\s+sortie))?)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Rédigé|Cordialement|Synthèse|En\s+résumé)|$)"),
|
||||||
|
("diag_principal", rf"(?:^|\n)\s*(?:Diagnostic\s+principal|Problème\s+principal)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Diagnostic(?:s)?\s+(?:associé|secondaire|de\s+sortie|retenu)|Devenir|TTT|Traitement|Rédigé|Cordialement|Synthèse|En\s+résumé)|$)"),
|
||||||
|
("synthese", rf"(?:^|\n)\s*(?:Synthèse|En\s+résumé|En\s+synthèse)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|Rédigé|Cordialement)|$)"),
|
||||||
|
# Sections génériques
|
||||||
|
("conclusion", rf"(?:Au total|Conclusion)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|TTT|Traitement|{_TERM_DIAG})|$)"),
|
||||||
("traitement_sortie", r"(?:TTT de sortie|Traitement de sortie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement)|$)"),
|
("traitement_sortie", r"(?:TTT de sortie|Traitement de sortie)\s*[:\s]*\n?(.*?)(?=\n\s*(?:Devenir|Rédigé|Cordialement)|$)"),
|
||||||
("devenir", r"(?:Devenir)\s*[:\s]*\n?(.*?)(?=\n\s*(?:TTT|Traitement|Rédigé|Cordialement)|$)"),
|
("devenir", r"(?:Devenir)\s*[:\s]*\n?(.*?)(?=\n\s*(?:TTT|Traitement|Rédigé|Cordialement)|$)"),
|
||||||
]
|
]
|
||||||
|
|||||||
581
src/medical/dp_selector.py
Normal file
581
src/medical/dp_selector.py
Normal file
@@ -0,0 +1,581 @@
|
|||||||
|
"""NUKE-3 — Sélecteur DP type DIM.
|
||||||
|
|
||||||
|
Pré-ranker déterministe + ranker LLM contraint (optionnel).
|
||||||
|
|
||||||
|
Flux :
|
||||||
|
1. build_candidates() → liste de DPCandidate depuis dossier
|
||||||
|
2. score_candidates() → scoring déterministe (section, confiance, occurrences, malus)
|
||||||
|
3. select_dp() → verdict CONFIRMED ou REVIEW
|
||||||
|
|
||||||
|
Règles DIM incontournables (hardening) :
|
||||||
|
- CONFIRMED ⇒ evidence non vide (sinon downgrade → REVIEW)
|
||||||
|
- Mono-candidat fragile (comorb/symptôme/acte-only/evidence faible) → REVIEW
|
||||||
|
- confidence="high" interdit si verdict="REVIEW"
|
||||||
|
|
||||||
|
Le LLM n'intervient QUE si les scores sont proches ET DP_RANKER_LLM_ENABLED=True.
|
||||||
|
Le pré-ranker est suffisant dans >80% des cas.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from ..config import (
|
||||||
|
DossierMedical,
|
||||||
|
Diagnostic,
|
||||||
|
DPCandidate,
|
||||||
|
DPSelection,
|
||||||
|
get_dp_ranker_llm_enabled,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constantes de classification
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Three-character ICD-10 (CIM-10) prefixes flagged as chronic comorbidities;
# candidates carrying one of them receive a scoring malus as principal
# diagnosis (DP).
COMORBIDITY_PREFIXES = frozenset((
    # Hypertension
    "I10", "I11", "I12", "I13", "I15",
    # Diabetes
    "E10", "E11", "E12", "E13", "E14",
    # Obesity
    "E66",
    # Dyslipidaemia
    "E78",
    # COPD, asthma (except acute exacerbation)
    "J44", "J45",
    # Atrial fibrillation
    "I48",
    # Chronic kidney disease
    "N18",
    # Osteoporosis
    "M81",
    # Depression
    "F32",
    # Sleep disorders
    "G47",
))

# Z-code prefixes accepted as principal diagnosis (same whitelist as VETO-20).
Z_CODE_DP_WHITELIST = frozenset((
    "Z03",
    "Z04",
    "Z08",
    "Z09",
    "Z38",
    "Z43",
    "Z45",
    "Z50",
    "Z51",
    "Z54",
    "Z75",
))

# Keywords signalling that a "diagnosis" text is really just a procedure.
ACT_KEYWORDS = frozenset((
    "cholécystectomie",
    "appendicectomie",
    "colectomie",
    "gastrectomie",
    "prothèse",
    "arthroplastie",
    "ostéosynthèse",
    "arthroscopie",
    "endoscopie",
    "coloscopie",
    "fibroscopie",
    "biopsie",
    "cathétérisme",
    "dialyse",
    "transfusion",
    "chimiothérapie",
    "radiothérapie",
    "césarienne",
    "hystérectomie",
))

# Strength of the originating section, approximated through the ``source``
# field of the Diagnostic.
SECTION_STRENGTH = dict(
    trackare=5,
    regex=3,
    edsnlp=2,
    llm_das=1,
    das_promotion=1,
    nuke3=0,  # no self-boost
)

# Minimum score gap between top-1 and top-2 to reach CONFIRMED without the LLM.
DELTA_CONFIRMED = 3.0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers de classification
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _is_comorbidity_like(code: str | None) -> bool:
    """Tell whether an ICD-10 code falls under a chronic-comorbidity prefix.

    Returns False for a missing code or one shorter than three characters;
    otherwise the upper-cased three-character prefix is looked up in
    ``COMORBIDITY_PREFIXES``.
    """
    if code and len(code) >= 3:
        return code[:3].upper() in COMORBIDITY_PREFIXES
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _is_symptom_like(code: str | None) -> bool:
|
||||||
|
"""Le code CIM-10 est-il un symptôme (R00-R99) ?"""
|
||||||
|
if not code:
|
||||||
|
return False
|
||||||
|
return code[0].upper() == "R"
|
||||||
|
|
||||||
|
|
||||||
|
def _is_act_only(text: str) -> bool:
    """Tell whether the text describes only a procedure, with no diagnosis.

    Texts longer than five whitespace-separated words are never considered
    act-only. Shorter texts qualify when any entry of ``ACT_KEYWORDS`` occurs
    as a substring of the lower-cased text.
    """
    lowered = text.lower()
    if len(lowered.split()) > 5:
        return False
    for keyword in ACT_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _section_strength(source: str | None) -> int:
    """Map a diagnostic ``source`` label to its strength in ``SECTION_STRENGTH``.

    Unknown, empty or missing sources score 0.
    """
    lookup_key = source if source else ""
    return SECTION_STRENGTH.get(lookup_key, 0)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Construction des candidats
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_candidates(dossier: DossierMedical) -> list[DPCandidate]:
    """Build the pool of principal-diagnosis (DP) candidates from a dossier.

    The current principal diagnosis, when present, is always the first
    candidate. Associated diagnoses (DAS) follow in their original order,
    skipping those without an ICD-10 suggestion and those whose status is
    ``"ruled_out"``. ``index`` is the candidate's position in the returned
    list.
    """

    def _as_candidate(diag, position):
        # Single place where a diagnostic is turned into a DPCandidate, so
        # the DP and the DAS are classified by exactly the same rules.
        icd_code = diag.cim10_suggestion
        label = diag.texte
        origin = diag.source
        return DPCandidate(
            index=position,
            term=label,
            code=icd_code,
            confidence=diag.cim10_confidence,
            source=origin,
            is_comorbidity_like=_is_comorbidity_like(icd_code),
            is_symptom_like=_is_symptom_like(icd_code),
            is_act_only=_is_act_only(label),
            section_strength=_section_strength(origin),
        )

    pool: list[DPCandidate] = []

    # Current DP goes first.
    current_dp = dossier.diagnostic_principal
    if current_dp:
        pool.append(_as_candidate(current_dp, len(pool)))

    # Then every usable associated diagnosis.
    for das in dossier.diagnostics_associes:
        if not das.cim10_suggestion or das.status == "ruled_out":
            continue
        pool.append(_as_candidate(das, len(pool)))

    return pool
|
||||||
|
|
||||||
|
|
||||||
|
def _count_code_occurrences(candidate: DPCandidate, all_candidates: list[DPCandidate]) -> int:
|
||||||
|
"""Compte combien de candidats partagent le même préfixe 3 chars."""
|
||||||
|
if not candidate.code or len(candidate.code) < 3:
|
||||||
|
return 1
|
||||||
|
prefix = candidate.code[:3]
|
||||||
|
return sum(1 for c in all_candidates if c.code and c.code[:3] == prefix)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Scoring déterministe
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def score_candidates(
    candidates: list[DPCandidate],
    synthese: dict,
) -> list[DPCandidate]:
    """Score the candidates and sort them by decreasing score, in place.

    Each candidate receives ``score``, ``score_details`` (one entry per rule
    that fired) and ``num_occurrences``. Rules applied:

    1. source section strength (``section_strength``);
    2. coding confidence: high=+3, medium=+1, anything else=0;
    3. other candidates sharing the same 3-char code prefix: +1 each, capped at +2;
    4. term found in the admission reason (``synthese["motif"]``): +2;
    4b. term or code found in ``diag_sortie`` / ``diag_principal`` (+4) or
        ``synthese`` (+2); only the highest bonus is kept;
    5. chronic comorbidity: -3;
    6. symptom (R-code): -2;
    7. act-only text: -4;
    8. Z-code outside ``Z_CODE_DP_WHITELIST``: -2.

    Args:
        candidates: Pool to score; mutated and re-ordered.
        synthese: Clinical summary dict; the keys ``motif``, ``diag_sortie``,
            ``diag_principal`` and ``synthese`` are read, missing or None
            values are treated as empty text.

    Returns:
        The same list object, sorted by decreasing score (stable sort).
    """
    # Loop invariants: computed once instead of once per candidate.
    confidence_points = {"high": 3, "medium": 1, "low": 0}
    motif = (synthese.get("motif") or "").lower()

    diag_sections = []
    for sec_key, sec_bonus in (("diag_sortie", 4), ("diag_principal", 4), ("synthese", 2)):
        sec_lower = (synthese.get(sec_key) or "").lower()
        if len(sec_lower) < 3:
            continue
        diag_sections.append((sec_lower, sec_lower.upper(), sec_bonus))

    for c in candidates:
        score = 0.0
        details: dict[str, float] = {}

        # 1. Strength of the source section (0-5)
        section_points = c.section_strength
        score += section_points
        details["section"] = section_points

        # 2. LLM/RAG confidence
        conf_points = confidence_points.get(c.confidence or "", 0)
        score += conf_points
        details["confidence"] = conf_points

        # 3. Same code prefix seen on several candidates
        occ = _count_code_occurrences(c, candidates)
        c.num_occurrences = occ
        if occ > 1:
            bonus = min(occ - 1, 2)  # cap at +2
            score += bonus
            details["occurrences"] = bonus

        term_lower = (c.term or "").lower()
        # An empty/blank term is a substring of every text: without this
        # guard it would collect every textual bonus for free.
        has_term = bool(term_lower.strip())
        code_upper = (c.code or "").upper()

        # 4. Alignment with the admission reason
        if has_term and len(motif) > 3 and term_lower in motif:
            score += 2
            details["motif_align"] = 2

        # 4b. Mention in strong diagnostic sections of the discharge letter
        diag_section_bonus = 0.0
        for sec_lower, sec_upper, sec_bonus in diag_sections:
            term_hit = has_term and term_lower in sec_lower
            code_hit = bool(code_upper) and code_upper in sec_upper
            if term_hit or code_hit:
                diag_section_bonus = max(diag_section_bonus, sec_bonus)
        if diag_section_bonus:
            score += diag_section_bonus
            details["diag_section_bonus"] = diag_section_bonus

        # 5. Chronic comorbidity malus
        if c.is_comorbidity_like:
            score -= 3
            details["comorbidity_malus"] = -3

        # 6. Symptom (R-code) malus
        if c.is_symptom_like:
            score -= 2
            details["symptom_malus"] = -2

        # 7. Act-only malus
        if c.is_act_only:
            score -= 4
            details["act_only_malus"] = -4

        # 8. Non-whitelisted Z-code malus. The prefix is upper-cased so that
        #    a lower-case "z51" is recognised as whitelisted.
        if code_upper[:1] == "Z" and code_upper[:3] not in Z_CODE_DP_WHITELIST:
            score -= 2
            details["z_code_malus"] = -2

        c.score = score
        c.score_details = details

    candidates.sort(key=lambda cand: cand.score, reverse=True)
    return candidates
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Ranker LLM (optionnel)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _llm_rank(
    candidates: list[DPCandidate],
    dossier: DossierMedical,
    synthese: dict,
) -> dict | None:
    """Ask the LLM to pick one candidate index from a constrained list.

    The prompt lists every candidate as ``[index] term (code) — confiance …``
    with its fragility flags, plus a short clinical context (age, sex, length
    of stay, admission reason, first 300 characters of the conclusion).

    Returns:
        Whatever ``call_ollama`` returns, or None when the LLM modules cannot
        be imported. A None answer from ``call_ollama`` is logged and passed
        through.
    """
    try:
        from .ollama_client import call_ollama
        from ..prompts import DP_RANKER_CONSTRAINED
    except ImportError:
        logger.warning("NUKE-3: imports LLM indisponibles")
        return None

    def _describe(cand) -> str:
        # One prompt line per candidate; the bracketed number is cand.index.
        pieces = [f"[{cand.index}] {cand.term}"]
        if cand.code:
            pieces.append(f" ({cand.code})")
        if cand.confidence:
            pieces.append(f" — confiance {cand.confidence}")
        flagged = (
            (cand.is_comorbidity_like, "comorbidité chronique"),
            (cand.is_symptom_like, "symptôme"),
            (cand.is_act_only, "acte seul"),
        )
        flag_names = [name for active, name in flagged if active]
        if flag_names:
            pieces.append(f" [flags: {', '.join(flag_names)}]")
        return " " + "".join(pieces)

    candidates_str = "\n".join(_describe(cand) for cand in candidates)

    # Clinical context
    context_lines = []
    if dossier.sejour.age:
        context_lines.append(f"Âge: {dossier.sejour.age} ans")
    if dossier.sejour.sexe:
        context_lines.append(f"Sexe: {dossier.sejour.sexe}")
    if dossier.sejour.duree_sejour:
        context_lines.append(f"Durée séjour: {dossier.sejour.duree_sejour}j")
    admission_reason = synthese.get("motif", "")
    if admission_reason:
        context_lines.append(f"Motif: {admission_reason}")
    closing_text = synthese.get("conclusion", "")
    if closing_text:
        context_lines.append(f"Conclusion: {closing_text[:300]}")
    ctx_str = "\n".join(context_lines) or "Non disponible"

    prompt = DP_RANKER_CONSTRAINED.format(
        candidates_str=candidates_str,
        ctx_str=ctx_str,
        n_candidates=len(candidates),
    )

    answer = call_ollama(prompt, temperature=0.0, max_tokens=1000, role="coding")
    if answer is None:
        logger.warning("NUKE-3: LLM indisponible pour le ranking")
    return answer
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Synthèse clinique (construite depuis le dossier + parsed_data)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_synthese(dossier: DossierMedical, parsed_data: dict) -> dict:
    """Build the clinical summary dict consumed by the scoring functions.

    Args:
        dossier: Dossier whose ``antecedents`` (objects with a ``texte``
            attribute) are read; at most the first ten are kept.
        parsed_data: Parser output; its ``"sections"`` mapping is read.
            A missing/None ``parsed_data`` or ``"sections"`` entry is treated
            as "no section extracted" rather than raising.

    Returns:
        Dict with keys ``motif``, ``conclusion``, ``diag_sortie``,
        ``diag_principal``, ``synthese`` (section texts, ``""`` when absent)
        and ``antecedents`` (list of texts).
    """
    sections = (parsed_data or {}).get("sections") or {}

    # Output key -> name of the parsed section it is read from.
    section_for_key = (
        ("motif", "motif_hospitalisation"),
        ("conclusion", "conclusion"),
        ("diag_sortie", "diag_sortie"),
        ("diag_principal", "diag_principal"),
        ("synthese", "synthese"),
    )
    synthese = {key: sections.get(section, "") for key, section in section_for_key}
    synthese["antecedents"] = [a.texte for a in (dossier.antecedents or [])[:10]]
    return synthese
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Collecte d'evidence & hardening DIM
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Thresholds used when qualifying a selection as CONFIRMED.

# Minimum section_strength required to keep confidence="high".
MIN_SECTION_STRENGTH_HIGH = 2

# Maximum length of one evidence excerpt.
MAX_EVIDENCE_LEN = 240

# Minimum length for an excerpt to be considered usable.
MIN_EVIDENCE_LEN = 20
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_evidence(
    winner: DPCandidate,
    candidates: list[DPCandidate],
    synthese: dict,
) -> list[str]:
    """Produce one to three short excerpts justifying the chosen DP.

    Sources, in order:

    1. A scoring summary of the winner (always present).
    2. The first section that mentions the winner, by priority
       ``diag_sortie`` > ``diag_principal`` > ``motif`` > ``conclusion``.
       The two diagnostic sections match on term OR ICD-10 code; the motif
       and the conclusion match on term only. An empty/blank term never
       matches (it would otherwise be a substring of any text).
    3. The score delta versus the runner-up, when strictly positive.

    Args:
        winner: Chosen candidate (already scored).
        candidates: Scored pool, sorted by decreasing score.
        synthese: Clinical summary dict.

    Returns:
        At most three evidence strings; excerpts are truncated to
        ``MAX_EVIDENCE_LEN`` characters.
    """
    evidence: list[str] = []

    # 1. Scoring summary
    summary = f"Score {winner.score:.1f} — source: {winner.source or 'inconnu'}"
    if winner.section_strength >= 3:
        summary += " (section forte)"
    evidence.append(summary)

    # 2. Section alignment
    term_lower = (winner.term or "").lower()
    has_term = bool(term_lower.strip())
    code_upper = (winner.code or "").upper()

    def _mentions_term(text: str) -> bool:
        return has_term and term_lower in text.lower()

    def _mentions_term_or_code(text: str) -> bool:
        return _mentions_term(text) or bool(code_upper and code_upper in text.upper())

    prioritized_sources = (
        ("diag_sortie", "Diagnostic de sortie", _mentions_term_or_code),
        ("diag_principal", "Diagnostic principal", _mentions_term_or_code),
        ("motif", "Motif hospitalisation", _mentions_term),
        ("conclusion", "Conclusion", _mentions_term),
    )
    for key, label, matches in prioritized_sources:
        section_text = (synthese.get(key) or "").strip()
        if section_text and matches(section_text):
            excerpt = section_text[:MAX_EVIDENCE_LEN]
            evidence.append(f"{label}: «{excerpt}»")
            break  # only the highest-priority section is quoted

    # 3. Delta vs runner-up (the best candidate that is not the winner)
    if len(candidates) >= 2:
        runner = candidates[1] if candidates[0] is winner else candidates[0]
        delta = winner.score - runner.score
        if delta > 0:
            evidence.append(
                f"Delta +{delta:.1f} vs {runner.term} ({runner.code or '?'})"
            )

    return evidence[:3]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_fragile_candidate(c: DPCandidate) -> bool:
|
||||||
|
"""Un candidat est fragile s'il porte un flag disqualifiant."""
|
||||||
|
return c.is_comorbidity_like or c.is_symptom_like or c.is_act_only
|
||||||
|
|
||||||
|
|
||||||
|
def _enforce_confirmed_rules(selection: DPSelection, synthese: dict) -> DPSelection:
    """Apply the DIM safety rules to a DPSelection (mutated and returned).

    Rules:
      A1) CONFIRMED without evidence -> downgraded to REVIEW.
      A2) CONFIRMED on a single, fragile candidate -> downgraded to REVIEW.
      A3) confidence="high" is forbidden when verdict="REVIEW";
          confidence="high" also requires the *chosen* candidate to have
          ``section_strength >= MIN_SECTION_STRENGTH_HIGH``.

    Args:
        selection: Selection to harden.
        synthese: Clinical summary; currently not read by any rule.

    Returns:
        The same ``selection`` object.
    """
    # --- A1: CONFIRMED implies non-empty evidence ---
    if selection.verdict == "CONFIRMED" and not selection.evidence:
        selection.verdict = "REVIEW"
        if selection.confidence == "high":
            selection.confidence = "medium"
        selection.reason = (selection.reason or "") + " | downgrade: pas de preuve exploitable"

    # --- A2: single fragile candidate ---
    if (
        selection.verdict == "CONFIRMED"
        and len(selection.candidates) == 1
        and selection.candidates[0]
    ):
        only = selection.candidates[0]
        if _is_fragile_candidate(only):
            selection.verdict = "REVIEW"
            if selection.confidence == "high":
                selection.confidence = "medium"
            flagged = (
                (only.is_comorbidity_like, "comorbidité"),
                (only.is_symptom_like, "symptôme"),
                (only.is_act_only, "acte-seul"),
            )
            flags = [name for active, name in flagged if active]
            selection.reason = f"Pool mono-candidat fragile ({', '.join(flags)})"

    # --- A3: confidence cap ---
    if selection.verdict == "REVIEW" and selection.confidence == "high":
        selection.confidence = "medium"

    # confidence="high" requires a strong enough source section. The check
    # must look at the candidate that was actually chosen: when the LLM
    # tie-breaker picks another candidate than the top-scored one,
    # candidates[0] is not the winner.
    if selection.confidence == "high" and selection.candidates:
        winner = next(
            (
                cand
                for cand in selection.candidates
                if cand is not None and cand.index == selection.chosen_index
            ),
            selection.candidates[0],
        )
        if winner.section_strength < MIN_SECTION_STRENGTH_HIGH:
            selection.confidence = "medium"

    return selection
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Point d'entrée principal
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def select_dp(
    dossier: DossierMedical,
    synthese: dict,
    pool: list | None = None,
    config: dict | None = None,
) -> DPSelection:
    """NUKE-3 — DIM-style principal-diagnosis (DP) selector.

    Decision flow:

    1. Trackare document with a DP: that DP is authoritative, no scoring.
    2. Otherwise build, score and keep the ``top_k`` best candidates.
    3. No candidate -> REVIEW. Single candidate -> CONFIRMED.
    4. Top-1 leads top-2 by at least ``DELTA_CONFIRMED`` -> CONFIRMED.
    5. Close scores and LLM enabled -> the LLM picks a candidate by its
       ``index`` (the number shown in the prompt).
    6. Fallback -> REVIEW with top-1 as suggestion.

    Every non-Trackare selection that has candidates goes through
    ``_enforce_confirmed_rules``.

    Args:
        dossier: Enriched DossierMedical (post NUKE-1 + NUKE-2).
        synthese: Clinical summary, e.g. ``{"motif", "conclusion",
            "diag_sortie", "diag_principal", "synthese", "antecedents"}``.
        pool: Reserved, unused (candidates are built from the dossier).
        config: Optional ``{"llm_enabled": bool, "top_k": int}``.

    Returns:
        DPSelection whose verdict is ``"CONFIRMED"`` or ``"REVIEW"``.
    """
    config = config or {}
    llm_enabled = config.get("llm_enabled", get_dp_ranker_llm_enabled())
    top_k = config.get("top_k", 7)

    # Trackare -> authoritative DP, no scoring
    if dossier.document_type == "trackare" and dossier.diagnostic_principal:
        return DPSelection(
            chosen_index=0,
            chosen_term=dossier.diagnostic_principal.texte,
            chosen_code=dossier.diagnostic_principal.cim10_suggestion,
            confidence="high",
            verdict="CONFIRMED",
            evidence=["Source: Trackare (codage établissement)"],
            reason="DP Trackare — source d'autorité",
        )

    # Build and score the candidates
    candidates = build_candidates(dossier)
    candidates = score_candidates(candidates, synthese)
    candidates = candidates[:top_k]

    if not candidates:
        return DPSelection(
            verdict="REVIEW",
            reason="Aucun candidat DP identifié",
        )

    top1 = candidates[0]

    # Single candidate -> CONFIRMED (subject to hardening rule A2)
    if len(candidates) == 1:
        selection = DPSelection(
            chosen_index=top1.index,
            chosen_term=top1.term,
            chosen_code=top1.code,
            confidence=top1.confidence,
            verdict="CONFIRMED",
            evidence=_collect_evidence(top1, candidates, synthese),
            reason="Candidat unique",
            candidates=candidates,
            debug_scores={"top1": top1.score},
        )
        return _enforce_confirmed_rules(selection, synthese)

    top2 = candidates[1]
    delta = top1.score - top2.score

    # Clear gap -> CONFIRMED by the deterministic pre-ranker
    if delta >= DELTA_CONFIRMED:
        selection = DPSelection(
            chosen_index=top1.index,
            chosen_term=top1.term,
            chosen_code=top1.code,
            confidence=top1.confidence,
            verdict="CONFIRMED",
            evidence=_collect_evidence(top1, candidates, synthese),
            reason=f"Écart score {delta:.1f} >= seuil {DELTA_CONFIRMED}",
            candidates=candidates,
            debug_scores={"top1": top1.score, "top2": top2.score, "delta": delta},
        )
        return _enforce_confirmed_rules(selection, synthese)

    # Close scores -> try the LLM tie-breaker
    if llm_enabled:
        llm_result = _llm_rank(candidates, dossier, synthese)
        if llm_result and isinstance(llm_result, dict):
            chosen_idx = llm_result.get("chosen_index")
            chosen = None
            # bool is a subclass of int: True/False are not valid indices.
            if isinstance(chosen_idx, int) and not isinstance(chosen_idx, bool):
                # The prompt labels each candidate with its ``index`` (build
                # order), which differs from its position in the sorted,
                # truncated list: resolve the answer by ``index``.
                chosen = next(
                    (cand for cand in candidates if cand.index == chosen_idx),
                    None,
                )
            if chosen is not None:
                llm_evidence = llm_result.get("evidence", [])
                if not llm_evidence:
                    llm_evidence = _collect_evidence(chosen, candidates, synthese)
                verdict = llm_result.get("verdict", "CONFIRMED")
                if verdict not in ("CONFIRMED", "REVIEW"):
                    # Unknown verdict from the LLM: stay on the safe side.
                    verdict = "REVIEW"
                selection = DPSelection(
                    chosen_index=chosen.index,
                    chosen_term=chosen.term,
                    chosen_code=chosen.code,
                    confidence=llm_result.get("confidence", chosen.confidence),
                    verdict=verdict,
                    evidence=llm_evidence,
                    reason=llm_result.get("reason", "Sélection LLM"),
                    candidates=candidates,
                    debug_scores={
                        "top1": top1.score,
                        "top2": top2.score,
                        "delta": delta,
                        "llm": True,
                    },
                )
                return _enforce_confirmed_rules(selection, synthese)

    # Fallback -> REVIEW with top-1 as suggestion
    selection = DPSelection(
        chosen_index=top1.index,
        chosen_term=top1.term,
        chosen_code=top1.code,
        confidence=top1.confidence,
        verdict="REVIEW",
        evidence=[f"Scores proches : {top1.score:.1f} vs {top2.score:.1f}"],
        reason=f"Écart {delta:.1f} < seuil {DELTA_CONFIRMED}, "
               f"LLM {'désactivé' if not llm_enabled else 'indisponible'}",
        candidates=candidates,
        debug_scores={"top1": top1.score, "top2": top2.score, "delta": delta},
    )
    return _enforce_confirmed_rules(selection, synthese)
|
||||||
@@ -188,6 +188,16 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
|
|||||||
# Diagnostic principal : le plus spécifique
|
# Diagnostic principal : le plus spécifique
|
||||||
merged.diagnostic_principal = _prefer_most_specific_dp(dossiers)
|
merged.diagnostic_principal = _prefer_most_specific_dp(dossiers)
|
||||||
|
|
||||||
|
# Propager dp_selection depuis le dossier source du DP retenu
|
||||||
|
if merged.diagnostic_principal:
|
||||||
|
dp_code = merged.diagnostic_principal.cim10_suggestion
|
||||||
|
for d in dossiers:
|
||||||
|
if (d.diagnostic_principal
|
||||||
|
and d.diagnostic_principal.cim10_suggestion == dp_code
|
||||||
|
and d.dp_selection is not None):
|
||||||
|
merged.dp_selection = d.dp_selection
|
||||||
|
break
|
||||||
|
|
||||||
# Collecter tous les DAS + DP non retenus comme DAS
|
# Collecter tous les DAS + DP non retenus comme DAS
|
||||||
all_das: list[Diagnostic] = []
|
all_das: list[Diagnostic] = []
|
||||||
for d in dossiers:
|
for d in dossiers:
|
||||||
|
|||||||
Reference in New Issue
Block a user