diff --git a/benchmark_quality.py b/benchmark_quality.py new file mode 100644 index 0000000..417a1d3 --- /dev/null +++ b/benchmark_quality.py @@ -0,0 +1,660 @@ +#!/usr/bin/env python3 +"""Benchmark qualité DP scoring déterministe vs pipeline LLM. + +Compare le DP trouvé par le nouveau scoring déterministe (± fallback LLM) +avec le DP de référence (gold) extrait par le pipeline complet (avec LLM). + +Métriques (calculées sur dossiers avec gold_dp non-None uniquement) : +- exact_match : code identique +- family4 : 4 premiers chars identiques (ex: K85.1 vs K85.0) +- family3 : 3 premiers chars identiques (ex: K85.x vs K86.x → non) +- coverage_dp : % de dossiers où un DP est proposé (new_code non-None) + +Usage: + .venv/bin/python3 benchmark_quality.py [--limit 50] [--verbose] + .venv/bin/python3 benchmark_quality.py --limit 50 --use-llm --verbose +""" + +from __future__ import annotations + +import argparse +import json +import sys +import time +from collections import Counter +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) + +from src.extraction.crh_parser import parse_crh +from src.config import DossierMedical, Diagnostic +from src.medical.cim10_extractor import ( + _extract_sejour, + _extract_actes, + _extract_biologie, + _extract_imagerie, +) +from src.medical.dp_scoring import ( + build_dp_shortlist, + build_dp_candidate_pool, + score_candidates, + select_dp, + llm_dp_fallback, + llm_dp_pool_rank, + generate_synthese_pmsi, +) + +BASE = Path(__file__).resolve().parent +ANON_DIR = BASE / "output" / "anonymized" +STRUCT_DIR = BASE / "output" / "structured" + + +def find_crh_dossiers(limit: int = 50) -> list[dict]: + """Trouve les dossiers avec CRH anonymisé ET JSON gold.""" + dossiers = [] + for anon_dir in sorted(ANON_DIR.iterdir()): + if not anon_dir.is_dir(): + continue + dir_name = anon_dir.name + crh_files = list(anon_dir.glob("CRH_*_anonymized.txt")) + if not crh_files: + continue + crh_file = crh_files[0] + crh_name = 
crh_file.stem.replace("_anonymized", "") + gold_json = STRUCT_DIR / dir_name / f"{crh_name}_cim10.json" + if not gold_json.exists(): + continue + dossiers.append({ + "dir_name": dir_name, + "crh_name": crh_name, + "text_path": crh_file, + "gold_path": gold_json, + }) + if len(dossiers) >= limit: + break + return dossiers + + +def load_gold_dp(gold_path: Path) -> dict: + """Charge le DP de référence depuis le JSON gold.""" + data = json.loads(gold_path.read_text(encoding="utf-8")) + dp = data.get("diagnostic_principal", {}) + return { + "code": dp.get("cim10_suggestion"), + "label": dp.get("texte", ""), + "confidence": dp.get("cim10_confidence", ""), + "source": dp.get("source", ""), + } + + +def run_dp_only(text_path: Path, use_llm: bool = False) -> dict: + """Extraction DP ciblée : scoring déterministe + fallback LLM optionnel.""" + text = text_path.read_text(encoding="utf-8") + parsed = parse_crh(text) + + dossier = DossierMedical() + dossier.document_type = parsed.get("type", "") + _extract_sejour(parsed, dossier) + _extract_biologie(text, dossier) + _extract_actes(text, dossier) + _extract_imagerie(text, dossier) + + edsnlp_result = None + try: + from src.medical.edsnlp_pipeline import run_edsnlp + edsnlp_result = run_edsnlp(text) + except Exception: + pass + + candidates = build_dp_shortlist(parsed, text, edsnlp_result, dossier) + candidates = score_candidates(candidates, dossier, full_text=text) + selection = select_dp(candidates, dossier, use_llm=use_llm) + + # Instrumentation : comorbidity fallback + comorbidity_fallback = ( + selection.verdict == "review" + and "comorbidité banale" in (selection.winner_reason or "") + ) + dp_pre_llm = None + if comorbidity_fallback and selection.candidates: + c0 = selection.candidates[0] + dp_pre_llm = {"code": c0.code, "section": c0.source_section} + + if use_llm and selection.verdict == "review": + llm_selection = llm_dp_fallback( + parsed, text, dossier, + dp_candidates=candidates, + edsnlp_result=edsnlp_result, + ) + 
if llm_selection.candidates: + all_candidates = list(llm_selection.candidates) + if selection.candidates: + all_candidates.extend(selection.candidates) + llm_selection.candidates = all_candidates + selection = llm_selection + + dossier.dp_selection = selection + if selection.candidates: + winner = selection.candidates[0] + dossier.diagnostic_principal = Diagnostic( + texte=winner.label, + cim10_suggestion=winner.code, + source=winner.source_section, + source_page=winner.source_page, + source_excerpt=winner.source_excerpt, + ) + + result = { + "dp_code": None, + "dp_label": "", + "dp_source": "", + "verdict": None, + "winner_reason": None, + "candidates": [], + "comorbidity_fallback": comorbidity_fallback, + "dp_pre_llm": dp_pre_llm, + } + if dossier.diagnostic_principal: + result["dp_code"] = dossier.diagnostic_principal.cim10_suggestion + result["dp_label"] = dossier.diagnostic_principal.texte + result["dp_source"] = dossier.diagnostic_principal.source or "" + if dossier.dp_selection: + sel = dossier.dp_selection + result["verdict"] = sel.verdict + result["winner_reason"] = sel.winner_reason + result["candidates"] = [ + {"code": c.code, "label": c.label, "section": c.source_section, + "score": c.score, "details": c.score_details} + for c in sel.candidates + ] + return result + + +def run_dp_pool_rank(text_path: Path) -> dict: + """DP Pool Rank : génère SynthesePMSI + pool, puis LLM choisit parmi le pool.""" + text = text_path.read_text(encoding="utf-8") + parsed = parse_crh(text) + + dossier = DossierMedical() + dossier.document_type = parsed.get("type", "") + _extract_sejour(parsed, dossier) + _extract_biologie(text, dossier) + _extract_actes(text, dossier) + _extract_imagerie(text, dossier) + + edsnlp_result = None + try: + from src.medical.edsnlp_pipeline import run_edsnlp + edsnlp_result = run_edsnlp(text) + except Exception: + pass + + # 1. Synthèse PMSI + synthese = generate_synthese_pmsi(parsed, text, dossier) + + # 2. 
Pool de candidats + pool = build_dp_candidate_pool(parsed, text, edsnlp_result, dossier) + + # 3. LLM pool rank + dp_shortlist = build_dp_shortlist(parsed, text, edsnlp_result, dossier) + dp_shortlist = score_candidates(dp_shortlist, dossier, full_text=text) + + selection = llm_dp_pool_rank( + parsed, text, dossier, + pool_candidates=pool, + synthese=synthese, + fallback_oneshot=True, + dp_candidates=dp_shortlist, + edsnlp_result=edsnlp_result, + ) + + dossier.dp_selection = selection + if selection.candidates: + winner = selection.candidates[0] + dossier.diagnostic_principal = Diagnostic( + texte=winner.label, + cim10_suggestion=winner.code, + source=winner.source_section, + source_page=winner.source_page, + source_excerpt=winner.source_excerpt, + ) + + result = { + "dp_code": None, + "dp_label": "", + "dp_source": "", + "verdict": None, + "winner_reason": None, + "candidates": [], + "pool_size": len(pool), + "pool_top10": [ + {"terme": c.terme, "section": c.section, + "preuve": c.preuve[:120], "score": round(c.score_initial, 2)} + for c in pool[:10] + ], + "synthese": synthese.model_dump() if synthese else None, + } + if dossier.diagnostic_principal: + result["dp_code"] = dossier.diagnostic_principal.cim10_suggestion + result["dp_label"] = dossier.diagnostic_principal.texte + result["dp_source"] = dossier.diagnostic_principal.source or "" + if dossier.dp_selection: + sel = dossier.dp_selection + result["verdict"] = sel.verdict + result["winner_reason"] = sel.winner_reason + result["candidates"] = [ + {"code": c.code, "label": c.label, "section": c.source_section, + "score": c.score, "details": c.score_details} + for c in sel.candidates + ] + return result + + +# --- Matching helpers --- + +def _norm(code: str) -> str: + """Normalise un code CIM-10 pour comparaison : supprime le point.""" + return code.replace(".", "") + + +def match_exact(a: str | None, b: str | None) -> bool: + if not a or not b: + return False + return a == b + + +def match_family4(a: str | 
None, b: str | None) -> bool: + """4 premiers chars sans point identiques (ex: K851 vs K850 → True).""" + if not a or not b: + return False + return _norm(a)[:4] == _norm(b)[:4] + + +def match_family3(a: str | None, b: str | None) -> bool: + """3 premiers chars identiques (ex: K85.x → K85).""" + if not a or not b: + return False + return a[:3] == b[:3] + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark qualité DP scoring") + parser.add_argument("--limit", type=int, default=50, help="Nombre de dossiers") + parser.add_argument("--verbose", action="store_true", help="Afficher chaque dossier") + parser.add_argument("--use-llm", action="store_true", + help="Activer le fallback LLM DP sur les REVIEW (nécessite Ollama)") + parser.add_argument("--synthese-pmsi", action="store_true", + help="Générer la SynthesePMSI pour chaque dossier (nécessite Ollama)") + parser.add_argument("--dp-candidates", action="store_true", + help="Générer et afficher le DP Candidate Pool pour chaque dossier") + parser.add_argument("--use-dp-pool-rank", action="store_true", + help="Utiliser le mode DP Pool Rank (LLM choisit parmi le pool, nécessite Ollama)") + args = parser.parse_args() + + mode = "déterministe + LLM fallback" if args.use_llm else "déterministe seul" + if args.use_dp_pool_rank: + mode = "DP Pool Rank (LLM choisit parmi pool)" + if args.synthese_pmsi: + mode += " + SynthesePMSI" + if args.dp_candidates: + mode += " + DPCandidatePool" + print(f"=== Benchmark DP scoring {mode} (n={args.limit}) ===\n") + + dossiers = find_crh_dossiers(limit=args.limit) + print(f"Dossiers CRH trouvés : {len(dossiers)}\n") + if not dossiers: + print("ERREUR : aucun dossier CRH avec gold JSON trouvé") + return + + # Compteurs + total = len(dossiers) + gold_none = 0 # gold_dp = None (exclus des métriques de match) + evaluable = 0 # gold_dp non-None → base pour exact/family + exact = 0 + fam4 = 0 + fam3 = 0 + coverage_has_dp = 0 # new_code non-None (sur total) + review_count = 0 + 
confirmed_count = 0 + comorbidity_fallback_count = 0 + errors: list[dict] = [] + review_reasons: list[str] = [] + syntheses: list[dict] = [] # (crh_name, gold_code, new_code, synthese_dict) + dp_pools: list[dict] = [] + pool_rank_results: list[dict] = [] # résultats détaillés pool rank + gold_confidences = Counter() + verdicts = Counter() + source_sections = Counter() + timings: list[float] = [] + + for i, d in enumerate(dossiers): + gold = load_gold_dp(d["gold_path"]) + gold_code = gold["code"] + gold_confidences[gold["confidence"] or "none"] += 1 + + t0 = time.time() + if args.use_dp_pool_rank: + result = run_dp_pool_rank(d["text_path"]) + pool_rank_results.append({ + "crh": d["crh_name"], + "gold_code": gold_code, + "dp_label": result["dp_label"], + "dp_source": result["dp_source"], + "verdict": result["verdict"], + "winner_reason": result["winner_reason"], + "pool_size": result.get("pool_size", 0), + "pool_top10": result.get("pool_top10", []), + "synthese": result.get("synthese"), + "candidates": result.get("candidates", []), + }) + else: + result = run_dp_only(d["text_path"], use_llm=args.use_llm) + elapsed = time.time() - t0 + timings.append(elapsed) + + new_code = result["dp_code"] + + # SynthesePMSI optionnelle + if args.synthese_pmsi: + text_synth = d["text_path"].read_text(encoding="utf-8") + parsed_synth = parse_crh(text_synth) + dossier_tmp = DossierMedical() + dossier_tmp.document_type = parsed_synth.get("type", "") + _extract_sejour(parsed_synth, dossier_tmp) + _extract_actes(text_synth, dossier_tmp) + synthese = generate_synthese_pmsi(parsed_synth, text_synth, dossier_tmp) + syntheses.append({ + "crh": d["crh_name"], + "gold_code": gold_code, + "new_code": new_code, + "synthese": synthese.model_dump() if synthese else None, + }) + # DP Candidate Pool optionnel + if args.dp_candidates: + text_pool = d["text_path"].read_text(encoding="utf-8") + parsed_pool = parse_crh(text_pool) + dossier_pool = DossierMedical() + dossier_pool.document_type = 
parsed_pool.get("type", "") + _extract_sejour(parsed_pool, dossier_pool) + _extract_actes(text_pool, dossier_pool) + edsnlp_pool = None + try: + from src.medical.edsnlp_pipeline import run_edsnlp + edsnlp_pool = run_edsnlp(text_pool) + except Exception: + pass + pool = build_dp_candidate_pool(parsed_pool, text_pool, edsnlp_pool, dossier_pool) + dp_pools.append({ + "crh": d["crh_name"], + "gold_code": gold_code, + "new_code": new_code, + "pool_size": len(pool), + "candidates": [ + {"terme": c.terme, "section": c.section, + "preuve": c.preuve[:120], "score": round(c.score_initial, 2)} + for c in pool + ], + }) + + verdict = result["verdict"] + verdicts[verdict or "no_selection"] += 1 + + if result["dp_source"]: + source_sections[result["dp_source"]] += 1 + + # Coverage : new_code proposé (sur total) + if new_code: + coverage_has_dp += 1 + + # Métriques de match : uniquement si gold_dp non-None + if gold_code is None: + gold_none += 1 + else: + evaluable += 1 + is_exact = match_exact(new_code, gold_code) + is_f4 = match_family4(new_code, gold_code) + is_f3 = match_family3(new_code, gold_code) + if is_exact: + exact += 1 + if is_f4: + fam4 += 1 + if is_f3: + fam3 += 1 + + # Erreurs (non-exact avec gold) + if not is_exact: + errors.append({ + "dir": d["dir_name"], + "crh": d["crh_name"], + "gold_code": gold_code, + "gold_label": gold["label"], + "gold_conf": gold["confidence"], + "new_code": new_code or "(aucun)", + "new_label": result["dp_label"] or "(aucun)", + "new_source": result["dp_source"], + "verdict": verdict, + "winner_reason": result["winner_reason"] or "", + "candidates": result["candidates"][:3], + "is_f4": is_f4, + "is_f3": is_f3, + }) + + if result.get("comorbidity_fallback"): + comorbidity_fallback_count += 1 + + if verdict == "review": + review_count += 1 + if result["winner_reason"]: + review_reasons.append(result["winner_reason"]) + elif verdict == "confirmed": + confirmed_count += 1 + + if args.verbose: + if gold_code is None: + tag = "SKIP" + elif 
match_exact(new_code, gold_code): + tag = "EXACT" + elif match_family4(new_code, gold_code): + tag = "FAM4" + elif match_family3(new_code, gold_code): + tag = "FAM3" + else: + tag = "MISS" + print(f" [{i+1:3d}] {d['crh_name']} : gold={gold_code} new={new_code} " + f"[{tag}] verdict={verdict} ({elapsed:.1f}s)") + + # === Rapport === + print(f"\n{'='*60}") + print(f"RESULTATS — {total} dossiers CRH ({mode})") + print(f"{'='*60}\n") + + pct = lambda n, d: n / d * 100 if d else 0 + + print(f" Évaluables (gold non-None) : {evaluable}/{total} (excl. {gold_none} sans gold DP)") + print() + print(f" DP exact match : {exact}/{evaluable} ({pct(exact, evaluable):.1f}%)") + print(f" DP family4 : {fam4}/{evaluable} ({pct(fam4, evaluable):.1f}%)") + print(f" DP family3 : {fam3}/{evaluable} ({pct(fam3, evaluable):.1f}%)") + print(f" Coverage DP : {coverage_has_dp}/{total} ({pct(coverage_has_dp, total):.1f}%)") + print() + print(f" Verdict REVIEW : {review_count}/{total} ({pct(review_count, total):.1f}%)") + print(f" Verdict CONFIRM: {confirmed_count}/{total} ({pct(confirmed_count, total):.1f}%)") + print(f" Comorbidité FB : {comorbidity_fallback_count}/{total} ({pct(comorbidity_fallback_count, total):.1f}%)") + + if timings: + avg_t = sum(timings) / len(timings) + print(f"\n Temps moyen : {avg_t:.1f}s/dossier") + print(f" Temps total : {sum(timings):.1f}s") + + print(f"\n Gold confidence :") + for conf, cnt in gold_confidences.most_common(): + print(f" {conf:8s} : {cnt}") + + print(f"\n Sources DP (new) :") + for src, cnt in source_sections.most_common(): + print(f" {src:35s} : {cnt}") + + print(f"\n Verdicts :") + for v, cnt in verdicts.most_common(): + print(f" {v:15s} : {cnt}") + + if review_reasons: + print(f"\n Top 5 review reasons :") + reason_patterns = Counter() + for r in review_reasons: + if "aucun candidat" in r: + reason_patterns["aucun candidat DP trouvé"] += 1 + elif "delta insuffisant" in r: + reason_patterns["delta insuffisant (ambiguïté)"] += 1 + elif 
"evidence_excerpt vide" in r: + reason_patterns["LLM: evidence_excerpt vide"] += 1 + elif "comorbidité" in r: + reason_patterns["LLM: comorbidité hors section forte"] += 1 + elif "code invalide" in r: + reason_patterns["LLM: code CIM-10 invalide"] += 1 + elif "LLM non disponible" in r or "erreur LLM" in r: + reason_patterns["LLM: erreur/indisponible"] += 1 + elif "réponse LLM invalide" in r: + reason_patterns["LLM: réponse invalide"] += 1 + elif "section faible" in r or "confidence" in r: + reason_patterns["LLM: garde-fou (section/confidence)"] += 1 + else: + reason_patterns[r[:60]] += 1 + for reason, cnt in reason_patterns.most_common(5): + print(f" [{cnt:2d}] {reason}") + + if errors: + print(f"\n{'='*60}") + print(f"ERREURS DP — {len(errors)} dossiers (5 premiers)") + print(f"{'='*60}\n") + for e in errors[:5]: + fam_tag = " [fam4]" if e.get("is_f4") else (" [fam3]" if e.get("is_f3") else "") + print(f" {e['crh']} ({e['dir']}){fam_tag}") + print(f" Gold : {e['gold_code']} — {e['gold_label'][:60]} (conf={e['gold_conf']})") + print(f" New : {e['new_code']} — {e['new_label'][:60]}") + print(f" Source: {e['new_source']}, Verdict: {e['verdict']}") + if e.get('winner_reason'): + print(f" Reason: {e['winner_reason'][:80]}") + if e['candidates']: + print(f" Candidats :") + for c in e['candidates']: + print(f" {c['code']} — {c['label'][:50]} " + f"(section={c['section']}, score={c['score']})") + print() + + # Affichage des synthèses PMSI si activé + if args.synthese_pmsi and syntheses: + print(f"\n{'='*60}") + print(f"SYNTHESES PMSI — {len(syntheses)} dossiers") + print(f"{'='*60}") + for s in syntheses: + print(f"\n --- {s['crh']} (gold={s['gold_code']}, new={s['new_code']}) ---") + syn = s.get("synthese") + if not syn: + print(" (échec génération)") + continue + print(f" Motif admission : {syn.get('motif_admission', '')[:100]}") + print(f" Problème PEC : {syn.get('probleme_pris_en_charge', '')[:100]}") + print(f" Diagnostic retenu : {syn.get('diagnostic_retenu', 
'')[:100]}") + actes = syn.get("actes_ou_traitements_majeurs", []) + if actes: + print(f" Actes/traitements : {', '.join(a[:60] for a in actes[:4])}") + compli = syn.get("complications", []) + if compli: + print(f" Complications : {', '.join(c[:60] for c in compli[:3])}") + comor = syn.get("terrain_comorbidites", []) + if comor: + print(f" Terrain/comorbidités: {', '.join(c[:60] for c in comor[:5])}") + preuves = syn.get("preuves", []) + if preuves: + print(f" Preuves ({len(preuves)}) :") + for p in preuves[:3]: + print(f" [{p.get('section', '?')}] {p.get('excerpt', '')[:120]}") + + # Affichage des résultats DP Pool Rank si activé + if args.use_dp_pool_rank and pool_rank_results: + print(f"\n{'='*60}") + print(f"DP POOL RANK — {len(pool_rank_results)} dossiers") + print(f"{'='*60}") + chosen_ok = sum(1 for r in pool_rank_results if r["dp_label"]) + print(f"\n Choix effectué : {chosen_ok}/{len(pool_rank_results)} " + f"({chosen_ok/len(pool_rank_results)*100:.0f}%)") + for r in pool_rank_results: + print(f"\n --- {r['crh']} (gold={r['gold_code']}) ---") + # SynthesePMSI + syn = r.get("synthese") + if syn: + print(f" SynthesePMSI :") + print(f" Motif admission : {syn.get('motif_admission', '')[:80]}") + print(f" Problème PEC : {syn.get('probleme_pris_en_charge', '')[:80]}") + print(f" Diag retenu : {syn.get('diagnostic_retenu', '')[:80]}") + else: + print(f" SynthesePMSI : (non disponible)") + # Pool top 10 + print(f" Pool ({r['pool_size']} candidats) :") + for j, c in enumerate(r.get("pool_top10", [])[:10]): + print(f" [{j}] {c['terme'][:55]:55s} ({c['section']}, {c['score']:.2f})") + # Résultat LLM + print(f" >>> DP choisi : {r['dp_label'][:70] or '(aucun)'}") + print(f" Source : {r['dp_source']}") + print(f" Verdict : {r['verdict']}") + print(f" Reason : {(r['winner_reason'] or '')[:100]}") + + # Affichage des DP Candidate Pools si activé + if args.dp_candidates and dp_pools: + print(f"\n{'='*60}") + print(f"DP CANDIDATE POOL — {len(dp_pools)} dossiers") + 
print(f"{'='*60}") + pool_sizes = [p["pool_size"] for p in dp_pools] + print(f"\n Taille pool : min={min(pool_sizes)}, max={max(pool_sizes)}, " + f"moy={sum(pool_sizes)/len(pool_sizes):.1f}") + for p in dp_pools: + print(f"\n --- {p['crh']} (gold={p['gold_code']}, new={p['new_code']}) " + f"— {p['pool_size']} candidats ---") + for i, c in enumerate(p["candidates"][:10], 1): + print(f" [{i:2d}] {c['terme'][:60]:60s} " + f"({c['section']}, score={c['score']:.2f})") + if c["preuve"]: + print(f" preuve: {c['preuve'][:100]}") + + # JSON exportable + summary = { + "mode": mode, + "total": total, + "evaluable": evaluable, + "gold_none": gold_none, + "exact_match": exact, + "exact_match_pct": round(pct(exact, evaluable), 1), + "family4": fam4, + "family4_pct": round(pct(fam4, evaluable), 1), + "family3": fam3, + "family3_pct": round(pct(fam3, evaluable), 1), + "coverage_dp": coverage_has_dp, + "coverage_dp_pct": round(pct(coverage_has_dp, total), 1), + "review_count": review_count, + "review_pct": round(pct(review_count, total), 1), + "confirmed_count": confirmed_count, + "comorbidity_fallback_count": comorbidity_fallback_count, + "comorbidity_fallback_pct": round(pct(comorbidity_fallback_count, total), 1), + "errors": errors, + } + if args.synthese_pmsi: + summary["syntheses_pmsi"] = syntheses + if args.dp_candidates: + summary["dp_pools"] = dp_pools + if args.use_dp_pool_rank: + summary["pool_rank_results"] = pool_rank_results + suffix = "_llm" if args.use_llm else "" + if args.use_dp_pool_rank: + suffix = "_pool_rank" + if args.synthese_pmsi: + suffix += "_synthese" + if args.dp_candidates: + suffix += "_pool" + out_path = BASE / "output" / f"benchmark_dp_quality{suffix}.json" + out_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8") + print(f"\nRésultats exportés : {out_path}") + + +if __name__ == "__main__": + main() diff --git a/src/config.py b/src/config.py index c2853f5..00e5e91 100644 --- a/src/config.py +++ b/src/config.py @@ 
-169,6 +169,14 @@ class SynthesePMSI(BaseModel): preuves: list[PreuveSynthese] = Field(default_factory=list) +class DPPoolCandidate(BaseModel): + """Candidat du pool DP élargi (terme libre, pas forcément codé CIM-10).""" + terme: str # Texte du diagnostic candidat + section: str # Section source (conclusion, motif_hospitalisation, edsnlp, acte, etc.) + preuve: str = "" # Extrait du texte source (~200 chars) + score_initial: float = 0.0 # Score 0-1 (poids section + indicateurs) + + class DPCandidate(BaseModel): code: Optional[str] = None label: str @@ -247,6 +255,7 @@ class DossierMedical(BaseModel): sejour: Sejour = Field(default_factory=Sejour) diagnostic_principal: Optional[Diagnostic] = None dp_selection: Optional[DPSelection] = None + synthese_pmsi: Optional[SynthesePMSI] = None diagnostics_associes: list[Diagnostic] = Field(default_factory=list) actes_ccam: list[ActeCCAM] = Field(default_factory=list) antecedents: list[Antecedent] = Field(default_factory=list) diff --git a/src/medical/dp_scoring.py b/src/medical/dp_scoring.py index 0bb49b0..9d42f00 100644 --- a/src/medical/dp_scoring.py +++ b/src/medical/dp_scoring.py @@ -17,6 +17,7 @@ from typing import Optional from ..config import ( DossierMedical, DPCandidate, + DPPoolCandidate, DPSelection, DP_REVIEW_THRESHOLD, DP_SCORING_WEIGHTS, @@ -893,7 +894,562 @@ def llm_dp_fallback( # --------------------------------------------------------------------------- -# 6. Synthèse PMSI — raisonnement clinique structuré avant codage DP +# 5b. 
DP Pool Rank — sélection du DP parmi un pool de candidats via LLM +# --------------------------------------------------------------------------- + +def _format_pool_for_prompt(candidates: list[DPPoolCandidate], max_items: int = 30) -> str: + """Formate la liste de candidats pour le prompt LLM de ranking.""" + lines = [] + for i, c in enumerate(candidates[:max_items]): + preuve_short = c.preuve[:120].replace("\n", " ").strip() if c.preuve else "" + lines.append( + f" [{i}] terme=\"{c.terme}\" | section={c.section} " + f"| preuve=\"{preuve_short}\" | score={c.score_initial:.2f}" + ) + return "\n".join(lines) + + +def _build_clinical_context( + parsed: dict, + dossier: DossierMedical, + text: str, + synthese: SynthesePMSI | None = None, +) -> str: + """Construit le contexte clinique pour le prompt de ranking. + + Priorité : + 1. SynthesePMSI structurée (si disponible) + 2. Fallback : motif + sections fortes + actes + """ + if synthese: + parts = [] + if synthese.motif_admission: + parts.append(f"Motif d'admission : {synthese.motif_admission}") + if synthese.probleme_pris_en_charge: + parts.append(f"Problème pris en charge : {synthese.probleme_pris_en_charge}") + if synthese.diagnostic_retenu: + parts.append(f"Diagnostic retenu : {synthese.diagnostic_retenu}") + if synthese.actes_ou_traitements_majeurs: + parts.append(f"Actes : {', '.join(synthese.actes_ou_traitements_majeurs)}") + if synthese.complications: + parts.append(f"Complications : {', '.join(synthese.complications)}") + if synthese.terrain_comorbidites: + parts.append(f"Terrain : {', '.join(synthese.terrain_comorbidites)}") + if synthese.preuves: + for p in synthese.preuves[:3]: + parts.append(f"Preuve [{p.section}] : {p.excerpt[:150]}") + return "\n".join(parts) + + # Fallback : sections fortes + motif = _build_motif(parsed, dossier, full_text=text) + sections_fortes = _build_strong_sections_text(parsed) + actes = _build_actes(dossier) + return ( + f"Motif d'hospitalisation : {motif}\n" + f"Sections 
cliniques :\n{sections_fortes}\n" + f"Actes : {actes}" + ) + + +def llm_dp_pool_rank( + parsed: dict, + text: str, + dossier: DossierMedical, + pool_candidates: list[DPPoolCandidate], + synthese: SynthesePMSI | None = None, + fallback_oneshot: bool = True, + dp_candidates: list[DPCandidate] | None = None, + edsnlp_result=None, +) -> DPSelection: + """Sélectionne le DP en demandant au LLM de choisir parmi le pool de candidats. + + Le LLM reçoit la SynthesePMSI (ou sections fortes en fallback) + la liste + des candidats du pool. Il doit choisir un index et recopier exactement le terme. + + Args: + parsed: CRH parsé + text: texte brut du CRH + dossier: dossier médical en cours + pool_candidates: candidats issus de build_dp_candidate_pool() + synthese: SynthesePMSI si disponible (prioritaire pour le contexte) + fallback_oneshot: si True, fallback vers llm_dp_fallback quand chosen_index=-1 + dp_candidates: candidats DP existants (pour le fallback oneshot) + edsnlp_result: résultat edsnlp (pour le fallback oneshot) + + Returns: + DPSelection avec verdict confirmed ou review. 
+ """ + if not pool_candidates: + logger.info("DP pool rank : aucun candidat dans le pool") + if fallback_oneshot: + return llm_dp_fallback(parsed, text, dossier, dp_candidates, edsnlp_result) + return DPSelection(verdict="review", winner_reason="pool vide, pas de fallback") + + try: + from .ollama_client import call_ollama + from ..prompts import DP_POOL_RANK + except ImportError: + logger.warning("Module ollama_client non disponible pour le DP pool rank") + return DPSelection(verdict="review", winner_reason="LLM non disponible") + + # Construire le contexte et la liste de candidats + contexte = _build_clinical_context(parsed, dossier, text, synthese) + candidates_text = _format_pool_for_prompt(pool_candidates) + + prompt = DP_POOL_RANK.format( + contexte_clinique=contexte, + candidates_list=candidates_text, + ) + + try: + result = call_ollama(prompt, temperature=0.0, max_tokens=600, role="coding") + except Exception: + logger.warning("Erreur LLM DP pool rank", exc_info=True) + if fallback_oneshot: + return llm_dp_fallback(parsed, text, dossier, dp_candidates, edsnlp_result) + return DPSelection(verdict="review", winner_reason="erreur LLM pool rank") + + if not result or not isinstance(result, dict): + logger.warning("Réponse LLM pool rank invalide : %s", type(result)) + if fallback_oneshot: + return llm_dp_fallback(parsed, text, dossier, dp_candidates, edsnlp_result) + return DPSelection(verdict="review", winner_reason="réponse LLM pool rank invalide") + + # Parser la réponse + chosen_index = result.get("chosen_index", -1) + chosen_terme = result.get("chosen_terme", "") + evidence_section_raw = result.get("evidence_section", "") + evidence_excerpt = result.get("evidence_excerpt", "") + confidence = result.get("confidence", "low") + reason = result.get("reason", "") + + # Normaliser l'index + if not isinstance(chosen_index, int): + try: + chosen_index = int(chosen_index) + except (ValueError, TypeError): + chosen_index = -1 + + logger.info( + "LLM pool rank: 
index=%d terme='%s' section=%s confidence=%s reason='%s'", + chosen_index, (chosen_terme or "")[:60], evidence_section_raw, confidence, (reason or "")[:80], + ) + + # chosen_index == -1 → aucun candidat retenu + if chosen_index < 0 or chosen_index >= len(pool_candidates): + logger.info("LLM pool rank : chosen_index=%d hors plage (0-%d), fallback", + chosen_index, len(pool_candidates) - 1) + if fallback_oneshot: + return llm_dp_fallback(parsed, text, dossier, dp_candidates, edsnlp_result) + return DPSelection( + verdict="review", + winner_reason=f"LLM pool rank: aucun candidat retenu (index={chosen_index})", + ) + + # Candidat sélectionné + chosen = pool_candidates[chosen_index] + + # Vérifier cohérence du terme (le LLM doit recopier exactement) + if chosen_terme and normalize_text(chosen_terme) != normalize_text(chosen.terme): + logger.warning( + "LLM pool rank : terme recopié '%s' ≠ candidat '%s' (index %d)", + chosen_terme[:60], chosen.terme[:60], chosen_index, + ) + # On fait confiance à l'index, pas au terme recopié + + # Normaliser la section + evidence_section = _normalize_evidence_section(evidence_section_raw) + if not evidence_section: + evidence_section = chosen.section + + # Utiliser la preuve du candidat si le LLM n'en fournit pas + if not evidence_excerpt: + evidence_excerpt = chosen.preuve + + source_tag = f"llm_pool_rank ({evidence_section})" + + # Le pool ne contient pas de codes CIM-10 → on ne peut pas valider/normaliser ici. + # On crée un candidat DPCandidate sans code, qui sera codé en aval par CODING_CIM10. 
+ candidate = DPCandidate( + code=None, # sera codé CIM-10 plus tard si nécessaire + label=chosen.terme, + source_section=source_tag, + source_excerpt=evidence_excerpt, + confidence_raw=confidence, + ) + + # Score synthétique + confidence_scores = {"high": 3, "medium": 2, "low": 1} + candidate.score = confidence_scores.get(confidence, 1) + candidate.score_details = { + "llm_confidence": candidate.score, + "pool_score": round(chosen.score_initial * 10), + "pool_index": chosen_index, + } + + # Garde-fous simplifiés (pas de code → pas de GF-2 comorbidité) + has_evidence = bool(evidence_excerpt and evidence_excerpt.strip()) + + # GF-1 : evidence vide → REVIEW + if not has_evidence: + logger.info("LLM pool rank : pas de preuve pour '%s', REVIEW", chosen.terme[:60]) + return DPSelection( + verdict="review", candidates=[candidate], + winner_reason=f"LLM pool rank: evidence vide pour '{chosen.terme[:40]}'", + ) + + # GF-3 : CONFIRMED uniquement si confidence high + if confidence != "high": + return DPSelection( + verdict="review", candidates=[candidate], + winner_reason=f"LLM pool rank: '{chosen.terme[:40]}' — confidence {confidence}", + ) + + return DPSelection( + verdict="confirmed", candidates=[candidate], + winner_reason=f"LLM pool rank: '{chosen.terme[:40]}' (confidence={confidence}, reason={reason[:60]})", + ) + + +# --------------------------------------------------------------------------- +# 6. DP Candidate Pool — extraction élargie de termes diagnostiques +# --------------------------------------------------------------------------- + +# Phrases indicatives : signalent le diagnostic principal dans le texte. +# Capturer le texte APRÈS le marqueur, jusqu'à un point/newline/fin de phrase. 
+_INDICATIVE_PHRASES_RE = re.compile( + r"(?:" + r"diagnostic(?:\s+(?:principal|retenu|de\s+sortie|final))?\s*(?::|retenu\s*:)" + r"|diagnostics?\s+retenus?\s*:" + r"|au\s+total\s*[:,]" + r"|en\s+(?:résumé|resume|synthèse|synthese)\s*[:,]" + r"|hospitalis[ée]e?\s+pour\b" + r"|admise?\s+pour\b" + r"|adress[ée]e?\s+pour\b" + r"|prise?\s+en\s+charge\s+(?:pour|d[e'u])\b" + r"|motif\s+(?:d[e']?\s*)?(?:hospitalisation|admission|consultation)\s*:" + r")" + r"\s*(.{5,200}?)(?:[.\n]|$)", + re.IGNORECASE, +) + +# Valeurs biologiques à exclure (termes isolés ou patterns numériques) +_BIO_EXCLUSION_RE = re.compile( + r"(?:" + r"\b(?:hb|hémoglobine|créatinine|crp|leucocytes|plaquettes|glycémie|" + r"natrémie|kaliémie|calcémie|bilirubine|albumine|fibrinogène|" + r"hématocrite|inr|tp|tca|pct|bnp|nt-?probnp|troponine|lactates?|" + r"ferritine|transferrine|vitamine|acide\s+urique|phosphatases?|" + r"transaminases?|gamma-?gt|ldh|cpk|lipase|amylase)\b" + r".*\d" # suivi d'une valeur numérique + r"|\d+[.,]?\d*\s*(?:g/[dl]|mg/[dl]|mmol/[l]|µmol/[l]|ui/[l]|%|ml/min)" + r")", + re.IGNORECASE, +) + +# Symptômes isolés trop vagues pour être candidats DP (sans contexte) +_VAGUE_SYMPTOMS = frozenset({ + "douleur", "fièvre", "toux", "fatigue", "asthénie", + "nausées", "vomissements", "céphalées", "malaise", + "vertiges", "dyspnée", "essoufflement", +}) + +# Fragments administratifs/structurels à exclure du pool +_ADMIN_NOISE_RE = re.compile( + r"(?:" + # Titres et fonctions hospitalières + r"praticiens?\s+hospitaliers?" + r"|assistant\s+sp[ée]cialiste" + r"|chef\s+de\s+(?:clinique|p[oô]le|service)" + r"|ancien\s+chef" + r"|cadre\s+(?:sant[ée]|infirmier|de\s+p[oô]le)" + r"|infirmi[eè]res?" + r"|secr[ée]tariat" + r"|assistantes?\s+sociales?" + r"|psychologues?" + r"|psychomotricienne" + r"|orthophonistes?" + r"|m[ée]decin\s+coordonnateur" + r"|m[ée]decin\s+du\s+sport" + r"|consultation\s+externes?" + r"|attach[ée]s?" + r"|cadres?\s+infirmiers?" 
+ # Diplômes / formations + r"|desc\s+\w+" + r"|diu\s+\w+" + # Identifiants et en-têtes + r"|n°\s*(?:finess|rpps)" + r"|centre\s+(?:hospitalier|de\s+comp[ée]tences)" + r"|imprim[ée]\s+le" + r"|page\(?s?\)?\s*:" + r"|bien\s+confraternellement" + r"|les\s+consignes\s+d['']\s*usage" + r"|information\s+patient" + r"|h[oô]pitaux?\s+de" + r"|h[oô]pital\s+de\s+" + r"|v\d+\s*-\s*imprim" + r"|[a-z0-9_.+-]+@[a-z0-9-]+\.[a-z]" + r"|bp\s+\d+.*cedex" + r"|avenue|boulevard|rue\s" + # Services et pôles + r"|p[oô]le\s+(?:sp[ée]cialit|femme|m[eè]re|enfant|m[ée]dical)" + r"|service\s+d[eu]s?\s+" + r"|explorations?\s+fonct" + r"|oncologue\s+digestif" + r"|proctologue" + r"|h[ée]pato-gastro" + r"|m[ée]decine\s+interne" + r"|immunologie\s+clinique" + r"|dermatologie$" + r"|rhumato-immunologie" + r"|n[ée]onatologie" + r"|p[ée]diatrie\s+de\s+" + r"|urgences\s+p[ée]diatriques" + r"|reproduction$" + r"|maladies\s+auto" + r"|auto-inflammatoires" + r"|syst[ée]miques\s+rares" + r"|allergiques$" + r"|m[ée]taboliques$" + r"|digestives?$" + r"|__{3,}" + # Posologie / médicaments avec dosage + r"|\d+\s*mg\s*(?:\(|,|\s)" + r"|\b(?:orale|sous-cutan[ée]e|intraveineuse)\b.*\b(?:matin|midi|soir|jour)\b" + r"|cpr\s+\d|cprdis|comprim[ée]" + r"|\bmatin\s+midi\s+soi" + # Rendez-vous et logistique + r"|prochains?\s+rdv" + r"|hdj\s+protocolaire" + r"|pose\s+de\s+picc" + r"|bs\s+les\s+" + r"|prise\s+de\s+rendez" + # Examen clinique (observations, pas diagnostics) + r"|murmure\s+v[ée]siculaire" + r"|pouls\s+p[ée]riph[ée]riques" + r"|abdomen\s+souple" + r"|sans\s+bruits?\s+surajout" + r"|bha\s+per[çc]us" + r"|sans\s+tj\s+ni\s+rhj" + r"|examen\s+au\s+monofilament" + r"|rp\s+de\s+contr[oô]le" + # Poids et mesures + r"|poids\s+de\s+sortie" + # Allergie sans valeur diagnostique + r"|allergie\s*:\s*\d" + # Biologie / valeurs labo (en-têtes) + r"|biologie\s+d['']\s*entr[ée]e" + # Histoire / contexte (en-têtes) + r"|histoire\s+de\s+la\s+maladie" + r"|mode\s+de\s+vie" + # Noms de services (patterns additionnels) + 
r"|endocrinologie" + r"|diab[ée]tologie" + r"|nutrition$" + r"|f[ée]d[ée]ration$" + r")", + re.IGNORECASE, +) + +# Fragments trop courts ou trop génériques (< 3 mots significatifs) +_PLACE_NOISE_RE = re.compile( + r"^(?:de\s+)?(?:bordeaux|toulouse|lille|paris|lyon|marseille|angers|tours" + r"|bayonne|montpellier|nantes|rennes|strasbourg|nancy)(?:\s+et\s+\w+)?$", + re.IGNORECASE, +) + +# Poids de section pour le score_initial (0-1) +_POOL_SECTION_WEIGHTS: dict[str, float] = { + "diag_sortie": 1.0, + "diagnostics_retenus": 1.0, + "diag_principal": 1.0, + "indicative_phrase": 0.9, + "motif_hospitalisation": 0.8, + "conclusion": 0.7, + "synthese": 0.7, + "acte": 0.5, + "edsnlp": 0.4, + "cim10_map": 0.6, + "histoire_maladie": 0.3, + "evolution": 0.3, +} + +_MAX_POOL_SIZE = 30 + + +def build_dp_candidate_pool( + parsed: dict, + text: str, + edsnlp_result, + dossier: DossierMedical, +) -> list[DPPoolCandidate]: + """Construit un pool élargi de candidats DP (termes libres, pas de codage). + + Sources : + 1. Phrases indicatives dans le texte complet + 2. Diagnostics des sections fortes (phrases médicales significatives) + 3. Entités edsnlp NER (non-niées) + 4. Actes/traitements majeurs comme indices + 5. CIM10_MAP matches dans les sections fortes + + Dédup par terme normalisé, scoring 0-1, cap à 30 candidats. + """ + from .das_filter import is_valid_diagnostic_text, clean_diagnostic_text + from .cim10_extractor import CIM10_MAP + + raw: list[DPPoolCandidate] = [] + + # --- 1. Phrases indicatives (texte complet) --- + for m in _INDICATIVE_PHRASES_RE.finditer(text): + phrase = m.group(1).strip().rstrip(",.;:!") + phrase = clean_diagnostic_text(phrase) + if _is_pool_excluded(phrase): + continue + if not is_valid_diagnostic_text(phrase): + continue + raw.append(DPPoolCandidate( + terme=phrase, + section="indicative_phrase", + preuve=_extract_excerpt(text, m.start()), + score_initial=_POOL_SECTION_WEIGHTS["indicative_phrase"], + )) + + # --- 2. 
Sections fortes : phrases médicales --- + sections = parsed.get("sections", {}) + for section_key in ("diag_sortie", "diagnostics_retenus", "diag_principal", + "motif_hospitalisation", "conclusion", "synthese"): + section_text = sections.get(section_key, "") + if not section_text: + continue + weight = _POOL_SECTION_WEIGHTS.get(section_key, 0.3) + + # Extraire les phrases/segments significatifs (séparés par ponctuation forte) + fragments = re.split(r"[.\n;]+", section_text) + for frag in fragments: + frag = clean_diagnostic_text(frag.strip()) + if len(frag) < 5 or len(frag) > 200: + continue + if _is_pool_excluded(frag): + continue + if not is_valid_diagnostic_text(frag): + continue + raw.append(DPPoolCandidate( + terme=frag, + section=section_key, + preuve=section_text[:200].strip(), + score_initial=weight, + )) + + # --- 3. edsnlp NER entities --- + if edsnlp_result: + for ent in edsnlp_result.cim10_entities: + if ent.negation or ent.hypothese: + continue + terme = clean_diagnostic_text(ent.texte.capitalize()) + if _is_pool_excluded(terme): + continue + if not is_valid_diagnostic_text(terme): + continue + raw.append(DPPoolCandidate( + terme=terme, + section="edsnlp", + preuve=f"code={ent.code}" if ent.code else "", + score_initial=_POOL_SECTION_WEIGHTS["edsnlp"], + )) + + # --- 4. Actes/traitements majeurs --- + for acte in dossier.actes_ccam[:10]: + terme = clean_diagnostic_text(acte.texte) + if len(terme) < 5: + continue + raw.append(DPPoolCandidate( + terme=terme, + section="acte", + preuve=f"CCAM={acte.code_ccam_suggestion}" if acte.code_ccam_suggestion else "", + score_initial=_POOL_SECTION_WEIGHTS["acte"], + )) + + # --- 5. 
CIM10_MAP matches dans sections fortes --- + for section_key in ("motif_hospitalisation", "conclusion", "synthese", + "diag_sortie", "diagnostics_retenus", "diag_principal"): + section_text = sections.get(section_key, "") + if not section_text: + continue + section_norm = normalize_text(section_text) + for terme_map, code in CIM10_MAP.items(): + if normalize_text(terme_map) in section_norm: + raw.append(DPPoolCandidate( + terme=terme_map.capitalize(), + section="cim10_map", + preuve=f"CIM10_MAP→{code} (dans {section_key})", + score_initial=_POOL_SECTION_WEIGHTS["cim10_map"], + )) + + # --- Dédup par terme normalisé, garder le meilleur score --- + pool = _dedup_pool(raw) + + # --- Cap à MAX_POOL_SIZE --- + pool.sort(key=lambda c: -c.score_initial) + return pool[:_MAX_POOL_SIZE] + + +def _is_pool_excluded(terme: str) -> bool: + """Vérifie si un terme doit être exclu du pool (bio, bruit admin, trop vague).""" + t_lower = terme.lower().strip() + + # Trop court + if len(t_lower) < 5: + return True + + # Valeur biologique + if _BIO_EXCLUSION_RE.search(terme): + return True + + # Symptôme isolé trop vague (un seul mot) + words = t_lower.split() + if len(words) == 1 and t_lower in _VAGUE_SYMPTOMS: + return True + + # Fragment administratif/structurel + if _ADMIN_NOISE_RE.search(terme): + return True + + # Nom de ville / lieu seul + if _PLACE_NOISE_RE.match(t_lower): + return True + + # Fragments purement numériques ou dates + if re.match(r"^[\d/\-:.\s,]+$", t_lower): + return True + + # Fragment trop court ET non-médical + # Exclure : "de Bordeaux", "Kgs", "fédération" etc. + # Garder : "Pneumopathie", "Cholécystite", "Ictère" (diagnostics valides) + if len(words) <= 2 and len(t_lower) < 15: + # Si c'est un mot unique qui passe is_valid_diagnostic_text, + # le garder (il sera filtré plus tard si vague) + if len(words) == 1 and len(t_lower) >= 6: + pass # diagnostic potentiel (pneumopathie, cholécystite, etc.) 
+ else: + return True + + return False + + +def _dedup_pool(candidates: list[DPPoolCandidate]) -> list[DPPoolCandidate]: + """Déduplique par terme normalisé, garde le score le plus élevé.""" + seen: dict[str, DPPoolCandidate] = {} + for c in candidates: + key = normalize_text(c.terme) + if not key: + continue + if key not in seen or c.score_initial > seen[key].score_initial: + seen[key] = c + return list(seen.values()) + + +# --------------------------------------------------------------------------- +# 7. Synthèse PMSI — raisonnement clinique structuré avant codage DP # --------------------------------------------------------------------------- # Comorbidités banales : NE DOIVENT PAS être probleme_pris_en_charge diff --git a/src/prompts/__init__.py b/src/prompts/__init__.py index a3f3752..441e895 100644 --- a/src/prompts/__init__.py +++ b/src/prompts/__init__.py @@ -9,6 +9,7 @@ from .templates import ( CPAM_ARGUMENTATION, DP_TIEBREAK, DP_LLM_ONESHOT, + DP_POOL_RANK, SYNTHESE_PMSI, CPAM_ADVERSARIAL, ) @@ -22,6 +23,7 @@ __all__ = [ "CPAM_ARGUMENTATION", "DP_TIEBREAK", "DP_LLM_ONESHOT", + "DP_POOL_RANK", "SYNTHESE_PMSI", "CPAM_ADVERSARIAL", ] diff --git a/src/prompts/templates.py b/src/prompts/templates.py index 481231f..583e6c5 100644 --- a/src/prompts/templates.py +++ b/src/prompts/templates.py @@ -372,7 +372,48 @@ Réponds UNIQUEMENT en JSON : }}""" # --------------------------------------------------------------------------- -# 8. Synthèse PMSI — raisonnement clinique structuré avant codage DP +# 8. DP Pool Rank — sélection du DP parmi un pool de candidats +# --------------------------------------------------------------------------- +# Rôle : coding | Temperature : 0.0 | Max tokens : 600 +# Fichier d'origine : src/medical/dp_scoring.py → llm_dp_pool_rank() +# Variables : contexte_clinique, candidates_list + +DP_POOL_RANK = """\ +Tu es un médecin DIM (Département d'Information Médicale) expert en codage PMSI. 
+Tu dois choisir le Diagnostic Principal (DP) PARMI les candidats ci-dessous. + +DÉFINITION DU DP (Guide méthodologique ATIH) : +Le DP est le problème de santé qui a mobilisé l'essentiel des ressources du séjour. C'est la pathologie ACTIVE, TRAITÉE, RETENUE en fin de séjour. + +RÈGLES DE SÉLECTION : +1. Choisis le diagnostic correspondant au "problème pris en charge" — la pathologie AIGUË/ACTIVE qui justifie l'hospitalisation, PAS une comorbidité chronique de fond (HTA, diabète équilibré, obésité, dyslipidémie). +2. Ne choisis JAMAIS un candidat purement administratif, logistique ou géographique. +3. En cas de plusieurs diagnostics actifs, préfère : + a) Le diagnostic AIGU traité pendant ce séjour + b) Le diagnostic explicitement "retenu" / "au total" / "diagnostic de sortie" + c) Le diagnostic issu de la section la plus fiable (diag_sortie > conclusion > synthese) +4. Si AUCUN candidat n'est assez solide pour être le DP, retourne chosen_index = -1. + +CONTEXTE CLINIQUE : +{contexte_clinique} + +CANDIDATS (index, terme, section, preuve, score) : +{candidates_list} + +INSTRUCTION CRITIQUE : le champ "chosen_terme" DOIT être la RECOPIE EXACTE du terme candidat (pas de paraphrase, pas de reformulation). + +Réponds UNIQUEMENT en JSON : +{{ + "chosen_index": 0, + "chosen_terme": "recopie EXACTE du terme du candidat choisi", + "evidence_section": "section du candidat choisi", + "evidence_excerpt": "extrait court du texte prouvant le choix (copié de la preuve ou du contexte)", + "confidence": "high ou medium ou low", + "reason": "1 phrase max justifiant le choix" +}}""" + +# --------------------------------------------------------------------------- +# 9. 
# PMSI synthesis — structured clinical reasoning before DP coding
# ---------------------------------------------------------------------------
# Role: coding | Temperature: 0.0 | Max tokens: 1200
# Origin file: src/medical/dp_scoring.py -> generate_synthese_pmsi()
#
# NOTE(review): the patch switches here to tests/test_dp_scoring.py. Its
# import hunk (elided diff context) additionally imports ActeCCAM and
# DPPoolCandidate from src.config, and build_dp_candidate_pool,
# llm_dp_pool_rank, _format_pool_for_prompt, _build_clinical_context,
# _dedup_pool and _is_pool_excluded from src.medical.dp_scoring.
# The new test classes below are appended after the existing
# TestSectionNormalization / TestBuildMotifFallback suites.


class TestSynthesePMSI:
    """Tests for generate_synthese_pmsi()."""

    def test_returns_synthese_on_valid_response(self, monkeypatch):
        """Valid LLM response -> fully populated SynthesePMSI."""
        mock_response = {
            "motif_admission": "Douleur abdominale aiguë",
            "probleme_pris_en_charge": "Pancréatite aiguë biliaire",
            "diagnostic_retenu": "Pancréatite aiguë d'origine biliaire",
            "actes_ou_traitements_majeurs": ["Scanner abdominal", "Mise à jeun"],
            "complications": [],
            "terrain_comorbidites": ["HTA traitée", "Diabète type 2"],
            "preuves": [
                {"section": "motif_hospitalisation", "excerpt": "douleur abdominale intense"},
                {"section": "conclusion", "excerpt": "pancréatite aiguë biliaire confirmée"},
            ],
        }

        def mock_call_ollama(prompt, **kwargs):
            return mock_response

        import src.medical.dp_scoring as mod
        monkeypatch.setattr(mod, "call_ollama", mock_call_ollama, raising=False)
        # Force the inline (function-scope) import to hit our mock too
        import src.medical.ollama_client as oc_mod
        monkeypatch.setattr(oc_mod, "call_ollama", mock_call_ollama)

        parsed = _make_parsed(sections={"conclusion": "pancréatite aiguë biliaire confirmée"})
        dossier = DossierMedical()

        result = generate_synthese_pmsi(parsed, "texte complet", dossier)

        assert result is not None
        assert isinstance(result, SynthesePMSI)
        assert result.probleme_pris_en_charge == "Pancréatite aiguë biliaire"
        assert result.motif_admission == "Douleur abdominale aiguë"
        assert "Scanner abdominal" in result.actes_ou_traitements_majeurs
        assert len(result.terrain_comorbidites) == 2
        assert result.complications == []
        assert len(result.preuves) == 2
        assert result.preuves[0].section == "motif_hospitalisation"

    def test_returns_none_on_invalid_response(self, monkeypatch):
        """Non-dict LLM response -> None."""
        def mock_call_ollama(prompt, **kwargs):
            return "texte brut"

        import src.medical.ollama_client as oc_mod
        monkeypatch.setattr(oc_mod, "call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()

        result = generate_synthese_pmsi(parsed, "texte", dossier)
        assert result is None

    def test_returns_none_on_exception(self, monkeypatch):
        """LLM raising -> None (failure is swallowed, not propagated)."""
        def mock_call_ollama(prompt, **kwargs):
            raise ConnectionError("Ollama down")

        import src.medical.ollama_client as oc_mod
        monkeypatch.setattr(oc_mod, "call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()

        result = generate_synthese_pmsi(parsed, "texte", dossier)
        assert result is None

    def test_robust_to_string_lists(self, monkeypatch):
        """LLM returns plain strings instead of lists -> coerced, tolerated."""
        mock_response = {
            "motif_admission": "Fièvre",
            "probleme_pris_en_charge": "Pneumopathie",
            "diagnostic_retenu": "Pneumopathie bactérienne",
            "actes_ou_traitements_majeurs": "Antibiothérapie IV",  # string
            "complications": "Insuffisance respiratoire",  # string
            "terrain_comorbidites": "BPCO",  # string
            "preuves": [],
        }

        def mock_call_ollama(prompt, **kwargs):
            return mock_response

        import src.medical.ollama_client as oc_mod
        monkeypatch.setattr(oc_mod, "call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()

        result = generate_synthese_pmsi(parsed, "texte", dossier)

        assert result is not None
        assert result.actes_ou_traitements_majeurs == ["Antibiothérapie IV"]
        assert result.complications == ["Insuffisance respiratoire"]
        assert result.terrain_comorbidites == ["BPCO"]

    def test_preuves_malformed_skipped(self, monkeypatch):
        """Evidence entries missing section/excerpt -> ignored."""
        mock_response = {
            "motif_admission": "Test",
            "probleme_pris_en_charge": "Test",
            "diagnostic_retenu": "Test",
            "preuves": [
                {"section": "conclusion", "excerpt": "valide"},
                {"section": "", "excerpt": "section vide"},
                {"no_section": True},
                "pas un dict",
            ],
        }

        def mock_call_ollama(prompt, **kwargs):
            return mock_response

        import src.medical.ollama_client as oc_mod
        monkeypatch.setattr(oc_mod, "call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()

        result = generate_synthese_pmsi(parsed, "texte", dossier)

        assert result is not None
        assert len(result.preuves) == 1
        assert result.preuves[0].section == "conclusion"

    def test_serialization_round_trip(self):
        """SynthesePMSI serializes and deserializes without loss."""
        syn = SynthesePMSI(
            motif_admission="Douleur thoracique",
            probleme_pris_en_charge="Infarctus du myocarde",
            diagnostic_retenu="IDM ST+ antérieur",
            actes_ou_traitements_majeurs=["Coronarographie", "Angioplastie"],
            complications=["Insuffisance cardiaque"],
            terrain_comorbidites=["HTA", "Tabagisme"],
            preuves=[PreuveSynthese(section="conclusion", excerpt="IDM confirmé")],
        )
        data = syn.model_dump()
        restored = SynthesePMSI(**data)
        assert restored.probleme_pris_en_charge == "Infarctus du myocarde"
        assert len(restored.preuves) == 1
        assert restored.preuves[0].section == "conclusion"

    def test_dossier_medical_field(self):
        """The synthese_pmsi field exists on DossierMedical and serializes."""
        dossier = DossierMedical()
        assert dossier.synthese_pmsi is None
        dossier.synthese_pmsi = SynthesePMSI(
            probleme_pris_en_charge="Test",
        )
        assert dossier.synthese_pmsi.probleme_pris_en_charge == "Test"
        data = dossier.model_dump(exclude_none=True)
        assert "synthese_pmsi" in data


# ===========================================================================
# DP Candidate Pool
# ===========================================================================

class TestDPPoolCandidate:
    """Tests for the DPPoolCandidate model."""

    def test_basic_creation(self):
        c = DPPoolCandidate(terme="Pancréatite aiguë", section="conclusion")
        assert c.terme == "Pancréatite aiguë"
        assert c.section == "conclusion"
        assert c.score_initial == 0.0
        assert c.preuve == ""

    def test_serialization(self):
        c = DPPoolCandidate(
            terme="Cholécystite aiguë",
            section="diag_sortie",
            preuve="cholécystite aiguë lithiasique",
            score_initial=0.9,
        )
        data = c.model_dump()
        restored = DPPoolCandidate(**data)
        assert restored.terme == "Cholécystite aiguë"
        assert restored.score_initial == 0.9


class TestIsPoolExcluded:
    """Tests for pool candidate filtering."""

    def test_bio_value_excluded(self):
        assert _is_pool_excluded("CRP 180 mg/L") is True

    def test_bio_term_with_number_excluded(self):
        assert _is_pool_excluded("Hémoglobine 7.2 g/dL") is True

    def test_vague_symptom_excluded(self):
        assert _is_pool_excluded("douleur") is True
        assert _is_pool_excluded("fièvre") is True

    def test_vague_symptom_with_context_kept(self):
        """Qualified (multi-word) symptom -> kept."""
        assert _is_pool_excluded("douleur abdominale aiguë") is False

    def test_medical_diagnosis_kept(self):
        assert _is_pool_excluded("Pancréatite aiguë biliaire") is False

    def test_numeric_value_excluded(self):
        assert _is_pool_excluded("12.5 g/dL") is True


class TestDedupPool:
    """Tests for pool deduplication."""

    def test_dedup_keeps_highest_score(self):
        candidates = [
            DPPoolCandidate(terme="Pancréatite aiguë", section="conclusion", score_initial=0.7),
            DPPoolCandidate(terme="Pancréatite aiguë", section="diag_sortie", score_initial=1.0),
        ]
        result = _dedup_pool(candidates)
        assert len(result) == 1
        assert result[0].score_initial == 1.0
        assert result[0].section == "diag_sortie"

    def test_dedup_normalizes_text(self):
        """Accent/spacing variants -> same dedup key."""
        candidates = [
            DPPoolCandidate(terme="Pancréatite aiguë", section="a", score_initial=0.5),
            DPPoolCandidate(terme="pancreatite aigue", section="b", score_initial=0.8),
        ]
        result = _dedup_pool(candidates)
        assert len(result) == 1

    def test_distinct_terms_kept(self):
        candidates = [
            DPPoolCandidate(terme="Pancréatite aiguë", section="a", score_initial=0.7),
            DPPoolCandidate(terme="Cholécystite aiguë", section="b", score_initial=0.9),
        ]
        result = _dedup_pool(candidates)
        assert len(result) == 2


class TestBuildDPCandidatePool:
    """Integration tests for build_dp_candidate_pool()."""

    def test_indicative_phrase_extraction(self):
        """Indicative phrases are extracted from the full text."""
        text = "Le patient a été hospitalisé pour pancréatite aiguë biliaire. Suivi habituel."
        parsed = _make_parsed(sections={"conclusion": "Pancréatite aiguë biliaire confirmée."})
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, text, None, dossier)

        termes = [c.terme.lower() for c in pool]
        assert any("pancréatite" in t or "pancreatite" in t for t in termes)

    def test_sections_fortes_extraction(self):
        """Diagnoses from strong sections show up in the pool."""
        parsed = _make_parsed(sections={
            "diag_sortie": "Cholécystite aiguë lithiasique",
            "conclusion": "Évolution favorable après cholécystectomie",
        })
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte complet", None, dossier)

        termes = [c.terme.lower() for c in pool]
        assert any("cholécystite" in t or "cholecystite" in t for t in termes)

    def test_edsnlp_entities_included(self):
        """Non-negated edsnlp entities show up in the pool."""
        from dataclasses import dataclass

        @dataclass
        class MockEntity:
            texte: str
            code: str
            negation: bool = False
            hypothese: bool = False

        @dataclass
        class MockResult:
            cim10_entities: list

        edsnlp = MockResult(cim10_entities=[
            MockEntity(texte="pneumopathie", code="J18.9"),
            MockEntity(texte="HTA", code="I10", negation=True),  # excluded
        ])
        parsed = _make_parsed()
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte", edsnlp, dossier)

        termes = [c.terme.lower() for c in pool]
        assert any("pneumopathie" in t for t in termes)
        # Negated HTA must not appear
        assert not any(t == "hta" for t in termes)

    def test_actes_included(self):
        """The dossier's CCAM procedures appear as candidates."""
        parsed = _make_parsed()
        dossier = DossierMedical()
        dossier.actes_ccam = [
            ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
        ]

        pool = build_dp_candidate_pool(parsed, "texte", None, dossier)

        termes = [c.terme.lower() for c in pool]
        assert any("cholécystectomie" in t or "cholecystectomie" in t for t in termes)

    def test_cim10_map_matches(self):
        """CIM10_MAP terms matched in strong sections are included."""
        parsed = _make_parsed(sections={
            "conclusion": "Patient avec pancréatite aiguë biliaire sévère.",
        })
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte", None, dossier)

        sections = [c.section for c in pool]
        assert "cim10_map" in sections

    def test_bio_values_excluded(self):
        """Laboratory values do not pollute the pool."""
        parsed = _make_parsed(sections={
            "conclusion": "CRP 180 mg/L. Hémoglobine 7.2 g/dL. Pancréatite aiguë.",
        })
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte", None, dossier)

        termes = [c.terme.lower() for c in pool]
        assert not any("crp" in t and "mg" in t for t in termes)

    def test_dedup_across_sources(self):
        """Same term from 2 sources -> single entry (best score)."""
        parsed = _make_parsed(sections={
            "conclusion": "Pancréatite aiguë biliaire confirmée.",
            "motif_hospitalisation": "Pancréatite aiguë biliaire.",
        })
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte", None, dossier)

        # Count "pancréatite aiguë biliaire" variants
        from src.medical.cim10_dict import normalize_text
        keys = [normalize_text(c.terme) for c in pool]
        pancreatite_keys = [k for k in keys if "pancreatite" in k and "biliaire" in k]
        # After dedup there should be at most 1-2 (full sentence vs segment)
        assert len(pancreatite_keys) <= 2

    def test_cap_at_30(self):
        """The pool is capped at 30 candidates."""
        # Build a text with many diagnoses
        diagnostics = [f"diagnostic numéro {i}" for i in range(50)]
        section_text = ". ".join(diagnostics) + "."
        parsed = _make_parsed(sections={"conclusion": section_text})
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, section_text, None, dossier)

        assert len(pool) <= 30

    def test_empty_input(self):
        """Empty input -> empty pool (still a list)."""
        parsed = _make_parsed()
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "", None, dossier)

        assert isinstance(pool, list)

    def test_score_ordering(self):
        """The pool is sorted by descending score_initial."""
        parsed = _make_parsed(sections={
            "diag_sortie": "Cholécystite aiguë",
            "conclusion": "Angiocholite associée",
        })
        dossier = DossierMedical()

        pool = build_dp_candidate_pool(parsed, "texte", None, dossier)

        if len(pool) >= 2:
            scores = [c.score_initial for c in pool]
            assert scores == sorted(scores, reverse=True)


# ===========================================================================
# Anti-comorbidité SynthesePMSI
# ===========================================================================
# NOTE(review): diff context elided here — the existing anti-comorbidity and
# TestBuildMotifFallback suites (ending with
# `assert _build_motif(parsed, dossier) == "Non renseigné"`) are unchanged.


# ===================================================================
# DP Pool Rank tests
# ===================================================================

class TestFormatPoolForPrompt:
    """Tests for _format_pool_for_prompt()."""

    def test_basic_formatting(self):
        """Checks the candidate formatting used in the prompt."""
        pool = [
            DPPoolCandidate(terme="Pneumopathie", section="conclusion",
                            preuve="Au total : pneumopathie", score_initial=0.7),
            DPPoolCandidate(terme="Embolie pulmonaire", section="diag_sortie",
                            preuve="Diagnostic de sortie", score_initial=1.0),
        ]
        text = _format_pool_for_prompt(pool)
        assert "[0]" in text
        assert "[1]" in text
        assert "Pneumopathie" in text
        assert "Embolie pulmonaire" in text
        assert "conclusion" in text
        assert "diag_sortie" in text

    def test_max_items_cap(self):
        """Checks that max_items is honored."""
        pool = [
            DPPoolCandidate(terme=f"Diag_{i}", section="conclusion", score_initial=0.5)
            for i in range(10)
        ]
        text = _format_pool_for_prompt(pool, max_items=3)
        assert "[0]" in text
        assert "[2]" in text
        assert "[3]" not in text

    def test_empty_pool(self):
        """Empty pool -> empty string."""
        assert _format_pool_for_prompt([]) == ""


class TestBuildClinicalContext:
    """Tests for _build_clinical_context()."""

    def test_with_synthese(self):
        """With a SynthesePMSI available."""
        synthese = SynthesePMSI(
            motif_admission="Douleur thoracique",
            probleme_pris_en_charge="Embolie pulmonaire",
            diagnostic_retenu="Embolie pulmonaire bilatérale",
        )
        parsed = _make_parsed()
        dossier = DossierMedical()
        ctx = _build_clinical_context(parsed, dossier, "", synthese)
        assert "Embolie pulmonaire" in ctx
        assert "Douleur thoracique" in ctx

    def test_without_synthese(self):
        """Without SynthesePMSI -> fallback on strong sections."""
        parsed = _make_parsed(sections={"conclusion": "Pneumopathie traitée"})
        dossier = DossierMedical()
        ctx = _build_clinical_context(parsed, dossier, "texte complet", None)
        assert "Pneumopathie traitée" in ctx
        assert "Motif" in ctx


class TestLlmDpPoolRank:
    """Unit tests for llm_dp_pool_rank() — no real LLM call."""

    def test_empty_pool_fallback_off(self):
        """Empty pool + fallback OFF -> REVIEW."""
        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=[],
            fallback_oneshot=False,
        )
        assert selection.verdict == "review"
        assert "pool vide" in selection.winner_reason

    def test_empty_pool_fallback_on(self, monkeypatch):
        """Empty pool + fallback ON -> delegates to llm_dp_fallback."""
        # Mock llm_dp_fallback to return a known result
        from src.medical import dp_scoring
        mock_selection = DPSelection(
            verdict="review",
            winner_reason="fallback activé",
        )
        monkeypatch.setattr(dp_scoring, "llm_dp_fallback", lambda *a, **kw: mock_selection)

        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=[],
            fallback_oneshot=True,
        )
        assert selection.verdict == "review"
        assert "fallback" in selection.winner_reason

    def test_valid_llm_response_high_confidence(self, monkeypatch):
        """Valid LLM response with high confidence -> CONFIRMED."""
        pool = [
            DPPoolCandidate(terme="Embolie pulmonaire", section="conclusion",
                            preuve="Au total : embolie pulmonaire", score_initial=0.7),
            DPPoolCandidate(terme="HTA", section="conclusion",
                            preuve="terrain HTA", score_initial=0.3),
        ]

        # Mock call_ollama
        def mock_call_ollama(prompt, **kwargs):
            return {
                "chosen_index": 0,
                "chosen_terme": "Embolie pulmonaire",
                "evidence_section": "conclusion",
                "evidence_excerpt": "Au total : embolie pulmonaire",
                "confidence": "high",
                "reason": "pathologie aiguë traitée",
            }

        from src.medical import dp_scoring
        monkeypatch.setattr("src.medical.ollama_client.call_ollama", mock_call_ollama)

        parsed = _make_parsed(sections={"conclusion": "Au total : embolie pulmonaire"})
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=pool,
            fallback_oneshot=False,
        )
        assert selection.verdict == "confirmed"
        assert len(selection.candidates) == 1
        assert selection.candidates[0].label == "Embolie pulmonaire"
        assert selection.candidates[0].source_section == "llm_pool_rank (conclusion)"
        assert selection.candidates[0].code is None  # no CIM-10 code yet, coded downstream

    def test_valid_llm_response_medium_confidence(self, monkeypatch):
        """LLM response with medium confidence -> REVIEW."""
        pool = [
            DPPoolCandidate(terme="Insuffisance cardiaque", section="conclusion",
                            preuve="insuffisance cardiaque", score_initial=0.7),
        ]

        def mock_call_ollama(prompt, **kwargs):
            return {
                "chosen_index": 0,
                "chosen_terme": "Insuffisance cardiaque",
                "evidence_section": "conclusion",
                "evidence_excerpt": "insuffisance cardiaque globale",
                "confidence": "medium",
                "reason": "diagnostic probable",
            }

        monkeypatch.setattr("src.medical.ollama_client.call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=pool,
            fallback_oneshot=False,
        )
        assert selection.verdict == "review"
        assert "confidence medium" in selection.winner_reason

    def test_chosen_index_minus_one_fallback_off(self, monkeypatch):
        """chosen_index=-1 + fallback OFF -> REVIEW."""
        pool = [
            DPPoolCandidate(terme="HTA", section="conclusion",
                            preuve="HTA", score_initial=0.3),
        ]

        def mock_call_ollama(prompt, **kwargs):
            return {
                "chosen_index": -1,
                "chosen_terme": "",
                "confidence": "low",
                "reason": "aucun candidat solide",
            }

        monkeypatch.setattr("src.medical.ollama_client.call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=pool,
            fallback_oneshot=False,
        )
        assert selection.verdict == "review"
        assert "aucun candidat retenu" in selection.winner_reason

    def test_index_out_of_range_fallback_off(self, monkeypatch):
        """Index out of range -> REVIEW."""
        pool = [
            DPPoolCandidate(terme="Pneumopathie", section="conclusion",
                            preuve="...", score_initial=0.7),
        ]

        def mock_call_ollama(prompt, **kwargs):
            return {
                "chosen_index": 5,
                "chosen_terme": "Fantôme",
                "confidence": "high",
            }

        monkeypatch.setattr("src.medical.ollama_client.call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=pool,
            fallback_oneshot=False,
        )
        assert selection.verdict == "review"

    def test_score_details_contain_pool_info(self, monkeypatch):
        """The chosen candidate's score_details carry the pool info."""
        pool = [
            DPPoolCandidate(terme="Cholécystite aiguë", section="diag_sortie",
                            preuve="cholécystite aiguë lithiasique", score_initial=0.9),
        ]

        def mock_call_ollama(prompt, **kwargs):
            return {
                "chosen_index": 0,
                "chosen_terme": "Cholécystite aiguë",
                "evidence_section": "diag_sortie",
                "evidence_excerpt": "cholécystite aiguë lithiasique",
                "confidence": "high",
                "reason": "diagnostic chirurgical aigu",
            }

        monkeypatch.setattr("src.medical.ollama_client.call_ollama", mock_call_ollama)

        parsed = _make_parsed()
        dossier = DossierMedical()
        selection = llm_dp_pool_rank(
            parsed, "texte", dossier,
            pool_candidates=pool,
            fallback_oneshot=False,
        )
        assert selection.verdict == "confirmed"
        details = selection.candidates[0].score_details
        assert "pool_score" in details
        assert "pool_index" in details
        assert details["pool_index"] == 0