rpa_vision_v3/demo/facturation_urgences/run_simulation.py

#!/usr/bin/env python3
"""
Simulation : pertinence de MedGemma:4b pour la décision de facturation urgences.

Pour chaque DPI urgences synthétique, on demande au modèle :
  - décision : FORFAIT_URGENCE vs REQUALIFICATION_HOSPITALISATION
  - critères objectifs identifiés
  - justification courte
  - confiance

On compare à la vérité-terrain et on produit un rapport.

Lancer : python run_simulation.py
"""

import json
import sys
import time
import urllib.request
import urllib.error
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
from cas_dpi import CAS  # noqa: E402

# HTTP endpoint that query_medgemma() POSTs its generation request to.
OLLAMA_URL = "http://localhost:11434/api/generate"
# Model name sent in the "model" field of the request payload.
MODEL = "medgemma:4b"

# Prompt sent to the model (French). It is rendered with
# PROMPT_TEMPLATE.format(dpi=...), so the doubled braces {{ }} around the JSON
# schema are literal braces escaped for str.format; {dpi} is the only
# placeholder. The two decision labels named here are the values compared
# against each case's "verite_terrain" in run().
PROMPT_TEMPLATE = """Tu es un médecin DIM (Département d'Information Médicale) expert en facturation T2A/PMSI aux urgences hospitalières en France.

À partir du dossier patient ci-dessous, tu dois déterminer si le passage relève :
- d'un FORFAIT_URGENCE (passage simple, geste ponctuel ou bilan rassurant, retour à domicile sans surveillance prolongée)
- d'une REQUALIFICATION_HOSPITALISATION (séjour MCO requis : surveillance scopée prolongée, soins continus IV, oxygénothérapie/VNI, examens itératifs, transfert vers service spécialisé, durée > 24h en UHCD ou critères de gravité)

Réponds UNIQUEMENT en JSON valide strict, sans aucun texte avant ou après, selon ce schéma :
{{
  "decision": "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION",
  "criteres": [liste courte de critères objectifs (3-6 items max) que tu identifies dans le dossier],
  "justification": "2-3 phrases maximum",
  "confiance": "elevee" | "moyenne" | "faible"
}}

DOSSIER PATIENT :
{dpi}
"""


def query_medgemma(dpi_text: str, timeout: int = 300) -> dict:
    """Send one patient record to the Ollama endpoint and return the model's answer.

    Args:
        dpi_text: Patient record text, substituted into PROMPT_TEMPLATE.
        timeout: Timeout in seconds passed to ``urllib.request.urlopen``.

    Returns:
        The JSON object produced by the model, or
        ``{"_raw": <model text>, "_parse_error": True}`` when the model text
        is not valid JSON or is valid JSON that is not an object. In both
        cases two keys are added: ``"_elapsed_s"`` (request duration in
        seconds, rounded to one decimal) and ``"_eval_count"`` (the
        ``eval_count`` field of the Ollama reply, ``None`` if absent).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        json.JSONDecodeError: If the HTTP body itself is not valid JSON.
    """
    payload = {
        "model": MODEL,
        "prompt": PROMPT_TEMPLATE.format(dpi=dpi_text),
        "stream": False,
        "format": "json",
        # Generation options forwarded as-is to Ollama.
        "options": {
            "temperature": 0.1,
            "num_predict": 600,
            "num_ctx": 4096,
        },
    }
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        OLLAMA_URL, data=data, headers={"Content-Type": "application/json"}, method="POST"
    )

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # a system clock adjustment during a long generation.
    t0 = time.perf_counter()
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read().decode("utf-8")
    elapsed = time.perf_counter() - t0

    body = json.loads(raw)
    # A missing or null "response" field is treated as an empty answer.
    raw_response = (body.get("response") or "").strip()

    try:
        parsed = json.loads(raw_response)
    except json.JSONDecodeError:
        parsed = None
    # Valid JSON that is not an object (list, string, number, null) cannot
    # carry the expected keys nor the metadata added below; report it the
    # same way as unparsable text instead of failing on item assignment.
    if not isinstance(parsed, dict):
        parsed = {"_raw": raw_response, "_parse_error": True}

    parsed["_elapsed_s"] = round(elapsed, 1)
    parsed["_eval_count"] = body.get("eval_count")
    return parsed


def fmt_decision(d: str) -> str:
    """Return the short display label for a decision value.

    ``"FORFAIT_URGENCE"`` maps to ``"FORFAIT"`` and
    ``"REQUALIFICATION_HOSPITALISATION"`` to ``"HOSPIT"``. Any other
    non-empty string is returned unchanged, and an empty or ``None`` value
    yields ``"?"``.

    The value usually comes straight from model output, so it may not be a
    string at all. Non-string values are rendered with ``str()`` so that the
    result is always a ``str`` and unhashable values (list, dict) do not
    raise during the lookup.
    """
    if not d:
        return "?"
    if not isinstance(d, str):
        return str(d)
    labels = {
        "FORFAIT_URGENCE": "FORFAIT",
        "REQUALIFICATION_HOSPITALISATION": "HOSPIT",
    }
    return labels.get(d, d)


def run() -> None:
    """Run every case in CAS through the model, print a report and save it.

    For each case the patient record is sent with query_medgemma() and the
    returned "decision" is compared with the case's "verite_terrain". After
    the loop a summary is printed (accuracy, latency, confusion matrix and a
    per-case table) and the per-case results are written to
    ``resultats.json`` next to this script.

    A case whose call fails, whose answer is not parsable, or whose decision
    is not one of the two expected labels counts as a miss for accuracy but
    is reported on its own line rather than as a false positive/negative.
    """
    hospit = "REQUALIFICATION_HOSPITALISATION"
    forfait = "FORFAIT_URGENCE"
    rule = "=" * 78

    print(f"\n{rule}")
    print("  SIMULATION MedGemma:4b — Facturation urgences (forfait vs hospit)")
    print(f"  Modèle : {MODEL} | Endpoint : {OLLAMA_URL}")
    print(f"  Cas : {len(CAS)} DPI synthétiques")
    print(f"{rule}\n")

    # Each record holds the case, the raw model output, the match flag and
    # the normalised decision string (None when no usable answer came back).
    results = []
    correct = 0
    parse_errors = 0
    answered = 0  # cases that contributed to total_elapsed
    total_elapsed = 0.0

    for cas in CAS:
        gt = cas["verite_terrain"]
        print(f"--- Cas {cas['id']:>2} | {cas['type']:<8} | {cas['titre']}")
        print(f"    Vérité-terrain : {fmt_decision(gt)}")
        try:
            out = query_medgemma(cas["dpi"])
        except (urllib.error.URLError, TimeoutError, ConnectionError, json.JSONDecodeError) as e:
            # JSONDecodeError: the endpoint answered with a non-JSON body;
            # treat it like any other failed call instead of aborting the run.
            print(f"    ERREUR appel Ollama : {e}\n")
            results.append({"cas": cas, "out": {"_error": str(e)}, "match": False, "decision": None})
            continue

        if out.get("_parse_error"):
            parse_errors += 1
            print(f"    !! Réponse non-JSON : {out.get('_raw', '')[:200]}")
            results.append({"cas": cas, "out": out, "match": False, "decision": None})
            print()
            continue

        decision = out.get("decision", "?")
        if not isinstance(decision, str):
            # The model may emit a list/dict/number here; keep the report
            # printable. Such a value can never equal the ground truth.
            decision = str(decision) if decision else "?"
        match = decision == gt
        if match:
            correct += 1
        total_elapsed += out.get("_elapsed_s", 0)
        answered += 1

        flag = "OK" if match else "KO"
        print(f"    Prédiction    : {fmt_decision(decision)}  [{flag}]")
        print(f"    Confiance     : {out.get('confiance', '?')}")
        crits = out.get("criteres", [])
        if isinstance(crits, list):
            print(f"    Critères      : {', '.join(str(c) for c in crits[:6])}")
        else:
            print(f"    Critères      : {crits}")
        justif = out.get("justification", "")
        print(f"    Justification : {justif}")
        print(f"    Latence       : {out.get('_elapsed_s', '?')}s | tokens: {out.get('_eval_count', '?')}")
        print()
        results.append({"cas": cas, "out": out, "match": match, "decision": decision})

    # ---------- Summary ----------
    n = len(CAS)
    accuracy_pct = 100 * correct / n if n else 0.0  # no division by zero on an empty case list
    # Average only over the cases whose latency was accumulated; failed calls
    # and parse errors add nothing to total_elapsed.
    mean_latency = total_elapsed / answered if answered else 0.0
    print(rule)
    print("  RÉSULTAT GLOBAL")
    print(rule)
    print(f"  Accuracy décision      : {correct}/{n} ({accuracy_pct:.0f} %)")
    print(f"  Erreurs parsing JSON   : {parse_errors}/{n}")
    print(f"  Latence moyenne        : {mean_latency:.1f} s/cas")
    print(f"  Latence cumulée        : {total_elapsed:.1f} s")

    # Confusion matrix, built from what the model actually predicted so that
    # errors and out-of-schema answers are not mislabelled as FP/FN.
    pairs = [(r["cas"]["verite_terrain"], r["decision"]) for r in results]
    tp = pairs.count((hospit, hospit))
    tn = pairs.count((forfait, forfait))
    fp = pairs.count((forfait, hospit))
    fn = pairs.count((hospit, forfait))
    unusable = len(pairs) - (tp + tn + fp + fn)

    print("\n  Matrice de confusion (positif = HOSPIT) :")
    print(f"    Vrais positifs (HOSPIT correct)  : {tp}")
    print(f"    Vrais négatifs (FORFAIT correct) : {tn}")
    print(f"    Faux positifs (HOSPIT à tort)    : {fp}  → sur-codage potentiel")
    print(f"    Faux négatifs (FORFAIT à tort)   : {fn}  → manque à gagner facturation")
    print(f"    Sans décision exploitable        : {unusable}  → erreur d'appel, JSON invalide ou décision hors schéma")

    # Per-case recap table
    print("\n  Détail par cas :")
    print(f"  {'#':<3} {'Type':<9} {'GT':<8} {'Pred':<8} {'OK?':<4} {'Conf.':<8} Titre")
    print(f"  {'-' * 76}")
    for r in results:
        c = r["cas"]
        out = r["out"]
        if out.get("_parse_error"):
            pred, conf = "PARSE!", "-"
        else:
            pred = fmt_decision(r["decision"] or "?")
            # str() first: the model may return a number or null as confidence.
            conf = str(out.get("confiance", "?"))[:7]
        gt = fmt_decision(c["verite_terrain"])
        ok = "✓" if r["match"] else "✗"
        print(f"  {c['id']:<3} {c['type']:<9} {gt:<8} {pred:<8} {ok:<4} {conf:<8} {c['titre']}")

    # Save the detailed results as JSON for later reuse
    out_path = Path(__file__).parent / "resultats.json"
    with out_path.open("w", encoding="utf-8") as f:
        json.dump(
            [
                {
                    "id": r["cas"]["id"],
                    "titre": r["cas"]["titre"],
                    "type": r["cas"]["type"],
                    "verite_terrain": r["cas"]["verite_terrain"],
                    "criteres_attendus": r["cas"]["criteres_cles"],
                    "prediction": r["out"],
                    "match": r["match"],
                }
                for r in results
            ],
            f,
            ensure_ascii=False,
            indent=2,
        )
    print(f"\n  Résultats détaillés sauvegardés : {out_path}")
    print(f"{rule}\n")


# Script entry point: run the simulation only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run()