# rpa_vision_v3/demo/facturation_urgences/run_apollo2.py

#!/usr/bin/env python3
"""Test Apollo2-9B (Q4_K_S, médical multilingue avec FR explicite)."""

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
from run_simulation_v2 import run_one_model, stats_for_results  # noqa: E402

# Model tag handed to run_one_model; also the key under which this run is
# stored in the shared results file.
MODEL = "hf.co/mradermacher/Apollo2-9B-GGUF:Q4_K_S"

# Run the model through the simulation harness and aggregate the outcome.
results = run_one_model(MODEL)
s = stats_for_results(results)

# Console summary: overall score, per-case-type breakdown, then latency,
# parse-error count and confidence distribution as reported by the stats dict.
# NOTE(review): by_type values are presumably (correct, total) pairs, judging
# by the (0,0) fallback -- confirm against stats_for_results.
print(f"\n>>> {s['correct']}/{s['n']} ({100*s['accuracy']:.0f}%)")
print(f"  S={s['by_type'].get('simple', (0,0))}  C={s['by_type'].get('complexe', (0,0))}  B={s['by_type'].get('borderline', (0,0))}")
print(f"  latence={s['avg_latency_s']:.1f}s  parse_err={s['parse_errors']}  conf={s['confiance_distribution']}")

# Merge this model's per-case results into the shared JSON file that sits next
# to this script, replacing any previous entry stored under the same model key.
results_path = Path(__file__).parent / "resultats_v2.json"
try:
    all_data = json.loads(results_path.read_text(encoding="utf-8"))
except FileNotFoundError:
    # No results file yet: start from an empty mapping instead of crashing
    # after the model run has already completed.
    all_data = {}
all_data[MODEL] = [
    {
        "id": r["cas"]["id"],
        "titre": r["cas"]["titre"],
        "type": r["cas"]["type"],
        "verite_terrain": r["cas"]["verite_terrain"],
        "criteres_attendus": r["cas"]["criteres_cles"],
        "prediction": r["out"],
        "decision": r["decision"],
        "match": r["match"],
    }
    for r in results
]
# Write to a sibling temp file and rename it over the target, so an interrupted
# write cannot truncate the results already recorded for other models.
tmp_path = results_path.with_name(results_path.name + ".tmp")
tmp_path.write_text(json.dumps(all_data, ensure_ascii=False, indent=2), encoding="utf-8")
tmp_path.replace(results_path)
print(f"  → mergé dans {results_path.name}")