#!/usr/bin/env python3
"""Test Apollo2-9B (Q4_K_S, multilingual medical model with explicit FR support).

Calls ``run_one_model`` for a single model, prints the statistics returned by
``stats_for_results`` and merges one summary record per result into
``resultats_v2.json``, located next to this script, under the model's name.
"""
import json
import sys
from pathlib import Path

# Make the sibling module importable regardless of the current working directory.
sys.path.insert(0, str(Path(__file__).parent))
from run_simulation_v2 import run_one_model, stats_for_results  # noqa: E402

MODEL = "hf.co/mradermacher/Apollo2-9B-GGUF:Q4_K_S"

results = run_one_model(MODEL)
s = stats_for_results(results)

# Console summary: overall score, per-type entries (simple / complexe /
# borderline, defaulting to (0,0) when a type is absent), then latency,
# parse errors and the confidence distribution.
print(f"\n>>> {s['correct']}/{s['n']} ({100*s['accuracy']:.0f}%)")
print(f" S={s['by_type'].get('simple', (0,0))} C={s['by_type'].get('complexe', (0,0))} B={s['by_type'].get('borderline', (0,0))}")
print(f" latence={s['avg_latency_s']:.1f}s parse_err={s['parse_errors']} conf={s['confiance_distribution']}")

results_path = Path(__file__).parent / "resultats_v2.json"

# The model run above has already completed at this point; a results file that
# does not exist yet must not discard it, so start from an empty mapping.
try:
    all_data = json.loads(results_path.read_text(encoding="utf-8"))
except FileNotFoundError:
    all_data = {}

# Replace any previous entry for this model with the fresh per-case records.
all_data[MODEL] = [
    {
        "id": r["cas"]["id"],
        "titre": r["cas"]["titre"],
        "type": r["cas"]["type"],
        "verite_terrain": r["cas"]["verite_terrain"],
        "criteres_attendus": r["cas"]["criteres_cles"],
        "prediction": r["out"],
        "decision": r["decision"],
        "match": r["match"],
    }
    for r in results
]

# ensure_ascii=False keeps accented characters readable in the JSON file.
results_path.write_text(json.dumps(all_data, ensure_ascii=False, indent=2), encoding="utf-8")
print(f" → mergé dans {results_path.name}")