rpa_vision_v3/demo/facturation_urgences/run_extra_models.py

#!/usr/bin/env python3
"""Tester 3 modèles additionnels et merger dans resultats_v2.json :
- t2a-gemma3-27b-q4:latest (fine-tune T2A maison — clé du test)
- DeepSeek-R1:latest (reasoning, FR correct)
- gpt-oss:120b-cloud (top tier 2026)
"""

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))
from run_simulation_v2 import run_one_model, stats_for_results  # noqa: E402

# Model tags evaluated by this script, in execution order. Each tag is passed
# as-is to run_one_model() and is also used as the key under which that
# model's results are stored in the merged results file.
EXTRA_MODELS: list[str] = [
    "t2a-gemma3-27b-q4:latest",  # in-house T2A fine-tune (the key test)
    "DeepSeek-R1:latest",        # reasoning model
    "gpt-oss:120b-cloud",        # large cloud-hosted model
]

# Results file living next to this script. It must already exist: the extra
# models are merged into its current content rather than written from scratch.
# NOTE(review): presumably generated by run_simulation_v2 — confirm.
results_path = Path(__file__).parent / "resultats_v2.json"
# Assumed to be a JSON object keyed by model name (it is indexed by model tag
# below) — TODO confirm against the producer of the file.
all_data = json.loads(results_path.read_text(encoding="utf-8"))

# Sibling scratch file: every save goes here first and is then moved over the
# real file, so an interruption during a write (Ctrl-C in a long run, crash,
# full disk) can never leave resultats_v2.json truncated.
_tmp_results_path = results_path.with_name(results_path.name + ".tmp")

for model in EXTRA_MODELS:
    print(f"\n>>> Test {model}")
    results = run_one_model(model)

    # One-line summary: overall score, then the per-type entries for
    # simple / complexe / borderline, then average latency and parse errors.
    # The (0, 0) default is shown when a case type is absent from by_type.
    s = stats_for_results(results)
    print(f"  → {s['correct']}/{s['n']} ({100*s['accuracy']:.0f}%)  "
          f"S={s['by_type'].get('simple', (0,0))}  "
          f"C={s['by_type'].get('complexe', (0,0))}  "
          f"B={s['by_type'].get('borderline', (0,0))}  "
          f"latence={s['avg_latency_s']:.1f}s  parse_err={s['parse_errors']}")

    # Flatten each raw result into the record layout stored in the results
    # file; any previous entry for the same model is overwritten.
    all_data[model] = [
        {
            "id": r["cas"]["id"],
            "titre": r["cas"]["titre"],
            "type": r["cas"]["type"],
            "verite_terrain": r["cas"]["verite_terrain"],
            "criteres_attendus": r["cas"]["criteres_cles"],
            "prediction": r["out"],
            "decision": r["decision"],
            "match": r["match"],
        }
        for r in results
    ]

    # Persist after every model so finished runs survive a failure of a later
    # one. Write the full document to the scratch file, then swap it in.
    _tmp_results_path.write_text(
        json.dumps(all_data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    _tmp_results_path.replace(results_path)
    print(f"  → mergé dans {results_path.name}")