#!/usr/bin/env python3
"""Test 3 additional models and merge their results into resultats_v2.json.

Models:
- t2a-gemma3-27b-q4:latest  (in-house T2A fine-tune — the key model of this test)
- DeepSeek-R1:latest        (reasoning model, decent French)
- gpt-oss:120b-cloud        (top tier 2026)
"""
import json
import sys
from pathlib import Path

# Put this script's own directory first on sys.path so the sibling module
# run_simulation_v2 can be imported whatever the current working directory is.
sys.path.insert(0, str(Path(__file__).parent))
from run_simulation_v2 import run_one_model, stats_for_results  # noqa: E402

EXTRA_MODELS = [
    "t2a-gemma3-27b-q4:latest",
    "DeepSeek-R1:latest",
    "gpt-oss:120b-cloud",
]

# Existing results file, located next to this script. It must already exist:
# read_text() raises FileNotFoundError otherwise.
results_path = Path(__file__).parent / "resultats_v2.json"
# Assumes the top-level JSON value is an object keyed by model name, since
# entries are assigned by model below — TODO confirm against run_simulation_v2.
all_data = json.loads(results_path.read_text(encoding="utf-8"))

for model in EXTRA_MODELS:
    print(f"\n>>> Test {model}")
    results = run_one_model(model)
    s = stats_for_results(results)

    # One-line summary: overall score, then the per-type entries for
    # simple / complexe / borderline cases ((0, 0) when a type is absent),
    # then average latency and the parse-error count.
    print(f" → {s['correct']}/{s['n']} ({100*s['accuracy']:.0f}%) "
          f"S={s['by_type'].get('simple', (0, 0))} "
          f"C={s['by_type'].get('complexe', (0, 0))} "
          f"B={s['by_type'].get('borderline', (0, 0))} "
          f"latence={s['avg_latency_s']:.1f}s parse_err={s['parse_errors']}")

    # Flatten each raw result into the record layout stored in the JSON file.
    # Any previous entry for this model is overwritten.
    all_data[model] = [
        {
            "id": r["cas"]["id"],
            "titre": r["cas"]["titre"],
            "type": r["cas"]["type"],
            "verite_terrain": r["cas"]["verite_terrain"],
            "criteres_attendus": r["cas"]["criteres_cles"],
            "prediction": r["out"],
            "decision": r["decision"],
            "match": r["match"],
        }
        for r in results
    ]

    # NOTE(review): the file is rewritten after every model so that results
    # already obtained survive a failure on a later model — confirm this
    # per-model placement matches the intended behaviour.
    # ensure_ascii=False keeps accented characters readable in the file.
    results_path.write_text(
        json.dumps(all_data, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f" → mergé dans {results_path.name}")