Files
Aivanov_scan_ogc/bench_v11_vs_legacy.py
Dom b6dd9ff1df chore(bench): résultats V2 et rapports de benchmarking
Snapshot des 18 JSONs produits par le pipeline V2 (Qwen2.5-VL-3B +
checkboxes densité + validation ATIH), utiles au collaborateur comme
référence de ce que la chaîne actuelle produit.

Rapports :
- bench_v2_report.md       : comparaison V2 vs legacy docTR+VLM
                             (couverture, divergences, régressions
                             notables sur codage_reco et praticien).
- validation_report.md     : résumé de la validation ATIH sur les 18
                             JSONs (131/149 → 140/149 codes valides
                             après fix suffixes `*` et `+N`, 0
                             incohérence GHM↔GHS, 8 suggestions de
                             correction OCR).

Script de comparaison :
- bench_v11_vs_legacy.py   : tableau d'accord champ par champ entre
                             un run du pipeline (output/v2/) et les
                             JSONs legacy (output/).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 15:06:30 +02:00

107 lines
3.7 KiB
Python

"""Comparaison systématique V1.1 vs legacy sur les 18 dossiers."""
import json
import os
from pathlib import Path
# Dotted paths of the scalar "recueil" fields compared by the benchmark.
# Each entry is (path in the V1.1 record, path in the legacy record); the
# first element is also the key used for the per-field counters.
FIELDS_RECUEIL = [
    ("etablissement", "etablissement"),
    ("finess", "finess"),
    ("date_debut_controle", "date_debut_controle"),
    ("n_ogc", "n_ogc"),
    ("n_champ", "n_champ"),
    ("dates_sejour", "dates_sejour"),
    ("codage_etab.dp", "codage_etab.dp"),
    ("codage_etab.dp_libelle", "codage_etab.dp_libelle"),
    ("codage_etab.dr", "codage_etab.dr"),
    ("codage_reco.dp", "codage_reco.dp"),
    ("ghm_etab", "ghm_etab"),
    ("ghs_etab", "ghs_etab"),
    ("ghm_reco", "ghm_reco"),
    ("ghs_reco", "ghs_reco"),
    ("recodage_impactant", "recodage_impactant"),
    ("ghs_injustifie", "ghs_injustifie"),
    ("accord_desaccord", "accord_desaccord"),
    ("praticien_conseil", "praticien_conseil"),
]
def get(d, path):
    """Return the value at dotted ``path`` in nested dict ``d`` as a stripped string.

    Args:
        d: The (possibly nested) dict to read from.
        path: Dot-separated sequence of keys, e.g. ``"codage_etab.dp"``.

    Returns:
        ``str(value).strip()`` for the value found at ``path``. An empty
        string is returned when a key is missing, when an intermediate node
        is not a dict, when the value is ``None`` (JSON ``null``), or when it
        is an empty list/dict.
    """
    for key in path.split("."):
        d = d.get(key, "") if isinstance(d, dict) else ""
    # A JSON null or an empty container carries no extracted value. Without
    # this guard str() would yield the truthy strings "None", "[]" or "{}",
    # and callers testing the result for emptiness would count the field as
    # extracted. False and 0 are kept: they are genuine values.
    if d is None or (isinstance(d, (list, dict)) and not d):
        return ""
    return str(d).strip()
def count_das(d, path):
    """Return the length of the list stored at dotted ``path`` in ``d``.

    Args:
        d: The (possibly nested) dict to read from.
        path: Dot-separated sequence of keys, e.g. ``"codage_etab.das"``.

    Returns:
        The number of items in the list found at ``path``; ``0`` when a key
        is missing, when an intermediate node is not a dict, or when the
        value found is not a list.
    """
    node = d
    for key in path.split("."):
        if not isinstance(node, dict):
            # Cannot descend any further: nothing to count.
            return 0
        node = node.get(key, [])
    if isinstance(node, list):
        return len(node)
    return 0
def load_pairs():
    """Load every legacy JSON that has a same-named counterpart in ``output/v2``.

    Legacy files are the ``OGC *.json`` files directly under ``output``
    (relative to the current working directory), visited in sorted order.
    A legacy file with no counterpart in ``output/v2`` is skipped.

    Returns:
        A list of ``(stem, legacy_recueil, v11_recueil)`` tuples where
        ``legacy_recueil`` is ``legacy["recueil"]["parsed"]`` and
        ``v11_recueil`` is ``v11["extraction"]["recueil"]``; either falls
        back to ``{}`` when the key is absent or its value is falsy.
    """
    pairs = []
    for legacy_path in sorted(Path("output").glob("OGC *.json")):
        v11_path = Path("output/v2") / legacy_path.name
        if not v11_path.exists():
            continue
        # Decode explicitly as UTF-8: relying on the locale default would
        # mis-decode accented text on platforms whose default is not UTF-8.
        with open(legacy_path, encoding="utf-8") as fh:
            legacy = json.load(fh)
        with open(v11_path, encoding="utf-8") as fh:
            v11 = json.load(fh)
        rec_legacy = (legacy.get("recueil") or {}).get("parsed") or {}
        rec_v11 = (v11.get("extraction") or {}).get("recueil") or {}
        pairs.append((legacy_path.stem, rec_legacy, rec_v11))
    return pairs
def bench():
    """Print a field-by-field agreement report between legacy and V1.1 records.

    The report has two parts:

    1. A detail table with one row per divergence, i.e. each (case file,
       field) for which both sides extracted a value but the values differ.
       The header of this table was already printed by the previous
       implementation, but no row was ever emitted under it.
    2. A summary table with, for each field of ``FIELDS_RECUEIL`` plus the
       synthetic ``das_etab`` entry, the counters ``match``, ``diff``,
       ``v11_only``, ``leg_only`` and ``both``, followed by a legend.

    Values are compared as the stripped strings returned by ``get``. For
    ``codage_etab.dp_libelle`` a match is accepted when either string contains
    the other; every other field requires strict equality. ``das_etab``
    compares only the number of items in ``codage_etab.das`` on each side.

    Returns:
        None. All output goes to stdout.
    """

    def new_counters():
        # A fresh dict per field so the counters are never shared.
        return {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0}

    def print_detail(dossier, field, legacy_value, v11_value):
        # Values are cut to the column width so the table stays aligned.
        print(f"{dossier:10s} {field:28s} {legacy_value[:30]:30s} {v11_value[:30]:30s}")

    pairs = load_pairs()
    print(f"\n{'Dossier':10s} {'Champ':28s} {'legacy':30s} {'v1.1':30s}")
    print("="*110)

    totals = {f: new_counters() for f, _ in FIELDS_RECUEIL}
    totals["das_etab"] = new_counters()

    for name, leg, v11 in pairs:
        # Simple scalar fields.
        for fk, lk in FIELDS_RECUEIL:
            vl = get(leg, lk)
            vv = get(v11, fk)
            if not vl and not vv:
                continue
            counters = totals[fk]
            if vl and not vv:
                counters["leg_only"] += 1
            elif vv and not vl:
                counters["v11_only"] += 1
            else:
                counters["both"] += 1
                if fk == "codage_etab.dp_libelle":
                    # Labels may be truncated on one side: accept containment.
                    ok = vl in vv or vv in vl
                else:
                    ok = vl == vv
                if ok:
                    counters["match"] += 1
                else:
                    counters["diff"] += 1
                    print_detail(name, fk, vl, vv)

        # DAS: only the number of entries is compared, not their content.
        cle = count_das(leg, "codage_etab.das")
        cv1 = count_das(v11, "codage_etab.das")
        das = totals["das_etab"]
        if cle and cv1:
            das["both"] += 1
            if cle == cv1:
                das["match"] += 1
            else:
                das["diff"] += 1
                print_detail(name, "das_etab", str(cle), str(cv1))
        elif cle:
            das["leg_only"] += 1
        elif cv1:
            das["v11_only"] += 1

    print(f"\n{'Champ':28s} {'match':>6s} {'diff':>5s} {'v11+':>5s} {'leg+':>5s} {'both':>5s}")
    print("-"*70)
    order = [f for f, _ in FIELDS_RECUEIL] + ["das_etab"]
    for fk in order:
        t = totals[fk]
        print(f" {fk:28s} {t['match']:>6d} {t['diff']:>5d} "
              f"{t['v11_only']:>5d} {t['leg_only']:>5d} {t['both']:>5d}")
    print("\nLégende :")
    print(" match = les deux extraient la même valeur")
    print(" diff = les deux extraient mais des valeurs différentes (à arbitrer)")
    print(" v11+ = V1.1 extrait, legacy vide")
    print(" leg+ = legacy extrait, V1.1 vide")
    print(" both = nb dossiers où les deux ont extrait qqch (match+diff)")
# Script entry point: run the comparison only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    bench()