chore(bench): résultats V2 et rapports de benchmarking
Snapshot des 18 JSONs produits par le pipeline V2 (Qwen2.5-VL-3B +
checkboxes densité + validation ATIH), utiles au collaborateur comme
référence de ce que la chaîne actuelle produit.
Rapports :
- bench_v2_report.md : comparaison V2 vs legacy docTR+VLM
(couverture, divergences, régressions
notables sur codage_reco et praticien).
- validation_report.md : résumé de la validation ATIH sur les 18
JSONs (131/149 → 140/149 codes valides
après fix suffixes `*` et `+N`, 0
incohérence GHM↔GHS, 8 suggestions de
correction OCR).
Script de comparaison :
- bench_v11_vs_legacy.py : tableau d'accord champ par champ entre
un run du pipeline (output/v2/) et les
JSONs legacy (output/).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
bench_v11_vs_legacy.py
Normal file
106
bench_v11_vs_legacy.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Comparaison systématique V1.1 vs legacy sur les 18 dossiers."""
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Scalar fields compared between the two pipelines, as (v11_key, legacy_key)
# pairs of dotted paths into each "recueil" dict. The paths currently coincide
# on both sides, but keeping two entries allows the schemas to diverge later.
FIELDS_RECUEIL = [
    ("etablissement", "etablissement"),
    ("finess", "finess"),
    ("date_debut_controle", "date_debut_controle"),
    ("n_ogc", "n_ogc"),
    ("n_champ", "n_champ"),
    ("dates_sejour", "dates_sejour"),
    ("codage_etab.dp", "codage_etab.dp"),
    ("codage_etab.dp_libelle", "codage_etab.dp_libelle"),
    ("codage_etab.dr", "codage_etab.dr"),
    ("codage_reco.dp", "codage_reco.dp"),
    ("ghm_etab", "ghm_etab"),
    ("ghs_etab", "ghs_etab"),
    ("ghm_reco", "ghm_reco"),
    ("ghs_reco", "ghs_reco"),
    ("recodage_impactant", "recodage_impactant"),
    ("ghs_injustifie", "ghs_injustifie"),
    ("accord_desaccord", "accord_desaccord"),
    ("praticien_conseil", "praticien_conseil"),
]
|
||||
|
||||
|
||||
def get(d, path):
    """Return the value at dotted *path* inside nested dict *d* as a stripped string.

    Missing keys and non-dict intermediates resolve to "".  An explicit JSON
    null (None) also yields "" — previously a trailing None leaked through as
    the literal string "None", which the bench then counted as an extracted
    value and reported as a spurious diff.
    """
    for key in path.split("."):
        d = d.get(key, "") if isinstance(d, dict) else ""
    # Normalize explicit nulls to "empty" so they match absent fields.
    return "" if d is None else str(d).strip()
|
||||
|
||||
|
||||
def count_das(d, path):
    """Return the number of items in the list found at dotted *path* in *d*.

    Yields 0 when the path is absent, crosses a non-dict node, or does not
    end on a list.
    """
    node = d
    for key in path.split("."):
        if isinstance(node, dict):
            node = node.get(key, [])
        else:
            node = []
    if isinstance(node, list):
        return len(node)
    return 0
|
||||
|
||||
|
||||
def load_pairs():
    """Collect (stem, legacy_recueil, v11_recueil) triples for every dossier.

    Scans output/ for legacy "OGC *.json" files, keeps only those that also
    exist under output/v2/, and extracts each side's "recueil" dict (legacy
    nests it under recueil.parsed, v1.1 under extraction.recueil).
    """
    v2_dir = Path("output/v2")
    triples = []
    for legacy_file in sorted(Path("output").glob("OGC *.json")):
        v11_file = v2_dir / legacy_file.name
        if not v11_file.exists():
            continue
        with open(legacy_file) as fh:
            legacy_doc = json.load(fh)
        with open(v11_file) as fh:
            v11_doc = json.load(fh)
        legacy_rec = (legacy_doc.get("recueil") or {}).get("parsed") or {}
        v11_rec = (v11_doc.get("extraction") or {}).get("recueil") or {}
        triples.append((legacy_file.stem, legacy_rec, v11_rec))
    return triples
|
||||
|
||||
|
||||
def bench():
    """Print a field-by-field agreement table between the legacy and V1.1 runs.

    For each dossier pair loaded from disk, every field in FIELDS_RECUEIL is
    bucketed into one of: match / diff (both extracted), v11_only / leg_only
    (exactly one side extracted).  The DAS list is compared by length only,
    under the synthetic key "das_etab".  Results are printed to stdout.
    """
    pairs = load_pairs()
    # NOTE(review): this header line is printed but no per-dossier rows follow —
    # the loop below only accumulates totals; presumably leftover from an
    # earlier verbose mode.
    print(f"\n{'Dossier':10s} {'Champ':28s} {'legacy':30s} {'v1.1':30s}")
    print("="*110)

    # One counter bucket per field; "both" = match + diff.
    totals = {f: {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0}
              for f, _ in FIELDS_RECUEIL}
    totals["das_etab"] = {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0}

    for name, leg, v11 in pairs:
        # Champs simples (simple scalar fields)
        for fk, lk in FIELDS_RECUEIL:
            vl = get(leg, lk)
            vv = get(v11, fk)
            if not vl and not vv: continue
            if vl and not vv: totals[fk]["leg_only"] += 1
            elif vv and not vl: totals[fk]["v11_only"] += 1
            else:
                totals[fk]["both"] += 1
                if fk == "codage_etab.dp_libelle":
                    # Free-text label: accept substring containment either way,
                    # since the two pipelines truncate/expand labels differently.
                    ok = vl in vv or vv in vl
                else:
                    ok = vl == vv
                if ok: totals[fk]["match"] += 1
                else:
                    totals[fk]["diff"] += 1
        # DAS count — compared by list length, not content.
        cle = count_das(leg, "codage_etab.das")
        cv1 = count_das(v11, "codage_etab.das")
        if cle and cv1:
            totals["das_etab"]["both"] += 1
            if cle == cv1: totals["das_etab"]["match"] += 1
            else: totals["das_etab"]["diff"] += 1
        elif cle: totals["das_etab"]["leg_only"] += 1
        elif cv1: totals["das_etab"]["v11_only"] += 1

    # Summary table, one row per field plus the DAS pseudo-field.
    print(f"\n{'Champ':28s} {'match':>6s} {'diff':>5s} {'v11+':>5s} {'leg+':>5s} {'both':>5s}")
    print("-"*70)
    order = [f for f, _ in FIELDS_RECUEIL] + ["das_etab"]
    for fk in order:
        t = totals[fk]
        print(f"  {fk:28s} {t['match']:>6d} {t['diff']:>5d} "
              f"{t['v11_only']:>5d} {t['leg_only']:>5d} {t['both']:>5d}")

    print("\nLégende :")
    print("  match = les deux extraient la même valeur")
    print("  diff  = les deux extraient mais des valeurs différentes (à arbitrer)")
    print("  v11+  = V1.1 extrait, legacy vide")
    print("  leg+  = legacy extrait, V1.1 vide")
    print("  both  = nb dossiers où les deux ont extrait qqch (match+diff)")
|
||||
|
||||
|
||||
# Script entry point: run the comparison when executed directly.
if __name__ == "__main__":
    bench()
|
||||
Reference in New Issue
Block a user