chore(bench): résultats V2 et rapports de benchmarking
Snapshot des 18 JSONs produits par le pipeline V2 (Qwen2.5-VL-3B +
checkboxes densité + validation ATIH), utiles au collaborateur comme
référence de ce que la chaîne actuelle produit.
Rapports :
- bench_v2_report.md : comparaison V2 vs legacy docTR+VLM
(couverture, divergences, régressions
notables sur codage_reco et praticien).
- validation_report.md : résumé de la validation ATIH sur les 18
JSONs (131/149 → 140/149 codes valides
après fix suffixes `*` et `+N`, 0
incohérence GHM↔GHS, 8 suggestions de
correction OCR).
Script de comparaison :
- bench_v11_vs_legacy.py : tableau d'accord champ par champ entre
un run du pipeline (output/v2/) et les
JSONs legacy (output/).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
bench_v11_vs_legacy.py
Normal file
106
bench_v11_vs_legacy.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Comparaison systématique V1.1 vs legacy sur les 18 dossiers."""
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Scalar fields compared between the two pipelines, as (v11_key, legacy_key)
# pairs of dotted paths into each "recueil" dict. The paths currently coincide
# on both sides, but keeping two entries allows the schemas to diverge later.
FIELDS_RECUEIL = [
    ("etablissement", "etablissement"),
    ("finess", "finess"),
    ("date_debut_controle", "date_debut_controle"),
    ("n_ogc", "n_ogc"),
    ("n_champ", "n_champ"),
    ("dates_sejour", "dates_sejour"),
    ("codage_etab.dp", "codage_etab.dp"),
    ("codage_etab.dp_libelle", "codage_etab.dp_libelle"),
    ("codage_etab.dr", "codage_etab.dr"),
    ("codage_reco.dp", "codage_reco.dp"),
    ("ghm_etab", "ghm_etab"),
    ("ghs_etab", "ghs_etab"),
    ("ghm_reco", "ghm_reco"),
    ("ghs_reco", "ghs_reco"),
    ("recodage_impactant", "recodage_impactant"),
    ("ghs_injustifie", "ghs_injustifie"),
    ("accord_desaccord", "accord_desaccord"),
    ("praticien_conseil", "praticien_conseil"),
]
|
||||
|
||||
|
||||
def get(d, path):
    """Return the value at dotted *path* inside nested dict *d* as a stripped string.

    Missing keys and non-dict intermediates resolve to "".  An explicit JSON
    null (None) also yields "" — previously a trailing None leaked through as
    the literal string "None", which the bench then counted as an extracted
    value and reported as a spurious diff.
    """
    for key in path.split("."):
        d = d.get(key, "") if isinstance(d, dict) else ""
    # Normalize explicit nulls to "empty" so they match absent fields.
    return "" if d is None else str(d).strip()
|
||||
|
||||
|
||||
def count_das(d, path):
    """Return the number of items in the list found at dotted *path* in *d*.

    Yields 0 when the path is absent, crosses a non-dict node, or does not
    end on a list.
    """
    node = d
    for key in path.split("."):
        if isinstance(node, dict):
            node = node.get(key, [])
        else:
            node = []
    if isinstance(node, list):
        return len(node)
    return 0
|
||||
|
||||
|
||||
def load_pairs():
    """Collect (stem, legacy_recueil, v11_recueil) triples for every dossier.

    Scans output/ for legacy "OGC *.json" files, keeps only those that also
    exist under output/v2/, and extracts each side's "recueil" dict (legacy
    nests it under recueil.parsed, v1.1 under extraction.recueil).
    """
    v2_dir = Path("output/v2")
    triples = []
    for legacy_file in sorted(Path("output").glob("OGC *.json")):
        v11_file = v2_dir / legacy_file.name
        if not v11_file.exists():
            continue
        with open(legacy_file) as fh:
            legacy_doc = json.load(fh)
        with open(v11_file) as fh:
            v11_doc = json.load(fh)
        legacy_rec = (legacy_doc.get("recueil") or {}).get("parsed") or {}
        v11_rec = (v11_doc.get("extraction") or {}).get("recueil") or {}
        triples.append((legacy_file.stem, legacy_rec, v11_rec))
    return triples
|
||||
|
||||
|
||||
def bench():
    """Print a field-by-field agreement table between the legacy and V1.1 runs.

    For each dossier pair loaded from disk, every field in FIELDS_RECUEIL is
    bucketed into one of: match / diff (both extracted), v11_only / leg_only
    (exactly one side extracted).  The DAS list is compared by length only,
    under the synthetic key "das_etab".  Results are printed to stdout.
    """
    pairs = load_pairs()
    # NOTE(review): this header line is printed but no per-dossier rows follow —
    # the loop below only accumulates totals; presumably leftover from an
    # earlier verbose mode.
    print(f"\n{'Dossier':10s} {'Champ':28s} {'legacy':30s} {'v1.1':30s}")
    print("="*110)

    # One counter bucket per field; "both" = match + diff.
    totals = {f: {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0}
              for f, _ in FIELDS_RECUEIL}
    totals["das_etab"] = {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0}

    for name, leg, v11 in pairs:
        # Champs simples (simple scalar fields)
        for fk, lk in FIELDS_RECUEIL:
            vl = get(leg, lk)
            vv = get(v11, fk)
            if not vl and not vv: continue
            if vl and not vv: totals[fk]["leg_only"] += 1
            elif vv and not vl: totals[fk]["v11_only"] += 1
            else:
                totals[fk]["both"] += 1
                if fk == "codage_etab.dp_libelle":
                    # Free-text label: accept substring containment either way,
                    # since the two pipelines truncate/expand labels differently.
                    ok = vl in vv or vv in vl
                else:
                    ok = vl == vv
                if ok: totals[fk]["match"] += 1
                else:
                    totals[fk]["diff"] += 1
        # DAS count — compared by list length, not content.
        cle = count_das(leg, "codage_etab.das")
        cv1 = count_das(v11, "codage_etab.das")
        if cle and cv1:
            totals["das_etab"]["both"] += 1
            if cle == cv1: totals["das_etab"]["match"] += 1
            else: totals["das_etab"]["diff"] += 1
        elif cle: totals["das_etab"]["leg_only"] += 1
        elif cv1: totals["das_etab"]["v11_only"] += 1

    # Summary table, one row per field plus the DAS pseudo-field.
    print(f"\n{'Champ':28s} {'match':>6s} {'diff':>5s} {'v11+':>5s} {'leg+':>5s} {'both':>5s}")
    print("-"*70)
    order = [f for f, _ in FIELDS_RECUEIL] + ["das_etab"]
    for fk in order:
        t = totals[fk]
        print(f"  {fk:28s} {t['match']:>6d} {t['diff']:>5d} "
              f"{t['v11_only']:>5d} {t['leg_only']:>5d} {t['both']:>5d}")

    print("\nLégende :")
    print("  match = les deux extraient la même valeur")
    print("  diff  = les deux extraient mais des valeurs différentes (à arbitrer)")
    print("  v11+  = V1.1 extrait, legacy vide")
    print("  leg+  = legacy extrait, V1.1 vide")
    print("  both  = nb dossiers où les deux ont extrait qqch (match+diff)")
|
||||
|
||||
|
||||
# Script entry point: run the comparison when executed directly.
if __name__ == "__main__":
    bench()
|
||||
Reference in New Issue
Block a user