"""Comparaison systématique V1.1 vs legacy sur les 18 dossiers.""" import json import os from pathlib import Path FIELDS_RECUEIL = [ ("etablissement", "etablissement"), ("finess", "finess"), ("date_debut_controle", "date_debut_controle"), ("n_ogc", "n_ogc"), ("n_champ", "n_champ"), ("dates_sejour", "dates_sejour"), ("codage_etab.dp", "codage_etab.dp"), ("codage_etab.dp_libelle", "codage_etab.dp_libelle"), ("codage_etab.dr", "codage_etab.dr"), ("codage_reco.dp", "codage_reco.dp"), ("ghm_etab", "ghm_etab"), ("ghs_etab", "ghs_etab"), ("ghm_reco", "ghm_reco"), ("ghs_reco", "ghs_reco"), ("recodage_impactant", "recodage_impactant"), ("ghs_injustifie", "ghs_injustifie"), ("accord_desaccord", "accord_desaccord"), ("praticien_conseil", "praticien_conseil"), ] def get(d, path): for k in path.split("."): d = d.get(k, "") if isinstance(d, dict) else "" return str(d).strip() def count_das(d, path): for k in path.split("."): d = d.get(k, []) if isinstance(d, dict) else [] return len(d) if isinstance(d, list) else 0 def load_pairs(): pairs = [] for f in sorted(Path("output").glob("OGC *.json")): v1_path = Path("output/v2") / f.name if not v1_path.exists(): continue with open(f) as g: legacy = json.load(g) with open(v1_path) as g: v11 = json.load(g) rec_legacy = (legacy.get("recueil") or {}).get("parsed") or {} rec_v11 = (v11.get("extraction") or {}).get("recueil") or {} pairs.append((f.stem, rec_legacy, rec_v11)) return pairs def bench(): pairs = load_pairs() print(f"\n{'Dossier':10s} {'Champ':28s} {'legacy':30s} {'v1.1':30s}") print("="*110) totals = {f: {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0} for f, _ in FIELDS_RECUEIL} totals["das_etab"] = {"both": 0, "v11_only": 0, "leg_only": 0, "match": 0, "diff": 0} for name, leg, v11 in pairs: # Champs simples for fk, lk in FIELDS_RECUEIL: vl = get(leg, lk) vv = get(v11, fk) if not vl and not vv: continue if vl and not vv: totals[fk]["leg_only"] += 1 elif vv and not vl: totals[fk]["v11_only"] += 1 else: totals[fk]["both"] += 1 if fk == "codage_etab.dp_libelle": ok = vl in vv or vv in vl else: ok = vl == vv if ok: totals[fk]["match"] += 1 else: totals[fk]["diff"] += 1 # DAS count cle = count_das(leg, "codage_etab.das") cv1 = count_das(v11, "codage_etab.das") if cle and cv1: totals["das_etab"]["both"] += 1 if cle == cv1: totals["das_etab"]["match"] += 1 else: totals["das_etab"]["diff"] += 1 elif cle: totals["das_etab"]["leg_only"] += 1 elif cv1: totals["das_etab"]["v11_only"] += 1 print(f"\n{'Champ':28s} {'match':>6s} {'diff':>5s} {'v11+':>5s} {'leg+':>5s} {'both':>5s}") print("-"*70) order = [f for f, _ in FIELDS_RECUEIL] + ["das_etab"] for fk in order: t = totals[fk] print(f" {fk:28s} {t['match']:>6d} {t['diff']:>5d} " f"{t['v11_only']:>5d} {t['leg_only']:>5d} {t['both']:>5d}") print("\nLégende :") print(" match = les deux extraient la même valeur") print(" diff = les deux extraient mais des valeurs différentes (à arbitrer)") print(" v11+ = V1.1 extrait, legacy vide") print(" leg+ = legacy extrait, V1.1 vide") print(" both = nb dossiers où les deux ont extrait qqch (match+diff)") if __name__ == "__main__": bench()