# Listing metadata (repository-browser export, kept for provenance):
# ScanOGC_extract/generate_report.py — 2026-04-24 11:04:31 +02:00
# 684 lines · 30 KiB · Python
"""
Génération du bilan d'extraction OGC — MISTRAL
Usage : python3 generate_report.py
"""
import json
import re
import sys
from datetime import datetime
from pathlib import Path
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
HRFlowable, PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table,
TableStyle,
)
# ─── Config ───────────────────────────────────────────────────────────────────
BASE = Path(__file__).parent  # directory containing this script
OUTPUT = BASE / "output"  # all input JSON and the output PDF live here
MODEL = "ministral-3:8b-cloud"  # model identifier of the extraction run (appears unused here — kept for provenance)
LABEL = "MISTRAL"  # human-readable model label shown in the report
ACC = colors.HexColor("#6c3483")  # accent colour (purple) for headers and tables
JSON_PATH = OUTPUT / "extraction_ogc_raw.json"  # raw model extraction
CORRECTION_PATH = OUTPUT / "extraction_ogc_raw_Correction.json"  # human-corrected ground truth
TIMING_PATH = OUTPUT / "timing_stats.json"  # optional per-dossier timing statistics
REPORT_PATH = OUTPUT / "bilan_extraction_mistral_ogc.pdf"  # generated PDF report
# ─── Styles ───────────────────────────────────────────────────────────────────
def make_styles(acc):
    """Build the named ParagraphStyle palette used throughout the report.

    acc: accent colour applied to section headings.
    Returns a dict of style-name -> ParagraphStyle.
    """
    base = getSampleStyleSheet()

    def ps(name, parent, **kw):
        # Small factory: derive a style from one of the sample-sheet parents.
        return ParagraphStyle(name, parent=base[parent], **kw)

    hx = colors.HexColor
    return {
        "title": ps("title", "Title", fontSize=22, textColor=colors.white,
                    alignment=TA_LEFT),
        "subtitle": ps("subtitle", "Normal", fontSize=10,
                       textColor=hx("#aaaaaa"), alignment=TA_LEFT),
        "section": ps("section", "Heading2", fontSize=13, textColor=acc,
                      spaceBefore=16, spaceAfter=6),
        "body": ps("body", "Normal", fontSize=9, leading=14),
        "small": ps("small", "Normal", fontSize=8, textColor=hx("#444444")),
        "right": ps("right", "Normal", fontSize=7, textColor=hx("#888888"),
                    alignment=TA_RIGHT),
        "kpi_num": ps("kpi_num", "Normal", fontSize=36,
                      fontName="Helvetica-Bold", alignment=TA_CENTER),
        "kpi_lbl": ps("kpi_lbl", "Normal", fontSize=8,
                      textColor=hx("#777777"), alignment=TA_CENTER),
        "warn": ps("warn", "Normal", fontSize=8, textColor=hx("#c0392b")),
        "footnote": ps("footnote", "Normal", fontSize=7, textColor=hx("#888888")),
        "center": ps("center", "Normal", fontSize=9, alignment=TA_CENTER),
        "bold": ps("bold", "Normal", fontSize=9, fontName="Helvetica-Bold"),
        "th": ps("th", "Normal", fontSize=8, textColor=colors.white,
                 fontName="Helvetica-Bold"),
    }
# ─── Utilitaires ──────────────────────────────────────────────────────────────
def _fmt_s(s):
if s is None:
return ""
s = int(s)
h, r = divmod(s, 3600)
m, sec = divmod(r, 60)
if h:
return f"{h}h{m:02d}m{sec:02d}s"
if m:
return f"{m}m{sec:02d}s"
return f"{sec}s"
def _prec_color(p: float):
    """Colour for a precision percentage: green >= 90, orange >= 75, red below."""
    for threshold, hexcode in ((90, "#27ae60"), (75, "#e67e22")):
        if p >= threshold:
            return colors.HexColor(hexcode)
    return colors.HexColor("#e74c3c")
def _gravite_color(g: str):
    """Colour associated with an error-severity label; black for unknown labels."""
    palette = {
        "Critique": "#e74c3c",
        "Haute": "#e67e22",
        "Moyenne": "#f1c40f",
        "Faible": "#27ae60",
    }
    hexcode = palette.get(g)
    return colors.HexColor(hexcode) if hexcode else colors.black
# Short alias: TableStyle is instantiated many times below.
_TS = TableStyle


def _base_table_style(acc):
    """Shared table look: accent header row, zebra-striped body, light grid.

    Returns a list of TableStyle commands (callers append their own extras).
    """
    header = [
        ("BACKGROUND", (0, 0), (-1, 0), acc),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
    ]
    body = [
        ("FONTSIZE", (0, 0), (-1, -1), 8),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1),
         [colors.HexColor("#f8f9fa"), colors.white]),
        ("GRID", (0, 0), (-1, -1), 0.3, colors.HexColor("#cccccc")),
    ]
    padding = [
        (cmd, (0, 0), (-1, -1), pad)
        for cmd, pad in (("LEFTPADDING", 6), ("RIGHTPADDING", 6),
                         ("TOPPADDING", 4), ("BOTTOMPADDING", 4))
    ]
    return header + body + padding
# ─── Comparaison orig vs correction ───────────────────────────────────────────
def _flatten(d, prefix=""):
items = {}
if isinstance(d, dict):
for k, v in d.items():
key = f"{prefix}.{k}" if prefix else k
if isinstance(v, (dict, list)):
items.update(_flatten(v, key))
else:
items[key] = str(v).strip()
elif isinstance(d, list):
for i, v in enumerate(d):
key = f"{prefix}[{i}]"
if isinstance(v, (dict, list)):
items.update(_flatten(v, key))
else:
items[key] = str(v).strip()
return items
def _normalize_keys(flat: dict) -> dict:
"""Normalise les anciens noms de clés pour compatibilité avec les fichiers de correction antérieurs.
rang → niveau (renommage effectué en avril 2026).
"""
return {k.replace(".rang", ".niveau"): v for k, v in flat.items()}
def _get_cat(key: str, ptype: str = "") -> str:
k = key.lower()
if ptype == "ELEMENTS_PREUVE":
if any(x in k for x in ("medecin", "signataire", "date")):
return "Signataires / Dates"
return "Éléments de preuve"
if ptype in ("FICHE_ADMIN_2_2", "FICHE_ADMIN_1_2"):
if any(x in k for x in ("nom_medecin", "date_conc", "medecin")):
return "Signataires / Dates"
return "Concertation (2/2)"
if any(x in k for x in ("das_etab", "das_reco")):
return "DAS"
if any(x in k for x in ("sejour_etab", "sejour_reco")):
return "Données séjour"
if any(x in k for x in ("dp_etab", "dr_etab", "dp_reco", "dr_reco")):
return "DP / DR"
if any(x in k for x in ("rum_etab", "rum_reco")):
return "Données RUM"
if any(x in k for x in ("actes_etab", "actes_reco")):
return "Actes"
if any(x in k for x in ("ghm_", "ghs_")):
return "GHM / GHS"
if any(x in k for x in ("accord_desaccord", "se_coche", "atu", "ffm", "fsd")):
return "Accord / SE"
if any(x in k for x in ("date_debut", "date_fin", "nom_praticien")):
return "Signataires / Dates"
return "Métadonnées"
def compare_extractions(orig_list, corr_list):
    """Compare raw model extraction against the human-corrected extraction.

    Both arguments are lists of dossier records, each carrying a "fichier"
    name and a "pages_traitees" list of {page, type, data} entries.  Fields
    are compared flat (dotted keys from _flatten) after legacy-key
    normalisation (_normalize_keys).

    Returns a dict with global field counts, per-dossier / per-category /
    per-page-type precision, and heuristically detected error patterns.

    NOTE(review): the module's indentation was reconstructed from syntax;
    block nesting below follows the most plausible reading of the original.
    """
    orig_map = {r["fichier"]: r for r in orig_list}
    corr_map = {r["fichier"]: r for r in corr_list}
    total_g = correct_g = 0  # global field counters across all dossiers
    per_dossier = []
    per_cat = {}
    per_type = {}
    # Heuristic error-pattern counters: occurrence count + set of affected dossiers.
    ep_counters = {k: {"occ": 0, "dossiers": set()} for k in [
        "dr_confondu_das", "annee_mal_lue", "se_coche_halluc", "maintien_X",
        "provenance_halluc", "acte_dans_das", "das_manquant", "das_code_wrong",
        "json_non_parsable",
    ]}
    for fichier in sorted(orig_map):
        if fichier not in corr_map:
            # No correction available for this dossier — skip it entirely.
            continue
        o = orig_map[fichier]
        c = corr_map[fichier]
        # Index pages by (page number, page type) so both sides line up.
        o_pages = {(p["page"], p.get("type", "")): p for p in o.get("pages_traitees", [])}
        c_pages = {(p["page"], p.get("type", "")): p for p in c.get("pages_traitees", [])}
        # "raw_response" anywhere in the dossier marks unparsable model output.
        structural_error = "raw_response" in json.dumps(o)
        dos_total = dos_correct = 0
        for page_key in sorted(set(o_pages) & set(c_pages)):
            op = o_pages[page_key]
            cp = c_pages[page_key]
            ptype = op.get("type", "UNKNOWN")
            od = op.get("data", {})
            cd = cp.get("data", {})
            if not isinstance(od, dict) or not isinstance(cd, dict):
                continue
            if "raw_response" in od or "raw_response" in cd:
                # Unparsable page: count the pattern once, skip field comparison.
                ep_counters["json_non_parsable"]["occ"] += 1
                ep_counters["json_non_parsable"]["dossiers"].add(fichier)
                continue
            # Field-by-field comparison on the flattened, normalised dicts.
            o_flat = _normalize_keys(_flatten(od))
            c_flat = _normalize_keys(_flatten(cd))
            all_keys = set(o_flat) | set(c_flat)
            for k in all_keys:
                ov = o_flat.get(k, "")
                cv = c_flat.get(k, "")
                cat = _get_cat(k, ptype)
                per_cat.setdefault(cat, {"total": 0, "correct": 0})
                per_type.setdefault(ptype, {"total": 0, "correct": 0})
                per_cat[cat]["total"] += 1
                per_type[ptype]["total"] += 1
                dos_total += 1
                if ov == cv:
                    per_cat[cat]["correct"] += 1
                    per_type[ptype]["correct"] += 1
                    dos_correct += 1
            # ── Heuristic error-pattern detection on recueil pages ──
            if ptype == "FICHE_RECUEIL":
                # DR present in extraction but absent in correction → DR/DAS mix-up.
                dr = (od.get("dr_etab") or {}).get("code", "")
                cdr = (cd.get("dr_etab") or {}).get("code", "")
                if dr and not cdr:
                    ep_counters["dr_confondu_das"]["occ"] += 1
                    ep_counters["dr_confondu_das"]["dossiers"].add(fichier)
                # Value present where the corrected field is empty → hallucination.
                prov = str((od.get("sejour_etab") or {}).get("provenance", "")).strip()
                cprov = str((cd.get("sejour_etab") or {}).get("provenance", "")).strip()
                if prov and not cprov:
                    ep_counters["provenance_halluc"]["occ"] += 1
                    ep_counters["provenance_halluc"]["dossiers"].add(fichier)
                se = str(od.get("se_coche", "")).strip()
                cse = str(cd.get("se_coche", "")).strip()
                if se and not cse:
                    ep_counters["se_coche_halluc"]["occ"] += 1
                    ep_counters["se_coche_halluc"]["dossiers"].add(fichier)
                das = od.get("das_etab") or []
                cdas = cd.get("das_etab") or []
                dp = (od.get("dp_etab") or {}).get("code", "")
                # DP present but no usable DAS entry at all → DAS entirely missing.
                if dp and not [x for x in das if isinstance(x, dict) and x.get("code")]:
                    ep_counters["das_manquant"]["occ"] += 1
                    ep_counters["das_manquant"]["dossiers"].add(fichier)
                # Pairwise comparison of extracted vs corrected DAS codes.
                for od2, cd2 in zip(das, cdas):
                    if isinstance(od2, dict) and isinstance(cd2, dict):
                        if od2.get("code") != cd2.get("code") and cd2.get("code"):
                            oc = od2.get("code", "")
                            # >= 7 chars starting with 4 letters looks like a CCAM
                            # act code rather than a diagnosis → misplaced act.
                            if len(oc) >= 7 and oc[:4].isalpha():
                                ep_counters["acte_dans_das"]["occ"] += 1
                                ep_counters["acte_dans_das"]["dossiers"].add(fichier)
                            else:
                                ep_counters["das_code_wrong"]["occ"] += 1
                                ep_counters["das_code_wrong"]["dossiers"].add(fichier)
            if ptype == "FICHE_ADMIN_2_2":
                # "X" tick transcribed literally where the correction says "oui".
                m = str(od.get("maintien_avis_controleur", "")).strip()
                cm_ = str(cd.get("maintien_avis_controleur", "")).strip().lower()
                if m.upper() == "X" and cm_ == "oui":
                    ep_counters["maintien_X"]["occ"] += 1
                    ep_counters["maintien_X"]["dossiers"].add(fichier)
            # Year misread in any date-like field: r"1[6-9]" matches the trailing
            # two digits of 2016–2019 years inside the date string.
            # NOTE(review): placed at page level (applies to every page type) —
            # confirm against the original layout if available.
            for k in od:
                if "date" in k.lower():
                    ov = str(od.get(k, "")).strip()
                    cv = str(cd.get(k, "")).strip()
                    if ov != cv:
                        oy = re.findall(r"1[6-9]", ov)
                        cy = re.findall(r"1[6-9]", cv)
                        if oy and cy and oy != cy:
                            ep_counters["annee_mal_lue"]["occ"] += 1
                            ep_counters["annee_mal_lue"]["dossiers"].add(fichier)
        # Per-dossier precision summary.
        prec = round(dos_correct / dos_total * 100) if dos_total else 0
        per_dossier.append({
            "fichier": fichier.replace(".pdf", ""),
            "total": dos_total, "correct": dos_correct,
            "errors": dos_total - dos_correct,
            "precision": prec, "structural_error": structural_error,
        })
        total_g += dos_total
        correct_g += dos_correct
    prec_g = round(correct_g / total_g * 100, 1) if total_g else 0
    n_total = len(orig_list)
    # Build the human-readable pattern table (only patterns that actually occurred).
    error_patterns = []
    for desc, key, gravite in [
        ("DR confondu avec DAS", "dr_confondu_das", "Critique"),
        ("Année mal lue (ex : 2017 au lieu de 2018)", "annee_mal_lue", "Haute"),
        ("se_coche inventé ('1' ou '4' au lieu de vide)", "se_coche_halluc", "Haute"),
        ("maintien_avis = 'X' au lieu de 'oui'", "maintien_X", "Haute"),
        ("provenance inventé ('8' au lieu de vide)", "provenance_halluc", "Haute"),
        ("Code acte mis dans DAS", "acte_dans_das", "Haute"),
        ("DAS entier manquant", "das_manquant", "Critique"),
        ("DAS code mauvais", "das_code_wrong", "Critique"),
        ("JSON non parsable", "json_non_parsable", "Critique"),
    ]:
        e = ep_counters[key]
        if e["occ"] > 0:
            error_patterns.append({
                "desc": desc, "occ": e["occ"],
                "dossiers": len(e["dossiers"]), "n_total": n_total,
                "gravite": gravite,
            })
    return {
        "total": total_g, "correct": correct_g,
        "errors": total_g - correct_g, "precision": prec_g,
        "per_dossier": per_dossier, "per_cat": per_cat,
        "per_type": per_type, "error_patterns": error_patterns,
    }
# ─── Sections PDF ─────────────────────────────────────────────────────────────
def _section_header(story, S, acc, text):
    """Append a section title followed by a thin accent-coloured rule."""
    rule = HRFlowable(width="100%", thickness=0.5, color=acc, spaceAfter=6)
    story.append(Paragraph(text, S["section"]))
    story.append(rule)
def _build_header(story, S, acc, meta):
    """Append the coloured report banner (title + metadata line) to the story."""
    title_cell = Paragraph(f"BILAN D'EXTRACTION —\nMODÈLE {LABEL}", S["title"])
    meta_cell = Paragraph(meta, S["subtitle"])
    banner = Table([[title_cell, meta_cell]], colWidths=[10*cm, 7*cm])
    banner.setStyle(_TS([
        ("BACKGROUND", (0, 0), (-1, -1), acc),
        ("LEFTPADDING", (0, 0), (-1, -1), 16),
        ("RIGHTPADDING", (0, 0), (-1, -1), 12),
        ("TOPPADDING", (0, 0), (-1, -1), 16),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 16),
        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
    ]))
    story.append(banner)
    story.append(Spacer(1, 0.5*cm))
def _build_kpis(story, S, acc, cmp):
    """Append the five-KPI summary band (dossier/field counts + global precision)."""
    green = colors.HexColor("#27ae60")
    red = colors.HexColor("#e74c3c")
    dark = colors.HexColor("#333333")
    num_style = ParagraphStyle("kpi_num2", parent=S["kpi_num"], fontSize=28, leading=32)
    # (big number, caption, colour) for each KPI cell, left to right.
    data = [
        (str(len(cmp["per_dossier"])), "Dossiers analysés", acc),
        (str(cmp["total"]), "Champs comparés", dark),
        (str(cmp["correct"]), "Champs corrects", green),
        (str(cmp["errors"]), "Champs en erreur", red),
        (f"{cmp['precision']}%", "Précision globale",
         green if cmp["precision"] >= 85 else red),
    ]
    num_row = [Paragraph(f'<font color="{col.hexval()}">{num}</font>', num_style)
               for num, _, col in data]
    lbl_row = [Paragraph(lbl, S["kpi_lbl"]) for _, lbl, _ in data]
    band = Table([num_row, lbl_row], colWidths=[3.4*cm]*5)
    band.setStyle(_TS([
        ("BACKGROUND", (0, 0), (-1, -1), colors.HexColor("#f8f9fa")),
        ("BOX", (0, 0), (-1, -1), 0.5, colors.HexColor("#dddddd")),
        ("INNERGRID", (0, 0), (-1, -1), 0.3, colors.HexColor("#eeeeee")),
        ("TOPPADDING", (0, 0), (-1, 0), 14),
        ("BOTTOMPADDING", (0, 0), (-1, 0), 6),
        ("TOPPADDING", (0, 1), (-1, 1), 4),
        ("BOTTOMPADDING", (0, 1), (-1, 1), 12),
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
    ]))
    story.append(band)
    story.append(Spacer(1, 0.4*cm))
def _build_per_dossier(story, S, acc, W, cmp):
    """Append the per-dossier results table (one row per OGC dossier + TOTAL row).

    W: usable page width in points; column widths are fractions of it.
    """
    header = ["N° OGC", "Champs\ntotaux", "Champs\ncorrects", "Erreurs",
              "Précision", "Err. structurelle"]
    rows = [header]
    style_extra = []
    # i is 1-based so it matches the table row index (row 0 = header).
    for i, d in enumerate(cmp["per_dossier"], start=1):
        prec = d["precision"]
        rows.append([d["fichier"], str(d["total"]), str(d["correct"]),
                     str(d["errors"]), f"{prec}%",
                     "■ Oui" if d["structural_error"] else ""])
        pc = _prec_color(prec)
        # Colour precision, errors (red/green) and corrects (green) per row.
        style_extra += [
            ("TEXTCOLOR", (4, i), (4, i), pc),
            ("FONTNAME", (4, i), (4, i), "Helvetica-Bold"),
            ("TEXTCOLOR", (3, i), (3, i),
             colors.HexColor("#e74c3c") if d["errors"] > 0 else colors.HexColor("#27ae60")),
            ("FONTNAME", (3, i), (3, i), "Helvetica-Bold"),
            ("TEXTCOLOR", (2, i), (2, i), colors.HexColor("#27ae60")),
            ("FONTNAME", (2, i), (2, i), "Helvetica-Bold"),
        ]
        if d["structural_error"]:
            style_extra += [("TEXTCOLOR", (5, i), (5, i), colors.HexColor("#e74c3c")),
                            ("FONTNAME", (5, i), (5, i), "Helvetica-Bold")]
    # Summary (TOTAL) row, highlighted with a light-blue background.
    tot_prec = round(cmp["correct"] / cmp["total"] * 100, 1) if cmp["total"] else 0
    n_struct = sum(1 for d in cmp["per_dossier"] if d["structural_error"])
    rows.append(["TOTAL", str(cmp["total"]), str(cmp["correct"]),
                 str(cmp["errors"]), f"{tot_prec}%", f"{n_struct} dossier(s)"])
    n = len(rows)
    style_extra += [("BACKGROUND", (0, n-1), (-1, n-1), colors.HexColor("#eaf0fb")),
                    ("FONTNAME", (0, n-1), (-1, n-1), "Helvetica-Bold")]
    col_w = [W*0.16, W*0.12, W*0.14, W*0.11, W*0.13, W*0.34]
    t = Table([[Paragraph(str(c), S["th"] if i == 0 else S["small"]) for c in row]
               for i, row in enumerate(rows)], colWidths=col_w)
    t.setStyle(_TS(_base_table_style(acc) + style_extra))
    story.append(t)
def _build_per_cat(story, S, acc, W, cmp):
    """Append the precision-by-field-category table.

    W: usable page width in points.

    Bug fix: the original enumerated ``cat_order`` (start=1) to derive the
    styled row index, but ``continue``d over categories absent from
    ``cmp["per_cat"]`` without appending a row — so once any category was
    missing, every subsequent TEXTCOLOR/FONTNAME command targeted the wrong
    table row.  The row index is now taken from ``len(rows)`` right after the
    row is appended, so style commands always hit the row they belong to.
    """
    cat_order = [
        "DAS", "Données séjour", "DP / DR", "Données RUM",
        "Signataires / Dates", "Métadonnées", "Concertation (2/2)",
        "Éléments de preuve", "Accord / SE", "GHM / GHS", "Actes",
    ]
    rows = [["Catégorie", "Champs\ntotaux", "Champs\ncorrects", "Erreurs", "Précision"]]
    style_extra = []
    for cat in cat_order:
        d = cmp["per_cat"].get(cat)
        if not d:
            continue  # category absent from this run — no row, no style command
        prec = round(d["correct"] / d["total"] * 100) if d["total"] else 0
        rows.append([cat, str(d["total"]), str(d["correct"]),
                     str(d["total"] - d["correct"]), f"{prec}%"])
        i = len(rows) - 1  # index of the row just appended (header is row 0)
        style_extra += [("TEXTCOLOR", (4, i), (4, i), _prec_color(prec)),
                        ("FONTNAME", (4, i), (4, i), "Helvetica-Bold")]
    col_w = [W*0.40, W*0.15, W*0.15, W*0.15, W*0.15]
    t = Table([[Paragraph(str(c), S["th"] if i == 0 else S["small"]) for c in row]
               for i, row in enumerate(rows)], colWidths=col_w)
    t.setStyle(_TS(_base_table_style(acc) + style_extra))
    story.append(t)
def _build_per_type(story, S, acc, W, cmp):
    """Append the precision-by-page-type table, largest page types first."""
    rows = [["Type de page", "Champs\ntotaux", "Champs\ncorrects", "Erreurs", "Précision"]]
    style_extra = []
    ordered = sorted(cmp["per_type"].items(), key=lambda x: -x[1]["total"])
    for i, (ptype, d) in enumerate(ordered, start=1):
        prec = round(d["correct"] / d["total"] * 100) if d["total"] else 0
        # Turn the internal page-type identifier into a display label.
        label = (ptype.replace("FICHE_", "Fiche ").replace("_RECUEIL", "de recueil")
                 .replace("_ADMIN_", " administrative ").replace("_2_2", "2/2")
                 .replace("_1_2", "1/2").replace("ELEMENTS_PREUVE", "Éléments de preuve"))
        errors = d["total"] - d["correct"]
        rows.append([label, str(d["total"]), str(d["correct"]), str(errors), f"{prec}%"])
        style_extra.append(("TEXTCOLOR", (4, i), (4, i), _prec_color(prec)))
        style_extra.append(("FONTNAME", (4, i), (4, i), "Helvetica-Bold"))
    col_w = [W*0.40, W*0.15, W*0.15, W*0.15, W*0.15]
    cells = [[Paragraph(str(c), S["th"] if r == 0 else S["small"]) for c in row]
             for r, row in enumerate(rows)]
    t = Table(cells, colWidths=col_w)
    t.setStyle(_TS(_base_table_style(acc) + style_extra))
    story.append(t)
def _build_error_patterns(story, S, acc, W, cmp):
    """Append the recurring-error-pattern table (description, counts, severity)."""
    rows = [["Ce que le modèle a raté — catégorie d'erreur",
             "Occur-\nrences", "Sur combien\nde dossiers", "Gravité"]]
    style_extra = []
    for i, pat in enumerate(cmp["error_patterns"], start=1):
        rows.append([pat["desc"], str(pat["occ"]),
                     f"{pat['dossiers']} / {pat['n_total']}", pat["gravite"]])
        severity_color = _gravite_color(pat["gravite"])
        style_extra.append(("TEXTCOLOR", (3, i), (3, i), severity_color))
        style_extra.append(("FONTNAME", (3, i), (3, i), "Helvetica-Bold"))
    col_w = [W*0.56, W*0.10, W*0.17, W*0.17]
    cells = [[Paragraph(str(c), S["th"] if r == 0 else S["small"]) for c in row]
             for r, row in enumerate(rows)]
    t = Table(cells, colWidths=col_w)
    t.setStyle(_TS(_base_table_style(acc) + style_extra))
    story.append(t)
def _build_timing(story, S, acc, W, timing_data):
    """Append the timing section: global KPIs, per-dossier table, issue details.

    timing_data: parsed timing_stats.json (list of per-dossier dicts) or None.
    W: usable page width in points.
    """
    if not timing_data:
        story.append(Paragraph(
            "Aucune donnée temporelle disponible. "
            "Relancez l'extraction pour générer timing_stats.json.", S["small"]))
        return
    # ── Global aggregates across all dossiers ──
    total_s = sum(t.get("duree_totale_s") or 0 for t in timing_data)
    total_pages = sum(t.get("nb_pages_total") or 0 for t in timing_data)
    total_err = sum(len(t.get("erreurs", [])) for t in timing_data)
    total_429 = sum(len(t.get("blocages_429", [])) for t in timing_data)
    total_wait = sum(b["attente_s"] for t in timing_data for b in t.get("blocages_429", []))
    n_dos = len(timing_data)
    story.append(Paragraph("Résumé global", S["bold"]))
    story.append(Spacer(1, 0.2*cm))
    kpi_rows = [
        ["Durée totale d'extraction", _fmt_s(total_s)],
        ["Durée moyenne / dossier", _fmt_s(total_s / n_dos) if n_dos else ""],
        ["Durée moyenne / page", _fmt_s(total_s / total_pages) if total_pages else ""],
        ["Pages traitées", str(total_pages)],
        ["Erreurs totales", str(total_err)],
        ["Blocages rate limit (429)", str(total_429)],
        ["Temps perdu en attentes 429", _fmt_s(total_wait)],
        ["Temps utile (hors 429)", _fmt_s(total_s - total_wait)],
    ]
    style_kpi = _base_table_style(acc) + [
        ("ALIGN", (1, 0), (1, -1), "CENTER"),
        ("TEXTCOLOR", (1, 0), (1, -1), acc),
        ("FONTNAME", (1, 0), (1, -1), "Helvetica-Bold"),
    ]
    t_kpi = Table([[Paragraph(k, S["small"]), Paragraph(v, S["small"])]
                   for k, v in kpi_rows], colWidths=[W*0.6, W*0.4])
    t_kpi.setStyle(_TS(style_kpi))
    story.append(t_kpi)
    story.append(Spacer(1, 0.4*cm))
    # ── Per-dossier detail table ──
    story.append(Paragraph("Détail par dossier", S["bold"]))
    story.append(Spacer(1, 0.2*cm))
    header = ["Dossier", "Début", "Fin", "Durée", "Pages",
              "Erreurs", "Blocages\n429", "Attente\n429"]
    rows = [header]
    style_dos = _base_table_style(acc)
    for i, t in enumerate(timing_data, start=1):
        # Keep only "YYYY-MM-DD HH:MM" from the ISO timestamps.
        debut = (t.get("debut") or "")[:16].replace("T", " ")
        fin = (t.get("fin") or "")[:16].replace("T", " ")
        n_err = len(t.get("erreurs", []))
        n_b = len(t.get("blocages_429", []))
        att = sum(b["attente_s"] for b in t.get("blocages_429", []))
        rows.append([
            t["fichier"].replace(".pdf", ""), debut, fin,
            _fmt_s(t.get("duree_totale_s")), str(t.get("nb_pages_total", "")),
            str(n_err), str(n_b), _fmt_s(att) if att else "",
        ])
        # Highlight non-zero error / rate-limit counts for this row.
        if n_err > 0:
            style_dos += [("TEXTCOLOR", (5, i), (5, i), colors.HexColor("#e74c3c")),
                          ("FONTNAME", (5, i), (5, i), "Helvetica-Bold")]
        if n_b > 0:
            style_dos += [("TEXTCOLOR", (6, i), (6, i), colors.HexColor("#e67e22")),
                          ("FONTNAME", (6, i), (6, i), "Helvetica-Bold")]
    col_w = [W*0.18, W*0.14, W*0.14, W*0.10, W*0.08, W*0.09, W*0.10, W*0.17]
    t_dos = Table([[Paragraph(str(c), S["th"] if i == 0 else S["small"]) for c in row]
                   for i, row in enumerate(rows)], colWidths=col_w)
    t_dos.setStyle(_TS(style_dos))
    story.append(t_dos)
    # ── Detailed list of errors and 429 blocks, only when any occurred ──
    has_issues = any(t.get("erreurs") or t.get("blocages_429") for t in timing_data)
    if has_issues:
        story.append(Spacer(1, 0.4*cm))
        story.append(Paragraph("Erreurs et blocages détaillés", S["bold"]))
        story.append(Spacer(1, 0.2*cm))
        for t in timing_data:
            if not t.get("erreurs") and not t.get("blocages_429"):
                continue
            story.append(Paragraph(t["fichier"].replace(".pdf", ""), S["bold"]))
            for err in t.get("erreurs", []):
                # NOTE(review): no separator between {page} and {phase} in the
                # output string — possibly intentional if phase carries its own
                # prefix; confirm before changing.
                story.append(Paragraph(
                    f" ⚠ Page {err['page']}{err['phase']} : {err['message'][:100]}",
                    S["warn"]))
            for b in t.get("blocages_429", []):
                story.append(Paragraph(
                    f" ⏳ Blocage 429 — tentative {b['tentative']}, "
                    f"attente {b['attente_s']}s à {str(b.get('ts',''))[:16].replace('T',' ')}",
                    ParagraphStyle("b429", parent=S["small"],
                                   textColor=colors.HexColor("#e67e22"))))
            story.append(Spacer(1, 0.1*cm))
# ─── Main builder ─────────────────────────────────────────────────────────────
def build_pdf():
    """Load the extraction / correction / timing JSON files and build the PDF.

    Exits with status 1 if the raw extraction JSON is missing; the correction
    and timing files are optional (sections are skipped when absent).
    """
    W = A4[0] - 4*cm  # usable width with 2 cm margins on each side
    if not JSON_PATH.exists():
        print(f"⚠ JSON introuvable : {JSON_PATH}")
        sys.exit(1)
    with open(JSON_PATH, encoding="utf-8") as f:
        orig_data = json.load(f)
    # Precision metrics require the human-corrected file; optional.
    cmp = None
    if CORRECTION_PATH.exists():
        with open(CORRECTION_PATH, encoding="utf-8") as f:
            corr_data = json.load(f)
        cmp = compare_extractions(orig_data, corr_data)
    timing_data = None
    if TIMING_PATH.exists():
        with open(TIMING_PATH, encoding="utf-8") as f:
            timing_data = json.load(f)
    S = make_styles(ACC)
    story = []
    if cmp:
        # Pull establishment metadata from the first page that provides it.
        etabl = finess = controle = ""
        for r in orig_data:
            for pt in r.get("pages_traitees", []):
                d = pt.get("data", {})
                if d.get("etablissement"):
                    etabl = d["etablissement"]
                if d.get("finess"):
                    finess = d["finess"]
                if d.get("date_debut_controle"):
                    controle = d["date_debut_controle"]
                if etabl and finess and controle:
                    break
            if etabl:
                break
        meta = (f"{etabl} · FINESS {finess}\n"
                f"{len(orig_data)} dossiers OGC · Contrôle {controle} · "
                f"{datetime.now().strftime('%B %Y').capitalize()}")
    else:
        meta = (f"{len(orig_data)} dossiers OGC\n"
                f"Généré le {datetime.now().strftime('%d/%m/%Y à %H:%M')}")
    _build_header(story, S, ACC, meta)
    # Sections 1–5 exist only when correction data is available; the timing
    # section number shifts accordingly.
    if cmp:
        _section_header(story, S, ACC, "1. Indicateurs globaux")
        _build_kpis(story, S, ACC, cmp)
        _section_header(story, S, ACC, "2. Résultats par dossier OGC")
        _build_per_dossier(story, S, ACC, W, cmp)
        story.append(Spacer(1, 0.4*cm))
        _section_header(story, S, ACC, "3. Précision par catégorie de champ")
        _build_per_cat(story, S, ACC, W, cmp)
        story.append(Spacer(1, 0.4*cm))
        _section_header(story, S, ACC, "4. Précision par type de page")
        _build_per_type(story, S, ACC, W, cmp)
        story.append(Spacer(1, 0.4*cm))
        if cmp["error_patterns"]:
            _section_header(story, S, ACC, "5. Patterns d'erreurs récurrents")
            _build_error_patterns(story, S, ACC, W, cmp)
            story.append(Spacer(1, 0.4*cm))
        sec_timing = 6
    else:
        sec_timing = 1
    # NOTE(review): PageBreak placed at function level (both branches) per the
    # reconstructed indentation — confirm against the original layout.
    story.append(PageBreak())
    _section_header(story, S, ACC, f"{sec_timing}. Analyse temporelle")
    _build_timing(story, S, ACC, W, timing_data)
    story.append(Spacer(1, 0.5*cm))
    # Footnote describing how the report was produced.
    note = (
        "Rapport généré par comparaison automatique de extraction_ogc_raw.json "
        "vs extraction_ogc_raw_Correction.json · "
        f"Périmètre : {len(orig_data)} dossiers OGC · "
        "Les pourcentages de précision sont calculés champ par champ."
        if cmp else
        f"Rapport généré automatiquement · {len(orig_data)} dossiers OGC · "
        "Aucun fichier de correction disponible — métriques de précision non calculées."
    )
    story.append(HRFlowable(width="100%", thickness=0.3, color=colors.grey))
    story.append(Paragraph(note, S["footnote"]))
    doc = SimpleDocTemplate(
        str(REPORT_PATH), pagesize=A4,
        leftMargin=2*cm, rightMargin=2*cm,
        topMargin=2*cm, bottomMargin=2*cm,
        title=f"Bilan extraction OGC — {LABEL}",
        author="EttaSanté / T2A",
    )
    doc.build(story)
    print(f"{REPORT_PATH}")
# Script entry point: announce the run, then build the report PDF.
if __name__ == "__main__":
    print(f"Génération bilan {LABEL}...")
    build_pdf()