#!/usr/bin/env python3
"""
parse_decision_ucr.py — Extraction des décisions UCR depuis un PDF scanné (contrôle T2A)

Entrée : PDF scanné de décision UCR (CPAM / Assurance Maladie)
Sortie : Fichier Excel (.xlsx) avec une feuille unique

Colonnes extraites (enrichies pour analyse IA) :
    Champ, OGC, Type_desaccord,
    Code_etablissement, Libelle_etablissement,
    Code_controleurs, Libelle_controleurs,
    Codes_retenus_final,
    Decision, Texte_decision_complet, Resume_motif,
    Regles_citees, References_guide,
    GHM_mentionne, GHS_mentionne, GHM_final, GHS_final,
    Impact_groupage
"""
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
import pymupdf
|
||
import pytesseract
|
||
from PIL import Image
|
||
import io
|
||
from openpyxl import Workbook
|
||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||
import unicodedata
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 0. Normalisation texte OCR
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def normalize_text(text: str) -> str:
    """Normalize apostrophes, quotation marks and spaces produced by OCR."""
    # Ordered replacement table: curly quotes and guillemets become ASCII
    # first, then doubled apostrophes are collapsed, then non-breaking and
    # narrow no-break spaces become plain spaces. Order matters: the "''"
    # collapse must run after the curly-apostrophe substitutions.
    replacements = (
        ("\u2018", "'"), ("\u2019", "'"),
        ("\u201C", '"'), ("\u201D", '"'),
        ("\u00AB", '"'), ("\u00BB", '"'),
        ("''", "'"),
        ("\u00A0", " "), ("\u202F", " "),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    # Common OCR misreads of "l'UCR".
    for pattern in (r"\bF'UCR\b", r"\bl''UCR\b"):
        text = re.sub(pattern, "l'UCR", text)
    return text
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 1. OCR
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def ocr_pdf(pdf_path: str, dpi: int = 300) -> str:
    """Run Tesseract OCR on every page of the PDF and return the normalized text.

    Pages are rendered to PNG at the requested DPI, OCR'd with the French
    language model, then joined with blank lines and passed through
    normalize_text().
    """
    doc = pymupdf.open(pdf_path)
    page_count = len(doc)
    pages_text = []
    for idx, page in enumerate(doc):
        print(f" OCR page {idx+1}/{page_count}...", end="\r")
        # PDF user space is 72 dpi; scale up to the requested resolution.
        scale = pymupdf.Matrix(dpi / 72, dpi / 72)
        pixmap = page.get_pixmap(matrix=scale)
        image = Image.open(io.BytesIO(pixmap.tobytes("png")))
        pages_text.append(pytesseract.image_to_string(image, lang="fra"))
    print(f" OCR terminé : {page_count} pages. ")
    return normalize_text("\n\n".join(pages_text))
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 2. Parsing — Regex
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Header of a "Champ" (control field) section, e.g. "Champ n° 1 : Séjours ...".
# Captures the field number.
RE_CHAMP = re.compile(
    r"Champ\s*(?:n°\s*)?(\d+)\s*[:\-—]?\s*(?:Séjours|:)",
    re.IGNORECASE,
)

# Header of an individual OGC case at the start of a line, e.g. "OGC 12 :".
# Captures the OGC number.
RE_OGC_HEADER = re.compile(
    r"(?:^|\n)\s*OGC\s+(\d+)\s*:",
    re.MULTILINE,
)

# Nature of the disagreement: DP (principal diagnosis), DAS (associated
# diagnoses), or both. Captures the raw phrase for later normalization.
RE_TYPE_DESACCORD = re.compile(
    r"(?:désaccord|discussion)\s+porte\s+(?:sur\s+)?(?:le\s+|les\s+)?(DP\s+et\s+(?:le\s+)?DAS|DP\s+et\s+DAS|DP|DAS)",
    re.IGNORECASE,
)

# ICD-10 (CIM-10) code, e.g. "I10" or "J44.0".
RE_CIM10 = re.compile(r"\b([A-Z]\d{2}(?:\.\d{1,2})?)\b")

# "Codage établissement : ..." block — everything up to the controllers'
# coding block (the [ôo] alternation tolerates OCR accent loss).
RE_CODAGE_ETS = re.compile(
    r"Codage\s+[ée]tablissement\s*:\s*(.*?)(?=Codage\s+contr[ôo]leurs)",
    re.IGNORECASE | re.DOTALL,
)

# "Codage contrôleurs : ..." block — everything up to the UCR decision or
# proposal heading (D[EÉ]C[I1]?SION tolerates OCR misreads of "DÉCISION").
RE_CODAGE_CTRL = re.compile(
    r"Codage\s+contr[ôo]leurs\s*:\s*(.*?)(?=D[EÉ]C[I1]?SION\s+UCR|PROPOSITION\s+UCR)",
    re.IGNORECASE | re.DOTALL,
)

# "DÉCISION UCR :" / "PROPOSITION UCR :" heading — captures the full decision
# text that follows, to the end of the block.
RE_DECISION = re.compile(
    r"(?:D[EÉ]C[I1]?SION|PROPOSITION)\s+UCR\s*:?\s*(.*)",
    re.IGNORECASE | re.DOTALL,
)

# --- Decision classification ---

# Wordings indicating the UCR sided with the establishment.
RE_FAVORABLE = re.compile(
    r"(?:"
    r"retient\s+(?:la\s+demande|le\s+codage|l'avis)\s+(?:de\s+)?l'[ée]tablissement"
    r"|retient\s+en\s+D[PA]S\s+le\s+code"
    r"|retient\s+le\s+codage\s+du\s+DP\s+de\s+l'[ée]tablissement"
    r"|l'UCR\s+retient\s+l'avis\s+de\s+l'[ée]tablissement"
    r"|confirme\s+l'avis\s+(?:de\s+)?l'[ée]tablissement"
    r")",
    re.IGNORECASE,
)

# Wording indicating the UCR sided with the (medical) controllers.
RE_DEFAVORABLE = re.compile(
    r"confirme\s+l'avis\s+des\s+(?:m[ée]decins\s+)?contr[oô]leurs",
    re.IGNORECASE,
)

# Weaker signals used by classify_decision() to refine the verdict.
RE_UCR_RETIENT = re.compile(r"l'UCR\s+retient\b", re.IGNORECASE)
RE_UCR_PROPOSE = re.compile(r"l'UCR\s+propose\b", re.IGNORECASE)
RE_NE_RETIENT_PAS = re.compile(r"ne\s+retient\s+pas", re.IGNORECASE)

# --- GHM / GHS (case-mix group and tariff identifiers) ---

RE_GHM = re.compile(r"GHM\s+([A-Z0-9]{5,7})", re.IGNORECASE)
RE_GHS = re.compile(r"GHS\s+(\d{3,5})", re.IGNORECASE)

# Grouping impact wordings: better valued vs unchanged.
RE_MIEUX_VALORISE = re.compile(r"mieux\s+valoris[ée]", re.IGNORECASE)
RE_PAS_MODIFIE = re.compile(
    r"(?:ne\s+modifie\s+pas|ne\s+change(?:nt)?\s+pas|pas\s+de\s+changement|reste\s+group[ée])",
    re.IGNORECASE,
)

# --- Cited rules and references ---

# Methodological-guide page, "guide p. N" form. Captures the page number.
RE_GUIDE_PAGE = re.compile(
    r"(?:guide\s+m[ée]thodologique|guide)\s*(?:p\.?|page)\s*(\d{1,3})",
    re.IGNORECASE,
)
# Same reference in the reversed "p. N du guide" form.
RE_PAGE_GUIDE = re.compile(
    r"(?:p\.?|page)\s*(\d{1,3})\s+du\s+guide",
    re.IGNORECASE,
)

# Coding rules "T3", "T7", etc. Captures the rule identifier.
RE_REGLE_T = re.compile(
    r"r[èe]gle\s+(T\d+)",
    re.IGNORECASE,
)

# ATIH fascicles. Captures (1) optional number, (2) subject, (3) optional
# year, (4) optional page — all pieces are optional, so matches can be loose.
RE_FASCICULE = re.compile(
    r"fascicule\s+(?:ATIH\s+)?(?:de\s+codage\s+)?(?:PMSI\s+)?(?:n°\s*)?(\d{1,2})?\s*(?:[-–]\s*)?([A-ZÀ-Üa-zà-ü\s]+?)(?:\s+(?:de\s+)?(\d{4}))?(?:\s*(?:,\s*)?(?:p\.?\s*|page\s*)(\d+))?",
    re.IGNORECASE,
)

# Agora opinions, "avis Agora n° N". Captures the opinion number.
RE_AVIS_AGORA = re.compile(
    r"avis\s+(?:agora|AGORA)\s*(?:n°\s*)?(\d+)",
    re.IGNORECASE,
)

# Coding instructions with a page number. Captures the page.
RE_CONSIGNES_CODAGE = re.compile(
    r"consignes?\s+de\s+codage\s*(?:p\.?\s*|page\s*)(\d+)",
    re.IGNORECASE,
)

# "codage retenu" / "DP retenu" / "DAS retenu" wordings followed by an
# ICD-10 code. Captures the retained code.
RE_CODAGE_RETENU = re.compile(
    r"(?:codage\s+retenu|DP\s*(?:retenu|=)|DAS\s*(?:retenu|=)|code\s+retenu|est\s+cod[ée]\s+en|se\s+code)\s*(?:est\s+)?(?::?\s*)([A-Z]\d{2}(?:\.\d{1,2})?)",
    re.IGNORECASE,
)

# "est ajouté en DAS" / "ajout du code X" wordings. Captures the added code.
RE_CODE_AJOUTE = re.compile(
    r"(?:est\s+ajout[ée]\s+en\s+D[PA]S|ajout(?:er)?\s+(?:du\s+|en\s+D[PA]S\s+(?:le\s+)?)?(?:code\s+)?)\s*(?::?\s*)([A-Z]\d{2}(?:\.\d{1,2})?)",
    re.IGNORECASE,
)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 2b. Fonctions d'extraction
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def extract_codes_and_label(text: str) -> tuple[str, str]:
    """Extract the ICD-10 codes and the quoted label(s) from a coding block.

    Returns a pair (codes joined with " + ", labels joined with " | ").
    When no quoted label is found, a 120-character excerpt of the raw text
    is used as the label.
    """
    found_codes = RE_CIM10.findall(text)
    quoted_labels = re.findall(r'[«"](.*?)[»"]', text)
    if quoted_labels:
        label = " | ".join(quoted_labels)
    else:
        # Fallback: truncated raw excerpt when nothing is quoted.
        label = text.strip()[:120]
    label = re.sub(r"\s+", " ", label).strip()
    codes = " + ".join(found_codes) if found_codes else ""
    return codes, label
|
||
|
||
|
||
def extract_codes_retenus(decision_text: str) -> str:
    """Extract the ICD-10 codes ultimately retained in the UCR decision.

    Matches both "retained coding" and "added code" wordings; returns the
    sorted, deduplicated codes joined with " + " (empty string if none).
    """
    retained = {m.group(1) for m in RE_CODAGE_RETENU.finditer(decision_text)}
    retained |= {m.group(1) for m in RE_CODE_AJOUTE.finditer(decision_text)}
    return " + ".join(sorted(retained)) if retained else ""
|
||
|
||
|
||
def extract_regles(text: str) -> str:
    """Extract the coding rules cited in the text (T3, T7, etc.).

    Returns the sorted, deduplicated rule identifiers joined with ", "
    (empty string when no rule is cited).
    """
    cited = sorted({m.group(1).upper() for m in RE_REGLE_T.finditer(text)})
    return ", ".join(cited)
|
||
|
||
|
||
def extract_references(text: str) -> str:
    """Extract every cited reference: guide pages, ATIH fascicles, Agora
    opinions and coding instructions.

    Returns the references joined with " ; ", case-insensitively
    deduplicated while keeping first occurrences (empty string if none).
    """
    refs: list[str] = []

    # Methodological-guide pages, cited in either direction
    # ("guide p. 12" or "p. 12 du guide").
    guide_pages = {m.group(1) for m in RE_GUIDE_PAGE.finditer(text)}
    guide_pages |= {m.group(1) for m in RE_PAGE_GUIDE.finditer(text)}
    if guide_pages:
        joined_pages = ", p.".join(sorted(guide_pages, key=int))
        refs.append("Guide méthodologique p." + joined_pages)

    # ATIH fascicles: number, subject, year and page are all optional.
    for m in RE_FASCICULE.finditer(text):
        pieces = ["Fascicule"]
        if m.group(1):
            pieces.append(m.group(1))
        subject = (m.group(2) or "").strip()
        if subject:
            pieces.append(subject)
        if m.group(3):
            pieces.append(f"({m.group(3)})")
        if m.group(4):
            pieces.append(f"p.{m.group(4)}")
        refs.append(" ".join(pieces))

    # Agora opinions.
    for m in RE_AVIS_AGORA.finditer(text):
        refs.append(f"Avis Agora n°{m.group(1)}")

    # Coding instructions with their page.
    for m in RE_CONSIGNES_CODAGE.finditer(text):
        refs.append(f"Consignes de codage p.{m.group(1)}")

    # Case-insensitive dedup; dict preserves insertion order and setdefault
    # keeps the first spelling encountered.
    unique: dict[str, str] = {}
    for ref in refs:
        unique.setdefault(ref.lower(), ref)
    return " ; ".join(unique.values())
|
||
|
||
|
||
def extract_ghm_ghs_all(text: str) -> tuple[list[str], list[str]]:
    """Return every GHM and GHS mentioned in the text.

    Each list preserves order of first appearance with duplicates removed
    (dict.fromkeys keeps insertion order). GHM codes are upper-cased.
    """
    ghms = list(dict.fromkeys(m.group(1).upper() for m in RE_GHM.finditer(text)))
    ghss = list(dict.fromkeys(m.group(1) for m in RE_GHS.finditer(text)))
    return ghms, ghss
|
||
|
||
|
||
def classify_decision(decision_text: str) -> str:
    """Classify a UCR decision: Favorable / Défavorable / Mixte / Indéterminé.

    Heuristic over the normalized decision text:
    - explicit wordings siding with the establishment set ``fav``;
    - "confirme l'avis des contrôleurs" sets ``defav``;
    - "l'UCR retient" (not negated by "ne retient pas") and "l'UCR propose"
      (absent a controller confirmation) also count as favourable;
    - favourable and defavourable signals together yield "Mixte".

    Note: the original implementation had a second ``if fav and defav``
    check that was unreachable — it was fully shadowed by the broader
    ``(ucr_retient or fav) and defav`` test just above it — so it has been
    removed; behavior is unchanged.
    """
    text = normalize_text(decision_text)

    fav = bool(RE_FAVORABLE.search(text))
    defav = bool(RE_DEFAVORABLE.search(text))

    ucr_retient = bool(RE_UCR_RETIENT.search(text))
    ucr_propose = bool(RE_UCR_PROPOSE.search(text))
    ne_retient_pas = bool(RE_NE_RETIENT_PAS.search(text))

    # "l'UCR retient" is favourable unless negated; "l'UCR propose" is
    # favourable unless the controllers' opinion was confirmed.
    if ucr_retient and not ne_retient_pas:
        fav = True
    if ucr_propose and not defav:
        fav = True

    # A (possibly negated) "l'UCR retient" combined with a controller
    # confirmation is still reported as mixed.
    if (ucr_retient or fav) and defav:
        return "Mixte"
    if fav:
        return "Favorable établissement"
    if defav:
        return "Défavorable établissement"
    return "Indéterminé"
|
||
|
||
|
||
def clean_decision_text(text: str) -> str:
    """Strip OCR artefacts from the end of a decision block and tidy whitespace.

    Removes UCR footer lines (reference, confidentiality notice, page
    numbers), trailing OCR noise (short capitalised fragments around a dash,
    recurring garbage tokens), then collapses space runs and excess blank
    lines.
    """
    artefact_patterns = (
        # Footer lines: UCR reference, confidentiality notice, page numbers.
        (r"\n\s*(?:UCR\s+NA|CONFIDENTIEL|Page\s+\d+).*$", re.MULTILINE | re.IGNORECASE),
        # Short capitalised fragments around a dash at end of line.
        (r"\n\s*[A-Z]{1,4}\s*(?:—|—|-)\s*[a-zA-Z]{0,3}\s*$", re.MULTILINE),
        # Recurring OCR garbage tokens.
        (r"\n\s*(?:EE|ESS|2 ae|A D ES|EE nd)\s*$", re.MULTILINE | re.IGNORECASE),
    )
    for pattern, flags in artefact_patterns:
        text = re.sub(pattern, "", text, flags=flags)
    # Collapse horizontal whitespace runs and excessive blank lines.
    text = re.sub(r"[ \t]+", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 2c. Parsing des blocs
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def parse_ogc_block(block_text: str, champ: int, ogc_num: int) -> dict:
    """Parse one OGC block and return an enriched, structured row.

    Args:
        block_text: OCR text of the block (from the "OGC n :" header to the
            next header).
        champ: number of the enclosing "Champ" section.
        ogc_num: OGC case number.

    Returns:
        A dict with one entry per output column (see HEADERS); fields that
        could not be extracted are left as empty strings.
    """
    # All columns pre-initialised so every row has the full schema.
    result = {
        "Champ": champ,
        "OGC": ogc_num,
        "Type_desaccord": "",
        "Code_etablissement": "",
        "Libelle_etablissement": "",
        "Code_controleurs": "",
        "Libelle_controleurs": "",
        "Codes_retenus_final": "",
        "Decision": "",
        "Texte_decision_complet": "",
        "Resume_motif": "",
        "Regles_citees": "",
        "References_guide": "",
        "GHM_mentionne": "",
        "GHS_mentionne": "",
        "GHM_final": "",
        "GHS_final": "",
        "Impact_groupage": "",
    }

    # Nature of the disagreement (DP / DAS / both), normalized.
    m = RE_TYPE_DESACCORD.search(block_text)
    if m:
        raw = m.group(1).upper().strip()
        raw = re.sub(r"\s+", " ", raw)
        if "DP" in raw and "DAS" in raw:
            result["Type_desaccord"] = "DP + DAS"
        elif "DAS" in raw:
            result["Type_desaccord"] = "DAS"
        elif "DP" in raw:
            result["Type_desaccord"] = "DP"

    # Establishment's coding.
    m = RE_CODAGE_ETS.search(block_text)
    if m:
        raw_ets = m.group(1).strip()
        result["Code_etablissement"], result["Libelle_etablissement"] = extract_codes_and_label(raw_ets)

    # Controllers' coding; "non repris" means the controllers did not keep
    # the code, so no code/label is extracted.
    m = RE_CODAGE_CTRL.search(block_text)
    if m:
        raw_ctrl = m.group(1).strip()
        if re.search(r"non\s+repris", raw_ctrl, re.IGNORECASE):
            result["Code_controleurs"] = "non repris"
            result["Libelle_controleurs"] = ""
        else:
            result["Code_controleurs"], result["Libelle_controleurs"] = extract_codes_and_label(raw_ctrl)

    # UCR decision — full text, classification and derived fields.
    m = RE_DECISION.search(block_text)
    if m:
        decision_text = m.group(1).strip()
        decision_clean = clean_decision_text(decision_text)

        result["Decision"] = classify_decision(decision_clean)
        result["Texte_decision_complet"] = decision_clean

        # Short summary: first ~300 chars, whitespace-collapsed.
        resume = re.sub(r"\s+", " ", decision_clean)[:300].strip()
        # Cut at the last full sentence (only if long enough to be useful).
        last_dot = resume.rfind(".")
        if last_dot > 100:
            resume = resume[:last_dot + 1]
        result["Resume_motif"] = resume

        # Codes ultimately retained by the UCR.
        result["Codes_retenus_final"] = extract_codes_retenus(decision_clean)

    # Cited coding rules (T3, T7, etc.) — searched in the whole block.
    result["Regles_citees"] = extract_regles(block_text)

    # References (guide pages, fascicles, Agora opinions).
    result["References_guide"] = extract_references(block_text)

    # GHM / GHS: all mentions kept; the last mention is taken as final.
    ghms, ghss = extract_ghm_ghs_all(block_text)
    if ghms:
        result["GHM_mentionne"] = " / ".join(ghms)
        result["GHM_final"] = ghms[-1]  # last mentioned is usually the final one
    if ghss:
        result["GHS_mentionne"] = " / ".join(ghss)
        result["GHS_final"] = ghss[-1]

    # Grouping impact, when explicitly worded.
    if RE_MIEUX_VALORISE.search(block_text):
        result["Impact_groupage"] = "Mieux valorisé"
    elif RE_PAS_MODIFIE.search(block_text):
        result["Impact_groupage"] = "Pas de changement"

    return result
|
||
|
||
|
||
def parse_grouped_ogcs(text_block: str, champ: int, ogc_nums: list[int]) -> list[dict]:
    """Parse a grouped block (e.g. OGC 14, 19, 46 and 50 decided together).

    The block is parsed once, then the resulting row is duplicated for every
    OGC number with only the "OGC" field changed.
    """
    base_row = parse_ogc_block(text_block, champ, ogc_nums[0])
    return [{**base_row, "OGC": num} for num in ogc_nums]
|
||
|
||
|
||
def parse_document(full_text: str) -> list[dict]:
    """Parse the full OCR text and return the list of case rows.

    Strategy:
    1. Locate every "Champ" and "OGC" header by character offset.
    2. Handle grouped wordings ("Concernant les OGC 14, 19, ...") by parsing
       the shared block once per group.
    3. Parse each individual OGC block, letting an individually-parsed row
       override its grouped counterpart when it is more complete.
    4. Deduplicate by OGC number, keeping the row with the most non-empty
       fields, and sort by (Champ, OGC).
    """
    rows = []

    # (offset, number) pairs for every section/case header.
    champ_positions = [(m.start(), int(m.group(1))) for m in RE_CHAMP.finditer(full_text)]
    ogc_positions = [(m.start(), int(m.group(1))) for m in RE_OGC_HEADER.finditer(full_text)]

    def get_champ_for_position(pos: int) -> int:
        # The enclosing Champ is the last header at or before `pos`
        # (positions come from finditer, hence in ascending order).
        ch = 0
        for cp, cn in champ_positions:
            if cp <= pos:
                ch = cn
            else:
                break
        return ch

    # Grouped blocks: several OGC decided in one shared paragraph.
    RE_GROUPED = re.compile(
        r"(?:Concernant|Pour)\s+les\s+OGC\s+([\d,\s]+)",
        re.IGNORECASE,
    )

    grouped_ogcs = set()
    for m in RE_GROUPED.finditer(full_text):
        nums = [int(n.strip()) for n in m.group(1).split(",") if n.strip().isdigit()]
        if len(nums) > 1:
            start = m.start()
            end = len(full_text)
            # The group's block ends at the next OGC header that is not part
            # of the group (the +50 skips a header overlapping the wording).
            for op, on in ogc_positions:
                if op > start + 50 and on not in nums:
                    end = op
                    break
            block = full_text[start:end]
            champ = get_champ_for_position(start)
            group_rows = parse_grouped_ogcs(block, champ, nums)
            rows.extend(group_rows)
            grouped_ogcs.update(nums)

    # Individual OGC blocks.
    for idx, (pos, ogc_num) in enumerate(ogc_positions):
        champ = get_champ_for_position(pos)

        # Block ends at the next OGC header (+20 guards against a duplicate
        # match of the same header) or at the next Champ header, whichever
        # comes first.
        end = len(full_text)
        for next_pos, _ in ogc_positions[idx + 1:]:
            if next_pos > pos + 20:
                end = next_pos
                break
        for cp, _ in champ_positions:
            if pos < cp < end:
                end = cp
                break

        block = full_text[pos:end]
        row = parse_ogc_block(block, champ, ogc_num)

        if ogc_num in grouped_ogcs:
            # Replace the grouped row only when the individual parse is more
            # complete (has both a code and a decision).
            if row["Code_etablissement"] and row["Decision"]:
                rows = [r for r in rows if r["OGC"] != ogc_num]
                rows.append(row)
        else:
            # Keep only rows where something meaningful was extracted.
            if row["Code_etablissement"] or row["Decision"]:
                rows.append(row)

    rows.sort(key=lambda r: (r["Champ"], r["OGC"]))

    # Deduplicate by OGC number: keep the row with the most non-empty fields.
    seen = {}
    deduped = []
    for r in rows:
        key = r["OGC"]
        if key in seen:
            old = seen[key]
            # Completeness score = number of non-empty field values.
            old_score = sum(1 for v in old.values() if v)
            new_score = sum(1 for v in r.values() if v)
            if new_score > old_score:
                deduped = [x for x in deduped if x["OGC"] != key]
                deduped.append(r)
                seen[key] = r
        else:
            seen[key] = r
            deduped.append(r)

    deduped.sort(key=lambda r: (r["Champ"], r["OGC"]))
    return deduped
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 3. Export Excel
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Internal row-dict keys, in output column order (must match HEADER_LABELS).
HEADERS = [
    "Champ",
    "OGC",
    "Type_desaccord",
    "Code_etablissement",
    "Libelle_etablissement",
    "Code_controleurs",
    "Libelle_controleurs",
    "Codes_retenus_final",
    "Decision",
    "Texte_decision_complet",
    "Resume_motif",
    "Regles_citees",
    "References_guide",
    "GHM_mentionne",
    "GHS_mentionne",
    "GHM_final",
    "GHS_final",
    "Impact_groupage",
]

# Human-readable column headers written to the worksheet, same order as
# HEADERS (index-aligned).
HEADER_LABELS = [
    "Champ",
    "N° OGC",
    "Type désaccord",
    "Code(s) Établissement",
    "Libellé Établissement",
    "Code(s) Contrôleurs",
    "Libellé Contrôleurs",
    "Code(s) retenus (final)",
    "Décision UCR",
    "Texte décision complet",
    "Résumé du motif",
    "Règles codage citées",
    "Références (guide, fascicules, avis)",
    "GHM mentionné(s)",
    "GHS mentionné(s)",
    "GHM final",
    "GHS final",
    "Impact groupage",
]
|
||
|
||
|
||
def write_excel(rows: list[dict], output_path: str):
    """Write the parsed rows to an Excel file (single worksheet).

    Layout: styled header row, one data row per dict in `rows` (columns per
    HEADERS), Decision cells colour-coded, fixed column widths, auto-filter
    and a frozen header row.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = "Décisions UCR"

    # Styles.
    header_font = Font(bold=True, color="FFFFFF", size=11)
    header_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid")
    header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    thin_border = Border(
        left=Side(style="thin"),
        right=Side(style="thin"),
        top=Side(style="thin"),
        bottom=Side(style="thin"),
    )

    # Decision colour coding: green favourable, red defavourable, amber mixed.
    fav_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
    defav_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
    mixte_fill = PatternFill(start_color="FFEB9C", end_color="FFEB9C", fill_type="solid")

    # Header row.
    for col, label in enumerate(HEADER_LABELS, 1):
        cell = ws.cell(row=1, column=col, value=label)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_align
        cell.border = thin_border

    # Data rows.
    for row_idx, data in enumerate(rows, 2):
        for col_idx, key in enumerate(HEADERS, 1):
            val = data.get(key, "")
            cell = ws.cell(row=row_idx, column=col_idx, value=val)
            cell.border = thin_border
            cell.alignment = Alignment(vertical="top", wrap_text=True)

        # Colour the Decision cell according to the verdict. "Favorable" is
        # a substring of "Défavorable", hence the explicit exclusion.
        dec_col = HEADERS.index("Decision") + 1
        decision_cell = ws.cell(row=row_idx, column=dec_col)
        dv = str(decision_cell.value or "")
        if "Favorable" in dv and "Défavorable" not in dv:
            decision_cell.fill = fav_fill
        elif "Défavorable" in dv:
            decision_cell.fill = defav_fill
        elif "Mixte" in dv:
            decision_cell.fill = mixte_fill

    # Column widths (characters), keyed by internal column name.
    col_widths = {
        "Champ": 8, "OGC": 8, "Type_desaccord": 14,
        "Code_etablissement": 22, "Libelle_etablissement": 40,
        "Code_controleurs": 22, "Libelle_controleurs": 40,
        "Codes_retenus_final": 22,
        "Decision": 24, "Texte_decision_complet": 80,
        "Resume_motif": 60,
        "Regles_citees": 16, "References_guide": 50,
        "GHM_mentionne": 16, "GHS_mentionne": 16,
        "GHM_final": 12, "GHS_final": 10,
        "Impact_groupage": 20,
    }
    for i, key in enumerate(HEADERS, 1):
        ws.column_dimensions[ws.cell(row=1, column=i).column_letter].width = col_widths.get(key, 15)

    # Auto-filter over the whole table.
    last_col_letter = ws.cell(row=1, column=len(HEADERS)).column_letter
    ws.auto_filter.ref = f"A1:{last_col_letter}{len(rows)+1}"

    # Freeze the header row.
    ws.freeze_panes = "A2"

    wb.save(output_path)
    print(f"Excel enregistré : {output_path}")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def main():
    """CLI entry point: OCR the PDF, parse the decisions, export to Excel."""
    # PDF path from argv, with a development default next to this script.
    if len(sys.argv) >= 2:
        pdf_path = sys.argv[1]
    else:
        pdf_path = str(Path(__file__).parent / "SPHO-FINANC26020915121.pdf")

    output_path = str(Path(pdf_path).with_suffix(".xlsx"))

    print(f"Fichier PDF : {pdf_path}")
    print("Étape 1/3 : OCR du document...")
    full_text = ocr_pdf(pdf_path)

    # Keep the raw OCR text next to the PDF for inspection / re-runs.
    txt_path = str(Path(pdf_path).with_suffix(".txt"))
    Path(txt_path).write_text(full_text, encoding="utf-8")
    print(f" Texte brut sauvegardé : {txt_path}")

    print("Étape 2/3 : Extraction des décisions...")
    rows = parse_document(full_text)
    print(f" {len(rows)} dossiers OGC extraits.")

    # Summary statistics over the extracted rows.
    def count(pred):
        return sum(1 for r in rows if pred(r))

    fav = count(lambda r: "Favorable" in r.get("Decision", "") and "Défavorable" not in r.get("Decision", ""))
    defav = count(lambda r: "Défavorable" in r.get("Decision", ""))
    mixte = count(lambda r: "Mixte" in r.get("Decision", ""))
    indet = count(lambda r: r.get("Decision", "") in ("Indéterminé", ""))
    refs_count = count(lambda r: r.get("References_guide"))
    codes_ret = count(lambda r: r.get("Codes_retenus_final"))
    regles = count(lambda r: r.get("Regles_citees"))

    print(f" Favorable établissement : {fav}")
    print(f" Défavorable établissement : {defav}")
    print(f" Mixte : {mixte}")
    print(f" Indéterminé : {indet}")
    print(f" Avec références citées : {refs_count}")
    print(f" Avec codes retenus : {codes_ret}")
    print(f" Avec règles T : {regles}")

    print("Étape 3/3 : Génération du fichier Excel...")
    write_excel(rows, output_path)
    print("Terminé.")


if __name__ == "__main__":
    main()
|