# t2a/tests/test_cpam_parser.py

"""Tests pour le parser de contrôle CPAM."""

import tempfile
from pathlib import Path

import openpyxl
import pytest

from src.config import ControleCPAM
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel

# Header rows written as the first line of the generated test workbooks.

# Columns of the legacy format.
_LEGACY_HEADER = (
    "N° OGC",
    "Titre",
    "Arg_UCR",
    "Décision_UCR",
    "DP_UCR",
    "DA_UCR",
    "DR_UCR",
    "Actes_UCR",
)

# Columns of the new (ucr_extract) format.
_NEW_HEADER = (
    "N° OGC",
    "Type désaccord",
    "Codes Établissement",
    "Libellé Établissement",
    "Codes Contrôleurs",
    "Libellé Contrôleurs",
    "Décision UCR",
    "Codes retenus",
    "GHM / GHS",
    "Texte décision",
)


def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a legacy-format test workbook to ``path``.

    The active sheet is titled "OGC Contrôle T2A". Its first row is
    ``_LEGACY_HEADER``; each tuple in ``rows`` is then appended as one row,
    in the order given.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "OGC Contrôle T2A"
    # Header line first, followed by the caller-supplied data lines.
    for line in (_LEGACY_HEADER, *rows):
        sheet.append(line)
    workbook.save(path)


def _create_new_format_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a new-format (ucr_extract) test workbook to ``path``.

    The active sheet is titled "UCR Extract". Its first row is
    ``_NEW_HEADER``; each tuple in ``rows`` is then appended as one row,
    in the order given.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "UCR Extract"
    # Header line first, followed by the caller-supplied data lines.
    for line in (_NEW_HEADER, *rows):
        sheet.append(line)
    workbook.save(path)


class TestParseCpamExcel:
    """Checks on ``parse_cpam_excel`` using legacy-format workbooks."""

    def test_parse_basic(self, tmp_path):
        """Two rows with distinct OGC numbers give one entry per number."""
        workbook_path = tmp_path / "test.xlsx"
        rows = [
            (17, "Désaccord sur les DAS", "Argument UCR...", "UCR retient", None, None, None, None),
            (21, "Désaccord sur le DP", "Autre argument", "UCR confirme avis", "K85.1", None, None, None),
        ]
        _create_test_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        for ogc in (17, 21):
            assert ogc in parsed
        for ogc in (17, 21):
            assert len(parsed[ogc]) == 1
        entry_17 = parsed[17][0]
        assert entry_17.titre == "Désaccord sur les DAS"
        assert entry_17.decision_ucr == "UCR retient"
        assert parsed[21][0].dp_ucr == "K85.1"

    def test_parse_multiple_same_ogc(self, tmp_path):
        """Two rows sharing an OGC number are both kept under that number."""
        workbook_path = tmp_path / "test.xlsx"
        rows = [
            (17, "Titre 1", "Arg 1", "Décision 1", None, None, None, None),
            (17, "Titre 2", "Arg 2", "Décision 2", None, None, None, None),
        ]
        _create_test_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert len(parsed[17]) == 2

    def test_parse_empty_file(self, tmp_path):
        """A workbook holding only the header row parses to an empty dict."""
        workbook_path = tmp_path / "empty.xlsx"
        _create_test_xlsx([], workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert parsed == {}

    def test_parse_nonexistent_file(self):
        """A path that does not exist parses to an empty dict."""
        assert parse_cpam_excel("/nonexistent/path.xlsx") == {}

    def test_parse_optional_fields(self, tmp_path):
        """The DP/DA/DR/Actes columns are exposed on the parsed entry."""
        workbook_path = tmp_path / "test.xlsx"
        rows = [
            (42, "Titre", "Arg", "Décision", "E11.40", "G63.2", "E11.9", "ABCD123"),
        ]
        _create_test_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        entry = parsed[42][0]
        observed = (entry.dp_ucr, entry.da_ucr, entry.dr_ucr, entry.actes_ucr)
        assert observed == ("E11.40", "G63.2", "E11.9", "ABCD123")


class TestMatchDossierOGC:
    """Checks on ``match_dossier_ogc`` against a small in-memory mapping."""

    def setup_method(self):
        """Build a mapping with one control each for OGC numbers 17 and 21."""
        control_17 = ControleCPAM(numero_ogc=17, titre="Test 17")
        control_21 = ControleCPAM(numero_ogc=21, titre="Test 21")
        self.cpam_data = {17: [control_17], 21: [control_21]}

    def test_match_found(self):
        """The dossier name "17_23100690" returns the single control for OGC 17."""
        matches = match_dossier_ogc("17_23100690", self.cpam_data)
        assert len(matches) == 1
        assert matches[0].numero_ogc == 17

    def test_match_not_found(self):
        """The dossier name "15_23096332" has no entry and returns an empty list."""
        assert match_dossier_ogc("15_23096332", self.cpam_data) == []

    def test_match_no_prefix(self):
        """The dossier name "nodash" returns an empty list."""
        assert match_dossier_ogc("nodash", self.cpam_data) == []

    def test_match_empty_data(self):
        """An empty mapping always returns an empty list."""
        assert match_dossier_ogc("17_23100690", {}) == []


class TestControleCPAMModel:
    """Checks on the ``ControleCPAM`` model: defaults, dump and construction."""

    def test_serialization(self):
        """``model_dump`` returns the given values and None for an unset field."""
        fields = {
            "numero_ogc": 17,
            "titre": "Désaccord sur les DAS",
            "arg_ucr": "Argument...",
            "decision_ucr": "UCR retient",
            "dp_ucr": "K85.1",
        }
        dumped = ControleCPAM(**fields).model_dump()
        assert dumped["numero_ogc"] == 17
        assert dumped["dp_ucr"] == "K85.1"
        assert dumped["contre_argumentation"] is None

    def test_deserialization(self):
        """Keyword construction keeps the given values; sources default to []."""
        control = ControleCPAM(
            numero_ogc=21,
            titre="Test",
            arg_ucr="Arg",
            decision_ucr="Décision",
            contre_argumentation="Ma réponse",
        )
        assert control.numero_ogc == 21
        assert control.contre_argumentation == "Ma réponse"
        assert control.sources_reponse == []

    def test_new_fields_defaults(self):
        """The six new ucr_extract fields are None by default."""
        control = ControleCPAM(numero_ogc=1)
        new_fields = (
            "codes_etablissement",
            "libelle_etablissement",
            "codes_controleurs",
            "libelle_controleurs",
            "codes_retenus",
            "ghm_ghs",
        )
        for field_name in new_fields:
            assert getattr(control, field_name) is None

    def test_new_fields_serialization(self):
        """The ucr_extract fields show up in ``model_dump`` with their values."""
        extract_values = {
            "codes_etablissement": "K85.1",
            "libelle_etablissement": "Pancréatite aiguë biliaire",
            "codes_controleurs": "K85.9",
            "libelle_controleurs": "Pancréatite aiguë, sans précision",
            "codes_retenus": "K85.1",
            "ghm_ghs": "06M091 / 1854",
        }
        control = ControleCPAM(
            numero_ogc=10,
            titre="Désaccord sur le DP",
            **extract_values,
        )
        dumped = control.model_dump()
        for key, expected in extract_values.items():
            assert dumped[key] == expected


class TestParseNewFormat:
    """Tests for the new (ucr_extract) workbook format."""

    @staticmethod
    def _parse_rows(tmp_path, rows):
        """Write ``rows`` to ``new.xlsx`` under ``tmp_path`` and parse that file."""
        target = tmp_path / "new.xlsx"
        _create_new_format_xlsx(rows, target)
        return parse_cpam_excel(target)

    def test_parse_basic_dp(self, tmp_path):
        """Basic parsing: DP disagreement carrying controller codes."""
        # Column order: N° OGC, Type, Codes Étab, Lib Étab, Codes Ctrl,
        # Lib Ctrl, Décision, Codes ret, GHM, Texte
        rows = [
            (17, "DP", "K85.1", "Pancréatite aiguë biliaire", "K85.9",
             "Pancréatite aiguë SAI", "Défavorable", "K85.9", "06M091 / 1854",
             "Le contrôleur ne retient pas K85.1"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert 17 in parsed
        entry = parsed[17][0]
        assert entry.numero_ogc == 17
        assert entry.titre == "Désaccord sur le DP"
        assert entry.dp_ucr == "K85.9"
        assert entry.da_ucr is None
        assert entry.arg_ucr == "Le contrôleur ne retient pas K85.1"
        assert entry.decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_parse_basic_das(self, tmp_path):
        """Parsing of a DAS disagreement."""
        rows = [
            (21, "DAS", "E11.40,G63.2", "Diabète+neuropathie", "E11.40",
             "Diabète type 2", "Favorable", "E11.40,G63.2", None,
             "L'UCR retient les codes initiaux"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        entry = parsed[21][0]
        assert entry.titre == "Désaccord sur les DAS"
        assert entry.dp_ucr is None
        assert entry.da_ucr == "E11.40"
        assert entry.decision_ucr == "UCR retient"

    def test_parse_dp_plus_das(self, tmp_path):
        """DP+DAS: the first code goes to dp_ucr, the rest to da_ucr."""
        rows = [
            (30, "DP+DAS", "K85.1,E11.40", "...", "K85.9,G63.2,I10",
             "...", "Défavorable", "K85.9,G63.2,I10", None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        entry = parsed[30][0]
        assert entry.titre == "Désaccord sur le DP et les DAS"
        assert entry.dp_ucr == "K85.9"
        assert entry.da_ucr == "G63.2,I10"

    def test_parse_dp_plus_das_single_code(self, tmp_path):
        """DP+DAS with a single code: it lands in dp_ucr, da_ucr stays None."""
        rows = [
            (31, "DP+DAS", "K85.1", "...", "K85.9",
             "...", "Favorable", None, None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        entry = parsed[31][0]
        assert entry.dp_ucr == "K85.9"
        assert entry.da_ucr is None

    def test_new_fields_populated(self, tmp_path):
        """The six enriched fields are filled from their columns."""
        rows = [
            (42, "DP", "E11.40", "Diabète type 2 avec complications",
             "E11.9", "Diabète type 2 sans complication",
             "Défavorable", "E11.9", "05M092 / 1780", "Argumentation contrôleur"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        entry = parsed[42][0]
        assert entry.codes_etablissement == "E11.40"
        assert entry.libelle_etablissement == "Diabète type 2 avec complications"
        assert entry.codes_controleurs == "E11.9"
        assert entry.libelle_controleurs == "Diabète type 2 sans complication"
        assert entry.codes_retenus == "E11.9"
        assert entry.ghm_ghs == "05M092 / 1780"

    def test_decision_favorable(self, tmp_path):
        """Favorable maps to 'UCR retient'."""
        rows = [
            (10, "DP", None, None, None, None, "Favorable", None, None, "OK"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[10][0].decision_ucr == "UCR retient"

    def test_decision_defavorable(self, tmp_path):
        """Défavorable maps to 'UCR confirme avis médecins contrôleurs'."""
        rows = [
            (11, "DAS", None, None, None, None, "Défavorable", None, None, "KO"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[11][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_defavorable_no_accent(self, tmp_path):
        """Defavorable (no accent) gets the same mapping."""
        rows = [
            (12, "DP", None, None, None, None, "Defavorable", None, None, "KO"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[12][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_unknown_passthrough(self, tmp_path):
        """An unknown decision is passed through unchanged."""
        rows = [
            (13, "DP", None, None, None, None, "Partielle", None, None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[13][0].decision_ucr == "Partielle"

    def test_type_desaccord_unknown(self, tmp_path):
        """An unknown disagreement type gives the title 'Désaccord : XXX'."""
        rows = [
            (14, "Actes", None, None, None, None, "Favorable", None, None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[14][0].titre == "Désaccord : Actes"

    def test_type_desaccord_empty(self, tmp_path):
        """An empty disagreement type gives an empty title."""
        rows = [
            (15, "", None, None, None, None, "Favorable", None, None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed[15][0].titre == ""

    def test_multiple_ogc_new_format(self, tmp_path):
        """Several OGC numbers in the new format."""
        rows = [
            (10, "DP", None, None, "K85.9", None, "Favorable", None, None, "Arg 1"),
            (20, "DAS", None, None, "E11.40", None, "Défavorable", None, None, "Arg 2"),
            (10, "DAS", None, None, "G63.2", None, "Favorable", None, None, "Arg 3"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert len(parsed) == 2
        assert len(parsed[10]) == 2
        assert len(parsed[20]) == 1
        # Entries for OGC 10 are checked in the order their rows were written.
        assert parsed[10][0].dp_ucr == "K85.9"
        assert parsed[10][1].da_ucr == "G63.2"

    def test_empty_new_format(self, tmp_path):
        """A new-format file with only the header row parses to an empty dict."""
        parsed = self._parse_rows(tmp_path, [])

        assert parsed == {}

    def test_ogc_none_skipped(self, tmp_path):
        """Rows whose N° OGC is None are skipped."""
        rows = [
            (None, "DP", None, None, None, None, "Favorable", None, None, "Texte"),
            (10, "DP", None, None, "K85.1", None, "Favorable", None, None, "OK"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert len(parsed) == 1
        assert 10 in parsed

    def test_ogc_invalid_skipped(self, tmp_path):
        """A non-numeric N° OGC is skipped."""
        rows = [
            ("ABC", "DP", None, None, None, None, "Favorable", None, None, "Texte"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert parsed == {}


class TestAutoDetection:
    """Tests for automatic detection of the workbook format."""

    def test_detects_legacy(self, tmp_path):
        """A workbook with legacy headers is parsed as legacy."""
        workbook_path = tmp_path / "legacy.xlsx"
        rows = [
            (17, "Titre", "Arg", "Décision", None, None, None, None),
        ]
        _create_test_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        assert parsed[17][0].titre == "Titre"

    def test_detects_new(self, tmp_path):
        """A workbook with new-format headers is parsed as new format."""
        workbook_path = tmp_path / "new.xlsx"
        rows = [
            (17, "DP", "K85.1", "Label", "K85.9", "Label2",
             "Favorable", "K85.1", None, "Texte"),
        ]
        _create_new_format_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        assert parsed[17][0].titre == "Désaccord sur le DP"

    def test_unknown_format_returns_empty(self, tmp_path):
        """Unrecognised headers give an empty dict."""
        workbook_path = tmp_path / "unknown.xlsx"
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        for line in (("Col1", "Col2", "Col3"), (1, "val", "val")):
            sheet.append(line)
        workbook.save(workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert parsed == {}

    def test_new_format_priority_over_legacy(self, tmp_path):
        """When both column sets are present, the new format wins."""
        workbook_path = tmp_path / "both.xlsx"
        # Header row mixing the columns of both formats.
        mixed_header = (
            "N° OGC", "Titre", "Arg_UCR", "Décision_UCR",
            "Type désaccord", "Décision UCR", "Texte décision",
            "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR",
        )
        data_row = (
            17, "Titre", "Arg", "Déc legacy", "DP", "Favorable", "Texte nouveau",
            "K85.1", None, None, None,
        )
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        sheet.append(mixed_header)
        sheet.append(data_row)
        workbook.save(workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        entry = parsed[17][0]
        # New format has priority: the title is built from "Type désaccord".
        assert entry.titre == "Désaccord sur le DP"
        # arg_ucr comes from "Texte décision" (new), not from "Arg_UCR" (legacy).
        assert entry.arg_ucr == "Texte nouveau"