feat: scoring DP déterministe + parser CPAM nouveau format + sections CRH

- Nouveau module dp_scoring.py : shortlist, scoring multi-critères, select_dp, LLM one-shot fallback avec garde-fous (négation, comorbidité, Z/R-codes)
- Parser CPAM : auto-détection format legacy/ucr_extract, 6 nouveaux champs ControleCPAM (codes_etablissement, libelle_etablissement, codes_controleurs, libelle_controleurs, codes_retenus, ghm_ghs)
- CRH parser : 3 nouvelles sections (diag_sortie, diag_principal, synthese)
- Prompt DP_LLM_ONESHOT externalisé dans templates.py
- Propagation dp_selection dans fusion.py
- 808 tests passent (dont 21 nouveaux CPAM + 77 dp_scoring + 8 CRH)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 22:28:59 +01:00
parent 540e0cb400
commit aa501789fd
12 changed files with 2370 additions and 43 deletions
--- a/tests/test_cpam_parser.py
+++ b/tests/test_cpam_parser.py
@@ -9,13 +9,32 @@ import pytest
 from src.config import ControleCPAM
 from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel

+# En-têtes
+_LEGACY_HEADER = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
+_NEW_HEADER = (
+    "N° OGC", "Type désaccord", "Codes Établissement", "Libellé Établissement",
+    "Codes Contrôleurs", "Libellé Contrôleurs", "Décision UCR", "Codes retenus",
+    "GHM / GHS", "Texte décision",
+)
+

 def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
-    """Crée un fichier xlsx de test avec les lignes données."""
+    """Crée un fichier xlsx de test au format legacy."""
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "OGC Contrôle T2A"
-    ws.append(("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR"))
+    ws.append(_LEGACY_HEADER)
+    for row in rows:
+        ws.append(row)
+    wb.save(path)
+
+
+def _create_new_format_xlsx(rows: list[tuple], path: Path) -> None:
+    """Crée un fichier xlsx de test au format ucr_extract (nouveau)."""
+    wb = openpyxl.Workbook()
+    ws = wb.active
+    ws.title = "UCR Extract"
+    ws.append(_NEW_HEADER)
    for row in rows:
        ws.append(row)
    wb.save(path)
@@ -128,3 +147,292 @@ class TestControleCPAMModel:
        assert ctrl.numero_ogc == 21
        assert ctrl.contre_argumentation == "Ma réponse"
        assert ctrl.sources_reponse == []
+
+    def test_new_fields_defaults(self):
+        """Les 6 nouveaux champs ucr_extract sont None par défaut."""
+        ctrl = ControleCPAM(numero_ogc=1)
+        assert ctrl.codes_etablissement is None
+        assert ctrl.libelle_etablissement is None
+        assert ctrl.codes_controleurs is None
+        assert ctrl.libelle_controleurs is None
+        assert ctrl.codes_retenus is None
+        assert ctrl.ghm_ghs is None
+
+    def test_new_fields_serialization(self):
+        """Les champs ucr_extract apparaissent dans model_dump."""
+        ctrl = ControleCPAM(
+            numero_ogc=10,
+            titre="Désaccord sur le DP",
+            codes_etablissement="K85.1",
+            libelle_etablissement="Pancréatite aiguë biliaire",
+            codes_controleurs="K85.9",
+            libelle_controleurs="Pancréatite aiguë, sans précision",
+            codes_retenus="K85.1",
+            ghm_ghs="06M091 / 1854",
+        )
+        data = ctrl.model_dump()
+        assert data["codes_etablissement"] == "K85.1"
+        assert data["libelle_etablissement"] == "Pancréatite aiguë biliaire"
+        assert data["codes_controleurs"] == "K85.9"
+        assert data["libelle_controleurs"] == "Pancréatite aiguë, sans précision"
+        assert data["codes_retenus"] == "K85.1"
+        assert data["ghm_ghs"] == "06M091 / 1854"
+
+
+class TestParseNewFormat:
+    """Tests pour le format ucr_extract (nouveau)."""
+
+    def test_parse_basic_dp(self, tmp_path):
+        """Parsing basique — désaccord DP avec Codes Contrôleurs."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            # N° OGC, Type, Codes Étab, Lib Étab, Codes Ctrl, Lib Ctrl, Décision, Codes ret, GHM, Texte
+            (17, "DP", "K85.1", "Pancréatite aiguë biliaire", "K85.9",
+             "Pancréatite aiguë SAI", "Défavorable", "K85.9", "06M091 / 1854",
+             "Le contrôleur ne retient pas K85.1"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        assert 17 in result
+        ctrl = result[17][0]
+        assert ctrl.numero_ogc == 17
+        assert ctrl.titre == "Désaccord sur le DP"
+        assert ctrl.dp_ucr == "K85.9"
+        assert ctrl.da_ucr is None
+        assert ctrl.arg_ucr == "Le contrôleur ne retient pas K85.1"
+        assert ctrl.decision_ucr == "UCR confirme avis médecins contrôleurs"
+
+    def test_parse_basic_das(self, tmp_path):
+        """Parsing — désaccord DAS."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (21, "DAS", "E11.40,G63.2", "Diabète+neuropathie", "E11.40",
+             "Diabète type 2", "Favorable", "E11.40,G63.2", None,
+             "L'UCR retient les codes initiaux"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        ctrl = result[21][0]
+        assert ctrl.titre == "Désaccord sur les DAS"
+        assert ctrl.dp_ucr is None
+        assert ctrl.da_ucr == "E11.40"
+        assert ctrl.decision_ucr == "UCR retient"
+
+    def test_parse_dp_plus_das(self, tmp_path):
+        """DP+DAS : premier code → dp_ucr, reste → da_ucr."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (30, "DP+DAS", "K85.1,E11.40", "...", "K85.9,G63.2,I10",
+             "...", "Défavorable", "K85.9,G63.2,I10", None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        ctrl = result[30][0]
+        assert ctrl.titre == "Désaccord sur le DP et les DAS"
+        assert ctrl.dp_ucr == "K85.9"
+        assert ctrl.da_ucr == "G63.2,I10"
+
+    def test_parse_dp_plus_das_single_code(self, tmp_path):
+        """DP+DAS avec un seul code → tout en dp_ucr, pas de da_ucr."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (31, "DP+DAS", "K85.1", "...", "K85.9",
+             "...", "Favorable", None, None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        ctrl = result[31][0]
+        assert ctrl.dp_ucr == "K85.9"
+        assert ctrl.da_ucr is None
+
+    def test_new_fields_populated(self, tmp_path):
+        """Les 6 champs enrichis sont bien remplis depuis les colonnes."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (42, "DP", "E11.40", "Diabète type 2 avec complications",
+             "E11.9", "Diabète type 2 sans complication",
+             "Défavorable", "E11.9", "05M092 / 1780", "Argumentation contrôleur"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        ctrl = result[42][0]
+        assert ctrl.codes_etablissement == "E11.40"
+        assert ctrl.libelle_etablissement == "Diabète type 2 avec complications"
+        assert ctrl.codes_controleurs == "E11.9"
+        assert ctrl.libelle_controleurs == "Diabète type 2 sans complication"
+        assert ctrl.codes_retenus == "E11.9"
+        assert ctrl.ghm_ghs == "05M092 / 1780"
+
+    def test_decision_favorable(self, tmp_path):
+        """Favorable → 'UCR retient'."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (10, "DP", None, None, None, None, "Favorable", None, None, "OK"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[10][0].decision_ucr == "UCR retient"
+
+    def test_decision_defavorable(self, tmp_path):
+        """Défavorable → 'UCR confirme avis médecins contrôleurs'."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (11, "DAS", None, None, None, None, "Défavorable", None, None, "KO"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[11][0].decision_ucr == "UCR confirme avis médecins contrôleurs"
+
+    def test_decision_defavorable_no_accent(self, tmp_path):
+        """Defavorable (sans accent) → même mapping."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (12, "DP", None, None, None, None, "Defavorable", None, None, "KO"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[12][0].decision_ucr == "UCR confirme avis médecins contrôleurs"
+
+    def test_decision_unknown_passthrough(self, tmp_path):
+        """Décision inconnue → passée telle quelle."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (13, "DP", None, None, None, None, "Partielle", None, None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[13][0].decision_ucr == "Partielle"
+
+    def test_type_desaccord_unknown(self, tmp_path):
+        """Type désaccord inconnu → titre 'Désaccord : XXX'."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (14, "Actes", None, None, None, None, "Favorable", None, None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[14][0].titre == "Désaccord : Actes"
+
+    def test_type_desaccord_empty(self, tmp_path):
+        """Type désaccord vide → titre vide."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (15, "", None, None, None, None, "Favorable", None, None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result[15][0].titre == ""
+
+    def test_multiple_ogc_new_format(self, tmp_path):
+        """Plusieurs OGC dans le nouveau format."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (10, "DP", None, None, "K85.9", None, "Favorable", None, None, "Arg 1"),
+            (20, "DAS", None, None, "E11.40", None, "Défavorable", None, None, "Arg 2"),
+            (10, "DAS", None, None, "G63.2", None, "Favorable", None, None, "Arg 3"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        assert len(result) == 2
+        assert len(result[10]) == 2
+        assert len(result[20]) == 1
+        assert result[10][0].dp_ucr == "K85.9"
+        assert result[10][1].da_ucr == "G63.2"
+
+    def test_empty_new_format(self, tmp_path):
+        """Fichier nouveau format vide (seulement en-têtes)."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result == {}
+
+    def test_ogc_none_skipped(self, tmp_path):
+        """Lignes avec N° OGC None sont ignorées."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (None, "DP", None, None, None, None, "Favorable", None, None, "Texte"),
+            (10, "DP", None, None, "K85.1", None, "Favorable", None, None, "OK"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert len(result) == 1
+        assert 10 in result
+
+    def test_ogc_invalid_skipped(self, tmp_path):
+        """N° OGC non-numérique est ignoré."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            ("ABC", "DP", None, None, None, None, "Favorable", None, None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result == {}
+
+
+class TestAutoDetection:
+    """Tests pour l'auto-détection du format."""
+
+    def test_detects_legacy(self, tmp_path):
+        """Format legacy détecté par ses en-têtes."""
+        xlsx = tmp_path / "legacy.xlsx"
+        _create_test_xlsx([
+            (17, "Titre", "Arg", "Décision", None, None, None, None),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert 17 in result
+        assert result[17][0].titre == "Titre"
+
+    def test_detects_new(self, tmp_path):
+        """Format nouveau détecté par ses en-têtes."""
+        xlsx = tmp_path / "new.xlsx"
+        _create_new_format_xlsx([
+            (17, "DP", "K85.1", "Label", "K85.9", "Label2",
+             "Favorable", "K85.1", None, "Texte"),
+        ], xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert 17 in result
+        assert result[17][0].titre == "Désaccord sur le DP"
+
+    def test_unknown_format_returns_empty(self, tmp_path):
+        """En-têtes non reconnues → dict vide."""
+        xlsx = tmp_path / "unknown.xlsx"
+        wb = openpyxl.Workbook()
+        ws = wb.active
+        ws.append(("Col1", "Col2", "Col3"))
+        ws.append((1, "val", "val"))
+        wb.save(xlsx)
+
+        result = parse_cpam_excel(xlsx)
+        assert result == {}
+
+    def test_new_format_priority_over_legacy(self, tmp_path):
+        """Si les deux jeux de colonnes sont présents, le nouveau format prime."""
+        xlsx = tmp_path / "both.xlsx"
+        wb = openpyxl.Workbook()
+        ws = wb.active
+        # En-têtes contenant les deux formats
+        ws.append((
+            "N° OGC", "Titre", "Arg_UCR", "Décision_UCR",
+            "Type désaccord", "Décision UCR", "Texte décision",
+            "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR",
+        ))
+        ws.append((17, "Titre", "Arg", "Déc legacy", "DP", "Favorable", "Texte nouveau",
+                    "K85.1", None, None, None))
+        wb.save(xlsx)
+
+        result = parse_cpam_excel(xlsx)
+
+        assert 17 in result
+        # Le nouveau format est prioritaire → titre construit depuis Type désaccord
+        assert result[17][0].titre == "Désaccord sur le DP"
+        # arg_ucr vient de Texte décision (nouveau), pas de Arg_UCR (legacy)
+        assert result[17][0].arg_ucr == "Texte nouveau"