Files
t2a/tests/test_cpam_parser.py
dom aa501789fd feat: scoring DP déterministe + parser CPAM nouveau format + sections CRH
- Nouveau module dp_scoring.py : shortlist, scoring multi-critères, select_dp,
  LLM one-shot fallback avec garde-fous (négation, comorbidité, Z/R-codes)
- Parser CPAM : auto-détection format legacy/ucr_extract, 6 nouveaux champs
  ControleCPAM (codes_etablissement, libelle, codes_retenus, ghm_ghs)
- CRH parser : 3 nouvelles sections (diag_sortie, diag_principal, synthese)
- Prompt DP_LLM_ONESHOT externalisé dans templates.py
- Propagation dp_selection dans fusion.py
- 808 tests passent (dont 21 nouveaux CPAM + 77 dp_scoring + 8 CRH)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 22:28:59 +01:00

439 lines
16 KiB
Python

"""Tests pour le parser de contrôle CPAM."""
import tempfile
from pathlib import Path
import openpyxl
import pytest
from src.config import ControleCPAM
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
# Header rows written as the first line of the test workbooks.
# Legacy layout (used by _create_test_xlsx).
_LEGACY_HEADER = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
# New "ucr_extract" layout (used by _create_new_format_xlsx).
_NEW_HEADER = (
    "N° OGC", "Type désaccord", "Codes Établissement", "Libellé Établissement",
    "Codes Contrôleurs", "Libellé Contrôleurs", "Décision UCR", "Codes retenus",
    "GHM / GHS", "Texte décision",
)
def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a legacy-layout test workbook to ``path``.

    The active sheet is titled "OGC Contrôle T2A"; its first row is
    ``_LEGACY_HEADER`` and the given ``rows`` follow in order.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "OGC Contrôle T2A"
    # Header first, then the data rows, in a single pass.
    for line in (_LEGACY_HEADER, *rows):
        sheet.append(line)
    workbook.save(path)
def _create_new_format_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a new-layout (ucr_extract) test workbook to ``path``.

    The active sheet is titled "UCR Extract"; its first row is
    ``_NEW_HEADER`` and the given ``rows`` follow in order.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "UCR Extract"
    # Header first, then the data rows, in a single pass.
    for line in (_NEW_HEADER, *rows):
        sheet.append(line)
    workbook.save(path)
class TestParseCpamExcel:
    """Checks ``parse_cpam_excel`` against workbooks in the legacy layout."""

    def test_parse_basic(self, tmp_path):
        """Two rows with distinct OGC numbers produce one control each."""
        workbook_path = tmp_path / "test.xlsx"
        rows = [
            (
                17, "Désaccord sur les DAS", "Argument UCR...", "UCR retient",
                None, None, None, None,
            ),
            (
                21, "Désaccord sur le DP", "Autre argument", "UCR confirme avis",
                "K85.1", None, None, None,
            ),
        ]
        _create_test_xlsx(rows, workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        for ogc in (17, 21):
            assert ogc in parsed
        for ogc in (17, 21):
            assert len(parsed[ogc]) == 1
        das_control = parsed[17][0]
        assert das_control.titre == "Désaccord sur les DAS"
        assert das_control.decision_ucr == "UCR retient"
        assert parsed[21][0].dp_ucr == "K85.1"

    def test_parse_multiple_same_ogc(self, tmp_path):
        """Rows sharing an OGC number are grouped under the same key."""
        workbook_path = tmp_path / "test.xlsx"
        rows = [
            (17, "Titre 1", "Arg 1", "Décision 1", None, None, None, None),
            (17, "Titre 2", "Arg 2", "Décision 2", None, None, None, None),
        ]
        _create_test_xlsx(rows, workbook_path)

        controls = parse_cpam_excel(workbook_path)[17]

        assert len(controls) == 2

    def test_parse_empty_file(self, tmp_path):
        """A workbook holding only the header row parses to an empty dict."""
        workbook_path = tmp_path / "empty.xlsx"
        _create_test_xlsx([], workbook_path)

        assert parse_cpam_excel(workbook_path) == {}

    def test_parse_nonexistent_file(self):
        """A path that does not exist parses to an empty dict."""
        assert parse_cpam_excel("/nonexistent/path.xlsx") == {}

    def test_parse_optional_fields(self, tmp_path):
        """The DP/DA/DR/Actes columns are exposed on the parsed control."""
        workbook_path = tmp_path / "test.xlsx"
        row = (42, "Titre", "Arg", "Décision", "E11.40", "G63.2", "E11.9", "ABCD123")
        _create_test_xlsx([row], workbook_path)

        control = parse_cpam_excel(workbook_path)[42][0]

        assert control.dp_ucr == "E11.40"
        assert control.da_ucr == "G63.2"
        assert control.dr_ucr == "E11.9"
        assert control.actes_ucr == "ABCD123"
class TestMatchDossierOGC:
    """Checks ``match_dossier_ogc`` lookups against a small in-memory mapping."""

    def setup_method(self):
        """Build a mapping with one control for OGC 17 and one for OGC 21."""
        control_17 = ControleCPAM(numero_ogc=17, titre="Test 17")
        control_21 = ControleCPAM(numero_ogc=21, titre="Test 21")
        self.cpam_data = {17: [control_17], 21: [control_21]}

    def test_match_found(self):
        """Dossier "17_23100690" resolves to the single control of OGC 17."""
        matches = match_dossier_ogc("17_23100690", self.cpam_data)

        assert len(matches) == 1
        assert matches[0].numero_ogc == 17

    def test_match_not_found(self):
        """Dossier "15_23096332" has no entry in the data: empty list."""
        assert match_dossier_ogc("15_23096332", self.cpam_data) == []

    def test_match_no_prefix(self):
        """A dossier name without an underscore ("nodash") yields an empty list."""
        assert match_dossier_ogc("nodash", self.cpam_data) == []

    def test_match_empty_data(self):
        """An empty mapping yields an empty list."""
        assert match_dossier_ogc("17_23100690", {}) == []
class TestControleCPAMModel:
    """Checks construction and ``model_dump`` of the ``ControleCPAM`` model."""

    def test_serialization(self):
        """Dumped data carries the given values; contre_argumentation is None."""
        dumped = ControleCPAM(
            numero_ogc=17,
            titre="Désaccord sur les DAS",
            arg_ucr="Argument...",
            decision_ucr="UCR retient",
            dp_ucr="K85.1",
        ).model_dump()

        assert dumped["numero_ogc"] == 17
        assert dumped["dp_ucr"] == "K85.1"
        assert dumped["contre_argumentation"] is None

    def test_deserialization(self):
        """A model built from a dict keeps its values; sources_reponse is []."""
        payload = {
            "numero_ogc": 21,
            "titre": "Test",
            "arg_ucr": "Arg",
            "decision_ucr": "Décision",
            "contre_argumentation": "Ma réponse",
        }

        control = ControleCPAM(**payload)

        assert control.numero_ogc == 21
        assert control.contre_argumentation == "Ma réponse"
        assert control.sources_reponse == []

    def test_new_fields_defaults(self):
        """The six new ucr_extract fields are None by default."""
        control = ControleCPAM(numero_ogc=1)

        for field_name in (
            "codes_etablissement",
            "libelle_etablissement",
            "codes_controleurs",
            "libelle_controleurs",
            "codes_retenus",
            "ghm_ghs",
        ):
            assert getattr(control, field_name) is None

    def test_new_fields_serialization(self):
        """The ucr_extract fields show up in ``model_dump``."""
        new_fields = {
            "codes_etablissement": "K85.1",
            "libelle_etablissement": "Pancréatite aiguë biliaire",
            "codes_controleurs": "K85.9",
            "libelle_controleurs": "Pancréatite aiguë, sans précision",
            "codes_retenus": "K85.1",
            "ghm_ghs": "06M091 / 1854",
        }

        dumped = ControleCPAM(
            numero_ogc=10,
            titre="Désaccord sur le DP",
            **new_fields,
        ).model_dump()

        for field_name, expected in new_fields.items():
            assert dumped[field_name] == expected
class TestParseNewFormat:
    """Tests for the new (ucr_extract) workbook layout."""

    @staticmethod
    def _parse_rows(tmp_path, rows):
        """Write ``rows`` to ``new.xlsx`` under ``tmp_path`` and parse the file."""
        workbook_path = tmp_path / "new.xlsx"
        _create_new_format_xlsx(rows, workbook_path)
        return parse_cpam_excel(workbook_path)

    def test_parse_basic_dp(self, tmp_path):
        """Basic parsing: a DP disagreement carrying controller codes."""
        # Column order: N° OGC, type, establishment codes, establishment label,
        # controller codes, controller label, decision, retained codes,
        # GHM / GHS, decision text.
        row = (
            17, "DP", "K85.1", "Pancréatite aiguë biliaire", "K85.9",
            "Pancréatite aiguë SAI", "Défavorable", "K85.9", "06M091 / 1854",
            "Le contrôleur ne retient pas K85.1",
        )

        parsed = self._parse_rows(tmp_path, [row])

        assert 17 in parsed
        control = parsed[17][0]
        assert control.numero_ogc == 17
        assert control.titre == "Désaccord sur le DP"
        assert control.dp_ucr == "K85.9"
        assert control.da_ucr is None
        assert control.arg_ucr == "Le contrôleur ne retient pas K85.1"
        assert control.decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_parse_basic_das(self, tmp_path):
        """Parsing of a DAS disagreement."""
        row = (
            21, "DAS", "E11.40,G63.2", "Diabète+neuropathie", "E11.40",
            "Diabète type 2", "Favorable", "E11.40,G63.2", None,
            "L'UCR retient les codes initiaux",
        )

        control = self._parse_rows(tmp_path, [row])[21][0]

        assert control.titre == "Désaccord sur les DAS"
        assert control.dp_ucr is None
        assert control.da_ucr == "E11.40"
        assert control.decision_ucr == "UCR retient"

    def test_parse_dp_plus_das(self, tmp_path):
        """DP+DAS: the first code goes to dp_ucr, the rest to da_ucr."""
        row = (
            30, "DP+DAS", "K85.1,E11.40", "...", "K85.9,G63.2,I10",
            "...", "Défavorable", "K85.9,G63.2,I10", None, "Texte",
        )

        control = self._parse_rows(tmp_path, [row])[30][0]

        assert control.titre == "Désaccord sur le DP et les DAS"
        assert control.dp_ucr == "K85.9"
        assert control.da_ucr == "G63.2,I10"

    def test_parse_dp_plus_das_single_code(self, tmp_path):
        """DP+DAS with a single code: it lands in dp_ucr, da_ucr stays None."""
        row = (
            31, "DP+DAS", "K85.1", "...", "K85.9",
            "...", "Favorable", None, None, "Texte",
        )

        control = self._parse_rows(tmp_path, [row])[31][0]

        assert control.dp_ucr == "K85.9"
        assert control.da_ucr is None

    def test_new_fields_populated(self, tmp_path):
        """The six enriched fields are filled from their columns."""
        row = (
            42, "DP", "E11.40", "Diabète type 2 avec complications",
            "E11.9", "Diabète type 2 sans complication",
            "Défavorable", "E11.9", "05M092 / 1780", "Argumentation contrôleur",
        )

        control = self._parse_rows(tmp_path, [row])[42][0]

        assert control.codes_etablissement == "E11.40"
        assert control.libelle_etablissement == "Diabète type 2 avec complications"
        assert control.codes_controleurs == "E11.9"
        assert control.libelle_controleurs == "Diabète type 2 sans complication"
        assert control.codes_retenus == "E11.9"
        assert control.ghm_ghs == "05M092 / 1780"

    def test_decision_favorable(self, tmp_path):
        """'Favorable' is mapped to 'UCR retient'."""
        row = (10, "DP", None, None, None, None, "Favorable", None, None, "OK")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[10][0].decision_ucr == "UCR retient"

    def test_decision_defavorable(self, tmp_path):
        """'Défavorable' is mapped to 'UCR confirme avis médecins contrôleurs'."""
        row = (11, "DAS", None, None, None, None, "Défavorable", None, None, "KO")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[11][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_defavorable_no_accent(self, tmp_path):
        """'Defavorable' (no accent) gets the same mapping."""
        row = (12, "DP", None, None, None, None, "Defavorable", None, None, "KO")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[12][0].decision_ucr == "UCR confirme avis médecins contrôleurs"

    def test_decision_unknown_passthrough(self, tmp_path):
        """An unrecognised decision is passed through unchanged."""
        row = (13, "DP", None, None, None, None, "Partielle", None, None, "Texte")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[13][0].decision_ucr == "Partielle"

    def test_type_desaccord_unknown(self, tmp_path):
        """An unrecognised disagreement type gives the title 'Désaccord : XXX'."""
        row = (14, "Actes", None, None, None, None, "Favorable", None, None, "Texte")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[14][0].titre == "Désaccord : Actes"

    def test_type_desaccord_empty(self, tmp_path):
        """An empty disagreement type gives an empty title."""
        row = (15, "", None, None, None, None, "Favorable", None, None, "Texte")

        parsed = self._parse_rows(tmp_path, [row])

        assert parsed[15][0].titre == ""

    def test_multiple_ogc_new_format(self, tmp_path):
        """Several OGC numbers in the new layout."""
        rows = [
            (10, "DP", None, None, "K85.9", None, "Favorable", None, None, "Arg 1"),
            (20, "DAS", None, None, "E11.40", None, "Défavorable", None, None, "Arg 2"),
            (10, "DAS", None, None, "G63.2", None, "Favorable", None, None, "Arg 3"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert len(parsed) == 2
        assert len(parsed[10]) == 2
        assert len(parsed[20]) == 1
        controls_10 = parsed[10]
        assert controls_10[0].dp_ucr == "K85.9"
        assert controls_10[1].da_ucr == "G63.2"

    def test_empty_new_format(self, tmp_path):
        """A new-layout file with only the header row parses to an empty dict."""
        assert self._parse_rows(tmp_path, []) == {}

    def test_ogc_none_skipped(self, tmp_path):
        """Rows whose N° OGC is None are ignored."""
        rows = [
            (None, "DP", None, None, None, None, "Favorable", None, None, "Texte"),
            (10, "DP", None, None, "K85.1", None, "Favorable", None, None, "OK"),
        ]

        parsed = self._parse_rows(tmp_path, rows)

        assert len(parsed) == 1
        assert 10 in parsed

    def test_ogc_invalid_skipped(self, tmp_path):
        """A non-numeric N° OGC is ignored."""
        row = ("ABC", "DP", None, None, None, None, "Favorable", None, None, "Texte")

        assert self._parse_rows(tmp_path, [row]) == {}
class TestAutoDetection:
    """Tests for automatic detection of the workbook layout."""

    def test_detects_legacy(self, tmp_path):
        """The legacy layout is recognised from its header row."""
        workbook_path = tmp_path / "legacy.xlsx"
        row = (17, "Titre", "Arg", "Décision", None, None, None, None)
        _create_test_xlsx([row], workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        assert parsed[17][0].titre == "Titre"

    def test_detects_new(self, tmp_path):
        """The new layout is recognised from its header row."""
        workbook_path = tmp_path / "new.xlsx"
        row = (
            17, "DP", "K85.1", "Label", "K85.9", "Label2",
            "Favorable", "K85.1", None, "Texte",
        )
        _create_new_format_xlsx([row], workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        assert parsed[17][0].titre == "Désaccord sur le DP"

    def test_unknown_format_returns_empty(self, tmp_path):
        """Unrecognised headers parse to an empty dict."""
        workbook_path = tmp_path / "unknown.xlsx"
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        for line in (("Col1", "Col2", "Col3"), (1, "val", "val")):
            sheet.append(line)
        workbook.save(workbook_path)

        assert parse_cpam_excel(workbook_path) == {}

    def test_new_format_priority_over_legacy(self, tmp_path):
        """When both column sets are present, the new layout takes priority."""
        workbook_path = tmp_path / "both.xlsx"
        # Header row mixing the columns of both layouts.
        mixed_header = (
            "N° OGC", "Titre", "Arg_UCR", "Décision_UCR",
            "Type désaccord", "Décision UCR", "Texte décision",
            "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR",
        )
        data_row = (
            17, "Titre", "Arg", "Déc legacy", "DP", "Favorable", "Texte nouveau",
            "K85.1", None, None, None,
        )
        workbook = openpyxl.Workbook()
        sheet = workbook.active
        for line in (mixed_header, data_row):
            sheet.append(line)
        workbook.save(workbook_path)

        parsed = parse_cpam_excel(workbook_path)

        assert 17 in parsed
        control = parsed[17][0]
        # New layout wins: the title is built from "Type désaccord".
        assert control.titre == "Désaccord sur le DP"
        # arg_ucr comes from "Texte décision" (new), not "Arg_UCR" (legacy).
        assert control.arg_ucr == "Texte nouveau"