feat: cache Ollama + parallélisation ThreadPool + filtrage DAS renforcé + modules GHM/CPAM/export RUM

- Cache persistant JSON thread-safe pour les résultats Ollama (invalidation par modèle)
- Parallélisation des appels Ollama (ThreadPoolExecutor, 2 workers)
- 6 nouvelles règles de filtrage DAS parasites (doublons, ponctuation, OCR, labo, fragments)
- Client Ollama centralisé (mode JSON natif + retry)
- Module GHM (estimation CMD/sévérité)
- Module contrôle CPAM (parser + contre-argumentation RAG)
- Export RUM (format RSS)
- Viewer enrichi (détail dossier)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-12 13:44:34 +01:00
parent a00e5f1147
commit a58398f5d4
25 changed files with 2872 additions and 97 deletions

130
tests/test_cpam_parser.py Normal file
View File

@@ -0,0 +1,130 @@
"""Tests pour le parser de contrôle CPAM."""
import tempfile
from pathlib import Path
import openpyxl
import pytest
from src.config import ControleCPAM
from src.control.cpam_parser import match_dossier_ogc, parse_cpam_excel
def _create_test_xlsx(rows: list[tuple], path: Path) -> None:
    """Write a test workbook to *path*: one header row followed by *rows*.

    Args:
        rows: Data rows appended, in order, below the header row.
        path: Destination of the saved ``.xlsx`` file.
    """
    header = ("N° OGC", "Titre", "Arg_UCR", "Décision_UCR", "DP_UCR", "DA_UCR", "DR_UCR", "Actes_UCR")
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "OGC Contrôle T2A"
    # The header always comes first; data rows follow in the order given.
    for line in (header, *rows):
        sheet.append(line)
    workbook.save(path)
class TestParseCpamExcel:
    """Tests for ``parse_cpam_excel`` on workbooks built by ``_create_test_xlsx``."""

    def test_parse_basic(self, tmp_path):
        """Rows with distinct OGC numbers are each returned under their own key."""
        xlsx = tmp_path / "test.xlsx"
        _create_test_xlsx([
            (17, "Désaccord sur les DAS", "Argument UCR...", "UCR retient", None, None, None, None),
            (21, "Désaccord sur le DP", "Autre argument", "UCR confirme avis", "K85.1", None, None, None),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert 17 in result
        assert 21 in result
        assert len(result[17]) == 1
        assert len(result[21]) == 1
        assert result[17][0].titre == "Désaccord sur les DAS"
        assert result[17][0].decision_ucr == "UCR retient"
        assert result[21][0].dp_ucr == "K85.1"

    def test_parse_multiple_same_ogc(self, tmp_path):
        """Two rows sharing one OGC number end up in the same list."""
        xlsx = tmp_path / "test.xlsx"
        _create_test_xlsx([
            (17, "Titre 1", "Arg 1", "Décision 1", None, None, None, None),
            (17, "Titre 2", "Arg 2", "Décision 2", None, None, None, None),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        assert len(result[17]) == 2

    def test_parse_empty_file(self, tmp_path):
        """A workbook holding only the header row parses to an empty dict."""
        xlsx = tmp_path / "empty.xlsx"
        _create_test_xlsx([], xlsx)
        result = parse_cpam_excel(xlsx)
        assert result == {}

    def test_parse_nonexistent_file(self, tmp_path):
        """A path that does not exist parses to an empty dict."""
        # Build the missing path inside tmp_path so the test never depends on
        # what exists at a fixed absolute location; still passed as a str.
        missing = str(tmp_path / "does_not_exist.xlsx")
        result = parse_cpam_excel(missing)
        assert result == {}

    def test_parse_optional_fields(self, tmp_path):
        """The DP/DA/DR/Actes columns are exposed on the parsed control."""
        xlsx = tmp_path / "test.xlsx"
        _create_test_xlsx([
            (42, "Titre", "Arg", "Décision", "E11.40", "G63.2", "E11.9", "ABCD123"),
        ], xlsx)
        result = parse_cpam_excel(xlsx)
        ctrl = result[42][0]
        assert ctrl.dp_ucr == "E11.40"
        assert ctrl.da_ucr == "G63.2"
        assert ctrl.dr_ucr == "E11.9"
        assert ctrl.actes_ucr == "ABCD123"
class TestMatchDossierOGC:
    """Tests for ``match_dossier_ogc`` against a small in-memory CPAM mapping."""

    def setup_method(self):
        """Prepare a mapping with one control for OGC 17 and one for OGC 21."""
        self.cpam_data = {
            numero: [ControleCPAM(numero_ogc=numero, titre=f"Test {numero}")]
            for numero in (17, 21)
        }

    def test_match_found(self):
        """The dossier id "17_23100690" returns the single control of OGC 17."""
        matches = match_dossier_ogc("17_23100690", self.cpam_data)
        assert len(matches) == 1
        first = matches[0]
        assert first.numero_ogc == 17

    def test_match_not_found(self):
        """The dossier id "15_23096332" has no entry and returns an empty list."""
        matches = match_dossier_ogc("15_23096332", self.cpam_data)
        assert matches == []

    def test_match_no_prefix(self):
        """The identifier "nodash" returns an empty list."""
        matches = match_dossier_ogc("nodash", self.cpam_data)
        assert matches == []

    def test_match_empty_data(self):
        """An empty mapping returns an empty list."""
        no_data = {}
        matches = match_dossier_ogc("17_23100690", no_data)
        assert matches == []
class TestControleCPAMModel:
    """Round-trip tests for the ``ControleCPAM`` model."""

    def test_serialization(self):
        """``model_dump`` exposes the given fields and ``None`` for the unset reply."""
        fields = {
            "numero_ogc": 17,
            "titre": "Désaccord sur les DAS",
            "arg_ucr": "Argument...",
            "decision_ucr": "UCR retient",
            "dp_ucr": "K85.1",
        }
        dumped = ControleCPAM(**fields).model_dump()
        assert dumped["numero_ogc"] == 17
        assert dumped["dp_ucr"] == "K85.1"
        assert dumped["contre_argumentation"] is None

    def test_deserialization(self):
        """A model built from a dict keeps its values; ``sources_reponse`` is empty."""
        payload = {
            "numero_ogc": 21,
            "titre": "Test",
            "arg_ucr": "Arg",
            "decision_ucr": "Décision",
            "contre_argumentation": "Ma réponse",
        }
        controle = ControleCPAM(**payload)
        assert controle.numero_ogc == 21
        assert controle.contre_argumentation == "Ma réponse"
        assert controle.sources_reponse == []

146
tests/test_cpam_response.py Normal file
View File

@@ -0,0 +1,146 @@
"""Tests pour la génération de contre-argumentation CPAM."""
from unittest.mock import patch
import pytest
from src.config import ControleCPAM, Diagnostic, DossierMedical, RAGSource, Sejour
from src.control.cpam_response import _build_cpam_prompt, _format_response, generate_cpam_response
def _make_dossier() -> DossierMedical:
    """Build the medical record shared by the tests of this module."""
    stay = Sejour(sexe="M", age=65, duree_sejour=5)
    principal = Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="K81.0")
    associated = [Diagnostic(texte="Iléus réflexe", cim10_suggestion="K56.0")]
    return DossierMedical(
        source_file="test.pdf",
        document_type="crh",
        sejour=stay,
        diagnostic_principal=principal,
        diagnostics_associes=associated,
    )
def _make_controle() -> ControleCPAM:
    """Build the CPAM control shared by the tests of this module."""
    fields = dict(
        numero_ogc=17,
        titre="Désaccord sur les DAS",
        arg_ucr="L'UCR confirme l'avis des médecins contrôleurs au motif que le DAS K56.0 n'est pas justifié.",
        decision_ucr="UCR confirme avis médecins contrôleurs",
        dp_ucr=None,
        da_ucr="K56.0",
    )
    return ControleCPAM(**fields)
class TestBuildPrompt:
    """Tests for the text produced by ``_build_cpam_prompt``."""

    def test_prompt_contains_dossier_info(self):
        """Diagnoses, the DP code and the patient age appear in the prompt."""
        prompt = _build_cpam_prompt(_make_dossier(), _make_controle(), [])
        for fragment in ("Cholécystite aiguë", "K81.0", "Iléus réflexe", "65 ans"):
            assert fragment in prompt

    def test_prompt_contains_cpam_argument(self):
        """The UCR argument and decision are embedded verbatim."""
        controle = _make_controle()
        prompt = _build_cpam_prompt(_make_dossier(), controle, [])
        assert controle.arg_ucr in prompt
        assert controle.decision_ucr in prompt

    def test_prompt_contains_codes_contestes(self):
        """The DA code proposed by the UCR is listed in the prompt."""
        prompt = _build_cpam_prompt(_make_dossier(), _make_controle(), [])
        assert "DA proposés par UCR : K56.0" in prompt

    def test_prompt_contains_rag_sources(self):
        """Display names of the source documents and the page number are rendered."""
        guide_source = {"document": "guide_methodo", "page": 64, "extrait": "Texte du guide..."}
        cim10_source = {"document": "cim10", "code": "K56.0", "extrait": "Iléus paralytique..."}
        prompt = _build_cpam_prompt(
            _make_dossier(),
            _make_controle(),
            [guide_source, cim10_source],
        )
        assert "Guide Méthodologique MCO 2026" in prompt
        assert "CIM-10 FR 2026" in prompt
        assert "page 64" in prompt
class TestFormatResponse:
    """Tests for ``_format_response``, which renders a parsed reply as text."""

    def test_full_response(self):
        """All provided sections are rendered, except points of agreement set to "Aucun"."""
        parsed = dict(
            analyse_contestation="La CPAM conteste le DAS K56.0",
            points_accord="Aucun",
            contre_arguments="Le guide méthodologique précise...",
            references="Guide métho p.64",
            conclusion="Le DAS est justifié",
        )
        rendered = _format_response(parsed)
        for heading in ("ANALYSE DE LA CONTESTATION", "CONTRE-ARGUMENTS", "CONCLUSION"):
            assert heading in rendered
        # "Aucun" must not produce a points-of-agreement section.
        assert "POINTS D'ACCORD" not in rendered

    def test_partial_response(self):
        """A reply holding only two keys still renders those two sections."""
        parsed = dict(contre_arguments="Arguments...", conclusion="Conclusion...")
        rendered = _format_response(parsed)
        assert "CONTRE-ARGUMENTS" in rendered
        assert "CONCLUSION" in rendered

    def test_empty_response(self):
        """An empty dict renders as the empty string."""
        rendered = _format_response({})
        assert rendered == ""
class TestGenerateResponse:
    """Tests for ``generate_cpam_response`` with RAG search and Ollama mocked."""

    @patch("src.control.cpam_response.call_ollama")
    @patch("src.control.cpam_response._search_rag_for_control")
    def test_generate_success(self, mock_rag, mock_ollama):
        """With one RAG hit and an Ollama reply, text and one source are returned."""
        rag_hit = {"document": "guide_methodo", "page": 64, "extrait": "Texte guide"}
        mock_rag.return_value = [rag_hit]
        mock_ollama.return_value = {
            "analyse_contestation": "Analyse...",
            "contre_arguments": "Contre-arguments...",
            "conclusion": "Conclusion...",
        }
        text, sources = generate_cpam_response(_make_dossier(), _make_controle())
        assert "Contre-arguments..." in text
        assert len(sources) == 1
        only_source = sources[0]
        assert only_source.document == "guide_methodo"
        mock_ollama.assert_called_once()

    @patch("src.control.cpam_response.call_ollama")
    @patch("src.control.cpam_response._search_rag_for_control")
    def test_generate_ollama_unavailable(self, mock_rag, mock_ollama):
        """When Ollama returns ``None`` the result is an empty text and no sources."""
        mock_rag.return_value = []
        mock_ollama.return_value = None
        text, sources = generate_cpam_response(_make_dossier(), _make_controle())
        assert text == ""
        assert sources == []

View File

@@ -104,3 +104,59 @@ class TestIsValidDiagnosticText:
def test_accept_sepsis(self):
    """The text "Sepsis sévère" is accepted."""
    texte = "Sepsis sévère"
    assert is_valid_diagnostic_text(texte)
# --- Règle 5 modifiée : mots dupliqués (2 mots identiques) ---
def test_reject_absence_absence(self):
    """The duplicated word "Absence absence" is rejected."""
    texte = "Absence absence"
    assert not is_valid_diagnostic_text(texte)
def test_reject_anticoagulant_anticoagulant(self):
    """The duplicated word "Anticoagulant anticoagulant" is rejected."""
    texte = "Anticoagulant anticoagulant"
    assert not is_valid_diagnostic_text(texte)
def test_reject_ventilation_ventilation(self):
    """The duplicated word "Ventilation ventilation" is rejected."""
    texte = "Ventilation ventilation"
    assert not is_valid_diagnostic_text(texte)
# --- Règle 7 : ponctuation initiale ---
def test_reject_comma_prefix(self):
    """A text starting with a comma is rejected."""
    texte = ", sans précision"
    assert not is_valid_diagnostic_text(texte)
def test_reject_dash_prefix(self):
    """A text starting with a dash is rejected."""
    texte = "- masse musculaire"
    assert not is_valid_diagnostic_text(texte)
# --- Règle 8 : valeurs numériques OCR "À X.X" ---
def test_reject_a_accent_value(self):
    """The numeric fragment "À 0.1" is rejected."""
    texte = "À 0.1"
    assert not is_valid_diagnostic_text(texte)
def test_reject_a_accent_value_3(self):
    """The numeric fragment "À 3.0" is rejected."""
    texte = "À 3.0"
    assert not is_valid_diagnostic_text(texte)
def test_reject_a_value(self):
    """The numeric fragment "A 12,5" (unaccented, comma decimal) is rejected."""
    texte = "A 12,5"
    assert not is_valid_diagnostic_text(texte)
# --- Règle 9 : crochets (artefacts OCR) ---
def test_reject_bracket_fragment(self):
    """A text containing an opening square bracket is rejected."""
    texte = "Episode [episode"
    assert not is_valid_diagnostic_text(texte)
def test_reject_closing_bracket(self):
    """A text containing a closing square bracket is rejected."""
    texte = "valeur]"
    assert not is_valid_diagnostic_text(texte)
# --- Règle 10 : termes de laboratoire isolés ---
def test_reject_hemoglobine(self):
    """The single word "Hémoglobine" is rejected."""
    texte = "Hémoglobine"
    assert not is_valid_diagnostic_text(texte)
def test_reject_creatinine(self):
    """The single word "Créatinine" is rejected."""
    texte = "Créatinine"
    assert not is_valid_diagnostic_text(texte)
def test_accept_hemoglobine_in_phrase(self):
    """A laboratory term used within a clinical phrase is accepted."""
    texte = "Hémoglobine basse avec anémie"
    assert is_valid_diagnostic_text(texte)
# --- Règle 11 : fragments anatomiques courts ---
def test_reject_dans_la_vessie(self):
    """The short fragment "Dans la vessie" is rejected."""
    texte = "Dans la vessie"
    assert not is_valid_diagnostic_text(texte)
def test_reject_le_rein(self):
    """The short fragment "Le rein" is rejected."""
    texte = "Le rein"
    assert not is_valid_diagnostic_text(texte)
def test_accept_long_fragment(self):
    """A long text starting with 'Dans' can be legitimate and is accepted."""
    texte = "Dans le cadre d'une insuffisance rénale chronique terminale"
    assert is_valid_diagnostic_text(texte)

189
tests/test_ghm.py Normal file
View File

@@ -0,0 +1,189 @@
"""Tests pour le module d'estimation GHM."""
import pytest
from src.config import ActeCCAM, Diagnostic, DossierMedical
from src.medical.ghm import estimate_ghm, find_cmd, _detect_type_ghm, _compute_severity
class TestFindCMD:
    """Tests for ``find_cmd``, which returns a ``(cmd, libelle)`` pair for a CIM-10 code."""

    def test_k85_hepatobilaire(self):
        """K85.1 maps to CMD 07 with a hepatobiliary/pancreatic label."""
        cmd, libelle = find_cmd("K85.1")
        assert cmd == "07"
        lowered = libelle.lower()
        assert any(term in lowered for term in ("hépatobiliaire", "pancréat"))

    def test_j18_respiratoire(self):
        """J18 maps to CMD 04."""
        code_cmd, _ = find_cmd("J18")
        assert code_cmd == "04"

    def test_n17_renal(self):
        """N17 maps to CMD 11."""
        code_cmd, _ = find_cmd("N17")
        assert code_cmd == "11"

    def test_n40_genital_masculin(self):
        """N40 maps to CMD 12."""
        code_cmd, _ = find_cmd("N40")
        assert code_cmd == "12"

    def test_f10_toxicomanie(self):
        """F10 maps to CMD 20."""
        code_cmd, _ = find_cmd("F10")
        assert code_cmd == "20"

    def test_z00_facteurs(self):
        """Z00 maps to CMD 23."""
        code_cmd, _ = find_cmd("Z00")
        assert code_cmd == "23"

    def test_k40_digestif(self):
        """K40 maps to CMD 06."""
        code_cmd, _ = find_cmd("K40")
        assert code_cmd == "06"

    def test_b20_vih(self):
        """B20 maps to CMD 25."""
        code_cmd, _ = find_cmd("B20")
        assert code_cmd == "25"

    def test_t25_brulures(self):
        """T25 maps to CMD 22."""
        code_cmd, _ = find_cmd("T25")
        assert code_cmd == "22"

    def test_s72_traumatismes(self):
        """S72 maps to CMD 21."""
        code_cmd, _ = find_cmd("S72")
        assert code_cmd == "21"

    def test_code_with_dot(self):
        """A dotted code (K85.1) resolves to CMD 07."""
        code_cmd, _ = find_cmd("K85.1")
        assert code_cmd == "07"

    def test_code_lowercase(self):
        """A lowercase code (k85.1) resolves to CMD 07."""
        code_cmd, _ = find_cmd("k85.1")
        assert code_cmd == "07"

    def test_empty_code(self):
        """An empty string yields ``(None, None)``."""
        code_cmd, label = find_cmd("")
        assert code_cmd is None
        assert label is None

    def test_none_code(self):
        """``None`` yields ``(None, None)``."""
        code_cmd, label = find_cmd(None)
        assert code_cmd is None
        assert label is None

    def test_short_code(self):
        """A two-character code (K8) yields no CMD."""
        code_cmd, _ = find_cmd("K8")
        assert code_cmd is None
class TestDetectTypeGHM:
    """Tests for ``_detect_type_ghm``, which returns "C", "K" or "M" from CCAM acts."""

    def test_chirurgical(self):
        """The act HMFC004 alone gives type "C"."""
        chirurgie = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        assert _detect_type_ghm([chirurgie]) == "C"

    def test_interventionnel(self):
        """The act ZCQM001 alone gives type "K"."""
        echographie = ActeCCAM(texte="Échographie", code_ccam_suggestion="ZCQM001")
        assert _detect_type_ghm([echographie]) == "K"

    def test_medical_no_actes(self):
        """An empty act list gives type "M"."""
        aucun_acte = []
        assert _detect_type_ghm(aucun_acte) == "M"

    def test_medical_no_code(self):
        """An act without a CCAM code gives type "M"."""
        sans_code = ActeCCAM(texte="Biopsie", code_ccam_suggestion=None)
        assert _detect_type_ghm([sans_code]) == "M"

    def test_chirurgical_overrides_interventionnel(self):
        """With both ZCQM001 and HMFC004 present the result is "C"."""
        echo = ActeCCAM(texte="Écho", code_ccam_suggestion="ZCQM001")
        chirurgie = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        assert _detect_type_ghm([echo, chirurgie]) == "C"
class TestSeverityLevels:
    """Tests for ``_compute_severity``, which returns ``(level, cma_count, cms_count)``."""

    def test_no_cma_level_1(self):
        """A single DAS without CMA flag gives level 1."""
        associes = [Diagnostic(texte="HTA", cim10_suggestion="I10")]
        level, _, _ = _compute_severity(associes)
        assert level == 1

    def test_two_cma_level_2(self):
        """Two CMA diagnoses give level 2 and a CMA count of 2."""
        associes = [
            Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
        ]
        level, cma_count, _ = _compute_severity(associes)
        assert level == 2
        assert cma_count == 2

    def test_one_cms_level_3(self):
        """One CMS diagnosis gives level 3 and a CMS count of 1."""
        sepsis = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9", est_cma=True, est_cms=True)
        level, _, cms_count = _compute_severity([sepsis])
        assert level == 3
        assert cms_count == 1

    def test_two_cms_level_4(self):
        """Two CMS diagnoses give level 4 and a CMS count of 2."""
        associes = [
            Diagnostic(texte="Sepsis", cim10_suggestion="A41.9", est_cma=True, est_cms=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True, est_cms=True),
        ]
        level, _, cms_count = _compute_severity(associes)
        assert level == 4
        assert cms_count == 2

    def test_three_cma_level_3(self):
        """Three CMA diagnoses give level 3 and a CMA count of 3."""
        associes = [
            Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
            Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
            Diagnostic(texte="Diabète", cim10_suggestion="E11.9", est_cma=True),
        ]
        level, cma_count, _ = _compute_severity(associes)
        assert level == 3
        assert cma_count == 3
class TestEstimateGHM:
    """End-to-end tests for ``estimate_ghm`` on small ``DossierMedical`` records."""

    def test_chirurgical_with_cma(self):
        """DP K80.1 + act HMFC004 + two CMAs estimate to "07C??2"."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K80.1"),
            actes_ccam=[ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")],
            diagnostics_associes=[
                Diagnostic(texte="FA", cim10_suggestion="I48.9", est_cma=True),
                Diagnostic(texte="IRA", cim10_suggestion="N17.9", est_cma=True),
            ],
        )
        estimation = estimate_ghm(dossier)
        assert estimation.cmd == "07"
        assert estimation.type_ghm == "C"
        assert estimation.severite == 2
        assert estimation.ghm_approx == "07C??2"
        assert estimation.cma_count == 2

    def test_medical_sans_actes(self):
        """DP J18.9 with no acts estimates to "04M??1"."""
        pneumonie = Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9")
        estimation = estimate_ghm(DossierMedical(diagnostic_principal=pneumonie))
        assert estimation.cmd == "04"
        assert estimation.type_ghm == "M"
        assert estimation.severite == 1
        assert estimation.ghm_approx == "04M??1"

    def test_dp_absent(self):
        """Without a DP there is no CMD, no GHM, and a "DP absent" alert."""
        estimation = estimate_ghm(DossierMedical())
        assert estimation.cmd is None
        assert estimation.ghm_approx is None
        assert any("DP absent" in alerte for alerte in estimation.alertes)

    def test_dp_sans_code(self):
        """A DP without a CIM-10 code gives no CMD and a "sans code" alert."""
        sans_code = Diagnostic(texte="Douleur thoracique")
        estimation = estimate_ghm(DossierMedical(diagnostic_principal=sans_code))
        assert estimation.cmd is None
        assert any("sans code" in alerte for alerte in estimation.alertes)

    def test_dp_symptomatique(self):
        """DP R07.4 gives CMD 23 and a "symptomatique" alert."""
        symptome = Diagnostic(texte="Douleur thoracique", cim10_suggestion="R07.4")
        estimation = estimate_ghm(DossierMedical(diagnostic_principal=symptome))
        assert estimation.cmd == "23"
        assert any("symptomatique" in alerte for alerte in estimation.alertes)

108
tests/test_ollama_cache.py Normal file
View File

@@ -0,0 +1,108 @@
"""Tests unitaires pour le cache Ollama persistant."""
import json
import threading
import pytest
from src.medical.ollama_cache import OllamaCache
class TestOllamaCache:
    """Unit tests for ``OllamaCache`` backed by a JSON file under ``tmp_path``."""

    def test_get_miss(self, tmp_path):
        """Reading an unknown key from a fresh cache returns ``None``."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert store.get("HTA", "das") is None

    def test_put_and_get(self, tmp_path):
        """A stored value is returned unchanged by ``get``."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"}
        store.put("HTA", "das", payload)
        assert store.get("HTA", "das") == payload

    def test_key_normalization(self, tmp_path):
        """" HTA " and "hta" address the same entry."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high"}
        store.put(" HTA ", "das", payload)
        assert store.get("hta", "das") == payload

    def test_different_types_different_keys(self, tmp_path):
        """The same text stored as "dp" and as "das" keeps two separate values."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        store.put("Diabète", "dp", {"code": "E11.9"})
        store.put("Diabète", "das", {"code": "E11.8"})
        as_dp = store.get("Diabète", "dp")
        as_das = store.get("Diabète", "das")
        assert as_dp["code"] == "E11.9"
        assert as_das["code"] == "E11.8"

    def test_save_and_reload(self, tmp_path):
        """A saved entry is visible from a new cache opened on the same file."""
        cache_file = tmp_path / "cache.json"
        first = OllamaCache(cache_file, "gemma3:12b")
        first.put("HTA", "das", {"code": "I10"})
        first.save()
        assert cache_file.exists()
        reopened = OllamaCache(cache_file, "gemma3:12b")
        assert reopened.get("HTA", "das") == {"code": "I10"}

    def test_save_no_write_if_clean(self, tmp_path):
        """Saving a cache that received no ``put`` does not create the file."""
        cache_file = tmp_path / "cache.json"
        OllamaCache(cache_file, "gemma3:12b").save()
        assert not cache_file.exists()

    def test_model_change_invalidates(self, tmp_path):
        """Reopening the file with another model name yields an empty cache."""
        cache_file = tmp_path / "cache.json"
        original = OllamaCache(cache_file, "gemma3:12b")
        original.put("HTA", "das", {"code": "I10"})
        original.save()
        other_model = OllamaCache(cache_file, "llama3:8b")
        assert other_model.get("HTA", "das") is None
        assert len(other_model) == 0

    def test_corrupted_file(self, tmp_path):
        """A file with invalid JSON is loaded as an empty cache."""
        cache_file = tmp_path / "cache.json"
        cache_file.write_text("not valid json", encoding="utf-8")
        store = OllamaCache(cache_file, "gemma3:12b")
        assert len(store) == 0
        assert store.get("HTA", "das") is None

    def test_len(self, tmp_path):
        """``len`` follows the number of distinct entries stored."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert len(store) == 0
        store.put("HTA", "das", {"code": "I10"})
        assert len(store) == 1
        store.put("Diabète", "dp", {"code": "E11.9"})
        assert len(store) == 2

    def test_thread_safety(self, tmp_path):
        """Concurrent writes from several threads all land without error."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        failures = []

        def writer(i):
            # Collect exceptions so they can be asserted on from the main thread.
            try:
                store.put(f"diag_{i}", "das", {"code": f"X{i:02d}"})
            except Exception as exc:
                failures.append(exc)

        workers = []
        for index in range(20):
            workers.append(threading.Thread(target=writer, args=(index,)))
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        assert not failures
        assert len(store) == 20

    def test_json_format(self, tmp_path):
        """The JSON file holds the model name and the entries."""
        cache_file = tmp_path / "cache.json"
        store = OllamaCache(cache_file, "gemma3:12b")
        store.put("HTA", "das", {"code": "I10"})
        store.save()
        on_disk = json.loads(cache_file.read_text(encoding="utf-8"))
        assert on_disk["model"] == "gemma3:12b"
        assert "entries" in on_disk
        assert len(on_disk["entries"]) == 1

View File

@@ -7,7 +7,8 @@ from unittest.mock import patch, MagicMock
import pytest
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.config import RAGSource, Diagnostic, ActeCCAM, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.medical.ollama_cache import OllamaCache
class TestRAGSource:
@@ -494,6 +495,47 @@ class TestRAGSearchMocked:
assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancréatite..."
assert len(diag.sources_rag) == 1
def test_enrich_diagnostic_invalid_code_ignored(self):
    """An invalid code returned by Ollama does not replace the existing code."""
    from src.medical.rag_search import enrich_diagnostic

    diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
    rag_hits = [
        {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
    ]
    # "X99.99" is the invalid code under test.
    llm_reply = {
        "code": "X99.99",
        "confidence": "high",
        "justification": "Hallucination",
    }
    with patch("src.medical.rag_search.search_similar", return_value=rag_hits):
        with patch("src.medical.rag_search._call_ollama", return_value=llm_reply):
            enrich_diagnostic(diag, {"sexe": "M", "age": 50})
    # The original code is kept, not overwritten by the invalid one.
    assert diag.cim10_suggestion == "K85.9"
def test_enrich_diagnostic_normalizes_code(self):
    """An Ollama code without a dot is normalized (K851 -> K85.1)."""
    from src.medical.rag_search import enrich_diagnostic

    diag = Diagnostic(texte="Pancréatite aiguë biliaire")
    rag_hits = [
        {"document": "cim10", "page": 496, "code": "K85", "extrait": "K85", "score": 0.9},
    ]
    # "K851" is deliberately written without the dot.
    llm_reply = {
        "code": "K851",
        "confidence": "high",
        "justification": "Pancréatite biliaire",
    }
    with patch("src.medical.rag_search.search_similar", return_value=rag_hits):
        with patch("src.medical.rag_search._call_ollama", return_value=llm_reply):
            enrich_diagnostic(diag, {"sexe": "F", "age": 43})
    assert diag.cim10_suggestion == "K85.1"
def test_enrich_diagnostic_est_dp_flag(self):
"""Le flag est_dp est bien passé à _build_prompt."""
from src.medical.rag_search import enrich_diagnostic
@@ -533,10 +575,12 @@ class TestEnrichDossier:
captured_contexts = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured_contexts.append(contexte.copy())
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured_contexts) == 1 # DP seulement (pas de DAS)
@@ -563,10 +607,12 @@ class TestEnrichDossier:
captured = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")})
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured) == 2
@@ -578,6 +624,149 @@ class TestEnrichDossier:
assert captured[1]["dp_texte"] == "Pancréatite aiguë biliaire"
class TestNormalizeCode:
    """Tests for ``normalize_code`` from ``src.medical.cim10_dict``."""

    def test_insert_dot(self):
        """"K810" becomes "K81.0"."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("K810")
        assert normalized == "K81.0"

    def test_already_dotted(self):
        """"k85.1" becomes "K85.1"."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("k85.1")
        assert normalized == "K85.1"

    def test_three_chars(self):
        """"K85" is returned unchanged."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("K85")
        assert normalized == "K85"

    def test_strip_spaces(self):
        """" E660 " becomes "E66.0"."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code(" E660 ")
        assert normalized == "E66.0"
class TestValidateCodeCIM10:
    """Tests for ``validate_code``, which returns an ``(is_valid, label)`` pair."""

    def test_known_code(self):
        """"K81.9" is valid and comes with a non-empty label."""
        from src.medical.cim10_dict import validate_code

        found, libelle = validate_code("K81.9")
        assert found is True
        assert libelle  # must not be empty

    def test_unknown_code(self):
        """"Z99.99" is invalid and comes with an empty label."""
        from src.medical.cim10_dict import validate_code

        found, libelle = validate_code("Z99.99")
        assert found is False
        assert libelle == ""

    def test_normalize_before_validate(self):
        """"K810" must be normalized to K81.0 and found."""
        from src.medical.cim10_dict import validate_code

        found, _ = validate_code("K810")
        assert found is True

    def test_three_char_code(self):
        """A parent code without a dot (K85) must be valid."""
        from src.medical.cim10_dict import validate_code

        found, _ = validate_code("K85")
        assert found is True
class TestValidateCIM10PostProcessing:
    """Tests for ``_validate_cim10``, which post-processes the codes of a dossier in place."""

    def test_hallucination_rejected(self):
        """Hallucinated codes (Aucun, N/A...) are rejected."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Test", cim10_suggestion="Aucun")
        dossier = DossierMedical(diagnostic_principal=principal)
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion is None
        assert any("rejeté" in alerte for alerte in dossier.alertes_codage)

    def test_normalizes_format(self):
        """K810 is normalized to K81.0."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Cholécystite", cim10_suggestion="K810")
        dossier = DossierMedical(diagnostic_principal=principal)
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"

    def test_invalid_code_gets_low_confidence(self):
        """A non-existent code gets confidence "low" and an alert."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Chose bizarre", cim10_suggestion="Z99.99")
        dossier = DossierMedical(diagnostics_associes=[associe])
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_confidence == "low"
        assert any("absent du dictionnaire" in alerte for alerte in dossier.alertes_codage)

    def test_valid_code_unchanged(self):
        """A valid code is left untouched and raises no alert."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
        dossier = DossierMedical(diagnostic_principal=principal)
        _validate_cim10(dossier)
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert not any("CIM-10" in alerte for alerte in dossier.alertes_codage)

    def test_non_codable_rejected(self):
        """'non_codable' is rejected as a hallucination."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Truc", cim10_suggestion="non_codable")
        dossier = DossierMedical(diagnostics_associes=[associe])
        _validate_cim10(dossier)
        assert dossier.diagnostics_associes[0].cim10_suggestion is None

    def test_hallucination_fallback_found(self):
        """The hallucination is rejected, but the dictionary fallback finds a code."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="Aucun")
        dossier = DossierMedical(diagnostic_principal=principal)
        _validate_cim10(dossier)
        validated = dossier.diagnostic_principal
        assert validated.cim10_suggestion == "K81.0"
        assert validated.cim10_confidence == "medium"
        assert any("fallback" in alerte for alerte in dossier.alertes_codage)

    def test_invalid_code_fallback_found(self):
        """An invalid code is replaced by the dictionary fallback."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I99.99")
        dossier = DossierMedical(diagnostics_associes=[associe])
        _validate_cim10(dossier)
        validated = dossier.diagnostics_associes[0]
        assert validated.cim10_suggestion == "I10"
        assert validated.cim10_confidence == "medium"
        assert any("fallback" in alerte for alerte in dossier.alertes_codage)

    def test_invalid_code_no_fallback(self):
        """An invalid code with no possible fallback keeps its value, with low confidence."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Chose bizarre inconnue", cim10_suggestion="Z99.99")
        dossier = DossierMedical(diagnostics_associes=[associe])
        _validate_cim10(dossier)
        validated = dossier.diagnostics_associes[0]
        assert validated.cim10_suggestion == "Z99.99"
        assert validated.cim10_confidence == "low"
        assert any("absent du dictionnaire" in alerte for alerte in dossier.alertes_codage)
class TestFormatContexte:
"""Tests pour _format_contexte."""
@@ -610,3 +799,241 @@ class TestFormatContexte:
assert "TDM abdominal" in result
assert "éruption cutanée" in result
assert "Pancréatite aiguë biliaire" in result
class TestActeCCAMExtended:
    """Tests for the optional RAG fields of the ``ActeCCAM`` model."""

    def test_backward_compatible(self):
        """The new RAG fields are optional, so older call sites still work."""
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        assert acte.texte == "Cholécystectomie"
        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence is None
        assert acte.justification is None
        assert acte.raisonnement is None
        assert acte.sources_rag == []

    def test_with_rag_fields(self):
        """RAG fields passed to the constructor are stored on the instance."""
        source = RAGSource(document="ccam", page=10, code="HMFC004")
        acte = ActeCCAM(
            texte="Cholécystectomie par coelioscopie",
            code_ccam_suggestion="HMFC004",
            ccam_confidence="high",
            justification="HMFC004 correspond à la cholécystectomie par coelioscopie",
            raisonnement="ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
            sources_rag=[source],
        )
        assert acte.ccam_confidence == "high"
        assert acte.justification is not None
        assert len(acte.sources_rag) == 1
        assert acte.sources_rag[0].code == "HMFC004"

    def test_serialization_exclude_none(self):
        """``exclude_none`` drops the unset fields but keeps ``sources_rag``."""
        acte = ActeCCAM(texte="Test", code_ccam_suggestion="HMFC004")
        dumped = acte.model_dump(exclude_none=True)
        for absent_key in ("ccam_confidence", "justification", "raisonnement"):
            assert absent_key not in dumped
        assert "sources_rag" in dumped
class TestSearchSimilarCCAM:
    """Tests for ``search_similar_ccam`` with the index and embedding model mocked."""

    def test_prioritizes_ccam(self):
        """CCAM sources are prioritized (at least 5 out of 8)."""
        from src.medical.rag_search import search_similar_ccam
        import numpy as np

        # Six CCAM entries followed by six guide entries, in index order.
        ccam_entries = [
            {"document": "ccam", "code": f"HMFC00{i}", "page": i, "extrait": f"CCAM {i}"}
            for i in range(6)
        ]
        guide_entries = [
            {"document": "guide_methodo", "page": i + 10, "extrait": f"Guide {i}"}
            for i in range(6)
        ]
        mock_metadata = ccam_entries + guide_entries

        scores = np.array([[0.9 - i * 0.03 for i in range(12)]], dtype=np.float32)
        indices = np.array([list(range(12))], dtype=np.int64)
        mock_index = MagicMock()
        mock_index.ntotal = 12
        mock_index.search.return_value = (scores, indices)

        with patch("src.medical.rag_index.get_index", return_value=(mock_index, mock_metadata)):
            with patch("src.medical.rag_search._get_embed_model") as mock_model:
                mock_model.return_value.encode.return_value = np.array([[0.1] * 768], dtype=np.float32)
                results = search_similar_ccam("cholécystectomie", top_k=8)

        ccam_count = sum(1 for hit in results if hit["document"] == "ccam")
        assert ccam_count >= 5, f"Seulement {ccam_count} sources CCAM sur {len(results)}"

    def test_no_index(self):
        """An empty list is returned when ``get_index`` returns ``None``."""
        from src.medical.rag_search import search_similar_ccam

        with patch("src.medical.rag_index.get_index", return_value=None):
            results = search_similar_ccam("cholécystectomie")
        assert results == []
class TestEnrichActe:
    """Tests for ``enrich_acte`` with search, Ollama and CCAM validation mocked."""

    def test_enrich_with_ollama(self):
        """Full enrichment: RAG sources plus an Ollama reply."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie par coelioscopie")
        rag_hits = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004 Cholécystectomie par coelioscopie...",
                "score": 0.92,
            },
        ]
        llm_reply = {
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie par coelioscopie = HMFC004",
            "raisonnement": "ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
        }
        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits):
            with patch("src.medical.rag_search._call_ollama", return_value=llm_reply):
                with patch("src.medical.rag_search.ccam_validate", return_value=(True, "Cholécystectomie")):
                    enrich_acte(acte, {"sexe": "F", "age": 43})
        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence == "high"
        assert acte.justification == "Cholécystectomie par coelioscopie = HMFC004"
        assert acte.raisonnement is not None
        assert len(acte.sources_rag) == 1

    def test_enrich_no_sources(self):
        """``enrich_acte`` does not fail when no source is found."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Acte inconnu", code_ccam_suggestion="ABCD123")
        with patch("src.medical.rag_search.search_similar_ccam", return_value=[]):
            enrich_acte(acte, {"sexe": "M", "age": 50})
        assert acte.sources_rag == []
        assert acte.justification is None

    def test_enrich_no_ollama(self):
        """Enrichment with search results but no Ollama reply."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        rag_hits = [
            {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9},
        ]
        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits):
            with patch("src.medical.rag_search._call_ollama", return_value=None):
                enrich_acte(acte, {"sexe": "M", "age": 50})
        assert len(acte.sources_rag) == 1
        assert acte.justification is None
        assert acte.raisonnement is None

    def test_enrich_invalid_code(self):
        """An invalid CCAM code from Ollama does not replace the existing code."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        rag_hits = [
            {"document": "ccam", "page": 10, "code": "HMFC004", "extrait": "HMFC004", "score": 0.9},
        ]
        llm_reply = {
            "code": "ZZZZ999",
            "confidence": "high",
            "justification": "Hallucination",
        }
        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits):
            with patch("src.medical.rag_search._call_ollama", return_value=llm_reply):
                with patch("src.medical.rag_search.ccam_validate", return_value=(False, "")):
                    enrich_acte(acte, {"sexe": "M", "age": 50})
        # The original code is kept.
        assert acte.code_ccam_suggestion == "HMFC004"
        # The confidence is still assigned.
        assert acte.ccam_confidence == "high"
class TestEnrichDossierCCAM:
    def test_enriches_actes(self):
        """enrich_dossier also enriches the CCAM acts."""
        from src.medical.rag_search import enrich_dossier

        seen_actes = []

        def fake_enrich_diag(diag, contexte, est_dp=True, cache=None):
            # Diagnostics are not under test here: do nothing.
            return None

        def fake_enrich_acte(acte, contexte, cache=None):
            # Record which acts were handed to the enrichment step.
            seen_actes.append(acte.texte)

        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Lithiase vésiculaire"),
            actes_ccam=[
                ActeCCAM(texte="Cholécystectomie par coelioscopie"),
                ActeCCAM(texte="Anesthésie générale"),
            ],
        )

        diag_patch = patch("src.medical.rag_search.enrich_diagnostic", side_effect=fake_enrich_diag)
        acte_patch = patch("src.medical.rag_search.enrich_acte", side_effect=fake_enrich_acte)
        with diag_patch, acte_patch:
            with patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
                # Replace the cache instance with a MagicMock.
                mock_cache_cls.return_value = MagicMock()
                enrich_dossier(dossier)

        # Both acts were enriched exactly once; order is not asserted.
        assert len(seen_actes) == 2
        assert set(seen_actes) == {
            "Cholécystectomie par coelioscopie",
            "Anesthésie générale",
        }
class TestBuildPromptCCAM:
    def test_prompt_contains_acte(self):
        """The prompt embeds the act text and the expected JSON instructions."""
        from src.medical.rag_search import _build_prompt_ccam

        prompt = _build_prompt_ccam(
            "Cholécystectomie par coelioscopie",
            [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie"}],
            {"sexe": "F", "age": 43},
        )

        expected_fragments = (
            "Cholécystectomie par coelioscopie",
            "CCAM",
            "analyse_acte",
            "objet JSON",
        )
        for fragment in expected_fragments:
            assert fragment in prompt

    def test_prompt_contains_source_info(self):
        """The prompt names the source document and quotes the candidate code."""
        from src.medical.rag_search import _build_prompt_ccam

        prompt = _build_prompt_ccam(
            "Cholécystectomie",
            [{"document": "ccam", "code": "HMFC004", "page": 10, "extrait": "HMFC004 Cholécystectomie par coelioscopie"}],
            {},
        )

        for fragment in ("CCAM PMSI V4 2025", "HMFC004"):
            assert fragment in prompt
class TestParseOllamaResponseCCAM:
    def test_parse_ccam_structured_json(self):
        """Parsing folds analyse_acte into the reasoning text."""
        import json

        from src.medical.rag_search import _parse_ollama_response

        payload = {
            "analyse_acte": "Cholécystectomie par voie coelioscopique",
            "codes_candidats": "HMFC004, HMFC003",
            "discrimination": "HMFC004 est le code spécifique à la coelioscopie",
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie coelioscopique = HMFC004",
        }
        parsed = _parse_ollama_response(json.dumps(payload))

        assert parsed is not None
        assert parsed["code"] == "HMFC004"

        # The structured analysis keys are merged into a single "raisonnement" entry...
        assert "raisonnement" in parsed
        reasoning = parsed["raisonnement"]
        assert "ANALYSE ACTE" in reasoning
        assert "CODES CANDIDATS" in reasoning
        # ...and the raw key is no longer exposed.
        assert "analyse_acte" not in parsed

212
tests/test_rum_export.py Normal file
View File

@@ -0,0 +1,212 @@
"""Tests pour le module d'export RUM V016."""
import pytest
from src.config import ActeCCAM, Diagnostic, DossierMedical, Sejour
from src.export.rum_export import (
RUMConfig,
export_rum,
_format_cim10,
_format_date,
_format_sex,
_format_ccam_act,
_map_mode_entree,
_map_mode_sortie,
)
class TestFormatHelpers:
    """Unit tests for the fixed-width field formatters used by the RUM export.

    Expected values for padded fields are built with ``ljust`` / ``" " * n``
    so the padding width is explicit and cannot be lost when whitespace is
    collapsed (a bare literal such as ``"K851 "`` contradicts the 8-character
    length asserted alongside it).
    """

    def test_format_cim10_normal(self):
        # The dot is dropped and the code is blank-padded on the right to 8 chars.
        assert _format_cim10("K85.1") == "K851".ljust(8)
        assert len(_format_cim10("K85.1")) == 8

    def test_format_cim10_short(self):
        result = _format_cim10("J18")
        assert result == "J18".ljust(8)
        assert len(result) == 8

    def test_format_cim10_none(self):
        # A missing code yields an all-blank 8-character field.
        assert _format_cim10(None) == " " * 8
        assert len(_format_cim10(None)) == 8

    def test_format_cim10_empty(self):
        # NOTE(review): assumes an empty code is rendered like a missing one
        # (8 blanks, same width as every other CIM-10 field) — confirm.
        assert _format_cim10("") == " " * 8

    def test_format_date_ddmmyyyy(self):
        assert _format_date("15/03/2025") == "15032025"

    def test_format_date_iso(self):
        # ISO input is re-ordered to DDMMYYYY.
        assert _format_date("2025-03-15") == "15032025"

    def test_format_date_none(self):
        # A missing date yields an all-blank 8-character field.
        assert _format_date(None) == " " * 8
        assert len(_format_date(None)) == 8

    def test_format_sex_masculin(self):
        assert _format_sex("M") == "1"
        assert _format_sex("Masculin") == "1"
        assert _format_sex("H") == "1"

    def test_format_sex_feminin(self):
        assert _format_sex("F") == "2"
        assert _format_sex("Féminin") == "2"

    def test_format_sex_none(self):
        # Single-character field: one blank.
        assert _format_sex(None) == " "

    def test_map_mode_entree(self):
        assert _map_mode_entree("Domicile") == "8"
        assert _map_mode_entree("Mutation") == "6"
        assert _map_mode_entree("Transfert") == "7"
        assert _map_mode_entree(None) == " "

    def test_map_mode_sortie(self):
        assert _map_mode_sortie("Domicile") == "8"
        assert _map_mode_sortie("Décès") == "9"
        assert _map_mode_sortie("Transfert") == "7"
        assert _map_mode_sortie(None) == " "

    def test_format_ccam_act(self):
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="15/03/2025")
        result = _format_ccam_act(acte)
        assert len(result) == 29
        assert result[:7] == "HMFC004"
        assert result[7] == "1"  # phase
        assert result[8] == "1"  # activity
        assert result[9:17] == "15032025"  # date
class TestExportRUM:
    """Positional checks on the fixed-width RUM line returned by export_rum.

    Expected values for padded 8-character fields are built with ``ljust`` so
    that they really are as wide as the slices they are compared against.
    """

    def _make_dossier(self, **kwargs):
        """Build a DossierMedical with one DAS and one act.

        Any keyword argument overrides the corresponding default field.
        """
        defaults = dict(
            source_file="test.pdf",
            sejour=Sejour(
                sexe="M",
                date_entree="01/01/2025",
                date_sortie="05/01/2025",
                mode_entree="Domicile",
                mode_sortie="Domicile",
            ),
            diagnostic_principal=Diagnostic(texte="Pneumonie", cim10_suggestion="J18.9"),
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(texte="Radio thorax", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        defaults.update(kwargs)
        return DossierMedical(**defaults)

    def test_fixed_zone_length(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # The fixed zone is 165 chars, followed by the DAS and acts
        assert len(rum) >= 165

    def test_fixed_zone_exact_165(self):
        dossier = self._make_dossier(diagnostics_associes=[], actes_ccam=[])
        rum = export_rum(dossier)
        assert len(rum) == 165

    def test_version_format(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[9:12] == "016"  # format version
        assert rum[24:27] == "016"  # RUM version

    def test_finess(self):
        config = RUMConfig(finess="123456789")
        dossier = self._make_dossier()
        rum = export_rum(dossier, config)
        assert rum[15:24] == "123456789"

    def test_sexe(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[85] == "1"  # M

    def test_dates(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[92:100] == "01012025"  # admission date
        assert rum[102:110] == "05012025"  # discharge date

    def test_modes(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[100] == "8"  # admission mode: home
        assert rum[110] == "8"  # discharge mode: home

    def test_dp_field(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # 8-character slice: the DP code without its dot, blank-padded.
        assert rum[131:139] == "J189".ljust(8)

    def test_nb_das(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[125:127] == "01"

    def test_nb_actes(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        assert rum[129:131] == "01"

    def test_das_variable_zone(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # The DAS starts at position 165 and is 8 chars wide
        das_zone = rum[165:173]
        assert das_zone == "I10".ljust(8)

    def test_acte_variable_zone(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # 1 DAS (8 chars), then the act (29 chars) at position 173
        acte_zone = rum[173:202]
        assert len(acte_zone) == 29
        assert acte_zone[:7] == "ZBQK002"

    def test_total_length(self):
        dossier = self._make_dossier()
        rum = export_rum(dossier)
        # 165 + 1*8 (DAS) + 1*29 (act) = 202
        assert len(rum) == 202
class TestEdgeCases:
    """export_rum behaviour on sparse or multi-entry dossiers."""

    def test_no_dp(self):
        dossier = DossierMedical(source_file="test.pdf")
        rum = export_rum(dossier)
        assert len(rum) == 165
        # Without a DP the 8-character DP field is entirely blank; the expected
        # value must be 8 blanks wide to match the 8-character slice.
        assert rum[131:139] == " " * 8

    def test_no_sejour_data(self):
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="J18.9"),
        )
        rum = export_rum(dossier)
        assert len(rum) == 165
        assert rum[85] == " "  # empty sex field

    def test_multiple_das_and_actes(self):
        dossier = DossierMedical(
            source_file="test.pdf",
            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="K85.1"),
            diagnostics_associes=[
                Diagnostic(texte="D1", cim10_suggestion="I10"),
                Diagnostic(texte="D2", cim10_suggestion="E11.9"),
                Diagnostic(texte="D3", cim10_suggestion="I48.9"),
            ],
            actes_ccam=[
                ActeCCAM(texte="A1", code_ccam_suggestion="HMFC004", date="01/01/2025"),
                ActeCCAM(texte="A2", code_ccam_suggestion="ZBQK002", date="02/01/2025"),
            ],
        )
        rum = export_rum(dossier)
        # 165 + 3*8 + 2*29 = 165 + 24 + 58 = 247
        assert len(rum) == 247
        assert rum[125:127] == "03"  # DAS count
        assert rum[129:131] == "02"  # act count

View File

@@ -90,7 +90,7 @@ class TestEnrichDossierSeverity:
Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"),
Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
]
alertes = enrich_dossier_severity(dp, das)
alertes, cma_count, cms_count = enrich_dossier_severity(dp, das)
# I48.9 = CMA probable
assert das[0].est_cma is True
@@ -101,9 +101,21 @@ class TestEnrichDossierSeverity:
# Au moins une alerte CMA
assert any("CMA" in a for a in alertes)
assert cma_count >= 1
def test_dp_severity_set(self):
    """The DP itself receives a severity level and a CMA flag."""
    dp = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
    # enrich_dossier_severity is unpacked as a 3-tuple here; the earlier
    # single-value call was superseded by this one and has been dropped so
    # the function is invoked only once.
    alertes, cma_count, cms_count = enrich_dossier_severity(dp, [])
    assert dp.niveau_severite == "severe"
    assert dp.est_cma is True
def test_cms_detection(self):
    """CMS is detected when a CMA also has severity "severe"."""
    sepsis = Diagnostic(texte="Sepsis sévère", cim10_suggestion="A41.9")
    pancreatite = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")

    # Only the CMS counter is checked; the call must still return three values.
    _alertes, _cma_count, cms_count = enrich_dossier_severity(pancreatite, [sepsis])

    # The single associated diagnosis is flagged both CMA and CMS.
    assert sepsis.est_cma is True
    assert sepsis.est_cms is True
    assert cms_count == 1