feat: cache Ollama + parallélisation ThreadPool + filtrage DAS renforcé + modules GHM/CPAM/export RUM

- Cache persistant JSON thread-safe pour les résultats Ollama (invalidation par modèle)
- Parallélisation des appels Ollama (ThreadPoolExecutor, 2 workers)
- 6 nouvelles règles de filtrage DAS parasites (doublons, ponctuation, OCR, labo, fragments)
- Client Ollama centralisé (mode JSON natif + retry)
- Module GHM (estimation CMD/sévérité)
- Module contrôle CPAM (parser + contre-argumentation RAG)
- Export RUM (format RSS)
- Viewer enrichi (détail dossier)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-12 13:44:34 +01:00
parent a00e5f1147
commit a58398f5d4
25 changed files with 2872 additions and 97 deletions

View File

@@ -7,7 +7,8 @@ from unittest.mock import patch, MagicMock
import pytest
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.config import RAGSource, Diagnostic, ActeCCAM, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
from src.medical.ollama_cache import OllamaCache
class TestRAGSource:
@@ -494,6 +495,47 @@ class TestRAGSearchMocked:
assert diag.raisonnement == "1. ANALYSE CLINIQUE : La pancréatite..."
assert len(diag.sources_rag) == 1
def test_enrich_diagnostic_invalid_code_ignored(self):
    """An invalid code returned by Ollama must not replace the existing code."""
    from src.medical.rag_search import enrich_diagnostic

    diagnostic = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
    rag_hits = [
        {
            "document": "cim10",
            "page": 496,
            "code": "K85",
            "extrait": "K85",
            "score": 0.9,
        },
    ]
    # "X99.99" is the deliberately invalid code this test feeds back.
    llm_answer = {
        "code": "X99.99",
        "confidence": "high",
        "justification": "Hallucination",
    }
    patient = {"sexe": "M", "age": 50}

    with patch("src.medical.rag_search.search_similar", return_value=rag_hits), \
            patch("src.medical.rag_search._call_ollama", return_value=llm_answer):
        enrich_diagnostic(diagnostic, patient)

    # The original suggestion is kept (not replaced by the invalid code).
    assert diagnostic.cim10_suggestion == "K85.9"
def test_enrich_diagnostic_normalizes_code(self):
    """A dot-less code from Ollama is normalised (K851 -> K85.1)."""
    from src.medical.rag_search import enrich_diagnostic

    diagnostic = Diagnostic(texte="Pancréatite aiguë biliaire")
    rag_hits = [
        {
            "document": "cim10",
            "page": 496,
            "code": "K85",
            "extrait": "K85",
            "score": 0.9,
        },
    ]
    # The mocked LLM answers with the code written without its dot.
    llm_answer = {
        "code": "K851",
        "confidence": "high",
        "justification": "Pancréatite biliaire",
    }
    patient = {"sexe": "F", "age": 43}

    with patch("src.medical.rag_search.search_similar", return_value=rag_hits), \
            patch("src.medical.rag_search._call_ollama", return_value=llm_answer):
        enrich_diagnostic(diagnostic, patient)

    assert diagnostic.cim10_suggestion == "K85.1"
def test_enrich_diagnostic_est_dp_flag(self):
"""Le flag est_dp est bien passé à _build_prompt."""
from src.medical.rag_search import enrich_diagnostic
@@ -533,10 +575,12 @@ class TestEnrichDossier:
captured_contexts = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured_contexts.append(contexte.copy())
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured_contexts) == 1 # DP seulement (pas de DAS)
@@ -563,10 +607,12 @@ class TestEnrichDossier:
captured = []
def mock_enrich(diag, contexte, est_dp=True):
def mock_enrich(diag, contexte, est_dp=True, cache=None):
captured.append({"texte": diag.texte, "est_dp": est_dp, "dp_texte": contexte.get("dp_texte")})
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich):
with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich), \
patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
mock_cache_cls.return_value = MagicMock()
enrich_dossier(dossier)
assert len(captured) == 2
@@ -578,6 +624,149 @@ class TestEnrichDossier:
assert captured[1]["dp_texte"] == "Pancréatite aiguë biliaire"
class TestNormalizeCode:
    """Expectations on ``normalize_code`` from ``src.medical.cim10_dict``."""

    def test_insert_dot(self):
        """A four-character code gets a dot after the third character."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("K810")
        assert normalized == "K81.0"

    def test_already_dotted(self):
        """A lowercase, already dotted code comes back upper-cased."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("k85.1")
        assert normalized == "K85.1"

    def test_three_chars(self):
        """A three-character code is returned as is."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code("K85")
        assert normalized == "K85"

    def test_strip_spaces(self):
        """Surrounding spaces are removed before the dot is inserted."""
        from src.medical.cim10_dict import normalize_code

        normalized = normalize_code(" E660 ")
        assert normalized == "E66.0"
class TestValidateCodeCIM10:
    """Expectations on ``validate_code`` from ``src.medical.cim10_dict``."""

    def test_known_code(self):
        """A known code is reported valid together with a non-empty label."""
        from src.medical.cim10_dict import validate_code

        found, description = validate_code("K81.9")
        assert found is True
        assert description  # non-empty

    def test_unknown_code(self):
        """An unknown code is reported invalid with an empty label."""
        from src.medical.cim10_dict import validate_code

        found, description = validate_code("Z99.99")
        assert found is False
        assert description == ""

    def test_normalize_before_validate(self):
        """K810 must be normalised to K81.0 and then found."""
        from src.medical.cim10_dict import validate_code

        found, _ = validate_code("K810")
        assert found is True

    def test_three_char_code(self):
        """A parent code without a dot (K85) must be accepted."""
        from src.medical.cim10_dict import validate_code

        found, _ = validate_code("K85")
        assert found is True
class TestValidateCIM10PostProcessing:
    """Expectations on ``_validate_cim10`` applied to a ``DossierMedical``."""

    def test_hallucination_rejected(self):
        """Hallucinated codes (Aucun, N/A...) are rejected."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Test", cim10_suggestion="Aucun")
        dossier = DossierMedical(diagnostic_principal=principal)

        _validate_cim10(dossier)

        assert dossier.diagnostic_principal.cim10_suggestion is None
        assert any("rejeté" in alerte for alerte in dossier.alertes_codage)

    def test_normalizes_format(self):
        """K810 is normalised to K81.0."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Cholécystite", cim10_suggestion="K810")
        dossier = DossierMedical(diagnostic_principal=principal)

        _validate_cim10(dossier)

        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"

    def test_invalid_code_gets_low_confidence(self):
        """A non-existent code gets confidence=low and an alert."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Chose bizarre", cim10_suggestion="Z99.99")
        dossier = DossierMedical(diagnostics_associes=[associe])

        _validate_cim10(dossier)

        assert dossier.diagnostics_associes[0].cim10_confidence == "low"
        assert any(
            "absent du dictionnaire" in alerte for alerte in dossier.alertes_codage
        )

    def test_valid_code_unchanged(self):
        """A valid code is left untouched and raises no alert."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
        dossier = DossierMedical(diagnostic_principal=principal)

        _validate_cim10(dossier)

        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        assert not any("CIM-10" in alerte for alerte in dossier.alertes_codage)

    def test_non_codable_rejected(self):
        """'non_codable' is rejected as a hallucination."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(texte="Truc", cim10_suggestion="non_codable")
        dossier = DossierMedical(diagnostics_associes=[associe])

        _validate_cim10(dossier)

        assert dossier.diagnostics_associes[0].cim10_suggestion is None

    def test_hallucination_fallback_found(self):
        """Hallucination rejected, but the dictionary fallback finds a code."""
        from src.medical.cim10_extractor import _validate_cim10

        principal = Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="Aucun")
        dossier = DossierMedical(diagnostic_principal=principal)

        _validate_cim10(dossier)

        assert dossier.diagnostic_principal.cim10_suggestion == "K81.0"
        assert dossier.diagnostic_principal.cim10_confidence == "medium"
        assert any("fallback" in alerte for alerte in dossier.alertes_codage)

    def test_invalid_code_fallback_found(self):
        """An invalid code is replaced by the dictionary fallback."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(
            texte="Hypertension artérielle",
            cim10_suggestion="I99.99",
        )
        dossier = DossierMedical(diagnostics_associes=[associe])

        _validate_cim10(dossier)

        assert dossier.diagnostics_associes[0].cim10_suggestion == "I10"
        assert dossier.diagnostics_associes[0].cim10_confidence == "medium"
        assert any("fallback" in alerte for alerte in dossier.alertes_codage)

    def test_invalid_code_no_fallback(self):
        """Invalid code with no possible fallback -> low confidence."""
        from src.medical.cim10_extractor import _validate_cim10

        associe = Diagnostic(
            texte="Chose bizarre inconnue",
            cim10_suggestion="Z99.99",
        )
        dossier = DossierMedical(diagnostics_associes=[associe])

        _validate_cim10(dossier)

        assert dossier.diagnostics_associes[0].cim10_suggestion == "Z99.99"
        assert dossier.diagnostics_associes[0].cim10_confidence == "low"
        assert any(
            "absent du dictionnaire" in alerte for alerte in dossier.alertes_codage
        )
class TestFormatContexte:
"""Tests pour _format_contexte."""
@@ -610,3 +799,241 @@ class TestFormatContexte:
assert "TDM abdominal" in result
assert "éruption cutanée" in result
assert "Pancréatite aiguë biliaire" in result
class TestActeCCAMExtended:
    """Checks on the RAG-related fields of ``ActeCCAM``."""

    def test_backward_compatible(self):
        """The new RAG fields are optional, so older call sites keep working."""
        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")

        assert acte.texte == "Cholécystectomie"
        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence is None
        assert acte.justification is None
        assert acte.raisonnement is None
        assert acte.sources_rag == []

    def test_with_rag_fields(self):
        """All RAG fields can be supplied at construction time."""
        source = RAGSource(document="ccam", page=10, code="HMFC004")
        acte = ActeCCAM(
            texte="Cholécystectomie par coelioscopie",
            code_ccam_suggestion="HMFC004",
            ccam_confidence="high",
            justification="HMFC004 correspond à la cholécystectomie par coelioscopie",
            raisonnement="ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
            sources_rag=[source],
        )

        assert acte.ccam_confidence == "high"
        assert acte.justification is not None
        assert len(acte.sources_rag) == 1
        assert acte.sources_rag[0].code == "HMFC004"

    def test_serialization_exclude_none(self):
        """``model_dump(exclude_none=True)`` drops unset fields but keeps sources_rag."""
        acte = ActeCCAM(texte="Test", code_ccam_suggestion="HMFC004")

        dumped = acte.model_dump(exclude_none=True)

        for absent_key in ("ccam_confidence", "justification", "raisonnement"):
            assert absent_key not in dumped
        assert "sources_rag" in dumped
class TestSearchSimilarCCAM:
    """Checks on ``search_similar_ccam`` with the index and embedder mocked."""

    def test_prioritizes_ccam(self):
        """CCAM sources are prioritised (at least 5 out of 8)."""
        from src.medical.rag_search import search_similar_ccam
        import numpy as np

        # Six CCAM entries followed by six methodology-guide entries.
        ccam_entries = [
            {"document": "ccam", "code": f"HMFC00{n}", "page": n, "extrait": f"CCAM {n}"}
            for n in range(6)
        ]
        guide_entries = [
            {"document": "guide_methodo", "page": n + 10, "extrait": f"Guide {n}"}
            for n in range(6)
        ]
        fake_metadata = ccam_entries + guide_entries

        # The fake index returns all twelve rows with decreasing scores.
        fake_index = MagicMock()
        fake_index.ntotal = 12
        descending_scores = np.array(
            [[0.9 - rank * 0.03 for rank in range(12)]],
            dtype=np.float32,
        )
        row_ids = np.array([list(range(12))], dtype=np.int64)
        fake_index.search.return_value = (descending_scores, row_ids)

        with patch("src.medical.rag_index.get_index", return_value=(fake_index, fake_metadata)), \
                patch("src.medical.rag_search._get_embed_model") as mock_model:
            mock_model.return_value.encode.return_value = np.array(
                [[0.1] * 768],
                dtype=np.float32,
            )
            results = search_similar_ccam("cholécystectomie", top_k=8)

        ccam_count = sum(1 for hit in results if hit["document"] == "ccam")
        assert ccam_count >= 5, f"Seulement {ccam_count} sources CCAM sur {len(results)}"

    def test_no_index(self):
        """search_similar_ccam returns an empty list when no index exists."""
        from src.medical.rag_search import search_similar_ccam

        with patch("src.medical.rag_index.get_index", return_value=None):
            results = search_similar_ccam("cholécystectomie")

        assert results == []
class TestEnrichActe:
    """Checks on ``enrich_acte`` with search, Ollama and validation mocked."""

    def test_enrich_with_ollama(self):
        """Full enrichment: sources plus an Ollama answer."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie par coelioscopie")
        rag_hits = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004 Cholécystectomie par coelioscopie...",
                "score": 0.92,
            },
        ]
        llm_answer = {
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie par coelioscopie = HMFC004",
            "raisonnement": "ANALYSE ACTE : Cholécystectomie par voie coelioscopique...",
        }
        patient = {"sexe": "F", "age": 43}

        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits), \
                patch("src.medical.rag_search._call_ollama", return_value=llm_answer), \
                patch("src.medical.rag_search.ccam_validate", return_value=(True, "Cholécystectomie")):
            enrich_acte(acte, patient)

        assert acte.code_ccam_suggestion == "HMFC004"
        assert acte.ccam_confidence == "high"
        assert acte.justification == "Cholécystectomie par coelioscopie = HMFC004"
        assert acte.raisonnement is not None
        assert len(acte.sources_rag) == 1

    def test_enrich_no_sources(self):
        """enrich_acte does not crash when no source is found."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Acte inconnu", code_ccam_suggestion="ABCD123")
        patient = {"sexe": "M", "age": 50}

        with patch("src.medical.rag_search.search_similar_ccam", return_value=[]):
            enrich_acte(acte, patient)

        assert acte.sources_rag == []
        assert acte.justification is None

    def test_enrich_no_ollama(self):
        """Enrichment with FAISS sources but without an Ollama answer."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        rag_hits = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004",
                "score": 0.9,
            },
        ]
        patient = {"sexe": "M", "age": 50}

        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits), \
                patch("src.medical.rag_search._call_ollama", return_value=None):
            enrich_acte(acte, patient)

        assert len(acte.sources_rag) == 1
        assert acte.justification is None
        assert acte.raisonnement is None

    def test_enrich_invalid_code(self):
        """An invalid CCAM code from Ollama does not replace the existing code."""
        from src.medical.rag_search import enrich_acte

        acte = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004")
        rag_hits = [
            {
                "document": "ccam",
                "page": 10,
                "code": "HMFC004",
                "extrait": "HMFC004",
                "score": 0.9,
            },
        ]
        llm_answer = {
            "code": "ZZZZ999",
            "confidence": "high",
            "justification": "Hallucination",
        }
        patient = {"sexe": "M", "age": 50}

        with patch("src.medical.rag_search.search_similar_ccam", return_value=rag_hits), \
                patch("src.medical.rag_search._call_ollama", return_value=llm_answer), \
                patch("src.medical.rag_search.ccam_validate", return_value=(False, "")):
            enrich_acte(acte, patient)

        # The original code is kept.
        assert acte.code_ccam_suggestion == "HMFC004"
        # The confidence is still assigned, though.
        assert acte.ccam_confidence == "high"
class TestEnrichDossierCCAM:
    """Checks that ``enrich_dossier`` also processes CCAM acts."""

    def test_enriches_actes(self):
        """enrich_dossier enriches the CCAM acts as well."""
        from src.medical.rag_search import enrich_dossier

        actes = [
            ActeCCAM(texte="Cholécystectomie par coelioscopie"),
            ActeCCAM(texte="Anesthésie générale"),
        ]
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Lithiase vésiculaire"),
            actes_ccam=actes,
        )

        # Texts of the acts handed to the enrich_acte stand-in.
        enriched = []

        def mock_enrich_diag(diag, contexte, est_dp=True, cache=None):
            """Stand-in for enrich_diagnostic that does nothing."""
            return None

        def mock_enrich_acte(acte, contexte, cache=None):
            """Stand-in for enrich_acte that records the act text."""
            enriched.append(acte.texte)

        with patch("src.medical.rag_search.enrich_diagnostic", side_effect=mock_enrich_diag), \
                patch("src.medical.rag_search.enrich_acte", side_effect=mock_enrich_acte), \
                patch("src.medical.rag_search.OllamaCache") as mock_cache_cls:
            # Replace the cache class so the test gets a MagicMock instance.
            mock_cache_cls.return_value = MagicMock()
            enrich_dossier(dossier)

        assert len(enriched) == 2
        assert "Cholécystectomie par coelioscopie" in enriched
        assert "Anesthésie générale" in enriched
class TestBuildPromptCCAM:
    """Checks on the text produced by ``_build_prompt_ccam``."""

    def test_prompt_contains_acte(self):
        """The prompt mentions the act text and the expected markers."""
        from src.medical.rag_search import _build_prompt_ccam

        rag_hits = [
            {
                "document": "ccam",
                "code": "HMFC004",
                "page": 10,
                "extrait": "HMFC004 Cholécystectomie",
            },
        ]
        patient = {"sexe": "F", "age": 43}

        prompt = _build_prompt_ccam("Cholécystectomie par coelioscopie", rag_hits, patient)

        assert "Cholécystectomie par coelioscopie" in prompt
        assert "CCAM" in prompt
        assert "analyse_acte" in prompt
        assert "objet JSON" in prompt

    def test_prompt_contains_source_info(self):
        """The prompt includes the source label and the source code."""
        from src.medical.rag_search import _build_prompt_ccam

        rag_hits = [
            {
                "document": "ccam",
                "code": "HMFC004",
                "page": 10,
                "extrait": "HMFC004 Cholécystectomie par coelioscopie",
            },
        ]
        empty_context = {}

        prompt = _build_prompt_ccam("Cholécystectomie", rag_hits, empty_context)

        assert "CCAM PMSI V4 2025" in prompt
        assert "HMFC004" in prompt
class TestParseOllamaResponseCCAM:
    """Checks on ``_parse_ollama_response`` for CCAM-shaped answers."""

    def test_parse_ccam_structured_json(self):
        """Parsing moves analyse_acte into the raisonnement field."""
        from src.medical.rag_search import _parse_ollama_response
        import json

        llm_payload = {
            "analyse_acte": "Cholécystectomie par voie coelioscopique",
            "codes_candidats": "HMFC004, HMFC003",
            "discrimination": "HMFC004 est le code spécifique à la coelioscopie",
            "code": "HMFC004",
            "confidence": "high",
            "justification": "Cholécystectomie coelioscopique = HMFC004",
        }
        raw = json.dumps(llm_payload)

        parsed = _parse_ollama_response(raw)

        assert parsed is not None
        assert parsed["code"] == "HMFC004"
        assert "raisonnement" in parsed
        assert "ANALYSE ACTE" in parsed["raisonnement"]
        assert "CODES CANDIDATS" in parsed["raisonnement"]
        assert "analyse_acte" not in parsed