Implémente un système RAG (Retrieval Augmented Generation) qui indexe les documents de référence ATIH (CIM-10 FR 2026, Guide Métho MCO, CCAM PMSI) et utilise Ollama (mistral-small3.2:24b) pour justifier et valider le codage CIM-10 des diagnostics.

- Nouveaux modèles Pydantic : RAGSource, Diagnostic étendu (confidence, justification, sources_rag) — rétrocompatible
- Module rag_index.py : chunking des 3 PDFs, embedding sentence-camembert-large, index FAISS IndexFlatIP (3630 vecteurs)
- Module rag_search.py : recherche FAISS + appel Ollama avec fallback double
- Flag CLI --no-rag pour désactiver l'enrichissement RAG
- 18 nouveaux tests (88/88 passent)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
272 lines
9.9 KiB
Python
272 lines
9.9 KiB
Python
"""Tests pour le RAG CIM-10 (modèles, chunking, intégration)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
|
|
|
|
|
|
class TestRAGSource:
    """Construction and serialization checks for the ``RAGSource`` model."""

    def test_create_minimal(self):
        """Only ``document`` is given; every other field reads back as None."""
        source = RAGSource(document="cim10")

        assert source.document == "cim10"
        for optional_value in (source.page, source.code, source.extrait):
            assert optional_value is None

    def test_create_full(self):
        """Every field passed to the constructor is readable unchanged."""
        fields = {
            "document": "guide_methodo",
            "page": 42,
            "code": "K85",
            "extrait": "Pancréatite aiguë biliaire...",
        }
        source = RAGSource(**fields)

        for name, expected in fields.items():
            assert getattr(source, name) == expected

    def test_serialization(self):
        """``model_dump(exclude_none=True)`` leaves out the unset ``extrait``."""
        dumped = RAGSource(document="ccam", page=1, code="HMFC004").model_dump(
            exclude_none=True
        )
        expected = {"document": "ccam", "page": 1, "code": "HMFC004"}

        assert dumped == expected
|
|
|
|
|
|
class TestDiagnosticExtended:
    """Checks on the RAG-related fields of the ``Diagnostic`` model."""

    def test_backward_compatible(self):
        """The new fields are optional, so the old two-field call still works."""
        diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")

        assert diag.texte == "Pancréatite aiguë"
        assert diag.cim10_suggestion == "K85.9"
        # RAG fields were not supplied: scalars are None, the list is empty.
        assert diag.cim10_confidence is None
        assert diag.justification is None
        assert diag.sources_rag == []

    def test_with_rag_fields(self):
        """RAG fields passed to the constructor are stored on the instance."""
        k80_source = RAGSource(document="cim10", page=480, code="K80")
        diag = Diagnostic(
            texte="Lithiase cholédoque",
            cim10_suggestion="K80.5",
            cim10_confidence="high",
            justification="Code K80.5 correspond à la lithiase du cholédoque",
            sources_rag=[k80_source],
        )

        assert diag.cim10_confidence == "high"
        assert diag.justification is not None
        assert len(diag.sources_rag) == 1
        assert diag.sources_rag[0].code == "K80"

    def test_serialization_exclude_none(self):
        """The dumped dict must not contain the fields that are None."""
        dumped = Diagnostic(texte="Test", cim10_suggestion="K85.9").model_dump(
            exclude_none=True
        )

        for absent_key in ("cim10_confidence", "justification"):
            assert absent_key not in dumped
        assert "sources_rag" in dumped  # the empty list is still included

    def test_dossier_with_extended_diagnostic(self):
        """A ``DossierMedical`` can carry a RAG-enriched main diagnosis."""
        sources = [
            RAGSource(document="cim10", page=496, code="K85"),
            RAGSource(document="guide_methodo", page=30),
        ]
        principal = Diagnostic(
            texte="Pancréatite aiguë biliaire",
            cim10_suggestion="K85.1",
            cim10_confidence="high",
            justification="Confirmé par CIM-10 FR 2026",
            sources_rag=sources,
        )
        dossier = DossierMedical(diagnostic_principal=principal)

        assert dossier.diagnostic_principal.cim10_confidence == "high"
        assert len(dossier.diagnostic_principal.sources_rag) == 2
|
|
|
|
|
|
class TestExtractMedicalInfoRAGFlag:
    """Checks how ``extract_medical_info`` reacts to its ``use_rag`` flag."""

    # Report text shared by the tests that expect a main diagnosis.
    _PANCREATITIS_TEXT = (
        "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour."
    )

    @staticmethod
    def _crh_payload():
        """Return a fresh parsed-document dict with no diagnostics."""
        return {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }

    def test_use_rag_false_no_change(self):
        """With ``use_rag=False`` the main diagnosis carries no RAG data."""
        from src.medical.cim10_extractor import extract_medical_info

        dossier = extract_medical_info(
            self._crh_payload(), self._PANCREATITIS_TEXT, use_rag=False
        )
        principal = dossier.diagnostic_principal

        assert principal is not None
        assert principal.cim10_suggestion == "K85.1"
        # No RAG sources and no justification were attached.
        assert principal.sources_rag == []
        assert principal.justification is None

    def test_use_rag_true_calls_enrich(self):
        """With ``use_rag=True`` the (mocked) ``_enrich_with_rag`` is called."""
        from src.medical.cim10_extractor import extract_medical_info

        payload = self._crh_payload()

        with patch("src.medical.cim10_extractor._enrich_with_rag") as enrich_spy:
            result = extract_medical_info(
                payload, self._PANCREATITIS_TEXT, use_rag=True
            )
            # Exactly one call, receiving the very dossier that is returned.
            enrich_spy.assert_called_once_with(result)

    def test_use_rag_default_false(self):
        """Omitting ``use_rag`` must not trigger ``_enrich_with_rag``."""
        from src.medical.cim10_extractor import extract_medical_info

        payload = self._crh_payload()

        with patch("src.medical.cim10_extractor._enrich_with_rag") as enrich_spy:
            extract_medical_info(payload, "Test simple.")
            enrich_spy.assert_not_called()
|
|
|
|
|
|
@pytest.mark.skipif(
    not CIM10_PDF.exists(),
    reason=f"PDF CIM-10 non trouvé : {CIM10_PDF}",
)
class TestChunkingCIM10:
    """Chunking checks run against the real CIM-10 PDF.

    Both tests need the PDF on disk, so the skip marker sits on the class
    and applies to each of them.
    """

    def test_chunks_contain_known_codes(self):
        """The chunk list is large and includes a few well-known codes."""
        from src.medical.rag_index import _chunk_cim10

        chunks = _chunk_cim10(CIM10_PDF)
        chunk_count = len(chunks)
        assert chunk_count > 100, f"Trop peu de chunks : {chunk_count}"

        # Keep only truthy codes, as chunks may come without one.
        found_codes = set(filter(None, (chunk.code for chunk in chunks)))
        expectations = (
            ("K85", "K85 (pancréatite) non trouvé"),
            ("K80", "K80 (lithiase biliaire) non trouvé"),
            ("E66", "E66 (obésité) non trouvé"),
        )
        for code, failure_message in expectations:
            assert code in found_codes, failure_message

    def test_chunk_content(self):
        """The first K85 chunk mentions pancreatitis, accented or not."""
        from src.medical.rag_index import _chunk_cim10

        k85_chunks = [
            chunk for chunk in _chunk_cim10(CIM10_PDF) if chunk.code == "K85"
        ]
        assert len(k85_chunks) >= 1

        lowered = k85_chunks[0].text.lower()
        assert "pancréatite" in lowered or "pancreatite" in lowered
|
|
|
|
|
|
class TestChunkingGuideMethodo:
    """Chunking check run against the real methodological-guide PDF."""

    @pytest.mark.skipif(
        not GUIDE_METHODO_PDF.exists(),
        reason=f"PDF Guide Métho non trouvé : {GUIDE_METHODO_PDF}",
    )
    def test_chunks_extracted(self):
        """At least ten chunks come back, all tagged ``guide_methodo``."""
        from src.medical.rag_index import _chunk_guide_methodo

        chunks = _chunk_guide_methodo(GUIDE_METHODO_PDF)
        chunk_count = len(chunks)

        assert chunk_count >= 10, f"Trop peu de chunks : {chunk_count}"
        for chunk in chunks:
            assert chunk.document == "guide_methodo"
|
|
|
|
|
|
class TestChunkingCCAM:
    """Chunking check run against the real CCAM PDF."""

    @pytest.mark.skipif(
        not CCAM_PDF.exists(),
        reason=f"PDF CCAM non trouvé : {CCAM_PDF}",
    )
    def test_chunks_extracted(self):
        """At least one chunk comes back, and every chunk is tagged ``ccam``."""
        from src.medical.rag_index import _chunk_ccam

        chunks = _chunk_ccam(CCAM_PDF)

        # Plain string: the message has no placeholders, so no f-prefix.
        assert len(chunks) >= 1, "Aucun chunk CCAM extrait"
        assert all(c.document == "ccam" for c in chunks)
|
|
|
|
|
|
class TestRAGSearchMocked:
    """Tests of ``rag_search`` with the index lookup and LLM call mocked."""

    @staticmethod
    def _k85_hit(score):
        """Build the single mocked search hit (CIM-10, K85) with ``score``."""
        return {
            "document": "cim10",
            "page": 496,
            "code": "K85",
            "extrait": "K85 Pancréatite aiguë...",
            "score": score,
        }

    def test_search_similar_no_index(self):
        """``search_similar`` returns [] when ``get_index`` yields None."""
        from src.medical.rag_search import search_similar

        with patch("src.medical.rag_index.get_index", return_value=None):
            hits = search_similar("pancréatite aiguë")

        assert hits == []

    def test_enrich_diagnostic_no_sources(self):
        """``enrich_diagnostic`` does not crash when the search finds nothing."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="test quelconque", cim10_suggestion="Z99.9")
        patient = {"sexe": "M", "age": 50}

        with patch("src.medical.rag_search.search_similar", return_value=[]):
            enrich_diagnostic(diag, patient)

        assert diag.sources_rag == []
        assert diag.justification is None

    def test_enrich_diagnostic_with_sources_no_ollama(self):
        """Search hits are attached even when the Ollama call returns None."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
        hits = [self._k85_hit(0.92)]

        with patch("src.medical.rag_search.search_similar", return_value=hits):
            with patch("src.medical.rag_search._call_ollama", return_value=None):
                enrich_diagnostic(diag, {"sexe": "M", "age": 50})

        assert len(diag.sources_rag) == 1
        first_source = diag.sources_rag[0]
        assert first_source.document == "cim10"
        assert first_source.code == "K85"
        # No justification (Ollama unavailable).
        assert diag.justification is None

    def test_enrich_diagnostic_with_ollama(self):
        """Full enrichment: search hits plus a mocked Ollama answer."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="Pancréatite aiguë biliaire")
        hits = [self._k85_hit(0.95)]
        llm_answer = {
            "code": "K85.1",
            "confidence": "high",
            "justification": "Pancréatite aiguë d'origine biliaire = K85.1",
        }

        with patch("src.medical.rag_search.search_similar", return_value=hits):
            with patch(
                "src.medical.rag_search._call_ollama", return_value=llm_answer
            ):
                enrich_diagnostic(diag, {"sexe": "F", "age": 43})

        # The mocked LLM answer is copied onto the diagnostic.
        assert diag.cim10_suggestion == "K85.1"
        assert diag.cim10_confidence == "high"
        assert diag.justification == "Pancréatite aiguë d'origine biliaire = K85.1"
        assert len(diag.sources_rag) == 1
|