feat: ajout RAG CIM-10 avec FAISS + Ollama
Implémente un système RAG (Retrieval-Augmented Generation) qui indexe les documents de référence ATIH (CIM-10 FR 2026, Guide Métho MCO, CCAM PMSI) et utilise Ollama (mistral-small3.2:24b) pour justifier et valider le codage CIM-10 des diagnostics.

- Nouveaux modèles Pydantic : RAGSource, Diagnostic étendu (confidence, justification, sources_rag) — rétrocompatible
- Module rag_index.py : chunking des 3 PDFs, embedding sentence-camembert-large, index FAISS IndexFlatIP (3630 vecteurs)
- Module rag_search.py : recherche FAISS + appel Ollama avec fallback double
- Flag CLI --no-rag pour désactiver l'enrichissement RAG
- 18 nouveaux tests (88/88 passent)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
271
tests/test_rag.py
Normal file
271
tests/test_rag.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""Tests pour le RAG CIM-10 (modèles, chunking, intégration)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF
|
||||
|
||||
|
||||
class TestRAGSource:
    """Unit tests for the ``RAGSource`` model: construction and serialization."""

    def test_create_minimal(self):
        """Only ``document`` is supplied; the optional fields are ``None``."""
        source = RAGSource(document="cim10")

        assert source.document == "cim10"
        for optional_field in ("page", "code", "extrait"):
            assert getattr(source, optional_field) is None

    def test_create_full(self):
        """Every field passed to the constructor is readable back unchanged."""
        fields = {
            "document": "guide_methodo",
            "page": 42,
            "code": "K85",
            "extrait": "Pancréatite aiguë biliaire...",
        }

        source = RAGSource(**fields)

        for field_name, expected in fields.items():
            assert getattr(source, field_name) == expected

    def test_serialization(self):
        """``model_dump(exclude_none=True)`` omits the unset ``extrait`` field."""
        expected = {"document": "ccam", "page": 1, "code": "HMFC004"}

        dumped = RAGSource(document="ccam", page=1, code="HMFC004").model_dump(
            exclude_none=True
        )

        assert dumped == expected
|
||||
|
||||
|
||||
class TestDiagnosticExtended:
    """Tests for the RAG-related fields of the ``Diagnostic`` model."""

    def test_backward_compatible(self):
        """The new fields are optional, so the two-argument form still works."""
        diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")

        assert diag.texte == "Pancréatite aiguë"
        assert diag.cim10_suggestion == "K85.9"
        # RAG fields keep their empty defaults when not supplied.
        assert diag.cim10_confidence is None
        assert diag.justification is None
        assert diag.sources_rag == []

    def test_with_rag_fields(self):
        """A diagnostic built with all RAG fields exposes them as given."""
        cim10_source = RAGSource(document="cim10", page=480, code="K80")

        diag = Diagnostic(
            texte="Lithiase cholédoque",
            cim10_suggestion="K80.5",
            cim10_confidence="high",
            justification="Code K80.5 correspond à la lithiase du cholédoque",
            sources_rag=[cim10_source],
        )

        assert diag.cim10_confidence == "high"
        assert diag.justification is not None
        assert len(diag.sources_rag) == 1
        assert diag.sources_rag[0].code == "K80"

    def test_serialization_exclude_none(self):
        """Check that the dumped dict leaves out the fields that are ``None``."""
        dumped = Diagnostic(texte="Test", cim10_suggestion="K85.9").model_dump(
            exclude_none=True
        )

        for absent_key in ("cim10_confidence", "justification"):
            assert absent_key not in dumped
        # The empty list is not None, so it is still included.
        assert "sources_rag" in dumped

    def test_dossier_with_extended_diagnostic(self):
        """A ``DossierMedical`` can carry a RAG-enriched principal diagnosis."""
        principal = Diagnostic(
            texte="Pancréatite aiguë biliaire",
            cim10_suggestion="K85.1",
            cim10_confidence="high",
            justification="Confirmé par CIM-10 FR 2026",
            sources_rag=[
                RAGSource(document="cim10", page=496, code="K85"),
                RAGSource(document="guide_methodo", page=30),
            ],
        )

        dossier = DossierMedical(diagnostic_principal=principal)

        stored = dossier.diagnostic_principal
        assert stored.cim10_confidence == "high"
        assert len(stored.sources_rag) == 2
|
||||
|
||||
|
||||
class TestExtractMedicalInfoRAGFlag:
    """Tests for the ``use_rag`` flag of ``extract_medical_info``."""

    # Report text shared by the tests that exercise a real diagnosis.
    _CRH_TEXT = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour."

    @staticmethod
    def _parsed_crh():
        """Return a fresh parsed-document dict with an empty diagnostics list."""
        return {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }

    def test_use_rag_false_no_change(self):
        """``use_rag=False`` leaves the existing behaviour untouched."""
        from src.medical.cim10_extractor import extract_medical_info

        dossier = extract_medical_info(
            self._parsed_crh(), self._CRH_TEXT, use_rag=False
        )

        principal = dossier.diagnostic_principal
        assert principal is not None
        assert dossier.diagnostic_principal.cim10_suggestion == "K85.1"
        # No RAG enrichment: no sources and no justification.
        assert dossier.diagnostic_principal.sources_rag == []
        assert dossier.diagnostic_principal.justification is None

    def test_use_rag_true_calls_enrich(self):
        """``use_rag=True`` calls ``_enrich_with_rag`` (mocked here)."""
        from src.medical.cim10_extractor import extract_medical_info

        payload = self._parsed_crh()

        with patch("src.medical.cim10_extractor._enrich_with_rag") as enrich_mock:
            dossier = extract_medical_info(payload, self._CRH_TEXT, use_rag=True)
            enrich_mock.assert_called_once_with(dossier)

    def test_use_rag_default_false(self):
        """When ``use_rag`` is omitted, ``_enrich_with_rag`` is not called."""
        from src.medical.cim10_extractor import extract_medical_info

        payload = self._parsed_crh()
        report_text = "Test simple."

        with patch("src.medical.cim10_extractor._enrich_with_rag") as enrich_mock:
            extract_medical_info(payload, report_text)
            enrich_mock.assert_not_called()
|
||||
|
||||
|
||||
class TestChunkingCIM10:
    """Tests for ``_chunk_cim10``; skipped when the CIM-10 PDF is missing."""

    # Class-level mark: applies the same skip condition to every test below.
    pytestmark = pytest.mark.skipif(
        not CIM10_PDF.exists(),
        reason=f"PDF CIM-10 non trouvé : {CIM10_PDF}",
    )

    def test_chunks_contain_known_codes(self):
        """Chunking yields more than 100 chunks, including K85, K80 and E66."""
        from src.medical.rag_index import _chunk_cim10

        chunks = _chunk_cim10(CIM10_PDF)
        assert len(chunks) > 100, f"Trop peu de chunks : {len(chunks)}"

        # Collect the distinct non-empty codes attached to the chunks.
        found_codes = {chunk.code for chunk in chunks if chunk.code}
        failure_messages = {
            "K85": "K85 (pancréatite) non trouvé",
            "K80": "K80 (lithiase biliaire) non trouvé",
            "E66": "E66 (obésité) non trouvé",
        }
        for expected_code, message in failure_messages.items():
            assert expected_code in found_codes, message

    def test_chunk_content(self):
        """The first K85 chunk mentions pancreatitis, accented or not."""
        from src.medical.rag_index import _chunk_cim10

        chunks = _chunk_cim10(CIM10_PDF)
        k85_chunks = [chunk for chunk in chunks if chunk.code == "K85"]
        assert len(k85_chunks) >= 1

        lowered = k85_chunks[0].text.lower()
        assert any(word in lowered for word in ("pancréatite", "pancreatite"))
|
||||
|
||||
|
||||
class TestChunkingGuideMethodo:
    """Tests for ``_chunk_guide_methodo``; skipped when its PDF is missing."""

    @pytest.mark.skipif(
        not GUIDE_METHODO_PDF.exists(),
        reason=f"PDF Guide Métho non trouvé : {GUIDE_METHODO_PDF}",
    )
    def test_chunks_extracted(self):
        """At least 10 chunks come back, all tagged ``guide_methodo``."""
        from src.medical.rag_index import _chunk_guide_methodo

        extracted = _chunk_guide_methodo(GUIDE_METHODO_PDF)

        chunk_count = len(extracted)
        assert chunk_count >= 10, f"Trop peu de chunks : {chunk_count}"
        # No chunk may carry a different document tag.
        assert not any(chunk.document != "guide_methodo" for chunk in extracted)
|
||||
|
||||
|
||||
class TestChunkingCCAM:
    """Tests for ``_chunk_ccam``; skipped when the CCAM PDF is missing."""

    @pytest.mark.skipif(
        not CCAM_PDF.exists(),
        reason=f"PDF CCAM non trouvé : {CCAM_PDF}",
    )
    def test_chunks_extracted(self):
        """At least one chunk comes back, and every chunk is tagged ``ccam``."""
        from src.medical.rag_index import _chunk_ccam

        chunks = _chunk_ccam(CCAM_PDF)

        # Plain string: the message has no placeholders, so no f-prefix needed.
        assert len(chunks) >= 1, "Aucun chunk CCAM extrait"
        assert all(c.document == "ccam" for c in chunks)
|
||||
|
||||
|
||||
class TestRAGSearchMocked:
    """Tests for ``rag_search`` with the index and Ollama replaced by mocks."""

    @staticmethod
    def _k85_hit(score):
        """Return a single fake search result for code K85 with *score*."""
        return {
            "document": "cim10",
            "page": 496,
            "code": "K85",
            "extrait": "K85 Pancréatite aiguë...",
            "score": score,
        }

    def test_search_similar_no_index(self):
        """``search_similar`` returns an empty list when there is no index."""
        from src.medical.rag_search import search_similar

        # NOTE(review): the patch targets rag_index.get_index, not a name in
        # rag_search; presumably search_similar resolves get_index through
        # rag_index at call time. Confirm against rag_search.
        with patch("src.medical.rag_index.get_index", return_value=None):
            hits = search_similar("pancréatite aiguë")

        assert hits == []

    def test_enrich_diagnostic_no_sources(self):
        """``enrich_diagnostic`` does not crash when no source is found."""
        from src.medical.rag_search import enrich_diagnostic

        diagnostic = Diagnostic(texte="test quelconque", cim10_suggestion="Z99.9")
        patient = {"sexe": "M", "age": 50}

        with patch("src.medical.rag_search.search_similar", return_value=[]):
            enrich_diagnostic(diagnostic, patient)

        assert diagnostic.sources_rag == []
        assert diagnostic.justification is None

    def test_enrich_diagnostic_with_sources_no_ollama(self):
        """Enrichment with search hits while ``_call_ollama`` returns ``None``."""
        from src.medical.rag_search import enrich_diagnostic

        diagnostic = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9")
        fake_hits = [self._k85_hit(0.92)]

        with patch("src.medical.rag_search.search_similar", return_value=fake_hits):
            with patch("src.medical.rag_search._call_ollama", return_value=None):
                enrich_diagnostic(diagnostic, {"sexe": "M", "age": 50})

        assert len(diagnostic.sources_rag) == 1
        first_source = diagnostic.sources_rag[0]
        assert first_source.document == "cim10"
        assert first_source.code == "K85"
        # No justification, since the mocked Ollama call returned nothing.
        assert diagnostic.justification is None

    def test_enrich_diagnostic_with_ollama(self):
        """Full enrichment: search hits plus a mocked Ollama answer."""
        from src.medical.rag_search import enrich_diagnostic

        diagnostic = Diagnostic(texte="Pancréatite aiguë biliaire")
        fake_hits = [self._k85_hit(0.95)]
        fake_answer = {
            "code": "K85.1",
            "confidence": "high",
            "justification": "Pancréatite aiguë d'origine biliaire = K85.1",
        }

        with patch("src.medical.rag_search.search_similar", return_value=fake_hits):
            with patch("src.medical.rag_search._call_ollama", return_value=fake_answer):
                enrich_diagnostic(diagnostic, {"sexe": "F", "age": 43})

        assert diagnostic.cim10_suggestion == "K85.1"
        assert diagnostic.cim10_confidence == "high"
        assert diagnostic.justification == "Pancréatite aiguë d'origine biliaire = K85.1"
        assert len(diagnostic.sources_rag) == 1
|
||||
Reference in New Issue
Block a user