"""Tests pour le RAG CIM-10 (modèles, chunking, intégration).""" from __future__ import annotations from pathlib import Path from unittest.mock import patch, MagicMock import pytest from src.config import RAGSource, Diagnostic, DossierMedical, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF class TestRAGSource: def test_create_minimal(self): src = RAGSource(document="cim10") assert src.document == "cim10" assert src.page is None assert src.code is None assert src.extrait is None def test_create_full(self): src = RAGSource( document="guide_methodo", page=42, code="K85", extrait="Pancréatite aiguë biliaire...", ) assert src.document == "guide_methodo" assert src.page == 42 assert src.code == "K85" assert src.extrait == "Pancréatite aiguë biliaire..." def test_serialization(self): src = RAGSource(document="ccam", page=1, code="HMFC004") data = src.model_dump(exclude_none=True) assert data == {"document": "ccam", "page": 1, "code": "HMFC004"} class TestDiagnosticExtended: def test_backward_compatible(self): """Les nouveaux champs sont optionnels — rétrocompatible.""" d = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9") assert d.texte == "Pancréatite aiguë" assert d.cim10_suggestion == "K85.9" assert d.cim10_confidence is None assert d.justification is None assert d.sources_rag == [] def test_with_rag_fields(self): d = Diagnostic( texte="Lithiase cholédoque", cim10_suggestion="K80.5", cim10_confidence="high", justification="Code K80.5 correspond à la lithiase du cholédoque", sources_rag=[ RAGSource(document="cim10", page=480, code="K80"), ], ) assert d.cim10_confidence == "high" assert d.justification is not None assert len(d.sources_rag) == 1 assert d.sources_rag[0].code == "K80" def test_serialization_exclude_none(self): """Vérifier que le JSON n'inclut pas les champs None.""" d = Diagnostic(texte="Test", cim10_suggestion="K85.9") data = d.model_dump(exclude_none=True) assert "cim10_confidence" not in data assert "justification" not in data assert "sources_rag" in data # list vide incluse def test_dossier_with_extended_diagnostic(self): """Un DossierMedical avec des diagnostics enrichis par le RAG.""" dossier = DossierMedical( diagnostic_principal=Diagnostic( texte="Pancréatite aiguë biliaire", cim10_suggestion="K85.1", cim10_confidence="high", justification="Confirmé par CIM-10 FR 2026", sources_rag=[ RAGSource(document="cim10", page=496, code="K85"), RAGSource(document="guide_methodo", page=30), ], ), ) assert dossier.diagnostic_principal.cim10_confidence == "high" assert len(dossier.diagnostic_principal.sources_rag) == 2 class TestExtractMedicalInfoRAGFlag: def test_use_rag_false_no_change(self): """use_rag=False ne modifie pas le comportement existant.""" from src.medical.cim10_extractor import extract_medical_info parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour." dossier = extract_medical_info(parsed, text, use_rag=False) assert dossier.diagnostic_principal is not None assert dossier.diagnostic_principal.cim10_suggestion == "K85.1" # Pas de sources RAG assert dossier.diagnostic_principal.sources_rag == [] assert dossier.diagnostic_principal.justification is None def test_use_rag_true_calls_enrich(self): """use_rag=True appelle _enrich_with_rag (mocké).""" from src.medical.cim10_extractor import extract_medical_info parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Pancréatite aiguë biliaire.\nTTT de sortie :\nParacétamol\n\nDevenir : retour." with patch("src.medical.cim10_extractor._enrich_with_rag") as mock_enrich: dossier = extract_medical_info(parsed, text, use_rag=True) mock_enrich.assert_called_once_with(dossier) def test_use_rag_default_false(self): """Par défaut, use_rag=False.""" from src.medical.cim10_extractor import extract_medical_info parsed = { "type": "crh", "patient": {"sexe": "M"}, "sejour": {}, "diagnostics": [], } text = "Test simple." with patch("src.medical.cim10_extractor._enrich_with_rag") as mock_enrich: extract_medical_info(parsed, text) mock_enrich.assert_not_called() class TestChunkingCIM10: @pytest.mark.skipif( not CIM10_PDF.exists(), reason=f"PDF CIM-10 non trouvé : {CIM10_PDF}", ) def test_chunks_contain_known_codes(self): from src.medical.rag_index import _chunk_cim10 chunks = _chunk_cim10(CIM10_PDF) assert len(chunks) > 100, f"Trop peu de chunks : {len(chunks)}" codes = {c.code for c in chunks if c.code} assert "K85" in codes, "K85 (pancréatite) non trouvé" assert "K80" in codes, "K80 (lithiase biliaire) non trouvé" assert "E66" in codes, "E66 (obésité) non trouvé" @pytest.mark.skipif( not CIM10_PDF.exists(), reason=f"PDF CIM-10 non trouvé : {CIM10_PDF}", ) def test_chunk_content(self): from src.medical.rag_index import _chunk_cim10 chunks = _chunk_cim10(CIM10_PDF) k85_chunks = [c for c in chunks if c.code == "K85"] assert len(k85_chunks) >= 1 assert "pancréatite" in k85_chunks[0].text.lower() or "pancreatite" in k85_chunks[0].text.lower() class TestChunkingGuideMethodo: @pytest.mark.skipif( not GUIDE_METHODO_PDF.exists(), reason=f"PDF Guide Métho non trouvé : {GUIDE_METHODO_PDF}", ) def test_chunks_extracted(self): from src.medical.rag_index import _chunk_guide_methodo chunks = _chunk_guide_methodo(GUIDE_METHODO_PDF) assert len(chunks) >= 10, f"Trop peu de chunks : {len(chunks)}" assert all(c.document == "guide_methodo" for c in chunks) class TestChunkingCCAM: @pytest.mark.skipif( not CCAM_PDF.exists(), reason=f"PDF CCAM non trouvé : {CCAM_PDF}", ) def test_chunks_extracted(self): from src.medical.rag_index import _chunk_ccam chunks = _chunk_ccam(CCAM_PDF) assert len(chunks) >= 1, f"Aucun chunk CCAM extrait" assert all(c.document == "ccam" for c in chunks) class TestRAGSearchMocked: def test_search_similar_no_index(self): """search_similar retourne une liste vide si l'index n'existe pas.""" from src.medical.rag_search import search_similar with patch("src.medical.rag_index.get_index", return_value=None): results = search_similar("pancréatite aiguë") assert results == [] def test_enrich_diagnostic_no_sources(self): """enrich_diagnostic ne plante pas si aucune source trouvée.""" from src.medical.rag_search import enrich_diagnostic diag = Diagnostic(texte="test quelconque", cim10_suggestion="Z99.9") with patch("src.medical.rag_search.search_similar", return_value=[]): enrich_diagnostic(diag, {"sexe": "M", "age": 50}) assert diag.sources_rag == [] assert diag.justification is None def test_enrich_diagnostic_with_sources_no_ollama(self): """Enrichissement avec sources FAISS mais sans Ollama.""" from src.medical.rag_search import enrich_diagnostic diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9") mock_sources = [ { "document": "cim10", "page": 496, "code": "K85", "extrait": "K85 Pancréatite aiguë...", "score": 0.92, }, ] with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \ patch("src.medical.rag_search._call_ollama", return_value=None): enrich_diagnostic(diag, {"sexe": "M", "age": 50}) assert len(diag.sources_rag) == 1 assert diag.sources_rag[0].document == "cim10" assert diag.sources_rag[0].code == "K85" # Pas de justification (Ollama non disponible) assert diag.justification is None def test_enrich_diagnostic_with_ollama(self): """Enrichissement complet avec sources + Ollama.""" from src.medical.rag_search import enrich_diagnostic diag = Diagnostic(texte="Pancréatite aiguë biliaire") mock_sources = [ { "document": "cim10", "page": 496, "code": "K85", "extrait": "K85 Pancréatite aiguë...", "score": 0.95, }, ] mock_llm = { "code": "K85.1", "confidence": "high", "justification": "Pancréatite aiguë d'origine biliaire = K85.1", } with patch("src.medical.rag_search.search_similar", return_value=mock_sources), \ patch("src.medical.rag_search._call_ollama", return_value=mock_llm): enrich_diagnostic(diag, {"sexe": "F", "age": 43}) assert diag.cim10_suggestion == "K85.1" assert diag.cim10_confidence == "high" assert diag.justification == "Pancréatite aiguë d'origine biliaire = K85.1" assert len(diag.sources_rag) == 1