Files
aivanov_CIM/tests/test_codeur.py
2026-03-05 01:20:14 +01:00

780 lines
21 KiB
Python

"""
Tests pour le Codeur.
Ces tests vérifient que le Codeur propose correctement les codes DP, DR, DAS et CCAM
avec justifications, preuves et scores de confiance.
"""
import hashlib
from datetime import datetime, timedelta
from unittest.mock import MagicMock, Mock
import pytest
from pipeline_mco_pmsi.coders.codeur import Codeur
from pipeline_mco_pmsi.models.clinical import (
ClinicalFact,
Evidence,
Qualifier,
Span,
)
from pipeline_mco_pmsi.models.coding import Code, CodeCandidate
from pipeline_mco_pmsi.models.metadata import StayMetadata
@pytest.fixture
def mock_rag_engine():
    """Provide a ``MagicMock`` that stands in for the RAG engine."""
    return MagicMock()
@pytest.fixture
def codeur(mock_rag_engine):
    """Build a ``Codeur`` backed by the mocked RAG engine, conservative mode on."""
    settings = {
        "model_name": "mock-llm",
        "model_version": "1.0.0",
        "prompt_version": "1.0.0",
        "conservative_mode": True,
    }
    return Codeur(rag_engine=mock_rag_engine, **settings)
@pytest.fixture
def stay_metadata():
    """Provide stay metadata for a five-day surgical stay used across tests."""
    admitted_on = datetime(2024, 1, 1)
    discharged_on = datetime(2024, 1, 5)
    return StayMetadata(
        stay_id="stay_001",
        admission_date=admitted_on,
        discharge_date=discharged_on,
        specialty="Chirurgie",
        unit="Bloc opératoire",
        age=45,
        sex="M",
    )
@pytest.fixture
def sample_evidence():
    """Provide a sample piece of textual evidence located in ``doc_001``."""
    location = Span(start=100, end=120)
    return Evidence(
        document_id="doc_001",
        span=location,
        text="Appendicite aiguë",
        context="Le patient présente une appendicite aiguë nécessitant une intervention",
    )
@pytest.fixture
def sample_qualifier_affirmed():
    """Provide a qualifier whose certainty is affirmed, with no markers."""
    no_markers = []
    return Qualifier(certainty="affirmé", markers=no_markers, confidence=0.95)
@pytest.fixture
def sample_qualifier_negated():
    """Provide a qualifier whose certainty is negated, with negation markers."""
    negation_markers = ["pas de", "absence de"]
    return Qualifier(certainty="nié", markers=negation_markers, confidence=0.90)
@pytest.fixture
def sample_qualifier_suspected():
    """Provide a qualifier whose certainty is suspected, with hedging markers."""
    hedging_markers = ["possible", "suspecté"]
    return Qualifier(certainty="suspecté", markers=hedging_markers, confidence=0.70)
def test_codeur_initialization(codeur):
    """Check the attributes exposed by a freshly constructed ``Codeur``."""
    expected_attributes = {
        "model_name": "mock-llm",
        "model_version_str": "1.0.0",
        "prompt_version": "1.0.0",
    }
    for attribute, value in expected_attributes.items():
        assert getattr(codeur, attribute) == value
    assert codeur.conservative_mode is True
    # A SHA-256 hex digest is 64 characters long.
    assert len(codeur.model_digest) == 64
def test_filter_facts_conservative_removes_negated(
    codeur, sample_evidence, sample_qualifier_negated
):
    """Check that a negated fact is dropped by the conservative filter.

    Covers requirement 2.4.
    """
    negated_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite",
        qualifier=sample_qualifier_negated,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.9,
    )

    remaining = codeur._filter_facts_conservative([negated_fact])

    assert len(remaining) == 0
def test_filter_facts_conservative_keeps_affirmed(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that an affirmed fact survives the conservative filter."""
    affirmed_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )

    remaining = codeur._filter_facts_conservative([affirmed_fact])

    assert len(remaining) == 1
    assert remaining[0].fact_id == "f_001"
def test_select_dp_rejects_negated_facts(
    codeur, sample_evidence, sample_qualifier_negated, mock_rag_engine
):
    """Check that a negated fact never becomes the principal diagnosis (DP).

    Covers requirement 2.4.
    """
    negated_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite",
        qualifier=sample_qualifier_negated,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.9,
    )
    # The same candidate list is served by the mocked engine and handed
    # directly to the selector.
    icd10_candidates = [
        CodeCandidate(
            code="K35.8",
            label="Appendicite aiguë",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="K35.8 Appendicite aiguë",
        )
    ]
    mock_rag_engine.search_icd10.return_value = icd10_candidates
    candidates_by_fact = {"f_001": icd10_candidates}

    selected = codeur._select_dp([negated_fact], candidates_by_fact, "2026")

    assert selected is None
def test_select_dp_rejects_suspected_facts(
    codeur, sample_evidence, sample_qualifier_suspected, mock_rag_engine
):
    """Check that a suspected fact never becomes the principal diagnosis (DP).

    Covers requirement 2.5.
    """
    suspected_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite",
        qualifier=sample_qualifier_suspected,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.7,
    )
    icd10_candidates = [
        CodeCandidate(
            code="K35.8",
            label="Appendicite aiguë",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="K35.8 Appendicite aiguë",
        )
    ]
    mock_rag_engine.search_icd10.return_value = icd10_candidates
    candidates_by_fact = {"f_001": icd10_candidates}

    selected = codeur._select_dp([suspected_fact], candidates_by_fact, "2026")

    assert selected is None
def test_select_dp_rejects_history_facts(
    codeur, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check that a medical-history fact never becomes the principal diagnosis.

    Covers requirement 2.6.
    """
    history_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Diabète",
        qualifier=sample_qualifier_affirmed,
        temporality="antecedent",
        evidence=sample_evidence,
        confidence=0.9,
    )
    icd10_candidates = [
        CodeCandidate(
            code="E11.9",
            label="Diabète sucré de type 2",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="E11.9 Diabète sucré de type 2",
        )
    ]
    mock_rag_engine.search_icd10.return_value = icd10_candidates
    candidates_by_fact = {"f_001": icd10_candidates}

    selected = codeur._select_dp([history_fact], candidates_by_fact, "2026")

    assert selected is None
def test_select_dp_selects_affirmed_current_diagnostic(
    codeur, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check that an affirmed, current diagnosis is selected as the DP.

    Covers requirement 8.1, plus the evidence (1.1), confidence (8.5) and
    reasoning (8.6) requirements on the resulting code.
    """
    current_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    icd10_candidates = [
        CodeCandidate(
            code="K35.8",
            label="Appendicite aiguë",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="K35.8 Appendicite aiguë",
        )
    ]
    mock_rag_engine.search_icd10.return_value = icd10_candidates
    candidates_by_fact = {"f_001": icd10_candidates}

    selected = codeur._select_dp([current_fact], candidates_by_fact, "2026")

    assert selected is not None
    assert selected.code == "K35.8"
    assert selected.type == "dp"
    assert selected.label == "Appendicite aiguë"
    assert len(selected.evidence) >= 1  # Requirement 1.1
    assert 0.0 <= selected.confidence <= 1.0  # Requirement 8.5
    assert len(selected.reasoning) > 0  # Requirement 8.6
def test_select_dp_prioritizes_complications(
    codeur, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check that a complication wins over a plain diagnosis for the DP.

    The complication has both a lower fact confidence and a lower candidate
    similarity than the diagnosis, yet its code is the one expected.
    """
    diagnosis_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Diabète",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.9,
    )
    complication_fact = ClinicalFact(
        fact_id="f_002",
        type="complication",
        text="Péritonite",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.85,
    )

    complication_candidates = [
        CodeCandidate(
            code="K65.0",
            label="Péritonite aiguë",
            similarity_score=0.90,
            source="reranked",
            chunk_id="chunk_002",
            chunk_text="K65.0 Péritonite aiguë",
        )
    ]
    mock_rag_engine.search_icd10.return_value = complication_candidates
    diagnosis_candidates = [
        CodeCandidate(
            code="E11.9",
            label="Diabète",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="E11.9 Diabète",
        )
    ]
    candidates_by_fact = {
        "f_001": diagnosis_candidates,
        "f_002": complication_candidates,
    }

    selected = codeur._select_dp(
        [diagnosis_fact, complication_fact], candidates_by_fact, "2026"
    )

    assert selected is not None
    assert selected.code == "K65.0"  # The complication is the one selected
def test_create_code_has_required_evidence(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that a created code carries between one and three evidences.

    Covers requirements 1.1 and 1.2. The first evidence must point at the
    document and span of the source fact.
    """
    source_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    chosen_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )

    created = codeur._create_code(chosen_candidate, source_fact, "dp", "2026")

    assert 1 <= len(created.evidence) <= 3
    first_evidence = created.evidence[0]
    assert first_evidence.document_id == "doc_001"
    assert first_evidence.span.start == 100
    assert first_evidence.span.end == 120
def test_create_code_has_confidence_score(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that a created code has a confidence within [0, 1].

    Covers requirement 8.5.
    """
    source_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    chosen_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )

    created = codeur._create_code(chosen_candidate, source_fact, "dp", "2026")

    assert 0.0 <= created.confidence <= 1.0
def test_create_code_has_reasoning(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that a created DP code has non-empty reasoning naming its role.

    Covers requirement 8.6.
    """
    source_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    chosen_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )

    created = codeur._create_code(chosen_candidate, source_fact, "dp", "2026")

    assert len(created.reasoning) > 0
    assert "Diagnostic Principal" in created.reasoning
def test_assign_confidence_penalizes_suspected(
    codeur, sample_evidence, sample_qualifier_suspected
):
    """Check that a suspected fact scores lower than an affirmed one.

    Covers requirement 2.2. Both facts are scored against the same candidate.
    """
    shared_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )
    suspected = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite",
        qualifier=sample_qualifier_suspected,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.7,
    )
    affirmed_qualifier = Qualifier(certainty="affirmé", markers=[], confidence=0.95)
    affirmed = ClinicalFact(
        fact_id="f_002",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=affirmed_qualifier,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )

    score_suspected = codeur.assign_confidence(shared_candidate, suspected)
    score_affirmed = codeur.assign_confidence(shared_candidate, affirmed)

    assert score_suspected < score_affirmed
def test_assign_confidence_penalizes_history(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that a medical-history fact scores lower than a current one.

    Covers requirement 2.3. The two facts differ only by identifier and
    temporality.
    """

    def diabetes_fact(fact_id, temporality):
        # Build the otherwise identical fact for the requested temporality.
        return ClinicalFact(
            fact_id=fact_id,
            type="diagnostic",
            text="Diabète",
            qualifier=sample_qualifier_affirmed,
            temporality=temporality,
            evidence=sample_evidence,
            confidence=0.9,
        )

    shared_candidate = CodeCandidate(
        code="E11.9",
        label="Diabète",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="E11.9 Diabète",
    )
    past = diabetes_fact("f_001", "antecedent")
    present = diabetes_fact("f_002", "actuel")

    score_past = codeur.assign_confidence(shared_candidate, past)
    score_present = codeur.assign_confidence(shared_candidate, present)

    assert score_past < score_present
def test_select_ccam_selects_acts(
    codeur, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check that an affirmed act yields exactly one CCAM code.

    Covers requirement 8.4, plus the evidence requirement 1.2 on the result.
    """
    act_fact = ClinicalFact(
        fact_id="f_001",
        type="acte",
        text="Appendicectomie",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    ccam_candidates = [
        CodeCandidate(
            code="HHFA001",
            label="Appendicectomie",
            similarity_score=0.95,
            source="reranked",
            chunk_id="chunk_001",
            chunk_text="HHFA001 Appendicectomie",
        )
    ]
    mock_rag_engine.search_ccam.return_value = ccam_candidates
    candidates_by_fact = {"f_001": ccam_candidates}

    selected = codeur._select_ccam([act_fact], candidates_by_fact, "2025")

    assert len(selected) == 1
    only_code = selected[0]
    assert only_code.code == "HHFA001"
    assert only_code.type == "ccam"
    assert len(only_code.evidence) >= 1  # Requirement 1.2
def test_propose_codes_returns_complete_proposal(
    codeur, stay_metadata, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check the full proposal built from one diagnosis and one act.

    Covers requirements 8.1, 8.2, 8.3 and 8.4, plus 8.6 for the reasoning.
    """
    diagnosis_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    act_fact = ClinicalFact(
        fact_id="f_002",
        type="acte",
        text="Appendicectomie",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )

    # Stub both RAG searches of the mocked engine.
    icd10_hit = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )
    ccam_hit = CodeCandidate(
        code="HHFA001",
        label="Appendicectomie",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_002",
        chunk_text="HHFA001 Appendicectomie",
    )
    mock_rag_engine.search_icd10.return_value = [icd10_hit]
    mock_rag_engine.search_ccam.return_value = [ccam_hit]

    result = codeur.propose_codes([diagnosis_fact, act_fact], stay_metadata)

    # Verify the structure of the proposal.
    assert result.stay_id == "stay_001"
    assert result.dp is not None
    assert result.dp.code == "K35.8"
    assert len(result.ccam) == 1
    assert result.ccam[0].code == "HHFA001"
    assert len(result.reasoning) > 0  # Requirement 8.6
    assert result.model_version.model_name == "mock-llm"
    assert result.prompt_version == "1.0.0"
def test_propose_codes_handles_no_dp(
    codeur, stay_metadata, sample_evidence, sample_qualifier_negated, mock_rag_engine
):
    """Check that the proposal reports a missing DP instead of inventing one.

    The only fact is negated and the mocked ICD-10 search returns nothing.
    """
    mock_rag_engine.search_icd10.return_value = []
    negated_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite",
        qualifier=sample_qualifier_negated,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.9,
    )

    result = codeur.propose_codes([negated_fact], stay_metadata)

    assert result.dp is None
    assert "Aucun Diagnostic Principal" in result.reasoning
def test_select_das_excludes_dp_and_dr(
    codeur, sample_evidence, sample_qualifier_affirmed, mock_rag_engine
):
    """Check that the DP code is left out of the associated diagnoses (DAS).

    Covers requirement 8.3. The DR argument is passed as ``None`` here, so
    only the DP exclusion is actually exercised by this test.
    """
    appendicitis_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    diabetes_fact = ClinicalFact(
        fact_id="f_002",
        type="diagnostic",
        text="Diabète",
        qualifier=sample_qualifier_affirmed,
        temporality="antecedent",
        evidence=sample_evidence,
        confidence=0.9,
    )
    all_facts = [appendicitis_fact, diabetes_fact]

    # One candidate per fact: the first backs the DP, the second the DAS.
    appendicitis_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )
    diabetes_candidate = CodeCandidate(
        code="E11.9",
        label="Diabète",
        similarity_score=0.90,
        source="reranked",
        chunk_id="chunk_002",
        chunk_text="E11.9 Diabète",
    )
    candidates_by_fact = {
        "f_001": [appendicitis_candidate],
        "f_002": [diabetes_candidate],
    }

    # Build the DP from the first fact, then select the DAS around it.
    principal = codeur._create_code(
        appendicitis_candidate, appendicitis_fact, "dp", "2026"
    )
    associated = codeur._select_das(
        all_facts, candidates_by_fact, principal, None, "2026"
    )

    # The DP code must be absent and the other diagnosis present.
    associated_codes = [entry.code for entry in associated]
    assert "K35.8" not in associated_codes
    assert "E11.9" in associated_codes
def test_generate_code_reasoning_includes_evidence(
    codeur, sample_evidence, sample_qualifier_affirmed
):
    """Check that per-code reasoning quotes the label, document and evidence.

    Covers requirement 8.6.
    """
    source_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )
    chosen_candidate = CodeCandidate(
        code="K35.8",
        label="Appendicite aiguë",
        similarity_score=0.95,
        source="reranked",
        chunk_id="chunk_001",
        chunk_text="K35.8 Appendicite aiguë",
    )

    explanation = codeur._generate_code_reasoning(chosen_candidate, source_fact, "dp")

    for expected_fragment in ("Appendicite aiguë", "doc_001", "Preuve textuelle"):
        assert expected_fragment in explanation
def test_generate_global_reasoning_includes_summary(
    codeur, stay_metadata, sample_evidence, sample_qualifier_affirmed
):
    """Check that the global reasoning summarizes the stay and the DP.

    Covers requirement 8.6. Only a DP is supplied; the DR is ``None`` and the
    two list arguments that follow it are empty.
    """
    principal = Code(
        code="K35.8",
        label="Appendicite aiguë",
        type="dp",
        evidence=[sample_evidence],
        confidence=0.95,
        reasoning="Test reasoning",
        referentiel_version="2026",
    )
    source_fact = ClinicalFact(
        fact_id="f_001",
        type="diagnostic",
        text="Appendicite aiguë",
        qualifier=sample_qualifier_affirmed,
        temporality="actuel",
        evidence=sample_evidence,
        confidence=0.95,
    )

    summary = codeur._generate_global_reasoning(
        principal, None, [], [], [source_fact], stay_metadata
    )

    # Case-sensitive fragments: stay id, specialty, DP code and label.
    for expected_fragment in ("stay_001", "Chirurgie", "K35.8", "Appendicite aiguë"):
        assert expected_fragment in summary
    # Case-insensitive fragments.
    assert "conservative" in summary.lower()
    assert "preuves textuelles" in summary.lower()