feat(pmsi): add DP candidate pool + pool rank LLM + benchmark

- DPPoolCandidate model (terme, section, preuve, score_initial)
- build_dp_candidate_pool() with filters (_is_pool_excluded, _dedup_pool)
- Pool exclusion: admin noise, bio values, vague symptoms, place names
- DP_POOL_RANK template for LLM-based ranking among pool candidates
- llm_dp_pool_rank() with guardrails (GF-1 evidence, GF-3 confidence)
- benchmark_quality.py: --dp-candidates, --use-dp-pool-rank flags
- 41 new tests (pool, exclusion, dedup, pool rank, synthese)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-24 00:06:44 +01:00
parent 56c38c3d98
commit da34bdc8d7
6 changed files with 1911 additions and 2 deletions

View File

@@ -3,9 +3,11 @@
import pytest
from src.config import (
ActeCCAM,
DossierMedical,
Diagnostic,
DPCandidate,
DPPoolCandidate,
DPSelection,
DP_SCORING_WEIGHTS,
DP_REVIEW_THRESHOLD,
@@ -15,14 +17,20 @@ from src.config import (
)
from src.medical.dp_scoring import (
build_dp_shortlist,
build_dp_candidate_pool,
score_candidates,
select_dp,
generate_synthese_pmsi,
llm_dp_pool_rank,
_format_pool_for_prompt,
_build_clinical_context,
_get_context_window,
_is_z_code_whitelisted,
_is_comorbidity_code,
_has_explicit_pec_proof,
_dedup_by_code,
_dedup_pool,
_is_pool_excluded,
_normalize_evidence_section,
_is_comorbidite_banale,
_has_pec_marker,
@@ -718,6 +726,390 @@ class TestSectionNormalization:
assert _normalize_evidence_section("sections fortes du dossier") == "autres"
class TestSynthesePMSI:
    """Tests for generate_synthese_pmsi() with the Ollama call stubbed out."""

    @staticmethod
    def _stub_ollama(monkeypatch, stub):
        """Replace ``call_ollama`` on ``src.medical.ollama_client`` with *stub*."""
        import src.medical.ollama_client as ollama_client

        monkeypatch.setattr(ollama_client, "call_ollama", stub)

    def test_returns_synthese_on_valid_response(self, monkeypatch):
        """A valid LLM reply produces a complete SynthesePMSI."""
        llm_reply = {
            "motif_admission": "Douleur abdominale aiguë",
            "probleme_pris_en_charge": "Pancréatite aiguë biliaire",
            "diagnostic_retenu": "Pancréatite aiguë d'origine biliaire",
            "actes_ou_traitements_majeurs": ["Scanner abdominal", "Mise à jeun"],
            "complications": [],
            "terrain_comorbidites": ["HTA traitée", "Diabète type 2"],
            "preuves": [
                {"section": "motif_hospitalisation", "excerpt": "douleur abdominale intense"},
                {"section": "conclusion", "excerpt": "pancréatite aiguë biliaire confirmée"},
            ],
        }

        def fake_call_ollama(prompt, **kwargs):
            return llm_reply

        import src.medical.dp_scoring as scoring_module

        # raising=False: the attribute is set even if dp_scoring does not
        # expose call_ollama at module level.
        monkeypatch.setattr(
            scoring_module, "call_ollama", fake_call_ollama, raising=False
        )
        # Force the inline import to pick up our mock.
        self._stub_ollama(monkeypatch, fake_call_ollama)

        parsed = _make_parsed(
            sections={"conclusion": "pancréatite aiguë biliaire confirmée"}
        )
        synthese = generate_synthese_pmsi(parsed, "texte complet", DossierMedical())

        assert synthese is not None
        assert isinstance(synthese, SynthesePMSI)
        assert synthese.probleme_pris_en_charge == "Pancréatite aiguë biliaire"
        assert synthese.motif_admission == "Douleur abdominale aiguë"
        assert "Scanner abdominal" in synthese.actes_ou_traitements_majeurs
        assert len(synthese.terrain_comorbidites) == 2
        assert synthese.complications == []
        assert len(synthese.preuves) == 2
        assert synthese.preuves[0].section == "motif_hospitalisation"

    def test_returns_none_on_invalid_response(self, monkeypatch):
        """A non-dict LLM reply yields None."""
        self._stub_ollama(monkeypatch, lambda prompt, **kwargs: "texte brut")

        synthese = generate_synthese_pmsi(_make_parsed(), "texte", DossierMedical())

        assert synthese is None

    def test_returns_none_on_exception(self, monkeypatch):
        """An exception raised by the LLM call yields None."""

        def failing_call_ollama(prompt, **kwargs):
            raise ConnectionError("Ollama down")

        self._stub_ollama(monkeypatch, failing_call_ollama)

        synthese = generate_synthese_pmsi(_make_parsed(), "texte", DossierMedical())

        assert synthese is None

    def test_robust_to_string_lists(self, monkeypatch):
        """Plain strings returned where lists are expected are tolerated."""
        llm_reply = {
            "motif_admission": "Fièvre",
            "probleme_pris_en_charge": "Pneumopathie",
            "diagnostic_retenu": "Pneumopathie bactérienne",
            "actes_ou_traitements_majeurs": "Antibiothérapie IV",  # string
            "complications": "Insuffisance respiratoire",  # string
            "terrain_comorbidites": "BPCO",  # string
            "preuves": [],
        }
        self._stub_ollama(monkeypatch, lambda prompt, **kwargs: llm_reply)

        synthese = generate_synthese_pmsi(_make_parsed(), "texte", DossierMedical())

        assert synthese is not None
        assert synthese.actes_ou_traitements_majeurs == ["Antibiothérapie IV"]
        assert synthese.complications == ["Insuffisance respiratoire"]
        assert synthese.terrain_comorbidites == ["BPCO"]

    def test_preuves_malformed_skipped(self, monkeypatch):
        """Evidence entries lacking a section/excerpt are dropped."""
        llm_reply = {
            "motif_admission": "Test",
            "probleme_pris_en_charge": "Test",
            "diagnostic_retenu": "Test",
            "preuves": [
                {"section": "conclusion", "excerpt": "valide"},
                {"section": "", "excerpt": "section vide"},
                {"no_section": True},
                "pas un dict",
            ],
        }
        self._stub_ollama(monkeypatch, lambda prompt, **kwargs: llm_reply)

        synthese = generate_synthese_pmsi(_make_parsed(), "texte", DossierMedical())

        assert synthese is not None
        # Only the first entry is well-formed.
        assert len(synthese.preuves) == 1
        assert synthese.preuves[0].section == "conclusion"

    def test_serialization_round_trip(self):
        """SynthesePMSI survives a model_dump() / constructor round trip."""
        original = SynthesePMSI(
            motif_admission="Douleur thoracique",
            probleme_pris_en_charge="Infarctus du myocarde",
            diagnostic_retenu="IDM ST+ antérieur",
            actes_ou_traitements_majeurs=["Coronarographie", "Angioplastie"],
            complications=["Insuffisance cardiaque"],
            terrain_comorbidites=["HTA", "Tabagisme"],
            preuves=[PreuveSynthese(section="conclusion", excerpt="IDM confirmé")],
        )

        rebuilt = SynthesePMSI(**original.model_dump())

        assert rebuilt.probleme_pris_en_charge == "Infarctus du myocarde"
        assert len(rebuilt.preuves) == 1
        assert rebuilt.preuves[0].section == "conclusion"

    def test_dossier_medical_field(self):
        """DossierMedical exposes a synthese_pmsi field that defaults to None."""
        dossier = DossierMedical()
        assert dossier.synthese_pmsi is None

        dossier.synthese_pmsi = SynthesePMSI(probleme_pris_en_charge="Test")
        assert dossier.synthese_pmsi.probleme_pris_en_charge == "Test"

        dumped = dossier.model_dump(exclude_none=True)
        assert "synthese_pmsi" in dumped
# ===========================================================================
# DP Candidate Pool
# ===========================================================================
class TestDPPoolCandidate:
    """Tests for the DPPoolCandidate model."""

    def test_basic_creation(self):
        """Only terme/section are supplied; the other fields take their defaults."""
        candidate = DPPoolCandidate(terme="Pancréatite aiguë", section="conclusion")

        assert candidate.terme == "Pancréatite aiguë"
        assert candidate.section == "conclusion"
        assert candidate.score_initial == 0.0
        assert candidate.preuve == ""

    def test_serialization(self):
        """A candidate rebuilt from model_dump() keeps its values."""
        original = DPPoolCandidate(
            terme="Cholécystite aiguë",
            section="diag_sortie",
            preuve="cholécystite aiguë lithiasique",
            score_initial=0.9,
        )

        rebuilt = DPPoolCandidate(**original.model_dump())

        assert rebuilt.terme == "Cholécystite aiguë"
        assert rebuilt.score_initial == 0.9
class TestIsPoolExcluded:
    """Tests for the pool candidate filter _is_pool_excluded()."""

    def test_bio_value_excluded(self):
        """A lab marker with value and unit is excluded."""
        excluded = _is_pool_excluded("CRP 180 mg/L")
        assert excluded is True

    def test_bio_term_with_number_excluded(self):
        """A lab term followed by a number is excluded."""
        excluded = _is_pool_excluded("Hémoglobine 7.2 g/dL")
        assert excluded is True

    def test_vague_symptom_excluded(self):
        """Bare single-word symptoms are excluded."""
        for symptom in ("douleur", "fièvre"):
            assert _is_pool_excluded(symptom) is True

    def test_vague_symptom_with_context_kept(self):
        """A qualified (multi-word) symptom is kept."""
        excluded = _is_pool_excluded("douleur abdominale aiguë")
        assert excluded is False

    def test_medical_diagnosis_kept(self):
        """A real diagnosis is kept."""
        excluded = _is_pool_excluded("Pancréatite aiguë biliaire")
        assert excluded is False

    def test_numeric_value_excluded(self):
        """A bare numeric value with a unit is excluded."""
        excluded = _is_pool_excluded("12.5 g/dL")
        assert excluded is True
class TestDedupPool:
    """Tests for pool deduplication via _dedup_pool()."""

    def test_dedup_keeps_highest_score(self):
        """Of two identical terms, the higher-scored entry survives."""
        low = DPPoolCandidate(
            terme="Pancréatite aiguë", section="conclusion", score_initial=0.7
        )
        high = DPPoolCandidate(
            terme="Pancréatite aiguë", section="diag_sortie", score_initial=1.0
        )

        deduped = _dedup_pool([low, high])

        assert len(deduped) == 1
        survivor = deduped[0]
        assert survivor.score_initial == 1.0
        assert survivor.section == "diag_sortie"

    def test_dedup_normalizes_text(self):
        """Accent/case variants collapse onto the same key."""
        accented = DPPoolCandidate(
            terme="Pancréatite aiguë", section="a", score_initial=0.5
        )
        plain = DPPoolCandidate(
            terme="pancreatite aigue", section="b", score_initial=0.8
        )

        deduped = _dedup_pool([accented, plain])

        assert len(deduped) == 1

    def test_distinct_terms_kept(self):
        """Different terms are both preserved."""
        first = DPPoolCandidate(
            terme="Pancréatite aiguë", section="a", score_initial=0.7
        )
        second = DPPoolCandidate(
            terme="Cholécystite aiguë", section="b", score_initial=0.9
        )

        deduped = _dedup_pool([first, second])

        assert len(deduped) == 2
class TestBuildDPCandidatePool:
    """Integration tests for build_dp_candidate_pool()."""

    def test_indicative_phrase_extraction(self):
        """Indicative phrases are extracted from the text."""
        full_text = (
            "Le patient a été hospitalisé pour pancréatite aiguë biliaire. Suivi habituel."
        )
        parsed = _make_parsed(
            sections={"conclusion": "Pancréatite aiguë biliaire confirmée."}
        )

        pool = build_dp_candidate_pool(parsed, full_text, None, DossierMedical())

        lowered = [candidate.terme.lower() for candidate in pool]
        assert any("pancréatite" in t or "pancreatite" in t for t in lowered)

    def test_sections_fortes_extraction(self):
        """Diagnoses from the strong sections show up in the pool."""
        parsed = _make_parsed(
            sections={
                "diag_sortie": "Cholécystite aiguë lithiasique",
                "conclusion": "Évolution favorable après cholécystectomie",
            }
        )

        pool = build_dp_candidate_pool(parsed, "texte complet", None, DossierMedical())

        lowered = [candidate.terme.lower() for candidate in pool]
        assert any("cholécystite" in t or "cholecystite" in t for t in lowered)

    def test_edsnlp_entities_included(self):
        """Non-negated edsnlp entities show up in the pool."""
        from dataclasses import dataclass

        @dataclass
        class StubEntity:
            texte: str
            code: str
            negation: bool = False
            hypothese: bool = False

        @dataclass
        class StubResult:
            cim10_entities: list

        edsnlp_result = StubResult(
            cim10_entities=[
                StubEntity(texte="pneumopathie", code="J18.9"),
                StubEntity(texte="HTA", code="I10", negation=True),  # excluded
            ]
        )

        pool = build_dp_candidate_pool(
            _make_parsed(), "texte", edsnlp_result, DossierMedical()
        )

        lowered = [candidate.terme.lower() for candidate in pool]
        assert any("pneumopathie" in t for t in lowered)
        # The negated HTA entity must not appear.
        assert "hta" not in lowered

    def test_actes_included(self):
        """CCAM procedures attached to the dossier appear as candidates."""
        dossier = DossierMedical()
        dossier.actes_ccam = [
            ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
        ]

        pool = build_dp_candidate_pool(_make_parsed(), "texte", None, dossier)

        lowered = [candidate.terme.lower() for candidate in pool]
        assert any(
            "cholécystectomie" in t or "cholecystectomie" in t for t in lowered
        )

    def test_cim10_map_matches(self):
        """CIM10_MAP terms matched in the strong sections are included."""
        parsed = _make_parsed(
            sections={
                "conclusion": "Patient avec pancréatite aiguë biliaire sévère.",
            }
        )

        pool = build_dp_candidate_pool(parsed, "texte", None, DossierMedical())

        origins = [candidate.section for candidate in pool]
        assert "cim10_map" in origins

    def test_bio_values_excluded(self):
        """Lab values do not pollute the pool."""
        parsed = _make_parsed(
            sections={
                "conclusion": "CRP 180 mg/L. Hémoglobine 7.2 g/dL. Pancréatite aiguë.",
            }
        )

        pool = build_dp_candidate_pool(parsed, "texte", None, DossierMedical())

        lowered = [candidate.terme.lower() for candidate in pool]
        assert not any("crp" in t and "mg" in t for t in lowered)

    def test_dedup_across_sources(self):
        """The same term coming from two sources is deduplicated."""
        from src.medical.cim10_dict import normalize_text

        parsed = _make_parsed(
            sections={
                "conclusion": "Pancréatite aiguë biliaire confirmée.",
                "motif_hospitalisation": "Pancréatite aiguë biliaire.",
            }
        )

        pool = build_dp_candidate_pool(parsed, "texte", None, DossierMedical())

        # Count the "pancréatite aiguë biliaire" variants by normalized key.
        matching_keys = [
            key
            for key in (normalize_text(candidate.terme) for candidate in pool)
            if "pancreatite" in key and "biliaire" in key
        ]
        # After dedup at most 1-2 remain (full sentence vs. segment).
        assert len(matching_keys) <= 2

    def test_cap_at_30(self):
        """The pool is capped at 30 candidates."""
        # Build a text containing many diagnoses.
        section_text = ". ".join(f"diagnostic numéro {i}" for i in range(50)) + "."
        parsed = _make_parsed(sections={"conclusion": section_text})

        pool = build_dp_candidate_pool(parsed, section_text, None, DossierMedical())

        assert len(pool) <= 30

    def test_empty_input(self):
        """Empty text still returns a list (only the return type is checked)."""
        pool = build_dp_candidate_pool(_make_parsed(), "", None, DossierMedical())

        assert isinstance(pool, list)

    def test_score_ordering(self):
        """The pool is sorted by descending score_initial."""
        parsed = _make_parsed(
            sections={
                "diag_sortie": "Cholécystite aiguë",
                "conclusion": "Angiocholite associée",
            }
        )

        pool = build_dp_candidate_pool(parsed, "texte", None, DossierMedical())

        # A pool of fewer than two entries is trivially ordered, so no size
        # guard is needed around the comparison.
        scores = [candidate.score_initial for candidate in pool]
        assert scores == sorted(scores, reverse=True)
# ===========================================================================
# Anti-comorbidité SynthesePMSI
# ===========================================================================
@@ -943,3 +1335,252 @@ class TestBuildMotifFallback:
parsed = _make_parsed()
dossier = DossierMedical()
assert _build_motif(parsed, dossier) == "Non renseigné"
# ===================================================================
# Tests DP Pool Rank
# ===================================================================
class TestFormatPoolForPrompt:
    """Tests for _format_pool_for_prompt()."""

    def test_basic_formatting(self):
        """Each candidate is rendered with its index, term and section."""
        pool = [
            DPPoolCandidate(
                terme="Pneumopathie",
                section="conclusion",
                preuve="Au total : pneumopathie",
                score_initial=0.7,
            ),
            DPPoolCandidate(
                terme="Embolie pulmonaire",
                section="diag_sortie",
                preuve="Diagnostic de sortie",
                score_initial=1.0,
            ),
        ]

        rendered = _format_pool_for_prompt(pool)

        expected_fragments = (
            "[0]",
            "[1]",
            "Pneumopathie",
            "Embolie pulmonaire",
            "conclusion",
            "diag_sortie",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_max_items_cap(self):
        """max_items limits how many candidates are rendered."""
        pool = [
            DPPoolCandidate(terme=f"Diag_{i}", section="conclusion", score_initial=0.5)
            for i in range(10)
        ]

        rendered = _format_pool_for_prompt(pool, max_items=3)

        assert "[0]" in rendered
        assert "[2]" in rendered
        assert "[3]" not in rendered

    def test_empty_pool(self):
        """An empty pool renders as an empty string."""
        rendered = _format_pool_for_prompt([])
        assert rendered == ""
class TestBuildClinicalContext:
    """Tests for _build_clinical_context()."""

    def test_with_synthese(self):
        """With a SynthesePMSI supplied, its fields appear in the context."""
        synthese = SynthesePMSI(
            motif_admission="Douleur thoracique",
            probleme_pris_en_charge="Embolie pulmonaire",
            diagnostic_retenu="Embolie pulmonaire bilatérale",
        )

        context = _build_clinical_context(
            _make_parsed(), DossierMedical(), "", synthese
        )

        assert "Embolie pulmonaire" in context
        assert "Douleur thoracique" in context

    def test_without_synthese(self):
        """Without a SynthesePMSI, the context falls back to the strong sections."""
        parsed = _make_parsed(sections={"conclusion": "Pneumopathie traitée"})

        context = _build_clinical_context(
            parsed, DossierMedical(), "texte complet", None
        )

        assert "Pneumopathie traitée" in context
        assert "Motif" in context
class TestLlmDpPoolRank:
    """Unit tests for llm_dp_pool_rank() — no real LLM call is made.

    Every test that reaches the LLM step replaces
    ``src.medical.ollama_client.call_ollama`` with a stub returning a canned
    reply, through the ``_stub_ollama`` helper.
    """

    @staticmethod
    def _stub_ollama(monkeypatch, reply):
        """Patch ``call_ollama`` so that it always returns *reply*.

        Args:
            monkeypatch: the pytest ``monkeypatch`` fixture of the calling test.
            reply: object returned by the stub regardless of the prompt.
        """
        monkeypatch.setattr(
            "src.medical.ollama_client.call_ollama",
            lambda prompt, **kwargs: reply,
        )

    @staticmethod
    def _rank(pool, parsed=None):
        """Run llm_dp_pool_rank() on *pool* with the one-shot fallback disabled."""
        if parsed is None:
            parsed = _make_parsed()
        return llm_dp_pool_rank(
            parsed, "texte", DossierMedical(),
            pool_candidates=pool,
            fallback_oneshot=False,
        )

    def test_empty_pool_fallback_off(self):
        """Empty pool + fallback OFF → REVIEW."""
        selection = self._rank([])

        assert selection.verdict == "review"
        assert "pool vide" in selection.winner_reason

    def test_empty_pool_fallback_on(self, monkeypatch):
        """Empty pool + fallback ON → delegates to llm_dp_fallback."""
        from src.medical import dp_scoring

        # Stub llm_dp_fallback so that it returns a known result.
        canned = DPSelection(
            verdict="review",
            winner_reason="fallback activé",
        )
        monkeypatch.setattr(dp_scoring, "llm_dp_fallback", lambda *a, **kw: canned)

        selection = llm_dp_pool_rank(
            _make_parsed(), "texte", DossierMedical(),
            pool_candidates=[],
            fallback_oneshot=True,
        )

        assert selection.verdict == "review"
        assert "fallback" in selection.winner_reason

    def test_valid_llm_response_high_confidence(self, monkeypatch):
        """A valid LLM reply with high confidence → CONFIRMED."""
        pool = [
            DPPoolCandidate(terme="Embolie pulmonaire", section="conclusion",
                            preuve="Au total : embolie pulmonaire", score_initial=0.7),
            DPPoolCandidate(terme="HTA", section="conclusion",
                            preuve="terrain HTA", score_initial=0.3),
        ]
        self._stub_ollama(monkeypatch, {
            "chosen_index": 0,
            "chosen_terme": "Embolie pulmonaire",
            "evidence_section": "conclusion",
            "evidence_excerpt": "Au total : embolie pulmonaire",
            "confidence": "high",
            "reason": "pathologie aiguë traitée",
        })
        parsed = _make_parsed(sections={"conclusion": "Au total : embolie pulmonaire"})

        selection = self._rank(pool, parsed)

        assert selection.verdict == "confirmed"
        assert len(selection.candidates) == 1
        winner = selection.candidates[0]
        assert winner.label == "Embolie pulmonaire"
        assert winner.source_section == "llm_pool_rank (conclusion)"
        # No ICD-10 code at this stage; per the original note it is coded downstream.
        assert winner.code is None

    def test_valid_llm_response_medium_confidence(self, monkeypatch):
        """An LLM reply with medium confidence → REVIEW."""
        pool = [
            DPPoolCandidate(terme="Insuffisance cardiaque", section="conclusion",
                            preuve="insuffisance cardiaque", score_initial=0.7),
        ]
        self._stub_ollama(monkeypatch, {
            "chosen_index": 0,
            "chosen_terme": "Insuffisance cardiaque",
            "evidence_section": "conclusion",
            "evidence_excerpt": "insuffisance cardiaque globale",
            "confidence": "medium",
            "reason": "diagnostic probable",
        })

        selection = self._rank(pool)

        assert selection.verdict == "review"
        assert "confidence medium" in selection.winner_reason

    def test_chosen_index_minus_one_fallback_off(self, monkeypatch):
        """chosen_index=-1 + fallback OFF → REVIEW."""
        pool = [
            DPPoolCandidate(terme="HTA", section="conclusion",
                            preuve="HTA", score_initial=0.3),
        ]
        self._stub_ollama(monkeypatch, {
            "chosen_index": -1,
            "chosen_terme": "",
            "confidence": "low",
            "reason": "aucun candidat solide",
        })

        selection = self._rank(pool)

        assert selection.verdict == "review"
        assert "aucun candidat retenu" in selection.winner_reason

    def test_index_out_of_range_fallback_off(self, monkeypatch):
        """An index outside the pool → REVIEW."""
        pool = [
            DPPoolCandidate(terme="Pneumopathie", section="conclusion",
                            preuve="...", score_initial=0.7),
        ]
        # Index 5 does not exist in a one-element pool.
        self._stub_ollama(monkeypatch, {
            "chosen_index": 5,
            "chosen_terme": "Fantôme",
            "confidence": "high",
        })

        selection = self._rank(pool)

        assert selection.verdict == "review"

    def test_score_details_contain_pool_info(self, monkeypatch):
        """The winning candidate's score_details carry the pool information."""
        pool = [
            DPPoolCandidate(terme="Cholécystite aiguë", section="diag_sortie",
                            preuve="cholécystite aiguë lithiasique", score_initial=0.9),
        ]
        self._stub_ollama(monkeypatch, {
            "chosen_index": 0,
            "chosen_terme": "Cholécystite aiguë",
            "evidence_section": "diag_sortie",
            "evidence_excerpt": "cholécystite aiguë lithiasique",
            "confidence": "high",
            "reason": "diagnostic chirurgical aigu",
        })

        selection = self._rank(pool)

        assert selection.verdict == "confirmed"
        details = selection.candidates[0].score_details
        assert "pool_score" in details
        assert "pool_index" in details
        assert details["pool_index"] == 0