Files
t2a_v2/tests/test_gold_eval.py
2026-03-05 00:37:41 +01:00

207 lines
7.1 KiB
Python

"""Tests évaluation gold CRH — logique tolérante sans mocks.
3 cas inline :
1. Strict match OK
2. Strict FAIL mais acceptable via family3
3. R* choisi avec allow_symptom_dp=false → symptom_not_allowed
"""
from __future__ import annotations
from src.eval.gold_models import (
GoldCRHCase,
GoldDPExpected,
GoldEvidence,
evaluate_dp,
is_valid_cim10_format,
cim10_family3,
load_gold_jsonl,
)
# ---------------------------------------------------------------------------
# Helpers validation
# ---------------------------------------------------------------------------
class TestCIM10Format:
    """Checks for the CIM-10 format validator and the 3-character family helper."""

    def test_valid_codes(self):
        """Every well-formed sample code is accepted by the validator."""
        for code in ("I26.9", "K81.0", "R06", "Z51.30"):
            assert is_valid_cim10_format(code)

    def test_invalid_codes(self):
        """Malformed samples, including the empty string, are rejected."""
        for code in ("26.9", "INVALID", "", "I2"):
            assert not is_valid_cim10_format(code)

    def test_family3(self):
        """Each sample code maps to its expected 3-character family."""
        expected_family = {"I26.9": "I26", "K81.0": "K81", "R06": "R06"}
        for code, family in expected_family.items():
            assert cim10_family3(code) == family
# ---------------------------------------------------------------------------
# Modèle GoldCRHCase
# ---------------------------------------------------------------------------
class TestGoldCRHCase:
    """Construction and validation checks for GoldCRHCase and GoldDPExpected."""

    @staticmethod
    def _minimal_case(**overrides):
        """Build a GoldCRHCase from a minimal valid field set plus *overrides*.

        The nested GoldDPExpected is created here as well, so callers that
        expect a failure must invoke this helper inside ``pytest.raises``.
        """
        fields = {
            "case_id": "test",
            "dp_expected": GoldDPExpected(code="I26.9", label="Test"),
        }
        fields.update(overrides)
        return GoldCRHCase(**fields)

    def test_valid_case(self):
        """A complete case keeps its fields; allow_symptom_dp is False when omitted."""
        expected = GoldDPExpected(code="I26.9", label="Embolie pulmonaire")
        case = GoldCRHCase(
            case_id="test_001",
            dp_expected=expected,
            dp_acceptable_codes=["I26.0"],
            dp_acceptable_family3=["I26"],
            confidence="certain",
        )
        assert case.case_id == "test_001"
        assert case.dp_expected.code == "I26.9"
        assert case.allow_symptom_dp is False

    def test_invalid_confidence_rejected(self):
        """An unknown confidence value makes construction raise."""
        import pytest

        with pytest.raises(Exception):
            self._minimal_case(confidence="invalid_value")

    def test_invalid_code_rejected(self):
        """A badly formatted code makes GoldDPExpected construction raise."""
        import pytest

        with pytest.raises(Exception):
            GoldDPExpected(code="INVALID", label="Test")

    def test_notes_max_length(self):
        """A 401-character notes value makes construction raise."""
        import pytest

        with pytest.raises(Exception):
            self._minimal_case(notes="x" * 401)
# ---------------------------------------------------------------------------
# Évaluation tolérante — 3 cas demandés
# ---------------------------------------------------------------------------
def _make_gold(
    code: str,
    label: str,
    acceptable_codes: list[str] | None = None,
    acceptable_family3: list[str] | None = None,
    allow_symptom: bool = False,
    confidence: str = "certain",
) -> GoldCRHCase:
    """Build a GoldCRHCase fixture whose case_id is always "test_case".

    Args:
        code: Code of the expected DP.
        label: Label of the expected DP.
        acceptable_codes: Extra accepted codes; a falsy value becomes an empty list.
        acceptable_family3: Accepted families; a falsy value becomes an empty list.
        allow_symptom: Forwarded as ``allow_symptom_dp``.
        confidence: Forwarded as ``confidence``.

    Returns:
        The constructed GoldCRHCase.
    """
    expected = GoldDPExpected(code=code, label=label)
    tolerated_codes = acceptable_codes if acceptable_codes else []
    tolerated_families = acceptable_family3 if acceptable_family3 else []
    return GoldCRHCase(
        case_id="test_case",
        dp_expected=expected,
        dp_acceptable_codes=tolerated_codes,
        dp_acceptable_family3=tolerated_families,
        allow_symptom_dp=allow_symptom,
        confidence=confidence,
    )
class TestEvaluateDP:
    """Tolerant DP evaluation: the three main scenarios plus edge cases."""

    def test_strict_match_ok(self):
        """Case 1 — strict match: the chosen code equals the expected code."""
        gold = _make_gold(
            "I26.9",
            "Embolie pulmonaire",
            acceptable_codes=["I26.0"],
            acceptable_family3=["I26"],
        )
        outcome = evaluate_dp("I26.9", gold)
        for flag in (
            "exact_match_strict",
            "exact_match_tolerant_codes",
            "family3_match_tolerant",
            "acceptable_match",
        ):
            assert outcome[flag] is True
        assert outcome["symptom_not_allowed"] is False

    def test_strict_fail_family3_ok(self):
        """Case 2 — strict match fails, but the code is acceptable via family3."""
        gold = _make_gold(
            "I25.1",
            "SCA",
            acceptable_codes=["I25.5"],
            acceptable_family3=["I25"],
        )
        outcome = evaluate_dp("I25.8", gold)
        assert outcome["exact_match_strict"] is False
        # I25.8 is neither the expected I25.1 nor the accepted I25.5.
        assert outcome["exact_match_tolerant_codes"] is False
        # Family I25 is listed in the accepted families.
        assert outcome["family3_match_tolerant"] is True
        assert outcome["acceptable_match"] is True

    def test_symptom_not_allowed(self):
        """Case 3 — an R* code chosen with allow_symptom_dp=False is flagged."""
        gold = _make_gold(
            "I25.1", "SCA", acceptable_family3=["I25"], allow_symptom=False
        )
        outcome = evaluate_dp("R10.4", gold)
        for flag in ("exact_match_strict", "acceptable_match"):
            assert outcome[flag] is False
        assert outcome["symptom_not_allowed"] is True

    def test_symptom_allowed(self):
        """An R* code chosen with allow_symptom_dp=True is not flagged."""
        gold = _make_gold("R06.0", "Dyspnée", allow_symptom=True)
        outcome = evaluate_dp("R06.0", gold)
        assert outcome["exact_match_strict"] is True
        assert outcome["symptom_not_allowed"] is False

    def test_no_chosen_code(self):
        """With no chosen code (None), the checked flags are all False."""
        gold = _make_gold("I26.9", "EP")
        outcome = evaluate_dp(None, gold)
        for flag in ("exact_match_strict", "acceptable_match", "symptom_not_allowed"):
            assert outcome[flag] is False

    def test_tolerant_codes_match(self):
        """A code listed in dp_acceptable_codes but different from dp_expected."""
        gold = _make_gold("I26.9", "EP", acceptable_codes=["I26.0"])
        outcome = evaluate_dp("I26.0", gold)
        assert outcome["exact_match_strict"] is False
        for flag in ("exact_match_tolerant_codes", "acceptable_match"):
            assert outcome[flag] is True

    def test_case_insensitive(self):
        """A lowercase chosen code still matches strictly."""
        gold = _make_gold("I26.9", "EP")
        outcome = evaluate_dp("i26.9", gold)
        assert outcome["exact_match_strict"] is True
# ---------------------------------------------------------------------------
# Chargement JSONL
# ---------------------------------------------------------------------------
class TestLoadGold:
    """Checks for load_gold_jsonl: missing file, valid file, invalid line."""

    def test_load_nonexistent_raises(self):
        """Loading a path that does not exist raises FileNotFoundError."""
        import pytest

        with pytest.raises(FileNotFoundError):
            load_gold_jsonl("/nonexistent/path.jsonl")

    def test_load_valid_jsonl(self, tmp_path):
        """A single valid JSONL line is loaded as one case with its fields intact."""
        import json

        jsonl = tmp_path / "test.jsonl"
        case = {
            "case_id": "test_001",
            "dp_expected": {"code": "I26.9", "label": "EP"},
            "confidence": "certain",
        }
        jsonl.write_text(json.dumps(case) + "\n", encoding="utf-8")
        cases = load_gold_jsonl(jsonl)
        assert len(cases) == 1
        assert cases[0].case_id == "test_001"
        assert cases[0].dp_expected.code == "I26.9"

    def test_load_invalid_line_raises(self, tmp_path):
        """A line with an invalid dp_expected raises ValueError matching "erreur"."""
        import pytest

        jsonl = tmp_path / "bad.jsonl"
        # Write with an explicit encoding, like the valid-file test, so the
        # fixture does not depend on the platform's locale default.
        jsonl.write_text(
            '{"case_id": "x", "dp_expected": {"code": "INVALID"}}\n',
            encoding="utf-8",
        )
        with pytest.raises(ValueError, match="erreur"):
            load_gold_jsonl(jsonl)