chore: add .gitignore
This commit is contained in:
279
tests/test_gold_debug_report.py
Normal file
279
tests/test_gold_debug_report.py
Normal file
@@ -0,0 +1,279 @@
|
||||
"""Tests gold debug reports — sans mocks, données synthétiques.
|
||||
|
||||
Vérifie :
|
||||
- Case report : structure JSON + top_candidates + match_eval
|
||||
- Top-errors : tri correct (CONFIRMED/high en tête)
|
||||
- CSV : headers présents + fichiers créés
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import json
|
||||
|
||||
from src.eval.gold_debug import (
|
||||
build_case_report,
|
||||
render_case_markdown,
|
||||
write_case_report,
|
||||
build_error_entry,
|
||||
sort_error_entries,
|
||||
write_top_errors_csv,
|
||||
write_top_errors_md,
|
||||
write_top_errors_jsonl,
|
||||
select_dim_pack_cases,
|
||||
write_dim_pack,
|
||||
TOP_ERRORS_CSV_COLS,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures synthétiques
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_data(dp_code="D50", das_codes=None):
|
||||
"""Construit un JSON pipeline minimal."""
|
||||
das = []
|
||||
for code in (das_codes or []):
|
||||
das.append({"texte": f"diag {code}", "cim10_suggestion": code, "source": "edsnlp"})
|
||||
return {
|
||||
"document_type": "crh",
|
||||
"diagnostic_principal": {"texte": "Anémie", "cim10_suggestion": dp_code, "source": "regex"},
|
||||
"diagnostics_associes": das,
|
||||
}
|
||||
|
||||
|
||||
def _make_dp_selection(chosen_code="D50", verdict="REVIEW", confidence="medium",
|
||||
candidates=None, delta=0.0):
|
||||
"""Construit un dp_selection dict."""
|
||||
cands = candidates or [
|
||||
{"index": 0, "code": "D50", "term": "Anémie", "score": 4.0,
|
||||
"section_strength": 3, "source": "regex", "score_details": {"section": 3, "confidence": 1},
|
||||
"is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
|
||||
{"index": 1, "code": "I25.1", "term": "SCA", "score": 4.0,
|
||||
"section_strength": 1, "source": "llm_das", "score_details": {"section": 1, "confidence": 3},
|
||||
"is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
|
||||
]
|
||||
return {
|
||||
"chosen_code": chosen_code,
|
||||
"chosen_term": "Anémie",
|
||||
"verdict": verdict,
|
||||
"confidence": confidence,
|
||||
"reason": f"Écart {delta} < seuil 3.0, LLM désactivé",
|
||||
"evidence": ["Scores proches : 4.0 vs 4.0"],
|
||||
"candidates": cands,
|
||||
"debug_scores": {"top1": 4.0, "top2": 4.0, "delta": delta},
|
||||
}
|
||||
|
||||
|
||||
def _make_gold_dict(code="I25.1", label="SCA", acceptable=None, family3=None):
|
||||
return {
|
||||
"dp_expected": {"code": code, "label": label},
|
||||
"dp_acceptable_codes": acceptable or ["I25.1", "I25.5"],
|
||||
"dp_acceptable_family3": family3 or ["I25"],
|
||||
"allow_symptom_dp": False,
|
||||
"confidence": "probable",
|
||||
}
|
||||
|
||||
|
||||
def _make_eval(strict=False, acceptable=False, family3=False, symptom=False):
|
||||
return {
|
||||
"case_id": "test_case",
|
||||
"dp_expected_code": "I25.1",
|
||||
"dp_expected_label": "SCA",
|
||||
"chosen_code": "D50",
|
||||
"confidence_gold": "probable",
|
||||
"allow_symptom_dp": False,
|
||||
"exact_match_strict": strict,
|
||||
"exact_match_tolerant_codes": acceptable,
|
||||
"family3_match_tolerant": family3,
|
||||
"acceptable_match": acceptable or family3,
|
||||
"symptom_not_allowed": symptom,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Case report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildCaseReport:
    """Case report: JSON structure, markdown rendering, file output."""

    def test_structure(self):
        """Report carries id, prediction, ranked candidates, match eval, gold."""
        payload = _make_data("D50", ["I25.1", "Z95.5"])
        selection = _make_dp_selection()

        rep = build_case_report("74_test", payload, selection,
                                _make_gold_dict(), _make_eval())

        assert rep["case_id"] == "74_test"
        assert rep["document_type"] == "crh"

        prediction = rep["prediction"]
        assert prediction["chosen_code"] == "D50"
        assert prediction["verdict"] == "REVIEW"

        top = rep["top_candidates"]
        assert len(top) == 2
        assert top[0]["rank"] == 1
        assert top[0]["code"] == "D50"

        assert rep["match_eval"]["strict_match"] is False
        assert rep["match_eval"]["acceptable_match"] is False
        assert rep["gold"]["dp_expected"]["code"] == "I25.1"

    def test_no_gold(self):
        """Without gold/eval inputs, the corresponding sections stay None."""
        rep = build_case_report("test_no_gold", _make_data(),
                                _make_dp_selection(), None, None)

        assert rep["gold"] is None
        assert rep["match_eval"] is None

    def test_render_markdown(self):
        """Markdown rendering includes the expected headings and codes."""
        rep = build_case_report(
            "74_test",
            _make_data("D50", ["I25.1"]),
            _make_dp_selection(),
            _make_gold_dict(),
            _make_eval(),
        )

        md = render_case_markdown(rep)
        for expected in ("# Case Debug — 74_test", "D50", "I25.1",
                         "Gold vs Prediction", "Top candidats", "Hypothèse bug"):
            assert expected in md

    def test_write_files(self, tmp_path):
        """write_case_report persists both JSON and markdown files."""
        rep = build_case_report("test_write", _make_data(),
                                _make_dp_selection(), None, None)

        json_path, md_path = write_case_report(rep, tmp_path)
        assert json_path.exists()
        assert md_path.exists()
        assert json.loads(json_path.read_text())["case_id"] == "test_write"

    def test_pool_stats(self):
        """Pool sizes reflect the raw DP+DAS input and the filtered candidates."""
        rep = build_case_report("test_pool",
                                _make_data("D50", ["I25.1", "Z95.5"]),
                                _make_dp_selection(), None, None)

        assert rep["pool_stats"]["raw_pool_size"] == 3  # 1 DP + 2 DAS
        assert rep["pool_stats"]["filtered_pool_size"] == 2  # 2 candidates

    def test_review_reason_tag(self):
        """A 'no candidate' reason string is tagged as no_candidates."""
        selection = _make_dp_selection()
        selection["reason"] = "Aucun candidat DP identifié"
        rep = build_case_report("test_tag", _make_data(), selection, None, None)
        assert rep["prediction"]["review_reason_tag"] == "no_candidates"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Top-errors sort
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTopErrors:
    """Top-errors ordering and CSV/MD/JSONL writers."""

    def _entry(self, case_id, acceptable, strict, verdict, confidence):
        """Build a full case report, then derive an error entry from it."""
        payload = _make_data()
        selection = _make_dp_selection(verdict=verdict, confidence=confidence)
        match_ev = _make_eval(strict=strict, acceptable=acceptable)
        rep = build_case_report(case_id, payload, selection, _make_gold_dict(), match_ev)
        return build_error_entry(rep)

    def test_sort_acceptable_fail_first(self):
        """Acceptable FAIL entries sort before acceptable OK ones."""
        failing = self._entry("fail", False, False, "REVIEW", "medium")
        passing = self._entry("ok", True, True, "REVIEW", "medium")

        ordered = sort_error_entries([passing, failing])
        assert ordered[0]["case_id"] == "fail"
        assert ordered[1]["case_id"] == "ok"

    def test_sort_confirmed_before_review(self):
        """Among acceptable FAILs, CONFIRMED (dangerous) sorts before REVIEW."""
        confirmed = self._entry("confirmed", False, False, "CONFIRMED", "medium")
        review = self._entry("review", False, False, "REVIEW", "medium")

        ordered = sort_error_entries([review, confirmed])
        assert ordered[0]["case_id"] == "confirmed"

    def test_sort_high_before_medium(self):
        """Among CONFIRMED acceptable FAILs, high confidence sorts first."""
        high = self._entry("high", False, False, "CONFIRMED", "high")
        medium = self._entry("med", False, False, "CONFIRMED", "medium")

        ordered = sort_error_entries([medium, high])
        assert ordered[0]["case_id"] == "high"

    def test_csv_headers(self, tmp_path):
        """The CSV output contains every required header column."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        csv_path = tmp_path / "errors.csv"
        write_top_errors_csv([entry], csv_path)

        with open(csv_path) as f:
            reader = csv.DictReader(f)
            assert set(TOP_ERRORS_CSV_COLS).issubset(set(reader.fieldnames or []))

    def test_md_created(self, tmp_path):
        """The markdown output is created and mentions the case."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        md_path = tmp_path / "errors.md"
        write_top_errors_md([entry], md_path)

        assert md_path.exists()
        content = md_path.read_text()
        assert "Top erreurs" in content
        assert "test" in content

    def test_jsonl_created(self, tmp_path):
        """The JSONL output has one clean line per entry."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        jsonl_path = tmp_path / "errors.jsonl"
        write_top_errors_jsonl([entry], jsonl_path)

        assert jsonl_path.exists()
        lines = jsonl_path.read_text().strip().splitlines()
        assert len(lines) == 1
        loaded = json.loads(lines[0])
        assert loaded["case_id"] == "test"
        assert "_sort_key" not in loaded  # internals not leaked
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DIM Pack
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDimPack:
    """DIM pack: error-first case selection and pack file output."""

    def _report(self, case_id, verdict="REVIEW", acceptable_fail=False, has_symptom=False):
        """Build a case report with two fixed candidates.

        `acceptable_fail=True` marks the case as an evaluation failure via the
        minimal eval dict handed to build_case_report.
        """
        data = _make_data()
        cands = [
            {"index": 0, "code": "D50", "term": "Anémie", "score": 4.0,
             "section_strength": 3, "score_details": {},
             "is_symptom_like": has_symptom, "is_comorbidity_like": False, "is_act_only": False},
            {"index": 1, "code": "I25.1", "term": "SCA", "score": 3.0,
             "section_strength": 1, "score_details": {},
             "is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
        ]
        dp_sel = _make_dp_selection(verdict=verdict, candidates=cands)
        # FIX: a fully-populated `match_eval` dict used to be built here but was
        # never passed to build_case_report (dead local, ruff F841); only the
        # minimal {"acceptable_match": ...} dict below is actually consumed.
        return build_case_report(
            case_id, data, dp_sel, _make_gold_dict(),
            {"acceptable_match": not acceptable_fail},
        )

    def test_select_errors_first(self):
        """Cases failing acceptable-match are selected ahead of OK cases."""
        r_error = self._report("error", acceptable_fail=True)
        r_ok = self._report("ok", acceptable_fail=False)

        selected = select_dim_pack_cases([r_ok, r_error], 1)
        assert len(selected) == 1
        assert selected[0]["case_id"] == "error"

    def test_write_pack(self, tmp_path):
        """write_dim_pack creates the index CSV plus one JSON per case."""
        r1 = self._report("case_1")
        r2 = self._report("case_2")

        csv_p, cases_dir = write_dim_pack([r1, r2], tmp_path)
        assert csv_p.exists()
        assert cases_dir.exists()
        assert (cases_dir / "case_1.json").exists()
        assert (cases_dir / "case_2.json").exists()

        with open(csv_p) as f:
            reader = csv.DictReader(f)
            rows = list(reader)
        assert len(rows) == 2
        assert rows[0]["case_id"] == "case_1"
|
||||
206
tests/test_gold_eval.py
Normal file
206
tests/test_gold_eval.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""Tests évaluation gold CRH — logique tolérante sans mocks.
|
||||
|
||||
3 cas inline :
|
||||
1. Strict match OK
|
||||
2. Strict FAIL mais acceptable via family3
|
||||
3. R* choisi avec allow_symptom_dp=false → symptom_not_allowed
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from src.eval.gold_models import (
|
||||
GoldCRHCase,
|
||||
GoldDPExpected,
|
||||
GoldEvidence,
|
||||
evaluate_dp,
|
||||
is_valid_cim10_format,
|
||||
cim10_family3,
|
||||
load_gold_jsonl,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCIM10Format:
    """CIM-10 code format validation and 3-character family extraction."""

    def test_valid_codes(self):
        for code in ("I26.9", "K81.0", "R06", "Z51.30"):
            assert is_valid_cim10_format(code)

    def test_invalid_codes(self):
        for code in ("26.9", "INVALID", "", "I2"):
            assert not is_valid_cim10_format(code)

    def test_family3(self):
        expectations = (("I26.9", "I26"), ("K81.0", "K81"), ("R06", "R06"))
        for code, family in expectations:
            assert cim10_family3(code) == family
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Modèle GoldCRHCase
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGoldCRHCase:
    """Validation rules of the GoldCRHCase / GoldDPExpected models."""

    def test_valid_case(self):
        """A well-formed case validates and exposes its defaults."""
        gold_case = GoldCRHCase(
            case_id="test_001",
            dp_expected=GoldDPExpected(code="I26.9", label="Embolie pulmonaire"),
            dp_acceptable_codes=["I26.0"],
            dp_acceptable_family3=["I26"],
            confidence="certain",
        )

        assert gold_case.case_id == "test_001"
        assert gold_case.dp_expected.code == "I26.9"
        assert gold_case.allow_symptom_dp is False  # model default

    def test_invalid_confidence_rejected(self):
        """An unknown confidence value is rejected by validation."""
        import pytest
        expected = GoldDPExpected(code="I26.9", label="Test")
        with pytest.raises(Exception):
            GoldCRHCase(case_id="test", dp_expected=expected, confidence="invalid_value")

    def test_invalid_code_rejected(self):
        """A malformed CIM-10 code is rejected by validation."""
        import pytest
        with pytest.raises(Exception):
            GoldDPExpected(code="INVALID", label="Test")

    def test_notes_max_length(self):
        """Notes longer than the 400-character limit are rejected."""
        import pytest
        expected = GoldDPExpected(code="I26.9", label="Test")
        with pytest.raises(Exception):
            GoldCRHCase(case_id="test", dp_expected=expected, notes="x" * 401)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Évaluation tolérante — 3 cas demandés
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_gold(
    code: str,
    label: str,
    acceptable_codes: list[str] | None = None,
    acceptable_family3: list[str] | None = None,
    allow_symptom: bool = False,
    confidence: str = "certain",
) -> GoldCRHCase:
    """Build a GoldCRHCase with sensible defaults for evaluate_dp tests."""
    expected = GoldDPExpected(code=code, label=label)
    return GoldCRHCase(
        case_id="test_case",
        dp_expected=expected,
        dp_acceptable_codes=acceptable_codes or [],
        dp_acceptable_family3=acceptable_family3 or [],
        allow_symptom_dp=allow_symptom,
        confidence=confidence,
    )
|
||||
|
||||
|
||||
class TestEvaluateDP:
    """The 3 required scenarios plus edge cases for evaluate_dp."""

    def test_strict_match_ok(self):
        """Case 1 — strict match: chosen code equals the expected code."""
        gold_case = _make_gold("I26.9", "Embolie pulmonaire", ["I26.0"], ["I26"])
        res = evaluate_dp("I26.9", gold_case)

        assert res["exact_match_strict"] is True
        assert res["exact_match_tolerant_codes"] is True
        assert res["family3_match_tolerant"] is True
        assert res["acceptable_match"] is True
        assert res["symptom_not_allowed"] is False

    def test_strict_fail_family3_ok(self):
        """Case 2 — strict FAIL, but acceptable through the family3 rule."""
        gold_case = _make_gold("I25.1", "SCA", ["I25.5"], ["I25"])
        res = evaluate_dp("I25.8", gold_case)

        assert res["exact_match_strict"] is False
        # I25.8 is not in [I25.1, I25.5]
        assert res["exact_match_tolerant_codes"] is False
        # family I25 is listed in ["I25"]
        assert res["family3_match_tolerant"] is True
        assert res["acceptable_match"] is True

    def test_symptom_not_allowed(self):
        """Case 3 — an R* code chosen while allow_symptom_dp=False is penalized."""
        gold_case = _make_gold("I25.1", "SCA", acceptable_family3=["I25"], allow_symptom=False)
        res = evaluate_dp("R10.4", gold_case)

        assert res["exact_match_strict"] is False
        assert res["acceptable_match"] is False
        assert res["symptom_not_allowed"] is True

    def test_symptom_allowed(self):
        """An R* code chosen with allow_symptom_dp=True carries no penalty."""
        gold_case = _make_gold("R06.0", "Dyspnée", allow_symptom=True)
        res = evaluate_dp("R06.0", gold_case)

        assert res["exact_match_strict"] is True
        assert res["symptom_not_allowed"] is False

    def test_no_chosen_code(self):
        """No chosen code at all yields all-False flags."""
        gold_case = _make_gold("I26.9", "EP")
        res = evaluate_dp(None, gold_case)

        assert res["exact_match_strict"] is False
        assert res["acceptable_match"] is False
        assert res["symptom_not_allowed"] is False

    def test_tolerant_codes_match(self):
        """A code in dp_acceptable_codes (but not dp_expected) is tolerant-only."""
        gold_case = _make_gold("I26.9", "EP", acceptable_codes=["I26.0"])
        res = evaluate_dp("I26.0", gold_case)

        assert res["exact_match_strict"] is False
        assert res["exact_match_tolerant_codes"] is True
        assert res["acceptable_match"] is True

    def test_case_insensitive(self):
        """Lowercase codes are matched case-insensitively."""
        gold_case = _make_gold("I26.9", "EP")
        res = evaluate_dp("i26.9", gold_case)

        assert res["exact_match_strict"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Chargement JSONL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLoadGold:
    """JSONL loading: happy path and both failure modes."""

    def test_load_nonexistent_raises(self):
        """A missing file raises FileNotFoundError."""
        import pytest
        with pytest.raises(FileNotFoundError):
            load_gold_jsonl("/nonexistent/path.jsonl")

    def test_load_valid_jsonl(self, tmp_path):
        """A single valid line is parsed into one model instance."""
        import json
        record = {
            "case_id": "test_001",
            "dp_expected": {"code": "I26.9", "label": "EP"},
            "confidence": "certain",
        }
        gold_file = tmp_path / "test.jsonl"
        gold_file.write_text(json.dumps(record) + "\n", encoding="utf-8")

        loaded = load_gold_jsonl(gold_file)
        assert len(loaded) == 1
        assert loaded[0].case_id == "test_001"
        assert loaded[0].dp_expected.code == "I26.9"

    def test_load_invalid_line_raises(self, tmp_path):
        """A line failing model validation raises ValueError."""
        import pytest
        bad_file = tmp_path / "bad.jsonl"
        bad_file.write_text('{"case_id": "x", "dp_expected": {"code": "INVALID"}}\n')

        with pytest.raises(ValueError, match="erreur"):
            load_gold_jsonl(bad_file)
|
||||
290
tests/test_p0_patches.py
Normal file
290
tests/test_p0_patches.py
Normal file
@@ -0,0 +1,290 @@
|
||||
"""Tests P0 : correctifs bloquants (BUG-1, BUG-2, LOGIC-1)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from src.config import (
|
||||
CodeDecision,
|
||||
Diagnostic,
|
||||
DossierMedical,
|
||||
RAGSource,
|
||||
Sejour,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# P0-1 — BUG-1 : VETO-02 ne doit pas s'appliquer au DP Trackare
|
||||
# ============================================================
|
||||
|
||||
class TestVeto02SkipsTrackareDp:
    """VETO-02 (DP without evidence) must be skipped when dp.source == 'trackare'."""

    # Both patches neutralize the rule configuration so VETO-02 is always
    # enabled and its severity is never overridden.
    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_no_veto02(self, _mock_sev, _mock_rule, dossier_trackare_dp):
        """A Trackare DP without evidence must NOT trigger VETO-02 HARD."""
        from src.quality.veto_engine import apply_vetos

        # `dossier_trackare_dp` is a fixture — presumably defined in conftest.py.
        report = apply_vetos(dossier_trackare_dp)

        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert veto02_dp == [], (
            f"VETO-02 déclenché à tort sur DP Trackare : {veto02_dp}"
        )

    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_verdict_not_fail_from_veto02(self, _mock_sev, _mock_rule, dossier_trackare_dp):
        """The overall verdict must not be FAIL solely because of VETO-02 on a Trackare DP."""
        from src.quality.veto_engine import apply_vetos

        report = apply_vetos(dossier_trackare_dp)

        # No HARD issue tied to VETO-02 on the DP.
        hard_veto02 = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal" and i.severity == "HARD"
        ]
        assert hard_veto02 == []

    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_crh_dp_without_evidence_still_triggers_veto02(self, _mock_sev, _mock_rule):
        """A CRH DP without evidence must still trigger VETO-02 HARD (non-regression)."""
        from src.quality.veto_engine import apply_vetos

        dossier = DossierMedical(
            document_type="crh",
            sejour=Sejour(sexe="M", age=50),
            diagnostic_principal=Diagnostic(
                texte="Pneumopathie",
                cim10_suggestion="J18.9",
                source="crh",
                # No evidence fields provided on purpose.
            ),
        )
        report = apply_vetos(dossier)

        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert len(veto02_dp) == 1, "VETO-02 doit toujours s'appliquer aux DP CRH sans preuve"
        assert veto02_dp[0].severity == "HARD"

    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_with_source_none_still_triggers_veto02(self, _mock_sev, _mock_rule):
        """A DP with no source set (source=None) must trigger VETO-02 normally."""
        from src.quality.veto_engine import apply_vetos

        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=Diagnostic(
                texte="Test",
                cim10_suggestion="A00.0",
                # source=None (model default) — the trackare exemption must not apply.
            ),
        )
        report = apply_vetos(dossier)

        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert len(veto02_dp) == 1, "DP sans source doit déclencher VETO-02"
|
||||
|
||||
|
||||
# ============================================================
|
||||
# P0-2 — BUG-2 : sources_rag toujours initialisé (même si RAG vide)
|
||||
# ============================================================
|
||||
|
||||
class TestRagZeroResultsSetsSourcesRag:
    """enrich_diagnostic() must always initialize sources_rag, even with no FAISS result."""

    @patch("src.medical.rag_search.search_similar", return_value=[])
    def test_zero_results_sets_empty_list(self, _mock_faiss):
        """sources_rag must be [] (not None) when FAISS returns 0 results."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="Test diagnostic", cim10_suggestion="K85.1")
        assert diag.sources_rag == []  # Pydantic default

        enrich_diagnostic(diag, contexte={}, est_dp=True, cache=None)

        assert diag.sources_rag == [], (
            f"sources_rag devrait être [] après 0 résultat FAISS, got: {diag.sources_rag}"
        )

    @patch("src.medical.rag_search.search_similar", return_value=[])
    def test_zero_results_with_cache_hit_applies_cached(self, _mock_faiss):
        """With 0 FAISS results but a cache hit, the cached LLM result must be applied."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.1")

        # Simulate a cache hit returning a previously computed LLM answer.
        mock_cache = MagicMock()
        mock_cache.get.return_value = {
            "code": "K85.1",
            "confidence": "high",
            "justification": "Cached justification",
        }

        with patch("src.medical.rag_search._apply_llm_result_diagnostic") as mock_apply:
            enrich_diagnostic(diag, contexte={}, est_dp=True, cache=mock_cache)

        # The cache hit must be applied despite 0 FAISS results.
        mock_apply.assert_called_once()
        assert diag.sources_rag == []

    @patch("src.medical.rag_search.search_similar", return_value=[
        {"document": "cim10", "page": 42, "code": "K85.1", "extrait": "Pancréatite aigüe biliaire"},
    ])
    def test_with_results_sets_sources_rag(self, _mock_faiss):
        """With FAISS results, sources_rag must be populated normally (non-regression)."""
        from src.medical.rag_search import enrich_diagnostic

        diag = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")

        # LLM call disabled: we only check the FAISS → sources_rag path.
        with patch("src.medical.rag_search._call_ollama", return_value=None):
            enrich_diagnostic(diag, contexte={}, est_dp=True, cache=None)

        assert len(diag.sources_rag) == 1
        assert diag.sources_rag[0].document == "cim10"
        assert diag.sources_rag[0].code == "K85.1"
|
||||
|
||||
|
||||
# ============================================================
|
||||
# P0-3 — LOGIC-1 : promotion DAS→DP doit être tracée
|
||||
# ============================================================
|
||||
|
||||
class TestDasToDpPromotionTraced:
    """RULE-DAS-TO-DP must leave a trace in alertes_codage."""

    # The four patches neutralize configuration/reference loading so only the
    # promotion logic itself is exercised.
    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_adds_alerte(self, _bio, _ref, _valid, _rule):
        """When a DAS is promoted to DP, alertes_codage must contain RULE-DAS-TO-DP."""
        from src.quality.decision_engine import apply_decisions

        das_candidate = Diagnostic(
            texte="Pancréatite aiguë biliaire",
            cim10_suggestion="K85.1",
            cim10_confidence="high",
            source="crh",
        )
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[das_candidate],
        )

        apply_decisions(dossier)

        # The DP must have been promoted.
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_final == "K85.1"
        assert dossier.diagnostic_principal.cim10_decision.action == "PROMOTE_DP"

        # Traceability: an alert carrying both the rule id and the code.
        matching_alertes = [
            a for a in dossier.alertes_codage
            if "RULE-DAS-TO-DP" in a and "K85.1" in a
        ]
        assert len(matching_alertes) == 1, (
            f"alertes_codage devrait contenir une entrée RULE-DAS-TO-DP, got: {dossier.alertes_codage}"
        )

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_alerte_contains_diagnosis_text(self, _bio, _ref, _valid, _rule):
        """The promotion alert must mention the promoted diagnosis text."""
        from src.quality.decision_engine import apply_decisions

        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[
                Diagnostic(
                    texte="Embolie pulmonaire",
                    cim10_suggestion="I26.9",
                    cim10_confidence="high",
                    source="crh",
                ),
            ],
        )

        apply_decisions(dossier)

        assert dossier.diagnostic_principal is not None
        alerte = [a for a in dossier.alertes_codage if "RULE-DAS-TO-DP" in a]
        assert len(alerte) == 1
        assert "I26.9" in alerte[0]
        assert "Embolie pulmonaire" in alerte[0]

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_no_promotion_when_dp_exists(self, _bio, _ref, _valid, _rule):
        """No promotion when a DP already exists (non-regression)."""
        from src.quality.decision_engine import apply_decisions

        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=Diagnostic(
                texte="DP existant",
                cim10_suggestion="J18.9",
                source="crh",
            ),
            diagnostics_associes=[
                Diagnostic(
                    texte="DAS candidat",
                    cim10_suggestion="K85.1",
                    cim10_confidence="high",
                ),
            ],
        )

        apply_decisions(dossier)

        # The existing DP is left untouched.
        assert dossier.diagnostic_principal.cim10_suggestion == "J18.9"
        # And no promotion alert was emitted.
        promotion_alertes = [a for a in dossier.alertes_codage if "RULE-DAS-TO-DP" in a]
        assert promotion_alertes == []

    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_removes_das_from_list(self, _bio, _ref, _valid, _rule):
        """The promoted DAS must be removed from diagnostics_associes."""
        from src.quality.decision_engine import apply_decisions

        das1 = Diagnostic(texte="DAS gardé", cim10_suggestion="R10.4", cim10_confidence="high")
        das2 = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1", cim10_confidence="high")
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[das1, das2],
        )

        apply_decisions(dossier)

        # K85.1 is promoted (pathology outranks an R* symptom code).
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_final == "K85.1"
        # The promoted DAS must no longer appear in the list.
        remaining_codes = [d.cim10_suggestion for d in dossier.diagnostics_associes]
        assert "K85.1" not in remaining_codes
|
||||
226
tests/test_p1_lite.py
Normal file
226
tests/test_p1_lite.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""Tests P1-lite — LOGIC-2 (CPAM dégradé), LOGIC-3 (modèles identiques).
|
||||
|
||||
Sans mocks : manipulation directe des structures de données et env vars.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from src.config import (
|
||||
ControleCPAM,
|
||||
DossierMedical,
|
||||
OLLAMA_MODELS,
|
||||
Sejour,
|
||||
check_adversarial_model_config,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# LOGIC-2 — CPAM passe 1 échoue → mode dégradé tracé
|
||||
# ============================================================
|
||||
|
||||
class TestCpamDegradedMode:
    """Checks that the pass-1 degraded mode is correctly traced (LOGIC-2)."""

    def test_degraded_sets_alertes_codage(self):
        """When extraction is None, alertes_codage must carry the message."""
        dossier = DossierMedical(sejour=Sejour())

        # Mirror generate_cpam_response's behaviour when extraction is None.
        extraction = None
        if extraction is None:
            dossier.alertes_codage.append(
                "CPAM: passe 1 (extraction structurée) échouée → mode dégradé"
            )

        alerts = dossier.alertes_codage
        assert any("passe 1" in alert for alert in alerts)
        assert any("dégradé" in alert for alert in alerts)

    def test_degraded_sets_quality_flags_on_result(self):
        """quality_flags are added to the result when degraded."""
        result = {"conclusion": "test"}

        degraded_pass1 = True
        if degraded_pass1:
            flags = result.setdefault("quality_flags", {})
            flags["cpam_pass1_failed"] = True
            flags["degraded_mode"] = True

        assert result["quality_flags"]["cpam_pass1_failed"] is True
        assert result["quality_flags"]["degraded_mode"] is True

    def test_non_degraded_no_quality_flags(self):
        """No quality_flags when extraction succeeds."""
        result = {"conclusion": "test"}

        extraction = {"comprehension_contestation": "ok"}

        assert (extraction is None) is False
        assert "quality_flags" not in result

    def test_quality_flags_format_matches_spec(self):
        """quality_flags format conforms to the spec."""
        result: dict = {}
        flags = result.setdefault("quality_flags", {})
        flags["cpam_pass1_failed"] = True
        flags["degraded_mode"] = True

        assert isinstance(result["quality_flags"], dict)
        assert "cpam_pass1_failed" in result["quality_flags"]
        assert "degraded_mode" in result["quality_flags"]
||||
|
||||
|
||||
# ============================================================
|
||||
# LOGIC-3 — Modèles CPAM et validation identiques
|
||||
# ============================================================
|
||||
|
||||
class TestAdversarialModelCheck:
    """Checks detection of identical CPAM/validation models (LOGIC-3)."""

    def test_same_model_detected(self):
        """Identical models → (True, message)."""
        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = "test-same-model"
        OLLAMA_MODELS["validation"] = "test-same-model"
        try:
            same, msg = check_adversarial_model_config()
            assert same is True
            assert "identiques" in msg
            assert "test-same-model" in msg
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved

    def test_different_models_ok(self):
        """Different models → (False, '')."""
        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = "model-a"
        OLLAMA_MODELS["validation"] = "model-b"
        try:
            same, msg = check_adversarial_model_config()
            assert same is False
            assert msg == ""
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved

    def test_adversarial_skip_returns_degraded_result(self):
        """With identical models, adversarial validation returns a degraded result."""
        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = "same-model"
        OLLAMA_MODELS["validation"] = "same-model"
        try:
            same, msg = check_adversarial_model_config()
            assert same is True

            # Mirror the behaviour of _validate_adversarial when same_model is set.
            degraded = {
                "coherent": True,
                "erreurs": [f"Validation adversariale dégradée : {msg}"],
                "score_confiance": 0,
            }
            assert degraded["score_confiance"] == 0
            assert "dégradée" in degraded["erreurs"][0]
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved

    def test_empty_model_not_flagged(self):
        """Empty model names do not trigger the flag."""
        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = ""
        OLLAMA_MODELS["validation"] = ""
        try:
            same, _ = check_adversarial_model_config()
            assert same is False
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved
||||
|
||||
|
||||
# ============================================================
|
||||
# LOGIC-2 & LOGIC-3 — quality_flags + alertes visibles output
|
||||
# ============================================================
|
||||
|
||||
class TestQualityFlagsOutput:
    """Checks that quality_flags and alerts are visible in the output."""

    def test_cpam_pass1_failure_sets_quality_flags_and_alert(self):
        """LOGIC-2 — failed pass 1 → quality_flags + alert on the dossier."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}

        # Replay the exact flow of generate_cpam_response (lines 122-165).
        extraction = None  # pass 1 failed
        if extraction is None:
            dossier.alertes_codage.append(
                "CPAM: passe 1 (extraction structurée) échouée → mode dégradé"
            )
            flags = result.setdefault("quality_flags", {})
            flags["cpam_pass1_failed"] = True
            flags["degraded_mode"] = True

        # Both the result flags and the dossier alert must be present.
        assert result["quality_flags"]["cpam_pass1_failed"] is True
        assert result["quality_flags"]["degraded_mode"] is True
        assert any("passe 1" in a and "dégradé" in a for a in dossier.alertes_codage)

    def test_adversarial_same_model_sets_quality_flag_and_alert(self):
        """LOGIC-3 — identical models → quality_flags + alert on the dossier."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}

        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = "same-test-model"
        OLLAMA_MODELS["validation"] = "same-test-model"
        try:
            # Replay the exact flow of generate_cpam_response (lines 192-199).
            same_model, model_msg = check_adversarial_model_config()
            if same_model:
                result.setdefault("quality_flags", {})
                result["quality_flags"]["adversarial_disabled_same_model"] = True
                dossier.alertes_codage.append(
                    "Validation adversariale désactivée (modèles identiques)"
                )

            assert same_model is True
            assert result["quality_flags"]["adversarial_disabled_same_model"] is True
            assert any(
                "adversariale" in a and "identiques" in a
                for a in dossier.alertes_codage
            )
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved

    def test_no_flags_when_all_ok(self):
        """No quality_flags when everything works correctly."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}

        # Pass 1 succeeds.
        extraction = {"comprehension_contestation": "ok"}
        degraded = extraction is None
        assert degraded is False

        # Distinct models.
        saved = (OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"])
        OLLAMA_MODELS["cpam"] = "model-a"
        OLLAMA_MODELS["validation"] = "model-b"
        try:
            same_model, _ = check_adversarial_model_config()
            assert same_model is False
        finally:
            OLLAMA_MODELS["cpam"], OLLAMA_MODELS["validation"] = saved

        assert "quality_flags" not in result
        assert len(dossier.alertes_codage) == 0
||||
Reference in New Issue
Block a user