chore: add .gitignore

This commit is contained in:
dom
2026-03-05 00:37:41 +01:00
parent 542797a124
commit 2578afb6ff
1716 changed files with 1905609 additions and 18 deletions

View File

@@ -0,0 +1,279 @@
"""Tests gold debug reports — sans mocks, données synthétiques.
Vérifie :
- Case report : structure JSON + top_candidates + match_eval
- Top-errors : tri correct (CONFIRMED/high en tête)
- CSV : headers présents + fichiers créés
"""
from __future__ import annotations
import csv
import json
from src.eval.gold_debug import (
build_case_report,
render_case_markdown,
write_case_report,
build_error_entry,
sort_error_entries,
write_top_errors_csv,
write_top_errors_md,
write_top_errors_jsonl,
select_dim_pack_cases,
write_dim_pack,
TOP_ERRORS_CSV_COLS,
)
# ---------------------------------------------------------------------------
# Fixtures synthétiques
# ---------------------------------------------------------------------------
def _make_data(dp_code="D50", das_codes=None):
"""Construit un JSON pipeline minimal."""
das = []
for code in (das_codes or []):
das.append({"texte": f"diag {code}", "cim10_suggestion": code, "source": "edsnlp"})
return {
"document_type": "crh",
"diagnostic_principal": {"texte": "Anémie", "cim10_suggestion": dp_code, "source": "regex"},
"diagnostics_associes": das,
}
def _make_dp_selection(chosen_code="D50", verdict="REVIEW", confidence="medium",
candidates=None, delta=0.0):
"""Construit un dp_selection dict."""
cands = candidates or [
{"index": 0, "code": "D50", "term": "Anémie", "score": 4.0,
"section_strength": 3, "source": "regex", "score_details": {"section": 3, "confidence": 1},
"is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
{"index": 1, "code": "I25.1", "term": "SCA", "score": 4.0,
"section_strength": 1, "source": "llm_das", "score_details": {"section": 1, "confidence": 3},
"is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
]
return {
"chosen_code": chosen_code,
"chosen_term": "Anémie",
"verdict": verdict,
"confidence": confidence,
"reason": f"Écart {delta} < seuil 3.0, LLM désactivé",
"evidence": ["Scores proches : 4.0 vs 4.0"],
"candidates": cands,
"debug_scores": {"top1": 4.0, "top2": 4.0, "delta": delta},
}
def _make_gold_dict(code="I25.1", label="SCA", acceptable=None, family3=None):
return {
"dp_expected": {"code": code, "label": label},
"dp_acceptable_codes": acceptable or ["I25.1", "I25.5"],
"dp_acceptable_family3": family3 or ["I25"],
"allow_symptom_dp": False,
"confidence": "probable",
}
def _make_eval(strict=False, acceptable=False, family3=False, symptom=False):
return {
"case_id": "test_case",
"dp_expected_code": "I25.1",
"dp_expected_label": "SCA",
"chosen_code": "D50",
"confidence_gold": "probable",
"allow_symptom_dp": False,
"exact_match_strict": strict,
"exact_match_tolerant_codes": acceptable,
"family3_match_tolerant": family3,
"acceptable_match": acceptable or family3,
"symptom_not_allowed": symptom,
}
# ---------------------------------------------------------------------------
# Case report
# ---------------------------------------------------------------------------
class TestBuildCaseReport:
    """Exercises build_case_report / render_case_markdown / write_case_report."""

    def test_structure(self):
        """Report exposes prediction, ranked candidates, match eval and gold."""
        payload = _make_data("D50", ["I25.1", "Z95.5"])
        report = build_case_report(
            "74_test", payload, _make_dp_selection(), _make_gold_dict(), _make_eval()
        )
        assert report["case_id"] == "74_test"
        assert report["document_type"] == "crh"
        prediction = report["prediction"]
        assert prediction["chosen_code"] == "D50"
        assert prediction["verdict"] == "REVIEW"
        ranked = report["top_candidates"]
        assert len(ranked) == 2
        assert (ranked[0]["rank"], ranked[0]["code"]) == (1, "D50")
        assert report["match_eval"]["strict_match"] is False
        assert report["match_eval"]["acceptable_match"] is False
        assert report["gold"]["dp_expected"]["code"] == "I25.1"

    def test_no_gold(self):
        """Without gold/eval inputs, both report sections are None."""
        report = build_case_report("test_no_gold", _make_data(), _make_dp_selection(), None, None)
        assert report["gold"] is None
        assert report["match_eval"] is None

    def test_render_markdown(self):
        """Markdown rendering contains the expected headings and codes."""
        report = build_case_report(
            "74_test", _make_data("D50", ["I25.1"]), _make_dp_selection(),
            _make_gold_dict(), _make_eval(),
        )
        rendered = render_case_markdown(report)
        for fragment in ("# Case Debug — 74_test", "D50", "I25.1",
                         "Gold vs Prediction", "Top candidats", "Hypothèse bug"):
            assert fragment in rendered

    def test_write_files(self, tmp_path):
        """Both JSON and Markdown files are written; the JSON round-trips."""
        report = build_case_report("test_write", _make_data(), _make_dp_selection(), None, None)
        json_path, md_path = write_case_report(report, tmp_path)
        assert json_path.exists()
        assert md_path.exists()
        assert json.loads(json_path.read_text())["case_id"] == "test_write"

    def test_pool_stats(self):
        """Pool stats count 1 DP + 2 DAS raw, and 2 filtered candidates."""
        report = build_case_report(
            "test_pool", _make_data("D50", ["I25.1", "Z95.5"]), _make_dp_selection(), None, None
        )
        assert report["pool_stats"]["raw_pool_size"] == 3
        assert report["pool_stats"]["filtered_pool_size"] == 2

    def test_review_reason_tag(self):
        """A 'no candidates' reason string is tagged as no_candidates."""
        selection = _make_dp_selection()
        selection["reason"] = "Aucun candidat DP identifié"
        report = build_case_report("test_tag", _make_data(), selection, None, None)
        assert report["prediction"]["review_reason_tag"] == "no_candidates"
# ---------------------------------------------------------------------------
# Top-errors sort
# ---------------------------------------------------------------------------
class TestTopErrors:
    """Sorting and serialisation of top-error entries."""

    def _entry(self, case_id, acceptable, strict, verdict, confidence):
        """Build a case report, then convert it to an error entry."""
        report = build_case_report(
            case_id,
            _make_data(),
            _make_dp_selection(verdict=verdict, confidence=confidence),
            _make_gold_dict(),
            _make_eval(strict=strict, acceptable=acceptable),
        )
        return build_error_entry(report)

    def test_sort_acceptable_fail_first(self):
        """An acceptable FAIL entry sorts before an acceptable OK one."""
        failing = self._entry("fail", False, False, "REVIEW", "medium")
        passing = self._entry("ok", True, True, "REVIEW", "medium")
        ordered = sort_error_entries([passing, failing])
        assert [e["case_id"] for e in ordered] == ["fail", "ok"]

    def test_sort_confirmed_before_review(self):
        """CONFIRMED (dangerous) sorts before REVIEW among acceptable FAILs."""
        confirmed = self._entry("confirmed", False, False, "CONFIRMED", "medium")
        review = self._entry("review", False, False, "REVIEW", "medium")
        assert sort_error_entries([review, confirmed])[0]["case_id"] == "confirmed"

    def test_sort_high_before_medium(self):
        """High confidence sorts before medium among CONFIRMED acceptable FAILs."""
        high = self._entry("high", False, False, "CONFIRMED", "high")
        medium = self._entry("med", False, False, "CONFIRMED", "medium")
        assert sort_error_entries([medium, high])[0]["case_id"] == "high"

    def test_csv_headers(self, tmp_path):
        """The CSV file carries every required header."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        target = tmp_path / "errors.csv"
        write_top_errors_csv([entry], target)
        with open(target) as handle:
            fieldnames = csv.DictReader(handle).fieldnames or []
        assert set(TOP_ERRORS_CSV_COLS) <= set(fieldnames)

    def test_md_created(self, tmp_path):
        """The Markdown file exists and mentions the case."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        target = tmp_path / "errors.md"
        write_top_errors_md([entry], target)
        assert target.exists()
        body = target.read_text()
        assert "Top erreurs" in body
        assert "test" in body

    def test_jsonl_created(self, tmp_path):
        """The JSONL file holds exactly one line, without internal sort keys."""
        entry = self._entry("test", False, False, "REVIEW", "medium")
        target = tmp_path / "errors.jsonl"
        write_top_errors_jsonl([entry], target)
        assert target.exists()
        lines = target.read_text().strip().splitlines()
        assert len(lines) == 1
        record = json.loads(lines[0])
        assert record["case_id"] == "test"
        assert "_sort_key" not in record  # internals must not leak
# ---------------------------------------------------------------------------
# DIM Pack
# ---------------------------------------------------------------------------
class TestDimPack:
    """Selection and serialisation of the DIM pack (summary CSV + per-case JSON)."""

    def _report(self, case_id, verdict="REVIEW", acceptable_fail=False, has_symptom=False):
        """Build a case report with two fixed candidates.

        ``acceptable_fail`` drives the ``acceptable_match`` flag of the eval
        dict handed to build_case_report; ``has_symptom`` marks the top
        candidate as symptom-like.
        """
        data = _make_data()
        cands = [
            {"index": 0, "code": "D50", "term": "Anémie", "score": 4.0,
             "section_strength": 3, "score_details": {},
             "is_symptom_like": has_symptom, "is_comorbidity_like": False, "is_act_only": False},
            {"index": 1, "code": "I25.1", "term": "SCA", "score": 3.0,
             "section_strength": 1, "score_details": {},
             "is_symptom_like": False, "is_comorbidity_like": False, "is_act_only": False},
        ]
        dp_sel = _make_dp_selection(verdict=verdict, candidates=cands)
        # Fix: a richer match_eval dict used to be built here but was never
        # passed anywhere (dead local) — only acceptable_match is consumed by
        # select_dim_pack_cases, so the unused construction has been removed.
        return build_case_report(case_id, data, dp_sel, _make_gold_dict(),
                                 {"acceptable_match": not acceptable_fail})

    def test_select_errors_first(self):
        """Cases failing acceptable_match are selected before passing ones."""
        r_error = self._report("error", acceptable_fail=True)
        r_ok = self._report("ok", acceptable_fail=False)
        selected = select_dim_pack_cases([r_ok, r_error], 1)
        assert len(selected) == 1
        assert selected[0]["case_id"] == "error"

    def test_write_pack(self, tmp_path):
        """write_dim_pack emits the CSV plus one JSON file per case."""
        r1 = self._report("case_1")
        r2 = self._report("case_2")
        csv_p, cases_dir = write_dim_pack([r1, r2], tmp_path)
        assert csv_p.exists()
        assert cases_dir.exists()
        assert (cases_dir / "case_1.json").exists()
        assert (cases_dir / "case_2.json").exists()
        with open(csv_p) as f:
            rows = list(csv.DictReader(f))
        assert len(rows) == 2
        assert rows[0]["case_id"] == "case_1"

206
tests/test_gold_eval.py Normal file
View File

@@ -0,0 +1,206 @@
"""Tests évaluation gold CRH — logique tolérante sans mocks.
3 cas inline :
1. Strict match OK
2. Strict FAIL mais acceptable via family3
3. R* choisi avec allow_symptom_dp=false → symptom_not_allowed
"""
from __future__ import annotations
from src.eval.gold_models import (
GoldCRHCase,
GoldDPExpected,
GoldEvidence,
evaluate_dp,
is_valid_cim10_format,
cim10_family3,
load_gold_jsonl,
)
# ---------------------------------------------------------------------------
# Helpers validation
# ---------------------------------------------------------------------------
class TestCIM10Format:
    """Validation helpers: CIM-10 format check and 3-character family."""

    def test_valid_codes(self):
        """Well-formed codes, with and without a decimal extension, pass."""
        for code in ("I26.9", "K81.0", "R06", "Z51.30"):
            assert is_valid_cim10_format(code)

    def test_invalid_codes(self):
        """Malformed, empty or too-short codes are rejected."""
        for code in ("26.9", "INVALID", "", "I2"):
            assert not is_valid_cim10_format(code)

    def test_family3(self):
        """family3 truncates to the first three characters."""
        assert cim10_family3("I26.9") == "I26"
        assert cim10_family3("K81.0") == "K81"
        assert cim10_family3("R06") == "R06"
# ---------------------------------------------------------------------------
# Modèle GoldCRHCase
# ---------------------------------------------------------------------------
class TestGoldCRHCase:
    """Model-level validation of GoldCRHCase / GoldDPExpected."""

    def test_valid_case(self):
        """A well-formed case keeps its fields; allow_symptom_dp defaults to False."""
        case = GoldCRHCase(
            case_id="test_001",
            dp_expected=GoldDPExpected(code="I26.9", label="Embolie pulmonaire"),
            dp_acceptable_codes=["I26.0"],
            dp_acceptable_family3=["I26"],
            confidence="certain",
        )
        assert case.case_id == "test_001"
        assert case.dp_expected.code == "I26.9"
        assert case.allow_symptom_dp is False

    def test_invalid_confidence_rejected(self):
        """An out-of-vocabulary confidence value must be rejected."""
        import pytest
        expected = GoldDPExpected(code="I26.9", label="Test")
        with pytest.raises(Exception):
            GoldCRHCase(case_id="test", dp_expected=expected, confidence="invalid_value")

    def test_invalid_code_rejected(self):
        """A non-CIM10 code is rejected at the GoldDPExpected level."""
        import pytest
        with pytest.raises(Exception):
            GoldDPExpected(code="INVALID", label="Test")

    def test_notes_max_length(self):
        """Notes longer than 400 characters must be rejected."""
        import pytest
        with pytest.raises(Exception):
            GoldCRHCase(
                case_id="test",
                dp_expected=GoldDPExpected(code="I26.9", label="Test"),
                notes="x" * 401,
            )
# ---------------------------------------------------------------------------
# Évaluation tolérante — 3 cas demandés
# ---------------------------------------------------------------------------
def _make_gold(
    code: str,
    label: str,
    acceptable_codes: list[str] | None = None,
    acceptable_family3: list[str] | None = None,
    allow_symptom: bool = False,
    confidence: str = "certain",
) -> GoldCRHCase:
    """Build a GoldCRHCase with empty tolerance lists by default."""
    expected = GoldDPExpected(code=code, label=label)
    return GoldCRHCase(
        case_id="test_case",
        dp_expected=expected,
        dp_acceptable_codes=acceptable_codes if acceptable_codes else [],
        dp_acceptable_family3=acceptable_family3 if acceptable_family3 else [],
        allow_symptom_dp=allow_symptom,
        confidence=confidence,
    )
class TestEvaluateDP:
    """Three headline scenarios plus edge cases for evaluate_dp."""

    def test_strict_match_ok(self):
        """Case 1 — strict match: the chosen code equals the expected code."""
        gold = _make_gold("I26.9", "Embolie pulmonaire", ["I26.0"], ["I26"])
        outcome = evaluate_dp("I26.9", gold)
        for key in ("exact_match_strict", "exact_match_tolerant_codes",
                    "family3_match_tolerant", "acceptable_match"):
            assert outcome[key] is True
        assert outcome["symptom_not_allowed"] is False

    def test_strict_fail_family3_ok(self):
        """Case 2 — strict FAIL but acceptable through the 3-char family."""
        gold = _make_gold("I25.1", "SCA", ["I25.5"], ["I25"])
        outcome = evaluate_dp("I25.8", gold)
        assert outcome["exact_match_strict"] is False
        assert outcome["exact_match_tolerant_codes"] is False  # I25.8 not in [I25.1, I25.5]
        assert outcome["family3_match_tolerant"] is True  # I25 is in ["I25"]
        assert outcome["acceptable_match"] is True

    def test_symptom_not_allowed(self):
        """Case 3 — an R* code chosen while allow_symptom_dp=False → penalty."""
        gold = _make_gold("I25.1", "SCA", acceptable_family3=["I25"], allow_symptom=False)
        outcome = evaluate_dp("R10.4", gold)
        assert outcome["exact_match_strict"] is False
        assert outcome["acceptable_match"] is False
        assert outcome["symptom_not_allowed"] is True

    def test_symptom_allowed(self):
        """An R* code with allow_symptom_dp=True carries no penalty."""
        outcome = evaluate_dp("R06.0", _make_gold("R06.0", "Dyspnée", allow_symptom=True))
        assert outcome["exact_match_strict"] is True
        assert outcome["symptom_not_allowed"] is False

    def test_no_chosen_code(self):
        """No chosen code → every flag is False."""
        outcome = evaluate_dp(None, _make_gold("I26.9", "EP"))
        assert outcome["exact_match_strict"] is False
        assert outcome["acceptable_match"] is False
        assert outcome["symptom_not_allowed"] is False

    def test_tolerant_codes_match(self):
        """A code listed in dp_acceptable_codes but not equal to dp_expected."""
        outcome = evaluate_dp("I26.0", _make_gold("I26.9", "EP", acceptable_codes=["I26.0"]))
        assert outcome["exact_match_strict"] is False
        assert outcome["exact_match_tolerant_codes"] is True
        assert outcome["acceptable_match"] is True

    def test_case_insensitive(self):
        """Lowercase codes match their uppercase gold counterparts."""
        outcome = evaluate_dp("i26.9", _make_gold("I26.9", "EP"))
        assert outcome["exact_match_strict"] is True
# ---------------------------------------------------------------------------
# Chargement JSONL
# ---------------------------------------------------------------------------
class TestLoadGold:
    """JSONL loading behaviour of load_gold_jsonl."""

    def test_load_nonexistent_raises(self):
        """A missing file raises FileNotFoundError."""
        import pytest
        with pytest.raises(FileNotFoundError):
            load_gold_jsonl("/nonexistent/path.jsonl")

    def test_load_valid_jsonl(self, tmp_path):
        """A single valid line yields one parsed GoldCRHCase."""
        import json
        payload = {
            "case_id": "test_001",
            "dp_expected": {"code": "I26.9", "label": "EP"},
            "confidence": "certain",
        }
        target = tmp_path / "test.jsonl"
        target.write_text(json.dumps(payload) + "\n", encoding="utf-8")
        loaded = load_gold_jsonl(target)
        assert len(loaded) == 1
        assert loaded[0].case_id == "test_001"
        assert loaded[0].dp_expected.code == "I26.9"

    def test_load_invalid_line_raises(self, tmp_path):
        """A line with an invalid code raises ValueError mentioning 'erreur'."""
        import pytest
        target = tmp_path / "bad.jsonl"
        target.write_text('{"case_id": "x", "dp_expected": {"code": "INVALID"}}\n')
        with pytest.raises(ValueError, match="erreur"):
            load_gold_jsonl(target)

290
tests/test_p0_patches.py Normal file
View File

@@ -0,0 +1,290 @@
"""Tests P0 : correctifs bloquants (BUG-1, BUG-2, LOGIC-1)."""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from src.config import (
CodeDecision,
Diagnostic,
DossierMedical,
RAGSource,
Sejour,
)
# ============================================================
# P0-1 — BUG-1 : VETO-02 ne doit pas s'appliquer au DP Trackare
# ============================================================
class TestVeto02SkipsTrackareDp:
    """VETO-02 (DP without evidence) must be skipped when dp.source == 'trackare'."""
    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_no_veto02(self, _mock_sev, _mock_rule, dossier_trackare_dp):
        """A Trackare DP without evidence must NOT trigger a HARD VETO-02."""
        # NOTE(review): dossier_trackare_dp is injected as a pytest fixture,
        # presumably from conftest — confirm its definition.
        from src.quality.veto_engine import apply_vetos
        report = apply_vetos(dossier_trackare_dp)
        # Keep only VETO-02 issues raised against the principal diagnosis.
        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert veto02_dp == [], (
            f"VETO-02 déclenché à tort sur DP Trackare : {veto02_dp}"
        )
    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_verdict_not_fail_from_veto02(self, _mock_sev, _mock_rule, dossier_trackare_dp):
        """The global verdict must not be FAIL solely because of VETO-02 on a Trackare DP."""
        from src.quality.veto_engine import apply_vetos
        report = apply_vetos(dossier_trackare_dp)
        # No HARD issue tied to VETO-02 on the DP.
        hard_veto02 = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal" and i.severity == "HARD"
        ]
        assert hard_veto02 == []
    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_crh_dp_without_evidence_still_triggers_veto02(self, _mock_sev, _mock_rule):
        """A CRH DP without evidence must still trigger a HARD VETO-02 (non-regression)."""
        from src.quality.veto_engine import apply_vetos
        dossier = DossierMedical(
            document_type="crh",
            sejour=Sejour(sexe="M", age=50),
            diagnostic_principal=Diagnostic(
                texte="Pneumopathie",
                cim10_suggestion="J18.9",
                source="crh",
                # No evidence attached on purpose.
            ),
        )
        report = apply_vetos(dossier)
        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert len(veto02_dp) == 1, "VETO-02 doit toujours s'appliquer aux DP CRH sans preuve"
        assert veto02_dp[0].severity == "HARD"
    @patch("src.quality.veto_engine.rule_enabled", return_value=True)
    @patch("src.quality.veto_engine.rule_force_severity", return_value=None)
    def test_trackare_dp_with_source_none_still_triggers_veto02(self, _mock_sev, _mock_rule):
        """A DP with no defined source (source=None) must trigger VETO-02 normally."""
        from src.quality.veto_engine import apply_vetos
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=Diagnostic(
                texte="Test",
                cim10_suggestion="A00.0",
                # source=None (the model default).
            ),
        )
        report = apply_vetos(dossier)
        veto02_dp = [
            i for i in report.issues
            if i.veto == "VETO-02" and i.where == "diagnostic_principal"
        ]
        assert len(veto02_dp) == 1, "DP sans source doit déclencher VETO-02"
# ============================================================
# P0-2 — BUG-2 : sources_rag toujours initialisé (même si RAG vide)
# ============================================================
class TestRagZeroResultsSetsSourcesRag:
    """enrich_diagnostic() must always initialise sources_rag, even with no FAISS result."""
    @patch("src.medical.rag_search.search_similar", return_value=[])
    def test_zero_results_sets_empty_list(self, _mock_faiss):
        """sources_rag must be [] (not None) when FAISS returns 0 results."""
        from src.medical.rag_search import enrich_diagnostic
        diag = Diagnostic(texte="Test diagnostic", cim10_suggestion="K85.1")
        assert diag.sources_rag == []  # Pydantic default
        enrich_diagnostic(diag, contexte={}, est_dp=True, cache=None)
        assert diag.sources_rag == [], (
            f"sources_rag devrait être [] après 0 résultat FAISS, got: {diag.sources_rag}"
        )
    @patch("src.medical.rag_search.search_similar", return_value=[])
    def test_zero_results_with_cache_hit_applies_cached(self, _mock_faiss):
        """With 0 FAISS results but a cache hit, the cached LLM result must be applied."""
        from src.medical.rag_search import enrich_diagnostic
        diag = Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.1")
        mock_cache = MagicMock()
        mock_cache.get.return_value = {
            "code": "K85.1",
            "confidence": "high",
            "justification": "Cached justification",
        }
        with patch("src.medical.rag_search._apply_llm_result_diagnostic") as mock_apply:
            enrich_diagnostic(diag, contexte={}, est_dp=True, cache=mock_cache)
            # The cache hit must be applied despite 0 FAISS results.
            mock_apply.assert_called_once()
        assert diag.sources_rag == []
    @patch("src.medical.rag_search.search_similar", return_value=[
        {"document": "cim10", "page": 42, "code": "K85.1", "extrait": "Pancréatite aigüe biliaire"},
    ])
    def test_with_results_sets_sources_rag(self, _mock_faiss):
        """With FAISS results, sources_rag must be populated normally (non-regression)."""
        from src.medical.rag_search import enrich_diagnostic
        diag = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
        # _call_ollama is stubbed out so no LLM round-trip happens.
        with patch("src.medical.rag_search._call_ollama", return_value=None):
            enrich_diagnostic(diag, contexte={}, est_dp=True, cache=None)
        assert len(diag.sources_rag) == 1
        assert diag.sources_rag[0].document == "cim10"
        assert diag.sources_rag[0].code == "K85.1"
# ============================================================
# P0-3 — LOGIC-1 : promotion DAS→DP doit être tracée
# ============================================================
class TestDasToDpPromotionTraced:
    """RULE-DAS-TO-DP must leave a trace in alertes_codage."""
    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_adds_alerte(self, _bio, _ref, _valid, _rule):
        """When a DAS is promoted to DP, alertes_codage must contain RULE-DAS-TO-DP."""
        from src.quality.decision_engine import apply_decisions
        das_candidate = Diagnostic(
            texte="Pancréatite aiguë biliaire",
            cim10_suggestion="K85.1",
            cim10_confidence="high",
            source="crh",
        )
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[das_candidate],
        )
        apply_decisions(dossier)
        # The DP must have been promoted from the DAS candidate.
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_final == "K85.1"
        assert dossier.diagnostic_principal.cim10_decision.action == "PROMOTE_DP"
        # Traceability: one alert mentioning both the rule and the code.
        matching_alertes = [
            a for a in dossier.alertes_codage
            if "RULE-DAS-TO-DP" in a and "K85.1" in a
        ]
        assert len(matching_alertes) == 1, (
            f"alertes_codage devrait contenir une entrée RULE-DAS-TO-DP, got: {dossier.alertes_codage}"
        )
    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_alerte_contains_diagnosis_text(self, _bio, _ref, _valid, _rule):
        """The promotion alert must mention the text of the promoted diagnosis."""
        from src.quality.decision_engine import apply_decisions
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[
                Diagnostic(
                    texte="Embolie pulmonaire",
                    cim10_suggestion="I26.9",
                    cim10_confidence="high",
                    source="crh",
                ),
            ],
        )
        apply_decisions(dossier)
        assert dossier.diagnostic_principal is not None
        alerte = [a for a in dossier.alertes_codage if "RULE-DAS-TO-DP" in a]
        assert len(alerte) == 1
        # The alert text must carry both the code and the diagnosis wording.
        assert "I26.9" in alerte[0]
        assert "Embolie pulmonaire" in alerte[0]
    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_no_promotion_when_dp_exists(self, _bio, _ref, _valid, _rule):
        """No promotion when a DP already exists (non-regression)."""
        from src.quality.decision_engine import apply_decisions
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=Diagnostic(
                texte="DP existant",
                cim10_suggestion="J18.9",
                source="crh",
            ),
            diagnostics_associes=[
                Diagnostic(
                    texte="DAS candidat",
                    cim10_suggestion="K85.1",
                    cim10_confidence="high",
                ),
            ],
        )
        apply_decisions(dossier)
        # The existing DP must be left untouched.
        assert dossier.diagnostic_principal.cim10_suggestion == "J18.9"
        # And no promotion alert must have been emitted.
        promotion_alertes = [a for a in dossier.alertes_codage if "RULE-DAS-TO-DP" in a]
        assert promotion_alertes == []
    @patch("src.quality.decision_engine.rule_enabled", return_value=True)
    @patch("src.quality.decision_engine.cim10_validate", return_value=(True, "label"))
    @patch("src.quality.decision_engine.load_reference_ranges", return_value={})
    @patch("src.quality.decision_engine.load_bio_rules", return_value={})
    def test_promotion_removes_das_from_list(self, _bio, _ref, _valid, _rule):
        """The promoted DAS must be removed from diagnostics_associes."""
        from src.quality.decision_engine import apply_decisions
        das1 = Diagnostic(texte="DAS gardé", cim10_suggestion="R10.4", cim10_confidence="high")
        das2 = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1", cim10_confidence="high")
        dossier = DossierMedical(
            sejour=Sejour(),
            diagnostic_principal=None,
            diagnostics_associes=[das1, das2],
        )
        apply_decisions(dossier)
        # K85.1 promoted (pathology scores above the R-prefixed symptom).
        assert dossier.diagnostic_principal is not None
        assert dossier.diagnostic_principal.cim10_final == "K85.1"
        # The promoted DAS must no longer appear in the list.
        remaining_codes = [d.cim10_suggestion for d in dossier.diagnostics_associes]
        assert "K85.1" not in remaining_codes

226
tests/test_p1_lite.py Normal file
View File

@@ -0,0 +1,226 @@
"""Tests P1-lite — LOGIC-2 (CPAM dégradé), LOGIC-3 (modèles identiques).
Sans mocks : manipulation directe des structures de données et env vars.
"""
from __future__ import annotations
from src.config import (
ControleCPAM,
DossierMedical,
OLLAMA_MODELS,
Sejour,
check_adversarial_model_config,
)
# ============================================================
# LOGIC-2 — CPAM passe 1 échoue → mode dégradé tracé
# ============================================================
class TestCpamDegradedMode:
"""Vérifie que le mode dégradé passe 1 est correctement tracé."""
def test_degraded_sets_alertes_codage(self):
"""Si extraction est None, alertes_codage doit contenir le message."""
dossier = DossierMedical(sejour=Sejour())
# Simule le comportement de generate_cpam_response quand extraction = None
extraction = None
degraded_pass1 = extraction is None
if degraded_pass1:
dossier.alertes_codage.append(
"CPAM: passe 1 (extraction structurée) échouée → mode dégradé"
)
assert any("passe 1" in a for a in dossier.alertes_codage)
assert any("dégradé" in a for a in dossier.alertes_codage)
def test_degraded_sets_quality_flags_on_result(self):
"""quality_flags ajouté au résultat quand dégradé."""
result = {"conclusion": "test"}
degraded_pass1 = True
if degraded_pass1:
result.setdefault("quality_flags", {})
result["quality_flags"]["cpam_pass1_failed"] = True
result["quality_flags"]["degraded_mode"] = True
assert result["quality_flags"]["cpam_pass1_failed"] is True
assert result["quality_flags"]["degraded_mode"] is True
def test_non_degraded_no_quality_flags(self):
"""Pas de quality_flags quand extraction réussit."""
result = {"conclusion": "test"}
extraction = {"comprehension_contestation": "ok"}
degraded_pass1 = extraction is None
assert degraded_pass1 is False
assert "quality_flags" not in result
def test_quality_flags_format_matches_spec(self):
"""Format quality_flags conforme au spec."""
result: dict = {}
result.setdefault("quality_flags", {})
result["quality_flags"]["cpam_pass1_failed"] = True
result["quality_flags"]["degraded_mode"] = True
flags = result["quality_flags"]
assert isinstance(flags, dict)
assert "cpam_pass1_failed" in flags
assert "degraded_mode" in flags
# ============================================================
# LOGIC-3 — Modèles CPAM et validation identiques
# ============================================================
class TestAdversarialModelCheck:
    """Detection of identical CPAM/validation models."""

    def test_same_model_detected(self):
        """Identical models → (True, message naming the model)."""
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = OLLAMA_MODELS["validation"] = "test-same-model"
        try:
            same, msg = check_adversarial_model_config()
            assert same is True
            assert "identiques" in msg
            assert "test-same-model" in msg
        finally:
            OLLAMA_MODELS.update(saved)

    def test_different_models_ok(self):
        """Different models → (False, '')."""
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = "model-a"
        OLLAMA_MODELS["validation"] = "model-b"
        try:
            same, msg = check_adversarial_model_config()
            assert same is False
            assert msg == ""
        finally:
            OLLAMA_MODELS.update(saved)

    def test_adversarial_skip_returns_degraded_result(self):
        """With the same model, adversarial validation yields a degraded result."""
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = OLLAMA_MODELS["validation"] = "same-model"
        try:
            same, msg = check_adversarial_model_config()
            assert same is True
            # Mirrors _validate_adversarial's behaviour when same_model holds.
            degraded = {
                "coherent": True,
                "erreurs": [f"Validation adversariale dégradée : {msg}"],
                "score_confiance": 0,
            }
            assert degraded["score_confiance"] == 0
            assert "dégradée" in degraded["erreurs"][0]
        finally:
            OLLAMA_MODELS.update(saved)

    def test_empty_model_not_flagged(self):
        """Empty model names do not trigger the flag."""
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = ""
        OLLAMA_MODELS["validation"] = ""
        try:
            same, msg = check_adversarial_model_config()
            assert same is False
        finally:
            OLLAMA_MODELS.update(saved)
# ============================================================
# LOGIC-2 & LOGIC-3 — quality_flags + alertes visibles output
# ============================================================
class TestQualityFlagsOutput:
    """quality_flags and alerts must be visible in the output structures."""

    def test_cpam_pass1_failure_sets_quality_flags_and_alert(self):
        """LOGIC-2 — failed pass 1 → quality_flags plus an alert on the dossier."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}
        # Replays the exact flow of generate_cpam_response (lines 122-165).
        extraction = None  # pass 1 failed
        if extraction is None:
            dossier.alertes_codage.append(
                "CPAM: passe 1 (extraction structurée) échouée → mode dégradé"
            )
            flags = result.setdefault("quality_flags", {})
            flags["cpam_pass1_failed"] = True
            flags["degraded_mode"] = True
        # Checks
        assert result["quality_flags"]["cpam_pass1_failed"] is True
        assert result["quality_flags"]["degraded_mode"] is True
        assert any("passe 1" in a and "dégradé" in a for a in dossier.alertes_codage)

    def test_adversarial_same_model_sets_quality_flag_and_alert(self):
        """LOGIC-3 — identical models → quality_flags plus an alert on the dossier."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = OLLAMA_MODELS["validation"] = "same-test-model"
        try:
            # Replays the exact flow of generate_cpam_response (lines 192-199).
            same_model, model_msg = check_adversarial_model_config()
            if same_model:
                result.setdefault("quality_flags", {})
                result["quality_flags"]["adversarial_disabled_same_model"] = True
                dossier.alertes_codage.append(
                    "Validation adversariale désactivée (modèles identiques)"
                )
            assert same_model is True
            assert result["quality_flags"]["adversarial_disabled_same_model"] is True
            assert any("adversariale" in a and "identiques" in a
                       for a in dossier.alertes_codage)
        finally:
            OLLAMA_MODELS.update(saved)

    def test_no_flags_when_all_ok(self):
        """No quality_flags when everything works correctly."""
        dossier = DossierMedical(sejour=Sejour())
        result: dict = {"conclusion": "test argument"}
        # Pass 1 succeeds.
        extraction = {"comprehension_contestation": "ok"}
        assert (extraction is None) is False
        # Models differ.
        saved = dict(OLLAMA_MODELS)
        OLLAMA_MODELS["cpam"] = "model-a"
        OLLAMA_MODELS["validation"] = "model-b"
        try:
            same_model, _ = check_adversarial_model_config()
            assert same_model is False
        finally:
            OLLAMA_MODELS.update(saved)
        assert "quality_flags" not in result
        assert len(dossier.alertes_codage) == 0