chore: add .gitignore

This commit is contained in:
dom
2026-03-05 00:37:34 +01:00
parent da34bdc8d7
commit d2e0fec97d
2087 changed files with 1485338 additions and 14 deletions

272
tests/test_dp_gold.py Normal file
View File

@@ -0,0 +1,272 @@
"""Tests gold DP scoring : retraitement de vrais CRH avec validation du dp_selection.
Lance l'extraction sur des textes anonymisés réels et vérifie que :
1. dp_selection est peuplé (pas None)
2. Le verdict est cohérent
3. Le DP sélectionné correspond au DP attendu (gold) — mode déterministe
4. Les candidats sont scorés et triés
"""
import json
import pytest
from pathlib import Path
from src.config import DossierMedical
from src.medical.cim10_extractor import extract_medical_info
from src.extraction.crh_parser import parse_crh
# Project-relative roots: anonymized fixtures and structured JSON outputs
# live under output/ next to the tests/ directory.
BASE = Path(__file__).resolve().parent.parent
ANON_DIR = BASE / "output" / "anonymized"
STRUCT_DIR = BASE / "output" / "structured"

# DETERMINISTIC gold dossiers: the DP is findable without the LLM
# (CIM10_MAP, regex, explicit code).
# Tuple schema: (dossier directory, CRH name, expected DP code).
GOLD_DETERMINISTIC = [
    # 1. Classic K85.1 — clear DP in the conclusion via CIM10_MAP
    ("1_23042753", "CRH_23042753", "K85.1"),
    # 5. CRH with regex DP — "pancréatite aiguë biliaire" in the conclusion
    ("21_23111304", "CRH_23111304", "K85.1"),
]

# LLM-DEPENDENT gold dossiers: the exact DP requires the LLM (not in CIM10_MAP).
# We only check that scoring works, without requiring the exact code.
# Tuple schema: (dossier dir, CRH name, expected DP, expected CIM-10 family or None).
GOLD_LLM_DEPENDENT = [
    # K85.0 idiopathic — "pancréatite aiguë" maps to K85.9; the .0 needs the LLM
    ("223_23169043", "CRH_23169043", "K85.0", "K85"),
    # H10.2 pediatric — conjunctivitis not in the MAP, needs the LLM
    ("250_23196454", "CRH_23196454", "H10.2", None),
    # D86.9 sarcoidosis — not in the MAP, needs the LLM
    ("144_23097531", "CRH_23097531", "D86.9", None),
]

# Every gold case normalized to 3-tuples (dir, crh, expected_dp).
# list() copies the deterministic cases as-is; the slice drops the
# family column from the LLM-dependent ones (was two unpack-repack
# comprehensions doing the same thing).
ALL_GOLD = list(GOLD_DETERMINISTIC) + [t[:3] for t in GOLD_LLM_DEPENDENT]
def _load_crh(dir_name: str, crh_name: str) -> tuple[dict, str]:
    """Read the anonymized CRH text and run it through the CRH parser."""
    anonymized = ANON_DIR / dir_name / f"{crh_name}_anonymized.txt"
    if not anonymized.exists():
        pytest.skip(f"Fichier anonymisé manquant : {anonymized}")
    raw = anonymized.read_text(encoding="utf-8")
    return parse_crh(raw), raw
def _load_existing_json(dir_name: str, crh_name: str) -> dict:
    """Return the previously generated structured JSON, or {} when absent."""
    path = STRUCT_DIR / dir_name / f"{crh_name}_cim10.json"
    if path.exists():
        return json.loads(path.read_text(encoding="utf-8"))
    return {}
class TestDPGoldDeterministic:
    """Gold CRH dossiers whose DP is resolvable without the LLM."""

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", GOLD_DETERMINISTIC)
    def test_dp_selection_populated(self, dir_name, crh_name, expected_dp):
        """dp_selection must be populated for every gold CRH."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        assert result.dp_selection is not None, (
            f"{crh_name}: dp_selection est None — le scoring n'a pas été déclenché"
        )
        assert len(result.dp_selection.candidates) >= 1, (
            f"{crh_name}: aucun candidat DP trouvé"
        )

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", GOLD_DETERMINISTIC)
    def test_dp_code_matches_gold(self, dir_name, crh_name, expected_dp):
        """The selected DP must match the expected gold code."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        assert result.diagnostic_principal is not None, (
            f"{crh_name}: aucun DP extrait"
        )
        got = result.diagnostic_principal.cim10_suggestion
        assert got == expected_dp, (
            f"{crh_name}: DP attendu {expected_dp}, obtenu {got}"
        )

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", GOLD_DETERMINISTIC)
    def test_candidates_have_scores(self, dir_name, crh_name, expected_dp):
        """Every candidate carries a score, score details and a source section."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        if result.dp_selection is None:
            pytest.skip("dp_selection absent")
        for cand in result.dp_selection.candidates:
            assert isinstance(cand.score, int), f"Score non-entier pour {cand.label}"
            assert isinstance(cand.score_details, dict), f"score_details manquant pour {cand.label}"
            assert cand.source_section, f"source_section vide pour {cand.label}"

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", GOLD_DETERMINISTIC)
    def test_candidates_sorted_by_score(self, dir_name, crh_name, expected_dp):
        """Candidates must come back sorted by decreasing score."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        if result.dp_selection is None or len(result.dp_selection.candidates) < 2:
            pytest.skip("Pas assez de candidats pour vérifier le tri")
        observed = [cand.score for cand in result.dp_selection.candidates]
        assert observed == sorted(observed, reverse=True), (
            f"{crh_name}: candidats non triés par score: {observed}"
        )
class TestDPGoldLLMDependent:
    """Dossiers whose exact DP requires the LLM.

    We check that scoring works (candidates found, scored, sorted)
    without requiring the exact CIM-10 code.
    """

    @pytest.mark.parametrize(
        "dir_name,crh_name,expected_dp,expected_family",
        GOLD_LLM_DEPENDENT,
    )
    def test_dp_selection_populated(self, dir_name, crh_name, expected_dp, expected_family):
        """dp_selection must be populated even without the LLM."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        assert result.dp_selection is not None, (
            f"{crh_name}: dp_selection est None"
        )
        assert len(result.dp_selection.candidates) >= 1, (
            f"{crh_name}: aucun candidat DP trouvé"
        )

    @pytest.mark.parametrize(
        "dir_name,crh_name,expected_dp,expected_family",
        GOLD_LLM_DEPENDENT,
    )
    def test_dp_family_if_specified(self, dir_name, crh_name, expected_dp, expected_family):
        """When a family is given, the found DP must sit in that CIM-10 family."""
        if expected_family is None:
            pytest.skip("Pas de famille attendue pour ce dossier")
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        if result.diagnostic_principal is None:
            pytest.skip("Aucun DP extrait")
        got = result.diagnostic_principal.cim10_suggestion
        assert got and got.startswith(expected_family), (
            f"{crh_name}: DP attendu famille {expected_family}*, obtenu {got}"
        )

    @pytest.mark.parametrize(
        "dir_name,crh_name,expected_dp,expected_family",
        GOLD_LLM_DEPENDENT,
    )
    def test_candidates_have_scores(self, dir_name, crh_name, expected_dp, expected_family):
        """Every candidate carries a score and score details."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        if result.dp_selection is None:
            pytest.skip("dp_selection absent")
        for cand in result.dp_selection.candidates:
            assert isinstance(cand.score, int), f"Score non-entier pour {cand.label}"
            assert isinstance(cand.score_details, dict), f"score_details manquant pour {cand.label}"
class TestDPGoldNonRegression:
    """Checks the new scoring does not degrade existing DAS codes.

    High tolerance: the old JSON files contained LLM-enriched DAS
    (source "llm_das") that are not reproducible without Ollama.
    """

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", ALL_GOLD)
    def test_das_still_extracted(self, dir_name, crh_name, expected_dp):
        """The main (non-LLM) DAS codes are still extracted."""
        parsed, raw = _load_crh(dir_name, crh_name)
        result = extract_medical_info(parsed, raw)
        baseline = _load_existing_json(dir_name, crh_name)
        if not baseline:
            pytest.skip("JSON existant manquant")
        # Baseline DAS codes, filtering out LLM-only ones (source "llm_das").
        baseline_codes = {
            entry.get("cim10_suggestion")
            for entry in baseline.get("diagnostics_associes", [])
            if entry.get("cim10_suggestion") and entry.get("source", "") != "llm_das"
        }
        current_codes = {
            diag.cim10_suggestion
            for diag in result.diagnostics_associes
            if diag.cim10_suggestion
        }
        missing = baseline_codes - current_codes
        if missing:
            # Tolerance: the DP code itself is excluded from DAS — expected.
            missing -= {expected_dp}
            # Tolerance: codes in the DP's family are excluded (DP/DAS dedup).
            if expected_dp:
                missing = {code for code in missing if code[:3] != expected_dp[:3]}
        # Broad tolerance: without the LLM many DAS are missing (the old JSON
        # had "llm_das", "conclusion" and "edsnlp" DAS enriched by the LLM,
        # not reproducible without Ollama).
        assert len(missing) <= 10, (
            f"{crh_name}: trop de DAS manquants après le nouveau scoring: {missing}"
        )
class TestDPGoldVerbose:
    """Verbose test for manual inspection — prints dp_selection details."""

    @pytest.mark.parametrize("dir_name,crh_name,expected_dp", ALL_GOLD)
    def test_print_dp_selection(self, dir_name, crh_name, expected_dp, capsys):
        """Print dp_selection for manual inspection (run with pytest -s)."""
        parsed, text = _load_crh(dir_name, crh_name)
        # List the parsed CRH sections
        sections = parsed.get("sections", {})
        section_keys = list(sections.keys())
        dossier = extract_medical_info(parsed, text)
        print(f"\n{'='*60}")
        print(f"GOLD: {crh_name} — DP attendu: {expected_dp}")
        print(f"Sections CRH parsées: {section_keys}")
        # Show the new sections when present (150-char preview)
        for key in ("diag_sortie", "diag_principal", "synthese"):
            if key in sections:
                print(f"  {key}: {sections[key][:150]}...")
        if dossier.dp_selection:
            sel = dossier.dp_selection
            print(f"Verdict: {sel.verdict}")
            print(f"Winner reason: {sel.winner_reason}")
            for i, c in enumerate(sel.candidates):
                # Fix: code and label fields were fused with no separator,
                # producing unreadable output like "K85.1Pancréatite…".
                print(f"  [{i+1}] {c.code} — {c.label[:60]} "
                      f"(section={c.source_section}, score={c.score}, "
                      f"details={c.score_details})")
        else:
            # NOTE(review): wording looks garbled — confirm intended message.
            print("dp_selection: None (Trackare DP?)")
        actual = dossier.diagnostic_principal
        if actual:
            # Fix: separator added between code and diagnosis text (was fused).
            print(f"DP retenu: {actual.cim10_suggestion}{actual.texte}")
            print(f"Source: {actual.source}")
        else:
            print("AUCUN DP")
        print(f"{'='*60}")