feat: dictionnaire CCAM complet (8 257 codes) + index FAISS enrichi + validation actes
Phase 2 (CCAM) : - Nouveau src/medical/ccam_dict.py : build depuis CCAM_V81.xls via xlrd, lookup 3 niveaux, validation codes - Intégration dans l'extracteur : fallback ccam_lookup + _validate_ccam() avec alertes - CLI : --build-ccam-dict, --rebuild-index Phase 3 (FAISS) : - Chunks CCAM depuis le dictionnaire JSON (priorité sur le PDF) - Chunks CIM-10 index alphabétique (terme → code) - Priorisation cim10_alpha dans la recherche RAG Viewer : endpoint reprocess + bloc scripts Tests : 8 tests CCAM + tests raisonnement RAG (161 passed) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
113
tests/test_ccam_dict.py
Normal file
113
tests/test_ccam_dict.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Tests pour le dictionnaire CCAM (build, load, lookup, validate)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from src.medical.ccam_dict import (
|
||||
build_dict,
|
||||
load_dict,
|
||||
lookup,
|
||||
normalize_text,
|
||||
reset_cache,
|
||||
validate_code,
|
||||
)
|
||||
|
||||
# Path to the CCAM spreadsheet used by these tests: two directory levels above
# this file (i.e. the parent of the directory that contains this test module).
CCAM_XLS = Path(__file__).resolve().parents[1] / "CCAM_V81.xls"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_cache():
    """Reset the CCAM dictionary cache before and after every test.

    Declared ``autouse`` so each test in this module starts from an empty
    cache and does not leave a loaded dictionary behind for the next one.
    """
    # Setup: drop whatever a previous test may have cached.
    reset_cache()
    yield
    # Teardown: drop whatever this test cached.
    reset_cache()
|
||||
|
||||
|
||||
@pytest.mark.skipif(not CCAM_XLS.exists(), reason="CCAM_V81.xls non trouvé")
class TestBuildDict:
    """Checks on the dictionary returned by ``build_dict`` for the CCAM XLS.

    The whole class is skipped when the spreadsheet is not available.
    """

    @staticmethod
    def _build_in(tmp_path):
        """Call ``build_dict`` with ``CCAM_DICT_PATH`` patched to a temp file.

        The module-level path is redirected to ``tmp_path / "ccam_dict.json"``
        for the duration of the call; the built mapping is returned.
        """
        json_target = tmp_path / "ccam_dict.json"
        with patch("src.medical.ccam_dict.CCAM_DICT_PATH", json_target):
            return build_dict(CCAM_XLS)

    def test_build_dict_from_xls(self, tmp_path):
        """Parsing the XLS yields at least 8000 codes."""
        result = self._build_in(tmp_path)
        assert len(result) >= 8000, f"Seulement {len(result)} codes extraits"

    def test_known_codes_present(self, tmp_path):
        """HMFC004 (cholecystectomy) and ZCQK002 (abdominal X-ray) are present."""
        codes = self._build_in(tmp_path)
        assert "HMFC004" in codes, "HMFC004 (cholécystectomie) absent"
        assert "ZCQK002" in codes, "ZCQK002 (radio abdomen) absent"
        # The entry's description must mention the procedure name.
        hmfc004_description = codes["HMFC004"]["description"]
        assert "cholécystectomie" in hmfc004_description.lower()
|
||||
|
||||
|
||||
@pytest.mark.skipif(not CCAM_XLS.exists(), reason="CCAM_V81.xls non trouvé")
class TestLoadDict:
    """Checks on ``load_dict`` caching.

    The whole class is skipped when the spreadsheet is not available.
    """

    def test_load_dict_singleton(self, tmp_path):
        """Two consecutive ``load_dict`` calls return the very same object."""
        json_target = tmp_path / "ccam_dict.json"
        # Build and load under the same patched path so nothing touches the
        # real CCAM_DICT_PATH.
        with patch("src.medical.ccam_dict.CCAM_DICT_PATH", json_target):
            build_dict(CCAM_XLS)
            first = load_dict()
            second = load_dict()
        assert first is second, "Le cache singleton ne fonctionne pas"
        assert len(first) >= 8000
|
||||
|
||||
|
||||
@pytest.mark.skipif(not CCAM_XLS.exists(), reason="CCAM_V81.xls non trouvé")
class TestLookup:
    """Label-to-code resolution through ``lookup``.

    The whole class is skipped when the spreadsheet is not available.
    """

    @pytest.fixture(autouse=True)
    def _build(self, tmp_path):
        """Build the dictionary under *tmp_path* and load it into the cache."""
        out = tmp_path / "ccam_dict.json"
        with patch("src.medical.ccam_dict.CCAM_DICT_PATH", out):
            build_dict(CCAM_XLS)
            # Load into the cache while the path is still patched. The tests
            # run after the patch is lifted, so they presumably rely on this
            # cached copy rather than on the real CCAM_DICT_PATH.
            load_dict()

    def test_lookup_exact(self):
        """The full label 'Cholécystectomie, par cœlioscopie' resolves to HMFC004."""
        code = lookup("Cholécystectomie, par cœlioscopie")
        assert code == "HMFC004", f"Attendu HMFC004, obtenu {code}"

    def test_lookup_substring(self):
        """The bare term 'cholécystectomie' resolves to a code that mentions it."""
        term = "cholécystectomie"
        code = lookup(term)
        assert code is not None
        # The previous check `code == "HMFC004" or code is not None` could
        # never fail once `code is not None` held. Verify instead that the
        # returned code exists in the dictionary and that its description
        # actually contains the searched term.
        is_valid, desc = validate_code(code)
        assert is_valid is True, f"lookup returned an unknown code: {code}"
        assert term in desc.lower(), (
            f"Description of {code} does not mention {term!r}: {desc!r}"
        )

    def test_lookup_unknown(self):
        """A text entirely outside the domain yields ``None``."""
        code = lookup("xyz totalement inconnu blabla")
        assert code is None
|
||||
|
||||
|
||||
@pytest.mark.skipif(not CCAM_XLS.exists(), reason="CCAM_V81.xls non trouvé")
class TestValidateCode:
    """Checks on ``validate_code`` for a known and an unknown CCAM code.

    The whole class is skipped when the spreadsheet is not available.
    """

    @pytest.fixture(autouse=True)
    def _build(self, tmp_path):
        """Build the dictionary under *tmp_path*, then load it once."""
        json_target = tmp_path / "ccam_dict.json"
        with patch("src.medical.ccam_dict.CCAM_DICT_PATH", json_target):
            build_dict(CCAM_XLS)
            load_dict()

    def test_validate_code_known(self):
        """HMFC004 is reported valid, with a description naming the procedure."""
        known, label = validate_code("HMFC004")
        assert known is True
        assert "cholécystectomie" in label.lower()

    def test_validate_code_unknown(self):
        """XXXXX99 is reported invalid, with an empty description."""
        known, label = validate_code("XXXXX99")
        assert known is False
        assert label == ""
|
||||
Reference in New Issue
Block a user