Externalize dictionaries and add anonymization review corpus
This commit is contained in:
92
tests/unit/test_config_externalization.py
Normal file
92
tests/unit/test_config_externalization.py
Normal file
@@ -0,0 +1,92 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests de non-régression pour la config externalisée.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
import anonymizer_core_refactored_onnx as core
|
||||
from config_defaults import (
|
||||
deep_merge_dict,
|
||||
ensure_runtime_dictionaries_config,
|
||||
load_effective_dictionaries_dict,
|
||||
read_default_dictionaries_text,
|
||||
read_runtime_dictionaries_overlay_text,
|
||||
)
|
||||
|
||||
|
||||
def test_default_config_template_is_externalized():
    """Check the default template text and the dictionaries loaded without a path.

    The text returned by ``read_default_dictionaries_text()`` must contain the
    ``blacklist:`` and ``whitelist_phrases:`` keys, and
    ``core.load_dictionaries(None)`` must list ``CHCB`` among the blacklist's
    ``force_mask_terms``.
    """
    template = read_default_dictionaries_text()
    for marker in ("blacklist:", "whitelist_phrases:"):
        assert marker in template

    loaded = core.load_dictionaries(None)
    forced_terms = loaded["blacklist"]["force_mask_terms"]
    assert "CHCB" in forced_terms
|
||||
|
||||
|
||||
def test_runtime_overlay_template_is_minimal():
    """Check the runtime overlay template text.

    The text returned by ``read_runtime_dictionaries_overlay_text()`` must
    mention ``dictionnaires.default.yml`` and contain an empty mapping ``{}``.
    """
    overlay_template = read_runtime_dictionaries_overlay_text()

    expected_fragments = ("dictionnaires.default.yml", "{}")
    for fragment in expected_fragments:
        assert fragment in overlay_template
|
||||
|
||||
|
||||
def test_deep_merge_dict_preserves_nested_defaults():
    """Exercise ``deep_merge_dict`` on two nested mappings.

    The assertions pin the expected result for this input: the two
    ``sections_titres`` lists are combined with the base entry first, scalar
    values present in the override win, and a key absent from the override
    (``case_insensitive``) keeps its base value.
    """
    defaults = dict(
        whitelist=dict(sections_titres=["DIM"], org_gpe_keep=False),
        flags=dict(case_insensitive=True, regex_engine="python"),
    )
    overlay = dict(
        whitelist=dict(sections_titres=["GHM"], org_gpe_keep=True),
        flags=dict(regex_engine="re2"),
    )

    merged = deep_merge_dict(defaults, overlay)

    whitelist = merged["whitelist"]
    assert whitelist["sections_titres"] == ["DIM", "GHM"]
    assert whitelist["org_gpe_keep"] is True

    flags = merged["flags"]
    assert flags["case_insensitive"] is True
    assert flags["regex_engine"] == "re2"
|
||||
|
||||
|
||||
def test_additional_stopwords_refresh_and_reset(tmp_path: Path):
    """Check that extra stopwords are applied on load and dropped on reload.

    A temporary config declaring ``xyzzymed`` under ``additional_stopwords`` is
    loaded through ``core.load_dictionaries``; the word must then appear in
    both ``core._MEDICAL_STOP_WORDS_SET`` and ``core._MEDICAL_STOP_WORDS``.
    Reloading with ``None`` must remove it from both again.

    Args:
        tmp_path: pytest-provided temporary directory for the config file.
    """
    cfg_path = tmp_path / "cfg.yml"
    cfg_path.write_text("additional_stopwords:\n - xyzzymed\n", encoding="utf-8")

    try:
        core.load_dictionaries(cfg_path)
        assert "xyzzymed" in core._MEDICAL_STOP_WORDS_SET
        assert "xyzzymed" in core._MEDICAL_STOP_WORDS
    finally:
        # The stopword collections are module-level state in ``core``: reload
        # without a config even when an assertion above fails, so the custom
        # word does not leak into tests that run afterwards.
        core.load_dictionaries(None)

    assert "xyzzymed" not in core._MEDICAL_STOP_WORDS_SET
    assert "xyzzymed" not in core._MEDICAL_STOP_WORDS
|
||||
|
||||
|
||||
def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
    """Check overlay file creation and the effective (merged) dictionaries.

    ``ensure_runtime_dictionaries_config`` must return the path it was given
    and leave a file there. The effective dictionaries loaded from that path
    must contain ``CHCB`` in the blacklist's ``force_mask_terms``. After the
    file is overwritten with a ``LOCAL_SIGLE`` entry, both ``CHCB`` and
    ``LOCAL_SIGLE`` must be present.

    Args:
        tmp_path: pytest-provided temporary directory for the overlay file.
    """
    overlay_path = tmp_path / "dictionnaires.yml"

    returned_path = ensure_runtime_dictionaries_config(overlay_path)
    assert returned_path == overlay_path
    assert overlay_path.exists()

    merged = load_effective_dictionaries_dict(overlay_path)
    terms = merged["blacklist"]["force_mask_terms"]
    assert "CHCB" in terms

    # Replace the generated overlay with one that adds a local term.
    overlay_yaml = "blacklist:\n force_mask_terms:\n - LOCAL_SIGLE\n"
    overlay_path.write_text(overlay_yaml, encoding="utf-8")

    merged = load_effective_dictionaries_dict(overlay_path)
    terms = merged["blacklist"]["force_mask_terms"]
    for expected in ("CHCB", "LOCAL_SIGLE"):
        assert expected in terms
|
||||
Reference in New Issue
Block a user