Files
anonymisation/tests/unit/test_config_externalization.py
Domi31tls 1c44a26eb3 chore(rgpd): replace CHCB/Bayonne/Saint-Denis/Réunion refs in source + configs (D-12)
Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis,
Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils,
et les tests unitaires. Conserve les tests synthétiques (cases) intentionnels.

- profile key chcb_strict → chuxx_strict
- CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield,
  Réunion → Province Bêta, 64100/97400 → 12345, FINESS → 999999999,
  préfixe tél 05.59.44 → 0X.XX.XX
- renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 14:39:21 +02:00

105 lines
3.1 KiB
Python

#!/usr/bin/env python3
"""
Tests de non-régression pour la config externalisée.
"""
from pathlib import Path
import anonymizer_core_refactored_onnx as core
from config_defaults import (
deep_merge_dict,
ensure_runtime_dictionaries_config,
load_effective_dictionaries_dict,
load_effective_param_lists,
read_default_dictionaries_text,
read_runtime_dictionaries_overlay_text,
)
def test_default_config_template_is_externalized():
    """Check the default dictionaries template and the core loader defaults.

    The raw template text must mention both top-level sections, and loading
    the dictionaries without an explicit path must yield the ``CHUXX``
    force-mask term.
    """
    template = read_default_dictionaries_text()
    for section_marker in ("blacklist:", "whitelist_phrases:"):
        assert section_marker in template

    forced_terms = core.load_dictionaries(None)["blacklist"]["force_mask_terms"]
    assert "CHUXX" in forced_terms
def test_runtime_overlay_template_is_minimal():
    """Check that the runtime overlay template contains its two expected markers.

    The text must reference ``dictionnaires.default.yml`` and include an
    empty mapping literal (``{}``).
    """
    overlay_template = read_runtime_dictionaries_overlay_text()
    for expected_fragment in ("dictionnaires.default.yml", "{}"):
        assert expected_fragment in overlay_template
def test_deep_merge_dict_preserves_nested_defaults():
    """Check the nested merge result of ``deep_merge_dict``.

    Expected outcome asserted below: list values are combined (base items
    first), scalar values from the override win, and keys absent from the
    override keep their base value.
    """
    default_whitelist = {"sections_titres": ["DIM"], "org_gpe_keep": False}
    default_flags = {"case_insensitive": True, "regex_engine": "python"}
    defaults = {"whitelist": default_whitelist, "flags": default_flags}

    overlay_whitelist = {"sections_titres": ["GHM"], "org_gpe_keep": True}
    overlay_flags = {"regex_engine": "re2"}
    overlay = {"whitelist": overlay_whitelist, "flags": overlay_flags}

    result = deep_merge_dict(defaults, overlay)

    whitelist = result["whitelist"]
    assert whitelist["sections_titres"] == ["DIM", "GHM"]
    assert whitelist["org_gpe_keep"] is True

    flags = result["flags"]
    assert flags["case_insensitive"] is True
    assert flags["regex_engine"] == "re2"
def test_additional_stopwords_refresh_and_reset(tmp_path: Path):
    """Check that extra stop words appear after a load and vanish after a reset.

    A config declaring one additional stop word is loaded, then the
    dictionaries are reloaded with no path; both module-level stop-word
    containers are inspected after each step.
    """
    extra_word = "xyzzymed"
    config_file = tmp_path / "cfg.yml"
    config_file.write_text("additional_stopwords:\n - xyzzymed\n", encoding="utf-8")

    core.load_dictionaries(config_file)
    # Attributes are looked up on the module after each load on purpose:
    # the loader may rebind them rather than mutate in place.
    assert extra_word in core._MEDICAL_STOP_WORDS_SET
    assert extra_word in core._MEDICAL_STOP_WORDS

    core.load_dictionaries(None)
    assert extra_word not in core._MEDICAL_STOP_WORDS_SET
    assert extra_word not in core._MEDICAL_STOP_WORDS
def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
    """Check overlay creation and that overlay terms are added to the defaults.

    The overlay file is first created via ``ensure_runtime_dictionaries_config``
    and must already expose the default ``CHUXX`` term. After writing a local
    force-mask term into the overlay, the effective config must contain both
    the default and the local term.
    """
    overlay_file = tmp_path / "dictionnaires.yml"

    returned_path = ensure_runtime_dictionaries_config(overlay_file)
    assert returned_path == overlay_file
    assert overlay_file.exists()

    forced_terms = load_effective_dictionaries_dict(overlay_file)["blacklist"]["force_mask_terms"]
    assert "CHUXX" in forced_terms

    overlay_file.write_text(
        "blacklist:\n force_mask_terms:\n - LOCAL_SIGLE\n",
        encoding="utf-8",
    )

    forced_terms = load_effective_dictionaries_dict(overlay_file)["blacklist"]["force_mask_terms"]
    for expected_term in ("CHUXX", "LOCAL_SIGLE"):
        assert expected_term in forced_terms
def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path: Path):
    """Check that an empty overlay still yields the default parameter lists.

    With an overlay containing only ``{}``, the effective lists must include
    the default whitelist phrase and force-mask term, and the additional
    stop-word list must be empty.
    """
    empty_overlay = tmp_path / "dictionnaires.yml"
    empty_overlay.write_text("{}\n", encoding="utf-8")

    effective_lists = load_effective_param_lists(empty_overlay)

    assert "classification internationale" in effective_lists["whitelist_phrases"]
    assert "CHUXX" in effective_lists["blacklist_force_mask_terms"]
    assert effective_lists["additional_stopwords"] == []