#!/usr/bin/env python3
"""
Non-regression tests for the externalized configuration.
"""

from pathlib import Path

import anonymizer_core_refactored_onnx as core
from config_defaults import (
    deep_merge_dict,
    ensure_runtime_dictionaries_config,
    load_effective_dictionaries_dict,
    load_effective_param_lists,
    read_default_dictionaries_text,
    read_runtime_dictionaries_overlay_text,
)


def test_default_config_template_is_externalized() -> None:
    """Check the default template text and the dictionaries loaded without a path.

    The default template text must contain the ``blacklist:`` and
    ``whitelist_phrases:`` sections, and loading the core dictionaries with
    ``None`` must expose ``CHUXX`` among the forced-mask terms.
    """
    text = read_default_dictionaries_text()
    assert "blacklist:" in text
    assert "whitelist_phrases:" in text

    cfg = core.load_dictionaries(None)
    assert "CHUXX" in cfg["blacklist"]["force_mask_terms"]


def test_runtime_overlay_template_is_minimal() -> None:
    """Check that the overlay template names the default file and contains ``{}``."""
    text = read_runtime_dictionaries_overlay_text()
    assert "dictionnaires.default.yml" in text
    assert "{}" in text


def test_deep_merge_dict_preserves_nested_defaults() -> None:
    """Check how ``deep_merge_dict`` combines nested dictionaries.

    Expected result: nested lists hold the base items followed by the override
    items, scalars present in the override win, and keys missing from the
    override keep their base value.
    """
    base = {
        "whitelist": {
            "sections_titres": ["DIM"],
            "org_gpe_keep": False,
        },
        "flags": {
            "case_insensitive": True,
            "regex_engine": "python",
        },
    }
    override = {
        "whitelist": {
            "sections_titres": ["GHM"],
            "org_gpe_keep": True,
        },
        "flags": {
            "regex_engine": "re2",
        },
    }

    merged = deep_merge_dict(base, override)

    assert merged["whitelist"]["sections_titres"] == ["DIM", "GHM"]
    assert merged["whitelist"]["org_gpe_keep"] is True
    assert merged["flags"]["case_insensitive"] is True
    assert merged["flags"]["regex_engine"] == "re2"


def test_additional_stopwords_refresh_and_reset(tmp_path: Path) -> None:
    """Check that extra stop-words are loaded from a config and dropped on reset.

    Loading a config that declares ``xyzzymed`` must add it to both stop-word
    collections on ``core``; loading with ``None`` afterwards must remove it.
    """
    cfg_path = tmp_path / "cfg.yml"
    cfg_path.write_text("additional_stopwords:\n - xyzzymed\n", encoding="utf-8")

    try:
        core.load_dictionaries(cfg_path)
        assert "xyzzymed" in core._MEDICAL_STOP_WORDS_SET
        assert "xyzzymed" in core._MEDICAL_STOP_WORDS
    finally:
        # The stop-word collections are module-level attributes of ``core``.
        # Reload with ``None`` even if the load or an assertion above fails,
        # so the custom entry is not left in place for later tests.
        core.load_dictionaries(None)

    assert "xyzzymed" not in core._MEDICAL_STOP_WORDS_SET
    assert "xyzzymed" not in core._MEDICAL_STOP_WORDS


def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path) -> None:
    """Check overlay creation and the merge of an overlay with the defaults.

    ``ensure_runtime_dictionaries_config`` must return the requested path and
    create the file. The effective dictionaries must contain the default
    ``CHUXX`` term both before and after a local term is written to the
    overlay, and must also contain the local term afterwards.
    """
    cfg_path = tmp_path / "dictionnaires.yml"

    created = ensure_runtime_dictionaries_config(cfg_path)
    assert created == cfg_path
    assert cfg_path.exists()

    effective = load_effective_dictionaries_dict(cfg_path)
    assert "CHUXX" in effective["blacklist"]["force_mask_terms"]

    cfg_path.write_text(
        "blacklist:\n force_mask_terms:\n - LOCAL_SIGLE\n",
        encoding="utf-8",
    )

    effective = load_effective_dictionaries_dict(cfg_path)
    assert "CHUXX" in effective["blacklist"]["force_mask_terms"]
    assert "LOCAL_SIGLE" in effective["blacklist"]["force_mask_terms"]


def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path: Path) -> None:
    """Check that an empty ``{}`` overlay still yields the default parameter lists."""
    cfg_path = tmp_path / "dictionnaires.yml"
    cfg_path.write_text("{}\n", encoding="utf-8")

    params = load_effective_param_lists(cfg_path)

    assert "classification internationale" in params["whitelist_phrases"]
    assert "CHUXX" in params["blacklist_force_mask_terms"]
    assert params["additional_stopwords"] == []