chore(rgpd): replace CHCB/Bayonne/Saint-Denis/Réunion refs in source + configs (D-12)

Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis,
Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils,
et les tests unitaires. Conserve les tests synthétiques (cases) intentionnels.

- profile key chcb_strict → chuxx_strict
- CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield,
  Réunion → Province Bêta, 64100/97400 → 12345, FINESS → 999999999,
  préfixe tél 05.59.44 → 0X.XX.XX
- renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 14:39:21 +02:00
parent 7b09b06065
commit 92557d4e74
37 changed files with 819 additions and 128 deletions

View File

@@ -22,7 +22,7 @@ def test_default_config_template_is_externalized():
assert "whitelist_phrases:" in text
cfg = core.load_dictionaries(None)
assert "CHCB" in cfg["blacklist"]["force_mask_terms"]
assert "CHUXX" in cfg["blacklist"]["force_mask_terms"]
def test_runtime_overlay_template_is_minimal():
@@ -82,14 +82,14 @@ def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
assert cfg_path.exists()
effective = load_effective_dictionaries_dict(cfg_path)
assert "CHCB" in effective["blacklist"]["force_mask_terms"]
assert "CHUXX" in effective["blacklist"]["force_mask_terms"]
cfg_path.write_text(
"blacklist:\n force_mask_terms:\n - LOCAL_SIGLE\n",
encoding="utf-8",
)
effective = load_effective_dictionaries_dict(cfg_path)
assert "CHCB" in effective["blacklist"]["force_mask_terms"]
assert "CHUXX" in effective["blacklist"]["force_mask_terms"]
assert "LOCAL_SIGLE" in effective["blacklist"]["force_mask_terms"]
@@ -100,5 +100,5 @@ def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path:
params = load_effective_param_lists(cfg_path)
assert "classification internationale" in params["whitelist_phrases"]
assert "CHCB" in params["blacklist_force_mask_terms"]
assert "CHUXX" in params["blacklist_force_mask_terms"]
assert params["additional_stopwords"] == []

View File

@@ -56,8 +56,8 @@ class TestHeaderPiiDetection:
def test_structured_code_postal_preserves_label_and_audit(self):
cfg = load_dictionaries(None)
anon = anonymise_document_regex(["Code postal : 64100"], [[]], cfg)
anon = anonymise_document_regex(["Code postal : 12345"], [[]], cfg)
text = selective_rescan(anon.text_out, cfg)
assert text == "Code postal : [CODE_POSTAL]"
assert any(h.kind == "CODE_POSTAL" and h.original == "64100" for h in anon.audit)
assert any(h.kind == "CODE_POSTAL" and h.original == "12345" for h in anon.audit)

View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
from pathlib import Path
from profile_defaults import (
delete_runtime_profile,
ensure_runtime_profiles_config,
get_default_profile_key,
list_default_profile_keys,
list_effective_profiles,
load_effective_profiles_dict,
read_default_profiles_text,
read_runtime_profiles_overlay_text,
save_runtime_profile,
set_runtime_default_profile,
)
def test_default_profiles_template_is_externalized():
    """Check the default-profiles text and key list expose the expected entries.

    The text returned by ``read_default_profiles_text()`` must contain the
    ``default_profile`` key and the two named profile keys, and
    ``list_default_profile_keys()`` must include ``standard_local``.
    """
    template_text = read_default_profiles_text()
    # Each marker is checked on its own so a failure names the missing key.
    for marker in ("default_profile:", "chuxx_strict:", "partage_recherche:"):
        assert marker in template_text
    default_keys = list_default_profile_keys()
    assert "standard_local" in default_keys
def test_runtime_profiles_overlay_template_is_minimal():
    """Check the runtime overlay template text contains its two expected fragments.

    The text must mention ``profiles.default.yml`` and contain ``{}``.
    """
    overlay_text = read_runtime_profiles_overlay_text()
    mentions_defaults_file = "profiles.default.yml" in overlay_text
    assert mentions_defaults_file
    has_empty_mapping = "{}" in overlay_text
    assert has_empty_mapping
def test_runtime_profiles_overlay_is_created_and_merged(tmp_path: Path):
    """Check that the runtime overlay file is created, then merged when edited.

    Steps:
      1. ``ensure_runtime_profiles_config`` must return the path it was given
         and the file must exist afterwards.
      2. With the freshly created overlay, the effective default profile is
         ``standard_local``.
      3. After the overlay is rewritten with a different default and a
         description for ``partage_recherche``, both values must show up in
         the effective dictionary.
    """
    cfg_path = tmp_path / "profiles.yml"
    created = ensure_runtime_profiles_config(cfg_path)
    assert created == cfg_path
    assert cfg_path.exists()

    effective = load_effective_profiles_dict(cfg_path)
    assert effective["default_profile"] == "standard_local"

    # YAML nesting is indentation-based: ``description`` has to be indented
    # deeper than ``partage_recherche`` to be its child. At equal indentation
    # it would parse as a sibling and the last assertion below could not hold.
    cfg_path.write_text(
        "default_profile: partage_recherche\n"
        "profiles:\n"
        "  partage_recherche:\n"
        "    description: Profil local surcharge\n",
        encoding="utf-8",
    )

    effective = load_effective_profiles_dict(cfg_path)
    assert effective["default_profile"] == "partage_recherche"
    assert effective["profiles"]["partage_recherche"]["description"] == "Profil local surcharge"
def test_list_effective_profiles_normalizes_flags(tmp_path: Path):
    """Check that ``list_effective_profiles`` exposes a custom profile's fields.

    A ``custom`` profile is written to the overlay with boolean flags, a
    preferred manual-mask template, ``param_lists`` and a
    ``dictionaries_overlay``; every one of them must be readable back from the
    listing, together with the ``has_param_lists`` indicator.
    """
    cfg_path = tmp_path / "profiles.yml"
    # The nesting below mirrors the access paths used by the assertions
    # (e.g. profiles -> custom -> param_lists -> whitelist_phrases). YAML is
    # indentation-sensitive, so each level must be indented deeper than its
    # parent; a flat layout would turn these keys into siblings of ``custom``.
    cfg_path.write_text(
        "profiles:\n"
        "  custom:\n"
        "    label: Profil custom\n"
        "    require_manual_mask: true\n"
        "    force_disable_vlm: true\n"
        "    preferred_manual_mask_template: hopital/formulaire.yml\n"
        "    param_lists:\n"
        "      whitelist_phrases:\n"
        "        - DOCUMENT INTERNE\n"
        "      blacklist_force_mask_terms:\n"
        "        - CUSTOM_ETAB\n"
        "      additional_stopwords:\n"
        "        - DIM\n"
        "    dictionaries_overlay:\n"
        "      blacklist:\n"
        "        force_mask_terms:\n"
        "          - CUSTOM_ETAB\n",
        encoding="utf-8",
    )

    profiles = list_effective_profiles(cfg_path)
    custom = profiles["custom"]

    assert custom["label"] == "Profil custom"
    assert custom["require_manual_mask"] is True
    assert custom["force_disable_vlm"] is True
    assert custom["preferred_manual_mask_template"] == "hopital/formulaire.yml"
    assert custom["has_param_lists"] is True
    assert custom["param_lists"]["whitelist_phrases"] == ["DOCUMENT INTERNE"]
    assert custom["param_lists"]["blacklist_force_mask_terms"] == ["CUSTOM_ETAB"]
    assert custom["param_lists"]["additional_stopwords"] == ["DIM"]
    assert "CUSTOM_ETAB" in custom["dictionaries_overlay"]["blacklist"]["force_mask_terms"]
def test_default_profile_key_keeps_merged_default_when_available(tmp_path: Path):
    """Check the default key resolves to ``standard_local`` for an unknown default.

    The overlay names ``missing`` as default profile while only declaring a
    ``custom`` profile; ``get_default_profile_key`` is expected to return
    ``standard_local`` in that situation.
    """
    cfg_path = tmp_path / "profiles.yml"
    # ``label`` is indented under ``custom`` so it is parsed as an attribute
    # of that profile and not as a separate profile entry named ``label``.
    cfg_path.write_text(
        "default_profile: missing\n"
        "profiles:\n"
        "  custom:\n"
        "    label: Profil custom\n",
        encoding="utf-8",
    )
    assert get_default_profile_key(cfg_path) == "standard_local"
def test_save_runtime_profile_persists_new_profile_and_default(tmp_path: Path):
    """Save a new profile as default and read it back from the overlay file.

    ``save_runtime_profile`` is called with ``set_default=True``; afterwards
    the effective dictionary must report ``bureau_strict`` as default profile
    and the listing must return the label, flags, template and parameter lists
    that were saved.
    """
    overlay_file = tmp_path / "profiles.yml"

    # Payload built up front so the call below stays short.
    profile_payload = {
        "label": "Bureau strict",
        "description": "Profil créé depuis la GUI",
        "require_manual_mask": True,
        "force_disable_vlm": True,
        "preferred_manual_mask_template": "chuxx/formulaire.yml",
        "has_preferred_manual_mask_template": True,
        "param_lists": {
            "whitelist_phrases": ["VALIDATION DIM"],
            "blacklist_force_mask_terms": ["CHUXX"],
            "additional_stopwords": ["RUM"],
        },
        "has_param_lists": True,
        "dictionaries_overlay": {
            "blacklist": {
                "force_mask_terms": ["CHUXX"],
            },
        },
    }
    save_runtime_profile("bureau_strict", profile_payload, overlay_file, set_default=True)

    merged = load_effective_profiles_dict(overlay_file)
    assert merged["default_profile"] == "bureau_strict"

    stored = list_effective_profiles(overlay_file)["bureau_strict"]
    assert stored["label"] == "Bureau strict"
    assert stored["require_manual_mask"] is True
    assert stored["force_disable_vlm"] is True
    assert stored["preferred_manual_mask_template"] == "chuxx/formulaire.yml"

    stored_lists = stored["param_lists"]
    assert stored_lists["whitelist_phrases"] == ["VALIDATION DIM"]
    assert stored_lists["blacklist_force_mask_terms"] == ["CHUXX"]
    assert stored_lists["additional_stopwords"] == ["RUM"]
def test_set_and_delete_runtime_profile(tmp_path: Path):
    """Promote a saved profile to default, delete it, and check the fallback.

    After ``delete_runtime_profile`` the profile must be absent from the
    listing and ``get_default_profile_key`` must return ``standard_local``.
    """
    overlay_file = tmp_path / "profiles.yml"
    profile_key = "profil_temporaire"

    minimal_profile = {
        "label": "Profil temporaire",
        "description": "",
        "require_manual_mask": False,
        "force_disable_vlm": False,
        "dictionaries_overlay": {},
    }
    save_runtime_profile(profile_key, minimal_profile, overlay_file)

    set_runtime_default_profile(profile_key, overlay_file)
    assert get_default_profile_key(overlay_file) == "profil_temporaire"

    delete_runtime_profile(profile_key, overlay_file)
    remaining = list_effective_profiles(overlay_file)
    assert "profil_temporaire" not in remaining
    assert get_default_profile_key(overlay_file) == "standard_local"