Wire admin rules into ONNX anonymizer

This commit is contained in:
2026-04-21 12:10:17 +02:00
parent e9dccdfad6
commit bc24a21fea
6 changed files with 631 additions and 217 deletions

View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Tests d'intégration des règles d'administration dans le moteur ONNX.
"""
from pathlib import Path
from anonymizer_core_refactored_onnx import (
anonymise_document_regex,
load_dictionaries,
selective_rescan,
)
def _write_runtime_pair(tmp_path: Path, admin_rules_text: str) -> Path:
dict_path = tmp_path / "dictionnaires.yml"
dict_path.write_text("{}", encoding="utf-8")
(tmp_path / "admin_rules.yml").write_text(admin_rules_text, encoding="utf-8")
return dict_path
def test_runtime_exact_term_rule_masks_local_sigle(tmp_path: Path):
    """Check that an active ``exact_term`` rule replaces LOCAL_SIGLE by [MASK].

    The rule file is written next to an empty dictionary file, the
    configuration is loaded from the dictionary path, and the anonymized
    output is rescanned before being compared with the expected text. The
    audit trail must contain a ``force_term`` hit for the original term.
    """
    # NOTE(review): the YAML is reproduced exactly as it appears in this view,
    # where nesting indentation seems to have been stripped - confirm against
    # the committed file before relying on its structure.
    admin_rules_yaml = """version: 1
rules:
- id: rule_local_sigle
label: Masquer LOCAL_SIGLE
type: exact_term
action: mask
placeholder: "[MASK]"
status: active
match:
exact_value: LOCAL_SIGLE
scope:
document_families: [all]
environments: [test]
sections: [narrative, structured]
governance:
owner: qualite
justification: Test d'integration local.
created_at: "2026-04-21"
review_required_for_activation: true
approved_by: responsable_qualite
tests:
required_case_ids: [007_overlay_force_mask_local]
"""
    dictionaries_file = _write_runtime_pair(tmp_path, admin_rules_yaml)
    # Presumably the loader also picks up the sibling admin_rules.yml - confirm.
    config = load_dictionaries(dictionaries_file)

    source_texts = ["Réorientation vers LOCAL_SIGLE en urgence."]
    result = anonymise_document_regex(source_texts, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    assert rescanned == "Réorientation vers [MASK] en urgence."
    assert any(
        entry.kind == "force_term" and entry.original == "LOCAL_SIGLE"
        for entry in result.audit
    )
def test_runtime_normalized_identifier_masks_prefixed_and_bare_forms(tmp_path: Path):
    """Check that a ``normalized_identifier`` rule masks every form of 1234567.

    The input contains the value glued to ``N°``, separated from ``N°`` by a
    space, and on its own; all three occurrences must become ``[NDA]`` after
    the rescan, and the audit trail must record an ``NDA`` hit for the value.
    """
    # NOTE(review): the YAML is reproduced exactly as it appears in this view,
    # where nesting indentation seems to have been stripped - confirm against
    # the committed file before relying on its structure.
    admin_rules_yaml = """version: 1
rules:
- id: rule_identifier_1234567
label: Identifier 1234567
type: normalized_identifier
action: mask
placeholder: "[NDA]"
status: active
match:
canonical_value: "1234567"
normalization:
case_insensitive: true
whole_word: true
multiline: true
allow_bare_value: true
accepted_prefixes: [""]
prefix_value_separators: ["", " "]
scope:
document_families: [all]
environments: [test]
sections: [narrative, structured, table]
governance:
owner: qualite
justification: Test d'identifiant normalise.
created_at: "2026-04-21"
review_required_for_activation: true
approved_by: responsable_qualite
tests:
required_case_ids: [003_multiline_venue_number]
"""
    dictionaries_file = _write_runtime_pair(tmp_path, admin_rules_yaml)
    # Presumably the loader also picks up the sibling admin_rules.yml - confirm.
    config = load_dictionaries(dictionaries_file)

    source_texts = ["N°1234567 puis N° 1234567 et 1234567"]
    result = anonymise_document_regex(source_texts, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    assert rescanned == "N°[NDA] puis N° [NDA] et [NDA]"
    # Explicit leak check: the raw identifier must not survive anywhere.
    assert "1234567" not in rescanned
    assert any(
        entry.kind == "NDA" and entry.original == "1234567"
        for entry in result.audit
    )
def test_runtime_contextual_identifier_masks_multiline_and_propagates_value(tmp_path: Path):
    """Check that a ``contextual_identifier`` rule masks ABC12345 everywhere.

    The input places the value on the line after an ``IPP`` label and repeats
    it later without the label; both occurrences must become ``[IPP]`` after
    the rescan, and the audit trail must record an ``IPP`` hit for the value.
    """
    # NOTE(review): the YAML is reproduced exactly as it appears in this view,
    # where nesting indentation seems to have been stripped - confirm against
    # the committed file before relying on its structure.
    admin_rules_yaml = """version: 1
rules:
- id: rule_context_ipp
label: IPP contextuel
type: contextual_identifier
action: mask
placeholder: "[IPP]"
status: active
match:
canonical_value: ABC12345
context_prefixes: ["IPP"]
context_separators: [":", " : ", "\\n"]
normalization:
case_insensitive: true
whole_word: true
multiline: true
scope:
document_families: [all]
environments: [test]
sections: [structured, narrative]
governance:
owner: qualite
justification: Test d'identifiant contextuel.
created_at: "2026-04-21"
review_required_for_activation: true
approved_by: responsable_qualite
tests:
required_case_ids: [004_structured_admin_complete]
"""
    dictionaries_file = _write_runtime_pair(tmp_path, admin_rules_yaml)
    # Presumably the loader also picks up the sibling admin_rules.yml - confirm.
    config = load_dictionaries(dictionaries_file)

    source_texts = ["IPP\nABC12345\nRappel ABC12345"]
    result = anonymise_document_regex(source_texts, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    assert rescanned == "IPP\n[IPP]\nRappel [IPP]"
    # Explicit leak check: the raw identifier must not survive anywhere.
    assert "ABC12345" not in rescanned
    assert any(
        entry.kind == "IPP" and entry.original == "ABC12345"
        for entry in result.audit
    )