Wire admin rules into ONNX anonymizer
This commit is contained in:
141
tests/unit/test_admin_rules_integration.py
Normal file
141
tests/unit/test_admin_rules_integration.py
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for the administration rules in the ONNX engine.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from anonymizer_core_refactored_onnx import (
|
||||
anonymise_document_regex,
|
||||
load_dictionaries,
|
||||
selective_rescan,
|
||||
)
|
||||
|
||||
|
||||
def _write_runtime_pair(tmp_path: Path, admin_rules_text: str) -> Path:
|
||||
dict_path = tmp_path / "dictionnaires.yml"
|
||||
dict_path.write_text("{}", encoding="utf-8")
|
||||
(tmp_path / "admin_rules.yml").write_text(admin_rules_text, encoding="utf-8")
|
||||
return dict_path
|
||||
|
||||
|
||||
def test_runtime_exact_term_rule_masks_local_sigle(tmp_path: Path):
# Integration test: an active `exact_term` admin rule whose exact_value is
# LOCAL_SIGLE and whose placeholder is "[MASK]" must replace that term in the
# anonymized text.
# tmp_path: directory that receives dictionnaires.yml and admin_rules.yml
# (both written by _write_runtime_pair).
# NOTE(review): only the dictionnaires.yml path is handed to load_dictionaries;
# presumably it picks up the sibling admin_rules.yml on its own - confirm.
|
||||
cfg_path = _write_runtime_pair(
|
||||
tmp_path,
|
||||
"""version: 1
|
||||
rules:
|
||||
- id: rule_local_sigle
|
||||
label: Masquer LOCAL_SIGLE
|
||||
type: exact_term
|
||||
action: mask
|
||||
placeholder: "[MASK]"
|
||||
status: active
|
||||
match:
|
||||
exact_value: LOCAL_SIGLE
|
||||
scope:
|
||||
document_families: [all]
|
||||
environments: [test]
|
||||
sections: [narrative, structured]
|
||||
governance:
|
||||
owner: qualite
|
||||
justification: Test d'integration local.
|
||||
created_at: "2026-04-21"
|
||||
review_required_for_activation: true
|
||||
approved_by: responsable_qualite
|
||||
tests:
|
||||
required_case_ids: [007_overlay_force_mask_local]
|
||||
""",
|
||||
)
|
||||
|
||||
# Pipeline under test: load the configuration, anonymize a one-element
# document, then run the selective rescan on the anonymized text.
# NOTE(review): the second argument [[]] is passed as-is; its meaning is
# defined by anonymise_document_regex - confirm.
cfg = load_dictionaries(cfg_path)
|
||||
anon = anonymise_document_regex(["Réorientation vers LOCAL_SIGLE en urgence."], [[]], cfg)
|
||||
text = selective_rescan(anon.text_out, cfg)
|
||||
|
||||
# The rescanned text must carry the placeholder, and the audit trail of the
# anonymization pass must record a "force_term" hit for the original term.
assert text == "Réorientation vers [MASK] en urgence."
|
||||
assert any(hit.kind == "force_term" and hit.original == "LOCAL_SIGLE" for hit in anon.audit)
|
||||
|
||||
|
||||
def test_runtime_normalized_identifier_masks_prefixed_and_bare_forms(tmp_path: Path):
# Integration test: an active `normalized_identifier` admin rule with
# canonical_value "1234567" and placeholder "[NDA]" must mask the value when
# it is glued to the "N°" prefix, separated from it by a space, and when it
# appears bare.
# tmp_path: directory that receives dictionnaires.yml and admin_rules.yml
# (both written by _write_runtime_pair).
# NOTE(review): only the dictionnaires.yml path is handed to load_dictionaries;
# presumably it picks up the sibling admin_rules.yml on its own - confirm.
|
||||
cfg_path = _write_runtime_pair(
|
||||
tmp_path,
|
||||
"""version: 1
|
||||
rules:
|
||||
- id: rule_identifier_1234567
|
||||
label: Identifier 1234567
|
||||
type: normalized_identifier
|
||||
action: mask
|
||||
placeholder: "[NDA]"
|
||||
status: active
|
||||
match:
|
||||
canonical_value: "1234567"
|
||||
normalization:
|
||||
case_insensitive: true
|
||||
whole_word: true
|
||||
multiline: true
|
||||
allow_bare_value: true
|
||||
accepted_prefixes: ["N°"]
|
||||
prefix_value_separators: ["", " "]
|
||||
scope:
|
||||
document_families: [all]
|
||||
environments: [test]
|
||||
sections: [narrative, structured, table]
|
||||
governance:
|
||||
owner: qualite
|
||||
justification: Test d'identifiant normalise.
|
||||
created_at: "2026-04-21"
|
||||
review_required_for_activation: true
|
||||
approved_by: responsable_qualite
|
||||
tests:
|
||||
required_case_ids: [003_multiline_venue_number]
|
||||
""",
|
||||
)
|
||||
|
||||
# Pipeline under test: load the configuration, anonymize a one-element
# document containing the three spellings, then run the selective rescan.
# NOTE(review): the second argument [[]] is passed as-is; its meaning is
# defined by anonymise_document_regex - confirm.
cfg = load_dictionaries(cfg_path)
|
||||
anon = anonymise_document_regex(["N°1234567 puis N° 1234567 et 1234567"], [[]], cfg)
|
||||
text = selective_rescan(anon.text_out, cfg)
|
||||
|
||||
# Only the number is replaced: the "N°" prefix and its separator stay in the
# output. The audit trail must record an "NDA" hit for the original value.
assert text == "N°[NDA] puis N° [NDA] et [NDA]"
|
||||
assert "1234567" not in text
|
||||
assert any(hit.kind == "NDA" and hit.original == "1234567" for hit in anon.audit)
|
||||
|
||||
|
||||
def test_runtime_contextual_identifier_masks_multiline_and_propagates_value(tmp_path: Path):
# Integration test: an active `contextual_identifier` admin rule with
# canonical_value ABC12345, context prefix "IPP" and placeholder "[IPP]" must
# mask the value that follows the prefix on the next line, and also the later
# occurrence of the same value that has no "IPP" prefix in front of it.
# tmp_path: directory that receives dictionnaires.yml and admin_rules.yml
# (both written by _write_runtime_pair).
# NOTE(review): only the dictionnaires.yml path is handed to load_dictionaries;
# presumably it picks up the sibling admin_rules.yml on its own - confirm.
|
||||
cfg_path = _write_runtime_pair(
|
||||
tmp_path,
|
||||
"""version: 1
|
||||
rules:
|
||||
- id: rule_context_ipp
|
||||
label: IPP contextuel
|
||||
type: contextual_identifier
|
||||
action: mask
|
||||
placeholder: "[IPP]"
|
||||
status: active
|
||||
match:
|
||||
canonical_value: ABC12345
|
||||
context_prefixes: ["IPP"]
|
||||
context_separators: [":", " : ", "\\n"]
|
||||
normalization:
|
||||
case_insensitive: true
|
||||
whole_word: true
|
||||
multiline: true
|
||||
scope:
|
||||
document_families: [all]
|
||||
environments: [test]
|
||||
sections: [structured, narrative]
|
||||
governance:
|
||||
owner: qualite
|
||||
justification: Test d'identifiant contextuel.
|
||||
created_at: "2026-04-21"
|
||||
review_required_for_activation: true
|
||||
approved_by: responsable_qualite
|
||||
tests:
|
||||
required_case_ids: [004_structured_admin_complete]
|
||||
""",
|
||||
)
|
||||
|
||||
# Pipeline under test: load the configuration, anonymize a one-element,
# three-line document, then run the selective rescan on the anonymized text.
# NOTE(review): the second argument [[]] is passed as-is; its meaning is
# defined by anonymise_document_regex - confirm.
cfg = load_dictionaries(cfg_path)
|
||||
anon = anonymise_document_regex(["IPP\nABC12345\nRappel ABC12345"], [[]], cfg)
|
||||
text = selective_rescan(anon.text_out, cfg)
|
||||
|
||||
# Both occurrences must be replaced while the "IPP" label line is kept, and
# the audit trail must record an "IPP" hit for the original value.
assert text == "IPP\n[IPP]\nRappel [IPP]"
|
||||
assert "ABC12345" not in text
|
||||
assert any(hit.kind == "IPP" and hit.original == "ABC12345" for hit in anon.audit)
|
||||
Reference in New Issue
Block a user