142 lines
4.1 KiB
Python
142 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tests d'intégration des règles d'administration dans le moteur ONNX.
|
|
"""
|
|
from pathlib import Path
|
|
|
|
from anonymizer_core_refactored_onnx import (
|
|
anonymise_document_regex,
|
|
load_dictionaries,
|
|
selective_rescan,
|
|
)
|
|
|
|
|
|
def _write_runtime_pair(tmp_path: Path, admin_rules_text: str) -> Path:
    """Write the pair of configuration files used by the runtime tests.

    Creates ``dictionnaires.yml`` containing an empty mapping (``{}``) and,
    in the same directory, ``admin_rules.yml`` containing
    ``admin_rules_text``. Both files are written as UTF-8.

    Args:
        tmp_path: Directory that receives both files; it must already exist.
        admin_rules_text: Full text to store in ``admin_rules.yml``.

    Returns:
        The path of the ``dictionnaires.yml`` file.
    """
    dict_path = tmp_path / "dictionnaires.yml"
    dict_path.write_text("{}", encoding="utf-8")
    # Only the dictionary path is returned; presumably the loader finds the
    # rules file because it sits next to it — TODO confirm in load_dictionaries.
    (tmp_path / "admin_rules.yml").write_text(admin_rules_text, encoding="utf-8")
    return dict_path
|
|
|
|
|
|
def test_runtime_exact_term_rule_masks_local_sigle(tmp_path: Path) -> None:
    """Check that an active ``exact_term`` rule masks its configured term.

    Writes a rule file with one ``exact_term`` rule whose ``exact_value`` is
    ``LOCAL_SIGLE`` and whose placeholder is ``[MASK]``, loads the
    configuration, anonymises a one-line document and passes the result
    through ``selective_rescan``. The final text must carry the placeholder
    in place of the term, and the audit of the first pass must contain a
    ``force_term`` hit whose original value is ``LOCAL_SIGLE``.

    Args:
        tmp_path: Per-test temporary directory (pytest fixture).
    """
    # NOTE(review): the nesting of this YAML literal (which keys sit under
    # `match`, `scope`, `governance` and `tests`) is an assumption — confirm
    # against the admin-rules schema.
    cfg_path = _write_runtime_pair(
        tmp_path,
        """version: 1
rules:
  - id: rule_local_sigle
    label: Masquer LOCAL_SIGLE
    type: exact_term
    action: mask
    placeholder: "[MASK]"
    status: active
    match:
      exact_value: LOCAL_SIGLE
    scope:
      document_families: [all]
      environments: [test]
      sections: [narrative, structured]
    governance:
      owner: qualite
      justification: Test d'integration local.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [007_overlay_force_mask_local]
""",
    )

    cfg = load_dictionaries(cfg_path)
    # Second argument is a list holding one empty list; presumably per-line
    # extra data for the single input line — TODO confirm against the engine.
    anon = anonymise_document_regex(["Réorientation vers LOCAL_SIGLE en urgence."], [[]], cfg)
    text = selective_rescan(anon.text_out, cfg)

    assert text == "Réorientation vers [MASK] en urgence."
    # The audit trail is read from the first pass, not from the rescan.
    assert any(hit.kind == "force_term" and hit.original == "LOCAL_SIGLE" for hit in anon.audit)
|
|
|
|
|
|
def test_runtime_normalized_identifier_masks_prefixed_and_bare_forms(tmp_path: Path) -> None:
    """Check that a ``normalized_identifier`` rule masks every written form.

    The rule targets the canonical value ``1234567`` with placeholder
    ``[NDA]``, accepts the prefix ``N°`` with either no separator or a single
    space, and allows the bare value. The input contains all three forms
    (``N°1234567``, ``N° 1234567``, ``1234567``). After anonymisation and
    ``selective_rescan`` only the digits must be replaced (the prefix stays),
    the raw value must no longer appear, and the audit of the first pass must
    contain an ``NDA`` hit whose original value is ``1234567``.

    Args:
        tmp_path: Per-test temporary directory (pytest fixture).
    """
    # NOTE(review): the nesting of this YAML literal is an assumption — in
    # particular that `normalization` is a sibling of `match` and owns all six
    # keys that follow it. Confirm against the admin-rules schema.
    cfg_path = _write_runtime_pair(
        tmp_path,
        """version: 1
rules:
  - id: rule_identifier_1234567
    label: Identifier 1234567
    type: normalized_identifier
    action: mask
    placeholder: "[NDA]"
    status: active
    match:
      canonical_value: "1234567"
    normalization:
      case_insensitive: true
      whole_word: true
      multiline: true
      allow_bare_value: true
      accepted_prefixes: ["N°"]
      prefix_value_separators: ["", " "]
    scope:
      document_families: [all]
      environments: [test]
      sections: [narrative, structured, table]
    governance:
      owner: qualite
      justification: Test d'identifiant normalise.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [003_multiline_venue_number]
""",
    )

    cfg = load_dictionaries(cfg_path)
    # Second argument is a list holding one empty list; presumably per-line
    # extra data for the single input line — TODO confirm against the engine.
    anon = anonymise_document_regex(["N°1234567 puis N° 1234567 et 1234567"], [[]], cfg)
    text = selective_rescan(anon.text_out, cfg)

    assert text == "N°[NDA] puis N° [NDA] et [NDA]"
    # Implied by the equality above; kept as an explicit leak check.
    assert "1234567" not in text
    # The audit trail is read from the first pass, not from the rescan.
    assert any(hit.kind == "NDA" and hit.original == "1234567" for hit in anon.audit)
|
|
|
|
|
|
def test_runtime_contextual_identifier_masks_multiline_and_propagates_value(tmp_path: Path) -> None:
    """Check a ``contextual_identifier`` rule across lines and on repeats.

    The rule targets the canonical value ``ABC12345`` with placeholder
    ``[IPP]``, context prefix ``IPP`` and the separators ``:``, `` : `` and a
    newline. The input puts the value on the line after ``IPP`` and repeats it
    later without the prefix (``Rappel ABC12345``). After anonymisation and
    ``selective_rescan`` both occurrences must be replaced, the raw value must
    no longer appear, and the audit of the first pass must contain an ``IPP``
    hit whose original value is ``ABC12345``.

    Args:
        tmp_path: Per-test temporary directory (pytest fixture).
    """
    # NOTE(review): the nesting of this YAML literal is an assumption — in
    # particular that `normalization` is a sibling of `match` rather than a
    # child of it. Confirm against the admin-rules schema.
    # The "\\n" below reaches YAML as the two characters backslash + n inside
    # a double-quoted scalar, which YAML decodes to a newline.
    cfg_path = _write_runtime_pair(
        tmp_path,
        """version: 1
rules:
  - id: rule_context_ipp
    label: IPP contextuel
    type: contextual_identifier
    action: mask
    placeholder: "[IPP]"
    status: active
    match:
      canonical_value: ABC12345
      context_prefixes: ["IPP"]
      context_separators: [":", " : ", "\\n"]
    normalization:
      case_insensitive: true
      whole_word: true
      multiline: true
    scope:
      document_families: [all]
      environments: [test]
      sections: [structured, narrative]
    governance:
      owner: qualite
      justification: Test d'identifiant contextuel.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [004_structured_admin_complete]
""",
    )

    cfg = load_dictionaries(cfg_path)
    # Second argument is a list holding one empty list; presumably per-line
    # extra data for the single input element — TODO confirm against the engine.
    anon = anonymise_document_regex(["IPP\nABC12345\nRappel ABC12345"], [[]], cfg)
    text = selective_rescan(anon.text_out, cfg)

    assert text == "IPP\n[IPP]\nRappel [IPP]"
    # Implied by the equality above; kept as an explicit leak check.
    assert "ABC12345" not in text
    # The audit trail is read from the first pass, not from the rescan.
    assert any(hit.kind == "IPP" and hit.original == "ABC12345" for hit in anon.audit)
|