#!/usr/bin/env python3
"""
Integration tests for the administration rules in the ONNX engine.

NOTE(review): the multi-line layout of the embedded YAML documents was
reconstructed from a whitespace-collapsed copy of this file. In particular,
``normalization`` is assumed to be nested under ``match`` -- confirm against
the admin_rules schema.
"""

from pathlib import Path

from anonymizer_core_refactored_onnx import (
    anonymise_document_regex,
    load_dictionaries,
    selective_rescan,
)


def _write_runtime_pair(tmp_path: Path, admin_rules_text: str) -> Path:
    """Write the two runtime configuration files into *tmp_path*.

    ``dictionnaires.yml`` is written as an empty mapping (``{}``) and
    ``admin_rules.yml`` receives *admin_rules_text* verbatim; both are
    encoded as UTF-8.

    Returns the path of ``dictionnaires.yml``. Presumably the loader finds
    ``admin_rules.yml`` next to it -- confirm against ``load_dictionaries``.
    """
    dictionaries_file = tmp_path.joinpath("dictionnaires.yml")
    rules_file = tmp_path.joinpath("admin_rules.yml")
    dictionaries_file.write_text("{}", encoding="utf-8")
    rules_file.write_text(admin_rules_text, encoding="utf-8")
    return dictionaries_file


def test_runtime_exact_term_rule_masks_local_sigle(tmp_path: Path):
    """An active ``exact_term`` rule masks LOCAL_SIGLE and is audited."""
    rules_yaml = """version: 1
rules:
  - id: rule_local_sigle
    label: Masquer LOCAL_SIGLE
    type: exact_term
    action: mask
    placeholder: "[MASK]"
    status: active
    match:
      exact_value: LOCAL_SIGLE
    scope:
      document_families: [all]
      environments: [test]
      sections: [narrative, structured]
    governance:
      owner: qualite
      justification: Test d'integration local.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [007_overlay_force_mask_local]
"""
    config = load_dictionaries(_write_runtime_pair(tmp_path, rules_yaml))

    source_lines = ["Réorientation vers LOCAL_SIGLE en urgence."]
    result = anonymise_document_regex(source_lines, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    assert rescanned == "Réorientation vers [MASK] en urgence."
    # The replacement must be traceable in the audit trail.
    assert any(
        entry.kind == "force_term" and entry.original == "LOCAL_SIGLE"
        for entry in result.audit
    )


def test_runtime_normalized_identifier_masks_prefixed_and_bare_forms(tmp_path: Path):
    """A ``normalized_identifier`` rule masks "N°"-prefixed and bare values."""
    rules_yaml = """version: 1
rules:
  - id: rule_identifier_1234567
    label: Identifier 1234567
    type: normalized_identifier
    action: mask
    placeholder: "[NDA]"
    status: active
    match:
      canonical_value: "1234567"
      normalization:
        case_insensitive: true
        whole_word: true
        multiline: true
      allow_bare_value: true
      accepted_prefixes: ["N°"]
      prefix_value_separators: ["", " "]
    scope:
      document_families: [all]
      environments: [test]
      sections: [narrative, structured, table]
    governance:
      owner: qualite
      justification: Test d'identifiant normalise.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [003_multiline_venue_number]
"""
    config = load_dictionaries(_write_runtime_pair(tmp_path, rules_yaml))

    source_lines = ["N°1234567 puis N° 1234567 et 1234567"]
    result = anonymise_document_regex(source_lines, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    # Glued prefix, spaced prefix and bare value are all replaced.
    assert rescanned == "N°[NDA] puis N° [NDA] et [NDA]"
    assert "1234567" not in rescanned
    assert any(
        entry.kind == "NDA" and entry.original == "1234567"
        for entry in result.audit
    )


def test_runtime_contextual_identifier_masks_multiline_and_propagates_value(tmp_path: Path):
    """A ``contextual_identifier`` rule masks the value after an "IPP" label
    on the next line, and also masks its later occurrence in the text."""
    rules_yaml = """version: 1
rules:
  - id: rule_context_ipp
    label: IPP contextuel
    type: contextual_identifier
    action: mask
    placeholder: "[IPP]"
    status: active
    match:
      canonical_value: ABC12345
      context_prefixes: ["IPP"]
      context_separators: [":", " : ", "\\n"]
      normalization:
        case_insensitive: true
        whole_word: true
        multiline: true
    scope:
      document_families: [all]
      environments: [test]
      sections: [structured, narrative]
    governance:
      owner: qualite
      justification: Test d'identifiant contextuel.
      created_at: "2026-04-21"
      review_required_for_activation: true
      approved_by: responsable_qualite
    tests:
      required_case_ids: [004_structured_admin_complete]
"""
    config = load_dictionaries(_write_runtime_pair(tmp_path, rules_yaml))

    source_lines = ["IPP\nABC12345\nRappel ABC12345"]
    result = anonymise_document_regex(source_lines, [[]], config)
    rescanned = selective_rescan(result.text_out, config)

    assert rescanned == "IPP\n[IPP]\nRappel [IPP]"
    assert "ABC12345" not in rescanned
    assert any(
        entry.kind == "IPP" and entry.original == "ABC12345"
        for entry in result.audit
    )