"""Tests for the mask-template and manual-mask helpers of ``manual_masking``."""

from pathlib import Path

from manual_masking import (
    DEFAULT_MASK_OUTPUT_DIRNAME,
    DEFAULT_MASK_PREVIEW_DIRNAME,
    append_jsonl_file,
    ensure_mask_templates_dir,
    list_mask_templates,
    mask_templates_dir,
    mask_template_label,
    resolve_manual_mask_pdf,
)


def test_mask_templates_dir_is_under_config():
    """``mask_templates_dir`` should return ``<base>/config/mask_templates``."""
    root = Path("/tmp/anonymisation")
    expected = root / "config" / "mask_templates"
    assert mask_templates_dir(root) == expected


def test_ensure_mask_templates_dir_creates_folder(tmp_path: Path):
    """``ensure_mask_templates_dir`` should return an existing directory."""
    result = ensure_mask_templates_dir(tmp_path)
    assert result == tmp_path / "config" / "mask_templates"
    assert result.is_dir()


def test_resolve_manual_mask_pdf_accepts_only_pdf():
    """PDF paths (either suffix case) come back unchanged; anything else is None."""
    for accepted in (Path("/tmp/test.pdf"), Path("/tmp/test.PDF")):
        assert resolve_manual_mask_pdf(accepted) == accepted

    # A non-PDF suffix and a missing path are both expected to be rejected.
    for rejected in (Path("/tmp/test.docx"), None):
        assert resolve_manual_mask_pdf(rejected) is None


def test_manual_mask_outputs_follow_project_convention():
    """The default output and preview directory names are pinned here."""
    observed = (DEFAULT_MASK_OUTPUT_DIRNAME, DEFAULT_MASK_PREVIEW_DIRNAME)
    assert observed == ("anonymise", "anonymise_preview")


def test_list_mask_templates_filters_supported_extensions(tmp_path: Path):
    """Only the ``.yml`` and nested ``.json`` files should be listed, not ``.txt``."""
    root = ensure_mask_templates_dir(tmp_path)
    yml_template = root / "alpha.yml"
    ignored_file = root / "beta.txt"
    json_template = root / "nested" / "gamma.json"

    # The nested folder has to exist before its file can be written.
    json_template.parent.mkdir(parents=True)
    for path in (yml_template, ignored_file, json_template):
        path.write_text("x", encoding="utf-8")

    found = list_mask_templates(tmp_path)
    assert found == [yml_template, json_template]

    label = mask_template_label(json_template, tmp_path)
    assert label == "nested/gamma.json"


def test_append_jsonl_file_appends_non_empty_content(tmp_path: Path):
    """After appending, the target should hold its own record followed by the extra one."""
    target = tmp_path / "target.jsonl"
    extra = tmp_path / "extra.jsonl"
    target.write_text('{"kind":"A"}\n', encoding="utf-8")
    extra.write_text('{"kind":"B"}\n', encoding="utf-8")

    append_jsonl_file(target, extra)

    merged = target.read_text(encoding="utf-8")
    assert merged == '{"kind":"A"}\n{"kind":"B"}\n'