"""Path helpers for mask templates and a small JSONL append utility."""

from __future__ import annotations

import os
from pathlib import Path
from typing import Optional

# Location of the mask templates, relative to a caller-supplied base directory.
MASK_TEMPLATES_SUBDIR = Path("config") / "mask_templates"
# File suffixes (compared in lower case) that count as mask templates.
MASK_TEMPLATE_EXTENSIONS = {".yml", ".yaml", ".json"}
# Directory names; not used in this module, presumably consumed by callers.
DEFAULT_MASK_OUTPUT_DIRNAME = "anonymise"
DEFAULT_MASK_PREVIEW_DIRNAME = "anonymise_preview"


def mask_templates_dir(base_dir: Path) -> Path:
    """Return the mask-template directory under *base_dir* (not created)."""
    return base_dir / MASK_TEMPLATES_SUBDIR


def ensure_mask_templates_dir(base_dir: Path) -> Path:
    """Create the mask-template directory under *base_dir* if needed.

    Missing parent directories are created as well; an already existing
    directory is not an error.

    Returns:
        The path of the mask-template directory.
    """
    path = mask_templates_dir(base_dir)
    path.mkdir(parents=True, exist_ok=True)
    return path


def resolve_manual_mask_pdf(single_file: Optional[Path]) -> Optional[Path]:
    """Return *single_file* when its suffix is ``.pdf``, otherwise ``None``.

    The suffix comparison is case-insensitive. Only the name is inspected;
    the file is not required to exist.
    """
    if single_file is None:
        return None
    if single_file.suffix.lower() != ".pdf":
        return None
    return single_file


def list_mask_templates(base_dir: Path) -> list[Path]:
    """List the template files found under the mask-template directory.

    The directory is created first when it does not exist, so calling this
    function has a filesystem side effect. The search is recursive and keeps
    only regular files whose lower-cased suffix is in
    ``MASK_TEMPLATE_EXTENSIONS``.

    Returns:
        The matching paths in sorted order.
    """
    templates_root = ensure_mask_templates_dir(base_dir)
    return sorted(
        path
        for path in templates_root.rglob("*")
        if path.is_file() and path.suffix.lower() in MASK_TEMPLATE_EXTENSIONS
    )


def mask_template_label(path: Path, base_dir: Optional[Path] = None) -> str:
    """Return a display label for the template at *path*.

    With a *base_dir*, the label is *path* relative to that base directory's
    mask-template directory. When *base_dir* is ``None`` or *path* does not
    lie inside that directory, the bare file name is returned instead.
    """
    if base_dir is None:
        return path.name
    try:
        return str(path.relative_to(mask_templates_dir(base_dir)))
    except ValueError:
        # path is outside the template directory; fall back to the file name.
        return path.name


def _missing_trailing_newline(path: Path) -> bool:
    """Return True when *path* is non-empty and its last byte is not a newline."""
    with path.open("rb") as handle:
        handle.seek(0, os.SEEK_END)
        if handle.tell() == 0:
            return False
        handle.seek(-1, os.SEEK_END)
        return handle.read(1) != b"\n"


def append_jsonl_file(target_path: Path, extra_path: Path) -> None:
    """Append the content of *extra_path* to the end of *target_path*.

    Nothing happens when either file is missing, or when *extra_path*
    contains only whitespace. Leading and trailing whitespace of the extra
    content is stripped and exactly one newline is written after it.

    If *target_path* is non-empty and does not end with a newline, a newline
    is written first so that the last existing record and the first appended
    record do not end up on the same line.
    """
    if not target_path.exists() or not extra_path.exists():
        return
    extra_text = extra_path.read_text(encoding="utf-8").strip()
    if not extra_text:
        return
    separator = "\n" if _missing_trailing_newline(target_path) else ""
    with target_path.open("a", encoding="utf-8") as target:
        target.write(separator + extra_text + "\n")