from __future__ import annotations

from pathlib import Path
from typing import Iterable, Iterator


def _is_relative_to(path: Path, other: Path) -> bool:
    """Return True when *path* is *other* itself or is located beneath it.

    The comparison is purely lexical (it relies on ``Path.relative_to``), so
    neither path has to exist and symlinks are not resolved.

    NOTE(review): this mirrors ``Path.is_relative_to`` (Python 3.9+); it is
    presumably hand-rolled to support older interpreters -- confirm before
    replacing it.
    """
    try:
        path.relative_to(other)
    except ValueError:
        # ``relative_to`` raises ValueError when *path* is not under *other*.
        return False
    return True


def _normalize_extension(ext: str) -> str:
    """Lower-case *ext* and give it the leading dot that ``Path.suffix`` uses."""
    lowered = ext.lower()
    # An empty string is kept untouched: it matches files without any suffix.
    if lowered and not lowered.startswith("."):
        return f".{lowered}"
    return lowered


def list_supported_documents(root_dir: Path, supported_extensions: Iterable[str]) -> list[Path]:
    """List supported input documents while ignoring the GUI output subtree.

    Args:
        root_dir: Directory that is searched recursively.
        supported_extensions: Accepted file extensions. Matching is
            case-insensitive and the leading dot is optional, so ``".pdf"``,
            ``"pdf"`` and ``"PDF"`` are all equivalent. An empty string
            selects files that have no suffix.

    Returns:
        The matching regular files, sorted. Anything located under
        ``root_dir / "anonymise"`` is excluded.
    """
    normalized_exts = {_normalize_extension(ext) for ext in supported_extensions}
    output_dir = root_dir / "anonymise"
    # The two string-only filters run first so that ``is_file()`` (which
    # touches the filesystem) is only called for plausible candidates.
    return sorted(
        candidate
        for candidate in root_dir.rglob("*")
        if candidate.suffix.lower() in normalized_exts
        and not _is_relative_to(candidate, output_dir)
        and candidate.is_file()
    )


def build_batch_output_dir(root_dir: Path, output_root: Path, source_path: Path) -> Path:
    """Preserve the source parent path under the batch output directory.

    Args:
        root_dir: Root of the input tree.
        output_root: Root of the batch output tree.
        source_path: Path of an input document located under *root_dir*.

    Returns:
        *output_root* itself when the document sits directly in *root_dir*,
        otherwise *output_root* joined with the document's parent directory
        expressed relative to *root_dir*.

    Raises:
        ValueError: If *source_path* is not located under *root_dir*.
    """
    relative_parent = source_path.relative_to(root_dir).parent
    if relative_parent == Path("."):
        # The document lives directly in the root: no sub-directory to mirror.
        return output_root
    return output_root / relative_parent


def iter_pseudonymized_texts(output_dir: Path) -> Iterator[Path]:
    """Return a lazy iterator over anonymized text outputs for post-run checks.

    Every path below *output_dir* (searched recursively) whose name matches
    ``*.pseudonymise.txt`` is produced.
    """
    return output_dir.rglob("*.pseudonymise.txt")