Files
anonymisation/gui_batch_paths.py

44 lines
1.3 KiB
Python

from __future__ import annotations
from pathlib import Path
from typing import Iterable
def _is_relative_to(path: Path, other: Path) -> bool:
    """Report whether ``path`` is ``other`` itself or is located beneath it.

    Thin wrapper around ``Path.relative_to`` that turns its ``ValueError``
    (raised when ``other`` is not a prefix of ``path``) into ``False``.

    Args:
        path: The path being tested.
        other: The candidate ancestor path.

    Returns:
        ``True`` when ``path.relative_to(other)`` succeeds, else ``False``.
    """
    try:
        path.relative_to(other)
    except ValueError:
        # ``other`` is not a leading portion of ``path``.
        return False
    else:
        return True
def list_supported_documents(root_dir: Path, supported_extensions: Iterable[str]) -> list[Path]:
    """Collect the supported input files under ``root_dir``, sorted.

    The tree is walked recursively. An entry is kept when it is a regular
    file, does not sit under ``root_dir / "anonymise"`` (the GUI output
    subtree), and has a suffix that matches one of ``supported_extensions``
    case-insensitively.

    Args:
        root_dir: Directory to scan recursively.
        supported_extensions: Suffixes to accept, compared against
            ``Path.suffix`` after lower-casing both sides.

    Returns:
        The matching paths in sorted order.
    """
    allowed_suffixes = {suffix.lower() for suffix in supported_extensions}
    excluded_root = root_dir / "anonymise"
    return sorted(
        candidate
        for candidate in root_dir.rglob("*")
        if candidate.is_file()
        # Skip anything previously written into the output subtree.
        and not _is_relative_to(candidate, excluded_root)
        and candidate.suffix.lower() in allowed_suffixes
    )
def build_batch_output_dir(root_dir: Path, output_root: Path, source_path: Path) -> Path:
    """Mirror the source file's parent directory beneath ``output_root``.

    Args:
        root_dir: Base directory that ``source_path`` is expressed against.
        output_root: Top-level directory of the batch output.
        source_path: Input file path; must be located under ``root_dir``.

    Returns:
        ``output_root`` itself when the file sits directly in ``root_dir``,
        otherwise ``output_root`` joined with the file's parent directory
        relative to ``root_dir``.

    Raises:
        ValueError: If ``source_path`` is not under ``root_dir`` (propagated
            from ``Path.relative_to``).
    """
    sub_dir = source_path.relative_to(root_dir).parent
    # An empty ``parts`` tuple is the "." path: no intermediate directories.
    return output_root / sub_dir if sub_dir.parts else output_root
def iter_pseudonymized_texts(output_dir: Path):
    """Return an iterator over pseudonymised text files below ``output_dir``.

    The search is recursive and matches every path whose name ends in
    ``.pseudonymise.txt``; it is intended for post-run checks.

    Args:
        output_dir: Directory whose subtree is searched.

    Returns:
        The iterator produced by ``Path.rglob`` for the pattern above.
    """
    text_pattern = "*.pseudonymise.txt"
    return output_dir.rglob(text_pattern)