feat: modules batch paths + masquage manuel + templates de masque
- gui_batch_paths.py : listing documents + construction chemins de sortie batch
- manual_masking.py : masquage manuel piloté par templates YAML
- config/mask_templates/ : template FC19

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
43
gui_batch_paths.py
Normal file
43
gui_batch_paths.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
def _is_relative_to(path: Path, other: Path) -> bool:
|
||||
try:
|
||||
path.relative_to(other)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def list_supported_documents(root_dir: Path, supported_extensions: Iterable[str]) -> list[Path]:
    """List supported input documents while ignoring the GUI output subtree.

    Args:
        root_dir: Directory that is searched recursively.
        supported_extensions: Accepted file extensions, compared
            case-insensitively with ``Path.suffix``. Each entry may be
            written with or without its leading dot (``".pdf"`` or
            ``"pdf"``). A single string is treated as one extension.

    Returns:
        The matching files, sorted. Anything located under
        ``root_dir / "anonymise"`` is left out.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(supported_extensions, str):
        supported_extensions = [supported_extensions]

    normalized_exts = set()
    for ext in supported_extensions:
        ext = ext.lower()
        # ``Path.suffix`` always carries the dot, so "pdf" must become ".pdf"
        # to be able to match anything.
        if ext and not ext.startswith("."):
            ext = "." + ext
        normalized_exts.add(ext)

    output_dir = root_dir / "anonymise"

    # Cheap lexical filters run first; ``is_file()`` needs a filesystem stat
    # and is therefore only evaluated for candidates that survived them.
    documents = [
        path
        for path in root_dir.rglob("*")
        if path.suffix.lower() in normalized_exts
        # Lexical "is output_dir or below it" test.
        and path != output_dir
        and output_dir not in path.parents
        and path.is_file()
    ]
    return sorted(documents)
|
||||
|
||||
|
||||
def build_batch_output_dir(root_dir: Path, output_root: Path, source_path: Path) -> Path:
    """Mirror the source file's folder layout beneath the batch output root.

    The folder of *source_path*, expressed relative to *root_dir*, is appended
    to *output_root*. A file sitting directly in *root_dir* maps to
    *output_root* itself.

    Raises:
        ValueError: Propagated from ``Path.relative_to`` when *source_path*
            is not located under *root_dir*.
    """
    sub_folder = source_path.relative_to(root_dir).parent
    # "." is what ``.parent`` yields for a file with no intermediate folder.
    is_top_level = sub_folder == Path(".")
    return output_root if is_top_level else output_root / sub_folder
|
||||
|
||||
|
||||
def iter_pseudonymized_texts(output_dir: Path):
    """Return an iterator over the anonymized text outputs below *output_dir*.

    The search is recursive and matches files named ``*.pseudonymise.txt``;
    the result is whatever ``Path.rglob`` produces for that pattern. Intended
    for post-run checks.
    """
    pattern = "*.pseudonymise.txt"
    matches = output_dir.rglob(pattern)
    return matches
|
||||
Reference in New Issue
Block a user