From 5f8825a0d9f6a7e5bdce8a4bb111fee42b8a7d32 Mon Sep 17 00:00:00 2001
From: Domi31tls
Date: Thu, 4 Jun 2026 16:30:56 +0200
Subject: [PATCH] feat: modules batch paths + masquage manuel + templates de masque
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- gui_batch_paths.py : listing documents + construction chemins de sortie batch
- manual_masking.py : masquage manuel piloté par templates YAML
- config/mask_templates/ : template FC19

Co-Authored-By: Claude Opus 4.8 (1M context)
---
Review note: gui_batch_paths.py and manual_masking.py were edited after
export, so the blob ids on their "index" lines are stale.

 config/mask_templates/FC19_template.yml | 18 ++++++
 gui_batch_paths.py                      | 59 +++++++++++++++++
 manual_masking.py                       | 84 +++++++++++++++++++++++++
 3 files changed, 161 insertions(+)
 create mode 100644 config/mask_templates/FC19_template.yml
 create mode 100644 gui_batch_paths.py
 create mode 100644 manual_masking.py

diff --git a/config/mask_templates/FC19_template.yml b/config/mask_templates/FC19_template.yml
new file mode 100644
index 0000000..055006e
--- /dev/null
+++ b/config/mask_templates/FC19_template.yml
@@ -0,0 +1,18 @@
+version: 1
+name: FC19_template
+page_size:
+  width: 595.0
+  height: 842.0
+masks:
+- page: 0
+  x0: 123.2
+  y0: 25.6
+  x1: 485.6
+  y1: 66.4
+  label: MASK
+- page: 0
+  x0: 205.6
+  y0: 351.2
+  x1: 341.6
+  y1: 367.2
+  label: MASK
diff --git a/gui_batch_paths.py b/gui_batch_paths.py
new file mode 100644
index 0000000..777f488
--- /dev/null
+++ b/gui_batch_paths.py
@@ -0,0 +1,59 @@
+"""Path helpers for GUI batch runs: input listing and output layout."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Iterable, Iterator
+
+
+def _is_relative_to(path: Path, other: Path) -> bool:
+    """Return True when *path* is *other* or lies underneath it."""
+    try:
+        path.relative_to(other)
+    except ValueError:
+        return False
+    return True
+
+
+def _normalize_extension(extension: str) -> str:
+    """Lower-case *extension* and make sure it carries a leading dot."""
+    lowered = extension.lower()
+    if lowered and not lowered.startswith("."):
+        return "." + lowered
+    return lowered
+
+
+def list_supported_documents(root_dir: Path, supported_extensions: Iterable[str]) -> list[Path]:
+    """List supported input documents while ignoring the GUI output subtree.
+
+    Extensions are matched case-insensitively and may be given with or
+    without the leading dot (``".pdf"`` and ``"pdf"`` are equivalent).
+    Anything under ``root_dir / "anonymise"`` is skipped. The result is
+    returned sorted.
+    """
+    normalized_exts = {_normalize_extension(ext) for ext in supported_extensions}
+    output_dir = root_dir / "anonymise"
+    return sorted(
+        path
+        for path in root_dir.rglob("*")
+        if path.is_file()
+        and not _is_relative_to(path, output_dir)
+        and path.suffix.lower() in normalized_exts
+    )
+
+
+def build_batch_output_dir(root_dir: Path, output_root: Path, source_path: Path) -> Path:
+    """Preserve the source parent path under the batch output directory.
+
+    Raises ``ValueError`` (from ``Path.relative_to``) when *source_path* is
+    not located under *root_dir*.
+    """
+    relative_parent = source_path.relative_to(root_dir).parent
+    if relative_parent == Path("."):
+        return output_root
+    return output_root / relative_parent
+
+
+def iter_pseudonymized_texts(output_dir: Path) -> Iterator[Path]:
+    """Yield anonymized text outputs recursively for post-run checks."""
+    return output_dir.rglob("*.pseudonymise.txt")
diff --git a/manual_masking.py b/manual_masking.py
new file mode 100644
index 0000000..b6c0134
--- /dev/null
+++ b/manual_masking.py
@@ -0,0 +1,84 @@
+"""Helpers for template-driven manual masking (template lookup, JSONL merge)."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional
+
+
+MASK_TEMPLATES_SUBDIR = Path("config") / "mask_templates"
+MASK_TEMPLATE_EXTENSIONS = {".yml", ".yaml", ".json"}
+DEFAULT_MASK_OUTPUT_DIRNAME = "anonymise"
+DEFAULT_MASK_PREVIEW_DIRNAME = "anonymise_preview"
+
+
+def mask_templates_dir(base_dir: Path) -> Path:
+    """Return the mask template directory located under *base_dir*."""
+    return base_dir / MASK_TEMPLATES_SUBDIR
+
+
+def ensure_mask_templates_dir(base_dir: Path) -> Path:
+    """Create the mask template directory if needed and return it."""
+    path = mask_templates_dir(base_dir)
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def resolve_manual_mask_pdf(single_file: Optional[Path]) -> Optional[Path]:
+    """Return *single_file* when its suffix is ``.pdf``, otherwise None."""
+    if single_file is None:
+        return None
+    if single_file.suffix.lower() != ".pdf":
+        return None
+    return single_file
+
+
+def list_mask_templates(base_dir: Path) -> list[Path]:
+    """Return the sorted template files found recursively under *base_dir*.
+
+    The template directory is created first when it does not exist yet.
+    """
+    templates_root = ensure_mask_templates_dir(base_dir)
+    return sorted(
+        path
+        for path in templates_root.rglob("*")
+        if path.is_file() and path.suffix.lower() in MASK_TEMPLATE_EXTENSIONS
+    )
+
+
+def mask_template_label(path: Path, base_dir: Optional[Path] = None) -> str:
+    """Return a display label: template-relative path, else the file name."""
+    if base_dir is None:
+        return path.name
+    try:
+        return str(path.relative_to(mask_templates_dir(base_dir)))
+    except ValueError:
+        return path.name
+
+
+def _ends_with_newline(path: Path) -> bool:
+    """Return True when *path* is empty or its last byte is a newline."""
+    with path.open("rb") as handle:
+        if handle.seek(0, os.SEEK_END) == 0:
+            return True
+        handle.seek(-1, os.SEEK_END)
+        return handle.read(1) == b"\n"
+
+
+def append_jsonl_file(target_path: Path, extra_path: Path) -> None:
+    """Append the records of *extra_path* to the JSONL file *target_path*.
+
+    Nothing is written when either file is missing or when *extra_path*
+    holds only whitespace; the target file is never created here.
+    """
+    if not target_path.exists() or not extra_path.exists():
+        return
+    extra_text = extra_path.read_text(encoding="utf-8").strip()
+    if not extra_text:
+        return
+    # A target whose last record lacks a trailing newline would otherwise be
+    # glued to the first appended record, corrupting both JSON lines.
+    separator = "" if _ends_with_newline(target_path) else "\n"
+    with target_path.open("a", encoding="utf-8") as target:
+        target.write(separator + extra_text + "\n")