#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PDF Mask Designer (Standalone) ------------------------------ - Ouvre un PDF de référence - Permet de "dessiner des masques" (rectangles) à la souris, par page - Sauvegarde/charge un template (YAML/JSON) décrivant les masques - Prévisualise l'application des masques sur 1–2 PDF - Applique les masques : * Vectoriel : annotations de redaction (le texte est supprimé) * Raster : "brûle" les boîtes noires dans l'image de page (sécurité maximale) - Journal/Audit : écrit *.audit.jsonl avec MASK_TEMPLATE + bbox + nom de template Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML pip install pymupdf==1.24.9 Pillow==10.2.0 PyYAML==6.0.2 """ from __future__ import annotations import argparse import io import json import math import os from dataclasses import dataclass, asdict from pathlib import Path from typing import Dict, List, Optional, Tuple, Any import tkinter as tk from tkinter import filedialog, messagebox, ttk from PIL import Image, ImageTk import fitz # PyMuPDF import yaml from manual_masking import ( DEFAULT_MASK_OUTPUT_DIRNAME, DEFAULT_MASK_PREVIEW_DIRNAME, ) APP_TITLE = "Éditeur de masques PDF" TEMPLATE_VERSION = 1 # ----------------------------- Data structures ----------------------------- @dataclass class MaskRect: page: int x0: float y0: float x1: float y1: float label: str = "MASK" @dataclass class Template: name: str page_size: Tuple[float, float] # (width, height) in PDF points version: int = TEMPLATE_VERSION masks: List[MaskRect] = None def to_dict(self) -> Dict[str, Any]: return { "version": self.version, "name": self.name, "page_size": {"width": self.page_size[0], "height": self.page_size[1]}, "masks": [asdict(m) for m in (self.masks or [])], } @staticmethod def from_dict(d: Dict[str, Any]) -> "Template": ps = d.get("page_size") or {} masks = [] for m in d.get("masks", []): masks.append(MaskRect( page=int(m["page"]), x0=float(m["x0"]), y0=float(m["y0"]), x1=float(m["x1"]), y1=float(m["y1"]), label=m.get("label", "MASK") )) name = d.get("name") or "template" return Template(name=name, page_size=(float(ps.get("width", 595)), float(ps.get("height", 842))), version=int(d.get("version", TEMPLATE_VERSION)), masks=masks) # ----------------------------- Utility funcs ------------------------------ def clamp(v, a, b): return max(a, min(b, v)) def rect_norm(x0, y0, x1, y1) -> Tuple[float, float, float, float]: return (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)) def page_pix(doc: fitz.Document, pno: int, zoom: float) -> Image.Image: page = doc[pno] mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat, annots=False) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) return img def draw_overlay(img: Image.Image, rects: List[MaskRect], zoom: float, page: int) -> Image.Image: # returns a copy with alpha-red rectangles from PIL import ImageDraw out = img.copy() draw = ImageDraw.Draw(out, "RGBA") for r in rects: if r.page != page: continue draw.rectangle([r.x0*zoom, r.y0*zoom, r.x1*zoom, r.y1*zoom], fill=(0,0,0,110), outline=(0,0,0,220), width=2) return out def save_template_yaml(tpl: Template, path: Path): with open(path, "w", encoding="utf-8") as f: yaml.safe_dump(tpl.to_dict(), f, allow_unicode=True, sort_keys=False) def load_template_yaml(path: Path) -> Template: d = yaml.safe_load(path.read_text(encoding="utf-8")) or {} return Template.from_dict(d) # ----------------------------- Application logic -------------------------- def apply_template_vector(pdf_in: Path, pdf_out: Path, tpl: Template, audit_path: Path): doc = fitz.open(str(pdf_in)) w0, h0 = tpl.page_size with audit_path.open("w", encoding="utf-8") as audit: for pno in range(len(doc)): page = doc[pno] pw, ph = page.rect.width, page.rect.height # scaling if page size differs (simple proportional fit) sx, sy = pw / w0 if w0 else 1.0, ph / h0 if h0 else 1.0 for m in tpl.masks or []: if m.page not in (-1, pno): # -1 = all pages continue r = fitz.Rect(m.x0*sx, m.y0*sy, m.x1*sx, m.y1*sy) page.add_redact_annot(r, fill=(0,0,0)) audit.write(json.dumps({ "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno, "bbox": [round(r.x0,2), round(r.y0,2), round(r.x1,2), round(r.y1,2)], "mode": "vector" }, ensure_ascii=False) + "\n") try: page.apply_redactions() except Exception: pass doc.save(str(pdf_out), deflate=True, garbage=4, clean=True, incremental=False) doc.close() def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int, audit_path: Path): doc = fitz.open(str(pdf_in)) out = fitz.open() w0, h0 = tpl.page_size with audit_path.open("w", encoding="utf-8") as audit: for pno in range(len(doc)): page = doc[pno]; pw, ph = page.rect.width, page.rect.height sx, sy = pw / w0 if w0 else 1.0, ph / h0 if h0 else 1.0 zoom = dpi/72.0 pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), annots=False) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) from PIL import ImageDraw draw = ImageDraw.Draw(img) for m in tpl.masks or []: if m.page not in (-1, pno): continue r = fitz.Rect(m.x0*sx, m.y0*sy, m.x1*sx, m.y1*sy) draw.rectangle([r.x0*zoom, r.y0*zoom, r.x1*zoom, r.y1*zoom], fill=(0,0,0)) audit.write(json.dumps({ "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno, "bbox": [round(r.x0,2), round(r.y0,2), round(r.x1,2), round(r.y1,2)], "mode": "raster" }, ensure_ascii=False) + "\n") buf = io.BytesIO() img.save(buf, format="PNG"); buf.seek(0) dst = out.new_page(width=page.rect.width, height=page.rect.height) dst.insert_image(page.rect, stream=buf.getvalue()) out.save(str(pdf_out), deflate=True, garbage=4, clean=True) out.close(); doc.close() # ----------------------------- GUI ------------------------------ class MaskDesignerApp: def __init__( self, root: tk.Tk, *, initial_pdf: Optional[Path] = None, initial_template: Optional[Path] = None, templates_dir: Optional[Path] = None, output_dir_name: str = DEFAULT_MASK_OUTPUT_DIRNAME, preview_dir_name: str = DEFAULT_MASK_PREVIEW_DIRNAME, ): self.root = root self.root.title(APP_TITLE) self.root.geometry("1280x900") self.zoom = 1.25 # affichage self.doc: Optional[fitz.Document] = None self.doc_path: Optional[Path] = None self.curr_page = 0 self.curr_image: Optional[Image.Image] = None self.tk_image: Optional[ImageTk.PhotoImage] = None self.masks: Dict[int, List[MaskRect]] = {} # per-page self.template_name = tk.StringVar(value="template_masks") self.status = tk.StringVar(value="Prêt.") self.raster_dpi = tk.IntVar(value=200) self.templates_dir = templates_dir self.output_dir_name = output_dir_name self.preview_dir_name = preview_dir_name self.is_drawing = False self.start_xy: Optional[Tuple[int,int]] = None self._build_ui() if initial_pdf: self.open_pdf_path(initial_pdf) if initial_template: self.load_template_path(initial_template) # UI layout def _build_ui(self): top = tk.Frame(self.root, padx=8, pady=8) top.pack(fill=tk.BOTH, expand=True) bar = tk.Frame(top); bar.pack(fill=tk.X) tk.Button(bar, text="Ouvrir PDF…", command=self.open_pdf).pack(side=tk.LEFT) tk.Button(bar, text="←", command=self.prev_page).pack(side=tk.LEFT, padx=(8,2)) tk.Button(bar, text="→", command=self.next_page).pack(side=tk.LEFT, padx=2) tk.Button(bar, text="Zoom -", command=lambda: self.set_zoom( max(0.5, self.zoom-0.1) )).pack(side=tk.LEFT, padx=6) tk.Button(bar, text="Zoom +", command=lambda: self.set_zoom( self.zoom+0.1 )).pack(side=tk.LEFT, padx=2) tk.Label(bar, text="Nom template :").pack(side=tk.LEFT, padx=(12,2)) tk.Entry(bar, textvariable=self.template_name, width=24).pack(side=tk.LEFT) tk.Button(bar, text="Sauver template…", command=self.save_template).pack(side=tk.LEFT, padx=6) tk.Button(bar, text="Charger template…", command=self.load_template).pack(side=tk.LEFT, padx=2) tk.Button(bar, text="Effacer masques page", command=self.clear_page_masks).pack(side=tk.LEFT, padx=12) tools = tk.Frame(top); tools.pack(fill=tk.X, pady=(4,2)) tk.Label(tools, text="Prévisualiser / Appliquer sur un échantillon :").pack(side=tk.LEFT) tk.Button(tools, text="Prévisualiser (vector)", command=self.preview_vector).pack(side=tk.LEFT, padx=6) tk.Button(tools, text="Prévisualiser (raster)", command=self.preview_raster).pack(side=tk.LEFT, padx=2) tk.Label(tools, text="DPI raster:").pack(side=tk.LEFT, padx=(12,2)) tk.Entry(tools, textvariable=self.raster_dpi, width=6).pack(side=tk.LEFT) tk.Button(tools, text="Appliquer (vector)…", command=self.apply_vector_batch).pack(side=tk.LEFT, padx=(16,4)) tk.Button(tools, text="Appliquer (raster)…", command=self.apply_raster_batch).pack(side=tk.LEFT, padx=2) self.canvas = tk.Canvas(top, bg="#f5f7fb") self.canvas.pack(fill=tk.BOTH, expand=True, pady=(6,4)) self.canvas.bind("", self.on_down) self.canvas.bind("", self.on_drag) self.canvas.bind("", self.on_up) statusbar = tk.Label(self.root, textvariable=self.status, anchor="w", bd=1, relief=tk.SUNKEN) statusbar.pack(side=tk.BOTTOM, fill=tk.X) # Document handling def open_pdf(self): path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")]) if not path: return self.open_pdf_path(Path(path)) def open_pdf_path(self, path: Path): try: self.doc = fitz.open(str(path)) self.doc_path = Path(path) self.curr_page = 0 self.masks.clear() self.template_name.set(self.doc_path.stem + "_template") self.refresh() self.status.set(f"PDF ouvert : {self.doc_path.name} — {len(self.doc)} page(s)") except Exception as e: messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}") def refresh(self): if not self.doc: return img = page_pix(self.doc, self.curr_page, self.zoom) # overlay current page masks rects = self.masks.get(self.curr_page, []) img_o = draw_overlay(img, rects, self.zoom, self.curr_page) self.curr_image = img_o self.tk_image = ImageTk.PhotoImage(img_o) self.canvas.delete("all") self.canvas.create_image(0,0, anchor="nw", image=self.tk_image) self.canvas.config(scrollregion=(0,0,img_o.width, img_o.height)) def prev_page(self): if not self.doc: return self.curr_page = max(0, self.curr_page-1) self.refresh() def next_page(self): if not self.doc: return self.curr_page = min(len(self.doc)-1, self.curr_page+1) self.refresh() def set_zoom(self, z: float): self.zoom = clamp(z, 0.5, 3.0) self.refresh() # Drawing masks def on_down(self, ev): if not self.doc: return self.is_drawing = True x = self.canvas.canvasx(ev.x) y = self.canvas.canvasy(ev.y) self.start_xy = (x, y) self._preview_rect = self.canvas.create_rectangle(x, y, x, y, outline="#000", width=2) def on_drag(self, ev): if not self.doc or not self.is_drawing: return sx, sy = self.start_xy x = self.canvas.canvasx(ev.x) y = self.canvas.canvasy(ev.y) self.canvas.coords(self._preview_rect, sx, sy, x, y) def on_up(self, ev): if not self.doc or not self.is_drawing: return self.is_drawing = False sx, sy = self.start_xy x = self.canvas.canvasx(ev.x) y = self.canvas.canvasy(ev.y) x0, y0, x1, y1 = rect_norm(sx, sy, x, y) # convert screen px to PDF points page = self.doc[self.curr_page] # we rendered with zoom, but here current image is at display resolution (zoom applied in page_pix) # So we need to divide by zoom to get PDF points (since page_pix used Matrix(zoom, zoom)) z = self.zoom rx0, ry0, rx1, ry1 = x0 / z, y0 / z, x1 / z, y1 / z rect = MaskRect(page=self.curr_page, x0=rx0, y0=ry0, x1=rx1, y1=ry1, label="MASK") self.masks.setdefault(self.curr_page, []).append(rect) self.canvas.delete(self._preview_rect) self.refresh() self.status.set(f"Masque ajouté p.{self.curr_page+1}: ({int(rx0)},{int(ry0)})–({int(rx1)},{int(ry1)})") # Template I/O def _current_template(self) -> Template: if not self.doc: raise RuntimeError("Aucun PDF ouvert.") page0 = self.doc[0] tpl = Template( name=self.template_name.get().strip() or "template", page_size=(page0.rect.width, page0.rect.height), masks=[m for arr in self.masks.values() for m in arr] ) return tpl def save_template(self): try: tpl = self._current_template() except Exception as e: messagebox.showwarning("Info", str(e)); return path = filedialog.asksaveasfilename( defaultextension=".yml", filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")], initialdir=str(self._template_initialdir()), initialfile=f"{tpl.name}.yml", ) if not path: return p = Path(path) try: if p.suffix.lower() in (".yml", ".yaml"): save_template_yaml(tpl, p) else: p.write_text(json.dumps(tpl.to_dict(), ensure_ascii=False, indent=2), encoding="utf-8") messagebox.showinfo("OK", f"Template enregistré : {p.name}") except Exception as e: messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}") def load_template(self): path = filedialog.askopenfilename( filetypes=[("YAML/JSON", "*.yml *.yaml *.json")], initialdir=str(self._template_initialdir()), ) if not path: return self.load_template_path(Path(path)) def load_template_path(self, path: Path): p = Path(path) try: if p.suffix.lower() in (".yml", ".yaml"): tpl = load_template_yaml(p) else: tpl = Template.from_dict(json.loads(p.read_text(encoding="utf-8"))) self.template_name.set(tpl.name) # reset masks and map to current doc pages (keep same page numbers; -1 means all pages) self.masks.clear() for m in tpl.masks or []: self.masks.setdefault(m.page, []).append(m) self.refresh() self.status.set(f"Template chargé : {p.name}") except Exception as e: messagebox.showerror("Erreur", f"Template invalide : {e}") def clear_page_masks(self): if not self.doc: return if self.curr_page in self.masks: del self.masks[self.curr_page] self.refresh() self.status.set(f"Masques de la page {self.curr_page+1} supprimés.") def _template_initialdir(self) -> Path: if self.templates_dir is not None: self.templates_dir.mkdir(parents=True, exist_ok=True) return self.templates_dir if self.doc_path is not None: return self.doc_path.parent return Path.cwd() # Preview / Apply def _build_template_from_state(self) -> Optional[Template]: if not self.doc: messagebox.showwarning("Info", "Ouvrez d'abord un PDF de référence.") return None return self._current_template() def preview_vector(self): tpl = self._build_template_from_state() if not tpl: return samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF","*.pdf")]) if not samp: return for i, s in enumerate(samp[:2], start=1): pdf_in = Path(s) out_dir = pdf_in.parent / self.preview_dir_name out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.preview_vector.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_vector(pdf_in, pdf_out, tpl, audit) except Exception as e: messagebox.showerror("Erreur", f"Prévisualisation vectorielle échouée sur {pdf_in.name} : {e}") messagebox.showinfo( "Prévisualisation", f"Terminé (vectoriel). Ouvrez le dossier '{self.preview_dir_name}'.", ) def preview_raster(self): tpl = self._build_template_from_state() if not tpl: return samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF","*.pdf")]) if not samp: return dpi = int(self.raster_dpi.get()) for i, s in enumerate(samp[:2], start=1): pdf_in = Path(s) out_dir = pdf_in.parent / self.preview_dir_name out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.preview_raster.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit) except Exception as e: messagebox.showerror("Erreur", f"Prévisualisation raster échouée sur {pdf_in.name} : {e}") messagebox.showinfo( "Prévisualisation", f"Terminé (raster). Ouvrez le dossier '{self.preview_dir_name}'.", ) def apply_vector_batch(self): tpl = self._build_template_from_state() if not tpl: return files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (vectoriel)", filetypes=[("PDF","*.pdf")]) if not files: return for s in files: pdf_in = Path(s) out_dir = pdf_in.parent / self.output_dir_name out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.masked_vector.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_vector(pdf_in, pdf_out, tpl, audit) except Exception as e: messagebox.showerror("Erreur", f"Échec sur {pdf_in.name}: {e}") messagebox.showinfo("Terminé", "Masques appliqués (vectoriel).") def apply_raster_batch(self): tpl = self._build_template_from_state() if not tpl: return files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (raster)", filetypes=[("PDF","*.pdf")]) if not files: return dpi = int(self.raster_dpi.get()) for s in files: pdf_in = Path(s) out_dir = pdf_in.parent / self.output_dir_name out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.masked_raster.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit) except Exception as e: messagebox.showerror("Erreur", f"Échec sur {pdf_in.name}: {e}") messagebox.showinfo("Terminé", "Masques appliqués (raster).") # ----------------------------- Main ------------------------------ def build_arg_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description="Editeur de masques PDF reutilisables") parser.add_argument("--pdf", type=Path, help="PDF de reference a ouvrir au demarrage") parser.add_argument("--template", type=Path, help="Template YAML/JSON a charger au demarrage") parser.add_argument("--templates-dir", type=Path, help="Dossier par defaut pour sauver/charger les templates") parser.add_argument("--output-dir-name", default=DEFAULT_MASK_OUTPUT_DIRNAME, help="Nom du dossier de sortie pour l'application des masques") parser.add_argument("--preview-dir-name", default=DEFAULT_MASK_PREVIEW_DIRNAME, help="Nom du dossier de sortie pour les previsualisations") return parser def main(argv: Optional[List[str]] = None): args = build_arg_parser().parse_args(argv) root = tk.Tk() app = MaskDesignerApp( root, initial_pdf=args.pdf, initial_template=args.template, templates_dir=args.templates_dir, output_dir_name=args.output_dir_name, preview_dir_name=args.preview_dir_name, ) root.mainloop() if __name__ == "__main__": main()