#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ PDF Mask Designer (Standalone) ------------------------------ - Ouvre un PDF de référence - Permet de "dessiner des masques" (rectangles) à la souris, par page - Sauvegarde/charge un template (YAML/JSON) décrivant les masques - Prévisualise l'application des masques sur 1–2 PDF - Applique les masques : * Vectoriel : annotations de redaction (le texte est supprimé) * Raster : "brûle" les boîtes noires dans l'image de page (sécurité maximale) - Journal/Audit : écrit *.audit.jsonl avec MASK_TEMPLATE + bbox + nom de template Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML pip install pymupdf==1.24.9 Pillow==10.2.0 PyYAML==6.0.2 """ from __future__ import annotations import io import json import math import os from dataclasses import dataclass, asdict from pathlib import Path from typing import Dict, List, Optional, Tuple, Any import tkinter as tk from tkinter import filedialog, messagebox, ttk from PIL import Image, ImageTk import fitz # PyMuPDF import yaml APP_TITLE = "PDF Mask Designer (Standalone)" TEMPLATE_VERSION = 1 # ----------------------------- Data structures ----------------------------- @dataclass class MaskRect: page: int x0: float y0: float x1: float y1: float label: str = "MASK" @dataclass class Template: name: str page_size: Tuple[float, float] # (width, height) in PDF points version: int = TEMPLATE_VERSION masks: List[MaskRect] = None def to_dict(self) -> Dict[str, Any]: return { "version": self.version, "name": self.name, "page_size": {"width": self.page_size[0], "height": self.page_size[1]}, "masks": [asdict(m) for m in (self.masks or [])], } @staticmethod def from_dict(d: Dict[str, Any]) -> "Template": ps = d.get("page_size") or {} masks = [] for m in d.get("masks", []): masks.append(MaskRect( page=int(m["page"]), x0=float(m["x0"]), y0=float(m["y0"]), x1=float(m["x1"]), y1=float(m["y1"]), label=m.get("label", "MASK") )) name = d.get("name") or "template" return Template(name=name, page_size=(float(ps.get("width", 595)), float(ps.get("height", 842))), version=int(d.get("version", TEMPLATE_VERSION)), masks=masks) # ----------------------------- Utility funcs ------------------------------ def clamp(v, a, b): return max(a, min(b, v)) def rect_norm(x0, y0, x1, y1) -> Tuple[float, float, float, float]: return (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1)) def page_pix(doc: fitz.Document, pno: int, zoom: float) -> Image.Image: page = doc[pno] mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat, annots=False) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) return img def draw_overlay(img: Image.Image, rects: List[MaskRect], zoom: float, page: int) -> Image.Image: # returns a copy with alpha-red rectangles from PIL import ImageDraw out = img.copy() draw = ImageDraw.Draw(out, "RGBA") for r in rects: if r.page != page: continue draw.rectangle([r.x0*zoom, r.y0*zoom, r.x1*zoom, r.y1*zoom], fill=(0,0,0,110), outline=(0,0,0,220), width=2) return out def save_template_yaml(tpl: Template, path: Path): with open(path, "w", encoding="utf-8") as f: yaml.safe_dump(tpl.to_dict(), f, allow_unicode=True, sort_keys=False) def load_template_yaml(path: Path) -> Template: d = yaml.safe_load(path.read_text(encoding="utf-8")) or {} return Template.from_dict(d) # ----------------------------- Application logic -------------------------- def apply_template_vector(pdf_in: Path, pdf_out: Path, tpl: Template, audit_path: Path): doc = fitz.open(str(pdf_in)) w0, h0 = tpl.page_size with audit_path.open("w", encoding="utf-8") as audit: for pno in range(len(doc)): page = doc[pno] pw, ph = page.rect.width, page.rect.height # scaling if page size differs (simple proportional fit) sx, sy = pw / w0 if w0 else 1.0, ph / h0 if h0 else 1.0 for m in tpl.masks or []: if m.page not in (-1, pno): # -1 = all pages continue r = fitz.Rect(m.x0*sx, m.y0*sy, m.x1*sx, m.y1*sy) page.add_redact_annot(r, fill=(0,0,0)) audit.write(json.dumps({ "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno, "bbox": [round(r.x0,2), round(r.y0,2), round(r.x1,2), round(r.y1,2)], "mode": "vector" }, ensure_ascii=False) + "\n") try: page.apply_redactions() except Exception: pass doc.save(str(pdf_out), deflate=True, garbage=4, clean=True, incremental=False) doc.close() def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int, audit_path: Path): doc = fitz.open(str(pdf_in)) out = fitz.open() w0, h0 = tpl.page_size with audit_path.open("w", encoding="utf-8") as audit: for pno in range(len(doc)): page = doc[pno]; pw, ph = page.rect.width, page.rect.height sx, sy = pw / w0 if w0 else 1.0, ph / h0 if h0 else 1.0 zoom = dpi/72.0 pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), annots=False) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) from PIL import ImageDraw draw = ImageDraw.Draw(img) for m in tpl.masks or []: if m.page not in (-1, pno): continue r = fitz.Rect(m.x0*sx, m.y0*sy, m.x1*sx, m.y1*sy) draw.rectangle([r.x0*zoom, r.y0*zoom, r.x1*zoom, r.y1*zoom], fill=(0,0,0)) audit.write(json.dumps({ "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno, "bbox": [round(r.x0,2), round(r.y0,2), round(r.x1,2), round(r.y1,2)], "mode": "raster" }, ensure_ascii=False) + "\n") buf = io.BytesIO() img.save(buf, format="PNG"); buf.seek(0) dst = out.new_page(width=page.rect.width, height=page.rect.height) dst.insert_image(page.rect, stream=buf.getvalue()) out.save(str(pdf_out), deflate=True, garbage=4, clean=True) out.close(); doc.close() # ----------------------------- GUI ------------------------------ class MaskDesignerApp: def __init__(self, root: tk.Tk): self.root = root self.root.title(APP_TITLE) self.root.geometry("1280x900") self.zoom = 1.25 # affichage self.doc: Optional[fitz.Document] = None self.doc_path: Optional[Path] = None self.curr_page = 0 self.curr_image: Optional[Image.Image] = None self.tk_image: Optional[ImageTk.PhotoImage] = None self.masks: Dict[int, List[MaskRect]] = {} # per-page self.template_name = tk.StringVar(value="template_masks") self.status = tk.StringVar(value="Prêt.") self.raster_dpi = tk.IntVar(value=200) self.is_drawing = False self.start_xy: Optional[Tuple[int,int]] = None self._build_ui() # UI layout def _build_ui(self): top = tk.Frame(self.root, padx=8, pady=8) top.pack(fill=tk.BOTH, expand=True) bar = tk.Frame(top); bar.pack(fill=tk.X) tk.Button(bar, text="Ouvrir PDF…", command=self.open_pdf).pack(side=tk.LEFT) tk.Button(bar, text="←", command=self.prev_page).pack(side=tk.LEFT, padx=(8,2)) tk.Button(bar, text="→", command=self.next_page).pack(side=tk.LEFT, padx=2) tk.Button(bar, text="Zoom -", command=lambda: self.set_zoom( max(0.5, self.zoom-0.1) )).pack(side=tk.LEFT, padx=6) tk.Button(bar, text="Zoom +", command=lambda: self.set_zoom( self.zoom+0.1 )).pack(side=tk.LEFT, padx=2) tk.Label(bar, text="Nom template :").pack(side=tk.LEFT, padx=(12,2)) tk.Entry(bar, textvariable=self.template_name, width=24).pack(side=tk.LEFT) tk.Button(bar, text="Sauver template…", command=self.save_template).pack(side=tk.LEFT, padx=6) tk.Button(bar, text="Charger template…", command=self.load_template).pack(side=tk.LEFT, padx=2) tk.Button(bar, text="Effacer masques page", command=self.clear_page_masks).pack(side=tk.LEFT, padx=12) tools = tk.Frame(top); tools.pack(fill=tk.X, pady=(4,2)) tk.Label(tools, text="Prévisualiser / Appliquer sur un échantillon :").pack(side=tk.LEFT) tk.Button(tools, text="Prévisualiser (vector)", command=self.preview_vector).pack(side=tk.LEFT, padx=6) tk.Button(tools, text="Prévisualiser (raster)", command=self.preview_raster).pack(side=tk.LEFT, padx=2) tk.Label(tools, text="DPI raster:").pack(side=tk.LEFT, padx=(12,2)) tk.Entry(tools, textvariable=self.raster_dpi, width=6).pack(side=tk.LEFT) tk.Button(tools, text="Appliquer (vector)…", command=self.apply_vector_batch).pack(side=tk.LEFT, padx=(16,4)) tk.Button(tools, text="Appliquer (raster)…", command=self.apply_raster_batch).pack(side=tk.LEFT, padx=2) self.canvas = tk.Canvas(top, bg="#f5f7fb") self.canvas.pack(fill=tk.BOTH, expand=True, pady=(6,4)) self.canvas.bind("", self.on_down) self.canvas.bind("", self.on_drag) self.canvas.bind("", self.on_up) statusbar = tk.Label(self.root, textvariable=self.status, anchor="w", bd=1, relief=tk.SUNKEN) statusbar.pack(side=tk.BOTTOM, fill=tk.X) # Document handling def open_pdf(self): path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")]) if not path: return try: self.doc = fitz.open(path) self.doc_path = Path(path) self.curr_page = 0 self.masks.clear() self.template_name.set(self.doc_path.stem + "_template") self.refresh() self.status.set(f"PDF ouvert : {Path(path).name} — {len(self.doc)} page(s)") except Exception as e: messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}") def refresh(self): if not self.doc: return img = page_pix(self.doc, self.curr_page, self.zoom) # overlay current page masks rects = self.masks.get(self.curr_page, []) img_o = draw_overlay(img, rects, 1.0, self.curr_page) self.curr_image = img_o self.tk_image = ImageTk.PhotoImage(img_o) self.canvas.delete("all") self.canvas.create_image(0,0, anchor="nw", image=self.tk_image) self.canvas.config(scrollregion=(0,0,img_o.width, img_o.height)) def prev_page(self): if not self.doc: return self.curr_page = max(0, self.curr_page-1) self.refresh() def next_page(self): if not self.doc: return self.curr_page = min(len(self.doc)-1, self.curr_page+1) self.refresh() def set_zoom(self, z: float): self.zoom = clamp(z, 0.5, 3.0) self.refresh() # Drawing masks def on_down(self, ev): if not self.doc: return self.is_drawing = True self.start_xy = (ev.x, ev.y) self._preview_rect = self.canvas.create_rectangle(ev.x, ev.y, ev.x, ev.y, outline="#000", width=2) def on_drag(self, ev): if not self.doc or not self.is_drawing: return sx, sy = self.start_xy self.canvas.coords(self._preview_rect, sx, sy, ev.x, ev.y) def on_up(self, ev): if not self.doc or not self.is_drawing: return self.is_drawing = False sx, sy = self.start_xy x0, y0, x1, y1 = rect_norm(sx, sy, ev.x, ev.y) # convert screen px to PDF points page = self.doc[self.curr_page] # we rendered with zoom, but here current image is at display resolution (zoom applied in page_pix) # So we need to divide by zoom to get PDF points (since page_pix used Matrix(zoom, zoom)) z = self.zoom rx0, ry0, rx1, ry1 = x0 / z, y0 / z, x1 / z, y1 / z rect = MaskRect(page=self.curr_page, x0=rx0, y0=ry0, x1=rx1, y1=ry1, label="MASK") self.masks.setdefault(self.curr_page, []).append(rect) self.canvas.delete(self._preview_rect) self.refresh() self.status.set(f"Masque ajouté p.{self.curr_page+1}: ({int(rx0)},{int(ry0)})–({int(rx1)},{int(ry1)})") # Template I/O def _current_template(self) -> Template: if not self.doc: raise RuntimeError("Aucun PDF ouvert.") page0 = self.doc[0] tpl = Template( name=self.template_name.get().strip() or "template", page_size=(page0.rect.width, page0.rect.height), masks=[m for arr in self.masks.values() for m in arr] ) return tpl def save_template(self): try: tpl = self._current_template() except Exception as e: messagebox.showwarning("Info", str(e)); return path = filedialog.asksaveasfilename(defaultextension=".yml", filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")], initialfile=f"{tpl.name}.yml") if not path: return p = Path(path) try: if p.suffix.lower() in (".yml", ".yaml"): save_template_yaml(tpl, p) else: p.write_text(json.dumps(tpl.to_dict(), ensure_ascii=False, indent=2), encoding="utf-8") messagebox.showinfo("OK", f"Template enregistré : {p.name}") except Exception as e: messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}") def load_template(self): path = filedialog.askopenfilename(filetypes=[("YAML/JSON", "*.yml *.yaml *.json")]) if not path: return p = Path(path) try: if p.suffix.lower() in (".yml", ".yaml"): tpl = load_template_yaml(p) else: tpl = Template.from_dict(json.loads(p.read_text(encoding="utf-8"))) self.template_name.set(tpl.name) # reset masks and map to current doc pages (keep same page numbers; -1 means all pages) self.masks.clear() for m in tpl.masks or []: self.masks.setdefault(m.page, []).append(m) self.refresh() self.status.set(f"Template chargé : {p.name}") except Exception as e: messagebox.showerror("Erreur", f"Template invalide : {e}") def clear_page_masks(self): if not self.doc: return if self.curr_page in self.masks: del self.masks[self.curr_page] self.refresh() self.status.set(f"Masques de la page {self.curr_page+1} supprimés.") # Preview / Apply def _build_template_from_state(self) -> Optional[Template]: if not self.doc: messagebox.showwarning("Info", "Ouvrez d'abord un PDF de référence.") return None return self._current_template() def preview_vector(self): tpl = self._build_template_from_state() if not tpl: return samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF","*.pdf")]) if not samp: return for i, s in enumerate(samp[:2], start=1): pdf_in = Path(s) out_dir = pdf_in.parent / "masked_preview" out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.preview_vector.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_vector(pdf_in, pdf_out, tpl, audit) except Exception as e: messagebox.showerror("Erreur", f"Prévisualisation vectorielle échouée sur {pdf_in.name} : {e}") messagebox.showinfo("Prévisualisation", "Terminé (vectoriel). Ouvrez le dossier 'masked_preview'.") def preview_raster(self): tpl = self._build_template_from_state() if not tpl: return samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF","*.pdf")]) if not samp: return dpi = int(self.raster_dpi.get()) for i, s in enumerate(samp[:2], start=1): pdf_in = Path(s) out_dir = pdf_in.parent / "masked_preview" out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.preview_raster.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit) except Exception as e: messagebox.showerror("Erreur", f"Prévisualisation raster échouée sur {pdf_in.name} : {e}") messagebox.showinfo("Prévisualisation", "Terminé (raster). Ouvrez le dossier 'masked_preview'.") def apply_vector_batch(self): tpl = self._build_template_from_state() if not tpl: return files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (vectoriel)", filetypes=[("PDF","*.pdf")]) if not files: return for s in files: pdf_in = Path(s) out_dir = pdf_in.parent / "masked" out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.masked_vector.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_vector(pdf_in, pdf_out, tpl, audit) except Exception as e: messagebox.showerror("Erreur", f"Échec sur {pdf_in.name}: {e}") messagebox.showinfo("Terminé", "Masques appliqués (vectoriel).") def apply_raster_batch(self): tpl = self._build_template_from_state() if not tpl: return files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (raster)", filetypes=[("PDF","*.pdf")]) if not files: return dpi = int(self.raster_dpi.get()) for s in files: pdf_in = Path(s) out_dir = pdf_in.parent / "masked" out_dir.mkdir(exist_ok=True) pdf_out = out_dir / f"{pdf_in.stem}.masked_raster.pdf" audit = out_dir / f"{pdf_in.stem}.audit.jsonl" try: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit) except Exception as e: messagebox.showerror("Erreur", f"Échec sur {pdf_in.name}: {e}") messagebox.showinfo("Terminé", "Masques appliqués (raster).") # ----------------------------- Main ------------------------------ def main(): root = tk.Tk() app = MaskDesignerApp(root) root.mainloop() if __name__ == "__main__": main()