Files
anonymisation/pdf_mask_designer.py
Domi31tls 380e520013 feat(gui): apply WIP profils+masques+build-windows from stash (2026-04-27)
Application du stash@{0} resté en WIP depuis le 27/04 :
  "On main: wip-gui-profils-masque-manuel-build-windows-2026-04-27"

## Apport

- Pseudonymisation_Gui_V5.py (+1208 lignes) : profils, panneau paramètres
  avancés, éditeur de masques intégré, gestion whitelist/blacklist
- launcher.py (+315) : splash natif PyInstaller, single-instance,
  téléchargement modèles
- anonymisation_onefile.spec : config PyInstaller mise à jour
- pdf_mask_designer.py (+114) : éditeur de masques amélioré
- config_defaults.py (+23) : constantes nouvelles
- tests/unit/test_config_externalization.py (+12) : tests config
- .gitignore (+5)

## Pourquoi

La version courante de la GUI sur la branche feature manquait :
- L'éditeur de masques
- Les profils
- Le panneau paramètres avancés
- Le splash natif au démarrage

Aucun conflit avec mes 10 commits Q-1 (pas de chevauchement de fichiers).

## Validation

75 passed, 10 xfailed sur pytest tests/unit/.

## Note

Le stash reste disponible dans `git stash list` jusqu'à drop explicite.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 11:09:46 +02:00

512 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PDF Mask Designer (Standalone)
------------------------------
- Ouvre un PDF de référence
- Permet de "dessiner des masques" (rectangles) à la souris, par page
- Sauvegarde/charge un template (YAML/JSON) décrivant les masques
- Prévisualise l'application des masques sur 1 ou 2 PDF
- Applique les masques :
* Vectoriel : annotations de redaction (le texte est supprimé)
* Raster : "brûle" les boîtes noires dans l'image de page (sécurité maximale)
- Journal/Audit : écrit *.audit.jsonl avec MASK_TEMPLATE + bbox + nom de template
Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML
pip install pymupdf==1.24.9 Pillow==10.2.0 PyYAML==6.0.2
"""
from __future__ import annotations
import argparse
import io
import json
import math
import os
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from PIL import Image, ImageTk
import fitz # PyMuPDF
import yaml
from manual_masking import (
DEFAULT_MASK_OUTPUT_DIRNAME,
DEFAULT_MASK_PREVIEW_DIRNAME,
)
# Title given to the Tk root window by MaskDesignerApp.
APP_TITLE = "Éditeur de masques PDF"
# Schema version stored in saved templates; also the fallback used when a
# loaded template carries no "version" key.
TEMPLATE_VERSION = 1
# ----------------------------- Data structures -----------------------------
@dataclass
class MaskRect:
    """One rectangular mask attached to a page of a PDF.

    The GUI stores the corners in PDF points (canvas pixels divided by the
    display zoom). The two corners are not required to be ordered here.
    """

    # 0-based page index; the apply_template_* functions treat -1 as
    # "every page".
    page: int
    # First corner (x, y).
    x0: float
    y0: float
    # Opposite corner (x, y).
    x1: float
    y1: float
    # Free-form tag serialised with the mask.
    label: str = "MASK"
@dataclass
class Template:
    """A named, reusable set of masks plus the page size they were drawn on.

    The page size lets the apply functions rescale the masks when a target
    page has different dimensions from the reference page.
    """

    name: str
    page_size: Tuple[float, float]  # (width, height) in PDF points
    version: int = TEMPLATE_VERSION
    # None is accepted for backward compatibility and normalised to an empty
    # list in __post_init__, so each instance owns its own list.
    masks: Optional[List[MaskRect]] = None

    def __post_init__(self) -> None:
        if self.masks is None:
            self.masks = []

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form suitable for YAML/JSON serialisation."""
        width, height = self.page_size
        return {
            "version": self.version,
            "name": self.name,
            "page_size": {"width": width, "height": height},
            "masks": [asdict(m) for m in (self.masks or [])],
        }

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "Template":
        """Build a Template from the mapping produced by ``to_dict``.

        Missing or null ``page_size`` falls back to 595 x 842 points (roughly
        A4 portrait); a missing or null ``masks`` entry yields no masks; a
        missing or empty ``name`` becomes ``"template"``.

        Raises:
            KeyError: a mask entry lacks ``page``, ``x0``, ``y0``, ``x1`` or ``y1``.
            ValueError / TypeError: a numeric field cannot be converted.
        """
        ps = d.get("page_size") or {}
        # `or []` (rather than a .get default) also covers an explicit
        # `masks: null` in the file, which would otherwise fail to iterate.
        masks = [
            MaskRect(
                page=int(m["page"]),
                x0=float(m["x0"]),
                y0=float(m["y0"]),
                x1=float(m["x1"]),
                y1=float(m["y1"]),
                label=m.get("label", "MASK"),
            )
            for m in (d.get("masks") or [])
        ]
        return Template(
            name=d.get("name") or "template",
            page_size=(float(ps.get("width", 595)), float(ps.get("height", 842))),
            version=int(d.get("version", TEMPLATE_VERSION)),
            masks=masks,
        )
# ----------------------------- Utility funcs ------------------------------
def clamp(v, a, b):
    """Limit *v* to the closed interval [a, b] (a is the low bound, b the high)."""
    # Cap from above first, then lift to the lower bound, mirroring
    # max(a, min(b, v)) including which operand wins on ties.
    capped = v if v < b else b
    return capped if capped > a else a
def rect_norm(x0, y0, x1, y1) -> Tuple[float, float, float, float]:
    """Reorder two corner points into (left, top, right, bottom)."""
    left = x1 if x1 < x0 else x0
    right = x1 if x1 > x0 else x0
    top = y1 if y1 < y0 else y0
    bottom = y1 if y1 > y0 else y0
    return (left, top, right, bottom)
def page_pix(doc: fitz.Document, pno: int, zoom: float) -> Image.Image:
    """Render page *pno* of *doc* to an RGB PIL image.

    The same *zoom* factor is applied on both axes; annotations are not
    rendered.
    """
    scale = fitz.Matrix(zoom, zoom)
    rendered = doc[pno].get_pixmap(matrix=scale, annots=False)
    size = [rendered.width, rendered.height]
    return Image.frombytes("RGB", size, rendered.samples)
def draw_overlay(img: Image.Image, rects: List[MaskRect], zoom: float, page: int) -> Image.Image:
    """Return a copy of *img* with the masks of *page* drawn as translucent black boxes.

    Args:
        img: rendered page image; it is not modified.
        rects: candidate masks, with coordinates in PDF points.
        zoom: factor converting PDF points to pixels of *img*.
        page: 0-based index of the page shown in *img*.

    Masks whose ``page`` is -1 are drawn on every page, matching how the
    apply_template_* functions interpret that value.
    """
    from PIL import ImageDraw

    out = img.copy()
    # "RGBA" drawing mode lets the semi-transparent fill blend with the RGB page.
    draw = ImageDraw.Draw(out, "RGBA")
    for r in rects:
        if r.page not in (-1, page):
            continue
        # Order the corners: recent Pillow versions reject x1 < x0 or y1 < y0,
        # which can happen with hand-edited templates.
        x0, x1 = sorted((r.x0 * zoom, r.x1 * zoom))
        y0, y1 = sorted((r.y0 * zoom, r.y1 * zoom))
        draw.rectangle([x0, y0, x1, y1], fill=(0, 0, 0, 110), outline=(0, 0, 0, 220), width=2)
    return out
def save_template_yaml(tpl: Template, path: Path):
    """Serialise *tpl* as UTF-8 YAML at *path*, keeping the key order of ``to_dict``."""
    payload = tpl.to_dict()
    with open(path, "w", encoding="utf-8") as handle:
        yaml.safe_dump(payload, handle, allow_unicode=True, sort_keys=False)
def load_template_yaml(path: Path) -> Template:
    """Read a UTF-8 YAML file and turn it into a Template.

    An empty document is treated as an empty mapping, so every field takes
    the defaults applied by ``Template.from_dict``.
    """
    raw_text = path.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    if not parsed:
        parsed = {}
    return Template.from_dict(parsed)
# ----------------------------- Application logic --------------------------
def apply_template_vector(pdf_in: Path, pdf_out: Path, tpl: Template, audit_path: Path):
    """Redact *pdf_in* with the masks of *tpl* and write the result to *pdf_out*.

    Each mask becomes a black redaction annotation which is then applied to
    the page. One JSON line per mask is written to *audit_path*.

    Args:
        pdf_in: source PDF.
        pdf_out: destination PDF (written non-incrementally).
        tpl: template whose masks are scaled from ``tpl.page_size`` to each
            page's actual size; masks with ``page == -1`` apply to every page.
        audit_path: JSONL audit file, overwritten.

    Raises:
        RuntimeError: applying the redactions failed on a page. Nothing is
            saved in that case, so a partially redacted file is never
            produced.
    """
    doc = fitz.open(str(pdf_in))
    try:
        w0, h0 = tpl.page_size
        with audit_path.open("w", encoding="utf-8") as audit:
            for pno in range(len(doc)):
                page = doc[pno]
                # Simple proportional fit when the page size differs from the
                # template's reference size; a zero reference disables scaling.
                sx = page.rect.width / w0 if w0 else 1.0
                sy = page.rect.height / h0 if h0 else 1.0
                for m in tpl.masks or []:
                    if m.page not in (-1, pno):  # -1 = all pages
                        continue
                    # Order the corners so a reversed mask is still a valid rectangle.
                    x0, x1 = sorted((m.x0 * sx, m.x1 * sx))
                    y0, y1 = sorted((m.y0 * sy, m.y1 * sy))
                    r = fitz.Rect(x0, y0, x1, y1)
                    page.add_redact_annot(r, fill=(0, 0, 0))
                    audit.write(json.dumps({
                        "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno,
                        "bbox": [round(r.x0, 2), round(r.y0, 2), round(r.x1, 2), round(r.y1, 2)],
                        "mode": "vector"
                    }, ensure_ascii=False) + "\n")
                try:
                    page.apply_redactions()
                except Exception as exc:
                    # A failed redaction must not be ignored: the saved file
                    # would still contain the text under the annotations.
                    raise RuntimeError(
                        f"Application des masques impossible sur la page {pno + 1}"
                    ) from exc
        doc.save(str(pdf_out), deflate=True, garbage=4, clean=True, incremental=False)
    finally:
        doc.close()
def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int, audit_path: Path):
    """Rasterise *pdf_in*, burn the masks of *tpl* into the pixels and save to *pdf_out*.

    Every page is rendered at *dpi*, black boxes are painted over the masked
    areas, and the image is placed on a new page of the same size. One JSON
    line per mask is written to *audit_path*.

    Args:
        pdf_in: source PDF.
        pdf_out: destination PDF containing one image per page.
        tpl: template whose masks are scaled from ``tpl.page_size`` to each
            page's actual size; masks with ``page == -1`` apply to every page.
        dpi: rendering resolution; must be positive.
        audit_path: JSONL audit file, overwritten.

    Raises:
        ValueError: *dpi* is not strictly positive.
    """
    from PIL import ImageDraw

    if dpi <= 0:
        raise ValueError(f"DPI raster invalide : {dpi}")
    zoom = dpi / 72.0  # PDF points are 1/72 inch
    w0, h0 = tpl.page_size

    doc = fitz.open(str(pdf_in))
    try:
        out = fitz.open()
        try:
            with audit_path.open("w", encoding="utf-8") as audit:
                for pno in range(len(doc)):
                    page = doc[pno]
                    sx = page.rect.width / w0 if w0 else 1.0
                    sy = page.rect.height / h0 if h0 else 1.0
                    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), annots=False)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    draw = ImageDraw.Draw(img)
                    for m in tpl.masks or []:
                        if m.page not in (-1, pno):  # -1 = all pages
                            continue
                        # Order the corners: Pillow rejects reversed rectangles.
                        x0, x1 = sorted((m.x0 * sx, m.x1 * sx))
                        y0, y1 = sorted((m.y0 * sy, m.y1 * sy))
                        r = fitz.Rect(x0, y0, x1, y1)
                        draw.rectangle([r.x0 * zoom, r.y0 * zoom, r.x1 * zoom, r.y1 * zoom], fill=(0, 0, 0))
                        audit.write(json.dumps({
                            "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno,
                            "bbox": [round(r.x0, 2), round(r.y0, 2), round(r.x1, 2), round(r.y1, 2)],
                            "mode": "raster"
                        }, ensure_ascii=False) + "\n")
                    buf = io.BytesIO()
                    img.save(buf, format="PNG")
                    dst = out.new_page(width=page.rect.width, height=page.rect.height)
                    dst.insert_image(page.rect, stream=buf.getvalue())
            out.save(str(pdf_out), deflate=True, garbage=4, clean=True)
        finally:
            out.close()
    finally:
        doc.close()
# ----------------------------- GUI ------------------------------
class MaskDesignerApp:
    """Tk application for drawing rectangular masks on a reference PDF.

    The user opens a PDF, drags rectangles on the rendered page, saves or
    loads the set of rectangles as a template (YAML or JSON), and applies the
    template to other PDFs in vector (redaction) or raster mode.
    """

    # Page index used in templates for masks that apply to every page.
    ALL_PAGES = -1
    # A drag smaller than this on either axis (in PDF points) is treated as
    # an accidental click and creates no mask.
    MIN_MASK_SIZE_PT = 1.0

    def __init__(
        self,
        root: tk.Tk,
        *,
        initial_pdf: Optional[Path] = None,
        initial_template: Optional[Path] = None,
        templates_dir: Optional[Path] = None,
        output_dir_name: str = DEFAULT_MASK_OUTPUT_DIRNAME,
        preview_dir_name: str = DEFAULT_MASK_PREVIEW_DIRNAME,
    ):
        """Build the window and optionally open a PDF and a template.

        Args:
            root: Tk root window to populate.
            initial_pdf: PDF opened at startup, if given.
            initial_template: template loaded at startup, if given.
            templates_dir: default folder of the template file dialogs.
            output_dir_name: sub-folder (next to each input) for masked PDFs.
            preview_dir_name: sub-folder (next to each input) for previews.
        """
        self.root = root
        self.root.title(APP_TITLE)
        self.root.geometry("1280x900")
        self.zoom = 1.25  # display zoom
        self.doc: Optional[fitz.Document] = None
        self.doc_path: Optional[Path] = None
        self.curr_page = 0
        self.curr_image: Optional[Image.Image] = None
        self.tk_image: Optional[ImageTk.PhotoImage] = None
        self.masks: Dict[int, List[MaskRect]] = {}  # per-page
        self.template_name = tk.StringVar(value="template_masks")
        self.status = tk.StringVar(value="Prêt.")
        self.raster_dpi = tk.IntVar(value=200)
        self.templates_dir = templates_dir
        self.output_dir_name = output_dir_name
        self.preview_dir_name = preview_dir_name
        self.is_drawing = False
        self.start_xy: Optional[Tuple[float, float]] = None
        # Canvas item id of the rubber-band rectangle while dragging.
        self._preview_rect: Optional[int] = None
        self._build_ui()
        if initial_pdf:
            self.open_pdf_path(initial_pdf)
        if initial_template:
            self.load_template_path(initial_template)

    # UI layout
    def _build_ui(self):
        """Create the toolbar, the tool row, the drawing canvas and the status bar."""
        top = tk.Frame(self.root, padx=8, pady=8)
        top.pack(fill=tk.BOTH, expand=True)

        bar = tk.Frame(top)
        bar.pack(fill=tk.X)
        tk.Button(bar, text="Ouvrir PDF…", command=self.open_pdf).pack(side=tk.LEFT)
        # The navigation buttons need visible labels to be usable.
        tk.Button(bar, text="◀", command=self.prev_page).pack(side=tk.LEFT, padx=(8, 2))
        tk.Button(bar, text="▶", command=self.next_page).pack(side=tk.LEFT, padx=2)
        # set_zoom clamps the value, so no bounds are needed here.
        tk.Button(bar, text="Zoom -", command=lambda: self.set_zoom(self.zoom - 0.1)).pack(side=tk.LEFT, padx=6)
        tk.Button(bar, text="Zoom +", command=lambda: self.set_zoom(self.zoom + 0.1)).pack(side=tk.LEFT, padx=2)
        tk.Label(bar, text="Nom template :").pack(side=tk.LEFT, padx=(12, 2))
        tk.Entry(bar, textvariable=self.template_name, width=24).pack(side=tk.LEFT)
        tk.Button(bar, text="Sauver template…", command=self.save_template).pack(side=tk.LEFT, padx=6)
        tk.Button(bar, text="Charger template…", command=self.load_template).pack(side=tk.LEFT, padx=2)
        tk.Button(bar, text="Effacer masques page", command=self.clear_page_masks).pack(side=tk.LEFT, padx=12)

        tools = tk.Frame(top)
        tools.pack(fill=tk.X, pady=(4, 2))
        tk.Label(tools, text="Prévisualiser / Appliquer sur un échantillon :").pack(side=tk.LEFT)
        tk.Button(tools, text="Prévisualiser (vector)", command=self.preview_vector).pack(side=tk.LEFT, padx=6)
        tk.Button(tools, text="Prévisualiser (raster)", command=self.preview_raster).pack(side=tk.LEFT, padx=2)
        tk.Label(tools, text="DPI raster:").pack(side=tk.LEFT, padx=(12, 2))
        tk.Entry(tools, textvariable=self.raster_dpi, width=6).pack(side=tk.LEFT)
        tk.Button(tools, text="Appliquer (vector)…", command=self.apply_vector_batch).pack(side=tk.LEFT, padx=(16, 4))
        tk.Button(tools, text="Appliquer (raster)…", command=self.apply_raster_batch).pack(side=tk.LEFT, padx=2)

        self.canvas = tk.Canvas(top, bg="#f5f7fb")
        self.canvas.pack(fill=tk.BOTH, expand=True, pady=(6, 4))
        self.canvas.bind("<ButtonPress-1>", self.on_down)
        self.canvas.bind("<B1-Motion>", self.on_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_up)

        statusbar = tk.Label(self.root, textvariable=self.status, anchor="w", bd=1, relief=tk.SUNKEN)
        statusbar.pack(side=tk.BOTTOM, fill=tk.X)

    # Document handling
    def open_pdf(self):
        """Ask for a PDF file and open it as the reference document."""
        path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")])
        if not path:
            return
        self.open_pdf_path(Path(path))

    def open_pdf_path(self, path: Path):
        """Open *path* as the reference document, discarding current masks.

        The previously opened document, if any, is closed once the new one
        has been opened successfully. Errors are reported in a dialog.
        """
        try:
            new_doc = fitz.open(str(path))
            previous, self.doc = self.doc, new_doc
            if previous is not None:
                previous.close()  # do not leak the old file handle
            self.doc_path = Path(path)
            self.curr_page = 0
            self.masks.clear()
            self.template_name.set(self.doc_path.stem + "_template")
            self.refresh()
            self.status.set(f"PDF ouvert : {self.doc_path.name} — {len(self.doc)} page(s)")
        except Exception as e:
            messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}")

    def refresh(self):
        """Re-render the current page with its masks and show it on the canvas."""
        if not self.doc:
            return
        img = page_pix(self.doc, self.curr_page, self.zoom)
        rects = list(self.masks.get(self.curr_page, []))
        # "All pages" masks are shown on every page. They are re-tagged with
        # the current page so the overlay helper draws them.
        rects.extend(
            MaskRect(page=self.curr_page, x0=m.x0, y0=m.y0, x1=m.x1, y1=m.y1, label=m.label)
            for m in self.masks.get(self.ALL_PAGES, [])
        )
        img_o = draw_overlay(img, rects, self.zoom, self.curr_page)
        self.curr_image = img_o
        # Keep a reference on self: Tk does not keep the image alive itself.
        self.tk_image = ImageTk.PhotoImage(img_o)
        self.canvas.delete("all")
        self._preview_rect = None
        self.canvas.create_image(0, 0, anchor="nw", image=self.tk_image)
        self.canvas.config(scrollregion=(0, 0, img_o.width, img_o.height))

    def prev_page(self):
        """Show the previous page, stopping at the first one."""
        if not self.doc:
            return
        self.curr_page = max(0, self.curr_page - 1)
        self.refresh()

    def next_page(self):
        """Show the next page, stopping at the last one."""
        if not self.doc:
            return
        self.curr_page = min(len(self.doc) - 1, self.curr_page + 1)
        self.refresh()

    def set_zoom(self, z: float):
        """Set the display zoom, limited to the range 0.5 to 3.0, and redraw."""
        self.zoom = clamp(z, 0.5, 3.0)
        self.refresh()

    # Drawing masks
    def on_down(self, ev):
        """Start a rubber-band rectangle at the pointer position."""
        if not self.doc:
            return
        if self._preview_rect is not None:
            self.canvas.delete(self._preview_rect)
        self.is_drawing = True
        x = self.canvas.canvasx(ev.x)
        y = self.canvas.canvasy(ev.y)
        self.start_xy = (x, y)
        self._preview_rect = self.canvas.create_rectangle(x, y, x, y, outline="#000", width=2)

    def on_drag(self, ev):
        """Stretch the rubber-band rectangle to the pointer position."""
        if not self.doc or not self.is_drawing or self._preview_rect is None:
            return
        sx, sy = self.start_xy
        x = self.canvas.canvasx(ev.x)
        y = self.canvas.canvasy(ev.y)
        self.canvas.coords(self._preview_rect, sx, sy, x, y)

    def on_up(self, ev):
        """Finish the drag and store the rectangle as a mask of the current page."""
        if not self.doc or not self.is_drawing:
            return
        self.is_drawing = False
        if self._preview_rect is not None:
            self.canvas.delete(self._preview_rect)
            self._preview_rect = None
        sx, sy = self.start_xy
        x = self.canvas.canvasx(ev.x)
        y = self.canvas.canvasy(ev.y)
        x0, y0, x1, y1 = rect_norm(sx, sy, x, y)
        # The page was rendered with Matrix(zoom, zoom), so dividing canvas
        # pixels by the zoom gives PDF points. Clamp to the page so a drag
        # ending outside the image does not produce an off-page mask.
        page_rect = self.doc[self.curr_page].rect
        z = self.zoom
        rx0 = clamp(x0 / z, 0.0, page_rect.width)
        ry0 = clamp(y0 / z, 0.0, page_rect.height)
        rx1 = clamp(x1 / z, 0.0, page_rect.width)
        ry1 = clamp(y1 / z, 0.0, page_rect.height)
        if (rx1 - rx0) < self.MIN_MASK_SIZE_PT or (ry1 - ry0) < self.MIN_MASK_SIZE_PT:
            # A plain click (or a drag fully outside the page) masks nothing.
            self.status.set("Masque ignoré : zone trop petite.")
            return
        rect = MaskRect(page=self.curr_page, x0=rx0, y0=ry0, x1=rx1, y1=ry1, label="MASK")
        self.masks.setdefault(self.curr_page, []).append(rect)
        self.refresh()
        self.status.set(
            f"Masque ajouté p.{self.curr_page+1}: ({int(rx0)},{int(ry0)}) → ({int(rx1)},{int(ry1)})"
        )

    # Template I/O
    def _current_template(self) -> Template:
        """Build a Template from the masks drawn so far.

        The reference page size is taken from the first page of the open PDF.

        Raises:
            RuntimeError: no PDF is open.
        """
        if not self.doc:
            raise RuntimeError("Aucun PDF ouvert.")
        page0 = self.doc[0]
        return Template(
            name=self.template_name.get().strip() or "template",
            page_size=(page0.rect.width, page0.rect.height),
            masks=[m for arr in self.masks.values() for m in arr],
        )

    def save_template(self):
        """Ask for a destination and write the current template as YAML or JSON."""
        try:
            tpl = self._current_template()
        except Exception as e:
            messagebox.showwarning("Info", str(e))
            return
        path = filedialog.asksaveasfilename(
            defaultextension=".yml",
            filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
            initialdir=str(self._template_initialdir()),
            initialfile=f"{tpl.name}.yml",
        )
        if not path:
            return
        p = Path(path)
        try:
            if p.suffix.lower() in (".yml", ".yaml"):
                save_template_yaml(tpl, p)
            else:
                p.write_text(json.dumps(tpl.to_dict(), ensure_ascii=False, indent=2), encoding="utf-8")
            messagebox.showinfo("OK", f"Template enregistré : {p.name}")
        except Exception as e:
            messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}")

    def load_template(self):
        """Ask for a template file and load it."""
        path = filedialog.askopenfilename(
            filetypes=[("YAML/JSON", "*.yml *.yaml *.json")],
            initialdir=str(self._template_initialdir()),
        )
        if not path:
            return
        self.load_template_path(Path(path))

    def load_template_path(self, path: Path):
        """Replace the current masks with those of the template at *path*.

        Files ending in .yml/.yaml are parsed as YAML, anything else as JSON.
        Errors are reported in a dialog and leave a message, not an exception.
        """
        p = Path(path)
        try:
            if p.suffix.lower() in (".yml", ".yaml"):
                tpl = load_template_yaml(p)
            else:
                tpl = Template.from_dict(json.loads(p.read_text(encoding="utf-8")))
            self.template_name.set(tpl.name)
            # Masks keep their page numbers; -1 means all pages.
            self.masks.clear()
            for m in tpl.masks or []:
                self.masks.setdefault(m.page, []).append(m)
            self.refresh()
            self.status.set(f"Template chargé : {p.name}")
        except Exception as e:
            messagebox.showerror("Erreur", f"Template invalide : {e}")

    def clear_page_masks(self):
        """Remove the masks attached to the current page."""
        if not self.doc:
            return
        self.masks.pop(self.curr_page, None)
        self.refresh()
        self.status.set(f"Masques de la page {self.curr_page+1} supprimés.")

    def _template_initialdir(self) -> Path:
        """Return the folder the template dialogs should start in.

        Preference order: the configured templates folder (created if
        missing), the folder of the open PDF, the current working directory.
        """
        if self.templates_dir is not None:
            self.templates_dir.mkdir(parents=True, exist_ok=True)
            return self.templates_dir
        if self.doc_path is not None:
            return self.doc_path.parent
        return Path.cwd()

    # Preview / Apply
    def _build_template_from_state(self) -> Optional[Template]:
        """Return the current template, or None (after a warning) if no PDF is open."""
        if not self.doc:
            messagebox.showwarning("Info", "Ouvrez d'abord un PDF de référence.")
            return None
        return self._current_template()

    def _read_raster_dpi(self) -> Optional[int]:
        """Return the DPI typed by the user, or None (after a warning) if unusable."""
        try:
            dpi = int(self.raster_dpi.get())
        except (tk.TclError, ValueError):
            # IntVar.get() raises when the entry holds non-numeric text.
            dpi = 0
        if dpi <= 0:
            messagebox.showwarning("Info", "DPI raster invalide : saisissez un entier positif.")
            return None
        return dpi

    def _process_files(self, files, *, dir_name: str, out_suffix: str, error_fmt: str, apply_fn) -> int:
        """Run *apply_fn* on each file and return the number of failures.

        For every input, outputs go to ``<input folder>/<dir_name>/`` as
        ``<stem>.<out_suffix>.pdf`` and ``<stem>.audit.jsonl``. A failure on
        one file is shown in a dialog built from *error_fmt* (placeholders
        ``{name}`` and ``{error}``) and does not stop the others.
        """
        failures = 0
        for s in files:
            pdf_in = Path(s)
            out_dir = pdf_in.parent / dir_name
            try:
                out_dir.mkdir(parents=True, exist_ok=True)
                apply_fn(
                    pdf_in,
                    out_dir / f"{pdf_in.stem}.{out_suffix}.pdf",
                    out_dir / f"{pdf_in.stem}.audit.jsonl",
                )
            except Exception as e:
                failures += 1
                messagebox.showerror("Erreur", error_fmt.format(name=pdf_in.name, error=e))
        return failures

    def _report_batch(self, title: str, message: str, total: int, failures: int):
        """Show the final dialog; a warning replaces the success text if any file failed."""
        if failures:
            messagebox.showwarning(
                title,
                f"{total - failures}/{total} fichier(s) traité(s), {failures} en échec.",
            )
        else:
            messagebox.showinfo(title, message)

    def preview_vector(self):
        """Apply the template in vector mode to at most two chosen PDFs (preview folder)."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF", "*.pdf")])
        if not samp:
            return
        files = samp[:2]
        failures = self._process_files(
            files,
            dir_name=self.preview_dir_name,
            out_suffix="preview_vector",
            error_fmt="Prévisualisation vectorielle échouée sur {name} : {error}",
            apply_fn=lambda src, dst, audit: apply_template_vector(src, dst, tpl, audit),
        )
        self._report_batch(
            "Prévisualisation",
            f"Terminé (vectoriel). Ouvrez le dossier '{self.preview_dir_name}'.",
            len(files),
            failures,
        )

    def preview_raster(self):
        """Apply the template in raster mode to at most two chosen PDFs (preview folder)."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        dpi = self._read_raster_dpi()
        if dpi is None:
            return
        samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF", "*.pdf")])
        if not samp:
            return
        files = samp[:2]
        failures = self._process_files(
            files,
            dir_name=self.preview_dir_name,
            out_suffix="preview_raster",
            error_fmt="Prévisualisation raster échouée sur {name} : {error}",
            apply_fn=lambda src, dst, audit: apply_template_raster(src, dst, tpl, dpi, audit),
        )
        self._report_batch(
            "Prévisualisation",
            f"Terminé (raster). Ouvrez le dossier '{self.preview_dir_name}'.",
            len(files),
            failures,
        )

    def apply_vector_batch(self):
        """Apply the template in vector mode to the chosen PDFs (output folder)."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (vectoriel)", filetypes=[("PDF", "*.pdf")])
        if not files:
            return
        failures = self._process_files(
            files,
            dir_name=self.output_dir_name,
            out_suffix="masked_vector",
            error_fmt="Échec sur {name}: {error}",
            apply_fn=lambda src, dst, audit: apply_template_vector(src, dst, tpl, audit),
        )
        self._report_batch("Terminé", "Masques appliqués (vectoriel).", len(files), failures)

    def apply_raster_batch(self):
        """Apply the template in raster mode to the chosen PDFs (output folder)."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        dpi = self._read_raster_dpi()
        if dpi is None:
            return
        files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (raster)", filetypes=[("PDF", "*.pdf")])
        if not files:
            return
        failures = self._process_files(
            files,
            dir_name=self.output_dir_name,
            out_suffix="masked_raster",
            error_fmt="Échec sur {name}: {error}",
            apply_fn=lambda src, dst, audit: apply_template_raster(src, dst, tpl, dpi, audit),
        )
        self._report_batch("Terminé", "Masques appliqués (raster).", len(files), failures)
# ----------------------------- Main ------------------------------
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser of the mask designer.

    Three optional path options select what to open at startup and where
    templates live; two string options name the output and preview folders.
    """
    parser = argparse.ArgumentParser(description="Editeur de masques PDF reutilisables")

    # Options parsed as filesystem paths (no default, so they stay None).
    path_options = (
        ("--pdf", "PDF de reference a ouvrir au demarrage"),
        ("--template", "Template YAML/JSON a charger au demarrage"),
        ("--templates-dir", "Dossier par defaut pour sauver/charger les templates"),
    )
    for flag, help_text in path_options:
        parser.add_argument(flag, type=Path, help=help_text)

    # Plain string options with a default folder name.
    dirname_options = (
        (
            "--output-dir-name",
            DEFAULT_MASK_OUTPUT_DIRNAME,
            "Nom du dossier de sortie pour l'application des masques",
        ),
        (
            "--preview-dir-name",
            DEFAULT_MASK_PREVIEW_DIRNAME,
            "Nom du dossier de sortie pour les previsualisations",
        ),
    )
    for flag, default_value, help_text in dirname_options:
        parser.add_argument(flag, default=default_value, help=help_text)

    return parser
def main(argv: Optional[List[str]] = None):
    """Parse *argv* (or the process arguments when None) and run the GUI until closed."""
    options = build_arg_parser().parse_args(argv)
    window = tk.Tk()
    startup = {
        "initial_pdf": options.pdf,
        "initial_template": options.template,
        "templates_dir": options.templates_dir,
        "output_dir_name": options.output_dir_name,
        "preview_dir_name": options.preview_dir_name,
    }
    # Bound to a name so the application object stays referenced while the
    # event loop runs.
    app = MaskDesignerApp(window, **startup)
    window.mainloop()


if __name__ == "__main__":
    main()