- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles - Core ONNX : anonymisation regex + NER optionnel - Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR) - Génération simultanée PDF Image + PDF Anonymisé (structure préservée) - Build Windows via Nuitka (script batch + GitHub Actions CI) - install.sh pour setup/run Linux Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
440 lines
18 KiB
Python
440 lines
18 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
PDF Mask Designer (Standalone)
|
||
------------------------------
|
||
- Ouvre un PDF de référence
|
||
- Permet de "dessiner des masques" (rectangles) à la souris, par page
|
||
- Sauvegarde/charge un template (YAML/JSON) décrivant les masques
|
||
- Prévisualise l'application des masques sur 1–2 PDF
|
||
- Applique les masques :
|
||
* Vectoriel : annotations de redaction (le texte est supprimé)
|
||
* Raster : "brûle" les boîtes noires dans l'image de page (sécurité maximale)
|
||
- Journal/Audit : écrit *.audit.jsonl avec MASK_TEMPLATE + bbox + nom de template
|
||
|
||
Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML
|
||
pip install pymupdf==1.24.9 Pillow==10.2.0 PyYAML==6.0.2
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import io
|
||
import json
|
||
import math
|
||
import os
|
||
from dataclasses import dataclass, asdict
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional, Tuple, Any
|
||
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, ttk
|
||
from PIL import Image, ImageTk
|
||
import fitz # PyMuPDF
|
||
import yaml
|
||
|
||
# Title given to the Tk root window.
APP_TITLE = "PDF Mask Designer (Standalone)"
# Schema version stored in template files and used as the default when a
# loaded template does not carry one.
TEMPLATE_VERSION = 1
|
||
|
||
# ----------------------------- Data structures -----------------------------
|
||
|
||
@dataclass
class MaskRect:
    """One rectangular mask attached to a page of a PDF.

    The GUI stores coordinates in PDF points (screen pixels divided by the
    display zoom). The application functions treat ``page == -1`` as
    "apply on every page".
    """

    # 0-based page index, or -1 for all pages.
    page: int
    # First corner of the rectangle.
    x0: float
    y0: float
    # Opposite corner of the rectangle.
    x1: float
    y1: float
    # Free-form tag stored with the mask in the template file.
    label: str = "MASK"
|
||
|
||
@dataclass
class Template:
    """A named collection of masks plus the page size they were drawn on.

    ``page_size`` is the (width, height) of the reference page; the
    application functions use it to scale masks onto pages of another size.
    """

    name: str
    page_size: Tuple[float, float]  # (width, height) in PDF points
    version: int = TEMPLATE_VERSION
    # ``None`` is still accepted for backward compatibility and is replaced by
    # a fresh empty list, so each instance owns its own list.
    masks: Optional[List[MaskRect]] = None

    def __post_init__(self) -> None:
        if self.masks is None:
            self.masks = []

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain, JSON/YAML-serialisable dict describing the template."""
        return {
            "version": self.version,
            "name": self.name,
            "page_size": {"width": self.page_size[0], "height": self.page_size[1]},
            "masks": [asdict(m) for m in (self.masks or [])],
        }

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "Template":
        """Build a template from the dict shape produced by :meth:`to_dict`.

        Missing or null entries fall back to defaults: name ``"template"``,
        page size 595 x 842, the current ``TEMPLATE_VERSION`` and no masks.
        ``page_size`` may be a ``{"width", "height"}`` mapping or a
        two-item ``[width, height]`` sequence.

        Raises:
            KeyError, TypeError, ValueError: if a mask entry lacks a
                coordinate or holds a non-numeric value.
        """
        ps = d.get("page_size") or {}
        if isinstance(ps, dict):
            width, height = ps.get("width", 595), ps.get("height", 842)
        else:
            # Tolerate the compact hand-written form ``page_size: [w, h]``.
            width, height = ps

        # ``or []`` also covers an explicit ``masks: null`` in the file.
        masks = [
            MaskRect(
                page=int(m["page"]),
                x0=float(m["x0"]),
                y0=float(m["y0"]),
                x1=float(m["x1"]),
                y1=float(m["y1"]),
                label=m.get("label", "MASK"),
            )
            for m in (d.get("masks") or [])
        ]
        return Template(
            name=d.get("name") or "template",
            page_size=(float(width), float(height)),
            version=int(d.get("version", TEMPLATE_VERSION)),
            masks=masks,
        )
|
||
|
||
# ----------------------------- Utility funcs ------------------------------
|
||
|
||
def clamp(v, a, b):
    """Limit *v* to the range [a, b]; the lower bound *a* wins if a > b."""
    capped_above = min(b, v)
    return max(a, capped_above)
|
||
|
||
def rect_norm(x0, y0, x1, y1) -> Tuple[float, float, float, float]:
    """Return the rectangle as (left, top, right, bottom), whatever the corner order."""
    left = min(x0, x1)
    top = min(y0, y1)
    right = max(x0, x1)
    bottom = max(y0, y1)
    return (left, top, right, bottom)
|
||
|
||
def page_pix(doc: fitz.Document, pno: int, zoom: float) -> Image.Image:
    """Render page *pno* of *doc* as an RGB PIL image.

    The page is scaled by *zoom* on both axes and rendered with
    ``annots=False``, i.e. without its annotations.
    """
    pixmap = doc[pno].get_pixmap(matrix=fitz.Matrix(zoom, zoom), annots=False)
    size = [pixmap.width, pixmap.height]
    return Image.frombytes("RGB", size, pixmap.samples)
|
||
|
||
def draw_overlay(img: Image.Image, rects: List[MaskRect], zoom: float, page: int) -> Image.Image:
    """Return a copy of *img* with the masks of *page* drawn as translucent black boxes.

    Args:
        img: rendered page image; it is not modified.
        rects: candidate masks. Only those whose ``page`` equals *page*, or is
            -1 (all pages, as in the apply functions), are drawn.
        zoom: factor that converts mask coordinates to pixels of *img*.
        page: index of the page shown in *img*.
    """
    from PIL import ImageDraw

    out = img.copy()
    # "RGBA" drawing mode makes the alpha component of the fill blend with the page.
    draw = ImageDraw.Draw(out, "RGBA")
    for r in rects:
        if r.page not in (-1, page):
            continue
        # Pillow rejects boxes whose second corner precedes the first one.
        x0, y0, x1, y1 = rect_norm(r.x0, r.y0, r.x1, r.y1)
        draw.rectangle(
            [x0 * zoom, y0 * zoom, x1 * zoom, y1 * zoom],
            fill=(0, 0, 0, 110),
            outline=(0, 0, 0, 220),
            width=2,
        )
    return out
|
||
|
||
def save_template_yaml(tpl: Template, path: Path):
    """Write *tpl* to *path* as UTF-8 YAML, keeping the key order of ``to_dict``."""
    with open(path, "w", encoding="utf-8") as handle:
        payload = tpl.to_dict()
        yaml.safe_dump(payload, handle, allow_unicode=True, sort_keys=False)
|
||
|
||
def load_template_yaml(path: Path) -> Template:
    """Read a UTF-8 YAML file and build a :class:`Template` from it.

    An empty file yields a template made only of defaults.

    Raises:
        ValueError: if the YAML root is not a mapping (e.g. a list or a scalar).
        OSError, yaml.YAMLError: if the file cannot be read or parsed.
    """
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        # Fail with a readable message rather than an AttributeError raised
        # deep inside Template.from_dict.
        raise ValueError(
            f"le template YAML doit être un dictionnaire à la racine (reçu : {type(data).__name__})"
        )
    return Template.from_dict(data)
|
||
|
||
# ----------------------------- Application logic --------------------------
|
||
|
||
def apply_template_vector(pdf_in: Path, pdf_out: Path, tpl: Template, audit_path: Path):
    """Redact *pdf_in* with the masks of *tpl* and save the result to *pdf_out*.

    Each applicable mask becomes a black redaction annotation, and the
    redactions are then applied page by page. One JSON line per mask is
    written to *audit_path*. Mask coordinates are scaled from the template
    page size to the size of each page.

    Raises:
        RuntimeError: if the redactions of a page cannot be applied. Nothing
            is saved in that case, because the output would still contain the
            text that was meant to be removed.
    """
    w0, h0 = tpl.page_size
    masks = tpl.masks or []
    doc = fitz.open(str(pdf_in))
    try:
        with audit_path.open("w", encoding="utf-8") as audit:
            for pno in range(len(doc)):
                page = doc[pno]
                pw, ph = page.rect.width, page.rect.height
                # Proportional scaling when the page size differs from the template's.
                sx = pw / w0 if w0 else 1.0
                sy = ph / h0 if h0 else 1.0
                for m in masks:
                    if m.page not in (-1, pno):  # -1 = all pages
                        continue
                    r = fitz.Rect(m.x0 * sx, m.y0 * sy, m.x1 * sx, m.y1 * sy)
                    # Templates may hold inverted corners (hand-edited files).
                    r.normalize()
                    if r.is_empty:
                        continue  # a zero-area mask cannot redact anything
                    page.add_redact_annot(r, fill=(0, 0, 0))
                    record = {
                        "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno,
                        "bbox": [round(r.x0, 2), round(r.y0, 2), round(r.x1, 2), round(r.y1, 2)],
                        "mode": "vector",
                    }
                    audit.write(json.dumps(record, ensure_ascii=False) + "\n")
                try:
                    page.apply_redactions()
                except Exception as exc:
                    # Never swallow this: saving would leak the unredacted content.
                    raise RuntimeError(
                        f"impossible d'appliquer les redactions sur la page {pno + 1} : {exc}"
                    ) from exc
        doc.save(str(pdf_out), deflate=True, garbage=4, clean=True, incremental=False)
    finally:
        doc.close()
|
||
|
||
def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int, audit_path: Path):
    """Rasterise *pdf_in*, burn the masks of *tpl* into the pixels and save to *pdf_out*.

    Every page is rendered at *dpi*, the applicable masks are painted as
    opaque black boxes, and the image is placed on a new page of the same
    size in a fresh document. One JSON line per mask is written to
    *audit_path*.

    Raises:
        ValueError: if *dpi* is not strictly positive.
    """
    from PIL import ImageDraw

    if dpi <= 0:
        raise ValueError(f"le DPI doit être strictement positif (reçu : {dpi})")

    w0, h0 = tpl.page_size
    masks = tpl.masks or []
    zoom = dpi / 72.0  # PDF points are 1/72 inch
    doc = fitz.open(str(pdf_in))
    try:
        out = fitz.open()
        try:
            with audit_path.open("w", encoding="utf-8") as audit:
                for pno in range(len(doc)):
                    page = doc[pno]
                    pw, ph = page.rect.width, page.rect.height
                    # Proportional scaling when the page size differs from the template's.
                    sx = pw / w0 if w0 else 1.0
                    sy = ph / h0 if h0 else 1.0
                    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), annots=False)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    draw = ImageDraw.Draw(img)
                    for m in masks:
                        if m.page not in (-1, pno):  # -1 = all pages
                            continue
                        r = fitz.Rect(m.x0 * sx, m.y0 * sy, m.x1 * sx, m.y1 * sy)
                        # Pillow rejects boxes whose corners are inverted.
                        r.normalize()
                        draw.rectangle([r.x0 * zoom, r.y0 * zoom, r.x1 * zoom, r.y1 * zoom], fill=(0, 0, 0))
                        record = {
                            "kind": "MASK_TEMPLATE", "template": tpl.name, "page": pno,
                            "bbox": [round(r.x0, 2), round(r.y0, 2), round(r.x1, 2), round(r.y1, 2)],
                            "mode": "raster",
                        }
                        audit.write(json.dumps(record, ensure_ascii=False) + "\n")
                    buf = io.BytesIO()
                    img.save(buf, format="PNG")
                    dst = out.new_page(width=page.rect.width, height=page.rect.height)
                    dst.insert_image(page.rect, stream=buf.getvalue())
            out.save(str(pdf_out), deflate=True, garbage=4, clean=True)
        finally:
            out.close()
    finally:
        doc.close()
|
||
|
||
# ----------------------------- GUI ------------------------------
|
||
|
||
class MaskDesignerApp:
    """Tkinter window for drawing masks on a reference PDF and applying them to other PDFs.

    Masks are kept per page in ``self.masks`` (key = 0-based page index, or -1
    for masks loaded from a template that target every page), in PDF points.
    """

    # A drag shorter than this many screen pixels on either side is treated as
    # a stray click and does not create a mask.
    _MIN_DRAG_PX = 3

    def __init__(self, root: tk.Tk):
        self.root = root
        self.root.title(APP_TITLE)
        self.root.geometry("1280x900")
        self.zoom = 1.25  # display zoom
        self.doc: Optional[fitz.Document] = None
        self.doc_path: Optional[Path] = None
        self.curr_page = 0
        self.curr_image: Optional[Image.Image] = None
        # Must stay referenced, otherwise Tk drops the displayed image.
        self.tk_image: Optional[ImageTk.PhotoImage] = None
        self.masks: Dict[int, List[MaskRect]] = {}  # per-page
        self.template_name = tk.StringVar(value="template_masks")
        self.status = tk.StringVar(value="Prêt.")
        self.raster_dpi = tk.IntVar(value=200)

        self.is_drawing = False
        self.start_xy: Optional[Tuple[int, int]] = None
        # Canvas item id of the rubber-band rectangle while dragging.
        self._preview_rect: Optional[int] = None

        self._build_ui()

    # UI layout
    def _build_ui(self):
        """Create the toolbar, the tools row, the drawing canvas and the status bar."""
        top = tk.Frame(self.root, padx=8, pady=8)
        top.pack(fill=tk.BOTH, expand=True)
        bar = tk.Frame(top)
        bar.pack(fill=tk.X)

        tk.Button(bar, text="Ouvrir PDF…", command=self.open_pdf).pack(side=tk.LEFT)
        tk.Button(bar, text="←", command=self.prev_page).pack(side=tk.LEFT, padx=(8, 2))
        tk.Button(bar, text="→", command=self.next_page).pack(side=tk.LEFT, padx=2)
        tk.Button(bar, text="Zoom -", command=lambda: self.set_zoom(max(0.5, self.zoom - 0.1))).pack(side=tk.LEFT, padx=6)
        tk.Button(bar, text="Zoom +", command=lambda: self.set_zoom(self.zoom + 0.1)).pack(side=tk.LEFT, padx=2)

        tk.Label(bar, text="Nom template :").pack(side=tk.LEFT, padx=(12, 2))
        tk.Entry(bar, textvariable=self.template_name, width=24).pack(side=tk.LEFT)
        tk.Button(bar, text="Sauver template…", command=self.save_template).pack(side=tk.LEFT, padx=6)
        tk.Button(bar, text="Charger template…", command=self.load_template).pack(side=tk.LEFT, padx=2)
        tk.Button(bar, text="Effacer masques page", command=self.clear_page_masks).pack(side=tk.LEFT, padx=12)

        tools = tk.Frame(top)
        tools.pack(fill=tk.X, pady=(4, 2))
        tk.Label(tools, text="Prévisualiser / Appliquer sur un échantillon :").pack(side=tk.LEFT)
        tk.Button(tools, text="Prévisualiser (vector)", command=self.preview_vector).pack(side=tk.LEFT, padx=6)
        tk.Button(tools, text="Prévisualiser (raster)", command=self.preview_raster).pack(side=tk.LEFT, padx=2)
        tk.Label(tools, text="DPI raster:").pack(side=tk.LEFT, padx=(12, 2))
        tk.Entry(tools, textvariable=self.raster_dpi, width=6).pack(side=tk.LEFT)

        tk.Button(tools, text="Appliquer (vector)…", command=self.apply_vector_batch).pack(side=tk.LEFT, padx=(16, 4))
        tk.Button(tools, text="Appliquer (raster)…", command=self.apply_raster_batch).pack(side=tk.LEFT, padx=2)

        self.canvas = tk.Canvas(top, bg="#f5f7fb")
        self.canvas.pack(fill=tk.BOTH, expand=True, pady=(6, 4))
        self.canvas.bind("<ButtonPress-1>", self.on_down)
        self.canvas.bind("<B1-Motion>", self.on_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_up)

        statusbar = tk.Label(self.root, textvariable=self.status, anchor="w", bd=1, relief=tk.SUNKEN)
        statusbar.pack(side=tk.BOTTOM, fill=tk.X)

    # Document handling
    def open_pdf(self):
        """Ask for a PDF, make it the reference document and reset page and masks."""
        path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")])
        if not path:
            return
        try:
            new_doc = fitz.open(path)
            # Swap only once the new file opened, then release the old handle.
            previous, self.doc = self.doc, new_doc
            if previous is not None:
                previous.close()
            self.doc_path = Path(path)
            self.curr_page = 0
            self.masks.clear()
            self.template_name.set(self.doc_path.stem + "_template")
            self.refresh()
            self.status.set(f"PDF ouvert : {Path(path).name} — {len(self.doc)} page(s)")
        except Exception as e:
            messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}")

    def refresh(self):
        """Re-render the current page with its masks and show it on the canvas."""
        if not self.doc:
            return
        img = page_pix(self.doc, self.curr_page, self.zoom)
        # Masks are stored in PDF points while the image is rendered at
        # self.zoom, so the overlay has to be scaled by the same factor.
        img_o = draw_overlay(img, self.masks.get(self.curr_page, []), self.zoom, self.curr_page)
        # Masks stored under page -1 apply to every page; show them as well.
        all_pages = self.masks.get(-1, [])
        if all_pages:
            img_o = draw_overlay(img_o, all_pages, self.zoom, -1)
        self.curr_image = img_o
        self.tk_image = ImageTk.PhotoImage(img_o)
        self.canvas.delete("all")
        self.canvas.create_image(0, 0, anchor="nw", image=self.tk_image)
        self.canvas.config(scrollregion=(0, 0, img_o.width, img_o.height))

    def prev_page(self):
        """Show the previous page, stopping at the first one."""
        if not self.doc:
            return
        self.curr_page = max(0, self.curr_page - 1)
        self.refresh()

    def next_page(self):
        """Show the next page, stopping at the last one."""
        if not self.doc:
            return
        self.curr_page = min(len(self.doc) - 1, self.curr_page + 1)
        self.refresh()

    def set_zoom(self, z: float):
        """Set the display zoom, limited to [0.5, 3.0], and redraw."""
        self.zoom = clamp(z, 0.5, 3.0)
        self.refresh()

    # Drawing masks
    def on_down(self, ev):
        """Start a rubber-band rectangle at the mouse position."""
        if not self.doc:
            return
        self.is_drawing = True
        self.start_xy = (ev.x, ev.y)
        self._preview_rect = self.canvas.create_rectangle(ev.x, ev.y, ev.x, ev.y, outline="#000", width=2)

    def on_drag(self, ev):
        """Stretch the rubber-band rectangle to the mouse position."""
        if not self.doc or not self.is_drawing or self.start_xy is None:
            return
        sx, sy = self.start_xy
        if self._preview_rect is not None:
            self.canvas.coords(self._preview_rect, sx, sy, ev.x, ev.y)

    def on_up(self, ev):
        """Finish the drag and store the rectangle as a mask of the current page."""
        if not self.doc or not self.is_drawing:
            return
        self.is_drawing = False
        if self._preview_rect is not None:
            self.canvas.delete(self._preview_rect)
            self._preview_rect = None
        if self.start_xy is None:
            return
        sx, sy = self.start_xy
        self.start_xy = None

        x0, y0, x1, y1 = rect_norm(sx, sy, ev.x, ev.y)
        # Keep the mask inside the displayed page even if the drag left it.
        if self.curr_image is not None:
            img_w, img_h = self.curr_image.size
            x0, x1 = clamp(x0, 0, img_w), clamp(x1, 0, img_w)
            y0, y1 = clamp(y0, 0, img_h), clamp(y1, 0, img_h)
        if (x1 - x0) < self._MIN_DRAG_PX or (y1 - y0) < self._MIN_DRAG_PX:
            # A plain click would otherwise create a zero-area mask.
            self.status.set("Masque ignoré : zone trop petite.")
            return

        # The page was rendered with Matrix(zoom, zoom): divide to get PDF points.
        z = self.zoom
        rx0, ry0, rx1, ry1 = x0 / z, y0 / z, x1 / z, y1 / z
        rect = MaskRect(page=self.curr_page, x0=rx0, y0=ry0, x1=rx1, y1=ry1, label="MASK")
        self.masks.setdefault(self.curr_page, []).append(rect)
        self.refresh()
        self.status.set(f"Masque ajouté p.{self.curr_page+1}: ({int(rx0)},{int(ry0)})–({int(rx1)},{int(ry1)})")

    # Template I/O
    def _current_template(self) -> Template:
        """Build a template from all current masks, sized after the first page.

        Raises:
            RuntimeError: if no PDF is open.
        """
        if not self.doc:
            raise RuntimeError("Aucun PDF ouvert.")
        page0 = self.doc[0]
        return Template(
            name=self.template_name.get().strip() or "template",
            page_size=(page0.rect.width, page0.rect.height),
            masks=[m for arr in self.masks.values() for m in arr],
        )

    def save_template(self):
        """Ask for a destination and write the template as YAML (.yml/.yaml) or JSON (any other suffix)."""
        try:
            tpl = self._current_template()
        except Exception as e:
            messagebox.showwarning("Info", str(e))
            return
        path = filedialog.asksaveasfilename(
            defaultextension=".yml",
            filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
            initialfile=f"{tpl.name}.yml",
        )
        if not path:
            return
        p = Path(path)
        try:
            if p.suffix.lower() in (".yml", ".yaml"):
                save_template_yaml(tpl, p)
            else:
                p.write_text(json.dumps(tpl.to_dict(), ensure_ascii=False, indent=2), encoding="utf-8")
            messagebox.showinfo("OK", f"Template enregistré : {p.name}")
        except Exception as e:
            messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}")

    def load_template(self):
        """Ask for a YAML/JSON template and replace the current masks with its content."""
        path = filedialog.askopenfilename(filetypes=[("YAML/JSON", "*.yml *.yaml *.json")])
        if not path:
            return
        p = Path(path)
        try:
            if p.suffix.lower() in (".yml", ".yaml"):
                tpl = load_template_yaml(p)
            else:
                tpl = Template.from_dict(json.loads(p.read_text(encoding="utf-8")))
            self.template_name.set(tpl.name)
            # Page numbers are kept as stored; -1 means all pages.
            self.masks.clear()
            for m in tpl.masks or []:
                self.masks.setdefault(m.page, []).append(m)
            self.refresh()
            self.status.set(f"Template chargé : {p.name}")
        except Exception as e:
            messagebox.showerror("Erreur", f"Template invalide : {e}")

    def clear_page_masks(self):
        """Remove the masks drawn on the current page (all-pages masks are kept)."""
        if not self.doc:
            return
        self.masks.pop(self.curr_page, None)
        self.refresh()
        self.status.set(f"Masques de la page {self.curr_page+1} supprimés.")

    # Preview / Apply
    def _build_template_from_state(self) -> Optional[Template]:
        """Return the current template, or warn and return None when no PDF is open."""
        if not self.doc:
            messagebox.showwarning("Info", "Ouvrez d'abord un PDF de référence.")
            return None
        return self._current_template()

    def _read_raster_dpi(self) -> Optional[int]:
        """Return the DPI typed by the user, or warn and return None if it is not a positive integer."""
        try:
            dpi = int(self.raster_dpi.get())
        except (tk.TclError, ValueError):
            # IntVar.get() raises TclError when the entry holds non-numeric text.
            dpi = 0
        if dpi <= 0:
            messagebox.showwarning("Info", "DPI raster invalide : saisissez un entier positif.")
            return None
        return dpi

    def _process_files(self, files, out_dirname, out_tag, error_fmt, apply_one):
        """Run *apply_one(pdf_in, pdf_out, audit)* on each file, reporting failures per file.

        Output goes to ``<input dir>/<out_dirname>/<stem>.<out_tag>.pdf`` with
        the audit log next to it as ``<stem>.audit.jsonl``. *error_fmt* is a
        ``str.format`` template with ``{name}`` and ``{err}`` placeholders.
        """
        for s in files:
            pdf_in = Path(s)
            try:
                out_dir = pdf_in.parent / out_dirname
                out_dir.mkdir(exist_ok=True)
                pdf_out = out_dir / f"{pdf_in.stem}.{out_tag}.pdf"
                audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
                apply_one(pdf_in, pdf_out, audit)
            except Exception as e:
                messagebox.showerror("Erreur", error_fmt.format(name=pdf_in.name, err=e))

    def preview_vector(self):
        """Apply the masks as vector redactions to at most two chosen PDFs, into 'masked_preview'."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF", "*.pdf")])
        if not samp:
            return
        self._process_files(
            samp[:2], "masked_preview", "preview_vector",
            "Prévisualisation vectorielle échouée sur {name} : {err}",
            lambda pdf_in, pdf_out, audit: apply_template_vector(pdf_in, pdf_out, tpl, audit),
        )
        messagebox.showinfo("Prévisualisation", "Terminé (vectoriel). Ouvrez le dossier 'masked_preview'.")

    def preview_raster(self):
        """Apply the masks in raster mode to at most two chosen PDFs, into 'masked_preview'."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        dpi = self._read_raster_dpi()
        if dpi is None:
            return
        samp = filedialog.askopenfilenames(title="Choisir 1 ou 2 PDF pour prévisualisation", filetypes=[("PDF", "*.pdf")])
        if not samp:
            return
        self._process_files(
            samp[:2], "masked_preview", "preview_raster",
            "Prévisualisation raster échouée sur {name} : {err}",
            lambda pdf_in, pdf_out, audit: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit),
        )
        messagebox.showinfo("Prévisualisation", "Terminé (raster). Ouvrez le dossier 'masked_preview'.")

    def apply_vector_batch(self):
        """Apply the masks as vector redactions to every chosen PDF, into 'masked'."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (vectoriel)", filetypes=[("PDF", "*.pdf")])
        if not files:
            return
        self._process_files(
            files, "masked", "masked_vector",
            "Échec sur {name}: {err}",
            lambda pdf_in, pdf_out, audit: apply_template_vector(pdf_in, pdf_out, tpl, audit),
        )
        messagebox.showinfo("Terminé", "Masques appliqués (vectoriel).")

    def apply_raster_batch(self):
        """Apply the masks in raster mode to every chosen PDF, into 'masked'."""
        tpl = self._build_template_from_state()
        if tpl is None:
            return
        dpi = self._read_raster_dpi()
        if dpi is None:
            return
        files = filedialog.askopenfilenames(title="Choisir des PDF à traiter (raster)", filetypes=[("PDF", "*.pdf")])
        if not files:
            return
        self._process_files(
            files, "masked", "masked_raster",
            "Échec sur {name}: {err}",
            lambda pdf_in, pdf_out, audit: apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit),
        )
        messagebox.showinfo("Terminé", "Masques appliqués (raster).")
|
||
|
||
# ----------------------------- Main ------------------------------
|
||
|
||
def main():
    """Create the Tk root window, attach the mask designer to it and run the event loop."""
    window = tk.Tk()
    designer = MaskDesignerApp(window)
    window.mainloop()
|
||
|
||
# Start the GUI only when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
|