#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ GUI Pseudonymisation – Patch d'intégration du Core refactorisé (P0) ------------------------------------------------------------------- Ce patch remplace le moteur interne d'extraction/anonymisation par le module `anonymizer_core_refactored.py` livré précédemment, et ajoute la génération optionnelle de PDF anonymisés avec **boîtes noires** (vector redaction et raster burn). Points clés : - Appel unique : core.process_pdf(pdf_path, out_dir, make_vector_redaction, also_make_raster_burn) - Sorties : .pseudonymise.txt, .audit.jsonl, .redacted_vector.pdf (option), .redacted_raster.pdf (option) - UI : ajout de cases à cocher pour activer la sortie PDF vector/raster ; désactivation du bouton « Télécharger » spaCy après succès. Dépendances : pdfplumber, pdfminer.six, pymupdf, pillow, spacy (optionnel pour l'UI), transformers (optionnel) """ from __future__ import annotations import os import sys import json import queue import threading from dataclasses import asdict from pathlib import Path from typing import Dict # GUI import tkinter as tk from tkinter import filedialog, messagebox, ttk # Core refactorisé try: import anonymizer_core_refactored as core except Exception as e: raise SystemExit("Impossible d'importer anonymizer_core_refactored.py. Placez-le à côté de ce script.") APP_TITLE = "Pseudonymisation (Refactor P0 + PDF Redaction)" # ---------------- Utilitaires ---------------- def resolve_base_dir() -> Path: return Path(getattr(sys, "_MEIPASS", Path(__file__).resolve().parent)) # ---------------- Application ---------------- class App: def __init__(self, root: tk.Tk): self.root = root self.root.title(APP_TITLE) self.root.geometry("1100x780") # State/UI vars self.dir_var = tk.StringVar() self.status_var = tk.StringVar(value="Prêt.") self.model_status_var = tk.StringVar(value="Modèle spaCy : optionnel (désactivez si absent)") self.queue: "queue.Queue[str]" = queue.Queue() # Options self.opt_vector_pdf = tk.BooleanVar(value=True) self.opt_raster_pdf = tk.BooleanVar(value=False) # spaCy (optionnel) — on garde l'emplacement UI mais on ne le rend pas bloquant self._build_ui() self._pump_logs() # ---------------- UI ---------------- def _build_ui(self): top = tk.Frame(self.root, padx=10, pady=10) top.pack(fill=tk.BOTH, expand=True) # Ligne dossier row1 = tk.Frame(top); row1.pack(fill=tk.X) tk.Label(row1, text="Dossier PDF :").pack(side=tk.LEFT) tk.Entry(row1, textvariable=self.dir_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6) tk.Button(row1, text="Parcourir…", command=self._browse).pack(side=tk.LEFT, padx=3) self.btn_run = tk.Button(row1, text="Lancer", command=self._run) self.btn_run.pack(side=tk.LEFT, padx=3) # Carte spaCy (informative) card = tk.LabelFrame(top, text="Modèle spaCy (FR) — optionnel", padx=8, pady=8) card.pack(fill=tk.X, pady=6) self.btn_download = tk.Button(card, text="Télécharger (wheel recommandé)", command=self._download_spacy_disabled, state=tk.DISABLED) self.btn_download.pack(side=tk.RIGHT) tk.Label(card, textvariable=self.model_status_var, anchor="w").pack(fill=tk.X) # Options de sortie PDF opt = tk.LabelFrame(top, text="Sorties PDF anonymisées", padx=8, pady=8) opt.pack(fill=tk.X, pady=6) tk.Checkbutton(opt, text="PDF vectoriel (redaction réelle)", variable=self.opt_vector_pdf).pack(side=tk.LEFT, padx=6) tk.Checkbutton(opt, text="PDF raster (sécurité maximale)", variable=self.opt_raster_pdf).pack(side=tk.LEFT, padx=6) # Journal tk.Label(top, text="Journal :").pack(anchor="w") self.txt = tk.Text(top, height=22) self.txt.pack(fill=tk.BOTH, expand=True, pady=(2,0)) tk.Label(top, textvariable=self.status_var, anchor="w").pack(fill=tk.X, pady=(4,0)) def _download_spacy_disabled(self): messagebox.showinfo("Info", "L'installation via wheel est recommandée et gérée hors app. Bouton désactivé.") def _pump_logs(self): try: while True: msg = self.queue.get_nowait() self.txt.insert(tk.END, msg + "\n"); self.txt.see(tk.END) except queue.Empty: pass finally: self.root.after(60, self._pump_logs) # ---------------- Actions ---------------- def _browse(self): d = filedialog.askdirectory() if d: self.dir_var.set(d) def _run(self): folder = Path(self.dir_var.get().strip()) if not folder.is_dir(): messagebox.showwarning("Dossier invalide", "Choisissez un dossier contenant des PDF.") return self.btn_run.config(state=tk.DISABLED) threading.Thread(target=self._worker, args=(folder,), daemon=True).start() def _worker(self, folder: Path): try: pdfs = sorted([p for p in folder.glob("*.pdf") if p.is_file()]) if not pdfs: self._log("Aucun PDF trouvé."); return outdir = folder / "pseudonymise"; outdir.mkdir(exist_ok=True) ok = ko = 0 for i, pdf in enumerate(pdfs, start=1): self.status_var.set(f"{i}/{len(pdfs)} — {pdf.name}") try: outputs = core.process_pdf( pdf_path=pdf, out_dir=outdir, make_vector_redaction=self.opt_vector_pdf.get(), also_make_raster_burn=self.opt_raster_pdf.get(), ) # Log bref des artefacts self._log("✓ " + pdf.name) for k, v in outputs.items(): self._log(f" - {k}: {v}") ok += 1 except Exception as e: self._log(f"✗ {pdf.name} → ERREUR: {e}") ko += 1 self.status_var.set(f"Terminé : {ok} OK, {ko} erreurs. Sortie: {outdir}") finally: self.btn_run.config(state=tk.NORMAL) def _log(self, msg: str): self.queue.put(msg) # ---------------- main ---------------- def main(): root = tk.Tk() App(root) root.mainloop() if __name__ == "__main__": main()