- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles - Core ONNX : anonymisation regex + NER optionnel - Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR) - Génération simultanée PDF Image + PDF Anonymisé (structure préservée) - Build Windows via Nuitka (script batch + GitHub Actions CI) - install.sh pour setup/run Linux Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
168 lines
6.4 KiB
Python
168 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
GUI Pseudonymisation – Patch d'intégration du Core refactorisé (P0)
|
||
-------------------------------------------------------------------
|
||
Ce patch remplace le moteur interne d'extraction/anonymisation par le module
|
||
`anonymizer_core_refactored.py` livré précédemment, et ajoute la génération
|
||
optionnelle de PDF anonymisés avec **boîtes noires** (vector redaction et raster burn).
|
||
|
||
Points clés :
|
||
- Appel unique : core.process_pdf(pdf_path, out_dir, make_vector_redaction, also_make_raster_burn)
|
||
- Sorties : .pseudonymise.txt, .audit.jsonl, .redacted_vector.pdf (option), .redacted_raster.pdf (option)
|
||
- UI : ajout de cases à cocher pour activer la sortie PDF vector/raster ;
|
||
désactivation du bouton « Télécharger » spaCy après succès.
|
||
|
||
Dépendances : pdfplumber, pdfminer.six, pymupdf, pillow, spacy (optionnel pour l'UI), transformers (optionnel)
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import queue
|
||
import threading
|
||
from dataclasses import asdict
|
||
from pathlib import Path
|
||
from typing import Dict
|
||
|
||
# GUI
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, ttk
|
||
|
||
# Core refactorisé
|
||
try:
|
||
import anonymizer_core_refactored as core
|
||
except Exception as e:
|
||
raise SystemExit("Impossible d'importer anonymizer_core_refactored.py. Placez-le à côté de ce script.")
|
||
|
||
# Title applied to the main Tk window (passed to ``root.title`` by ``App``).
APP_TITLE = "Pseudonymisation (Refactor P0 + PDF Redaction)"
|
||
|
||
# ---------------- Utilitaires ----------------
|
||
|
||
def resolve_base_dir() -> Path:
    """Return the directory the application should treat as its base.

    If the running interpreter exposes a ``sys._MEIPASS`` attribute, its value
    is used; otherwise the directory containing this source file is returned.
    """
    # The fallback is computed up front, exactly as a default argument to
    # ``getattr`` would be.
    script_dir = Path(__file__).resolve().parent
    base = getattr(sys, "_MEIPASS", script_dir)
    return Path(base)
|
||
|
||
# ---------------- Application ----------------
|
||
|
||
class App:
    """Tkinter front-end that runs ``core.process_pdf`` on every PDF of a folder.

    The processing loop runs on a background thread. That thread never touches
    a Tk object: everything it wants to show (journal lines, status text, the
    "finished" signal that re-enables the run button) is posted to
    ``self.queue`` and applied by ``_pump_logs`` on the UI thread.
    """

    # Kinds of events posted to ``self.queue`` as ``(kind, payload)`` tuples.
    _EVT_LOG = "log"
    _EVT_STATUS = "status"
    _EVT_DONE = "done"

    def __init__(self, root: tk.Tk):
        """Configure the root window, create the UI state and start the queue pump."""
        self.root = root
        self.root.title(APP_TITLE)
        self.root.geometry("1100x780")

        # State/UI vars
        self.dir_var = tk.StringVar()
        self.status_var = tk.StringVar(value="Prêt.")
        self.model_status_var = tk.StringVar(value="Modèle spaCy : optionnel (désactivez si absent)")
        # Worker -> UI channel. Items are ``(kind, payload)`` tuples; a bare
        # string is still accepted and treated as a journal line.
        self.queue: "queue.Queue[tuple[str, str]]" = queue.Queue()

        # Options
        self.opt_vector_pdf = tk.BooleanVar(value=True)
        self.opt_raster_pdf = tk.BooleanVar(value=False)

        # spaCy (optional): the UI slot is kept but it never blocks processing.
        self._build_ui()
        self._pump_logs()

    # ---------------- UI ----------------
    def _build_ui(self):
        """Create and lay out every widget of the main window."""
        top = tk.Frame(self.root, padx=10, pady=10)
        top.pack(fill=tk.BOTH, expand=True)

        # Folder row
        row1 = tk.Frame(top)
        row1.pack(fill=tk.X)
        tk.Label(row1, text="Dossier PDF :").pack(side=tk.LEFT)
        tk.Entry(row1, textvariable=self.dir_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6)
        tk.Button(row1, text="Parcourir…", command=self._browse).pack(side=tk.LEFT, padx=3)
        self.btn_run = tk.Button(row1, text="Lancer", command=self._run)
        self.btn_run.pack(side=tk.LEFT, padx=3)

        # spaCy card (informative only; the button is created disabled)
        card = tk.LabelFrame(top, text="Modèle spaCy (FR) — optionnel", padx=8, pady=8)
        card.pack(fill=tk.X, pady=6)
        self.btn_download = tk.Button(
            card,
            text="Télécharger (wheel recommandé)",
            command=self._download_spacy_disabled,
            state=tk.DISABLED,
        )
        self.btn_download.pack(side=tk.RIGHT)
        tk.Label(card, textvariable=self.model_status_var, anchor="w").pack(fill=tk.X)

        # PDF output options
        opt = tk.LabelFrame(top, text="Sorties PDF anonymisées", padx=8, pady=8)
        opt.pack(fill=tk.X, pady=6)
        tk.Checkbutton(opt, text="PDF vectoriel (redaction réelle)", variable=self.opt_vector_pdf).pack(side=tk.LEFT, padx=6)
        tk.Checkbutton(opt, text="PDF raster (sécurité maximale)", variable=self.opt_raster_pdf).pack(side=tk.LEFT, padx=6)

        # Journal
        tk.Label(top, text="Journal :").pack(anchor="w")
        self.txt = tk.Text(top, height=22)
        self.txt.pack(fill=tk.BOTH, expand=True, pady=(2, 0))
        tk.Label(top, textvariable=self.status_var, anchor="w").pack(fill=tk.X, pady=(4, 0))

    def _download_spacy_disabled(self):
        """Tell the user that the spaCy model is installed outside the application."""
        messagebox.showinfo("Info", "L'installation via wheel est recommandée et gérée hors app. Bouton désactivé.")

    def _pump_logs(self):
        """Apply all pending worker events, then reschedule itself in 60 ms."""
        try:
            while True:
                self._handle_event(self.queue.get_nowait())
        except queue.Empty:
            pass
        finally:
            self.root.after(60, self._pump_logs)

    def _handle_event(self, event):
        """Apply one queued event to the widgets (must run on the UI thread)."""
        if isinstance(event, str):
            # Plain strings are journal lines (the queue's historical format).
            kind, payload = self._EVT_LOG, event
        else:
            kind, payload = event

        if kind == self._EVT_LOG:
            self.txt.insert(tk.END, payload + "\n")
            self.txt.see(tk.END)
        elif kind == self._EVT_STATUS:
            self.status_var.set(payload)
        elif kind == self._EVT_DONE:
            self.btn_run.config(state=tk.NORMAL)

    # ---------------- Actions ----------------
    def _browse(self):
        """Let the user pick a folder and store it in the folder entry."""
        d = filedialog.askdirectory()
        if d:
            self.dir_var.set(d)

    def _run(self):
        """Validate the chosen folder and start the background worker."""
        folder_text = self.dir_var.get().strip()
        folder = Path(folder_text)
        # An empty entry must be rejected explicitly: Path("") is the current
        # directory, which would otherwise pass the is_dir() check.
        if not folder_text or not folder.is_dir():
            messagebox.showwarning("Dossier invalide", "Choisissez un dossier contenant des PDF.")
            return
        self.btn_run.config(state=tk.DISABLED)
        # Read the Tk variables here, on the UI thread, and hand plain bools
        # to the worker.
        options = {
            "make_vector_redaction": self.opt_vector_pdf.get(),
            "also_make_raster_burn": self.opt_raster_pdf.get(),
        }
        threading.Thread(target=self._worker, args=(folder,), kwargs=options, daemon=True).start()

    def _worker(
        self,
        folder: Path,
        make_vector_redaction: bool | None = None,
        also_make_raster_burn: bool | None = None,
    ):
        """Process every PDF found directly inside ``folder``.

        Outputs go to ``folder / "pseudonymise"``. Progress and results are
        reported only through ``self.queue``; a "done" event is always posted
        at the end so the UI thread can re-enable the run button.

        Args:
            folder: Directory whose ``.pdf`` files (any letter case) are processed.
            make_vector_redaction: Forwarded to ``core.process_pdf``. ``None``
                falls back to reading ``self.opt_vector_pdf``.
            also_make_raster_burn: Forwarded to ``core.process_pdf``. ``None``
                falls back to reading ``self.opt_raster_pdf``.
        """
        try:
            # Match on the lower-cased name so "*.PDF" is found on
            # case-sensitive file systems too.
            pdfs = sorted(
                p for p in folder.iterdir()
                if p.is_file() and p.name.lower().endswith(".pdf")
            )
            if not pdfs:
                self._log("Aucun PDF trouvé.")
                return

            if make_vector_redaction is None:
                make_vector_redaction = self.opt_vector_pdf.get()
            if also_make_raster_burn is None:
                also_make_raster_burn = self.opt_raster_pdf.get()

            outdir = folder / "pseudonymise"
            outdir.mkdir(exist_ok=True)
            ok = ko = 0
            for i, pdf in enumerate(pdfs, start=1):
                self._set_status(f"{i}/{len(pdfs)} — {pdf.name}")
                try:
                    outputs = core.process_pdf(
                        pdf_path=pdf,
                        out_dir=outdir,
                        make_vector_redaction=make_vector_redaction,
                        also_make_raster_burn=also_make_raster_burn,
                    )
                    # Short log of the produced artefacts
                    self._log("✓ " + pdf.name)
                    for k, v in outputs.items():
                        self._log(f" - {k}: {v}")
                    ok += 1
                except Exception as e:
                    # Best effort: one failing document must not stop the batch.
                    self._log(f"✗ {pdf.name} → ERREUR: {e}")
                    ko += 1
            self._set_status(f"Terminé : {ok} OK, {ko} erreurs. Sortie: {outdir}")
        except OSError as e:
            # Listing the folder or creating the output directory failed;
            # surface it in the journal instead of dying silently.
            self._log(f"✗ ERREUR: {e}")
        finally:
            self.queue.put((self._EVT_DONE, ""))

    def _set_status(self, text: str):
        """Queue a status-bar update (safe to call from any thread)."""
        self.queue.put((self._EVT_STATUS, text))

    def _log(self, msg: str):
        """Queue a journal line (safe to call from any thread)."""
        self.queue.put((self._EVT_LOG, msg))
|
||
|
||
|
||
# ---------------- main ----------------
|
||
|
||
def main():
    """Create the Tk root window, attach the application to it and run the event loop."""
    window = tk.Tk()
    App(window)
    window.mainloop()


if __name__ == "__main__":
    main()
|