- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles
- Core ONNX : anonymisation regex + NER optionnel
- Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR)
- Génération simultanée PDF Image + PDF Anonymisé (structure préservée)
- Build Windows via Nuitka (script batch + GitHub Actions CI)
- install.sh pour setup/run Linux

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
892 lines
33 KiB
Python
892 lines
33 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
Pseudonymisation – GUI v5 (Vue unique épurée)
----------------------------------------------
- Vue unique en 2 étapes : dossier → lancer (les deux formats sont générés)
- Thème système natif (sv_ttk optionnel, fallback clam)
- Backend NER ONNX/EDS-Pseudo conservé en interne
- Pas d'onglet Avancé (NER + YAML chargés silencieusement)

Fichiers requis à côté :
  - anonymizer_core_refactored_onnx.py
  - ner_manager_onnx.py
"""
|
||
from __future__ import annotations
|
||
|
||
import enum
|
||
import json
|
||
import os
|
||
import platform
|
||
import queue
|
||
import re
|
||
import shutil
|
||
import subprocess
|
||
import threading
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, ttk
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Core
|
||
# ---------------------------------------------------------------------------
|
||
try:
|
||
import anonymizer_core_refactored_onnx as core
|
||
except Exception as e:
|
||
raise SystemExit(f"Impossible d'importer le core ONNX : {e}")
|
||
|
||
try:
|
||
from ner_manager_onnx import NerModelManager, NerThresholds
|
||
except Exception:
|
||
NerModelManager = None # type: ignore
|
||
NerThresholds = None # type: ignore
|
||
|
||
try:
|
||
from eds_pseudo_manager import EdsPseudoManager
|
||
except Exception:
|
||
EdsPseudoManager = None # type: ignore
|
||
|
||
try:
|
||
import yaml
|
||
except Exception:
|
||
yaml = None
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Thème optionnel
|
||
# ---------------------------------------------------------------------------
|
||
try:
|
||
import sv_ttk # type: ignore
|
||
except ImportError:
|
||
sv_ttk = None
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Constantes
|
||
# ---------------------------------------------------------------------------
|
||
# Window title, also reused as the main heading of the view.
APP_TITLE: str = "Pseudonymisation de PDF"
# Version label displayed at the right of the status bar.
APP_VERSION: str = "v5.0"
# Location of the YAML dictionaries file, relative to the working directory.
DEFAULT_CFG: Path = Path("config/dictionnaires.yml")
|
||
|
||
# Default YAML configuration; App._ensure_cfg_exists writes this text verbatim
# to the configured path when no file exists there yet.  The literal is a raw
# string so the backslashes of the regex pattern are kept as-is.
# NOTE(review): the nested keys below (under whitelist/blacklist/
# regex_overrides/flags) appear here without indentation and with stray
# separator lines - confirm the literal against the repository copy before
# relying on it, since YAML nesting depends on indentation.
DEFAULTS_CFG_TEXT = r"""
|
||
# dictionnaires.yml – valeurs par défaut (bloc littéral pour les regex)
|
||
version: 1
|
||
encoding: "utf-8"
|
||
normalization: "NFKC"
|
||
whitelist:
|
||
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
|
||
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
|
||
org_gpe_keep: true
|
||
blacklist:
|
||
force_mask_terms: []
|
||
force_mask_regex: []
|
||
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
|
||
regex_overrides:
|
||
- name: OGC_court
|
||
pattern: |-
|
||
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
|
||
placeholder: '[OGC]'
|
||
flags: [IGNORECASE]
|
||
flags:
|
||
case_insensitive: true
|
||
unicode_word_boundaries: true
|
||
regex_engine: "python"
|
||
"""
|
||
|
||
# Couleurs
|
||
# Accent colours (buttons, links, highlighted statistics).
CLR_PRIMARY: str = "#2563eb"
CLR_PRIMARY_LIGHT: str = "#dbeafe"
# Success colours.
CLR_GREEN: str = "#16a34a"
CLR_GREEN_LIGHT: str = "#dcfce7"
# Error colours.
CLR_RED: str = "#dc2626"
CLR_RED_LIGHT: str = "#fee2e2"
# Informational panel background.
CLR_BLUE_LIGHT: str = "#eff6ff"
# Card and window surfaces.
CLR_CARD_BG: str = "#ffffff"
CLR_CARD_BORDER: str = "#d1d5db"
CLR_BG: str = "#f9fafb"
# Text colours.
CLR_TEXT: str = "#111827"
CLR_TEXT_SECONDARY: str = "#6b7280"
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Messages worker → UI
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class MsgType(enum.Enum):
    """Kinds of messages the worker thread posts to the UI queue."""

    # A line of text for the detailed journal.
    LOG = "log"
    # A "file i of n" update for the progress bar.
    PROGRESS = "progress"
    # End of the batch, carrying the final counters.
    DONE = "done"
|
||
|
||
|
||
@dataclass
class UiMessage:
    """One message sent from the worker thread to the UI.

    Only ``kind`` is required; the other fields default to empty values and
    are filled in according to the kind of message.
    """

    kind: MsgType

    # LOG: the line appended to the journal.
    text: str = ""

    # PROGRESS: 1-based index of the current file, batch size, file name.
    current: int = 0
    total: int = 0
    filename: str = ""

    # DONE: succeeded files, failed files, masked items, output directory.
    ok: int = 0
    ko: int = 0
    masked: int = 0
    outdir: str = ""
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def open_folder(path: Path):
    """Open *path* in the platform's file manager, best effort.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere.  Any failure is ignored on purpose: the call backs a
    convenience button.
    """
    try:
        target = str(path)
        if platform.system() == "Windows":
            os.startfile(target)  # type: ignore
            return
        launcher = "open" if platform.system() == "Darwin" else "xdg-open"
        subprocess.Popen([launcher, target])
    except Exception:
        pass
|
||
|
||
|
||
def _detect_font() -> str:
|
||
"""Retourne la meilleure police sans-serif disponible."""
|
||
for name in ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica"):
|
||
try:
|
||
test = tk.Label(font=(name, 10))
|
||
actual = test.cget("font")
|
||
test.destroy()
|
||
if name.lower().replace(" ", "") in actual.lower().replace(" ", ""):
|
||
return name
|
||
except Exception:
|
||
continue
|
||
return "TkDefaultFont"
|
||
|
||
|
||
def _detect_dark_mode() -> bool:
|
||
"""Détecte le thème sombre GNOME."""
|
||
try:
|
||
result = subprocess.run(
|
||
["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"],
|
||
capture_output=True, text=True, timeout=2,
|
||
)
|
||
return "dark" in result.stdout.lower()
|
||
except Exception:
|
||
return False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# ToolTip amélioré
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class ToolTip:
    """Hover tooltip for a Tk widget, shown after a short delay."""

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        """Attach the tooltip to *widget*.

        Args:
            widget: Widget whose <Enter>/<Leave> events drive the tooltip.
            text: Text displayed in the tooltip.
            delay: Delay in milliseconds between hovering and display.
        """
        self.widget = widget
        self.text = text
        self.delay = delay
        # Popup window while it is displayed, otherwise None.
        self.tip: Optional[tk.Toplevel] = None
        # Identifier of the pending ``after`` callback, if any.
        self._after_id: Optional[str] = None
        for sequence, handler in (("<Enter>", self._schedule), ("<Leave>", self.hide)):
            widget.bind(sequence, handler)

    def _schedule(self, *_):
        """(Re)start the countdown before the tooltip is shown."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Drop the pending display callback, if there is one."""
        if not self._after_id:
            return
        self.widget.after_cancel(self._after_id)
        self._after_id = None

    def _show(self):
        """Create the borderless popup just below the widget."""
        if self.tip:
            return
        pos_x = self.widget.winfo_rootx() + 20
        pos_y = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        popup = tk.Toplevel(self.widget)
        self.tip = popup
        # No window-manager decorations: the popup is a bare label.
        popup.wm_overrideredirect(True)
        popup.wm_geometry(f"+{pos_x}+{pos_y}")
        tk.Label(
            popup,
            text=self.text,
            justify=tk.LEFT,
            background="#1f2937",
            foreground="#f9fafb",
            relief=tk.SOLID,
            borderwidth=1,
            padx=8,
            pady=5,
            wraplength=320,
        ).pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending display and destroy the popup if shown."""
        self._cancel()
        if not self.tip:
            return
        self.tip.destroy()
        self.tip = None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Application principale
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class App:
    """Single-window Tk front end for batch PDF pseudonymisation.

    The view guides the user through two steps (choose a folder, launch),
    then shows a progress bar followed by a result summary.  PDFs are
    processed on a daemon thread; that thread communicates with the UI only
    through ``self.queue``, which ``_pump_logs`` drains on the Tk main loop.
    """

    # Horizontal padding shared by every top-level section of the view.
    _PAD_X = 32

    def __init__(self, root: tk.Tk):
        """Build the window, start the message pump and load the YAML config.

        Args:
            root: The Tk root window hosting the application.
        """
        self.root = root
        self.root.title(APP_TITLE)
        self.root.geometry("780x820")
        self.root.minsize(600, 650)

        # --- Theme ---
        self._apply_theme()

        # --- Fonts ---
        self._font_family = _detect_font()
        self._f_title = (self._font_family, 20, "bold")
        self._f_body = (self._font_family, 11)
        self._f_body_bold = (self._font_family, 11, "bold")
        self._f_button = (self._font_family, 13, "bold")
        self._f_stat = (self._font_family, 24, "bold")
        self._f_small = (self._font_family, 10)
        self._f_card_title = (self._font_family, 12, "bold")
        self._f_card_desc = (self._font_family, 10)

        # --- Tk variables and the worker -> UI queue ---
        self.dir_var = tk.StringVar()
        self.status_var = tk.StringVar(value="Prêt.")
        self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
        self.queue: "queue.Queue[UiMessage]" = queue.Queue()

        # --- NER (internal, no UI) ---
        self.use_hf = False
        self.th_per = 0.90
        self.th_org = 0.90
        self.th_loc = 0.90
        self._onnx_manager: Optional[Any] = (
            NerModelManager(cache_dir=Path("models")) if NerModelManager else None
        )
        self._eds_manager: Optional[Any] = (
            EdsPseudoManager(cache_dir=Path("models")) if EdsPseudoManager else None
        )
        self._active_manager: Optional[Any] = None
        self.cfg_data: Dict[str, Any] = {}

        # --- Merged model catalogue (EDS entries override ONNX ones) ---
        catalog: Dict[str, str] = {}
        if self._onnx_manager:
            catalog.update(self._onnx_manager.models_catalog())
        if self._eds_manager:
            catalog.update(self._eds_manager.models_catalog())
        self._merged_catalog = catalog

        # --- Results ---
        self._last_outdir: Optional[Path] = None

        # --- UI construction ---
        self._build_ui()
        self._pump_logs()
        self._ensure_cfg_exists()
        self._load_cfg()

    # ---------------------------------------------------------------
    # Theme
    # ---------------------------------------------------------------
    def _apply_theme(self):
        """Apply sv_ttk (light/dark) when available, else the ttk "clam" theme."""
        if sv_ttk is not None:
            mode = "dark" if _detect_dark_mode() else "light"
            sv_ttk.set_theme(mode)
            return
        try:
            ttk.Style().theme_use("clam")
        except Exception:
            # Best effort: keep Tk's default theme if "clam" is unavailable.
            pass

    # ---------------------------------------------------------------
    # Small pure helpers
    # ---------------------------------------------------------------
    @staticmethod
    def _wheel_units(delta: int) -> int:
        """Convert a ``<MouseWheel>`` delta into canvas scroll units.

        Deltas that are at least one 120-step notch are divided by 120.
        Smaller non-zero deltas (as some platforms report) would truncate to
        zero, so they are mapped to a single unit in the right direction.

        Args:
            delta: The event's ``delta`` field; positive means scroll up.

        Returns:
            Units to pass to ``yview_scroll`` (negative scrolls up).
        """
        if delta == 0:
            return 0
        if abs(delta) >= 120:
            return int(-delta / 120)
        return -1 if delta > 0 else 1

    @staticmethod
    def _list_pdfs(folder: Path) -> List[Path]:
        """Return the PDF files directly inside *folder*, sorted by path.

        The extension is compared case-insensitively so ``.PDF`` files are
        found on case-sensitive file systems too.

        Raises:
            OSError: If the folder cannot be listed.
        """
        return sorted(
            entry
            for entry in folder.iterdir()
            if entry.is_file() and entry.suffix.lower() == ".pdf"
        )

    # ---------------------------------------------------------------
    # Single-view construction
    # ---------------------------------------------------------------
    def _build_ui(self):
        """Assemble the whole view, top to bottom."""
        self.root.configure(bg=CLR_BG)
        main = self._build_scroll_container()
        self._build_header(main)
        self._build_folder_step(main)
        self._build_formats_step(main)
        self._build_actions(main)
        self._build_progress(main)
        self._build_results(main)
        self._build_status_bar(main)

    def _build_scroll_container(self) -> tk.Frame:
        """Create the scrollable canvas and return the frame that holds the view."""
        outer = tk.Frame(self.root, bg=CLR_BG)
        outer.pack(fill=tk.BOTH, expand=True)

        canvas = tk.Canvas(outer, bg=CLR_BG, highlightthickness=0)
        scrollbar = ttk.Scrollbar(outer, orient=tk.VERTICAL, command=canvas.yview)
        self._scroll_frame = tk.Frame(canvas, bg=CLR_BG)

        # Keep the scroll region in sync with the content size.
        self._scroll_frame.bind(
            "<Configure>",
            lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
        )
        canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw")
        canvas.configure(yscrollcommand=scrollbar.set)

        # Make the inner frame as wide as the canvas.
        def _on_canvas_configure(event):
            canvas.itemconfig(canvas_window, width=event.width)

        canvas.bind("<Configure>", _on_canvas_configure)

        # Mouse-wheel scrolling (<MouseWheel> plus X11 button 4/5 events).
        def _on_mousewheel(event):
            units = self._wheel_units(event.delta)
            if units:
                canvas.yview_scroll(units, "units")

        def _on_mousewheel_linux(event):
            if event.num == 4:
                canvas.yview_scroll(-3, "units")
            elif event.num == 5:
                canvas.yview_scroll(3, "units")

        canvas.bind_all("<MouseWheel>", _on_mousewheel)
        canvas.bind_all("<Button-4>", _on_mousewheel_linux)
        canvas.bind_all("<Button-5>", _on_mousewheel_linux)

        canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        return self._scroll_frame

    def _build_header(self, main: tk.Frame):
        """Add the title, the subtitle and the separator below them."""
        pad_x = self._PAD_X
        tk.Label(
            main, text=APP_TITLE, font=self._f_title,
            bg=CLR_BG, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X, padx=pad_x, pady=(24, 2))

        tk.Label(
            main,
            text="Masquez automatiquement les données personnelles de vos documents PDF.",
            font=self._f_body, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
        ).pack(fill=tk.X, padx=pad_x, pady=(0, 18))

        ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 18))

    def _build_folder_step(self, main: tk.Frame):
        """Add step 1: the clickable zone used to choose the input folder."""
        pad_x = self._PAD_X
        tk.Label(
            main, text="1. Choisir les documents", font=self._f_body_bold,
            bg=CLR_BG, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X, padx=pad_x, pady=(0, 6))

        self._folder_zone = tk.Frame(
            main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER,
            highlightthickness=2, cursor="hand2",
        )
        self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18))

        # Initial content: an invitation to click.
        self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG)
        self._folder_inner.pack(fill=tk.X, padx=20, pady=18)

        self._folder_icon_lbl = tk.Label(
            self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28),
            bg=CLR_CARD_BG,
        )
        self._folder_icon_lbl.pack()

        self._folder_text_lbl = tk.Label(
            self._folder_inner,
            text="Cliquez pour choisir un dossier contenant vos PDF",
            font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY,
        )
        self._folder_text_lbl.pack(pady=(4, 0))

        # Make the whole zone clickable.
        for w in (self._folder_zone, self._folder_inner,
                  self._folder_icon_lbl, self._folder_text_lbl):
            w.bind("<Button-1>", lambda e: self._browse())

    def _build_formats_step(self, main: tk.Frame):
        """Add step 2: the panel describing the two generated formats."""
        pad_x = self._PAD_X
        tk.Label(
            main, text="2. Formats générés", font=self._f_body_bold,
            bg=CLR_BG, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X, padx=pad_x, pady=(0, 6))

        info_frame = tk.Frame(
            main, bg=CLR_BLUE_LIGHT,
            highlightbackground=CLR_CARD_BORDER, highlightthickness=1,
        )
        info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18))

        info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT)
        info_inner.pack(fill=tk.X, padx=16, pady=12)

        tk.Label(
            info_inner,
            text="Les deux formats sont générés automatiquement :",
            font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X)

        tk.Label(
            info_inner,
            text=("\u2022 PDF Image — sécurité maximale, chaque page en image, aucun texte résiduel\n"
                  "\u2022 PDF Anonymisé — structure préservée comme l'original, fichier léger"),
            font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
            anchor="w", justify=tk.LEFT,
        ).pack(fill=tk.X, pady=(4, 0))

    def _build_actions(self, main: tk.Frame):
        """Add the launch button and the help link."""
        pad_x = self._PAD_X
        self.btn_run = tk.Button(
            main, text="Lancer la pseudonymisation",
            font=self._f_button, bg=CLR_PRIMARY, fg="white",
            activebackground="#1d4ed8", activeforeground="white",
            relief=tk.FLAT, cursor="hand2", pady=10,
            command=self._run,
        )
        self.btn_run.pack(fill=tk.X, padx=pad_x, pady=(0, 4))

        help_lbl = tk.Label(
            main, text="Comment ça marche ?", font=self._f_small,
            bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
        )
        help_lbl.pack(pady=(0, 18))
        help_lbl.bind("<Button-1>", lambda e: self._show_help())

    def _build_progress(self, main: tk.Frame):
        """Create the progress section; it stays hidden until a run starts."""
        self._progress_frame = tk.Frame(main, bg=CLR_BG)
        # Deliberately not packed here: _show_progress displays it.

        self._progressbar = ttk.Progressbar(
            self._progress_frame, orient=tk.HORIZONTAL, mode="determinate",
        )
        self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4))

        self._progress_label = tk.Label(
            self._progress_frame, text="", font=self._f_small,
            bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
        )
        self._progress_label.pack(fill=tk.X)

    def _build_results(self, main: tk.Frame):
        """Create the results section; it stays hidden until a run ends."""
        self._results_frame = tk.Frame(main, bg=CLR_BG)
        # Deliberately not packed here: _show_results displays it.

        tk.Label(
            self._results_frame, text="Résultats", font=self._f_body_bold,
            bg=CLR_BG, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X, pady=(0, 8))

        stats_row = tk.Frame(self._results_frame, bg=CLR_BG)
        stats_row.pack(fill=tk.X, pady=(0, 12))
        for column in range(3):
            stats_row.columnconfigure(column, weight=1)

        self._stat_files = self._make_stat_card(
            stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0)
        self._stat_masked = self._make_stat_card(
            stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1)
        self._stat_errors = self._make_stat_card(
            stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2)

        self.btn_open_out = tk.Button(
            self._results_frame, text="Ouvrir le dossier de résultats",
            font=self._f_button, bg=CLR_GREEN, fg="white",
            activebackground="#15803d", activeforeground="white",
            relief=tk.FLAT, cursor="hand2", pady=10,
            command=self._open_out,
        )
        self.btn_open_out.pack(fill=tk.X, pady=(0, 8))

        # Journal toggle.
        self._log_visible = False
        self._log_toggle = tk.Label(
            self._results_frame, text="Voir le journal détaillé \u25BC",
            font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
        )
        self._log_toggle.pack(pady=(0, 4))
        self._log_toggle.bind("<Button-1>", lambda e: self._toggle_log())

        self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG)
        # Deliberately not packed here: _toggle_log displays it.

        self.txt = tk.Text(
            self._log_frame, height=14, font=self._f_small,
            bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD,
            state=tk.DISABLED,
        )
        log_scrollbar = ttk.Scrollbar(self._log_frame, command=self.txt.yview)
        self.txt.configure(yscrollcommand=log_scrollbar.set)
        self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    def _build_status_bar(self, main: tk.Frame):
        """Add the bottom separator and the status bar."""
        pad_x = self._PAD_X
        # Kept as an attribute: the dynamic sections are packed *before* this
        # separator so the status bar remains the last element of the view.
        self._status_sep = ttk.Separator(main)
        self._status_sep.pack(fill=tk.X, padx=pad_x, pady=(18, 0))

        status_bar = tk.Frame(main, bg=CLR_BG)
        status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12))

        tk.Label(
            status_bar, textvariable=self.status_var, font=self._f_small,
            bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
        ).pack(side=tk.LEFT)

        tk.Label(
            status_bar, text=APP_VERSION, font=self._f_small,
            bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e",
        ).pack(side=tk.RIGHT)

    # ---------------------------------------------------------------
    # Statistic cards
    # ---------------------------------------------------------------
    def _make_stat_card(self, parent, number: str, label: str,
                        fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Widget]:
        """Create one statistic card in grid column *col* of *parent*.

        Returns:
            A dict with the card's widgets under the keys ``"frame"``,
            ``"number"`` and ``"label"``.
        """
        padx = (0, 4) if col == 0 else (4, 4) if col == 1 else (4, 0)
        frame = tk.Frame(parent, bg=bg_color, highlightbackground=bg_color, highlightthickness=1)
        frame.grid(row=0, column=col, sticky="nsew", padx=padx)

        num_lbl = tk.Label(
            frame, text=number, font=self._f_stat,
            bg=bg_color, fg=fg_color,
        )
        num_lbl.pack(pady=(12, 2))

        txt_lbl = tk.Label(
            frame, text=label, font=self._f_small,
            bg=bg_color, fg=CLR_TEXT_SECONDARY,
        )
        txt_lbl.pack(pady=(0, 12))

        return {"frame": frame, "number": num_lbl, "label": txt_lbl}

    def _update_stat_card(self, card: Dict[str, tk.Widget], value: int,
                          fg_color: str, bg_color: str):
        """Set the number shown on *card* and recolour the whole card."""
        card["number"].configure(text=str(value), fg=fg_color, bg=bg_color)
        card["frame"].configure(bg=bg_color, highlightbackground=bg_color)
        card["label"].configure(bg=bg_color)

    # ---------------------------------------------------------------
    # Folder actions
    # ---------------------------------------------------------------
    def _browse(self):
        """Ask for a folder and refresh the folder zone if one was chosen."""
        d = filedialog.askdirectory()
        if d:
            self.dir_var.set(d)
            self._update_folder_display()

    def _update_folder_display(self):
        """Rebuild the folder zone to show the chosen path and its PDF count."""
        folder = self.dir_var.get()
        if not folder:
            return

        # Count the PDFs; an unreadable folder simply shows zero.
        try:
            pdf_count = len(self._list_pdfs(Path(folder)))
        except OSError:
            pdf_count = 0

        # Empty the zone and rebuild its content.
        for w in self._folder_inner.winfo_children():
            w.destroy()

        row = tk.Frame(self._folder_inner, bg=CLR_CARD_BG)
        row.pack(fill=tk.X)

        tk.Label(
            row, text="\U0001f4c2", font=(self._font_family, 16),
            bg=CLR_CARD_BG,
        ).pack(side=tk.LEFT, padx=(0, 8))

        info_frame = tk.Frame(row, bg=CLR_CARD_BG)
        info_frame.pack(side=tk.LEFT, fill=tk.X, expand=True)

        # Path, truncated from the left when too long.
        display_path = folder
        if len(display_path) > 60:
            display_path = "..." + display_path[-57:]
        tk.Label(
            info_frame, text=display_path, font=self._f_body_bold,
            bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
        ).pack(fill=tk.X)

        suffix = "PDF trouvé" if pdf_count <= 1 else "PDF trouvés"
        tk.Label(
            info_frame, text=f"{pdf_count} {suffix}",
            font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
        ).pack(fill=tk.X)

        change_btn = tk.Label(
            row, text="Changer", font=self._f_small,
            bg=CLR_CARD_BG, fg=CLR_PRIMARY, cursor="hand2",
        )
        change_btn.pack(side=tk.RIGHT, padx=(8, 0))
        change_btn.bind("<Button-1>", lambda e: self._browse())

        # Green border signals that a folder is selected.
        self._folder_zone.configure(highlightbackground=CLR_GREEN)

    # ---------------------------------------------------------------
    # Launch
    # ---------------------------------------------------------------
    def _run(self):
        """Validate the selection and start the worker thread."""
        raw_folder = self.dir_var.get().strip()
        # An empty string must be rejected explicitly: Path("") denotes the
        # current directory, which would pass the is_dir() check.
        folder = Path(raw_folder) if raw_folder else None
        if folder is None or not folder.is_dir():
            messagebox.showwarning(
                "Dossier invalide",
                "Choisissez un dossier contenant des PDF.",
            )
            return

        try:
            pdfs = self._list_pdfs(folder)
        except OSError:
            # Unreadable folder: handled like a folder without any PDF.
            pdfs = []
        if not pdfs:
            messagebox.showwarning(
                "Aucun PDF",
                "Le dossier sélectionné ne contient aucun fichier PDF.",
            )
            return

        self.btn_run.config(state=tk.DISABLED, bg="#93c5fd", text="Traitement en cours...")
        self._hide_results()
        self._show_progress(total=len(pdfs))

        # Tk variables are read here, on the main thread, and handed to the
        # worker so it never has to call into Tk.
        config_path = Path(self.cfg_path.get())
        threading.Thread(
            target=self._worker, args=(folder, pdfs, config_path), daemon=True,
        ).start()

    def _worker(self, folder: Path, pdfs: List[Path], config_path: Optional[Path] = None):
        """Process every PDF on the worker thread and report through the queue.

        Args:
            folder: Input folder; results go to its ``pseudonymise`` sub-folder.
            pdfs: Files to process, in order.
            config_path: YAML configuration handed to the core.  When omitted
                it is read from ``self.cfg_path`` (kept for callers that pass
                only two arguments).
        """
        try:
            if config_path is None:
                config_path = Path(self.cfg_path.get())
            outdir = folder / "pseudonymise"
            outdir.mkdir(exist_ok=True)
            ok = ko = 0
            global_counts: Dict[str, int] = {}

            for i, pdf in enumerate(pdfs, start=1):
                self.queue.put(UiMessage(
                    kind=MsgType.PROGRESS, current=i, total=len(pdfs),
                    filename=pdf.name,
                ))
                try:
                    counts = self._process_one(pdf, outdir, config_path)
                except Exception as e:
                    # One failing file must not stop the batch.
                    self.queue.put(UiMessage(
                        kind=MsgType.LOG,
                        text=f"\u2717 {pdf.name} \u2192 ERREUR: {e}",
                    ))
                    ko += 1
                else:
                    for k, v in counts.items():
                        global_counts[k] = global_counts.get(k, 0) + v
                    ok += 1

            total_masked = sum(global_counts.values())
            self.queue.put(UiMessage(
                kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
                outdir=str(outdir),
            ))
            if ok:
                self.queue.put(UiMessage(
                    kind=MsgType.LOG,
                    text="RÉSUMÉ DU LOT : " + ", ".join(
                        f"{k}={v}" for k, v in sorted(global_counts.items())),
                ))
        except Exception as e:
            self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}"))
            self.queue.put(UiMessage(kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0, outdir=""))

    def _process_one(self, pdf: Path, outdir: Path, config_path: Path) -> Dict[str, int]:
        """Run the core on one PDF, log its outputs and return its audit counts."""
        active = self._active_manager
        use_ner = bool(active and self.use_hf and hasattr(active, 'is_loaded') and active.is_loaded())
        thresholds = None
        # Thresholds are only built for a loaded manager that is not the
        # EDS-Pseudo one.
        is_eds = bool(EdsPseudoManager and isinstance(active, EdsPseudoManager))
        if use_ner and NerThresholds and not is_eds:
            # NOTE(review): meaning of the fourth positional value (0.85) is
            # defined by NerThresholds - confirm against ner_manager_onnx.
            thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85)

        outputs = core.process_pdf(
            pdf_path=pdf,
            out_dir=outdir,
            make_vector_redaction=True,
            also_make_raster_burn=True,
            config_path=config_path,
            use_hf=use_ner,
            ner_manager=active,
            ner_thresholds=thresholds,
        )
        self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}"))
        for k, v in outputs.items():
            self.queue.put(UiMessage(kind=MsgType.LOG, text=f" - {k}: {v}"))

        counts = self._count_audit(Path(outputs.get("audit", "")))
        if counts:
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text=" ~ résumé : " + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())),
            ))
        return counts

    # ---------------------------------------------------------------
    # Message pump
    # ---------------------------------------------------------------
    def _pump_logs(self):
        """Drain the worker queue on the Tk thread, then re-arm in 60 ms."""
        try:
            while True:
                msg = self.queue.get_nowait()
                if msg.kind == MsgType.LOG:
                    self._append_log(msg.text)
                elif msg.kind == MsgType.PROGRESS:
                    self._update_progress(msg.current, msg.total, msg.filename)
                elif msg.kind == MsgType.DONE:
                    self._on_done(msg)
        except queue.Empty:
            pass
        finally:
            # Always re-arm, even if a handler raised.
            self.root.after(60, self._pump_logs)

    def _append_log(self, text: str):
        """Append one line to the read-only journal and scroll to it."""
        self.txt.configure(state=tk.NORMAL)
        self.txt.insert(tk.END, text + "\n")
        self.txt.see(tk.END)
        self.txt.configure(state=tk.DISABLED)

    # ---------------------------------------------------------------
    # Progress
    # ---------------------------------------------------------------
    def _show_progress(self, total: int):
        """Reset the progress bar for *total* files and display it."""
        self._progressbar.configure(maximum=total, value=0)
        self._progress_label.configure(text="")
        # Insert above the results when they are visible, otherwise above the
        # status separator; a plain pack() would append below the status bar.
        anchor = self._results_frame if self._results_frame.winfo_manager() else self._status_sep
        self._progress_frame.pack(fill=tk.X, padx=self._PAD_X, pady=(0, 18), before=anchor)

    def _hide_progress(self):
        """Remove the progress section from the view."""
        self._progress_frame.pack_forget()

    def _update_progress(self, current: int, total: int, filename: str):
        """Show "current/total — filename" on the bar, its label and the status bar."""
        self._progressbar.configure(value=current)
        self._progress_label.configure(text=f"{current}/{total} — {filename}")
        self.status_var.set(f"{current}/{total} — {filename}")

    # ---------------------------------------------------------------
    # Results
    # ---------------------------------------------------------------
    def _show_results(self, ok: int, ko: int, masked: int):
        """Fill the three statistic cards and display the results section."""
        self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT)
        self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT)

        # The error card turns red only when something failed.
        err_fg = CLR_RED if ko > 0 else CLR_TEXT_SECONDARY
        err_bg = CLR_RED_LIGHT if ko > 0 else "#f3f4f6"
        self._update_stat_card(self._stat_errors, ko, err_fg, err_bg)

        # Packed above the status separator so the status bar stays last.
        self._results_frame.pack(
            fill=tk.X, padx=self._PAD_X, pady=(0, 12), before=self._status_sep,
        )

    def _hide_results(self):
        """Hide the results section, collapse the journal and clear it."""
        self._results_frame.pack_forget()
        self._log_frame.pack_forget()
        self._log_visible = False
        self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
        # Clear the journal.
        self.txt.configure(state=tk.NORMAL)
        self.txt.delete("1.0", tk.END)
        self.txt.configure(state=tk.DISABLED)

    def _on_done(self, msg: UiMessage):
        """Handle the end of a batch: restore the button and show the results."""
        self._hide_progress()
        self.btn_run.config(state=tk.NORMAL, bg=CLR_PRIMARY, text="Lancer la pseudonymisation")
        self.status_var.set(f"Terminé : {msg.ok} OK, {msg.ko} erreurs.")

        if msg.outdir:
            self._last_outdir = Path(msg.outdir)

        self._show_results(msg.ok, msg.ko, msg.masked)

    # ---------------------------------------------------------------
    # Journal toggle
    # ---------------------------------------------------------------
    def _toggle_log(self):
        """Show or hide the detailed journal and update the toggle label."""
        if self._log_visible:
            self._log_frame.pack_forget()
            self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
        else:
            self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))
            self._log_toggle.configure(text="Masquer le journal \u25B2")
        self._log_visible = not self._log_visible

    # ---------------------------------------------------------------
    # Open the results folder
    # ---------------------------------------------------------------
    def _open_out(self):
        """Open the output folder of the last run, if there is one."""
        if self._last_outdir:
            open_folder(self._last_outdir)

    # ---------------------------------------------------------------
    # Help
    # ---------------------------------------------------------------
    def _show_help(self):
        """Display the "how it works" dialog."""
        messagebox.showinfo(
            "Comment ça marche ?",
            "1) Choisissez le dossier contenant vos fichiers PDF.\n\n"
            "2) Cliquez sur « Lancer la pseudonymisation ».\n\n"
            "Deux fichiers sont générés pour chaque PDF :\n"
            " \u2022 PDF Image : chaque page devient une image avec les\n"
            " données masquées. Sécurité maximale.\n"
            " \u2022 PDF Anonymisé : structure préservée comme l'original,\n"
            " fichier léger et texte sélectionnable.\n\n"
            "Les résultats apparaissent dans un sous-dossier\n"
            "« pseudonymise » à côté de vos originaux.",
        )

    # ---------------------------------------------------------------
    # YAML (internal)
    # ---------------------------------------------------------------
    def _ensure_cfg_exists(self):
        """Write the default YAML to the configured path if no file exists there.

        Raises:
            OSError: If the directory or the file cannot be created.
        """
        p = Path(self.cfg_path.get())
        p.parent.mkdir(parents=True, exist_ok=True)
        if not p.exists():
            p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")

    def _load_cfg(self):
        """Load the YAML configuration into ``self.cfg_data``, best effort.

        Does nothing when PyYAML is unavailable.  On a read or parse failure
        the previous ``cfg_data`` is kept.  A document whose root is not a
        mapping is treated as empty so ``cfg_data`` is always a dict.
        """
        if yaml is None:
            return
        try:
            self._ensure_cfg_exists()
            raw = Path(self.cfg_path.get()).read_text(encoding="utf-8")
            loaded = yaml.safe_load(raw)
        except (OSError, UnicodeError, yaml.YAMLError):
            return
        self.cfg_data = loaded if isinstance(loaded, dict) else {}

    # ---------------------------------------------------------------
    # Audit
    # ---------------------------------------------------------------
    def _count_audit(self, audit_path: Path) -> Dict[str, int]:
        """Count the entries of a JSON-lines audit file by their ``kind``.

        Lines that are not JSON objects are skipped; entries without a
        ``kind`` are counted under ``"?"``.  An unreadable file yields the
        counts gathered so far (an empty dict if it could not be opened).
        """
        d: Dict[str, int] = {}
        try:
            with open(audit_path, "r", encoding="utf-8") as f:
                for line in f:
                    try:
                        k = json.loads(line).get("kind", "?")
                        d[k] = d.get(k, 0) + 1
                    except Exception:
                        # Malformed line: ignore it and keep counting.
                        pass
        except Exception:
            pass
        return d

    # ---------------------------------------------------------------
    # NER models (internal API)
    # ---------------------------------------------------------------
    def _load_model(self, model_id: Optional[str] = None):
        """Load *model_id* with the manager that owns it and make it active.

        The EDS manager is used when its catalogue lists the id, otherwise
        the ONNX manager.  Nothing happens when the required manager is
        unavailable.  A load failure only switches NER off.

        Args:
            model_id: Model identifier; defaults to
                ``"cmarkea/distilcamembert-base-ner"``.
        """
        mid = model_id or "cmarkea/distilcamembert-base-ner"
        is_eds = bool(
            self._eds_manager
            and mid in set(self._eds_manager.models_catalog().values())
        )
        manager = self._eds_manager if is_eds else self._onnx_manager
        if not manager:
            return
        try:
            manager.load(mid)
            self._active_manager = manager
            self.use_hf = True
        except Exception:
            self.use_hf = False

    def _unload_model(self):
        """Unload both managers and switch NER off."""
        if self._onnx_manager:
            self._onnx_manager.unload()
        if self._eds_manager:
            self._eds_manager.unload()
        self._active_manager = None
        self.use_hf = False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Point d'entrée
|
||
# ---------------------------------------------------------------------------
|
||
if __name__ == "__main__":
    # Script entry point: create the Tk root, attach the application to it
    # and hand control to the Tk event loop.
    _root_window = tk.Tk()
    App(_root_window)
    _root_window.mainloop()
|