Files
anonymisation/Pseudonymisation_Gui_V5.py
Domi31tls 9e06bbfa1d Fix _app_dir() pour Nuitka + crash log visible sans console
- _app_dir() utilise toujours Path(__file__).parent au lieu de
  dir() qui ne détecte pas __compiled__ dans une fonction
- Ajout crash.log + messagebox en cas d'erreur fatale
  (même avec --windows-console-mode=disable)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 22:11:44 +01:00

956 lines
35 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Pseudonymisation GUI v5 (Vue unique épurée)
----------------------------------------------
- Vue unique en 2 étapes : dossier → lancer (les deux formats sont générés)
- Thème système natif (sv_ttk optionnel, fallback clam)
- Backend NER ONNX/EDS-Pseudo conservé en interne
- Pas d'onglet Avancé (NER + YAML chargés silencieusement)
Fichiers requis à côté :
- anonymizer_core_refactored_onnx.py
- ner_manager_onnx.py
"""
from __future__ import annotations
import enum
import json
import os
import platform
import queue
import re
import shutil
import subprocess
import threading
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
# ---------------------------------------------------------------------------
# Core
# ---------------------------------------------------------------------------
# The processing core is mandatory: if it cannot be imported the application
# records the failure, tries to show it to the user, and exits.
try:
    import anonymizer_core_refactored_onnx as core
except Exception as e:
    _err_msg = f"Impossible d'importer le core ONNX : {e}"
    # Best effort: write the error to a log file next to the script/executable.
    try:
        import traceback as _tb
        _log = Path(__file__).resolve().parent / "crash.log"
        _log.write_text(f"{_err_msg}\n\n{_tb.format_exc()}", encoding="utf-8")
    except Exception:
        pass
    # Best effort: show the error in a dialog (a hidden root hosts the box).
    try:
        _r = tk.Tk()
        _r.withdraw()
        messagebox.showerror("Erreur d'import", _err_msg)
        _r.destroy()
    except Exception:
        pass
    raise SystemExit(_err_msg)
# Optional back-ends: when an import fails, the corresponding name is bound
# to None so the rest of the module can test for availability.
try:
    from ner_manager_onnx import NerModelManager, NerThresholds
except Exception:
    NerModelManager = NerThresholds = None  # type: ignore

try:
    from eds_pseudo_manager import EdsPseudoManager
except Exception:
    EdsPseudoManager = None  # type: ignore

try:
    import yaml
except Exception:
    yaml = None

# ---------------------------------------------------------------------------
# Optional theme
# ---------------------------------------------------------------------------
try:
    import sv_ttk  # type: ignore
except ImportError:
    sv_ttk = None
# ---------------------------------------------------------------------------
# Constantes
# ---------------------------------------------------------------------------
# Window title; also displayed as the main heading of the view.
APP_TITLE = "Pseudonymisation de PDF"
# Version label displayed at the right of the status bar.
APP_VERSION = "v5.0"
def _app_dir() -> Path:
"""Répertoire racine de l'application (compatible Nuitka standalone)."""
return Path(__file__).resolve().parent
# Default location of the YAML dictionaries file, under the application directory.
DEFAULT_CFG = _app_dir() / "config" / "dictionnaires.yml"
# Directory passed as ``cache_dir`` to the NER model managers.
MODELS_DIR = _app_dir() / "models"
# Text written to the configuration file when it does not exist yet.
# NOTE(review): as it appears here the YAML body carries no nested indentation
# (e.g. ``pattern: |-`` is followed by an unindented line, and ``flags`` occurs
# twice at the same level) - confirm this literal matches the intended layout.
DEFAULTS_CFG_TEXT = r"""
# dictionnaires.yml valeurs par défaut (bloc littéral pour les regex)
version: 1
encoding: "utf-8"
normalization: "NFKC"
whitelist:
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
org_gpe_keep: true
blacklist:
force_mask_terms: []
force_mask_regex: []
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
regex_overrides:
- name: OGC_court
pattern: |-
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
placeholder: '[OGC]'
flags: [IGNORECASE]
flags:
case_insensitive: true
unicode_word_boundaries: true
regex_engine: "python"
"""
# Colors (hex values used throughout the UI)
CLR_PRIMARY = "#2563eb"
CLR_PRIMARY_LIGHT = "#dbeafe"
CLR_GREEN = "#16a34a"
CLR_GREEN_LIGHT = "#dcfce7"
CLR_RED = "#dc2626"
CLR_RED_LIGHT = "#fee2e2"
CLR_BLUE_LIGHT = "#eff6ff"
CLR_CARD_BG = "#ffffff"
CLR_CARD_BORDER = "#d1d5db"
CLR_BG = "#f9fafb"
CLR_TEXT = "#111827"
CLR_TEXT_SECONDARY = "#6b7280"
# ---------------------------------------------------------------------------
# Messages worker → UI
# ---------------------------------------------------------------------------
class MsgType(enum.Enum):
    """Kinds of messages posted by the worker thread to the UI queue."""

    LOG = "log"            # a line to append to the detailed journal
    PROGRESS = "progress"  # per-file progress update
    DONE = "done"          # end of the batch, carries the totals
@dataclass
class UiMessage:
    """Message exchanged between the worker thread and the UI pump.

    Only ``kind`` is required; every other field defaults to an empty value
    and is filled in according to the message kind.
    """

    kind: MsgType

    # Used by LOG messages
    text: str = ""

    # Used by PROGRESS messages
    current: int = 0
    total: int = 0
    filename: str = ""

    # Used by DONE messages
    ok: int = 0
    ko: int = 0
    masked: int = 0
    outdir: str = ""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def open_folder(path: Path):
    """Open *path* with the platform's file manager (best effort).

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. Any failure is silently ignored.
    """
    try:
        target = str(path)
        system = platform.system()
        if system == "Windows":
            os.startfile(target)  # type: ignore
        elif system == "Darwin":
            subprocess.Popen(["open", target])
        else:
            subprocess.Popen(["xdg-open", target])
    except Exception:
        pass
def _detect_font() -> str:
"""Retourne la meilleure police sans-serif disponible."""
for name in ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica"):
try:
test = tk.Label(font=(name, 10))
actual = test.cget("font")
test.destroy()
if name.lower().replace(" ", "") in actual.lower().replace(" ", ""):
return name
except Exception:
continue
return "TkDefaultFont"
def _detect_dark_mode() -> bool:
"""Détecte le thème sombre GNOME."""
try:
result = subprocess.run(
["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"],
capture_output=True, text=True, timeout=2,
)
return "dark" in result.stdout.lower()
except Exception:
return False
# ---------------------------------------------------------------------------
# ToolTip amélioré
# ---------------------------------------------------------------------------
class ToolTip:
    """Hover tooltip attached to a widget and displayed after a short delay."""

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        """Bind the tooltip to *widget*.

        Args:
            widget: Widget whose <Enter>/<Leave> events drive the tooltip.
            text: Text displayed in the tooltip.
            delay: Value passed to ``widget.after`` before showing the tip.
        """
        self.widget = widget
        self.text = text
        self.delay = delay
        self.tip: Optional[tk.Toplevel] = None
        self._after_id: Optional[str] = None
        for sequence, handler in (("<Enter>", self._schedule), ("<Leave>", self.hide)):
            widget.bind(sequence, handler)

    def _schedule(self, *_):
        """(Re)start the timer that will display the tooltip."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Cancel the pending display timer, if any."""
        pending = self._after_id
        if not pending:
            return
        self.widget.after_cancel(pending)
        self._after_id = None

    def _show(self):
        """Create the borderless tooltip window just below the widget."""
        if self.tip:
            return
        left = self.widget.winfo_rootx() + 20
        top = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        popup = tk.Toplevel(self.widget)
        self.tip = popup
        popup.wm_overrideredirect(True)
        popup.wm_geometry(f"+{left}+{top}")
        caption = tk.Label(
            popup,
            text=self.text,
            justify=tk.LEFT,
            background="#1f2937",
            foreground="#f9fafb",
            relief=tk.SOLID,
            borderwidth=1,
            padx=8,
            pady=5,
            wraplength=320,
        )
        caption.pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending display and destroy the tooltip window."""
        self._cancel()
        if self.tip:
            self.tip.destroy()
            self.tip = None
# ---------------------------------------------------------------------------
# Application principale
# ---------------------------------------------------------------------------
class App:
def __init__(self, root: tk.Tk):
    """Configure the main window, build the UI and start background tasks.

    Args:
        root: The Tk root window hosting the application.
    """
    self.root = root
    root.title(APP_TITLE)
    root.geometry("780x820")
    root.minsize(600, 650)

    # --- Theme ---
    self._apply_theme()

    # --- Fonts: every font tuple shares the detected family ---
    family = _detect_font()
    self._font_family = family
    self._f_title = (family, 20, "bold")
    self._f_body = (family, 11)
    self._f_body_bold = (family, 11, "bold")
    self._f_button = (family, 13, "bold")
    self._f_stat = (family, 24, "bold")
    self._f_small = (family, 10)
    self._f_card_title = (family, 12, "bold")
    self._f_card_desc = (family, 10)

    # --- Variables ---
    self.dir_var = tk.StringVar()
    self.status_var = tk.StringVar(value="Prêt.")
    self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
    self.queue: "queue.Queue[UiMessage]" = queue.Queue()

    # --- NER (internal) ---
    self.use_hf = False
    self.th_per = 0.90
    self.th_org = 0.90
    self.th_loc = 0.90
    # A manager is only instantiated when its class could be imported.
    self._onnx_manager: Optional[Any] = None
    if NerModelManager:
        self._onnx_manager = NerModelManager(cache_dir=MODELS_DIR)
    self._eds_manager: Optional[Any] = None
    if EdsPseudoManager:
        self._eds_manager = EdsPseudoManager(cache_dir=MODELS_DIR)
    self._active_manager: Optional[Any] = None
    self.cfg_data: Dict[str, Any] = {}

    # --- Merged model catalog (ONNX entries first, then EDS entries) ---
    merged: Dict[str, str] = {}
    for manager in (self._onnx_manager, self._eds_manager):
        if manager:
            merged.update(manager.models_catalog())
    self._merged_catalog = merged

    # --- Results ---
    self._last_outdir: Optional[Path] = None

    # --- UI construction ---
    self._build_ui()
    self._pump_logs()
    self._ensure_cfg_exists()
    self._load_cfg()

    # --- Automatic loading of the NER model ---
    self._auto_load_ner()
# ---------------------------------------------------------------
# Thème
# ---------------------------------------------------------------
def _apply_theme(self):
    """Apply the sv_ttk theme when available, otherwise fall back to ``clam``."""
    if sv_ttk is None:
        # Fallback: built-in ttk theme; failures are ignored.
        try:
            ttk.Style().theme_use("clam")
        except Exception:
            pass
        return
    sv_ttk.set_theme("dark" if _detect_dark_mode() else "light")
# ---------------------------------------------------------------
# Construction de la vue unique
# ---------------------------------------------------------------
def _build_ui(self):
    """Build the single scrollable view.

    Creates, in order: the title, the folder-selection zone (step 1), the
    generated-formats notice (step 2), the launch button, the hidden progress
    and results sections, and the status bar.

    Widgets needed later are stored on ``self``: ``_scroll_frame``,
    ``_folder_zone``, ``_folder_inner``, ``_folder_icon_lbl``,
    ``_folder_text_lbl``, ``btn_run``, ``_progress_frame``, ``_progressbar``,
    ``_progress_label``, ``_results_frame``, ``_stat_files``, ``_stat_masked``,
    ``_stat_errors``, ``btn_open_out``, ``_log_visible``, ``_log_toggle``,
    ``_log_frame`` and ``txt``.
    """
    self.root.configure(bg=CLR_BG)

    # Scrollable container
    outer = tk.Frame(self.root, bg=CLR_BG)
    outer.pack(fill=tk.BOTH, expand=True)
    canvas = tk.Canvas(outer, bg=CLR_BG, highlightthickness=0)
    scrollbar = ttk.Scrollbar(outer, orient=tk.VERTICAL, command=canvas.yview)
    self._scroll_frame = tk.Frame(canvas, bg=CLR_BG)
    self._scroll_frame.bind(
        "<Configure>",
        lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
    )
    canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw")
    canvas.configure(yscrollcommand=scrollbar.set)

    # Keep the inner frame as wide as the canvas
    def _on_canvas_configure(event):
        canvas.itemconfig(canvas_window, width=event.width)

    canvas.bind("<Configure>", _on_canvas_configure)

    # Mouse-wheel scrolling
    def _on_mousewheel(event):
        delta = event.delta
        if not delta:
            return
        if abs(delta) >= 120:
            # Deltas reported in multiples of 120: one unit per notch.
            steps = int(-delta / 120)
        else:
            # Smaller deltas (e.g. macOS, precision touchpads) used to be
            # truncated to 0 and ignored; scroll at least one unit.
            steps = -1 if delta > 0 else 1
        canvas.yview_scroll(steps, "units")

    def _on_mousewheel_linux(event):
        if event.num == 4:
            canvas.yview_scroll(-3, "units")
        elif event.num == 5:
            canvas.yview_scroll(3, "units")

    canvas.bind_all("<MouseWheel>", _on_mousewheel)
    canvas.bind_all("<Button-4>", _on_mousewheel_linux)
    canvas.bind_all("<Button-5>", _on_mousewheel_linux)
    canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    main = self._scroll_frame
    pad_x = 32

    # --- Title ---
    tk.Label(
        main, text=APP_TITLE, font=self._f_title,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X, padx=pad_x, pady=(24, 2))
    tk.Label(
        main,
        text="Masquez automatiquement les données personnelles de vos documents PDF.",
        font=self._f_body, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    ).pack(fill=tk.X, padx=pad_x, pady=(0, 18))
    ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 18))

    # =============================================================
    # STEP 1 - Folder selection
    # =============================================================
    tk.Label(
        main, text="1. Choisir les documents", font=self._f_body_bold,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
    self._folder_zone = tk.Frame(
        main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER,
        highlightthickness=2, cursor="hand2",
    )
    self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18))

    # Initial content (invitation to click)
    self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG)
    self._folder_inner.pack(fill=tk.X, padx=20, pady=18)
    self._folder_icon_lbl = tk.Label(
        self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28),
        bg=CLR_CARD_BG,
    )
    self._folder_icon_lbl.pack()
    self._folder_text_lbl = tk.Label(
        self._folder_inner,
        text="Cliquez pour choisir un dossier contenant vos PDF",
        font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY,
    )
    self._folder_text_lbl.pack(pady=(4, 0))

    # Make the whole zone clickable
    for w in (self._folder_zone, self._folder_inner, self._folder_icon_lbl, self._folder_text_lbl):
        w.bind("<Button-1>", lambda e: self._browse())

    # =============================================================
    # STEP 2 - Generated formats notice
    # =============================================================
    tk.Label(
        main, text="2. Formats générés", font=self._f_body_bold,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
    info_frame = tk.Frame(
        main, bg=CLR_BLUE_LIGHT,
        highlightbackground=CLR_CARD_BORDER, highlightthickness=1,
    )
    info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
    info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT)
    info_inner.pack(fill=tk.X, padx=16, pady=12)
    tk.Label(
        info_inner,
        text="Les deux formats sont générés automatiquement :",
        font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X)
    tk.Label(
        info_inner,
        text=("\u2022 PDF Image — sécurité maximale, chaque page en image, aucun texte résiduel\n"
              "\u2022 PDF Anonymisé — structure préservée comme l'original, fichier léger"),
        font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
        anchor="w", justify=tk.LEFT,
    ).pack(fill=tk.X, pady=(4, 0))

    # =============================================================
    # LAUNCH BUTTON
    # =============================================================
    self.btn_run = tk.Button(
        main, text="Lancer la pseudonymisation",
        font=self._f_button, bg=CLR_PRIMARY, fg="white",
        activebackground="#1d4ed8", activeforeground="white",
        relief=tk.FLAT, cursor="hand2", pady=10,
        command=self._run,
    )
    self.btn_run.pack(fill=tk.X, padx=pad_x, pady=(0, 4))

    # Help link
    help_lbl = tk.Label(
        main, text="Comment ça marche ?", font=self._f_small,
        bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
    )
    help_lbl.pack(pady=(0, 18))
    help_lbl.bind("<Button-1>", lambda e: self._show_help())

    # =============================================================
    # PROGRESS BAR (hidden)
    # =============================================================
    self._progress_frame = tk.Frame(main, bg=CLR_BG)
    # Deliberately NOT packed here - displayed dynamically
    self._progressbar = ttk.Progressbar(
        self._progress_frame, orient=tk.HORIZONTAL, mode="determinate",
    )
    self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4))
    self._progress_label = tk.Label(
        self._progress_frame, text="", font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    )
    self._progress_label.pack(fill=tk.X)

    # =============================================================
    # RESULTS SECTION (hidden)
    # =============================================================
    self._results_frame = tk.Frame(main, bg=CLR_BG)
    # Deliberately NOT packed here
    tk.Label(
        self._results_frame, text="Résultats", font=self._f_body_bold,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X, pady=(0, 8))
    stats_row = tk.Frame(self._results_frame, bg=CLR_BG)
    stats_row.pack(fill=tk.X, pady=(0, 12))
    stats_row.columnconfigure(0, weight=1)
    stats_row.columnconfigure(1, weight=1)
    stats_row.columnconfigure(2, weight=1)
    self._stat_files = self._make_stat_card(stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0)
    self._stat_masked = self._make_stat_card(stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1)
    self._stat_errors = self._make_stat_card(stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2)
    self.btn_open_out = tk.Button(
        self._results_frame, text="Ouvrir le dossier de résultats",
        font=self._f_button, bg=CLR_GREEN, fg="white",
        activebackground="#15803d", activeforeground="white",
        relief=tk.FLAT, cursor="hand2", pady=10,
        command=self._open_out,
    )
    self.btn_open_out.pack(fill=tk.X, pady=(0, 8))

    # Journal toggle
    self._log_visible = False
    self._log_toggle = tk.Label(
        self._results_frame, text="Voir le journal détaillé \u25BC",
        font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
    )
    self._log_toggle.pack(pady=(0, 4))
    self._log_toggle.bind("<Button-1>", lambda e: self._toggle_log())
    self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG)
    # Deliberately NOT packed here
    self.txt = tk.Text(
        self._log_frame, height=14, font=self._f_small,
        bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD,
        state=tk.DISABLED,
    )
    log_scrollbar = ttk.Scrollbar(self._log_frame, command=self.txt.yview)
    self.txt.configure(yscrollcommand=log_scrollbar.set)
    self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    # =============================================================
    # STATUS BAR
    # =============================================================
    ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(18, 0))
    status_bar = tk.Frame(main, bg=CLR_BG)
    status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12))
    tk.Label(
        status_bar, textvariable=self.status_var, font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    ).pack(side=tk.LEFT)
    tk.Label(
        status_bar, text=APP_VERSION, font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e",
    ).pack(side=tk.RIGHT)
# ---------------------------------------------------------------
# Cartes de statistiques
# ---------------------------------------------------------------
def _make_stat_card(self, parent, number: str, label: str,
                    fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Label]:
    """Create one statistics card in column *col* of *parent*.

    Args:
        parent: Container in which the card is gridded (row 0).
        number: Initial text of the large figure.
        label: Caption displayed under the figure.
        fg_color: Foreground color of the figure.
        bg_color: Background (and border) color of the card.
        col: Grid column; also selects the horizontal padding.

    Returns:
        A dict with the widgets under ``"frame"``, ``"number"`` and ``"label"``.
    """
    # First column has no left gap, last column no right gap.
    if col == 0:
        gap = (0, 4)
    elif col == 1:
        gap = (4, 4)
    else:
        gap = (4, 0)

    card = tk.Frame(parent, bg=bg_color, highlightbackground=bg_color, highlightthickness=1)
    card.grid(row=0, column=col, sticky="nsew", padx=gap)

    figure = tk.Label(card, text=number, font=self._f_stat, bg=bg_color, fg=fg_color)
    figure.pack(pady=(12, 2))

    caption = tk.Label(card, text=label, font=self._f_small, bg=bg_color, fg=CLR_TEXT_SECONDARY)
    caption.pack(pady=(0, 12))

    return {"frame": card, "number": figure, "label": caption}
def _update_stat_card(self, card: Dict[str, tk.Label], value: int,
fg_color: str, bg_color: str):
card["number"].configure(text=str(value), fg=fg_color, bg=bg_color)
card["frame"].configure(bg=bg_color, highlightbackground=bg_color)
card["label"].configure(bg=bg_color)
# ---------------------------------------------------------------
# Actions dossier
# ---------------------------------------------------------------
def _browse(self):
    """Ask the user for a folder and, if one is chosen, display it."""
    chosen = filedialog.askdirectory()
    if not chosen:
        # Dialog cancelled: keep the current selection.
        return
    self.dir_var.set(chosen)
    self._update_folder_display()
def _update_folder_display(self):
    """Rebuild the folder zone to show the selected path and its PDF count."""
    folder = self.dir_var.get()
    if not folder:
        return

    # Count the PDF files directly inside the folder (0 on any error).
    n_pdf = 0
    try:
        n_pdf = sum(1 for p in Path(folder).glob("*.pdf") if p.is_file())
    except Exception:
        pass

    # Clear the zone and rebuild its content.
    for child in self._folder_inner.winfo_children():
        child.destroy()

    row = tk.Frame(self._folder_inner, bg=CLR_CARD_BG)
    row.pack(fill=tk.X)

    icon = tk.Label(row, text="\U0001f4c2", font=(self._font_family, 16), bg=CLR_CARD_BG)
    icon.pack(side=tk.LEFT, padx=(0, 8))

    info_frame = tk.Frame(row, bg=CLR_CARD_BG)
    info_frame.pack(side=tk.LEFT, fill=tk.X, expand=True)

    # Path, truncated on the left when longer than 60 characters.
    shown = folder if len(folder) <= 60 else "..." + folder[-57:]
    path_lbl = tk.Label(
        info_frame, text=shown, font=self._f_body_bold,
        bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
    )
    path_lbl.pack(fill=tk.X)

    suffix = "PDF trouvés" if n_pdf > 1 else "PDF trouvé"
    count_lbl = tk.Label(
        info_frame, text=f"{n_pdf} {suffix}",
        font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    )
    count_lbl.pack(fill=tk.X)

    change_btn = tk.Label(
        row, text="Changer", font=self._f_small,
        bg=CLR_CARD_BG, fg=CLR_PRIMARY, cursor="hand2",
    )
    change_btn.pack(side=tk.RIGHT, padx=(8, 0))
    change_btn.bind("<Button-1>", lambda e: self._browse())

    # Highlight the zone border once a folder is selected.
    self._folder_zone.configure(highlightbackground=CLR_GREEN)
# ---------------------------------------------------------------
# Lancement
# ---------------------------------------------------------------
def _run(self):
    """Validate the selected folder and start the processing thread.

    Shows a warning and returns when no valid folder is selected or when
    the folder contains no ``*.pdf`` file. Otherwise disables the launch
    button, shows the progress bar, hides previous results and starts
    ``_worker`` in a daemon thread.
    """
    raw_dir = self.dir_var.get().strip()
    folder = Path(raw_dir)
    # An empty selection must be rejected explicitly: Path("") is "." and
    # would pass is_dir(), silently processing the current working directory.
    if not raw_dir or not folder.is_dir():
        messagebox.showwarning(
            "Dossier invalide",
            "Choisissez un dossier contenant des PDF.",
        )
        return
    pdfs = sorted(p for p in folder.glob("*.pdf") if p.is_file())
    if not pdfs:
        messagebox.showwarning(
            "Aucun PDF",
            "Le dossier sélectionné ne contient aucun fichier PDF.",
        )
        return
    self.btn_run.config(state=tk.DISABLED, bg="#93c5fd", text="Traitement en cours...")
    self._show_progress(total=len(pdfs))
    self._hide_results()
    threading.Thread(target=self._worker, args=(folder, pdfs), daemon=True).start()
def _worker(self, folder: Path, pdfs: List[Path]):
    """Process every PDF of the batch and report to the UI through the queue.

    Runs in a background thread. For each file a PROGRESS message is posted,
    then ``core.process_pdf`` is called; successes and failures are logged
    with LOG messages. A DONE message carrying the totals closes the batch.

    Args:
        folder: Selected folder; outputs go to its ``pseudonymise`` sub-folder.
        pdfs: PDF files to process, in order.
    """
    try:
        outdir = folder / "pseudonymise"
        outdir.mkdir(exist_ok=True)
        ok = 0
        ko = 0
        global_counts: Dict[str, int] = {}
        post = self.queue.put
        n_files = len(pdfs)

        for index, pdf in enumerate(pdfs, start=1):
            post(UiMessage(
                kind=MsgType.PROGRESS, current=index, total=n_files,
                filename=pdf.name,
            ))
            try:
                # Re-read the NER state for every file.
                manager = self._active_manager
                ner_ready = bool(
                    manager and self.use_hf
                    and hasattr(manager, 'is_loaded') and manager.is_loaded()
                )
                thresholds = None
                if ner_ready and NerThresholds:
                    # Thresholds are not built for the EDS manager.
                    if not (EdsPseudoManager and isinstance(manager, EdsPseudoManager)):
                        thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85)

                outputs = core.process_pdf(
                    pdf_path=pdf,
                    out_dir=outdir,
                    make_vector_redaction=True,
                    also_make_raster_burn=True,
                    config_path=Path(self.cfg_path.get()),
                    use_hf=ner_ready,
                    ner_manager=manager,
                    ner_thresholds=thresholds,
                )

                post(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}"))
                for key, value in outputs.items():
                    post(UiMessage(kind=MsgType.LOG, text=f" - {key}: {value}"))

                # Per-file summary built from the audit file, when present.
                counts = self._count_audit(Path(outputs.get("audit", "")))
                if counts:
                    summary = ", ".join(f"{k}={v}" for k, v in sorted(counts.items()))
                    post(UiMessage(kind=MsgType.LOG, text=" ~ résumé : " + summary))
                for key, value in counts.items():
                    global_counts[key] = global_counts.get(key, 0) + value
                ok += 1
            except Exception as e:
                post(UiMessage(kind=MsgType.LOG, text=f"\u2717 {pdf.name} \u2192 ERREUR: {e}"))
                ko += 1

        total_masked = sum(global_counts.values())
        post(UiMessage(
            kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
            outdir=str(outdir),
        ))
        if ok:
            batch_summary = ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items()))
            post(UiMessage(kind=MsgType.LOG, text="RÉSUMÉ DU LOT : " + batch_summary))
    except Exception as e:
        # Failure outside the per-file handling: report every file as failed.
        self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}"))
        self.queue.put(UiMessage(kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0, outdir=""))
# ---------------------------------------------------------------
# Pompe de messages
# ---------------------------------------------------------------
def _pump_logs(self):
try:
while True:
msg = self.queue.get_nowait()
if msg.kind == MsgType.LOG:
self._append_log(msg.text)
elif msg.kind == MsgType.PROGRESS:
self._update_progress(msg.current, msg.total, msg.filename)
elif msg.kind == MsgType.DONE:
self._on_done(msg)
except queue.Empty:
pass
finally:
self.root.after(60, self._pump_logs)
def _append_log(self, text: str):
self.txt.configure(state=tk.NORMAL)
self.txt.insert(tk.END, text + "\n")
self.txt.see(tk.END)
self.txt.configure(state=tk.DISABLED)
# ---------------------------------------------------------------
# Progression
# ---------------------------------------------------------------
def _show_progress(self, total: int):
self._progressbar.configure(maximum=total, value=0)
self._progress_label.configure(text="")
self._progress_frame.pack(fill=tk.X, padx=32, pady=(0, 18),
before=self._results_frame if self._results_frame.winfo_manager() else None)
def _hide_progress(self):
self._progress_frame.pack_forget()
def _update_progress(self, current: int, total: int, filename: str):
self._progressbar.configure(value=current)
self._progress_label.configure(text=f"{current}/{total}{filename}")
self.status_var.set(f"{current}/{total}{filename}")
# ---------------------------------------------------------------
# Résultats
# ---------------------------------------------------------------
def _show_results(self, ok: int, ko: int, masked: int):
    """Fill the three statistics cards and display the results section.

    Args:
        ok: Number of files processed successfully.
        ko: Number of files in error; the card turns red when positive.
        masked: Total number of masked items.
    """
    if ko > 0:
        err_fg, err_bg = CLR_RED, CLR_RED_LIGHT
    else:
        err_fg, err_bg = CLR_TEXT_SECONDARY, "#f3f4f6"

    self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT)
    self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT)
    self._update_stat_card(self._stat_errors, ko, err_fg, err_bg)
    self._results_frame.pack(fill=tk.X, padx=32, pady=(0, 12))
def _hide_results(self):
self._results_frame.pack_forget()
self._log_frame.pack_forget()
self._log_visible = False
self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
# Vider le journal
self.txt.configure(state=tk.NORMAL)
self.txt.delete("1.0", tk.END)
self.txt.configure(state=tk.DISABLED)
def _on_done(self, msg: UiMessage):
    """Handle the end of a batch: restore the button and show the results.

    Args:
        msg: DONE message carrying ``ok``, ``ko``, ``masked`` and ``outdir``.
    """
    self._hide_progress()
    self.btn_run.config(state=tk.NORMAL, bg=CLR_PRIMARY, text="Lancer la pseudonymisation")
    self.status_var.set(f"Terminé : {msg.ok} OK, {msg.ko} erreurs.")
    outdir = msg.outdir
    if outdir:
        # Remembered for the "open results folder" button.
        self._last_outdir = Path(outdir)
    self._show_results(msg.ok, msg.ko, msg.masked)
# ---------------------------------------------------------------
# Toggle journal
# ---------------------------------------------------------------
def _toggle_log(self):
if self._log_visible:
self._log_frame.pack_forget()
self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
else:
self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))
self._log_toggle.configure(text="Masquer le journal \u25B2")
self._log_visible = not self._log_visible
# ---------------------------------------------------------------
# Ouvrir dossier résultats
# ---------------------------------------------------------------
def _open_out(self):
if self._last_outdir:
open_folder(self._last_outdir)
# ---------------------------------------------------------------
# Aide
# ---------------------------------------------------------------
def _show_help(self):
    """Display the "how does it work?" information dialog."""
    paragraphs = (
        "1) Choisissez le dossier contenant vos fichiers PDF.\n\n",
        "2) Cliquez sur « Lancer la pseudonymisation ».\n\n",
        "Deux fichiers sont générés pour chaque PDF :\n",
        " \u2022 PDF Image : chaque page devient une image avec les\n",
        " données masquées. Sécurité maximale.\n",
        " \u2022 PDF Anonymisé : structure préservée comme l'original,\n",
        " fichier léger et texte sélectionnable.\n\n",
        "Les résultats apparaissent dans un sous-dossier\n",
        "« pseudonymise » à côté de vos originaux.",
    )
    messagebox.showinfo("Comment ça marche ?", "".join(paragraphs))
# ---------------------------------------------------------------
# YAML (interne)
# ---------------------------------------------------------------
def _ensure_cfg_exists(self):
p = Path(self.cfg_path.get())
p.parent.mkdir(parents=True, exist_ok=True)
if not p.exists():
p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
def _load_cfg(self):
    """Load the YAML configuration into ``self.cfg_data``.

    Does nothing when the ``yaml`` module is unavailable. Read or parse
    errors are ignored and leave ``cfg_data`` unchanged.
    """
    if yaml is None:
        return
    self._ensure_cfg_exists()
    try:
        raw = Path(self.cfg_path.get()).read_text(encoding="utf-8")
        # An empty document loads as None: fall back to an empty dict.
        self.cfg_data = yaml.safe_load(raw) or {}
    except Exception:
        pass
# ---------------------------------------------------------------
# Audit
# ---------------------------------------------------------------
def _count_audit(self, audit_path: Path) -> Dict[str, int]:
d: Dict[str, int] = {}
try:
with open(audit_path, "r", encoding="utf-8") as f:
for line in f:
try:
obj = json.loads(line)
k = obj.get("kind", "?")
d[k] = d.get(k, 0) + 1
except Exception:
pass
except Exception:
pass
return d
# ---------------------------------------------------------------
# Chargement automatique NER au démarrage
# ---------------------------------------------------------------
def _auto_load_ner(self):
"""Charge le modèle NER par défaut en arrière-plan."""
if not self._onnx_manager:
return
self.status_var.set("Chargement du modèle NER...")
threading.Thread(target=self._auto_load_ner_worker, daemon=True).start()
def _auto_load_ner_worker(self):
try:
default_model = "cmarkea/distilcamembert-base-ner"
self._onnx_manager.load(default_model)
self._active_manager = self._onnx_manager
self.use_hf = True
self.status_var.set("Prêt — NER actif.")
except Exception as e:
self.status_var.set(f"Prêt (NER indisponible : {e})")
# ---------------------------------------------------------------
# Modèles NER (API interne)
# ---------------------------------------------------------------
def _load_model(self, model_id: Optional[str] = None):
mid = model_id or "cmarkea/distilcamembert-base-ner"
is_eds = False
if self._eds_manager:
eds_ids = set(self._eds_manager.models_catalog().values())
if mid in eds_ids:
is_eds = True
if is_eds:
if not self._eds_manager:
return
manager = self._eds_manager
else:
if not self._onnx_manager:
return
manager = self._onnx_manager
try:
manager.load(mid)
self._active_manager = manager
self.use_hf = True
except Exception:
self.use_hf = False
def _unload_model(self):
if self._onnx_manager:
self._onnx_manager.unload()
if self._eds_manager:
self._eds_manager.unload()
self._active_manager = None
self.use_hf = False
# ---------------------------------------------------------------------------
# Point d'entrée
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    try:
        root = tk.Tk()
        App(root)
        root.mainloop()
    except Exception as exc:
        import sys
        import traceback

        err = traceback.format_exc()

        # Write the traceback to a log file next to the script/executable.
        log_path = Path(__file__).resolve().parent / "crash.log"
        try:
            log_path.write_text(err, encoding="utf-8")
        except Exception:
            pass

        # Try to show a message box (works even without a console).
        try:
            import tkinter as _tk
            from tkinter import messagebox as _mb

            _r = _tk.Tk()
            _r.withdraw()
            _mb.showerror("Erreur fatale", f"L'application a planté :\n\n{exc}\n\nVoir crash.log")
            _r.destroy()
        except Exception:
            pass

        # Propagate the original error after reporting it.
        raise