Intégration du logo "aivanonym" (gradient magenta → rose → pêche → noir)
fourni par le propriétaire. Refonte visuelle complète :
• APP_VERSION bump v5.4 → v5.5
• Assets (tous générés depuis assets/icons/logo.png) :
- assets/icons/app.ico multi-résolution 16→256 (icône EXE Windows)
- assets/icons/icon_{16,32,48,64,128,256,512}.png (fallback + taskbar)
- assets/logo_header.png (260×61, intégré dans l'en-tête de la GUI)
- assets/logo_splash.png (335×80, intégré dans le splash)
- assets/splash.png redessiné avec logo + bandeau gradient primary→accent
• Palette dérivée du logo (remplace l'ancien bleu) :
- CLR_PRIMARY #E91E63 magenta logo (CTA, liens)
- CLR_PRIMARY_DARK #C2185B hover / pressed
- CLR_PRIMARY_LIGHT #FCE4EC fond doux (tags, cartes)
- CLR_ACCENT #FFB74D pêche logo (secondaire)
- CLR_ACCENT_LIGHT #FFF3E0
- CLR_TEXT/SECONDARY proches du noir/gris du logo
• Pseudonymisation_Gui_V5.py :
- Helper _asset(name) : résout sous sys._MEIPASS/assets en mode frozen
- _apply_window_icon() : iconbitmap (.ico sur Windows) + iconphoto (PNG)
- _load_image_safe() : charge PIL avec ref persistante (évite GC tkinter)
- Header fixe hors onglets : logo image + baseline "100% local"
- Ligne accent magenta sous le header (inspiration logo)
- Onglets custom uniformes (remplace ttk.Notebook dont les tabs avaient
des tailles variables selon l'état) : tous les boutons identiques,
seule une bordure basse magenta signale l'onglet actif. _switch_tab()
gère l'affichage du contenu et la mise à jour des styles.
- Onglet 1 "Anonymisation" : workflow principal (choix, lancer, résultats)
- Onglet 2 "Paramètres" : 3 listes (whitelist/blacklist/stopwords) +
export/import + save. Plus de section repliable — respiration visuelle.
- Boutons export/import repensés avec les couleurs de la palette
• anonymisation_onefile.spec :
- datas : ajout du dossier assets/ entier
- EXE(icon=assets/icons/app.ico) : le .exe a maintenant le logo dans
l'Explorateur Windows, la barre des tâches, le gestionnaire des tâches
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1829 lines
72 KiB
Python
1829 lines
72 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Pseudonymisation – GUI v5 (Vue unique épurée)
|
||
----------------------------------------------
|
||
- Vue unique en 2 étapes : dossier → lancer (les deux formats sont générés)
|
||
- Thème système natif (sv_ttk optionnel, fallback clam)
|
||
- Backend NER ONNX/EDS-Pseudo conservé en interne
|
||
- Pas d'onglet Avancé (NER + YAML chargés silencieusement)
|
||
|
||
Fichiers requis à côté :
|
||
- anonymizer_core_refactored_onnx.py
|
||
- ner_manager_onnx.py
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import enum
|
||
import json
|
||
import os
|
||
import platform
|
||
import queue
|
||
import re
|
||
import shutil
|
||
import subprocess
|
||
import sys
|
||
import threading
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, ttk
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Core
|
||
# ---------------------------------------------------------------------------
|
||
try:
    import anonymizer_core_refactored_onnx as core
except Exception as e:
    # The ONNX core is mandatory: record the failure, tell the user, then exit.
    _err_msg = f"Impossible d'importer le core ONNX : {e}"
    # Best effort: write the error to a crash.log next to the script/exe.
    try:
        import traceback as _tb

        # In a frozen build __file__ points inside the bundle, not next to the
        # executable, so use the executable's directory there (same rule as
        # the persistent-files helper of this module).
        if getattr(sys, "frozen", False):
            _log = Path(sys.executable).parent / "crash.log"
        else:
            _log = Path(__file__).resolve().parent / "crash.log"
        _log.write_text(f"{_err_msg}\n\n{_tb.format_exc()}", encoding="utf-8")
    except Exception:
        pass  # logging must never mask the original import error
    # Best effort: show a dialog (may fail when no display is available).
    try:
        _r = tk.Tk()
        _r.withdraw()
        messagebox.showerror("Erreur d'import", _err_msg)
        _r.destroy()
    except Exception:
        pass
    raise SystemExit(_err_msg)
|
||
|
||
try:
|
||
from ner_manager_onnx import NerModelManager, NerThresholds
|
||
except Exception:
|
||
NerModelManager = None # type: ignore
|
||
NerThresholds = None # type: ignore
|
||
|
||
try:
|
||
from eds_pseudo_manager import EdsPseudoManager
|
||
except Exception:
|
||
EdsPseudoManager = None # type: ignore
|
||
|
||
try:
|
||
from vlm_manager import VlmManager, VlmConfig
|
||
except Exception:
|
||
VlmManager = None # type: ignore
|
||
VlmConfig = None # type: ignore
|
||
|
||
try:
|
||
import yaml
|
||
except Exception:
|
||
yaml = None
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Thème optionnel
|
||
# ---------------------------------------------------------------------------
|
||
try:
|
||
import sv_ttk # type: ignore
|
||
except ImportError:
|
||
sv_ttk = None
|
||
|
||
# PIL pour charger le logo / icônes (optionnel — dégradation si absent).
|
||
try:
|
||
from PIL import Image, ImageTk
|
||
_PIL_AVAILABLE = True
|
||
except Exception:
|
||
_PIL_AVAILABLE = False
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Constantes
|
||
# ---------------------------------------------------------------------------
|
||
APP_TITLE = "Pseudonymisation de vos documents"
|
||
APP_VERSION = "v5.5"
|
||
|
||
# Métadonnées de build — chargées depuis build_info.py (régénéré par rebuild_anon.ps1)
|
||
try:
|
||
from build_info import BUILD_DATE, BUILD_COMMIT, BUILD_BRANCH
|
||
except Exception:
|
||
BUILD_DATE = "dev"
|
||
BUILD_COMMIT = "dev"
|
||
BUILD_BRANCH = "dev"
|
||
|
||
|
||
def _version_long() -> str:
    """Return the extended version string.

    Joins ``APP_VERSION``, the build date and the build commit (prefixed with
    ``#``) with " · ". The date and the commit are omitted when they hold the
    ``"dev"`` fallback value.

    Example: ``v5.5 · 2026-04-15 18:15 · #234137e``
    """
    parts = [APP_VERSION]
    if BUILD_DATE != "dev":
        parts.append(BUILD_DATE)
    if BUILD_COMMIT != "dev":
        parts.append(f"#{BUILD_COMMIT}")
    return " · ".join(parts)
|
||
|
||
|
||
def _asset(name: str) -> Path:
|
||
"""Résout le chemin d'un asset dans assets/ (compatible frozen PyInstaller)."""
|
||
if getattr(sys, 'frozen', False):
|
||
base = Path(sys._MEIPASS)
|
||
else:
|
||
base = Path(__file__).resolve().parent
|
||
return base / 'assets' / name
|
||
|
||
def _app_dir() -> Path:
|
||
"""Répertoire racine de l'application (compatible PyInstaller/Nuitka)."""
|
||
if getattr(sys, 'frozen', False):
|
||
return Path(sys._MEIPASS)
|
||
return Path(__file__).resolve().parent
|
||
|
||
def _exe_dir() -> Path:
|
||
"""Répertoire de l'exécutable (pour les fichiers persistants : config, logs)."""
|
||
if getattr(sys, 'frozen', False):
|
||
return Path(sys.executable).parent
|
||
return Path(__file__).resolve().parent
|
||
|
||
def _resolve_config() -> Path:
    """Locate ``config/dictionnaires.yml``, preferring the copy next to the exe.

    Lookup order:
      1. ``<exe dir>/config/dictionnaires.yml`` if it exists.
      2. Otherwise, if the embedded copy exists, copy it next to the exe so
         the user can edit it without rebuilding, and return that copy.
      3. Otherwise (or if the copy fails, e.g. read-only install directory)
         return the embedded path, which may not exist.

    Returns:
        The path of the configuration file to use.
    """
    exe_cfg = _exe_dir() / "config" / "dictionnaires.yml"
    app_cfg = _app_dir() / "config" / "dictionnaires.yml"

    if exe_cfg.exists():
        return exe_cfg

    # First launch: copy the embedded config next to the exe.
    if app_cfg.exists():
        try:
            exe_cfg.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(app_cfg), str(exe_cfg))
        except OSError:
            # This runs at import time: an unwritable exe directory must not
            # prevent the application from starting.
            return app_cfg
        return exe_cfg

    return app_cfg  # fallback
|
||
|
||
DEFAULT_CFG = _resolve_config()
|
||
MODELS_DIR = _app_dir() / "models"
|
||
|
||
DEFAULTS_CFG_TEXT = r"""
|
||
# dictionnaires.yml – valeurs par défaut (bloc littéral pour les regex)
|
||
version: 1
|
||
encoding: "utf-8"
|
||
normalization: "NFKC"
|
||
whitelist:
|
||
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
|
||
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
|
||
org_gpe_keep: true
|
||
blacklist:
|
||
force_mask_terms: []
|
||
force_mask_regex: []
|
||
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
|
||
regex_overrides:
|
||
- name: OGC_court
|
||
pattern: |-
|
||
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
|
||
placeholder: '[OGC]'
|
||
flags: [IGNORECASE]
|
||
flags:
|
||
case_insensitive: true
|
||
unicode_word_boundaries: true
|
||
regex_engine: "python"
|
||
"""
|
||
|
||
# Palette derived from the aivanonym logo (gradient magenta -> pink -> peach -> black)
#   Logo magenta : primary (buttons, accents)
#   Peach        : secondary (tags, highlights)
#   Black/grey   : text and neutrals
#   White/light grey : backgrounds
CLR_PRIMARY = "#E91E63"  # logo magenta (CTA, links)
CLR_PRIMARY_DARK = "#C2185B"  # hover / pressed
CLR_PRIMARY_LIGHT = "#FCE4EC"  # light background (selected cards)
CLR_ACCENT = "#FFB74D"  # logo peach (secondary tags)
CLR_ACCENT_LIGHT = "#FFF3E0"  # light accent background
CLR_GREEN = "#2E7D32"  # success
CLR_GREEN_LIGHT = "#E8F5E9"
CLR_RED = "#C62828"  # error / danger
CLR_RED_LIGHT = "#FFEBEE"
# Kept for compatibility: aliased to the primary light tone so both names
# always stay in sync.
CLR_BLUE_LIGHT = CLR_PRIMARY_LIGHT
CLR_CARD_BG = "#FFFFFF"
CLR_CARD_BORDER = "#E0E0E0"
CLR_BG = "#FAFAFA"  # main background (very light grey)
CLR_TEXT = "#212121"  # near-black (from the logo)
CLR_TEXT_SECONDARY = "#757575"  # medium grey
CLR_DIVIDER = "#EEEEEE"
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Messages worker → UI
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class MsgType(enum.Enum):
    """Kinds of messages carried by ``UiMessage`` (worker -> UI)."""

    LOG = "log"
    PROGRESS = "progress"
    DONE = "done"
|
||
|
||
|
||
@dataclass
class UiMessage:
    """Message record with a mandatory ``kind`` and defaulted payload fields.

    Only ``kind`` is required; every other field has a neutral default.
    NOTE(review): which fields are filled for which ``kind`` is decided by
    the producer, which is not visible here; the field comments below are
    inferred from the names and should be confirmed.
    """

    kind: MsgType
    text: str = ""
    current: int = 0  # presumably the index of the item being processed
    total: int = 0  # presumably the number of items to process
    filename: str = ""
    ok: int = 0  # presumably the count of successfully processed files
    ko: int = 0  # presumably the count of failed files
    masked: int = 0  # presumably the count of masked data items
    outdir: str = ""
    total_time: float = 0.0  # total processing time in seconds
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def open_folder(path: Path) -> None:
    """Open *path* in the platform's file manager (best effort).

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. Any failure is ignored on purpose: opening the folder is a
    convenience and must never interrupt the caller.
    """
    system = platform.system()
    try:
        if system == "Windows":
            os.startfile(str(path))  # type: ignore
        elif system == "Darwin":
            subprocess.Popen(["open", str(path)])
        else:
            subprocess.Popen(["xdg-open", str(path)])
    except Exception:
        pass
|
||
|
||
|
||
def _detect_font() -> str:
    """Return the first installed sans-serif family from a preference list.

    The candidates are checked against the families Tk reports as installed.
    (Reading back ``Label.cget("font")`` is not a valid test: it returns the
    requested font specification, so any candidate would appear to match.)

    Returns:
        The first available candidate, or ``"TkDefaultFont"`` when none is
        installed or the font list cannot be queried (e.g. no Tk root yet).
    """
    # Local import: tkinter.font is only needed by this helper.
    from tkinter import font as tkfont

    try:
        installed = {family.lower() for family in tkfont.families()}
    except (RuntimeError, tk.TclError):
        return "TkDefaultFont"

    for name in ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica"):
        if name.lower() in installed:
            return name
    return "TkDefaultFont"
|
||
|
||
|
||
def _detect_dark_mode() -> bool:
|
||
"""Détecte le thème sombre GNOME."""
|
||
try:
|
||
result = subprocess.run(
|
||
["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"],
|
||
capture_output=True, text=True, timeout=2,
|
||
)
|
||
return "dark" in result.stdout.lower()
|
||
except Exception:
|
||
return False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# ToolTip amélioré
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class ToolTip:
    """Delayed tooltip shown below a widget while the pointer hovers it."""

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        """Attach a tooltip to *widget*.

        Args:
            widget: Widget that triggers the tooltip.
            text: Text displayed in the tooltip.
            delay: Value passed to ``widget.after`` before showing the tip.
        """
        self.widget = widget
        self.text = text
        self.delay = delay
        self.tip: Optional[tk.Toplevel] = None
        self._after_id: Optional[str] = None
        # add="+" keeps any <Enter>/<Leave> handlers already bound on the widget.
        widget.bind("<Enter>", self._schedule, add="+")
        widget.bind("<Leave>", self.hide, add="+")

    def _schedule(self, *_):
        """(Re)start the timer that will show the tooltip."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Cancel the pending show timer, if any."""
        if self._after_id:
            self.widget.after_cancel(self._after_id)
            self._after_id = None

    def _show(self):
        """Create the tooltip window; no-op if one is already displayed."""
        if self.tip:
            return
        # Position: slightly right of the widget's left edge, just below it.
        x = self.widget.winfo_rootx() + 20
        y = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        self.tip = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)  # no window decorations
        tw.wm_geometry(f"+{x}+{y}")
        lbl = tk.Label(
            tw, text=self.text, justify=tk.LEFT,
            background="#1f2937", foreground="#f9fafb",
            relief=tk.SOLID, borderwidth=1,
            padx=8, pady=5, wraplength=320,
        )
        lbl.pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending timer and destroy the tooltip window if shown."""
        self._cancel()
        if self.tip:
            self.tip.destroy()
            self.tip = None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Application principale
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class App:
|
||
def __init__(self, root: tk.Tk):
    """Configure the main window, initialise state and build the UI.

    Args:
        root: The Tk root window hosting the application.
    """
    self.root = root
    # Title carries the long version so the build can be identified at a
    # glance (avoids confusing old/new executables while testing).
    self.root.title(f"{APP_TITLE} — {_version_long()}")
    self.root.geometry("780x820")
    self.root.minsize(600, 650)

    # Window icon. Both the .ico and the PNG routes are attempted by
    # _apply_window_icon().
    self._icon_refs: list = []  # keeps image objects alive (avoid GC)
    self._apply_window_icon()

    # Preload the header logo; a persistent reference is required,
    # otherwise tkinter drops the image and the label shows blank.
    self._logo_img = self._load_image_safe(_asset('logo_header.png'))

    # --- Theme ---
    self._apply_theme()

    # --- Fonts ---
    self._font_family = _detect_font()
    self._f_title = (self._font_family, 20, "bold")
    self._f_body = (self._font_family, 11)
    self._f_body_bold = (self._font_family, 11, "bold")
    self._f_button = (self._font_family, 13, "bold")
    self._f_stat = (self._font_family, 24, "bold")
    self._f_small = (self._font_family, 10)
    self._f_card_title = (self._font_family, 12, "bold")
    self._f_card_desc = (self._font_family, 10)

    # --- Variables ---
    self.dir_var = tk.StringVar()
    self.status_var = tk.StringVar(value="Prêt.")
    self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
    self.queue: "queue.Queue[UiMessage]" = queue.Queue()

    # --- NER (internal) ---
    self.use_hf = False
    self.th_per = 0.90
    self.th_org = 0.90
    self.th_loc = 0.90
    # Managers are None when their optional module could not be imported.
    self._onnx_manager: Optional[Any] = NerModelManager(cache_dir=MODELS_DIR) if NerModelManager else None
    self._eds_manager: Optional[Any] = EdsPseudoManager(cache_dir=MODELS_DIR) if EdsPseudoManager else None
    self._active_manager: Optional[Any] = None
    self.cfg_data: Dict[str, Any] = {}

    # --- VLM (optional) ---
    self.use_vlm = tk.BooleanVar(value=False)
    self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
    self._vlm_available = False

    # --- Merged model catalogue (ONNX first, then EDS entries on top) ---
    catalog: Dict[str, str] = {}
    if self._onnx_manager:
        catalog.update(self._onnx_manager.models_catalog())
    if self._eds_manager:
        catalog.update(self._eds_manager.models_catalog())
    self._merged_catalog = catalog

    # --- Results ---
    self._last_outdir: Optional[Path] = None

    # --- Stop control ---
    self._stop_requested = False
    # --- Single file (None = folder mode) ---
    self._single_file: Optional[Path] = None

    # --- UI construction ---
    self._build_ui()
    # Show the "Anonymisation" tab by default.
    self._switch_tab("anonym")
    self._pump_logs()
    self._ensure_cfg_exists()
    self._load_cfg()

    # --- Automatic loading of the NER model ---
    self._auto_load_ner()
|
||
|
||
# ---------------------------------------------------------------
|
||
# Onglets custom
|
||
# ---------------------------------------------------------------
|
||
def _switch_tab(self, name: str):
    """Show the tab called *name* and restyle the tab buttons.

    Unknown names are ignored. Otherwise every tab frame is unpacked, the
    requested one is packed to fill the content area, the matching button
    gets the primary colour on its label and underline (the others get the
    secondary text colour and a background-coloured underline), and
    ``self._active_tab`` is updated.

    Args:
        name: Key of the tab in ``self._tab_frames`` / ``self._tab_buttons``.
    """
    if name not in self._tab_frames:
        return

    # Hide every tab content, then show the requested one.
    for frame in self._tab_frames.values():
        frame.pack_forget()
    self._tab_frames[name].pack(fill=tk.BOTH, expand=True)

    # Only the active button is highlighted.
    for tab_name, widgets in self._tab_buttons.items():
        is_active = tab_name == name
        label_fg = CLR_PRIMARY if is_active else CLR_TEXT_SECONDARY
        underline_bg = CLR_PRIMARY if is_active else CLR_BG
        widgets["label"].configure(fg=label_fg, bg=CLR_BG)
        widgets["underline"].configure(bg=underline_bg)

    self._active_tab = name
|
||
|
||
# ---------------------------------------------------------------
|
||
# Icônes & assets
|
||
# ---------------------------------------------------------------
|
||
def _apply_window_icon(self):
    """Set the window icon (best effort, never raises).

    On Windows the ``icons/app.ico`` asset is applied with ``iconbitmap``
    when it exists. If that is not possible, ``icons/icon_128.png`` is
    applied with ``iconphoto`` provided PIL is available. The PhotoImage is
    stored in ``self._icon_refs`` so it is not garbage-collected.
    """
    try:
        ico = _asset('icons/app.ico')
        if sys.platform == 'win32' and ico.exists():
            try:
                self.root.iconbitmap(str(ico))
                return
            except Exception:
                pass  # fall through to the PNG route

        # Fallback: PNG via iconphoto (all platforms).
        png = _asset('icons/icon_128.png')
        if png.exists() and _PIL_AVAILABLE:
            # Close the file handle once the pixels are copied into Tk.
            with Image.open(png) as img:
                photo = ImageTk.PhotoImage(img)
            self._icon_refs.append(photo)
            self.root.iconphoto(True, photo)
    except Exception:
        pass  # silent degradation: the icon is not essential
|
||
|
||
def _load_image_safe(self, path: Path):
    """Load *path* as an RGBA ``ImageTk.PhotoImage`` and keep it alive.

    The image is appended to ``self._icon_refs`` so tkinter does not lose it
    to garbage collection.

    Args:
        path: Image file to load.

    Returns:
        The PhotoImage, or ``None`` when PIL is unavailable, the file does
        not exist, or loading fails.
    """
    if not _PIL_AVAILABLE or not path.exists():
        return None
    try:
        # convert() returns an independent in-memory copy, so the source
        # file can be closed immediately.
        with Image.open(path) as src:
            img = src.convert('RGBA')
        photo = ImageTk.PhotoImage(img)
        self._icon_refs.append(photo)
        return photo
    except Exception:
        return None
|
||
|
||
# ---------------------------------------------------------------
|
||
# Thème
|
||
# ---------------------------------------------------------------
|
||
def _apply_theme(self):
    """Apply the UI theme: sv_ttk when available, otherwise ttk "clam".

    With sv_ttk the mode is "dark" or "light" according to
    ``_detect_dark_mode()``. If sv_ttk is missing or fails to apply, the
    "clam" theme is tried; a failure there is ignored so theming can never
    prevent the application from starting.
    """
    if sv_ttk is not None:
        mode = "dark" if _detect_dark_mode() else "light"
        try:
            sv_ttk.set_theme(mode)
            return
        except Exception:
            pass  # fall back to the built-in theme below

    try:
        style = ttk.Style()
        style.theme_use("clam")
    except Exception:
        pass
|
||
|
||
# ---------------------------------------------------------------
|
||
# Construction de la vue unique
|
||
# ---------------------------------------------------------------
|
||
def _build_ui(self):
|
||
self.root.configure(bg=CLR_BG)
|
||
pad_x = 32
|
||
|
||
# =============================================================
|
||
# HEADER fixe (logo + titre + baseline), hors onglets
|
||
# =============================================================
|
||
header = tk.Frame(self.root, bg=CLR_BG)
|
||
header.pack(fill=tk.X, padx=pad_x, pady=(16, 8))
|
||
|
||
if self._logo_img is not None:
|
||
tk.Label(header, image=self._logo_img, bg=CLR_BG).pack(anchor="w")
|
||
else:
|
||
tk.Label(header, text="aivanonym", font=(self._font_family, 22, "bold"),
|
||
bg=CLR_BG, fg=CLR_PRIMARY).pack(anchor="w")
|
||
|
||
tk.Label(
|
||
header,
|
||
text="Pseudonymisation de documents médicaux — 100% local",
|
||
font=(self._font_family, 10),
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
).pack(fill=tk.X, pady=(4, 0))
|
||
|
||
# Ligne colorée inspirée du gradient du logo
|
||
accent_bar = tk.Frame(self.root, bg=CLR_PRIMARY, height=3)
|
||
accent_bar.pack(fill=tk.X)
|
||
|
||
# =============================================================
|
||
# ONGLETS CUSTOM (boutons uniformes — rendu pro)
|
||
# Remplace ttk.Notebook dont les onglets ont des tailles/styles
|
||
# variables selon l'état actif. Ici : tous les onglets identiques,
|
||
# seule une bordure basse magenta signale l'onglet actif.
|
||
# =============================================================
|
||
tabs_bar = tk.Frame(self.root, bg=CLR_BG)
|
||
tabs_bar.pack(fill=tk.X, padx=0, pady=(4, 0))
|
||
|
||
self._tab_frames: dict = {} # nom → frame outer
|
||
self._tab_buttons: dict = {} # nom → dict(container, label, underline)
|
||
self._active_tab: Optional[str] = None
|
||
|
||
def _make_tab_button(parent, name: str, label: str):
|
||
"""Crée un onglet cliquable uniforme (fond, texte, underline)."""
|
||
container = tk.Frame(parent, bg=CLR_BG, cursor="hand2")
|
||
container.pack(side=tk.LEFT)
|
||
|
||
txt = tk.Label(
|
||
container, text=label,
|
||
font=(self._font_family, 11, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
|
||
padx=26, pady=10, cursor="hand2",
|
||
)
|
||
txt.pack(fill=tk.X)
|
||
|
||
# Bordure basse qui devient magenta quand actif
|
||
underline = tk.Frame(container, bg=CLR_BG, height=3)
|
||
underline.pack(fill=tk.X)
|
||
|
||
def _on_click(_e=None):
|
||
self._switch_tab(name)
|
||
for w in (container, txt, underline):
|
||
w.bind("<Button-1>", _on_click)
|
||
|
||
self._tab_buttons[name] = {
|
||
"container": container, "label": txt, "underline": underline,
|
||
}
|
||
|
||
_make_tab_button(tabs_bar, "anonym", "Anonymisation")
|
||
_make_tab_button(tabs_bar, "params", "Paramètres")
|
||
|
||
# Séparateur gris clair sous les onglets
|
||
tk.Frame(self.root, bg=CLR_DIVIDER, height=1).pack(fill=tk.X)
|
||
|
||
# Conteneur des contenus (un seul visible à la fois)
|
||
tabs_content = tk.Frame(self.root, bg=CLR_BG)
|
||
tabs_content.pack(fill=tk.BOTH, expand=True)
|
||
|
||
tab_anonym_outer = tk.Frame(tabs_content, bg=CLR_BG)
|
||
tab_params_outer = tk.Frame(tabs_content, bg=CLR_BG)
|
||
self._tab_frames["anonym"] = tab_anonym_outer
|
||
self._tab_frames["params"] = tab_params_outer
|
||
|
||
# --- Scroll pour l'onglet Anonymisation ---
|
||
canvas = tk.Canvas(tab_anonym_outer, bg=CLR_BG, highlightthickness=0)
|
||
scrollbar = ttk.Scrollbar(tab_anonym_outer, orient=tk.VERTICAL, command=canvas.yview)
|
||
self._scroll_frame = tk.Frame(canvas, bg=CLR_BG)
|
||
self._scroll_frame.bind(
|
||
"<Configure>",
|
||
lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
|
||
)
|
||
canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw")
|
||
canvas.configure(yscrollcommand=scrollbar.set)
|
||
|
||
def _on_canvas_configure(event):
|
||
canvas.itemconfig(canvas_window, width=event.width)
|
||
canvas.bind("<Configure>", _on_canvas_configure)
|
||
|
||
def _on_mousewheel(event):
|
||
canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")
|
||
def _on_mousewheel_linux(event):
|
||
if event.num == 4:
|
||
canvas.yview_scroll(-3, "units")
|
||
elif event.num == 5:
|
||
canvas.yview_scroll(3, "units")
|
||
canvas.bind_all("<MouseWheel>", _on_mousewheel)
|
||
canvas.bind_all("<Button-4>", _on_mousewheel_linux)
|
||
canvas.bind_all("<Button-5>", _on_mousewheel_linux)
|
||
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# --- Scroll pour l'onglet Paramètres ---
|
||
canvas2 = tk.Canvas(tab_params_outer, bg=CLR_BG, highlightthickness=0)
|
||
scrollbar2 = ttk.Scrollbar(tab_params_outer, orient=tk.VERTICAL, command=canvas2.yview)
|
||
self._params_scroll = tk.Frame(canvas2, bg=CLR_BG)
|
||
self._params_scroll.bind(
|
||
"<Configure>",
|
||
lambda e: canvas2.configure(scrollregion=canvas2.bbox("all")),
|
||
)
|
||
canvas2_window = canvas2.create_window((0, 0), window=self._params_scroll, anchor="nw")
|
||
canvas2.configure(yscrollcommand=scrollbar2.set)
|
||
def _on_canvas2_configure(event):
|
||
canvas2.itemconfig(canvas2_window, width=event.width)
|
||
canvas2.bind("<Configure>", _on_canvas2_configure)
|
||
canvas2.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
scrollbar2.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# "main" pointe désormais sur le scroll de l'onglet Anonymisation.
|
||
# Tout le contenu existant (étape 1, formats, boutons, progress, résultats)
|
||
# reste inchangé — seul le parent implicite a changé.
|
||
main = self._scroll_frame
|
||
|
||
# =============================================================
|
||
# ÉTAPE 1 — Choix du dossier
|
||
# =============================================================
|
||
tk.Label(
|
||
main, text="1. Choisir les documents ou fichiers (PDF, Word, Images, Texte)", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
|
||
|
||
self._folder_zone = tk.Frame(
|
||
main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER,
|
||
highlightthickness=2, cursor="hand2",
|
||
)
|
||
self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
|
||
|
||
# Contenu initial (invite à cliquer)
|
||
self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG)
|
||
self._folder_inner.pack(fill=tk.X, padx=20, pady=18)
|
||
|
||
self._folder_icon_lbl = tk.Label(
|
||
self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28),
|
||
bg=CLR_CARD_BG,
|
||
)
|
||
self._folder_icon_lbl.pack()
|
||
|
||
self._folder_text_lbl = tk.Label(
|
||
self._folder_inner,
|
||
text="Cliquez pour choisir un dossier ou un fichier",
|
||
font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._folder_text_lbl.pack(pady=(4, 0))
|
||
|
||
# Rendre toute la zone cliquable
|
||
for w in (self._folder_zone, self._folder_inner, self._folder_icon_lbl, self._folder_text_lbl):
|
||
w.bind("<Button-1>", lambda e: self._browse())
|
||
|
||
# =============================================================
|
||
# ÉTAPE 2 — Info formats générés
|
||
# =============================================================
|
||
tk.Label(
|
||
main, text="2. Formats générés", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
|
||
|
||
info_frame = tk.Frame(
|
||
main, bg=CLR_BLUE_LIGHT,
|
||
highlightbackground=CLR_CARD_BORDER, highlightthickness=1,
|
||
)
|
||
info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
|
||
|
||
info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT)
|
||
info_inner.pack(fill=tk.X, padx=16, pady=12)
|
||
|
||
tk.Label(
|
||
info_inner,
|
||
text="Paramètres de traitement :",
|
||
font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X)
|
||
|
||
tk.Label(
|
||
info_inner,
|
||
text=("\u2022 Recherche récursive de tous les documents dans les sous-dossiers\n"
|
||
"\u2022 Sortie PDF Image (raster) — sécurité maximale, aucun texte résiduel\n"
|
||
"\u2022 Résultats dans le dossier « anonymise/ » à la racine"),
|
||
font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
|
||
anchor="w", justify=tk.LEFT,
|
||
).pack(fill=tk.X, pady=(4, 0))
|
||
|
||
# --- Checkbox VLM ---
|
||
if VlmManager is not None:
|
||
vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT)
|
||
vlm_row.pack(fill=tk.X, pady=(8, 0))
|
||
self._vlm_check = tk.Checkbutton(
|
||
vlm_row, text="Analyse visuelle VLM (Ollama)",
|
||
variable=self.use_vlm, font=self._f_card_desc,
|
||
bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT,
|
||
command=self._on_vlm_toggle,
|
||
)
|
||
self._vlm_check.pack(side=tk.LEFT)
|
||
self._vlm_status_lbl = tk.Label(
|
||
vlm_row, text="", font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0))
|
||
ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.")
|
||
|
||
# =============================================================
|
||
# BOUTONS LANCER / STOPPER
|
||
# =============================================================
|
||
buttons_frame = tk.Frame(main, bg=CLR_BG)
|
||
buttons_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 4))
|
||
|
||
self.btn_run = tk.Button(
|
||
buttons_frame, text="Lancer l'anonymisation",
|
||
font=self._f_button, bg=CLR_PRIMARY, fg="white",
|
||
activebackground="#1d4ed8", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._run,
|
||
)
|
||
self.btn_run.pack(fill=tk.X)
|
||
|
||
self.btn_stop = tk.Button(
|
||
buttons_frame, text="Arrêter le traitement",
|
||
font=self._f_button, bg=CLR_RED, fg="white",
|
||
activebackground="#b91c1c", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._stop,
|
||
)
|
||
# NE PAS pack — sera affiché pendant le traitement
|
||
|
||
# Lien aide
|
||
help_lbl = tk.Label(
|
||
main, text="Comment ça marche ?", font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
|
||
)
|
||
help_lbl.pack(pady=(0, 8))
|
||
help_lbl.bind("<Button-1>", lambda e: self._show_help())
|
||
|
||
# =============================================================
|
||
# ONGLET "PARAMÈTRES" — contenu monté dans self._params_scroll
|
||
# =============================================================
|
||
self._params_frame = self._params_scroll
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text="Personnaliser le masquage",
|
||
font=(self._font_family, 14, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(20, 4))
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text=("Ces listes complètent les détections automatiques du programme. "
|
||
"Utile pour gérer les spécificités de votre établissement."),
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 16))
|
||
|
||
# Conteneur interne avec padding latéral pour les listboxes
|
||
params_inner = tk.Frame(self._params_frame, bg=CLR_BG)
|
||
params_inner.pack(fill=tk.X, padx=pad_x, pady=(0, 12))
|
||
|
||
# --- Whitelist (phrases à ne pas anonymiser) ---
|
||
self._wl_listbox, self._wl_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u2705 Phrases à ne PAS anonymiser :",
|
||
placeholder="Ajouter une phrase à protéger...",
|
||
color_tag=CLR_GREEN_LIGHT,
|
||
)
|
||
|
||
# --- Blacklist (phrases à toujours masquer) ---
|
||
self._bl_listbox, self._bl_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u26d4 Mots/phrases à TOUJOURS masquer :",
|
||
placeholder="Ajouter un mot ou phrase à masquer...",
|
||
color_tag=CLR_PRIMARY_LIGHT,
|
||
)
|
||
|
||
# --- Stop-words additionnels (mots à ne jamais identifier comme noms) ---
|
||
# Différent de la whitelist : agit en amont, pour les sigles, acronymes,
|
||
# termes métier locaux qui ressemblent à des noms mais n'en sont pas.
|
||
self._sw_listbox, self._sw_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u26a0 Mots à ne jamais identifier comme noms (sigles, acronymes...) :",
|
||
placeholder="Ajouter un mot (ex: sigle local, acronyme métier)...",
|
||
color_tag=CLR_ACCENT_LIGHT,
|
||
)
|
||
|
||
# Boutons sauvegarder + exporter
|
||
btn_row = tk.Frame(params_inner, bg=CLR_BG)
|
||
btn_row.pack(fill=tk.X, pady=(12, 12))
|
||
|
||
export_btn = tk.Button(
|
||
btn_row, text="\u2709 Exporter pour envoi",
|
||
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._export_params,
|
||
)
|
||
export_btn.pack(side=tk.LEFT)
|
||
|
||
import_btn = tk.Button(
|
||
btn_row, text="\u2B07 Importer",
|
||
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._import_params,
|
||
)
|
||
import_btn.pack(side=tk.LEFT, padx=(4, 0))
|
||
|
||
save_btn = tk.Button(
|
||
btn_row, text="Sauvegarder",
|
||
font=self._f_small, bg=CLR_PRIMARY, fg="white",
|
||
activebackground=CLR_PRIMARY_DARK, activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", padx=14, pady=6,
|
||
command=self._save_params,
|
||
)
|
||
save_btn.pack(side=tk.RIGHT)
|
||
|
||
# Charger les valeurs initiales depuis la config
|
||
self._load_params()
|
||
|
||
# Retour dans l'onglet Anonymisation
|
||
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 8))
|
||
|
||
# =============================================================
|
||
# BARRE DE PROGRESSION (masquée)
|
||
# =============================================================
|
||
self._progress_frame = tk.Frame(main, bg=CLR_BG)
|
||
# NE PAS pack — sera affiché dynamiquement
|
||
|
||
self._progressbar = ttk.Progressbar(
|
||
self._progress_frame, orient=tk.HORIZONTAL, mode="determinate",
|
||
)
|
||
self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4))
|
||
|
||
self._progress_label = tk.Label(
|
||
self._progress_frame, text="", font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
)
|
||
self._progress_label.pack(fill=tk.X)
|
||
|
||
# =============================================================
|
||
# SECTION RÉSULTATS (masquée)
|
||
# =============================================================
|
||
self._results_frame = tk.Frame(main, bg=CLR_BG)
|
||
# NE PAS pack
|
||
|
||
tk.Label(
|
||
self._results_frame, text="Résultats", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 8))
|
||
|
||
stats_row = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
stats_row.pack(fill=tk.X, pady=(0, 12))
|
||
stats_row.columnconfigure(0, weight=1)
|
||
stats_row.columnconfigure(1, weight=1)
|
||
stats_row.columnconfigure(2, weight=1)
|
||
|
||
self._stat_files = self._make_stat_card(stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0)
|
||
self._stat_masked = self._make_stat_card(stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1)
|
||
self._stat_errors = self._make_stat_card(stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2)
|
||
|
||
# Indicateurs de qualité et sécurité
|
||
quality_row = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
quality_row.pack(fill=tk.X, pady=(0, 12))
|
||
|
||
# Badge de fuites
|
||
self._leak_badge = tk.Label(
|
||
quality_row,
|
||
text="🔒 Vérification en cours...",
|
||
font=self._f_body_bold,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_PRIMARY,
|
||
padx=12, pady=6,
|
||
)
|
||
self._leak_badge.pack(side=tk.LEFT, padx=(0, 8))
|
||
|
||
# Temps de traitement
|
||
self._perf_label = tk.Label(
|
||
quality_row,
|
||
text="⏱️ Calcul en cours...",
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._perf_label.pack(side=tk.LEFT)
|
||
|
||
self.btn_open_out = tk.Button(
|
||
self._results_frame, text="Ouvrir le dossier de résultats",
|
||
font=self._f_button, bg=CLR_GREEN, fg="white",
|
||
activebackground="#15803d", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._open_out,
|
||
)
|
||
self.btn_open_out.pack(fill=tk.X, pady=(0, 8))
|
||
|
||
# Toggle journal
|
||
self._log_visible = False
|
||
self._log_toggle = tk.Label(
|
||
self._results_frame, text="Voir le journal détaillé \u25BC",
|
||
font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
|
||
)
|
||
self._log_toggle.pack(pady=(0, 4))
|
||
self._log_toggle.bind("<Button-1>", lambda e: self._toggle_log())
|
||
|
||
self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
# NE PAS pack
|
||
|
||
self.txt = tk.Text(
|
||
self._log_frame, height=14, font=self._f_small,
|
||
bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD,
|
||
state=tk.DISABLED,
|
||
)
|
||
log_scrollbar = ttk.Scrollbar(self._log_frame, command=self.txt.yview)
|
||
self.txt.configure(yscrollcommand=log_scrollbar.set)
|
||
self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# =============================================================
|
||
# BARRE DE STATUT
|
||
# =============================================================
|
||
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(18, 0))
|
||
|
||
status_bar = tk.Frame(main, bg=CLR_BG)
|
||
status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12))
|
||
|
||
tk.Label(
|
||
status_bar, textvariable=self.status_var, font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
).pack(side=tk.LEFT)
|
||
|
||
tk.Label(
|
||
status_bar, text=_version_long(), font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e",
|
||
).pack(side=tk.RIGHT)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Cartes de statistiques
|
||
# ---------------------------------------------------------------
|
||
def _make_stat_card(self, parent, number: str, label: str,
                    fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Label]:
    """Build one statistic card (big number above a caption) in a grid cell.

    The card is placed in row 0, column ``col`` of ``parent``.

    Returns a dict with the created widgets under the keys ``"frame"``,
    ``"number"`` and ``"label"`` so they can be recoloured later.
    """
    # Horizontal gutter: the first column has no left margin, the last
    # (any column other than 0 or 1) has no right margin.
    if col == 0:
        gutter = (0, 4)
    elif col == 1:
        gutter = (4, 4)
    else:
        gutter = (4, 0)

    card = tk.Frame(
        parent, bg=bg_color,
        highlightbackground=bg_color, highlightthickness=1,
    )
    card.grid(row=0, column=col, sticky="nsew", padx=gutter)

    value_widget = tk.Label(
        card, text=number, font=self._f_stat, bg=bg_color, fg=fg_color,
    )
    value_widget.pack(pady=(12, 2))

    caption_widget = tk.Label(
        card, text=label, font=self._f_small,
        bg=bg_color, fg=CLR_TEXT_SECONDARY,
    )
    caption_widget.pack(pady=(0, 12))

    return {"frame": card, "number": value_widget, "label": caption_widget}
|
||
|
||
def _update_stat_card(self, card: Dict[str, tk.Label], value: int,
                      fg_color: str, bg_color: str):
    """Refresh a card built by ``_make_stat_card``: new value and colours."""
    number_widget = card["number"]
    number_widget.configure(text=str(value), fg=fg_color, bg=bg_color)

    # Frame border and caption follow the same background as the number.
    frame_widget = card["frame"]
    frame_widget.configure(bg=bg_color, highlightbackground=bg_color)

    caption_widget = card["label"]
    caption_widget.configure(bg=bg_color)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Actions dossier
|
||
# ---------------------------------------------------------------
|
||
def _browse(self):
    """Pop up a small menu letting the user pick a folder or a single file."""
    popup = tk.Menu(self.root, tearoff=0)
    choices = (
        ("Choisir un dossier", self._browse_folder),
        ("Choisir un fichier", self._browse_file),
    )
    for caption, handler in choices:
        popup.add_command(label=caption, command=handler)

    # Show the menu at the current pointer position; always release the grab.
    try:
        popup.tk_popup(self.root.winfo_pointerx(), self.root.winfo_pointery())
    finally:
        popup.grab_release()
|
||
|
||
def _browse_folder(self):
    """Ask for a directory and switch the UI to folder mode if one is chosen."""
    chosen = filedialog.askdirectory()
    if not chosen:
        return  # dialog cancelled
    # Selecting a folder cancels any previous single-file selection.
    self._single_file = None
    self.dir_var.set(chosen)
    self._update_folder_display()
|
||
|
||
def _browse_file(self):
    """Ask for a single document and switch the UI to single-file mode."""
    try:
        from format_converter import SUPPORTED_EXTENSIONS as supported
    except ImportError:
        # Converter module unavailable: only PDF is offered.
        supported = {".pdf"}

    # One space-separated glob list for the "all supported" filter entry.
    all_patterns = " ".join("*" + ext for ext in sorted(supported))
    dialog_filters = [
        ("Documents supportés", all_patterns),
        ("PDF", "*.pdf"),
        ("Word", "*.docx"),
        ("Images", "*.jpg *.jpeg *.png *.tiff *.tif *.bmp"),
        ("Texte", "*.txt *.rtf *.odt *.html *.htm"),
        ("Tous", "*.*"),
    ]
    picked = filedialog.askopenfilename(
        title="Choisir un document à anonymiser",
        filetypes=dialog_filters,
    )
    if not picked:
        return  # dialog cancelled

    self._single_file = Path(picked)
    self.dir_var.set(str(self._single_file.parent))
    self._update_folder_display()
|
||
|
||
def _update_folder_display(self):
    """Rebuild the selection card: icon, path/file name, subtitle, "Changer" link.

    Does nothing when no folder is set. In folder mode the supported
    documents are counted recursively; in single-file mode the file name
    and its extension are shown instead.
    """
    folder = self.dir_var.get()
    if not folder:
        return

    is_single = getattr(self, '_single_file', None) is not None

    if is_single:
        headline = self._single_file.name
        subtitle = f"Fichier unique — {self._single_file.suffix.upper().lstrip('.')}"
    else:
        try:
            from format_converter import SUPPORTED_EXTENSIONS as supported
        except ImportError:
            supported = {".pdf"}
        # Best effort: an unreadable tree simply yields a count of 0.
        doc_count = 0
        try:
            doc_count = sum(
                1 for entry in Path(folder).rglob("*")
                if entry.is_file() and entry.suffix.lower() in supported
            )
        except Exception:
            pass
        headline = folder
        if doc_count <= 1:
            wording = "document trouvé (récursif)"
        else:
            wording = "documents trouvés (récursif)"
        subtitle = f"{doc_count} {wording}"

    # Keep only the tail of over-long paths (60 characters max on screen).
    if len(headline) > 60:
        headline = "..." + headline[-57:]

    # Throw away the previous content of the card before rebuilding it.
    for child in self._folder_inner.winfo_children():
        child.destroy()

    row = tk.Frame(self._folder_inner, bg=CLR_CARD_BG)
    row.pack(fill=tk.X)

    glyph = "\U0001f4c4" if is_single else "\U0001f4c2"  # page or open folder
    glyph_label = tk.Label(
        row, text=glyph, font=(self._font_family, 16), bg=CLR_CARD_BG,
    )
    glyph_label.pack(side=tk.LEFT, padx=(0, 8))

    text_column = tk.Frame(row, bg=CLR_CARD_BG)
    text_column.pack(side=tk.LEFT, fill=tk.X, expand=True)

    headline_label = tk.Label(
        text_column, text=headline, font=self._f_body_bold,
        bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
    )
    headline_label.pack(fill=tk.X)

    subtitle_label = tk.Label(
        text_column, text=subtitle, font=self._f_small,
        bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    )
    subtitle_label.pack(fill=tk.X)

    change_link = tk.Label(
        row, text="Changer", font=self._f_small,
        bg=CLR_CARD_BG, fg=CLR_PRIMARY, cursor="hand2",
    )
    change_link.pack(side=tk.RIGHT, padx=(8, 0))
    change_link.bind("<Button-1>", lambda _event: self._browse())

    # A green border signals that a selection has been made.
    self._folder_zone.configure(highlightbackground=CLR_GREEN)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Lancement
|
||
# ---------------------------------------------------------------
|
||
def _run(self):
    """Validate the current selection and start processing in a worker thread.

    Single-file mode processes only ``self._single_file``; folder mode
    collects every supported document under the chosen folder (recursive).
    A warning dialog is shown, and nothing is started, when the selection
    is missing or invalid or when no supported document is found.
    """
    is_single = getattr(self, '_single_file', None) is not None

    if is_single:
        # Single-file mode
        if not self._single_file.is_file():
            messagebox.showwarning("Fichier introuvable", f"{self._single_file}")
            return
        folder = self._single_file.parent
        pdfs = [self._single_file]
    else:
        # Folder mode
        raw_folder = self.dir_var.get().strip()
        # An empty field must be rejected explicitly: Path("") normalises to
        # "." whose is_dir() is True, which would silently process the
        # current working directory.
        if not raw_folder or not Path(raw_folder).is_dir():
            messagebox.showwarning(
                "Dossier invalide",
                "Choisissez un dossier ou un fichier.",
            )
            return
        folder = Path(raw_folder)
        try:
            from format_converter import SUPPORTED_EXTENSIONS
        except ImportError:
            SUPPORTED_EXTENSIONS = {".pdf"}
        pdfs = sorted(
            p for p in folder.rglob("*")
            if p.is_file() and p.suffix.lower() in SUPPORTED_EXTENSIONS
        )
        if not pdfs:
            exts = ", ".join(sorted(SUPPORTED_EXTENSIONS))
            messagebox.showwarning(
                "Aucun document",
                f"Aucun fichier supporté trouvé.\n"
                f"Formats acceptés : {exts}\n"
                f"(recherche récursive dans les sous-dossiers)",
            )
            return

    # Swap the run button for the stop button and reset the display.
    self._stop_requested = False
    self.btn_run.pack_forget()
    self.btn_stop.pack(fill=tk.X)
    self._show_progress(total=len(pdfs))
    self._hide_results()
    threading.Thread(target=self._worker, args=(folder, pdfs), daemon=True).start()
|
||
|
||
def _stop(self):
    """Request a stop; the worker checks the flag before each document."""
    self._stop_requested = True
    # Grey out the button so the request cannot be sent twice.
    self.btn_stop.configure(
        state=tk.DISABLED,
        bg="#fca5a5",
        text="Arrêt en cours...",
    )
    self.status_var.set("Arrêt demandé, fin du document en cours...")
|
||
|
||
def _worker(self, folder: Path, pdfs: List[Path]):
    """Process every document of ``pdfs`` and report through ``self.queue``.

    Runs in a background thread. Outputs go to ``folder / "anonymise"``.
    For each document a PROGRESS message is queued, then LOG messages with
    the outputs and audit summary (or the error). A final DONE message
    carries the totals; a failure outside the per-document handling is
    reported as a fatal error with every document counted as failed.
    """
    import time
    started = time.time()

    try:
        outdir = folder / "anonymise"
        outdir.mkdir(exist_ok=True)
        ok = 0
        ko = 0
        global_counts: Dict[str, int] = {}
        total_docs = len(pdfs)

        for position, pdf in enumerate(pdfs, start=1):
            # Honour a stop request between two documents.
            if self._stop_requested:
                self.queue.put(UiMessage(
                    kind=MsgType.LOG,
                    text="\n⚠️ Arrêt demandé par l'utilisateur",
                ))
                break

            self.queue.put(UiMessage(
                kind=MsgType.PROGRESS, current=position, total=total_docs,
                filename=pdf.name,
            ))

            try:
                active = self._active_manager
                use_ner = bool(
                    active and self.use_hf
                    and hasattr(active, 'is_loaded') and active.is_loaded()
                )
                # Thresholds are only built when NER is on and the active
                # manager is not an EdsPseudoManager.
                is_eds = EdsPseudoManager and isinstance(active, EdsPseudoManager)
                if use_ner and NerThresholds and not is_eds:
                    thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85)
                else:
                    thresholds = None

                # OGC number = parent directory name up to the first "_"
                # (e.g. "257_23209962" -> "257"); None without underscore.
                parent_name = pdf.parent.name
                if "_" in parent_name:
                    ogc = parent_name.split("_")[0]
                else:
                    ogc = None

                vlm_active = bool(
                    self.use_vlm.get()
                    and self._vlm_available
                    and self._vlm_manager
                    and self._vlm_manager.is_loaded()
                )

                # Prefer the multi-format entry point; fall back to the
                # PDF-only one. The path keyword differs between the two.
                process_fn = getattr(core, 'process_document', None) or core.process_pdf
                if process_fn.__name__ == "process_document":
                    path_kwarg = {"doc_path": pdf}
                else:
                    path_kwarg = {"pdf_path": pdf}
                outputs = process_fn(
                    **path_kwarg,
                    out_dir=outdir,
                    make_vector_redaction=False,
                    also_make_raster_burn=True,
                    config_path=Path(self.cfg_path.get()),
                    use_hf=use_ner,
                    ner_manager=active,
                    ner_thresholds=thresholds,
                    ogc_label=ogc,
                    vlm_manager=self._vlm_manager if vlm_active else None,
                )
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}"))
                for out_key, out_value in outputs.items():
                    self.queue.put(UiMessage(kind=MsgType.LOG, text=f" - {out_key}: {out_value}"))

                counts = self._count_audit(Path(outputs.get("audit", "")))
                if counts:
                    summary = ", ".join(f"{k}={v}" for k, v in sorted(counts.items()))
                    self.queue.put(UiMessage(
                        kind=MsgType.LOG,
                        text=" ~ résumé : " + summary,
                    ))
                    for kind_name, kind_total in counts.items():
                        global_counts[kind_name] = global_counts.get(kind_name, 0) + kind_total
                ok += 1
            except Exception as e:
                self.queue.put(UiMessage(
                    kind=MsgType.LOG,
                    text=f"\u2717 {pdf.name} \u2192 ERREUR: {e}",
                ))
                ko += 1

        total_time = time.time() - started
        total_masked = sum(global_counts.values())

        interrupted = self._stop_requested
        # After an interruption the output folder is only reported when at
        # least one document was actually produced.
        if interrupted and not ok > 0:
            reported_outdir = ""
        else:
            reported_outdir = str(outdir)
        self.queue.put(UiMessage(
            kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
            outdir=reported_outdir, total_time=total_time,
        ))
        if interrupted:
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text=f"⚠️ TRAITEMENT INTERROMPU : {ok} fichiers traités, {len(pdfs) - ok - ko} ignorés",
            ))

        if ok and global_counts:
            batch_summary = ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items()))
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text="RÉSUMÉ DU LOT : " + batch_summary,
            ))
    except Exception as e:
        self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}"))
        total_time = time.time() - started
        self.queue.put(UiMessage(
            kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0,
            outdir="", total_time=total_time,
        ))
|
||
|
||
# ---------------------------------------------------------------
|
||
# Pompe de messages
|
||
# ---------------------------------------------------------------
|
||
def _pump_logs(self):
    """Drain the worker queue, dispatch each message, then re-arm the timer.

    LOG messages are appended to the journal, PROGRESS messages update the
    progress bar and DONE messages finalise the run. The method reschedules
    itself every 60 ms, even if a handler raised.
    """
    try:
        while True:
            try:
                message = self.queue.get_nowait()
            except queue.Empty:
                break  # nothing left for this tick
            kind = message.kind
            if kind == MsgType.LOG:
                self._append_log(message.text)
            elif kind == MsgType.PROGRESS:
                self._update_progress(message.current, message.total, message.filename)
            elif kind == MsgType.DONE:
                self._on_done(message)
    finally:
        self.root.after(60, self._pump_logs)
|
||
|
||
def _append_log(self, text: str):
    """Append one line to the journal widget and scroll to the end."""
    journal = self.txt
    # The widget is kept disabled; enable it only while writing.
    journal.configure(state=tk.NORMAL)
    journal.insert(tk.END, text + "\n")
    journal.see(tk.END)
    journal.configure(state=tk.DISABLED)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Progression
|
||
# ---------------------------------------------------------------
|
||
def _show_progress(self, total: int):
    """Reset the progress bar for ``total`` documents and display it."""
    self._progressbar.configure(maximum=total, value=0)
    self._progress_label.configure(text="")

    # Keep the bar above the results section when that section is packed.
    if self._results_frame.winfo_manager():
        anchor = self._results_frame
    else:
        anchor = None
    self._progress_frame.pack(fill=tk.X, padx=32, pady=(0, 18), before=anchor)
|
||
|
||
def _hide_progress(self):
    """Remove the progress section from the layout."""
    progress_section = self._progress_frame
    progress_section.pack_forget()
|
||
|
||
def _update_progress(self, current: int, total: int, filename: str):
    """Advance the bar and show "current/total — filename" in label and status."""
    caption = f"{current}/{total} — {filename}"
    self._progressbar.configure(value=current)
    self._progress_label.configure(text=caption)
    self.status_var.set(caption)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Résultats
|
||
# ---------------------------------------------------------------
|
||
def _show_results(self, ok: int, ko: int, masked: int):
    """Fill the three statistic cards and display the results section."""
    self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT)
    self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT)

    # The error card only turns red when something actually failed.
    if ko > 0:
        error_fg, error_bg = CLR_RED, CLR_RED_LIGHT
    else:
        error_fg, error_bg = CLR_TEXT_SECONDARY, "#f3f4f6"
    self._update_stat_card(self._stat_errors, ko, error_fg, error_bg)

    self._results_frame.pack(fill=tk.X, padx=32, pady=(0, 12))
|
||
|
||
def _hide_results(self):
    """Hide the results section, collapse the journal and erase its content."""
    for section in (self._results_frame, self._log_frame):
        section.pack_forget()
    self._log_visible = False
    self._log_toggle.configure(text="Voir le journal détaillé \u25BC")

    # Empty the journal (the widget is read-only outside of updates).
    journal = self.txt
    journal.configure(state=tk.NORMAL)
    journal.delete("1.0", tk.END)
    journal.configure(state=tk.DISABLED)
|
||
|
||
def _on_done(self, msg: UiMessage):
    """Finalise a run: restore the run button, set the status, show results.

    When the message carries an output folder, it is remembered, scanned
    for leaks, and the performance label is refreshed.
    """
    self._hide_progress()

    # Put the stop button back in its initial state and swap in "run".
    self.btn_stop.pack_forget()
    self.btn_stop.config(state=tk.NORMAL, bg=CLR_RED, text="Arrêter le traitement")
    self.btn_run.pack(fill=tk.X)

    if self._stop_requested:
        status_text = f"Interrompu : {msg.ok} traités, {msg.ko} erreurs."
    else:
        status_text = f"Terminé : {msg.ok} OK, {msg.ko} erreurs."
    self.status_var.set(status_text)

    if msg.outdir:
        self._last_outdir = Path(msg.outdir)

        # Leak verification on the produced texts
        self._update_leak_indicator(self._check_leaks(Path(msg.outdir)))

        # Timing summary
        self._perf_label.configure(
            text=self._calculate_performance(msg.ok, msg.total_time),
        )

    self._show_results(msg.ok, msg.ko, msg.masked)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Toggle journal
|
||
# ---------------------------------------------------------------
|
||
def _toggle_log(self):
    """Show or hide the detailed journal and update the toggle caption."""
    currently_shown = self._log_visible
    if currently_shown:
        self._log_frame.pack_forget()
        caption = "Voir le journal détaillé \u25BC"
    else:
        self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))
        caption = "Masquer le journal \u25B2"
    self._log_toggle.configure(text=caption)
    self._log_visible = not currently_shown
|
||
|
||
# ---------------------------------------------------------------
|
||
# Ouvrir dossier résultats
|
||
# ---------------------------------------------------------------
|
||
def _open_out(self):
    """Open the last output folder, if one has been recorded."""
    target = self._last_outdir
    if not target:
        return
    open_folder(target)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Aide
|
||
# ---------------------------------------------------------------
|
||
def _show_help(self):
    """Display the short "how it works" help dialog."""
    dialog_title = "Comment ça marche ?"
    dialog_body = (
        "1) Choisissez le dossier racine contenant vos fichiers PDF.\n\n"
        "2) Cliquez sur « Lancer la pseudonymisation ».\n\n"
        "Tous les fichiers PDF sont traités\n"
        "(recherche récursive dans les sous-dossiers).\n\n"
        "Un PDF Image (raster) est généré pour chaque fichier :\n"
        "chaque page devient une image avec les données masquées.\n"
        "Sécurité maximale, aucun texte résiduel.\n\n"
        "Les résultats sont regroupés à plat dans le dossier\n"
        "« anonymise/ » à la racine du dossier sélectionné."
    )
    messagebox.showinfo(dialog_title, dialog_body)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Paramètres avancés (whitelist/blacklist)
|
||
# ---------------------------------------------------------------
|
||
def _build_phrase_list(self, parent, title: str, placeholder: str, color_tag: str):
    """Build an editable phrase list: title, entry + add button, listbox, delete button.

    ``placeholder`` is shown greyed in the entry while it is empty and is
    never added to the list. ``color_tag`` is the background of the add
    button and of the listbox.

    Returns the tuple ``(listbox, entry)``.
    """
    container = tk.Frame(parent, bg=CLR_BG)
    container.pack(fill=tk.X, pady=(4, 8))

    heading = tk.Label(
        container, text=title, font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    )
    heading.pack(fill=tk.X, pady=(0, 4))

    # --- Input row: entry with placeholder + add button ---
    input_row = tk.Frame(container, bg=CLR_BG)
    input_row.pack(fill=tk.X, pady=(0, 4))

    entry = tk.Entry(input_row, font=self._f_small, relief=tk.GROOVE, bd=1)

    def _show_placeholder():
        entry.insert(0, placeholder)
        entry.configure(fg="#999")

    _show_placeholder()

    def _clear_placeholder(_event):
        if entry.get() != placeholder:
            return
        entry.delete(0, tk.END)
        entry.configure(fg=CLR_TEXT)

    def _restore_placeholder(_event):
        if not entry.get().strip():
            _show_placeholder()

    entry.bind("<FocusIn>", _clear_placeholder)
    entry.bind("<FocusOut>", _restore_placeholder)
    entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4))

    def _append_entry(event=None):
        # ``listbox`` is created further down; it exists by the time this runs.
        candidate = entry.get().strip()
        if not candidate or candidate == placeholder:
            return
        # Skip duplicates, but clear the entry either way.
        if candidate not in list(listbox.get(0, tk.END)):
            listbox.insert(tk.END, candidate)
        entry.delete(0, tk.END)

    add_button = tk.Button(
        input_row, text="+ Ajouter", font=self._f_small,
        bg=color_tag, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2",
        command=_append_entry, padx=8,
    )
    add_button.pack(side=tk.LEFT)
    entry.bind("<Return>", _append_entry)

    # --- Listbox with its scrollbar ---
    list_area = tk.Frame(container, bg=CLR_BG)
    list_area.pack(fill=tk.X)

    listbox = tk.Listbox(
        list_area, height=4, font=("Consolas", 9),
        relief=tk.GROOVE, bd=1, selectmode=tk.EXTENDED,
        bg=color_tag,
    )
    list_scroll = ttk.Scrollbar(list_area, orient=tk.VERTICAL, command=listbox.yview)
    listbox.configure(yscrollcommand=list_scroll.set)
    listbox.pack(side=tk.LEFT, fill=tk.X, expand=True)
    list_scroll.pack(side=tk.RIGHT, fill=tk.Y)

    # --- Delete button ---
    def _delete_selected():
        # Delete from the bottom up so remaining indices stay valid.
        for index in reversed(listbox.curselection()):
            listbox.delete(index)

    remove_button = tk.Button(
        container, text="Supprimer la sélection", font=self._f_small,
        bg="#ffcdd2", fg="#b71c1c", relief=tk.GROOVE, cursor="hand2",
        command=_delete_selected, padx=8,
    )
    remove_button.pack(anchor="e", pady=(2, 0))

    return listbox, entry
|
||
|
||
def _load_params(self):
    """Fill the whitelist / blacklist / stop-word listboxes from the YAML config.

    Reads ``whitelist_phrases``, ``blacklist.force_mask_terms`` and
    ``additional_stopwords``. Missing or null sections are treated as
    empty, non-string entries are converted with ``str()`` and blank
    entries are skipped. Loading stays best effort: a failure is logged
    and never raised to the caller.
    """
    try:
        cfg_path = Path(self.cfg_path.get())
        if not cfg_path.exists() or yaml is None:
            return

        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            # Unexpected top-level structure: leave the lists untouched.
            return

        # "blacklist:" with no value loads as None, not as an empty mapping.
        blacklist = data.get("blacklist")
        if not isinstance(blacklist, dict):
            blacklist = {}

        sections = (
            (self._wl_listbox, data.get("whitelist_phrases")),
            (self._bl_listbox, blacklist.get("force_mask_terms")),
            (self._sw_listbox, data.get("additional_stopwords")),
        )
        for listbox, values in sections:
            listbox.delete(0, tk.END)
            if isinstance(values, str):
                # A lone scalar would otherwise be iterated character by character.
                values = [values]
            elif not isinstance(values, (list, tuple)):
                values = []
            for value in values:
                text = str(value).strip() if value else ""
                if text:
                    listbox.insert(tk.END, text)
    except Exception as exc:
        # Best effort: a broken config must not prevent the UI from starting,
        # but the reason should not vanish silently.
        import logging
        logging.getLogger(__name__).warning(
            "Could not load list parameters from config: %s", exc
        )
|
||
|
||
def _export_params(self):
    """Write the three lists to a JSON file chosen by the user (for e-mailing).

    The save dialog starts on the desktop ("Desktop", else "Bureau", else
    the home directory). Any error is shown in an error dialog.
    """
    try:
        import json as _json
        from datetime import datetime

        whitelist = list(self._wl_listbox.get(0, tk.END))
        blacklist = list(self._bl_listbox.get(0, tk.END))
        stopwords = list(self._sw_listbox.get(0, tk.END))

        payload = {
            "version": APP_VERSION,
            "date_export": datetime.now().isoformat(),
            "etablissement": "",  # left blank for the user to fill in
            "whitelist_phrases": whitelist,
            "blacklist_force_mask_terms": blacklist,
            "additional_stopwords": stopwords,
            "instructions": (
                "Ce fichier contient les paramètres d'anonymisation personnalisés. "
                "Envoyez-le par email à l'équipe technique pour mise à jour du programme."
            ),
        }

        # First existing candidate wins; home directory is the last resort.
        home = Path.home()
        start_dir = next(
            (candidate for candidate in (home / "Desktop", home / "Bureau")
             if candidate.exists()),
            home,
        )

        dest = filedialog.asksaveasfilename(
            title="Exporter les paramètres",
            initialdir=str(start_dir),
            initialfile="parametres_anonymisation.json",
            defaultextension=".json",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not dest:
            return  # dialog cancelled

        serialized = _json.dumps(payload, ensure_ascii=False, indent=2)
        Path(dest).write_text(serialized, encoding="utf-8")
        messagebox.showinfo(
            "Export réussi",
            f"Paramètres exportés dans :\n{dest}\n\n"
            f"Vous pouvez envoyer ce fichier par email\n"
            f"à l'équipe technique.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'export :\n{e}")
|
||
|
||
def _import_params(self):
    """Merge lists from an exported JSON file into the three listboxes.

    Entries already present are skipped, including duplicates that appear
    several times inside the imported file. Non-string entries are
    converted with ``str()``; blank entries are ignored. Nothing is
    written to the config here: the user still has to click
    « Sauvegarder ». Any error is shown in an error dialog.
    """
    try:
        import json as _json

        src = filedialog.askopenfilename(
            title="Importer des paramètres",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not src:
            return

        data = _json.loads(Path(src).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            # Validate before touching any list so a bad file changes nothing.
            raise ValueError("format de fichier invalide (objet JSON attendu)")

        def _merge(listbox, incoming) -> int:
            """Insert the new, non-blank entries of ``incoming``; return how many."""
            if isinstance(incoming, str):
                incoming = [incoming]
            elif not isinstance(incoming, (list, tuple)):
                return 0
            known = set(listbox.get(0, tk.END))
            added = 0
            for item in incoming:
                text = str(item).strip() if item else ""
                if text and text not in known:
                    listbox.insert(tk.END, text)
                    # Remember it so a repeat later in the same file is skipped.
                    known.add(text)
                    added += 1
            return added

        added_wl = _merge(self._wl_listbox, data.get("whitelist_phrases"))
        added_bl = _merge(self._bl_listbox, data.get("blacklist_force_mask_terms"))
        added_sw = _merge(self._sw_listbox, data.get("additional_stopwords"))

        version = data.get("version", "?")
        # Keep only the YYYY-MM-DD part; tolerate a missing or non-string value.
        date_exp = str(data.get("date_export") or "?")[:10]
        messagebox.showinfo(
            "Import réussi",
            f"Paramètres importés (v{version}, {date_exp}) :\n\n"
            f" + {added_wl} phrase(s) ajoutée(s) à la whitelist\n"
            f" + {added_bl} terme(s) ajouté(s) à la blacklist\n"
            f" + {added_sw} mot(s) ajouté(s) aux stop-words\n\n"
            f"Cliquez sur « Sauvegarder » pour appliquer.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'import :\n{e}")
|
||
|
||
def _save_params(self):
    """Write the three listboxes back into the YAML config file.

    Updates ``whitelist_phrases``, ``blacklist.force_mask_terms`` and
    ``additional_stopwords`` and keeps every other key of the file. A
    warning is shown when the config file is missing or YAML support is
    unavailable; any other failure is shown in an error dialog.
    """
    try:
        cfg_path = Path(self.cfg_path.get())
        if not cfg_path.exists() or yaml is None:
            messagebox.showwarning("Erreur", "Fichier de configuration introuvable.")
            return

        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}

        # Whitelist phrases
        data["whitelist_phrases"] = list(self._wl_listbox.get(0, tk.END))

        # Blacklist terms. A bare "blacklist:" key loads as None, so testing
        # for key presence alone is not enough before subscripting.
        blacklist = data.get("blacklist")
        if not isinstance(blacklist, dict):
            blacklist = {}
            data["blacklist"] = blacklist
        blacklist["force_mask_terms"] = list(self._bl_listbox.get(0, tk.END))

        # Additional stop-words (words never to be identified as names)
        data["additional_stopwords"] = list(self._sw_listbox.get(0, tk.END))

        cfg_path.write_text(
            yaml.dump(data, allow_unicode=True, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )
        messagebox.showinfo("Paramètres", "Paramètres sauvegardés avec succès.")
    except Exception as e:
        messagebox.showerror("Erreur", f"Impossible de sauvegarder :\n{e}")
|
||
|
||
# ---------------------------------------------------------------
|
||
# YAML (interne)
|
||
# ---------------------------------------------------------------
|
||
def _ensure_cfg_exists(self):
    """Create the config file (and its parent folders) with defaults if absent."""
    cfg_file = Path(self.cfg_path.get())
    cfg_file.parent.mkdir(parents=True, exist_ok=True)
    if cfg_file.exists():
        return
    cfg_file.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
|
||
|
||
def _load_cfg(self):
    """Load the YAML config into ``self.cfg_data`` (no-op without YAML support).

    The file is created with defaults first if needed. A read or parse
    failure is ignored and leaves ``self.cfg_data`` unchanged.
    """
    if yaml is None:
        return
    self._ensure_cfg_exists()
    try:
        raw_text = Path(self.cfg_path.get()).read_text(encoding="utf-8")
        self.cfg_data = yaml.safe_load(raw_text) or {}
    except Exception:
        pass
|
||
|
||
# ---------------------------------------------------------------
|
||
# Audit
|
||
# ---------------------------------------------------------------
|
||
def _count_audit(self, audit_path: Path) -> Dict[str, int]:
|
||
d: Dict[str, int] = {}
|
||
try:
|
||
with open(audit_path, "r", encoding="utf-8") as f:
|
||
for line in f:
|
||
try:
|
||
obj = json.loads(line)
|
||
k = obj.get("kind", "?")
|
||
d[k] = d.get(k, 0) + 1
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
return d
|
||
|
||
# ---------------------------------------------------------------
|
||
# Vérification des fuites
|
||
# ---------------------------------------------------------------
|
||
def _check_leaks(self, output_dir: Path) -> int:
|
||
"""Vérifie les fuites dans les textes anonymisés."""
|
||
leak_count = 0
|
||
try:
|
||
# Patterns de fuites critiques
|
||
import re
|
||
patterns = {
|
||
"date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
|
||
"chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
|
||
}
|
||
|
||
for txt_file in output_dir.glob("*.pseudonymise.txt"):
|
||
try:
|
||
with open(txt_file, 'r', encoding='utf-8') as f:
|
||
content = f.read()
|
||
|
||
for pattern in patterns.values():
|
||
matches = pattern.findall(content)
|
||
leak_count += len(matches)
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
|
||
return leak_count
|
||
|
||
# ---------------------------------------------------------------
|
||
# Calcul des performances
|
||
# ---------------------------------------------------------------
|
||
def _calculate_performance(self, total_files: int, total_time: float) -> str:
|
||
"""Calcule et formate les performances de traitement."""
|
||
if total_files == 0 or total_time == 0:
|
||
return "⏱️ Temps de traitement non disponible"
|
||
|
||
avg_time = total_time / total_files
|
||
|
||
# Formater le temps total
|
||
if total_time < 60:
|
||
time_str = f"{total_time:.0f}s"
|
||
elif total_time < 3600:
|
||
minutes = int(total_time // 60)
|
||
seconds = int(total_time % 60)
|
||
time_str = f"{minutes}m {seconds}s"
|
||
else:
|
||
hours = int(total_time // 3600)
|
||
minutes = int((total_time % 3600) // 60)
|
||
time_str = f"{hours}h {minutes}m"
|
||
|
||
return f"⏱️ Traité en {time_str} ({avg_time:.1f}s/document)"
|
||
|
||
# ---------------------------------------------------------------
|
||
# Mise à jour de l'indicateur de fuites
|
||
# ---------------------------------------------------------------
|
||
def _update_leak_indicator(self, leak_count: int):
    """Set the leak badge: green for zero leaks, red with the count otherwise."""
    badge = self._leak_badge
    if leak_count == 0:
        badge.configure(
            text="🔒 0 fuite détectée",
            bg=CLR_GREEN_LIGHT, fg=CLR_GREEN,
        )
        return

    # Noun and adjective take the same plural mark.
    plural = "s" if leak_count > 1 else ""
    badge.configure(
        text=f"⚠️ {leak_count} fuite{plural} potentielle{plural}",
        bg=CLR_RED_LIGHT, fg=CLR_RED,
    )
|
||
|
||
# ---------------------------------------------------------------
|
||
# Chargement automatique NER au démarrage
|
||
# ---------------------------------------------------------------
|
||
def _auto_load_ner(self):
    """Start loading the default NER model in a background thread.

    Does nothing when neither the EDS nor the ONNX manager is available.
    The actual loading order is implemented by ``_auto_load_ner_worker``.
    """
    if not (self._eds_manager or self._onnx_manager):
        return
    self.status_var.set("Chargement du modèle NER...")
    loader = threading.Thread(target=self._auto_load_ner_worker, daemon=True)
    loader.start()
|
||
|
||
def _auto_load_ner_worker(self):
|
||
# 1) Essayer EDS-Pseudo en priorité (F1=97.4% sur données cliniques)
|
||
if self._eds_manager:
|
||
try:
|
||
self._eds_manager.load("AP-HP/eds-pseudo-public")
|
||
self._active_manager = self._eds_manager
|
||
self.use_hf = True
|
||
self.status_var.set("Prêt — EDS-Pseudo actif.")
|
||
return
|
||
except Exception as e:
|
||
import logging
|
||
logging.getLogger(__name__).info("EDS-Pseudo indisponible, fallback ONNX : %s", e)
|
||
|
||
# 2) Fallback : DistilCamemBERT-NER ONNX
|
||
if self._onnx_manager:
|
||
try:
|
||
self._onnx_manager.load("cmarkea/distilcamembert-base-ner")
|
||
self._active_manager = self._onnx_manager
|
||
self.use_hf = True
|
||
self.status_var.set("Prêt — NER ONNX actif.")
|
||
return
|
||
except Exception as e2:
|
||
self.status_var.set(f"Prêt (NER indisponible : {e2})")
|
||
return
|
||
|
||
self.status_var.set("Prêt (aucun backend NER disponible).")
|
||
|
||
# ---------------------------------------------------------------
|
||
# VLM toggle
|
||
# ---------------------------------------------------------------
|
||
def _on_vlm_toggle(self):
|
||
"""Appelé quand l'utilisateur coche/décoche la checkbox VLM."""
|
||
if not self.use_vlm.get():
|
||
self._vlm_available = False
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
|
||
return
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="Connexion...", fg=CLR_TEXT_SECONDARY)
|
||
threading.Thread(target=self._vlm_connect_worker, daemon=True).start()
|
||
|
||
def _vlm_connect_worker(self):
|
||
"""Vérifie la connexion Ollama en arrière-plan."""
|
||
try:
|
||
if self._vlm_manager is None:
|
||
raise RuntimeError("VlmManager non disponible")
|
||
self._vlm_manager.load()
|
||
self._vlm_available = True
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN)
|
||
except Exception as e:
|
||
self._vlm_available = False
|
||
self.use_vlm.set(False)
|
||
err = str(e)
|
||
if len(err) > 60:
|
||
err = err[:57] + "..."
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text=f"Indisponible : {err}", fg=CLR_RED)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Modèles NER (API interne)
|
||
# ---------------------------------------------------------------
|
||
def _load_model(self, model_id: Optional[str] = None):
|
||
mid = model_id or "cmarkea/distilcamembert-base-ner"
|
||
is_eds = False
|
||
if self._eds_manager:
|
||
eds_ids = set(self._eds_manager.models_catalog().values())
|
||
if mid in eds_ids:
|
||
is_eds = True
|
||
if is_eds:
|
||
if not self._eds_manager:
|
||
return
|
||
manager = self._eds_manager
|
||
else:
|
||
if not self._onnx_manager:
|
||
return
|
||
manager = self._onnx_manager
|
||
try:
|
||
manager.load(mid)
|
||
self._active_manager = manager
|
||
self.use_hf = True
|
||
except Exception:
|
||
self.use_hf = False
|
||
|
||
def _unload_model(self):
|
||
if self._onnx_manager:
|
||
self._onnx_manager.unload()
|
||
if self._eds_manager:
|
||
self._eds_manager.unload()
|
||
self._active_manager = None
|
||
self.use_hf = False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Point d'entrée
|
||
# ---------------------------------------------------------------------------
|
||
if __name__ == "__main__":
    # Script entry point: build the Tk root, start the application, and on
    # any fatal error write a crash log, try to show a message box, then
    # re-raise the original exception.
    try:
        root = tk.Tk()
        App(root)
        root.mainloop()
    except Exception as exc:
        import sys
        import traceback

        err = traceback.format_exc()

        # Write the traceback to a log file next to the executable.
        # In a frozen (PyInstaller) build, __file__ resolves inside the
        # temporary extraction directory, so the log would not end up next
        # to the .exe; use the executable's own location instead.
        if getattr(sys, "frozen", False):
            log_dir = Path(sys.executable).resolve().parent
        else:
            log_dir = Path(__file__).resolve().parent
        log_path = log_dir / "crash.log"
        try:
            log_path.write_text(err, encoding="utf-8")
        except Exception:
            # Best effort: an unwritable directory must not mask the
            # original error, which is re-raised below.
            pass

        # Try to show a message box (works even without a console).
        try:
            import tkinter as _tk
            from tkinter import messagebox as _mb

            _r = _tk.Tk()
            _r.withdraw()
            _mb.showerror("Erreur fatale", f"L'application a planté :\n\n{exc}\n\nVoir crash.log")
            _r.destroy()
        except Exception:
            # Best effort: Tk itself may be what failed.
            pass
        raise
|