#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Pseudonymisation – GUI v5 (Vue unique épurée) ---------------------------------------------- - Vue unique en 2 étapes : dossier → lancer (les deux formats sont générés) - Thème système natif (sv_ttk optionnel, fallback clam) - Backend NER ONNX/EDS-Pseudo conservé en interne - Pas d'onglet Avancé (NER + YAML chargés silencieusement) Fichiers requis à côté : - anonymizer_core_refactored_onnx.py - ner_manager_onnx.py """ from __future__ import annotations import enum import json import os import platform import queue import re import shutil import subprocess import sys import threading from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional import tkinter as tk from tkinter import filedialog, messagebox, ttk # --------------------------------------------------------------------------- # Core # --------------------------------------------------------------------------- try: import anonymizer_core_refactored_onnx as core except Exception as e: _err_msg = f"Impossible d'importer le core ONNX : {e}" # Écrire l'erreur dans un fichier log à côté du script/exe try: _log = Path(__file__).resolve().parent / "crash.log" import traceback as _tb _log.write_text(f"{_err_msg}\n\n{_tb.format_exc()}", encoding="utf-8") except Exception: pass try: _r = tk.Tk(); _r.withdraw() messagebox.showerror("Erreur d'import", _err_msg) _r.destroy() except Exception: pass raise SystemExit(_err_msg) try: from ner_manager_onnx import NerModelManager, NerThresholds except Exception: NerModelManager = None # type: ignore NerThresholds = None # type: ignore try: from eds_pseudo_manager import EdsPseudoManager except Exception: EdsPseudoManager = None # type: ignore try: from vlm_manager import VlmManager, VlmConfig except Exception: VlmManager = None # type: ignore VlmConfig = None # type: ignore try: import yaml except Exception: yaml = None # 
# ---------------------------------------------------------------------------
# Optional theme
# ---------------------------------------------------------------------------
try:
    import sv_ttk  # type: ignore
except ImportError:
    sv_ttk = None

# PIL is used to load the logo / icons (optional — the UI degrades without it).
try:
    from PIL import Image, ImageTk
    _PIL_AVAILABLE = True
except Exception:
    _PIL_AVAILABLE = False

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
APP_TITLE = "Pseudonymisation de vos documents"
APP_VERSION = "v5.5"

# Build metadata — loaded from build_info.py (regenerated by rebuild_anon.ps1).
try:
    from build_info import BUILD_DATE, BUILD_COMMIT, BUILD_BRANCH
except Exception:
    BUILD_DATE = "dev"
    BUILD_COMMIT = "dev"
    BUILD_BRANCH = "dev"


def _version_long() -> str:
    """Return the extended version string.

    Example: ``v5.5 · 2026-04-15 18:15 · #234137e``. The build date and the
    commit are omitted while they still hold the ``"dev"`` placeholder.
    """
    parts = [APP_VERSION]
    if BUILD_DATE != "dev":
        parts.append(BUILD_DATE)
    if BUILD_COMMIT != "dev":
        parts.append(f"#{BUILD_COMMIT}")
    return " · ".join(parts)


def _asset(name: str) -> Path:
    """Resolve the path of an asset located in ``assets/`` under the app root."""
    return _app_dir() / 'assets' / name


def _app_dir() -> Path:
    """Return the application root directory (bundled resources).

    When frozen, PyInstaller exposes the unpack directory as ``sys._MEIPASS``.
    Other freezers do not define that attribute, so fall back to the
    executable's directory instead of raising ``AttributeError``.
    """
    if getattr(sys, 'frozen', False):
        meipass = getattr(sys, '_MEIPASS', None)
        if meipass:
            return Path(meipass)
        return Path(sys.executable).parent
    return Path(__file__).resolve().parent


def _exe_dir() -> Path:
    """Return the executable's directory (persistent files: config, logs)."""
    if getattr(sys, 'frozen', False):
        return Path(sys.executable).parent
    return Path(__file__).resolve().parent


def _resolve_config() -> Path:
    """Locate ``config/dictionnaires.yml``, preferring the copy next to the exe.

    If no copy exists next to the executable, the embedded one is copied there
    so the user can edit it without rebuilding. If that copy fails (for
    example a read-only install directory), the embedded file is used as-is
    rather than crashing at import time.

    Returns:
        The path to use; it may not exist if neither location has the file.
    """
    import shutil

    exe_cfg = _exe_dir() / "config" / "dictionnaires.yml"
    app_cfg = _app_dir() / "config" / "dictionnaires.yml"
    if exe_cfg.exists():
        return exe_cfg
    # First launch: copy the embedded config next to the executable.
    if app_cfg.exists():
        try:
            exe_cfg.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(app_cfg), str(exe_cfg))
        except OSError:
            return app_cfg
        return exe_cfg
    return app_cfg  # fallback


DEFAULT_CFG = _resolve_config()
MODELS_DIR = _app_dir() / "models"

# NOTE(review): the line breaks and nesting of this YAML literal were
# reconstructed from a whitespace-collapsed source — confirm against the
# shipped config/dictionnaires.yml before relying on the key hierarchy.
DEFAULTS_CFG_TEXT = r"""
# dictionnaires.yml – valeurs par défaut (bloc littéral pour les regex)
version: 1
encoding: "utf-8"
normalization: "NFKC"
whitelist:
  sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
  noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
  org_gpe_keep: true
blacklist:
  force_mask_terms: []
  force_mask_regex: []
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
regex_overrides:
  - name: OGC_court
    pattern: |-
      \b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
    placeholder: '[OGC]'
    flags: [IGNORECASE]
flags:
  case_insensitive: true
  unicode_word_boundaries: true
  regex_engine: "python"
"""

# Palette derived from the aivanonym logo (magenta → pink → peach → black):
#   magenta     : primary (buttons, accents)
#   peach       : secondary (tags, highlights)
#   black/grey  : text and neutrals
#   white/light : backgrounds
CLR_PRIMARY = "#E91E63"        # logo magenta (CTA, links)
CLR_PRIMARY_DARK = "#C2185B"   # hover / pressed
CLR_PRIMARY_LIGHT = "#FCE4EC"  # light fill (selected cards)
CLR_ACCENT = "#FFB74D"         # logo peach (secondary tags)
CLR_ACCENT_LIGHT = "#FFF3E0"   # light accent fill
CLR_GREEN = "#2E7D32"          # success
CLR_GREEN_LIGHT = "#E8F5E9"
CLR_RED = "#C62828"            # error / danger
CLR_RED_LIGHT = "#FFEBEE"
CLR_BLUE_LIGHT = "#FCE4EC"     # kept for compatibility (same value as primary light)
CLR_CARD_BG = "#FFFFFF"
CLR_CARD_BORDER = "#E0E0E0"
CLR_BG = "#FAFAFA"             # main background (very light grey)
CLR_TEXT = "#212121"            # near-black (from the logo)
CLR_TEXT_SECONDARY = "#757575"  # medium grey
CLR_DIVIDER = "#EEEEEE"


# ---------------------------------------------------------------------------
# Worker → UI messages
# ---------------------------------------------------------------------------
class MsgType(enum.Enum):
    """Kinds of messages exchanged through the UI queue."""

    LOG = "log"
    PROGRESS = "progress"
    DONE = "done"


@dataclass
class UiMessage:
    """Payload put on the UI queue; only ``kind`` is mandatory."""

    kind: MsgType
    text: str = ""
    current: int = 0
    total: int = 0
    filename: str = ""
    ok: int = 0
    ko: int = 0
    masked: int = 0
    outdir: str = ""
    total_time: float = 0.0  # total processing time, in seconds


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def open_folder(path: Path):
    """Open *path* in the platform file manager (best effort, never raises)."""
    try:
        if platform.system() == "Windows":
            os.startfile(str(path))  # type: ignore
        elif platform.system() == "Darwin":
            subprocess.Popen(["open", str(path)])
        else:
            subprocess.Popen(["xdg-open", str(path)])
    except Exception:
        pass


def _detect_font() -> str:
    """Return the first preferred sans-serif family that is installed.

    The check is done against ``tkinter.font.families()``. Asking a widget for
    its ``font`` option only echoes the requested spec back, so it cannot tell
    whether the family exists. Falls back to ``"TkDefaultFont"`` when no
    candidate is installed or Tk cannot be queried.
    """
    from tkinter import font as tkfont

    try:
        installed = {family.lower() for family in tkfont.families()}
    except Exception:
        return "TkDefaultFont"
    for name in ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica"):
        if name.lower() in installed:
            return name
    return "TkDefaultFont"


def _detect_dark_mode() -> bool:
    """Return True when the GNOME ``color-scheme`` setting mentions "dark".

    Any failure (gsettings missing, timeout, ...) is reported as False.
    """
    try:
        result = subprocess.run(
            ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"],
            capture_output=True, text=True, timeout=2,
        )
        return "dark" in result.stdout.lower()
    except Exception:
        return False


# ---------------------------------------------------------------------------
# Tooltip
# ---------------------------------------------------------------------------
class ToolTip:
    """Delayed hover tooltip attached to a widget."""

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        """Attach the tooltip; it shows *delay* ms after the pointer enters."""
        self.widget = widget
        self.text = text
        self.delay = delay
        self.tip: Optional[tk.Toplevel] = None
        self._after_id: Optional[str] = None
        # An empty event sequence is rejected by Tk; hover needs Enter/Leave.
        widget.bind("<Enter>", self._schedule)
        widget.bind("<Leave>", self.hide)

    def _schedule(self, *_):
        """(Re)start the timer that will display the tooltip."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Cancel the pending display timer, if any."""
        if self._after_id:
            self.widget.after_cancel(self._after_id)
            self._after_id = None

    def _show(self):
        """Create the borderless tooltip window just below the widget."""
        if self.tip:
            return
        x = self.widget.winfo_rootx() + 20
        y = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        self.tip = tw = tk.Toplevel(self.widget)
        tw.wm_overrideredirect(True)
        tw.wm_geometry(f"+{x}+{y}")
        lbl = tk.Label(
            tw, text=self.text, justify=tk.LEFT,
            background="#1f2937", foreground="#f9fafb",
            relief=tk.SOLID, borderwidth=1,
            padx=8, pady=5, wraplength=320,
        )
        lbl.pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending display and destroy the tooltip window."""
        self._cancel()
        if self.tip:
            self.tip.destroy()
            self.tip = None


# ---------------------------------------------------------------------------
# Main application
# ---------------------------------------------------------------------------
class App:
    def __init__(self, root: tk.Tk):
        """Build the whole window on *root* and start loading the NER model."""
        self.root = root
        # The title carries the long version so the build is identifiable at a
        # glance (avoids mixing up old/new executables while testing).
        self.root.title(f"{APP_TITLE} — {_version_long()}")
        self.root.geometry("780x820")
        self.root.minsize(600, 650)

        # Window icon (top-left corner + Windows taskbar): iconbitmap takes an
        # .ico on Windows, iconphoto takes a PNG elsewhere; both are attempted.
        self._icon_refs: list = []  # keeps image refs alive (garbage collection)
        self._apply_window_icon()

        # Preload the header logo; without a persistent reference tkinter
        # drops the image and the label renders blank.
        self._logo_img = self._load_image_safe(_asset('logo_header.png'))

        # --- Theme ---
        self._apply_theme()

        # --- Fonts ---
        self._font_family = _detect_font()
        self._f_title = (self._font_family, 20, "bold")
        self._f_body = (self._font_family, 11)
        self._f_body_bold = (self._font_family, 11, "bold")
        self._f_button = (self._font_family, 13, "bold")
        self._f_stat = (self._font_family, 24, "bold")
        self._f_small = (self._font_family, 10)
        self._f_card_title = (self._font_family, 12, "bold")
        self._f_card_desc = (self._font_family, 10)

        # --- Variables ---
        self.dir_var = tk.StringVar()
        self.status_var = tk.StringVar(value="Prêt.")
        self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
        self.queue: "queue.Queue[UiMessage]" = queue.Queue()

        # --- NER (internal) ---
        self.use_hf = False
        self.th_per = 0.90
        self.th_org = 0.90
        self.th_loc = 0.90
        self._onnx_manager: Optional[Any] = NerModelManager(cache_dir=MODELS_DIR) if NerModelManager else None
        self._eds_manager: Optional[Any] = EdsPseudoManager(cache_dir=MODELS_DIR) if EdsPseudoManager else None
        self._active_manager: Optional[Any] = None
        self.cfg_data: Dict[str, Any] = {}

        # --- VLM (optional) ---
        self.use_vlm = tk.BooleanVar(value=False)
        self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
        self._vlm_available = False

        # --- Merged model catalogue ---
        catalog: Dict[str, str] = {}
        if self._onnx_manager:
            catalog.update(self._onnx_manager.models_catalog())
        if self._eds_manager:
            catalog.update(self._eds_manager.models_catalog())
        self._merged_catalog = catalog

        # --- Results ---
        self._last_outdir: Optional[Path] = None

        # --- Stop control ---
        self._stop_requested = False

        # --- Single file (None = folder mode) ---
        self._single_file: Optional[Path] = None

        # --- UI construction ---
        self._build_ui()
        # Show the "Anonymisation" tab by default.
        self._switch_tab("anonym")
        self._pump_logs()
        self._ensure_cfg_exists()
        self._load_cfg()

        # --- Automatic loading of the NER model ---
        self._auto_load_ner()
--------------------------------------------------------------- # Onglets custom # --------------------------------------------------------------- def _switch_tab(self, name: str): """Affiche l'onglet nommé, met à jour les styles des boutons.""" if name not in self._tab_frames: return # Cacher tous les contenus for frame in self._tab_frames.values(): frame.pack_forget() # Afficher l'onglet demandé self._tab_frames[name].pack(fill=tk.BOTH, expand=True) # Mettre à jour les styles des boutons d'onglets for tab_name, widgets in self._tab_buttons.items(): if tab_name == name: widgets["label"].configure(fg=CLR_PRIMARY, bg=CLR_BG) widgets["underline"].configure(bg=CLR_PRIMARY) else: widgets["label"].configure(fg=CLR_TEXT_SECONDARY, bg=CLR_BG) widgets["underline"].configure(bg=CLR_BG) self._active_tab = name # --------------------------------------------------------------- # Icônes & assets # --------------------------------------------------------------- def _apply_window_icon(self): """Définit l'icône de la fenêtre. Windows : .ico préférable ; Linux : PNG.""" try: ico = _asset('icons/app.ico') if sys.platform == 'win32' and ico.exists(): try: self.root.iconbitmap(str(ico)) return except Exception: pass # Fallback : iconphoto PNG (toutes plateformes) png = _asset('icons/icon_128.png') if png.exists() and _PIL_AVAILABLE: img = Image.open(png) photo = ImageTk.PhotoImage(img) self._icon_refs.append(photo) self.root.iconphoto(True, photo) except Exception: pass # dégradation silencieuse — l'icône n'est pas bloquante def _load_image_safe(self, path: Path): """Charge une image et garde la ref pour éviter le GC. 
None si PIL absent.""" if not _PIL_AVAILABLE or not path.exists(): return None try: img = Image.open(path).convert('RGBA') photo = ImageTk.PhotoImage(img) self._icon_refs.append(photo) return photo except Exception: return None # --------------------------------------------------------------- # Thème # --------------------------------------------------------------- def _apply_theme(self): if sv_ttk is not None: mode = "dark" if _detect_dark_mode() else "light" sv_ttk.set_theme(mode) else: try: style = ttk.Style() style.theme_use("clam") except Exception: pass # --------------------------------------------------------------- # Construction de la vue unique # --------------------------------------------------------------- def _build_ui(self): self.root.configure(bg=CLR_BG) pad_x = 32 # ============================================================= # HEADER fixe (logo + titre + baseline), hors onglets # ============================================================= header = tk.Frame(self.root, bg=CLR_BG) header.pack(fill=tk.X, padx=pad_x, pady=(16, 8)) if self._logo_img is not None: tk.Label(header, image=self._logo_img, bg=CLR_BG).pack(anchor="w") else: tk.Label(header, text="aivanonym", font=(self._font_family, 22, "bold"), bg=CLR_BG, fg=CLR_PRIMARY).pack(anchor="w") tk.Label( header, text="Pseudonymisation de documents médicaux — 100% local", font=(self._font_family, 10), bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", ).pack(fill=tk.X, pady=(4, 0)) # Ligne colorée inspirée du gradient du logo accent_bar = tk.Frame(self.root, bg=CLR_PRIMARY, height=3) accent_bar.pack(fill=tk.X) # ============================================================= # ONGLETS CUSTOM (boutons uniformes — rendu pro) # Remplace ttk.Notebook dont les onglets ont des tailles/styles # variables selon l'état actif. Ici : tous les onglets identiques, # seule une bordure basse magenta signale l'onglet actif. 
# ============================================================= tabs_bar = tk.Frame(self.root, bg=CLR_BG) tabs_bar.pack(fill=tk.X, padx=0, pady=(4, 0)) self._tab_frames: dict = {} # nom → frame outer self._tab_buttons: dict = {} # nom → dict(container, label, underline) self._active_tab: Optional[str] = None def _make_tab_button(parent, name: str, label: str): """Crée un onglet cliquable uniforme (fond, texte, underline).""" container = tk.Frame(parent, bg=CLR_BG, cursor="hand2") container.pack(side=tk.LEFT) txt = tk.Label( container, text=label, font=(self._font_family, 11, "bold"), bg=CLR_BG, fg=CLR_TEXT_SECONDARY, padx=26, pady=10, cursor="hand2", ) txt.pack(fill=tk.X) # Bordure basse qui devient magenta quand actif underline = tk.Frame(container, bg=CLR_BG, height=3) underline.pack(fill=tk.X) def _on_click(_e=None): self._switch_tab(name) for w in (container, txt, underline): w.bind("", _on_click) self._tab_buttons[name] = { "container": container, "label": txt, "underline": underline, } _make_tab_button(tabs_bar, "anonym", "Anonymisation") _make_tab_button(tabs_bar, "params", "Paramètres") # Séparateur gris clair sous les onglets tk.Frame(self.root, bg=CLR_DIVIDER, height=1).pack(fill=tk.X) # Conteneur des contenus (un seul visible à la fois) tabs_content = tk.Frame(self.root, bg=CLR_BG) tabs_content.pack(fill=tk.BOTH, expand=True) tab_anonym_outer = tk.Frame(tabs_content, bg=CLR_BG) tab_params_outer = tk.Frame(tabs_content, bg=CLR_BG) self._tab_frames["anonym"] = tab_anonym_outer self._tab_frames["params"] = tab_params_outer # --- Scroll pour l'onglet Anonymisation --- canvas = tk.Canvas(tab_anonym_outer, bg=CLR_BG, highlightthickness=0) scrollbar = ttk.Scrollbar(tab_anonym_outer, orient=tk.VERTICAL, command=canvas.yview) self._scroll_frame = tk.Frame(canvas, bg=CLR_BG) self._scroll_frame.bind( "", lambda e: canvas.configure(scrollregion=canvas.bbox("all")), ) canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw") 
canvas.configure(yscrollcommand=scrollbar.set) def _on_canvas_configure(event): canvas.itemconfig(canvas_window, width=event.width) canvas.bind("", _on_canvas_configure) def _on_mousewheel(event): canvas.yview_scroll(int(-1 * (event.delta / 120)), "units") def _on_mousewheel_linux(event): if event.num == 4: canvas.yview_scroll(-3, "units") elif event.num == 5: canvas.yview_scroll(3, "units") canvas.bind_all("", _on_mousewheel) canvas.bind_all("", _on_mousewheel_linux) canvas.bind_all("", _on_mousewheel_linux) canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) # --- Scroll pour l'onglet Paramètres --- canvas2 = tk.Canvas(tab_params_outer, bg=CLR_BG, highlightthickness=0) scrollbar2 = ttk.Scrollbar(tab_params_outer, orient=tk.VERTICAL, command=canvas2.yview) self._params_scroll = tk.Frame(canvas2, bg=CLR_BG) self._params_scroll.bind( "", lambda e: canvas2.configure(scrollregion=canvas2.bbox("all")), ) canvas2_window = canvas2.create_window((0, 0), window=self._params_scroll, anchor="nw") canvas2.configure(yscrollcommand=scrollbar2.set) def _on_canvas2_configure(event): canvas2.itemconfig(canvas2_window, width=event.width) canvas2.bind("", _on_canvas2_configure) canvas2.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) scrollbar2.pack(side=tk.RIGHT, fill=tk.Y) # "main" pointe désormais sur le scroll de l'onglet Anonymisation. # Tout le contenu existant (étape 1, formats, boutons, progress, résultats) # reste inchangé — seul le parent implicite a changé. main = self._scroll_frame # ============================================================= # ÉTAPE 1 — Choix du dossier # ============================================================= tk.Label( main, text="1. 
Choisir les documents ou fichiers (PDF, Word, Images, Texte)", font=self._f_body_bold, bg=CLR_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X, padx=pad_x, pady=(0, 6)) self._folder_zone = tk.Frame( main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER, highlightthickness=2, cursor="hand2", ) self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18)) # Contenu initial (invite à cliquer) self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG) self._folder_inner.pack(fill=tk.X, padx=20, pady=18) self._folder_icon_lbl = tk.Label( self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28), bg=CLR_CARD_BG, ) self._folder_icon_lbl.pack() self._folder_text_lbl = tk.Label( self._folder_inner, text="Cliquez pour choisir un dossier ou un fichier", font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, ) self._folder_text_lbl.pack(pady=(4, 0)) # Rendre toute la zone cliquable for w in (self._folder_zone, self._folder_inner, self._folder_icon_lbl, self._folder_text_lbl): w.bind("", lambda e: self._browse()) # ============================================================= # ÉTAPE 2 — Info formats générés # ============================================================= tk.Label( main, text="2. 
Formats générés", font=self._f_body_bold, bg=CLR_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X, padx=pad_x, pady=(0, 6)) info_frame = tk.Frame( main, bg=CLR_BLUE_LIGHT, highlightbackground=CLR_CARD_BORDER, highlightthickness=1, ) info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18)) info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT) info_inner.pack(fill=tk.X, padx=16, pady=12) tk.Label( info_inner, text="Paramètres de traitement :", font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X) tk.Label( info_inner, text=("\u2022 Recherche récursive de tous les documents dans les sous-dossiers\n" "\u2022 Sortie PDF Image (raster) — sécurité maximale, aucun texte résiduel\n" "\u2022 Résultats dans le dossier « anonymise/ » à la racine"), font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, ).pack(fill=tk.X, pady=(4, 0)) # --- Checkbox VLM --- if VlmManager is not None: vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT) vlm_row.pack(fill=tk.X, pady=(8, 0)) self._vlm_check = tk.Checkbutton( vlm_row, text="Analyse visuelle VLM (Ollama)", variable=self.use_vlm, font=self._f_card_desc, bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT, command=self._on_vlm_toggle, ) self._vlm_check.pack(side=tk.LEFT) self._vlm_status_lbl = tk.Label( vlm_row, text="", font=self._f_small, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, ) self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0)) ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.") # ============================================================= # BOUTONS LANCER / STOPPER # ============================================================= buttons_frame = tk.Frame(main, bg=CLR_BG) buttons_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 4)) self.btn_run = tk.Button( buttons_frame, text="Lancer l'anonymisation", font=self._f_button, bg=CLR_PRIMARY, fg="white", activebackground="#1d4ed8", 
activeforeground="white", relief=tk.FLAT, cursor="hand2", pady=10, command=self._run, ) self.btn_run.pack(fill=tk.X) self.btn_stop = tk.Button( buttons_frame, text="Arrêter le traitement", font=self._f_button, bg=CLR_RED, fg="white", activebackground="#b91c1c", activeforeground="white", relief=tk.FLAT, cursor="hand2", pady=10, command=self._stop, ) # NE PAS pack — sera affiché pendant le traitement # Lien aide help_lbl = tk.Label( main, text="Comment ça marche ?", font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2", ) help_lbl.pack(pady=(0, 8)) help_lbl.bind("", lambda e: self._show_help()) # ============================================================= # ONGLET "PARAMÈTRES" — contenu monté dans self._params_scroll # ============================================================= self._params_frame = self._params_scroll tk.Label( self._params_frame, text="Personnaliser le masquage", font=(self._font_family, 14, "bold"), bg=CLR_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X, padx=pad_x, pady=(20, 4)) tk.Label( self._params_frame, text=("Ces listes complètent les détections automatiques du programme. 
" "Utile pour gérer les spécificités de votre établissement."), font=self._f_small, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700, ).pack(fill=tk.X, padx=pad_x, pady=(0, 16)) # Conteneur interne avec padding latéral pour les listboxes params_inner = tk.Frame(self._params_frame, bg=CLR_BG) params_inner.pack(fill=tk.X, padx=pad_x, pady=(0, 12)) # --- Whitelist (phrases à ne pas anonymiser) --- self._wl_listbox, self._wl_entry = self._build_phrase_list( params_inner, title="\u2705 Phrases à ne PAS anonymiser :", placeholder="Ajouter une phrase à protéger...", color_tag=CLR_GREEN_LIGHT, ) # --- Blacklist (phrases à toujours masquer) --- self._bl_listbox, self._bl_entry = self._build_phrase_list( params_inner, title="\u26d4 Mots/phrases à TOUJOURS masquer :", placeholder="Ajouter un mot ou phrase à masquer...", color_tag=CLR_PRIMARY_LIGHT, ) # --- Stop-words additionnels (mots à ne jamais identifier comme noms) --- # Différent de la whitelist : agit en amont, pour les sigles, acronymes, # termes métier locaux qui ressemblent à des noms mais n'en sont pas. self._sw_listbox, self._sw_entry = self._build_phrase_list( params_inner, title="\u26a0 Mots à ne jamais identifier comme noms (sigles, acronymes...) 
:", placeholder="Ajouter un mot (ex: sigle local, acronyme métier)...", color_tag=CLR_ACCENT_LIGHT, ) # Boutons sauvegarder + exporter btn_row = tk.Frame(params_inner, bg=CLR_BG) btn_row.pack(fill=tk.X, pady=(12, 12)) export_btn = tk.Button( btn_row, text="\u2709 Exporter pour envoi", font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2", padx=10, pady=6, command=self._export_params, ) export_btn.pack(side=tk.LEFT) import_btn = tk.Button( btn_row, text="\u2B07 Importer", font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2", padx=10, pady=6, command=self._import_params, ) import_btn.pack(side=tk.LEFT, padx=(4, 0)) save_btn = tk.Button( btn_row, text="Sauvegarder", font=self._f_small, bg=CLR_PRIMARY, fg="white", activebackground=CLR_PRIMARY_DARK, activeforeground="white", relief=tk.FLAT, cursor="hand2", padx=14, pady=6, command=self._save_params, ) save_btn.pack(side=tk.RIGHT) # Charger les valeurs initiales depuis la config self._load_params() # Retour dans l'onglet Anonymisation ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 8)) # ============================================================= # BARRE DE PROGRESSION (masquée) # ============================================================= self._progress_frame = tk.Frame(main, bg=CLR_BG) # NE PAS pack — sera affiché dynamiquement self._progressbar = ttk.Progressbar( self._progress_frame, orient=tk.HORIZONTAL, mode="determinate", ) self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4)) self._progress_label = tk.Label( self._progress_frame, text="", font=self._f_small, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", ) self._progress_label.pack(fill=tk.X) # ============================================================= # SECTION RÉSULTATS (masquée) # ============================================================= self._results_frame = tk.Frame(main, bg=CLR_BG) # NE PAS pack tk.Label( self._results_frame, text="Résultats", font=self._f_body_bold, 
bg=CLR_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X, pady=(0, 8)) stats_row = tk.Frame(self._results_frame, bg=CLR_BG) stats_row.pack(fill=tk.X, pady=(0, 12)) stats_row.columnconfigure(0, weight=1) stats_row.columnconfigure(1, weight=1) stats_row.columnconfigure(2, weight=1) self._stat_files = self._make_stat_card(stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0) self._stat_masked = self._make_stat_card(stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1) self._stat_errors = self._make_stat_card(stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2) # Indicateurs de qualité et sécurité quality_row = tk.Frame(self._results_frame, bg=CLR_BG) quality_row.pack(fill=tk.X, pady=(0, 12)) # Badge de fuites self._leak_badge = tk.Label( quality_row, text="🔒 Vérification en cours...", font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_PRIMARY, padx=12, pady=6, ) self._leak_badge.pack(side=tk.LEFT, padx=(0, 8)) # Temps de traitement self._perf_label = tk.Label( quality_row, text="⏱️ Calcul en cours...", font=self._f_small, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, ) self._perf_label.pack(side=tk.LEFT) self.btn_open_out = tk.Button( self._results_frame, text="Ouvrir le dossier de résultats", font=self._f_button, bg=CLR_GREEN, fg="white", activebackground="#15803d", activeforeground="white", relief=tk.FLAT, cursor="hand2", pady=10, command=self._open_out, ) self.btn_open_out.pack(fill=tk.X, pady=(0, 8)) # Toggle journal self._log_visible = False self._log_toggle = tk.Label( self._results_frame, text="Voir le journal détaillé \u25BC", font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2", ) self._log_toggle.pack(pady=(0, 4)) self._log_toggle.bind("", lambda e: self._toggle_log()) self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG) # NE PAS pack self.txt = tk.Text( self._log_frame, height=14, font=self._f_small, bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD, state=tk.DISABLED, ) log_scrollbar = 
ttk.Scrollbar(self._log_frame, command=self.txt.yview) self.txt.configure(yscrollcommand=log_scrollbar.set) self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y) # ============================================================= # BARRE DE STATUT # ============================================================= ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(18, 0)) status_bar = tk.Frame(main, bg=CLR_BG) status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12)) tk.Label( status_bar, textvariable=self.status_var, font=self._f_small, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", ).pack(side=tk.LEFT) tk.Label( status_bar, text=_version_long(), font=self._f_small, bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e", ).pack(side=tk.RIGHT) # --------------------------------------------------------------- # Cartes de statistiques # --------------------------------------------------------------- def _make_stat_card(self, parent, number: str, label: str, fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Label]: padx = (0, 4) if col == 0 else (4, 4) if col == 1 else (4, 0) frame = tk.Frame(parent, bg=bg_color, highlightbackground=bg_color, highlightthickness=1) frame.grid(row=0, column=col, sticky="nsew", padx=padx) num_lbl = tk.Label( frame, text=number, font=self._f_stat, bg=bg_color, fg=fg_color, ) num_lbl.pack(pady=(12, 2)) txt_lbl = tk.Label( frame, text=label, font=self._f_small, bg=bg_color, fg=CLR_TEXT_SECONDARY, ) txt_lbl.pack(pady=(0, 12)) return {"frame": frame, "number": num_lbl, "label": txt_lbl} def _update_stat_card(self, card: Dict[str, tk.Label], value: int, fg_color: str, bg_color: str): card["number"].configure(text=str(value), fg=fg_color, bg=bg_color) card["frame"].configure(bg=bg_color, highlightbackground=bg_color) card["label"].configure(bg=bg_color) # --------------------------------------------------------------- # Actions dossier # --------------------------------------------------------------- def 
_browse(self): """Propose le choix entre dossier et fichier unique via un menu contextuel.""" menu = tk.Menu(self.root, tearoff=0) menu.add_command(label="Choisir un dossier", command=self._browse_folder) menu.add_command(label="Choisir un fichier", command=self._browse_file) # Afficher le menu sous le curseur try: menu.tk_popup(self.root.winfo_pointerx(), self.root.winfo_pointery()) finally: menu.grab_release() def _browse_folder(self): d = filedialog.askdirectory() if d: self._single_file = None self.dir_var.set(d) self._update_folder_display() def _browse_file(self): try: from format_converter import SUPPORTED_EXTENSIONS except ImportError: SUPPORTED_EXTENSIONS = {".pdf"} # Construire les filtres pour le dialogue ext_list = " ".join(f"*{e}" for e in sorted(SUPPORTED_EXTENSIONS)) f = filedialog.askopenfilename( title="Choisir un document à anonymiser", filetypes=[ ("Documents supportés", ext_list), ("PDF", "*.pdf"), ("Word", "*.docx"), ("Images", "*.jpg *.jpeg *.png *.tiff *.tif *.bmp"), ("Texte", "*.txt *.rtf *.odt *.html *.htm"), ("Tous", "*.*"), ], ) if f: self._single_file = Path(f) self.dir_var.set(str(self._single_file.parent)) self._update_folder_display() def _update_folder_display(self): folder = self.dir_var.get() if not folder: return is_single = getattr(self, '_single_file', None) is not None if is_single: doc_count = 1 display_label = self._single_file.name else: # Compter les documents supportés (récursif) try: from format_converter import SUPPORTED_EXTENSIONS except ImportError: SUPPORTED_EXTENSIONS = {".pdf"} doc_count = 0 try: doc_count = len([ p for p in Path(folder).rglob("*") if p.is_file() and p.suffix.lower() in SUPPORTED_EXTENSIONS ]) except Exception: pass display_label = folder # Vider et reconstruire l'intérieur for w in self._folder_inner.winfo_children(): w.destroy() row = tk.Frame(self._folder_inner, bg=CLR_CARD_BG) row.pack(fill=tk.X) icon = "\U0001f4c4" if is_single else "\U0001f4c2" # 📄 ou 📂 tk.Label( row, text=icon, 
font=(self._font_family, 16), bg=CLR_CARD_BG, ).pack(side=tk.LEFT, padx=(0, 8)) info_frame = tk.Frame(row, bg=CLR_CARD_BG) info_frame.pack(side=tk.LEFT, fill=tk.X, expand=True) # Chemin (tronqué si trop long) display_path = display_label if len(display_path) > 60: display_path = "..." + display_path[-57:] tk.Label( info_frame, text=display_path, font=self._f_body_bold, bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X) if is_single: subtitle = f"Fichier unique — {self._single_file.suffix.upper().lstrip('.')}" else: suffix = "document trouvé (récursif)" if doc_count <= 1 else "documents trouvés (récursif)" subtitle = f"{doc_count} {suffix}" tk.Label( info_frame, text=subtitle, font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w", ).pack(fill=tk.X) change_btn = tk.Label( row, text="Changer", font=self._f_small, bg=CLR_CARD_BG, fg=CLR_PRIMARY, cursor="hand2", ) change_btn.pack(side=tk.RIGHT, padx=(8, 0)) change_btn.bind("", lambda e: self._browse()) # Mettre à jour la bordure self._folder_zone.configure(highlightbackground=CLR_GREEN) # --------------------------------------------------------------- # Lancement # --------------------------------------------------------------- def _run(self): is_single = getattr(self, '_single_file', None) is not None if is_single: # Mode fichier unique if not self._single_file.is_file(): messagebox.showwarning("Fichier introuvable", f"{self._single_file}") return folder = self._single_file.parent pdfs = [self._single_file] else: # Mode dossier folder = Path(self.dir_var.get().strip()) if not folder.is_dir(): messagebox.showwarning( "Dossier invalide", "Choisissez un dossier ou un fichier.", ) return try: from format_converter import SUPPORTED_EXTENSIONS except ImportError: SUPPORTED_EXTENSIONS = {".pdf"} pdfs = sorted([ p for p in folder.rglob("*") if p.is_file() and p.suffix.lower() in SUPPORTED_EXTENSIONS ]) if not pdfs: exts = ", ".join(sorted(SUPPORTED_EXTENSIONS)) messagebox.showwarning( "Aucun document", 
f"Aucun fichier supporté trouvé.\n" f"Formats acceptés : {exts}\n" f"(recherche récursive dans les sous-dossiers)", ) return self._stop_requested = False self.btn_run.pack_forget() self.btn_stop.pack(fill=tk.X) self._show_progress(total=len(pdfs)) self._hide_results() threading.Thread(target=self._worker, args=(folder, pdfs), daemon=True).start() def _stop(self): """Demande l'arrêt du traitement en cours.""" self._stop_requested = True self.btn_stop.config(state=tk.DISABLED, bg="#fca5a5", text="Arrêt en cours...") self.status_var.set("Arrêt demandé, fin du document en cours...") def _worker(self, folder: Path, pdfs: List[Path]): import time start_time = time.time() try: outdir = folder / "anonymise" outdir.mkdir(exist_ok=True) ok = ko = 0 global_counts: Dict[str, int] = {} for i, pdf in enumerate(pdfs, start=1): # Vérifier si l'arrêt a été demandé if self._stop_requested: self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\n⚠️ Arrêt demandé par l'utilisateur")) break self.queue.put(UiMessage( kind=MsgType.PROGRESS, current=i, total=len(pdfs), filename=pdf.name, )) try: active = self._active_manager use_ner = bool(active and self.use_hf and hasattr(active, 'is_loaded') and active.is_loaded()) thresholds = None if use_ner and NerThresholds and not (EdsPseudoManager and isinstance(active, EdsPseudoManager)): thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85) # Extraire le numéro OGC du nom du répertoire parent # Ex: "257_23209962" → OGC = "257" parent_name = pdf.parent.name ogc = parent_name.split("_")[0] if "_" in parent_name else None # VLM vlm_active = bool( self.use_vlm.get() and self._vlm_available and self._vlm_manager and self._vlm_manager.is_loaded() ) # Utiliser process_document (multi-formats) si disponible, # sinon fallback sur process_pdf (PDF uniquement) _process_fn = getattr(core, 'process_document', None) or core.process_pdf _path_key = "doc_path" if _process_fn.__name__ == "process_document" else "pdf_path" outputs = _process_fn( 
**{_path_key: pdf}, out_dir=outdir, make_vector_redaction=False, also_make_raster_burn=True, config_path=Path(self.cfg_path.get()), use_hf=use_ner, ner_manager=active, ner_thresholds=thresholds, ogc_label=ogc, vlm_manager=self._vlm_manager if vlm_active else None, ) self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}")) for k, v in outputs.items(): self.queue.put(UiMessage(kind=MsgType.LOG, text=f" - {k}: {v}")) audit_path = Path(outputs.get("audit", "")) counts = self._count_audit(audit_path) if counts: self.queue.put(UiMessage( kind=MsgType.LOG, text=" ~ résumé : " + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())), )) for k, v in counts.items(): global_counts[k] = global_counts.get(k, 0) + v ok += 1 except Exception as e: self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2717 {pdf.name} \u2192 ERREUR: {e}")) ko += 1 total_time = time.time() - start_time total_masked = sum(global_counts.values()) # Message différent si arrêt demandé if self._stop_requested: self.queue.put(UiMessage( kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked, outdir=str(outdir) if ok > 0 else "", total_time=total_time, )) self.queue.put(UiMessage( kind=MsgType.LOG, text=f"⚠️ TRAITEMENT INTERROMPU : {ok} fichiers traités, {len(pdfs) - ok - ko} ignorés", )) else: self.queue.put(UiMessage( kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked, outdir=str(outdir), total_time=total_time, )) if ok and global_counts: self.queue.put(UiMessage( kind=MsgType.LOG, text="RÉSUMÉ DU LOT : " + ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items())), )) except Exception as e: self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}")) total_time = time.time() - start_time self.queue.put(UiMessage(kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0, outdir="", total_time=total_time)) # --------------------------------------------------------------- # Pompe de messages # --------------------------------------------------------------- def _pump_logs(self): try: 
while True: msg = self.queue.get_nowait() if msg.kind == MsgType.LOG: self._append_log(msg.text) elif msg.kind == MsgType.PROGRESS: self._update_progress(msg.current, msg.total, msg.filename) elif msg.kind == MsgType.DONE: self._on_done(msg) except queue.Empty: pass finally: self.root.after(60, self._pump_logs) def _append_log(self, text: str): self.txt.configure(state=tk.NORMAL) self.txt.insert(tk.END, text + "\n") self.txt.see(tk.END) self.txt.configure(state=tk.DISABLED) # --------------------------------------------------------------- # Progression # --------------------------------------------------------------- def _show_progress(self, total: int): self._progressbar.configure(maximum=total, value=0) self._progress_label.configure(text="") self._progress_frame.pack(fill=tk.X, padx=32, pady=(0, 18), before=self._results_frame if self._results_frame.winfo_manager() else None) def _hide_progress(self): self._progress_frame.pack_forget() def _update_progress(self, current: int, total: int, filename: str): self._progressbar.configure(value=current) self._progress_label.configure(text=f"{current}/{total} — {filename}") self.status_var.set(f"{current}/{total} — {filename}") # --------------------------------------------------------------- # Résultats # --------------------------------------------------------------- def _show_results(self, ok: int, ko: int, masked: int): self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT) self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT) err_fg = CLR_RED if ko > 0 else CLR_TEXT_SECONDARY err_bg = CLR_RED_LIGHT if ko > 0 else "#f3f4f6" self._update_stat_card(self._stat_errors, ko, err_fg, err_bg) self._results_frame.pack(fill=tk.X, padx=32, pady=(0, 12)) def _hide_results(self): self._results_frame.pack_forget() self._log_frame.pack_forget() self._log_visible = False self._log_toggle.configure(text="Voir le journal détaillé \u25BC") # Vider le journal 
self.txt.configure(state=tk.NORMAL) self.txt.delete("1.0", tk.END) self.txt.configure(state=tk.DISABLED) def _on_done(self, msg: UiMessage): self._hide_progress() self.btn_stop.pack_forget() self.btn_stop.config(state=tk.NORMAL, bg=CLR_RED, text="Arrêter le traitement") self.btn_run.pack(fill=tk.X) if self._stop_requested: self.status_var.set(f"Interrompu : {msg.ok} traités, {msg.ko} erreurs.") else: self.status_var.set(f"Terminé : {msg.ok} OK, {msg.ko} erreurs.") if msg.outdir: self._last_outdir = Path(msg.outdir) # Vérifier les fuites leak_count = self._check_leaks(Path(msg.outdir)) self._update_leak_indicator(leak_count) # Calculer les performances perf_string = self._calculate_performance(msg.ok, msg.total_time) self._perf_label.configure(text=perf_string) self._show_results(msg.ok, msg.ko, msg.masked) # --------------------------------------------------------------- # Toggle journal # --------------------------------------------------------------- def _toggle_log(self): if self._log_visible: self._log_frame.pack_forget() self._log_toggle.configure(text="Voir le journal détaillé \u25BC") else: self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0)) self._log_toggle.configure(text="Masquer le journal \u25B2") self._log_visible = not self._log_visible # --------------------------------------------------------------- # Ouvrir dossier résultats # --------------------------------------------------------------- def _open_out(self): if self._last_outdir: open_folder(self._last_outdir) # --------------------------------------------------------------- # Aide # --------------------------------------------------------------- def _show_help(self): messagebox.showinfo( "Comment ça marche ?", "1) Choisissez le dossier racine contenant vos fichiers PDF.\n\n" "2) Cliquez sur « Lancer la pseudonymisation ».\n\n" "Tous les fichiers PDF sont traités\n" "(recherche récursive dans les sous-dossiers).\n\n" "Un PDF Image (raster) est généré pour chaque fichier :\n" "chaque page 
devient une image avec les données masquées.\n" "Sécurité maximale, aucun texte résiduel.\n\n" "Les résultats sont regroupés à plat dans le dossier\n" "« anonymise/ » à la racine du dossier sélectionné.", ) # --------------------------------------------------------------- # Paramètres avancés (whitelist/blacklist) # --------------------------------------------------------------- def _build_phrase_list(self, parent, title: str, placeholder: str, color_tag: str): """Construit un widget liste + ajout/suppression pour les phrases.""" frame = tk.Frame(parent, bg=CLR_BG) frame.pack(fill=tk.X, pady=(4, 8)) tk.Label( frame, text=title, font=self._f_small, bg=CLR_BG, fg=CLR_TEXT, anchor="w", ).pack(fill=tk.X, pady=(0, 4)) # Zone de saisie + bouton ajouter input_row = tk.Frame(frame, bg=CLR_BG) input_row.pack(fill=tk.X, pady=(0, 4)) entry = tk.Entry(input_row, font=self._f_small, relief=tk.GROOVE, bd=1) entry.insert(0, placeholder) entry.configure(fg="#999") def _on_focus_in(e): if entry.get() == placeholder: entry.delete(0, tk.END) entry.configure(fg=CLR_TEXT) def _on_focus_out(e): if not entry.get().strip(): entry.insert(0, placeholder) entry.configure(fg="#999") entry.bind("", _on_focus_in) entry.bind("", _on_focus_out) entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4)) def _add(event=None): text = entry.get().strip() if text and text != placeholder: # Éviter les doublons items = list(listbox.get(0, tk.END)) if text not in items: listbox.insert(tk.END, text) entry.delete(0, tk.END) add_btn = tk.Button( input_row, text="+ Ajouter", font=self._f_small, bg=color_tag, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2", command=_add, padx=8, ) add_btn.pack(side=tk.LEFT) entry.bind("", _add) # Liste des phrases list_frame = tk.Frame(frame, bg=CLR_BG) list_frame.pack(fill=tk.X) listbox = tk.Listbox( list_frame, height=4, font=("Consolas", 9), relief=tk.GROOVE, bd=1, selectmode=tk.EXTENDED, bg=color_tag, ) scrollbar = ttk.Scrollbar(list_frame, orient=tk.VERTICAL, 
command=listbox.yview) listbox.configure(yscrollcommand=scrollbar.set) listbox.pack(side=tk.LEFT, fill=tk.X, expand=True) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) # Bouton supprimer def _remove(): sel = listbox.curselection() for idx in reversed(sel): listbox.delete(idx) rm_btn = tk.Button( frame, text="Supprimer la sélection", font=self._f_small, bg="#ffcdd2", fg="#b71c1c", relief=tk.GROOVE, cursor="hand2", command=_remove, padx=8, ) rm_btn.pack(anchor="e", pady=(2, 0)) return listbox, entry def _load_params(self): """Charge les whitelist/blacklist depuis la config YAML.""" try: cfg_path = Path(self.cfg_path.get()) if cfg_path.exists() and yaml is not None: data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {} # Whitelist wl = data.get("whitelist_phrases", []) self._wl_listbox.delete(0, tk.END) for phrase in wl: if phrase and phrase.strip(): self._wl_listbox.insert(tk.END, phrase.strip()) # Blacklist bl = data.get("blacklist", {}).get("force_mask_terms", []) self._bl_listbox.delete(0, tk.END) for term in bl: if term and str(term).strip(): self._bl_listbox.insert(tk.END, str(term).strip()) # Stop-words additionnels sw = data.get("additional_stopwords", []) self._sw_listbox.delete(0, tk.END) for term in sw: if term and str(term).strip(): self._sw_listbox.insert(tk.END, str(term).strip()) except Exception: pass def _export_params(self): """Exporte les paramètres whitelist/blacklist dans un fichier JSON pour envoi par email.""" try: import json as _json from datetime import datetime wl = list(self._wl_listbox.get(0, tk.END)) bl = list(self._bl_listbox.get(0, tk.END)) sw = list(self._sw_listbox.get(0, tk.END)) export_data = { "version": APP_VERSION, "date_export": datetime.now().isoformat(), "etablissement": "", # à remplir par l'utilisateur "whitelist_phrases": wl, "blacklist_force_mask_terms": bl, "additional_stopwords": sw, "instructions": ( "Ce fichier contient les paramètres d'anonymisation personnalisés. 
" "Envoyez-le par email à l'équipe technique pour mise à jour du programme." ), } # Proposer le Bureau comme destination par défaut desktop = Path.home() / "Desktop" if not desktop.exists(): desktop = Path.home() / "Bureau" if not desktop.exists(): desktop = Path.home() dest = filedialog.asksaveasfilename( title="Exporter les paramètres", initialdir=str(desktop), initialfile="parametres_anonymisation.json", defaultextension=".json", filetypes=[("JSON", "*.json"), ("Tous", "*.*")], ) if dest: Path(dest).write_text( _json.dumps(export_data, ensure_ascii=False, indent=2), encoding="utf-8", ) messagebox.showinfo( "Export réussi", f"Paramètres exportés dans :\n{dest}\n\n" f"Vous pouvez envoyer ce fichier par email\n" f"à l'équipe technique.", ) except Exception as e: messagebox.showerror("Erreur", f"Erreur à l'export :\n{e}") def _import_params(self): """Importe des paramètres depuis un fichier JSON (fusionne avec l'existant).""" try: import json as _json src = filedialog.askopenfilename( title="Importer des paramètres", filetypes=[("JSON", "*.json"), ("Tous", "*.*")], ) if not src: return data = _json.loads(Path(src).read_text(encoding="utf-8")) # Fusionner whitelist new_wl = data.get("whitelist_phrases", []) existing_wl = set(self._wl_listbox.get(0, tk.END)) added_wl = 0 for phrase in new_wl: if phrase and phrase.strip() and phrase.strip() not in existing_wl: self._wl_listbox.insert(tk.END, phrase.strip()) added_wl += 1 # Fusionner blacklist new_bl = data.get("blacklist_force_mask_terms", []) existing_bl = set(self._bl_listbox.get(0, tk.END)) added_bl = 0 for term in new_bl: if term and str(term).strip() and str(term).strip() not in existing_bl: self._bl_listbox.insert(tk.END, str(term).strip()) added_bl += 1 # Fusionner stop-words additionnels new_sw = data.get("additional_stopwords", []) existing_sw = set(self._sw_listbox.get(0, tk.END)) added_sw = 0 for term in new_sw: if term and str(term).strip() and str(term).strip() not in existing_sw: 
self._sw_listbox.insert(tk.END, str(term).strip()) added_sw += 1 version = data.get("version", "?") date_exp = data.get("date_export", "?")[:10] messagebox.showinfo( "Import réussi", f"Paramètres importés (v{version}, {date_exp}) :\n\n" f" + {added_wl} phrase(s) ajoutée(s) à la whitelist\n" f" + {added_bl} terme(s) ajouté(s) à la blacklist\n" f" + {added_sw} mot(s) ajouté(s) aux stop-words\n\n" f"Cliquez sur « Sauvegarder » pour appliquer.", ) except Exception as e: messagebox.showerror("Erreur", f"Erreur à l'import :\n{e}") def _save_params(self): """Sauvegarde les whitelist/blacklist dans la config YAML.""" try: cfg_path = Path(self.cfg_path.get()) if not cfg_path.exists() or yaml is None: messagebox.showwarning("Erreur", "Fichier de configuration introuvable.") return data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {} # Whitelist phrases data["whitelist_phrases"] = list(self._wl_listbox.get(0, tk.END)) # Blacklist terms if "blacklist" not in data: data["blacklist"] = {} data["blacklist"]["force_mask_terms"] = list(self._bl_listbox.get(0, tk.END)) # Stop-words additionnels (mots à ne jamais identifier comme noms) data["additional_stopwords"] = list(self._sw_listbox.get(0, tk.END)) cfg_path.write_text( yaml.dump(data, allow_unicode=True, default_flow_style=False, sort_keys=False), encoding="utf-8", ) messagebox.showinfo("Paramètres", "Paramètres sauvegardés avec succès.") except Exception as e: messagebox.showerror("Erreur", f"Impossible de sauvegarder :\n{e}") # --------------------------------------------------------------- # YAML (interne) # --------------------------------------------------------------- def _ensure_cfg_exists(self): p = Path(self.cfg_path.get()) p.parent.mkdir(parents=True, exist_ok=True) if not p.exists(): p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8") def _load_cfg(self): if yaml is None: return self._ensure_cfg_exists() try: self.cfg_data = yaml.safe_load( Path(self.cfg_path.get()).read_text(encoding="utf-8") ) or {} 
except Exception: pass # --------------------------------------------------------------- # Audit # --------------------------------------------------------------- def _count_audit(self, audit_path: Path) -> Dict[str, int]: d: Dict[str, int] = {} try: with open(audit_path, "r", encoding="utf-8") as f: for line in f: try: obj = json.loads(line) k = obj.get("kind", "?") d[k] = d.get(k, 0) + 1 except Exception: pass except Exception: pass return d # --------------------------------------------------------------- # Vérification des fuites # --------------------------------------------------------------- def _check_leaks(self, output_dir: Path) -> int: """Vérifie les fuites dans les textes anonymisés.""" leak_count = 0 try: # Patterns de fuites critiques import re patterns = { "date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE), "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE), } for txt_file in output_dir.glob("*.pseudonymise.txt"): try: with open(txt_file, 'r', encoding='utf-8') as f: content = f.read() for pattern in patterns.values(): matches = pattern.findall(content) leak_count += len(matches) except Exception: pass except Exception: pass return leak_count # --------------------------------------------------------------- # Calcul des performances # --------------------------------------------------------------- def _calculate_performance(self, total_files: int, total_time: float) -> str: """Calcule et formate les performances de traitement.""" if total_files == 0 or total_time == 0: return "⏱️ Temps de traitement non disponible" avg_time = total_time / total_files # Formater le temps total if total_time < 60: time_str = f"{total_time:.0f}s" elif total_time < 3600: minutes = int(total_time // 60) seconds = int(total_time % 60) time_str = f"{minutes}m {seconds}s" else: hours = int(total_time // 3600) minutes = int((total_time % 3600) // 60) time_str = f"{hours}h {minutes}m" return f"⏱️ Traité en {time_str} 
({avg_time:.1f}s/document)" # --------------------------------------------------------------- # Mise à jour de l'indicateur de fuites # --------------------------------------------------------------- def _update_leak_indicator(self, leak_count: int): """Met à jour l'indicateur de fuites.""" if leak_count == 0: self._leak_badge.configure( text="🔒 0 fuite détectée", bg=CLR_GREEN_LIGHT, fg=CLR_GREEN ) else: self._leak_badge.configure( text=f"⚠️ {leak_count} fuite{'s' if leak_count > 1 else ''} potentielle{'s' if leak_count > 1 else ''}", bg=CLR_RED_LIGHT, fg=CLR_RED ) # --------------------------------------------------------------- # Chargement automatique NER au démarrage # --------------------------------------------------------------- def _auto_load_ner(self): """Charge le modèle NER par défaut en arrière-plan. Priorité : EDS-Pseudo (meilleur sur données cliniques) → DistilCamemBERT-NER (fallback). """ if not self._eds_manager and not self._onnx_manager: return self.status_var.set("Chargement du modèle NER...") threading.Thread(target=self._auto_load_ner_worker, daemon=True).start() def _auto_load_ner_worker(self): # 1) Essayer EDS-Pseudo en priorité (F1=97.4% sur données cliniques) if self._eds_manager: try: self._eds_manager.load("AP-HP/eds-pseudo-public") self._active_manager = self._eds_manager self.use_hf = True self.status_var.set("Prêt — EDS-Pseudo actif.") return except Exception as e: import logging logging.getLogger(__name__).info("EDS-Pseudo indisponible, fallback ONNX : %s", e) # 2) Fallback : DistilCamemBERT-NER ONNX if self._onnx_manager: try: self._onnx_manager.load("cmarkea/distilcamembert-base-ner") self._active_manager = self._onnx_manager self.use_hf = True self.status_var.set("Prêt — NER ONNX actif.") return except Exception as e2: self.status_var.set(f"Prêt (NER indisponible : {e2})") return self.status_var.set("Prêt (aucun backend NER disponible).") # --------------------------------------------------------------- # VLM toggle # 
--------------------------------------------------------------- def _on_vlm_toggle(self): """Appelé quand l'utilisateur coche/décoche la checkbox VLM.""" if not self.use_vlm.get(): self._vlm_available = False if hasattr(self, '_vlm_status_lbl'): self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY) return if hasattr(self, '_vlm_status_lbl'): self._vlm_status_lbl.configure(text="Connexion...", fg=CLR_TEXT_SECONDARY) threading.Thread(target=self._vlm_connect_worker, daemon=True).start() def _vlm_connect_worker(self): """Vérifie la connexion Ollama en arrière-plan.""" try: if self._vlm_manager is None: raise RuntimeError("VlmManager non disponible") self._vlm_manager.load() self._vlm_available = True if hasattr(self, '_vlm_status_lbl'): self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN) except Exception as e: self._vlm_available = False self.use_vlm.set(False) err = str(e) if len(err) > 60: err = err[:57] + "..." if hasattr(self, '_vlm_status_lbl'): self._vlm_status_lbl.configure(text=f"Indisponible : {err}", fg=CLR_RED) # --------------------------------------------------------------- # Modèles NER (API interne) # --------------------------------------------------------------- def _load_model(self, model_id: Optional[str] = None): mid = model_id or "cmarkea/distilcamembert-base-ner" is_eds = False if self._eds_manager: eds_ids = set(self._eds_manager.models_catalog().values()) if mid in eds_ids: is_eds = True if is_eds: if not self._eds_manager: return manager = self._eds_manager else: if not self._onnx_manager: return manager = self._onnx_manager try: manager.load(mid) self._active_manager = manager self.use_hf = True except Exception: self.use_hf = False def _unload_model(self): if self._onnx_manager: self._onnx_manager.unload() if self._eds_manager: self._eds_manager.unload() self._active_manager = None self.use_hf = False # --------------------------------------------------------------------------- # Point d'entrée # 
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Start the GUI. Any exception escaping the application is written to
    # crash.log, shown in a dialog when possible, then re-raised.
    try:
        root = tk.Tk()
        App(root)
        root.mainloop()
    except Exception as exc:
        import traceback
        err = traceback.format_exc()

        # Write the log next to the executable. In a frozen build `__file__`
        # points into the temporary extraction directory, so the persistent
        # location given by `_exe_dir()` is used instead.
        log_path = _exe_dir() / "crash.log"
        try:
            log_path.write_text(err, encoding="utf-8")
        except Exception:
            # The crash handler itself must never fail.
            pass

        # Try to show a message box (works even without a console).
        try:
            _r = tk.Tk()
            _r.withdraw()
            messagebox.showerror("Erreur fatale", f"L'application a planté :\n\n{exc}\n\nVoir crash.log")
            _r.destroy()
        except Exception:
            pass
        raise