Files
anonymisation/Pseudonymisation_Gui_V5.py

2875 lines
116 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
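Pseudonymisation GUI v5 (streamlined interface)
----------------------------------------------
- "Anonymisation" tab in 2 steps: folder or file → launch (both formats are generated)
- Two further tabs, "Paramètres" and "Profils", share the same custom tab bar
- Native system theme (sv_ttk optional, falls back to clam)
- ONNX/EDS-Pseudo NER backend kept internal
- No "Advanced" tab (NER + YAML are loaded silently)
Modules required next to this file (imported unconditionally):
- anonymizer_core_refactored_onnx (start-up aborts with an error dialog if missing)
- config_defaults, gui_batch_paths, manual_masking, profile_defaults
Optional modules (guarded imports; their names fall back to None when missing):
- ner_manager_onnx, eds_pseudo_manager, camembert_ner_manager, vlm_manager,
  pdf_mask_designer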
"""
from __future__ import annotations
import enum
import json
import os
import platform
import queue
import re
import subprocess
import sys
import tempfile
import threading
import unicodedata
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog, ttk
# ---------------------------------------------------------------------------
# Core
# ---------------------------------------------------------------------------
try:
    import anonymizer_core_refactored_onnx as core
except Exception as e:
    # The ONNX core is mandatory: log the failure, tell the user, then abort.
    _err_msg = f"Impossible d'importer le core ONNX : {e}"
    # Best-effort crash log next to the script, or next to the executable when
    # the application is frozen. In a frozen build __file__ lives inside the
    # bundle, so the log is anchored on sys.executable instead, which keeps it
    # alongside the other persistent files (config, logs).
    try:
        import traceback as _tb
        if getattr(sys, "frozen", False):
            _log_dir = Path(sys.executable).parent
        else:
            _log_dir = Path(__file__).resolve().parent
        _log = _log_dir / "crash.log"
        _log.write_text(f"{_err_msg}\n\n{_tb.format_exc()}", encoding="utf-8")
    except Exception:
        # Logging must never mask the original import error.
        pass
    # Best-effort error dialog; a hidden throw-away root hosts the message box.
    try:
        _r = tk.Tk()
        _r.withdraw()
        messagebox.showerror("Erreur d'import", _err_msg)
        _r.destroy()
    except Exception:
        pass
    raise SystemExit(_err_msg)
try:
from ner_manager_onnx import NerModelManager, NerThresholds
except Exception:
NerModelManager = None # type: ignore
NerThresholds = None # type: ignore
try:
from eds_pseudo_manager import EdsPseudoManager
except Exception:
EdsPseudoManager = None # type: ignore
try:
from camembert_ner_manager import CamembertNerManager
except Exception:
CamembertNerManager = None # type: ignore
try:
from vlm_manager import VlmManager, VlmConfig
except Exception:
VlmManager = None # type: ignore
VlmConfig = None # type: ignore
try:
import yaml
except Exception:
yaml = None
from config_defaults import (
deep_merge_dict,
load_effective_dictionaries_dict,
load_effective_param_lists,
read_default_dictionaries_text,
read_runtime_dictionaries_overlay_text,
)
from gui_batch_paths import (
build_batch_output_dir,
iter_pseudonymized_texts,
list_supported_documents,
)
from manual_masking import (
append_jsonl_file,
ensure_mask_templates_dir,
list_mask_templates,
mask_template_label,
resolve_manual_mask_pdf,
)
from profile_defaults import (
delete_runtime_profile,
ensure_runtime_profiles_config,
get_default_profile_key,
list_default_profile_keys,
list_effective_profiles,
read_runtime_profiles_overlay_text,
save_runtime_profile,
set_runtime_default_profile,
)
try:
from pdf_mask_designer import (
MaskDesignerApp,
Template,
apply_template_vector,
load_template_yaml,
)
except Exception:
MaskDesignerApp = None # type: ignore
Template = None # type: ignore
apply_template_vector = None # type: ignore
load_template_yaml = None # type: ignore
# ---------------------------------------------------------------------------
# Thème optionnel
# ---------------------------------------------------------------------------
try:
import sv_ttk # type: ignore
except ImportError:
sv_ttk = None
# PIL pour charger le logo / icônes (optionnel — dégradation si absent).
try:
from PIL import Image, ImageTk
_PIL_AVAILABLE = True
except Exception:
_PIL_AVAILABLE = False
# ---------------------------------------------------------------------------
# Constantes
# ---------------------------------------------------------------------------
# User-facing window title (French UI text, used when the root window is titled).
APP_TITLE = "Pseudonymisation de vos documents"
# Short version tag; always the first component returned by _version_long().
APP_VERSION = "v5.5"
# Label of the "no manual mask" choice; App maps it to None in its template table.
MANUAL_MASK_NONE_LABEL = "Aucun masque manuel"
# Métadonnées de build — chargées depuis build_info.py (régénéré par rebuild_anon.ps1)
try:
from build_info import BUILD_DATE, BUILD_COMMIT, BUILD_BRANCH
except Exception:
BUILD_DATE = "dev"
BUILD_COMMIT = "dev"
BUILD_BRANCH = "dev"
def _version_long() -> str:
    """Return the extended version label used in the window title.

    The label always starts with ``APP_VERSION``. The build date and the
    commit (prefixed with ``#``) are appended only when they differ from the
    ``"dev"`` placeholder, e.g. ``v5.5 · 2026-04-15 18:15 · #234137e``.
    """
    extras = []
    if BUILD_DATE != "dev":
        extras.append(BUILD_DATE)
    if BUILD_COMMIT != "dev":
        extras.append(f"#{BUILD_COMMIT}")
    return " · ".join([APP_VERSION, *extras])
def _asset(name: str) -> Path:
    """Resolve the path of a bundled asset under ``assets/``.

    Args:
        name: Path of the asset relative to the ``assets`` directory
            (for example ``"icons/app.ico"``).

    Returns:
        ``<base>/assets/<name>``, where ``<base>`` is the bundle directory of
        a frozen build or, otherwise, the directory containing this file.
        The path is not checked for existence.
    """
    if getattr(sys, "frozen", False):
        # sys._MEIPASS is only defined by PyInstaller. Freezers that set
        # sys.frozen without it used to trigger an AttributeError here, so
        # fall back to the directory of the executable.
        base = Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
    else:
        base = Path(__file__).resolve().parent
    return base / "assets" / name
def _app_dir() -> Path:
    """Return the application root directory (where bundled resources live).

    Returns:
        For a frozen build, ``sys._MEIPASS`` when the freezer defines it and
        the directory of the executable otherwise; for a source run, the
        directory containing this file.
    """
    if getattr(sys, "frozen", False):
        # Guarded lookup: not every freezer that sets sys.frozen also
        # provides sys._MEIPASS, and an unguarded read raised AttributeError.
        return Path(getattr(sys, "_MEIPASS", Path(sys.executable).parent))
    return Path(__file__).resolve().parent
def _exe_dir() -> Path:
    """Return the directory used for persistent files (config, logs).

    This is the parent of ``sys.executable`` for a frozen build and the
    directory containing this file otherwise.
    """
    is_frozen = getattr(sys, 'frozen', False)
    anchor = Path(sys.executable) if is_frozen else Path(__file__).resolve()
    return anchor.parent
def _resolve_config() -> Path:
    """Return the path of the editable ``dictionnaires.yml`` next to the executable.

    The file lives in ``<exe dir>/config``. When it does not exist yet, the
    directory is created and the file is seeded with the text returned by
    ``read_runtime_dictionaries_overlay_text()``, so the user can edit it
    without rebuilding. The same path is returned in both cases.
    """
    target = _exe_dir() / "config" / "dictionnaires.yml"
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(read_runtime_dictionaries_overlay_text(), encoding="utf-8")
    return target
def _resolve_profiles_config() -> Path:
    """Return the path of the editable ``profiles.yml`` next to the executable.

    The file lives in ``<exe dir>/config``. When it is missing, the directory
    is created and the file is seeded with the text returned by
    ``read_runtime_profiles_overlay_text()``. The same path is returned in
    both cases.
    """
    target = _exe_dir() / "config" / "profiles.yml"
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(read_runtime_profiles_overlay_text(), encoding="utf-8")
    return target
# Resolved once at import time. Both resolvers create their file next to the
# executable when it is missing, so importing this module may write to disk.
DEFAULT_CFG = _resolve_config()
DEFAULT_PROFILES_CFG = _resolve_profiles_config()
# Directory passed as cache_dir to the NER managers created by App.
MODELS_DIR = _app_dir() / "models"
# Presumably the packaged default dictionaries and the runtime overlay as text;
# both are read once at import time (verify against config_defaults).
DEFAULTS_CFG_TEXT = read_default_dictionaries_text()
RUNTIME_CFG_TEXT = read_runtime_dictionaries_overlay_text()
# Palette derived from the aivanonym logo (gradient magenta → pink → peach → black)
# Logo magenta: primary (buttons, accents)
# Peach: secondary (tags, highlights)
# Black/grey: text and neutrals
# White/light grey: backgrounds
CLR_PRIMARY = "#E91E63" # logo magenta (CTA, links)
CLR_PRIMARY_DARK = "#C2185B" # hover / pressed
CLR_PRIMARY_LIGHT = "#FCE4EC" # light background (selected cards)
CLR_ACCENT = "#FFB74D" # logo peach (secondary tags)
CLR_ACCENT_LIGHT = "#FFF3E0" # light accent background
CLR_GREEN = "#2E7D32" # success
CLR_GREEN_LIGHT = "#E8F5E9"
CLR_RED = "#C62828" # error / danger
CLR_RED_LIGHT = "#FFEBEE"
CLR_BLUE_LIGHT = "#FCE4EC" # kept for compatibility (same value as CLR_PRIMARY_LIGHT)
CLR_CARD_BG = "#FFFFFF"
CLR_CARD_BORDER = "#E0E0E0"
CLR_BG = "#FAFAFA" # main background (very light grey)
CLR_TEXT = "#212121" # near-black (from the logo)
CLR_TEXT_SECONDARY = "#757575" # medium grey
CLR_DIVIDER = "#EEEEEE"
# ---------------------------------------------------------------------------
# Messages worker → UI
# ---------------------------------------------------------------------------
class MsgType(enum.Enum):
    """Kind tag carried by every ``UiMessage`` (worker → UI)."""

    LOG = "log"
    PROGRESS = "progress"
    DONE = "done"
@dataclass
class UiMessage:
    """Message record exchanged through the App queue (worker → UI).

    Only ``kind`` is mandatory; every other field has a neutral default so a
    sender fills in just what is relevant for that kind of message.
    """

    kind: MsgType
    text: str = ""
    # Position counters and current file name (presumably for PROGRESS messages).
    current: int = 0
    total: int = 0
    filename: str = ""
    # Result counters and output directory (presumably for DONE messages).
    ok: int = 0
    ko: int = 0
    masked: int = 0
    outdir: str = ""
    total_time: float = 0.0  # Total processing time in seconds
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def open_folder(path: Path):
    """Open *path* with the platform's default file browser (best effort).

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. Any failure is swallowed on purpose: the helper is a
    convenience and must never interrupt the caller.
    """
    try:
        system = platform.system()
        if system == "Windows":
            os.startfile(str(path))  # type: ignore
        else:
            launcher = "open" if system == "Darwin" else "xdg-open"
            subprocess.Popen([launcher, str(path)])
    except Exception:
        pass
def _detect_font() -> str:
    """Return the first preferred sans-serif family that Tk actually provides.

    Returns:
        The first name of the preference list found in
        ``tkinter.font.families()`` (case- and space-insensitive match), or
        ``"TkDefaultFont"`` when none is installed or Tk cannot be queried
        (for example when no root window exists yet).
    """
    preferred = ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica")

    def _key(family: str) -> str:
        # Normalised form used for comparison.
        return family.lower().replace(" ", "")

    try:
        # Local import: tkinter.font is not imported at module level.
        import tkinter.font as tkfont

        # Reading a widget's configured font back (cget("font")) only echoes
        # the requested name, so it cannot tell whether the family exists and
        # the first candidate always "matched". The family list reflects what
        # Tk can really render.
        installed = {_key(family) for family in tkfont.families()}
    except Exception:
        return "TkDefaultFont"
    for name in preferred:
        if _key(name) in installed:
            return name
    return "TkDefaultFont"
def _detect_dark_mode() -> bool:
    """Report whether the GNOME colour scheme is a dark one.

    Runs ``gsettings get org.gnome.desktop.interface color-scheme`` with a
    2-second timeout and returns True when the output contains ``dark``.
    Any failure (command missing, timeout, ...) yields False.
    """
    command = ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=2)
        answer = completed.stdout.lower()
    except Exception:
        return False
    return "dark" in answer
# ---------------------------------------------------------------------------
# ToolTip amélioré
# ---------------------------------------------------------------------------
class ToolTip:
    """Hover tooltip shown below a widget after a short delay.

    The constructor binds ``<Enter>`` (schedule the popup) and ``<Leave>``
    (cancel / hide it) on the widget.
    """

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        """Attach the tooltip to *widget*.

        Args:
            widget: Widget whose hover events trigger the tooltip.
            text: Text displayed in the popup.
            delay: Milliseconds to wait after ``<Enter>`` before showing it.
        """
        self.widget = widget
        self.text = text
        self.delay = delay
        self.tip: Optional[tk.Toplevel] = None  # popup window while visible
        self._after_id: Optional[str] = None  # pending `after` callback id
        widget.bind("<Enter>", self._schedule)
        widget.bind("<Leave>", self.hide)

    def _schedule(self, *_):
        """(Re)start the delay timer that will display the popup."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Cancel the pending timer, if any."""
        if not self._after_id:
            return
        self.widget.after_cancel(self._after_id)
        self._after_id = None

    def _show(self):
        """Create the borderless popup just below the widget."""
        if self.tip:
            return
        anchor = self.widget
        pos_x = anchor.winfo_rootx() + 20
        pos_y = anchor.winfo_rooty() + anchor.winfo_height() + 4
        popup = tk.Toplevel(anchor)
        self.tip = popup
        # No window-manager decorations: the popup is a bare label.
        popup.wm_overrideredirect(True)
        popup.wm_geometry(f"+{pos_x}+{pos_y}")
        bubble = tk.Label(
            popup,
            text=self.text,
            justify=tk.LEFT,
            background="#1f2937",
            foreground="#f9fafb",
            relief=tk.SOLID,
            borderwidth=1,
            padx=8,
            pady=5,
            wraplength=320,
        )
        bubble.pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending display and destroy the popup if it is shown."""
        self._cancel()
        popup = self.tip
        if popup:
            popup.destroy()
            self.tip = None
# ---------------------------------------------------------------------------
# Application principale
# ---------------------------------------------------------------------------
class App:
def __init__(self, root: tk.Tk):
    """Configure the root window, create the application state and build the UI.

    After the widgets are built, the "anonym" tab is selected and the
    start-up helpers are called in order: ``_pump_logs``,
    ``_ensure_cfg_exists``, ``_load_cfg`` and ``_auto_load_ner``.

    Args:
        root: Tk root window hosting the application.
    """
    self.root = root
    # Long version in the title so the running build is identifiable at a
    # glance (avoids confusing old and new executables during tests). The
    # explicit separator keeps the title and the version from running
    # together ("...documentsv5.5").
    self.root.title(f"{APP_TITLE} — {_version_long()}")
    self.root.geometry("780x820")
    self.root.minsize(600, 650)
    # Window icon (top-left corner + Windows taskbar); see _apply_window_icon.
    self._icon_refs: list = []  # keeps PhotoImage objects alive (no GC)
    self._apply_window_icon()
    # Preload the header logo: a persistent reference is required, otherwise
    # tkinter discards the image and the label shows up blank.
    self._logo_img = self._load_image_safe(_asset('logo_header.png'))
    # --- Theme ---
    self._apply_theme()
    # --- Fonts ---
    self._font_family = _detect_font()
    self._f_title = (self._font_family, 20, "bold")
    self._f_body = (self._font_family, 11)
    self._f_body_bold = (self._font_family, 11, "bold")
    self._f_button = (self._font_family, 13, "bold")
    self._f_stat = (self._font_family, 24, "bold")
    self._f_small = (self._font_family, 10)
    self._f_card_title = (self._font_family, 12, "bold")
    self._f_card_desc = (self._font_family, 10)
    # --- Tk variables and plain state ---
    self.dir_var = tk.StringVar()
    self.status_var = tk.StringVar(value="Prêt.")
    self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
    self.profiles_path = tk.StringVar(value=str(DEFAULT_PROFILES_CFG))
    self.processing_profile_label_var = tk.StringVar(value="")
    self.manual_mask_template_var = tk.StringVar(value=MANUAL_MASK_NONE_LABEL)
    self.profile_description_var = tk.StringVar(value="")
    self.profile_require_manual_mask_var = tk.BooleanVar(value=False)
    self.profile_force_disable_vlm_var = tk.BooleanVar(value=False)
    self.queue: "queue.Queue[UiMessage]" = queue.Queue()
    self._processing_profiles: Dict[str, Dict[str, Any]] = {}
    self._processing_profile_labels_to_keys: Dict[str, str] = {}
    self._manual_mask_templates: Dict[str, Optional[Path]] = {
        MANUAL_MASK_NONE_LABEL: None,
    }
    self._profile_base_description = ""
    self._profile_manager_win: Optional[tk.Toplevel] = None
    self._advanced_params_win: Optional[tk.Toplevel] = None
    # --- NER (internal) ---
    self.use_hf = False
    self.th_per = 0.90
    self.th_org = 0.90
    self.th_loc = 0.90
    # Each manager is None when its optional module could not be imported.
    self._onnx_manager: Optional[Any] = NerModelManager(cache_dir=MODELS_DIR) if NerModelManager else None
    self._eds_manager: Optional[Any] = EdsPseudoManager(cache_dir=MODELS_DIR) if EdsPseudoManager else None
    self._camembert_manager: Optional[Any] = CamembertNerManager() if CamembertNerManager else None
    self._active_manager: Optional[Any] = None
    self.cfg_data: Dict[str, Any] = {}
    # --- VLM (optional) ---
    self.use_vlm = tk.BooleanVar(value=False)
    self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
    self._vlm_available = False
    # --- Merged model catalogue (ONNX first, then EDS entries override) ---
    catalog: Dict[str, str] = {}
    if self._onnx_manager:
        catalog.update(self._onnx_manager.models_catalog())
    if self._eds_manager:
        catalog.update(self._eds_manager.models_catalog())
    self._merged_catalog = catalog
    # --- Results ---
    self._last_outdir: Optional[Path] = None
    # --- Stop control ---
    self._stop_requested = False
    # --- Single file (None = folder mode) ---
    self._single_file: Optional[Path] = None
    # --- UI construction ---
    self._build_ui()
    # Show the Anonymisation tab by default.
    self._switch_tab("anonym")
    self._pump_logs()
    self._ensure_cfg_exists()
    self._load_cfg()
    # --- Automatic loading of the NER model ---
    self._auto_load_ner()
# ---------------------------------------------------------------
# Onglets custom
# ---------------------------------------------------------------
def _switch_tab(self, name: str):
    """Show the tab called *name* and restyle the tab buttons accordingly.

    Unknown names are ignored. Otherwise every tab frame is unpacked, the
    requested one is packed, the matching button gets the primary colour
    (label and underline) and ``self._active_tab`` is updated.
    """
    if name not in self._tab_frames:
        return
    # Hide every content frame, then reveal only the requested one.
    for frame in self._tab_frames.values():
        frame.pack_forget()
    self._tab_frames[name].pack(fill=tk.BOTH, expand=True)
    # The active button is highlighted; the others blend into the background.
    for tab_name, widgets in self._tab_buttons.items():
        is_active = tab_name == name
        label_fg = CLR_PRIMARY if is_active else CLR_TEXT_SECONDARY
        underline_bg = CLR_PRIMARY if is_active else CLR_BG
        widgets["label"].configure(fg=label_fg, bg=CLR_BG)
        widgets["underline"].configure(bg=underline_bg)
    self._active_tab = name
# ---------------------------------------------------------------
# Icônes & assets
# ---------------------------------------------------------------
def _apply_window_icon(self):
    """Set the window icon: ``.ico`` on Windows when possible, PNG otherwise.

    On win32, ``icons/app.ico`` is tried first through ``iconbitmap``. If
    that is not applicable or fails, ``icons/icon_128.png`` is loaded with
    PIL (when available) and applied with ``iconphoto``. Every failure is
    ignored because the icon is not essential.
    """
    try:
        ico_path = _asset('icons/app.ico')
        if sys.platform == 'win32' and ico_path.exists():
            try:
                self.root.iconbitmap(str(ico_path))
            except Exception:
                pass
            else:
                return
        # Fallback: PNG through iconphoto (all platforms).
        png_path = _asset('icons/icon_128.png')
        if not (png_path.exists() and _PIL_AVAILABLE):
            return
        photo = ImageTk.PhotoImage(Image.open(png_path))
        # Keep a reference so the image is not garbage-collected.
        self._icon_refs.append(photo)
        self.root.iconphoto(True, photo)
    except Exception:
        pass  # silent degradation: the icon is not blocking
def _load_image_safe(self, path: Path):
    """Load *path* as an RGBA ``PhotoImage`` and keep a reference to it.

    Returns:
        The ``ImageTk.PhotoImage`` (also appended to ``self._icon_refs`` so
        it is not garbage-collected), or None when PIL is unavailable, the
        file does not exist, or loading fails.
    """
    if _PIL_AVAILABLE and path.exists():
        try:
            photo = ImageTk.PhotoImage(Image.open(path).convert('RGBA'))
            self._icon_refs.append(photo)
            return photo
        except Exception:
            pass
    return None
# ---------------------------------------------------------------
# Thème
# ---------------------------------------------------------------
def _apply_theme(self):
    """Apply the sv_ttk theme when installed, else fall back to ttk "clam".

    With sv_ttk, the mode ("dark" or "light") follows ``_detect_dark_mode()``.
    Without it, a failure to select "clam" is ignored.
    """
    if sv_ttk is None:
        try:
            fallback_style = ttk.Style()
            fallback_style.theme_use("clam")
        except Exception:
            pass
        return
    sv_ttk.set_theme("dark" if _detect_dark_mode() else "light")
# ---------------------------------------------------------------
# Construction de la vue unique
# ---------------------------------------------------------------
def _build_ui(self):
self.root.configure(bg=CLR_BG)
pad_x = 32
# =============================================================
# HEADER fixe (logo + titre + baseline), hors onglets
# =============================================================
header = tk.Frame(self.root, bg=CLR_BG)
header.pack(fill=tk.X, padx=pad_x, pady=(16, 8))
if self._logo_img is not None:
tk.Label(header, image=self._logo_img, bg=CLR_BG).pack(anchor="w")
else:
tk.Label(header, text="aivanonym", font=(self._font_family, 22, "bold"),
bg=CLR_BG, fg=CLR_PRIMARY).pack(anchor="w")
tk.Label(
header,
text="Pseudonymisation de documents médicaux — 100% local",
font=(self._font_family, 10),
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
).pack(fill=tk.X, pady=(4, 0))
# Ligne colorée inspirée du gradient du logo
accent_bar = tk.Frame(self.root, bg=CLR_PRIMARY, height=3)
accent_bar.pack(fill=tk.X)
# =============================================================
# ONGLETS CUSTOM (boutons uniformes — rendu pro)
# Remplace ttk.Notebook dont les onglets ont des tailles/styles
# variables selon l'état actif. Ici : tous les onglets identiques,
# seule une bordure basse magenta signale l'onglet actif.
# =============================================================
tabs_bar = tk.Frame(self.root, bg=CLR_BG)
tabs_bar.pack(fill=tk.X, padx=0, pady=(4, 0))
self._tab_frames: dict = {} # nom → frame outer
self._tab_buttons: dict = {} # nom → dict(container, label, underline)
self._active_tab: Optional[str] = None
def _make_tab_button(parent, name: str, label: str):
"""Crée un onglet cliquable uniforme (fond, texte, underline)."""
container = tk.Frame(parent, bg=CLR_BG, cursor="hand2")
container.pack(side=tk.LEFT)
txt = tk.Label(
container, text=label,
font=(self._font_family, 11, "bold"),
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
padx=26, pady=10, cursor="hand2",
)
txt.pack(fill=tk.X)
# Bordure basse qui devient magenta quand actif
underline = tk.Frame(container, bg=CLR_BG, height=3)
underline.pack(fill=tk.X)
def _on_click(_e=None):
self._switch_tab(name)
for w in (container, txt, underline):
w.bind("<Button-1>", _on_click)
self._tab_buttons[name] = {
"container": container, "label": txt, "underline": underline,
}
_make_tab_button(tabs_bar, "anonym", "Anonymisation")
_make_tab_button(tabs_bar, "params", "Paramètres")
_make_tab_button(tabs_bar, "profiles", "Profils")
# Séparateur gris clair sous les onglets
tk.Frame(self.root, bg=CLR_DIVIDER, height=1).pack(fill=tk.X)
# Conteneur des contenus (un seul visible à la fois)
tabs_content = tk.Frame(self.root, bg=CLR_BG)
tabs_content.pack(fill=tk.BOTH, expand=True)
tab_anonym_outer = tk.Frame(tabs_content, bg=CLR_BG)
tab_params_outer = tk.Frame(tabs_content, bg=CLR_BG)
tab_profiles_outer = tk.Frame(tabs_content, bg=CLR_BG)
self._tab_frames["anonym"] = tab_anonym_outer
self._tab_frames["params"] = tab_params_outer
self._tab_frames["profiles"] = tab_profiles_outer
# --- Scroll pour l'onglet Anonymisation ---
canvas = tk.Canvas(tab_anonym_outer, bg=CLR_BG, highlightthickness=0)
scrollbar = ttk.Scrollbar(tab_anonym_outer, orient=tk.VERTICAL, command=canvas.yview)
self._scroll_frame = tk.Frame(canvas, bg=CLR_BG)
self._scroll_frame.bind(
"<Configure>",
lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
)
canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw")
canvas.configure(yscrollcommand=scrollbar.set)
def _on_canvas_configure(event):
canvas.itemconfig(canvas_window, width=event.width)
canvas.bind("<Configure>", _on_canvas_configure)
def _on_mousewheel(event):
canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")
def _on_mousewheel_linux(event):
if event.num == 4:
canvas.yview_scroll(-3, "units")
elif event.num == 5:
canvas.yview_scroll(3, "units")
canvas.bind_all("<MouseWheel>", _on_mousewheel)
canvas.bind_all("<Button-4>", _on_mousewheel_linux)
canvas.bind_all("<Button-5>", _on_mousewheel_linux)
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
# --- Scroll pour l'onglet Paramètres ---
canvas2 = tk.Canvas(tab_params_outer, bg=CLR_BG, highlightthickness=0)
scrollbar2 = ttk.Scrollbar(tab_params_outer, orient=tk.VERTICAL, command=canvas2.yview)
self._params_scroll = tk.Frame(canvas2, bg=CLR_BG)
self._params_scroll.bind(
"<Configure>",
lambda e: canvas2.configure(scrollregion=canvas2.bbox("all")),
)
canvas2_window = canvas2.create_window((0, 0), window=self._params_scroll, anchor="nw")
canvas2.configure(yscrollcommand=scrollbar2.set)
def _on_canvas2_configure(event):
canvas2.itemconfig(canvas2_window, width=event.width)
canvas2.bind("<Configure>", _on_canvas2_configure)
canvas2.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar2.pack(side=tk.RIGHT, fill=tk.Y)
# --- Scroll pour l'onglet Profils ---
canvas3 = tk.Canvas(tab_profiles_outer, bg=CLR_BG, highlightthickness=0)
scrollbar3 = ttk.Scrollbar(tab_profiles_outer, orient=tk.VERTICAL, command=canvas3.yview)
self._profiles_scroll = tk.Frame(canvas3, bg=CLR_BG)
self._profiles_scroll.bind(
"<Configure>",
lambda e: canvas3.configure(scrollregion=canvas3.bbox("all")),
)
canvas3_window = canvas3.create_window((0, 0), window=self._profiles_scroll, anchor="nw")
canvas3.configure(yscrollcommand=scrollbar3.set)
def _on_canvas3_configure(event):
canvas3.itemconfig(canvas3_window, width=event.width)
canvas3.bind("<Configure>", _on_canvas3_configure)
canvas3.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar3.pack(side=tk.RIGHT, fill=tk.Y)
# "main" pointe désormais sur le scroll de l'onglet Anonymisation.
# Tout le contenu existant (étape 1, formats, boutons, progress, résultats)
# reste inchangé — seul le parent implicite a changé.
main = self._scroll_frame
# =============================================================
# ÉTAPE 1 — Choix du dossier
# =============================================================
tk.Label(
main, text="1. Choisir les documents ou fichiers (PDF, Word, Images, Texte)", font=self._f_body_bold,
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
self._folder_zone = tk.Frame(
main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER,
highlightthickness=2, cursor="hand2",
)
self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
# Contenu initial (invite à cliquer)
self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG)
self._folder_inner.pack(fill=tk.X, padx=20, pady=18)
self._folder_icon_lbl = tk.Label(
self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28),
bg=CLR_CARD_BG,
)
self._folder_icon_lbl.pack()
self._folder_text_lbl = tk.Label(
self._folder_inner,
text="Cliquez pour choisir un dossier ou un fichier",
font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY,
)
self._folder_text_lbl.pack(pady=(4, 0))
# Rendre toute la zone cliquable
for w in (self._folder_zone, self._folder_inner, self._folder_icon_lbl, self._folder_text_lbl):
w.bind("<Button-1>", lambda e: self._browse())
# =============================================================
# ÉTAPE 2 — Info formats générés
# =============================================================
tk.Label(
main, text="2. Formats générés", font=self._f_body_bold,
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
info_frame = tk.Frame(
main, bg=CLR_BLUE_LIGHT,
highlightbackground=CLR_CARD_BORDER, highlightthickness=1,
)
info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT)
info_inner.pack(fill=tk.X, padx=16, pady=12)
tk.Label(
info_inner,
text="Paramètres de traitement :",
font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X)
tk.Label(
info_inner,
text=("\u2022 Recherche récursive de tous les documents dans les sous-dossiers\n"
"\u2022 Sortie PDF Image (raster) — sécurité maximale, aucun texte résiduel\n"
"\u2022 Résultats dans « anonymise/ » en conservant les sous-dossiers source"),
font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
anchor="w", justify=tk.LEFT,
).pack(fill=tk.X, pady=(4, 0))
# --- Checkbox VLM ---
if VlmManager is not None:
vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT)
vlm_row.pack(fill=tk.X, pady=(8, 0))
self._vlm_check = tk.Checkbutton(
vlm_row, text="Analyse visuelle VLM (Ollama)",
variable=self.use_vlm, font=self._f_card_desc,
bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT,
command=self._on_vlm_toggle,
)
self._vlm_check.pack(side=tk.LEFT)
self._vlm_status_lbl = tk.Label(
vlm_row, text="", font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
)
self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0))
ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.")
# =============================================================
# BOUTONS LANCER / STOPPER
# =============================================================
buttons_frame = tk.Frame(main, bg=CLR_BG)
buttons_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 4))
self.btn_run = tk.Button(
buttons_frame, text="Lancer l'anonymisation",
font=self._f_button, bg=CLR_PRIMARY, fg="white",
activebackground="#1d4ed8", activeforeground="white",
relief=tk.FLAT, cursor="hand2", pady=10,
command=self._run,
)
self.btn_run.pack(fill=tk.X)
self.btn_stop = tk.Button(
buttons_frame, text="Arrêter le traitement",
font=self._f_button, bg=CLR_RED, fg="white",
activebackground="#b91c1c", activeforeground="white",
relief=tk.FLAT, cursor="hand2", pady=10,
command=self._stop,
)
# NE PAS pack — sera affiché pendant le traitement
# Lien aide
help_lbl = tk.Label(
main, text="Comment ça marche ?", font=self._f_small,
bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
)
help_lbl.pack(pady=(0, 8))
help_lbl.bind("<Button-1>", lambda e: self._show_help())
# =============================================================
# ONGLET "PARAMÈTRES" — contenu monté dans self._params_scroll
# =============================================================
self._params_frame = self._params_scroll
tk.Label(
self._params_frame,
text="Personnaliser le masquage",
font=(self._font_family, 14, "bold"),
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, padx=pad_x, pady=(20, 4))
tk.Label(
self._params_frame,
text=("Ces listes complètent les détections automatiques du programme. "
"Utile pour gérer les spécificités de votre établissement."),
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
).pack(fill=tk.X, padx=pad_x, pady=(0, 4))
self._params_summary = tk.Label(
self._params_frame,
text="",
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT, anchor="w", justify=tk.LEFT, wraplength=700,
)
self._params_summary.pack(fill=tk.X, padx=pad_x, pady=(0, 4))
tk.Label(
self._params_frame,
text=("Les listes ci-dessous ne montrent que les paramètres manuels éditables. "
"Le moteur applique aussi des règles automatiques non listées ici "
"(regex, gazetteers FINESS/INSEE, dictionnaires et règles admin)."),
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
).pack(fill=tk.X, padx=pad_x, pady=(0, 16))
tk.Label(
self._params_frame,
text="Masques PDF réutilisables",
font=(self._font_family, 12, "bold"),
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, padx=pad_x, pady=(0, 4))
tk.Label(
self._params_frame,
text=(
"Pour les formulaires toujours mis en page de la même façon, "
"ouvrez l'éditeur de masques PDF, dessinez les zones à caviarder "
"puis enregistrez un modèle réutilisable."
),
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
).pack(fill=tk.X, padx=pad_x, pady=(0, 8))
manual_mask_row = tk.Frame(self._params_frame, bg=CLR_BG)
manual_mask_row.pack(fill=tk.X, padx=pad_x, pady=(0, 16))
manual_mask_btn = tk.Button(
manual_mask_row, text="Ouvrir l'éditeur de masques PDF",
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._open_manual_mask_designer,
)
manual_mask_btn.pack(side=tk.LEFT)
self._manual_mask_combo = ttk.Combobox(
manual_mask_row,
textvariable=self.manual_mask_template_var,
state="readonly",
width=34,
)
self._manual_mask_combo.pack(side=tk.LEFT, padx=(6, 0))
self._manual_mask_combo.bind("<<ComboboxSelected>>", lambda _e: self._refresh_manual_mask_hint())
refresh_templates_btn = tk.Button(
manual_mask_row, text="Actualiser les modèles",
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._refresh_manual_mask_templates,
)
refresh_templates_btn.pack(side=tk.LEFT, padx=(6, 0))
templates_btn = tk.Button(
manual_mask_row, text="Ouvrir le dossier des modèles",
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._open_manual_mask_templates_dir,
)
templates_btn.pack(side=tk.LEFT, padx=(6, 0))
self._manual_mask_hint = tk.Label(
self._params_frame,
text="",
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
)
self._manual_mask_hint.pack(fill=tk.X, padx=pad_x, pady=(0, 12))
# Conteneur interne visible : réglages manuels éditables.
params_inner = tk.Frame(self._params_frame, bg=CLR_BG)
params_inner.pack(fill=tk.X, padx=pad_x, pady=(0, 12))
# --- Whitelist (phrases à ne pas anonymiser) ---
self._wl_listbox, self._wl_entry = self._build_phrase_list(
params_inner,
title="\u2705 Phrases à ne PAS anonymiser :",
placeholder="Ajouter une phrase à protéger...",
color_tag=CLR_GREEN_LIGHT,
on_change=self._refresh_params_summary,
)
# --- Blacklist (phrases à toujours masquer) ---
self._bl_listbox, self._bl_entry = self._build_phrase_list(
params_inner,
title="\u26d4 Mots/phrases à TOUJOURS masquer :",
placeholder="Ajouter un mot ou phrase à masquer...",
color_tag=CLR_PRIMARY_LIGHT,
on_change=self._refresh_params_summary,
)
# --- Stop-words additionnels (mots à ne jamais identifier comme noms) ---
# Différent de la whitelist : agit en amont, pour les sigles, acronymes,
# termes métier locaux qui ressemblent à des noms mais n'en sont pas.
self._sw_listbox, self._sw_entry = self._build_phrase_list(
params_inner,
title="\u26a0 Mots à ne jamais identifier comme noms (sigles, acronymes...) :",
placeholder="Ajouter un mot (ex: sigle local, acronyme métier)...",
color_tag=CLR_ACCENT_LIGHT,
on_change=self._refresh_params_summary,
)
# Boutons sauvegarder + exporter
btn_row = tk.Frame(params_inner, bg=CLR_BG)
btn_row.pack(fill=tk.X, pady=(12, 12))
export_btn = tk.Button(
btn_row, text="\u2709 Exporter pour envoi",
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._export_params,
)
export_btn.pack(side=tk.LEFT)
import_btn = tk.Button(
btn_row, text="\u2B07 Importer",
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._import_params,
)
import_btn.pack(side=tk.LEFT, padx=(4, 0))
save_btn = tk.Button(
btn_row, text="Sauvegarder",
font=self._f_small, bg=CLR_PRIMARY, fg="white",
activebackground=CLR_PRIMARY_DARK, activeforeground="white",
relief=tk.FLAT, cursor="hand2", padx=14, pady=6,
command=self._save_params,
)
save_btn.pack(side=tk.RIGHT)
# Charger les valeurs initiales depuis la config
self._load_params()
self._refresh_manual_mask_templates()
# =============================================================
# ONGLET "PROFILS"
# =============================================================
self._profiles_frame = self._profiles_scroll
tk.Label(
self._profiles_frame,
text="Profils métier",
font=(self._font_family, 14, "bold"),
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, padx=pad_x, pady=(20, 4))
tk.Label(
self._profiles_frame,
text=(
"Un profil mémorise les réglages courants de l'application. "
"Utilise cet onglet pour choisir le profil actif, modifier sa description, "
"et enregistrer un nouveau profil utilisateur."
),
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
).pack(fill=tk.X, padx=pad_x, pady=(0, 12))
profile_card = tk.Frame(
self._profiles_frame,
bg=CLR_CARD_BG,
highlightbackground=CLR_CARD_BORDER,
highlightthickness=1,
)
profile_card.pack(fill=tk.X, padx=pad_x, pady=(0, 16))
profile_card_inner = tk.Frame(profile_card, bg=CLR_CARD_BG)
profile_card_inner.pack(fill=tk.X, padx=16, pady=14)
profile_card_inner.columnconfigure(0, weight=3)
profile_card_inner.columnconfigure(1, weight=2)
profile_left = tk.Frame(profile_card_inner, bg=CLR_CARD_BG)
profile_left.grid(row=0, column=0, sticky="nsew", padx=(0, 10))
profile_right = tk.Frame(profile_card_inner, bg=CLR_BLUE_LIGHT)
profile_right.grid(row=0, column=1, sticky="nsew")
tk.Label(
profile_left,
text="Profil actif",
font=self._f_body_bold,
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, pady=(0, 4))
profile_select_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
profile_select_row.pack(fill=tk.X, pady=(0, 10))
self._profile_combo = ttk.Combobox(
profile_select_row,
textvariable=self.processing_profile_label_var,
state="readonly",
width=34,
)
self._profile_combo.pack(side=tk.LEFT)
self._profile_combo.bind("<<ComboboxSelected>>", lambda _e: self._apply_selected_processing_profile())
refresh_profiles_btn = tk.Button(
profile_select_row, text="Actualiser",
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._refresh_processing_profiles,
)
refresh_profiles_btn.pack(side=tk.LEFT, padx=(6, 0))
self._profile_kind_label = tk.Label(
profile_left,
text="",
font=self._f_small,
bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
)
self._profile_kind_label.pack(fill=tk.X, pady=(0, 8))
tk.Label(
profile_left,
text="Description",
font=self._f_small,
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, pady=(0, 4))
self._profile_description_entry = tk.Entry(
profile_left,
textvariable=self.profile_description_var,
font=self._f_small,
relief=tk.GROOVE,
bd=1,
)
self._profile_description_entry.pack(fill=tk.X, pady=(0, 10))
self.profile_description_var.trace_add("write", self._on_profile_description_change)
flags_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
flags_row.pack(fill=tk.X, pady=(0, 10))
self._profile_require_manual_mask_check = tk.Checkbutton(
flags_row,
text="Masque manuel obligatoire",
variable=self.profile_require_manual_mask_var,
font=self._f_small,
bg=CLR_CARD_BG,
activebackground=CLR_CARD_BG,
command=self._on_profile_editor_change,
)
self._profile_require_manual_mask_check.pack(side=tk.LEFT)
self._profile_force_disable_vlm_check = tk.Checkbutton(
flags_row,
text="Désactiver le VLM",
variable=self.profile_force_disable_vlm_var,
font=self._f_small,
bg=CLR_CARD_BG,
activebackground=CLR_CARD_BG,
command=self._on_profile_editor_change,
)
self._profile_force_disable_vlm_check.pack(side=tk.LEFT, padx=(12, 0))
tk.Label(
profile_left,
text="Masque PDF mémorisé par ce profil",
font=self._f_small,
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, pady=(0, 4))
profile_mask_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
profile_mask_row.pack(fill=tk.X, pady=(0, 10))
self._profile_manual_mask_combo = ttk.Combobox(
profile_mask_row,
textvariable=self.manual_mask_template_var,
state="readonly",
width=34,
)
self._profile_manual_mask_combo.pack(side=tk.LEFT)
self._profile_manual_mask_combo.bind(
"<<ComboboxSelected>>",
lambda _e: self._refresh_manual_mask_hint(),
)
tk.Button(
profile_mask_row, text="Actualiser les modèles",
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._refresh_manual_mask_templates,
).pack(side=tk.LEFT, padx=(6, 0))
self._profile_mask_explainer = tk.Label(
profile_left,
text=(
"Ce choix est enregistré dans le profil. "
"Quand tu recharges ce profil, ce masque est re-sélectionné automatiquement."
),
font=self._f_small,
bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=420,
)
self._profile_mask_explainer.pack(fill=tk.X, pady=(0, 10))
profile_actions_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
profile_actions_row.pack(fill=tk.X)
tk.Button(
profile_actions_row, text="Nouveau profil...",
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._create_processing_profile,
).pack(side=tk.LEFT)
tk.Button(
profile_actions_row, text="Enregistrer",
font=self._f_small, bg=CLR_PRIMARY, fg="white",
activebackground=CLR_PRIMARY_DARK, activeforeground="white",
relief=tk.FLAT, cursor="hand2", padx=10, pady=6,
command=self._save_selected_processing_profile,
).pack(side=tk.LEFT, padx=(6, 0))
tk.Button(
profile_actions_row, text="Renommer...",
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._rename_selected_processing_profile,
).pack(side=tk.LEFT, padx=(6, 0))
tk.Button(
profile_actions_row, text="Définir par défaut",
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._set_selected_processing_profile_default,
).pack(side=tk.LEFT, padx=(6, 0))
tk.Button(
profile_actions_row, text="Supprimer",
font=self._f_small, bg=CLR_RED_LIGHT, fg=CLR_RED,
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
command=self._delete_selected_processing_profile,
).pack(side=tk.LEFT, padx=(6, 0))
profile_right_inner = tk.Frame(profile_right, bg=CLR_BLUE_LIGHT)
profile_right_inner.pack(fill=tk.BOTH, expand=True, padx=14, pady=14)
tk.Label(
profile_right_inner,
text="Résumé du profil",
font=self._f_body_bold,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, pady=(0, 6))
self._profile_description = tk.Label(
profile_right_inner,
text="",
font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
)
self._profile_description.pack(fill=tk.X, pady=(0, 10))
self._profile_capture_summary = tk.Label(
profile_right_inner,
text="",
font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w", justify=tk.LEFT, wraplength=300,
)
self._profile_capture_summary.pack(fill=tk.X, pady=(0, 10))
tk.Label(
profile_right_inner,
text=(
"Sens de « masque manuel obligatoire » : le profil n'impose pas un masque précis, "
"mais il bloque le lancement si aucun masque PDF n'est sélectionné."
),
font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
).pack(fill=tk.X, pady=(0, 10))
tk.Label(
profile_right_inner,
text=(
"Lien profil ↔ masque : le masque actuellement choisi dans cet onglet "
"est mémorisé dans le profil lors de l'enregistrement."
),
font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
).pack(fill=tk.X)
self._refresh_processing_profiles()
# Retour dans l'onglet Anonymisation
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 8))
# =============================================================
# BARRE DE PROGRESSION (masquée)
# =============================================================
self._progress_frame = tk.Frame(main, bg=CLR_BG)
# NE PAS pack — sera affiché dynamiquement
self._progressbar = ttk.Progressbar(
self._progress_frame, orient=tk.HORIZONTAL, mode="determinate",
)
self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4))
self._progress_label = tk.Label(
self._progress_frame, text="", font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
)
self._progress_label.pack(fill=tk.X)
# =============================================================
# SECTION RÉSULTATS (masquée)
# =============================================================
self._results_frame = tk.Frame(main, bg=CLR_BG)
# NE PAS pack
tk.Label(
self._results_frame, text="Résultats", font=self._f_body_bold,
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
).pack(fill=tk.X, pady=(0, 8))
stats_row = tk.Frame(self._results_frame, bg=CLR_BG)
stats_row.pack(fill=tk.X, pady=(0, 12))
stats_row.columnconfigure(0, weight=1)
stats_row.columnconfigure(1, weight=1)
stats_row.columnconfigure(2, weight=1)
self._stat_files = self._make_stat_card(stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0)
self._stat_masked = self._make_stat_card(stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1)
self._stat_errors = self._make_stat_card(stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2)
# Indicateurs de qualité et sécurité
quality_row = tk.Frame(self._results_frame, bg=CLR_BG)
quality_row.pack(fill=tk.X, pady=(0, 12))
# Badge de fuites
self._leak_badge = tk.Label(
quality_row,
text="🔒 Vérification en cours...",
font=self._f_body_bold,
bg=CLR_BLUE_LIGHT, fg=CLR_PRIMARY,
padx=12, pady=6,
)
self._leak_badge.pack(side=tk.LEFT, padx=(0, 8))
# Temps de traitement
self._perf_label = tk.Label(
quality_row,
text="⏱️ Calcul en cours...",
font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
)
self._perf_label.pack(side=tk.LEFT)
self.btn_open_out = tk.Button(
self._results_frame, text="Ouvrir le dossier de résultats",
font=self._f_button, bg=CLR_GREEN, fg="white",
activebackground="#15803d", activeforeground="white",
relief=tk.FLAT, cursor="hand2", pady=10,
command=self._open_out,
)
self.btn_open_out.pack(fill=tk.X, pady=(0, 8))
# Toggle journal
self._log_visible = False
self._log_toggle = tk.Label(
self._results_frame, text="Voir le journal détaillé \u25BC",
font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
)
self._log_toggle.pack(pady=(0, 4))
self._log_toggle.bind("<Button-1>", lambda e: self._toggle_log())
self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG)
# NE PAS pack
self.txt = tk.Text(
self._log_frame, height=14, font=self._f_small,
bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD,
state=tk.DISABLED,
)
log_scrollbar = ttk.Scrollbar(self._log_frame, command=self.txt.yview)
self.txt.configure(yscrollcommand=log_scrollbar.set)
self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
# =============================================================
# BARRE DE STATUT
# =============================================================
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(18, 0))
status_bar = tk.Frame(main, bg=CLR_BG)
status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12))
tk.Label(
status_bar, textvariable=self.status_var, font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
).pack(side=tk.LEFT)
tk.Label(
status_bar, text=_version_long(), font=self._f_small,
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e",
).pack(side=tk.RIGHT)
# ---------------------------------------------------------------
# Cartes de statistiques
# ---------------------------------------------------------------
def _make_stat_card(self, parent, number: str, label: str,
                    fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Label]:
    """Build one statistics card and place it in column ``col`` of ``parent``.

    The card is a coloured frame holding a large number on top of a small
    caption. The returned dict exposes the three widgets under the keys
    ``"frame"``, ``"number"`` and ``"label"`` so they can be recoloured later.
    """
    # Horizontal gutter: flush on the outer edge for columns 0 and 2.
    if col == 0:
        gutter = (0, 4)
    elif col == 1:
        gutter = (4, 4)
    else:
        gutter = (4, 0)

    card = tk.Frame(parent, bg=bg_color, highlightbackground=bg_color, highlightthickness=1)
    card.grid(row=0, column=col, sticky="nsew", padx=gutter)

    value_label = tk.Label(card, text=number, font=self._f_stat, bg=bg_color, fg=fg_color)
    value_label.pack(pady=(12, 2))

    caption_label = tk.Label(
        card, text=label, font=self._f_small, bg=bg_color, fg=CLR_TEXT_SECONDARY,
    )
    caption_label.pack(pady=(0, 12))

    return {"frame": card, "number": value_label, "label": caption_label}
def _update_stat_card(self, card: Dict[str, tk.Label], value: int,
fg_color: str, bg_color: str):
card["number"].configure(text=str(value), fg=fg_color, bg=bg_color)
card["frame"].configure(bg=bg_color, highlightbackground=bg_color)
card["label"].configure(bg=bg_color)
# ---------------------------------------------------------------
# Actions dossier
# ---------------------------------------------------------------
def _browse(self):
    """Offer the choice between a folder and a single file through a popup menu."""
    popup = tk.Menu(self.root, tearoff=0)
    entries = (
        ("Choisir un dossier", self._browse_folder),
        ("Choisir un fichier", self._browse_file),
    )
    for caption, handler in entries:
        popup.add_command(label=caption, command=handler)
    # Show the menu under the mouse pointer; always release the grab afterwards.
    try:
        popup.tk_popup(self.root.winfo_pointerx(), self.root.winfo_pointery())
    finally:
        popup.grab_release()
def _browse_folder(self):
    """Ask for a directory and, if one is chosen, switch to folder mode."""
    chosen_dir = filedialog.askdirectory()
    if not chosen_dir:
        # Dialog cancelled: keep the current selection untouched.
        return
    self._single_file = None
    self.dir_var.set(chosen_dir)
    self._update_folder_display()
def _browse_file(self):
    """Ask for a single document and, if one is chosen, switch to single-file mode.

    The first dialog filter lists every extension from
    ``format_converter.SUPPORTED_EXTENSIONS`` (PDF only when that module
    cannot be imported).
    """
    try:
        from format_converter import SUPPORTED_EXTENSIONS
    except ImportError:
        SUPPORTED_EXTENSIONS = {".pdf"}

    # Build the glob patterns shown by the file dialog.
    patterns = [f"*{ext}" for ext in sorted(SUPPORTED_EXTENSIONS)]
    file_types = [
        ("Documents supportés", " ".join(patterns)),
        ("PDF", "*.pdf"),
        ("Word", "*.docx"),
        ("Images", "*.jpg *.jpeg *.png *.tiff *.tif *.bmp"),
        ("Texte", "*.txt *.rtf *.odt *.html *.htm"),
        ("Tous", "*.*"),
    ]
    picked = filedialog.askopenfilename(
        title="Choisir un document à anonymiser",
        filetypes=file_types,
    )
    if not picked:
        return
    self._single_file = Path(picked)
    self.dir_var.set(str(self._single_file.parent))
    self._update_folder_display()
def _update_folder_display(self):
    """Rebuild the selection card for the current folder or single file.

    Does nothing while no folder is set. Otherwise the card content is
    destroyed and recreated with an icon, a (possibly truncated) title, a
    subtitle and a "Changer" link, and the zone border turns green.
    """
    folder = self.dir_var.get()
    if not folder:
        return

    single_file = getattr(self, '_single_file', None)
    is_single = single_file is not None

    if is_single:
        icon = "\U0001f4c4"  # 📄
        title = single_file.name
        subtitle = f"Fichier unique — {single_file.suffix.upper().lstrip('.')}"
    else:
        icon = "\U0001f4c2"  # 📂
        title = folder
        # Count the supported documents (recursive).
        try:
            from format_converter import SUPPORTED_EXTENSIONS
        except ImportError:
            SUPPORTED_EXTENSIONS = {".pdf"}
        try:
            doc_count = len(list_supported_documents(Path(folder), SUPPORTED_EXTENSIONS))
        except Exception:
            # Best effort: an unreadable folder is simply shown with a zero count.
            doc_count = 0
        if doc_count <= 1:
            noun = "document trouvé (récursif)"
        else:
            noun = "documents trouvés (récursif)"
        subtitle = f"{doc_count} {noun}"

    # Keep only the tail of long paths so the label fits on one line.
    if len(title) > 60:
        title = "..." + title[-57:]

    # Empty the card, then rebuild its content.
    for child in self._folder_inner.winfo_children():
        child.destroy()

    row = tk.Frame(self._folder_inner, bg=CLR_CARD_BG)
    row.pack(fill=tk.X)

    icon_label = tk.Label(row, text=icon, font=(self._font_family, 16), bg=CLR_CARD_BG)
    icon_label.pack(side=tk.LEFT, padx=(0, 8))

    info_frame = tk.Frame(row, bg=CLR_CARD_BG)
    info_frame.pack(side=tk.LEFT, fill=tk.X, expand=True)

    title_label = tk.Label(
        info_frame, text=title, font=self._f_body_bold,
        bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
    )
    title_label.pack(fill=tk.X)

    subtitle_label = tk.Label(
        info_frame, text=subtitle,
        font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
    )
    subtitle_label.pack(fill=tk.X)

    change_link = tk.Label(
        row, text="Changer", font=self._f_small,
        bg=CLR_CARD_BG, fg=CLR_PRIMARY, cursor="hand2",
    )
    change_link.pack(side=tk.RIGHT, padx=(8, 0))
    change_link.bind("<Button-1>", lambda e: self._browse())

    # Green border signals that a valid selection is in place.
    self._folder_zone.configure(highlightbackground=CLR_GREEN)
# ---------------------------------------------------------------
# Lancement
# ---------------------------------------------------------------
def _run(self):
    """Validate the current selection and start the batch in a background thread.

    Checks, in order: the selected file or folder exists, at least one
    supported document is found (folder mode), the manual-mask requirement
    of the live profile is satisfied, and the selected mask template can be
    loaded. Each failed check shows a warning dialog and aborts. On success
    the run button is swapped for the stop button, the progress bar is
    shown and ``_worker`` is started in a daemon thread.
    """
    is_single = getattr(self, '_single_file', None) is not None
    profile_key = self._selected_processing_profile_key()
    profile_spec = self._build_live_profile_spec()
    manual_mask_template = self._selected_manual_mask_template_path()
    if is_single:
        # Single-file mode
        if not self._single_file.is_file():
            messagebox.showwarning("Fichier introuvable", f"{self._single_file}")
            return
        folder = self._single_file.parent
        pdfs = [self._single_file]
    else:
        # Folder mode
        folder_text = self.dir_var.get().strip()
        folder = Path(folder_text)
        # Path("") normalises to Path("."), which *is* a directory: without
        # the explicit emptiness check a blank selection would silently
        # process the current working directory.
        if not folder_text or not folder.is_dir():
            messagebox.showwarning(
                "Dossier invalide",
                "Choisissez un dossier ou un fichier.",
            )
            return
        try:
            from format_converter import SUPPORTED_EXTENSIONS
        except ImportError:
            SUPPORTED_EXTENSIONS = {".pdf"}
        pdfs = list_supported_documents(folder, SUPPORTED_EXTENSIONS)
        if not pdfs:
            exts = ", ".join(sorted(SUPPORTED_EXTENSIONS))
            messagebox.showwarning(
                "Aucun document",
                f"Aucun fichier supporté trouvé.\n"
                f"Formats acceptés : {exts}\n"
                f"(recherche récursive dans les sous-dossiers, hors anonymise/)",
            )
            return
    if profile_spec.get("require_manual_mask") and manual_mask_template is None:
        messagebox.showwarning(
            "Masque manuel requis",
            "Le profil sélectionné exige un masque manuel.\n"
            "Choisissez un modèle de masque avant de lancer le traitement.",
        )
        return
    if manual_mask_template is not None:
        # The mask helpers are None when their import failed at module load.
        if apply_template_vector is None or Template is None or load_template_yaml is None:
            messagebox.showwarning(
                "Masque manuel indisponible",
                "Le template sélectionné ne peut pas être appliqué car "
                "la bibliothèque PDF n'est pas disponible.",
            )
            return
        if not manual_mask_template.is_file():
            messagebox.showwarning(
                "Masque manuel introuvable",
                f"Le modèle sélectionné est introuvable :\n{manual_mask_template}",
            )
            # The template list is stale: rebuild it so the combobox matches the disk.
            self._refresh_manual_mask_templates()
            return
        try:
            # Load once here purely as validation; the worker loads it again.
            self._load_manual_mask_template(manual_mask_template)
        except Exception as e:
            messagebox.showwarning(
                "Masque manuel invalide",
                f"Impossible de charger le modèle sélectionné :\n{e}",
            )
            return
    self._stop_requested = False
    self.btn_run.pack_forget()
    self.btn_stop.pack(fill=tk.X)
    self._show_progress(total=len(pdfs))
    self._hide_results()
    threading.Thread(
        target=self._worker,
        args=(folder, pdfs, manual_mask_template, profile_key, profile_spec),
        daemon=True,
    ).start()
def _stop(self):
"""Demande l'arrêt du traitement en cours."""
self._stop_requested = True
self.btn_stop.config(state=tk.DISABLED, bg="#fca5a5", text="Arrêt en cours...")
self.status_var.set("Arrêt demandé, fin du document en cours...")
def _worker(
    self,
    folder: Path,
    pdfs: List[Path],
    manual_mask_template_path: Optional[Path],
    profile_key: str,
    profile_spec: Dict[str, Any],
):
    """Process every document of the batch and report through ``self.queue``.

    Started by ``_run`` in a daemon thread. All feedback is posted as
    ``UiMessage`` objects (LOG, PROGRESS, then a final DONE) on ``self.queue``.

    Args:
        folder: Root of the batch; results go to ``folder / "anonymise"``.
        pdfs: Documents to process, in order.
        manual_mask_template_path: Mask template to apply to PDF inputs
            before processing, or ``None``.
        profile_key: Key of the selected profile; only used as a fallback
            label in the log.
        profile_spec: Live profile settings. Keys read here are
            ``"param_lists"``, ``"dictionaries_overlay"`` and ``"label"``.
    """
    import time
    start_time = time.time()
    manual_mask_template = None
    temp_profile_cfg_path: Optional[Path] = None
    try:
        # NOTE(review): self.cfg_path is read from the worker thread; if it is
        # a Tk variable, confirm this cross-thread access is safe.
        config_path = Path(self.cfg_path.get())
        merged_cfg = load_effective_dictionaries_dict(config_path)
        # Overlay the three phrase lists captured from the GUI onto the config.
        param_lists = profile_spec.get("param_lists") or {}
        if isinstance(param_lists, dict):
            merged_cfg["whitelist_phrases"] = list(param_lists.get("whitelist_phrases", []))
            if not isinstance(merged_cfg.get("blacklist"), dict):
                merged_cfg["blacklist"] = {}
            merged_cfg["blacklist"]["force_mask_terms"] = list(
                param_lists.get("blacklist_force_mask_terms", [])
            )
            merged_cfg["additional_stopwords"] = list(
                param_lists.get("additional_stopwords", [])
            )
        profile_overlay = profile_spec.get("dictionaries_overlay") or {}
        if profile_overlay:
            merged_cfg = deep_merge_dict(merged_cfg, profile_overlay)
        if yaml is not None:
            # Write the merged configuration to a temporary YAML file next to
            # the original config; it is removed in the outer ``finally``.
            fd, temp_name = tempfile.mkstemp(
                prefix="profile_",
                suffix=".yml",
                dir=str(config_path.parent),
            )
            os.close(fd)
            temp_profile_cfg_path = Path(temp_name)
            temp_profile_cfg_path.write_text(
                yaml.safe_dump(
                    merged_cfg,
                    allow_unicode=True,
                    default_flow_style=False,
                    sort_keys=False,
                ),
                encoding="utf-8",
            )
            config_path = temp_profile_cfg_path
        if profile_spec:
            label = profile_spec.get("label") or profile_key
            self.queue.put(
                UiMessage(
                    kind=MsgType.LOG,
                    text=f"~ profil métier actif : {label}",
                )
            )
        if manual_mask_template_path is not None:
            manual_mask_template = self._load_manual_mask_template(manual_mask_template_path)
            self.queue.put(
                UiMessage(
                    kind=MsgType.LOG,
                    text=f"~ masque manuel actif : {manual_mask_template_path.name}",
                )
            )
        outdir = folder / "anonymise"
        outdir.mkdir(exist_ok=True)
        ok = ko = 0
        # Running total of audit counts across the whole batch.
        global_counts: Dict[str, int] = {}
        for i, pdf in enumerate(pdfs, start=1):
            # Check whether a stop was requested
            if self._stop_requested:
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\n⚠️ Arrêt demandé par l'utilisateur"))
                break
            # Show the path relative to the batch root when the file lies under it.
            display_name = pdf.name
            if folder in pdf.parents:
                display_name = str(pdf.relative_to(folder))
            self.queue.put(UiMessage(
                kind=MsgType.PROGRESS, current=i, total=len(pdfs),
                filename=display_name,
            ))
            try:
                source_doc = pdf
                temp_dir_ctx = None
                manual_mask_audit = None
                if manual_mask_template is not None:
                    if pdf.suffix.lower() == ".pdf":
                        # Apply the mask into a throw-away directory and feed
                        # the masked copy to the processing function instead.
                        temp_dir_ctx = tempfile.TemporaryDirectory(prefix="manual-mask-")
                        temp_dir = Path(temp_dir_ctx.name)
                        source_doc = temp_dir / pdf.name
                        manual_mask_audit = temp_dir / f"{pdf.stem}.manual_mask.audit.jsonl"
                        apply_template_vector(pdf, source_doc, manual_mask_template, manual_mask_audit)
                        self.queue.put(
                            UiMessage(
                                kind=MsgType.LOG,
                                text=f" ~ masque manuel appliqué : {manual_mask_template.name}",
                            )
                        )
                    else:
                        self.queue.put(
                            UiMessage(
                                kind=MsgType.LOG,
                                text=" ~ masque manuel ignoré : format non PDF",
                            )
                        )
                active = self._active_manager
                # NER is used only when enabled and the active manager reports being loaded.
                use_ner = bool(active and self.use_hf and hasattr(active, 'is_loaded') and active.is_loaded())
                camembert_active = (
                    self._camembert_manager
                    if self._camembert_manager
                    and hasattr(self._camembert_manager, "is_loaded")
                    and self._camembert_manager.is_loaded()
                    else None
                )
                # No thresholds are built when the active manager is an EdsPseudoManager.
                thresholds = None
                if use_ner and NerThresholds and not (EdsPseudoManager and isinstance(active, EdsPseudoManager)):
                    thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85)
                # Extract the OGC number from the parent directory name
                # e.g. "257_23209962" → OGC = "257"
                parent_name = pdf.parent.name
                ogc = parent_name.split("_")[0] if "_" in parent_name else None
                # VLM
                # NOTE(review): profile_spec["force_disable_vlm"] is not consulted
                # here — confirm it is enforced elsewhere (e.g. via self.use_vlm).
                vlm_active = bool(
                    self.use_vlm.get()
                    and self._vlm_available
                    and self._vlm_manager
                    and self._vlm_manager.is_loaded()
                )
                # Use process_document (multi-format) when available,
                # otherwise fall back to process_pdf (PDF only)
                _process_fn = getattr(core, 'process_document', None) or core.process_pdf
                _path_key = "doc_path" if _process_fn.__name__ == "process_document" else "pdf_path"
                doc_outdir = build_batch_output_dir(folder, outdir, pdf)
                doc_outdir.mkdir(parents=True, exist_ok=True)
                outputs = _process_fn(
                    **{_path_key: source_doc},
                    out_dir=doc_outdir,
                    make_vector_redaction=False,
                    also_make_raster_burn=True,
                    config_path=config_path,
                    use_hf=use_ner,
                    ner_manager=active,
                    ner_thresholds=thresholds,
                    ogc_label=ogc,
                    vlm_manager=self._vlm_manager if vlm_active else None,
                    camembert_manager=camembert_active,
                )
                # Merge the manual-mask audit into the document audit before
                # the temporary directory holding it is cleaned up.
                if manual_mask_audit is not None and "audit" in outputs:
                    append_jsonl_file(Path(outputs["audit"]), manual_mask_audit)
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {display_name}"))
                for k, v in outputs.items():
                    self.queue.put(UiMessage(kind=MsgType.LOG, text=f" - {k}: {v}"))
                audit_path = Path(outputs.get("audit", ""))
                counts = self._count_audit(audit_path)
                if counts:
                    self.queue.put(UiMessage(
                        kind=MsgType.LOG,
                        text=" ~ résumé : " + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())),
                    ))
                    for k, v in counts.items():
                        global_counts[k] = global_counts.get(k, 0) + v
                ok += 1
            except Exception as e:
                # A failing document is logged and counted; the batch goes on.
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2717 {display_name} \u2192 ERREUR: {e}"))
                ko += 1
            finally:
                if temp_dir_ctx is not None:
                    temp_dir_ctx.cleanup()
        total_time = time.time() - start_time
        total_masked = sum(global_counts.values())
        # Different message when a stop was requested
        if self._stop_requested:
            # The output directory is only reported when at least one document succeeded.
            self.queue.put(UiMessage(
                kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
                outdir=str(outdir) if ok > 0 else "", total_time=total_time,
            ))
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text=f"⚠️ TRAITEMENT INTERROMPU : {ok} fichiers traités, {len(pdfs) - ok - ko} ignorés",
            ))
        else:
            self.queue.put(UiMessage(
                kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
                outdir=str(outdir), total_time=total_time,
            ))
        if ok and global_counts:
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text="RÉSUMÉ DU LOT : " + ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items())),
            ))
    except Exception as e:
        # Failure outside the per-document loop: report every document as failed.
        self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}"))
        total_time = time.time() - start_time
        self.queue.put(UiMessage(kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0, outdir="", total_time=total_time))
    finally:
        # Best-effort removal of the temporary merged configuration.
        if temp_profile_cfg_path is not None:
            try:
                temp_profile_cfg_path.unlink()
            except Exception:
                pass
# ---------------------------------------------------------------
# Pompe de messages
# ---------------------------------------------------------------
def _pump_logs(self):
try:
while True:
msg = self.queue.get_nowait()
if msg.kind == MsgType.LOG:
self._append_log(msg.text)
elif msg.kind == MsgType.PROGRESS:
self._update_progress(msg.current, msg.total, msg.filename)
elif msg.kind == MsgType.DONE:
self._on_done(msg)
except queue.Empty:
pass
finally:
self.root.after(60, self._pump_logs)
def _append_log(self, text: str):
self.txt.configure(state=tk.NORMAL)
self.txt.insert(tk.END, text + "\n")
self.txt.see(tk.END)
self.txt.configure(state=tk.DISABLED)
# ---------------------------------------------------------------
# Progression
# ---------------------------------------------------------------
def _show_progress(self, total: int):
self._progressbar.configure(maximum=total, value=0)
self._progress_label.configure(text="")
self._progress_frame.pack(fill=tk.X, padx=32, pady=(0, 18),
before=self._results_frame if self._results_frame.winfo_manager() else None)
def _hide_progress(self):
self._progress_frame.pack_forget()
def _update_progress(self, current: int, total: int, filename: str):
self._progressbar.configure(value=current)
self._progress_label.configure(text=f"{current}/{total}{filename}")
self.status_var.set(f"{current}/{total}{filename}")
# ---------------------------------------------------------------
# Résultats
# ---------------------------------------------------------------
def _show_results(self, ok: int, ko: int, masked: int):
    """Fill the three statistics cards and display the results section.

    The error card switches to the red palette as soon as ``ko`` is positive.
    """
    self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT)
    self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT)

    if ko > 0:
        error_palette = (CLR_RED, CLR_RED_LIGHT)
    else:
        error_palette = (CLR_TEXT_SECONDARY, "#f3f4f6")
    self._update_stat_card(self._stat_errors, ko, *error_palette)

    self._results_frame.pack(fill=tk.X, padx=32, pady=(0, 12))
def _hide_results(self):
self._results_frame.pack_forget()
self._log_frame.pack_forget()
self._log_visible = False
self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
# Vider le journal
self.txt.configure(state=tk.NORMAL)
self.txt.delete("1.0", tk.END)
self.txt.configure(state=tk.DISABLED)
def _on_done(self, msg: UiMessage):
    """Restore the idle UI after a batch and display its results.

    Swaps the stop button back for the run button, updates the status bar,
    runs the leak check when an output directory was reported, then shows
    the performance line and the statistics cards.
    """
    self._hide_progress()

    stop_button = self.btn_stop
    stop_button.pack_forget()
    stop_button.config(state=tk.NORMAL, bg=CLR_RED, text="Arrêter le traitement")
    self.btn_run.pack(fill=tk.X)

    status = (
        f"Interrompu : {msg.ok} traités, {msg.ko} erreurs."
        if self._stop_requested
        else f"Terminé : {msg.ok} OK, {msg.ko} erreurs."
    )
    self.status_var.set(status)

    if msg.outdir:
        self._last_outdir = Path(msg.outdir)
        # Check for leaks
        leaks = self._check_leaks(Path(msg.outdir))
        self._update_leak_indicator(leaks)

    # Compute the performance summary
    self._perf_label.configure(text=self._calculate_performance(msg.ok, msg.total_time))
    self._show_results(msg.ok, msg.ko, msg.masked)
# ---------------------------------------------------------------
# Toggle journal
# ---------------------------------------------------------------
def _toggle_log(self):
if self._log_visible:
self._log_frame.pack_forget()
self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
else:
self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))
self._log_toggle.configure(text="Masquer le journal \u25B2")
self._log_visible = not self._log_visible
# ---------------------------------------------------------------
# Ouvrir dossier résultats
# ---------------------------------------------------------------
def _open_out(self):
if self._last_outdir:
open_folder(self._last_outdir)
def _manual_mask_templates_dir(self) -> Path:
    """Return the mask-templates directory resolved from ``_exe_dir()``."""
    application_dir = _exe_dir()
    return ensure_mask_templates_dir(application_dir)
def _selected_processing_profile_key(self) -> str:
label = self.processing_profile_label_var.get()
return self._processing_profile_labels_to_keys.get(label, "")
def _selected_processing_profile_spec(self) -> Dict[str, Any]:
key = self._selected_processing_profile_key()
return self._processing_profiles.get(key, {})
def _set_listbox_values(self, listbox: tk.Listbox, values: List[str]):
listbox.delete(0, tk.END)
for value in values:
listbox.insert(tk.END, value)
def _current_param_lists(self) -> Dict[str, List[str]]:
return {
"whitelist_phrases": list(self._wl_listbox.get(0, tk.END)),
"blacklist_force_mask_terms": list(self._bl_listbox.get(0, tk.END)),
"additional_stopwords": list(self._sw_listbox.get(0, tk.END)),
}
def _apply_param_lists_to_widgets(self, param_lists: Dict[str, List[str]]):
self._set_listbox_values(
self._wl_listbox,
list(param_lists.get("whitelist_phrases", [])),
)
self._set_listbox_values(
self._bl_listbox,
list(param_lists.get("blacklist_force_mask_terms", [])),
)
self._set_listbox_values(
self._sw_listbox,
list(param_lists.get("additional_stopwords", [])),
)
self._refresh_params_summary()
def _current_manual_mask_template_setting(self) -> str:
selected = self._selected_manual_mask_template_path()
if selected is None:
return ""
return mask_template_label(selected, _exe_dir())
def _select_manual_mask_template_from_setting(self, template_name: str):
    """Select in the combobox the template stored under ``template_name``.

    Falls back to the "no mask" entry when the name is blank or when no
    known template resolves to that path in the templates directory.
    """
    requested = str(template_name or "").strip()
    if requested:
        target_path = self._manual_mask_templates_dir() / requested
        chosen_label = next(
            (
                label
                for label, path in self._manual_mask_templates.items()
                if path == target_path
            ),
            MANUAL_MASK_NONE_LABEL,
        )
    else:
        chosen_label = MANUAL_MASK_NONE_LABEL
    self.manual_mask_template_var.set(chosen_label)
def _build_live_profile_spec(
    self,
    *,
    label: Optional[str] = None,
    description: Optional[str] = None,
    base_spec: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble a profile spec from the current editor widgets.

    Args:
        label: Explicit label; when ``None`` the base spec label, then the
            selected combobox label, then ``"Profil"`` is used.
        description: Explicit description; when ``None`` the description
            variable, then the base spec description, then ``""`` is used.
        base_spec: Spec to inherit from; a falsy value falls back to the
            currently selected profile spec.

    Returns:
        A new dict; ``dictionaries_overlay`` is deep-copied from the base spec.
    """
    source = dict(base_spec or self._selected_processing_profile_spec())

    if label is not None:
        resolved_label = label
    else:
        resolved_label = (
            source.get("label") or self.processing_profile_label_var.get() or "Profil"
        )
    result: Dict[str, Any] = {"label": str(resolved_label)}

    if description is not None:
        resolved_description = description
    else:
        resolved_description = (
            self.profile_description_var.get() or source.get("description") or ""
        )
    result["description"] = str(resolved_description)

    # The remaining fields are read in the same order as the dict is laid out.
    result["require_manual_mask"] = bool(self.profile_require_manual_mask_var.get())
    result["force_disable_vlm"] = bool(self.profile_force_disable_vlm_var.get())
    result["dictionaries_overlay"] = deepcopy(source.get("dictionaries_overlay") or {})
    result["param_lists"] = self._current_param_lists()
    result["has_param_lists"] = True
    result["preferred_manual_mask_template"] = self._current_manual_mask_template_setting()
    result["has_preferred_manual_mask_template"] = True
    return result
def _profile_key_from_label(self, label: str) -> str:
    """Derive an unused ASCII profile key from a human-readable label.

    Accents are stripped, runs of non-alphanumerics become ``_``, and a
    numeric suffix (starting at 2) is appended until the key is not already
    present in ``self._processing_profiles``.
    """
    decomposed = unicodedata.normalize("NFKD", label)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    base = re.sub(r"[^a-zA-Z0-9]+", "_", ascii_only.lower()).strip("_") or "profil"
    taken = set(self._processing_profiles.keys())
    if base not in taken:
        return base
    suffix = 2
    while f"{base}_{suffix}" in taken:
        suffix += 1
    return f"{base}_{suffix}"
def _refresh_profile_description(self):
    """Show the profile description followed by a line listing active options."""
    text = self.profile_description_var.get().strip()
    # Each entry pairs a truthy/falsy flag with the hint displayed when it is set.
    flags = (
        (self.profile_require_manual_mask_var.get(), "masque manuel requis"),
        (self.profile_force_disable_vlm_var.get(), "VLM désactivé"),
        (
            self._selected_processing_profile_spec().get("dictionaries_overlay"),
            "règles de masquage renforcées",
        ),
    )
    active = [hint for enabled, hint in flags if enabled]
    if active:
        options_line = f"Options actives : {', '.join(active)}."
        text = f"{text}\n{options_line}" if text else options_line
    self._profile_description.configure(text=text)
def _on_profile_editor_change(self):
    """Re-sync dependent widgets after a change in the profile editor."""
    steps = (
        "_apply_processing_profile_gui_state",
        "_refresh_profile_description",
        "_refresh_manual_mask_hint",
        "_refresh_profile_capture_summary",
    )
    # Methods are resolved one at a time so the call order is strictly sequential.
    for step_name in steps:
        getattr(self, step_name)()
def _on_profile_description_change(self, *_args):
    """Callback that ignores its positional arguments and refreshes the description."""
    refresh = self._refresh_profile_description
    refresh()
def _builtin_processing_profile_keys(self) -> set[str]:
    """Return the keys of the default profiles, as given by ``list_default_profile_keys``."""
    builtin_keys = list_default_profile_keys()
    return builtin_keys
def _open_profile_manager(self):
    """Switch the interface to the ``"profiles"`` tab."""
    target_tab = "profiles"
    self._switch_tab(target_tab)
def _refresh_profile_capture_summary(self):
    """Update the label summarising what saving the current profile would record.

    Does nothing until the ``_profile_capture_summary`` widget exists.
    """
    if not hasattr(self, "_profile_capture_summary"):
        return
    selected_key = self._selected_processing_profile_key()
    lists = self._current_param_lists()
    n_keep = len(lists.get("whitelist_phrases", []))
    n_force = len(lists.get("blacklist_force_mask_terms", []))
    n_stop = len(lists.get("additional_stopwords", []))
    current_mask = self.manual_mask_template_var.get()
    default_key = get_default_profile_key(Path(self.profiles_path.get()))
    if selected_key and selected_key == default_key:
        status = "profil par défaut"
    else:
        status = "profil secondaire"
    summary = (
        f"Ce profil enregistrera : {n_keep} préservations, {n_force} masquages forcés, "
        f"{n_stop} stop-word additionnel. Masque PDF courant : {current_mask}. "
        f"Statut : {status}."
    )
    self._profile_capture_summary.configure(text=summary)
def _refresh_profile_kind_label(self):
    """Show whether the selected profile is built-in or user-defined.

    Does nothing until the ``_profile_kind_label`` widget exists; clears the
    label when no profile is selected.
    """
    if not hasattr(self, "_profile_kind_label"):
        return
    selected_key = self._selected_processing_profile_key()
    if selected_key:
        is_builtin = selected_key in self._builtin_processing_profile_keys()
        kind = "profil fourni" if is_builtin else "profil utilisateur"
        caption = f"Type : {kind} ({selected_key})"
    else:
        caption = ""
    self._profile_kind_label.configure(text=caption)
def _rename_selected_processing_profile(self):
    """Ask for a new visible name and save the selected profile under it.

    The profile key is unchanged; only the label in the saved spec differs.
    Cancelling the dialog or entering a blank name leaves everything untouched.
    """
    selected_key = self._selected_processing_profile_key()
    if not selected_key:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
        return
    current_spec = self._selected_processing_profile_spec()
    shown_name = str(current_spec.get("label") or selected_key)
    answer = simpledialog.askstring(
        "Renommer le profil",
        "Nouveau nom visible du profil :",
        initialvalue=shown_name,
        parent=self.root,
    )
    if answer is None:
        # Dialog cancelled.
        return
    cleaned = answer.strip()
    if not cleaned:
        messagebox.showwarning("Profils", "Le nom du profil ne peut pas être vide.")
        return
    renamed_spec = self._build_live_profile_spec(label=cleaned, base_spec=current_spec)
    save_runtime_profile(selected_key, renamed_spec, Path(self.profiles_path.get()))
    self._refresh_processing_profiles(preferred_key=selected_key)
    messagebox.showinfo("Profils", f"Profil renommé : {cleaned}")
def _set_selected_processing_profile_default(self):
    """Mark the selected profile as the default one and reload the profile list."""
    selected_key = self._selected_processing_profile_key()
    if selected_key:
        profiles_file = Path(self.profiles_path.get())
        set_runtime_default_profile(selected_key, profiles_file)
        self._refresh_processing_profiles(preferred_key=selected_key)
        messagebox.showinfo("Profils", "Profil par défaut mis à jour.")
    else:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
def _delete_selected_processing_profile(self):
    """Delete the selected user profile after confirmation.

    Built-in profiles (per ``_builtin_processing_profile_keys``) are refused
    with a warning.
    """
    selected_key = self._selected_processing_profile_key()
    selected_spec = self._selected_processing_profile_spec()
    shown_name = str(selected_spec.get("label") or selected_key)
    if not selected_key:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
        return
    if selected_key in self._builtin_processing_profile_keys():
        refusal = (
            "Les profils fournis par défaut ne peuvent pas être supprimés.\n"
            "Crée un profil utilisateur si tu veux un profil spécifique."
        )
        messagebox.showwarning("Profils", refusal)
        return
    if not messagebox.askyesno(
        "Supprimer le profil",
        f"Supprimer définitivement le profil utilisateur « {shown_name} » ?",
        parent=self.root,
    ):
        return
    delete_runtime_profile(selected_key, Path(self.profiles_path.get()))
    self._refresh_processing_profiles()
    messagebox.showinfo("Profils", f"Profil supprimé : {shown_name}")
def _create_processing_profile(self):
    """Create a new profile from the current editor state.

    Prompts for a name (required) and a description (optional; cancelling
    keeps the base profile description), derives a fresh key from the name,
    asks whether the profile becomes the default, then saves and selects it.
    """
    template_spec = self._selected_processing_profile_spec()
    suggested_name = f"{template_spec.get('label') or 'Profil'} copie"
    entered_name = simpledialog.askstring(
        "Nouveau profil",
        "Nom du nouveau profil :",
        initialvalue=suggested_name,
        parent=self.root,
    )
    if entered_name is None:
        return
    label = entered_name.strip()
    if not label:
        messagebox.showwarning("Profils", "Le nom du profil ne peut pas être vide.")
        return

    entered_description = simpledialog.askstring(
        "Nouveau profil",
        "Description du profil (optionnelle) :",
        initialvalue=str(template_spec.get("description") or ""),
        parent=self.root,
    )
    if entered_description is None:
        # Cancelled: fall back to the description of the base profile.
        entered_description = str(template_spec.get("description") or "")

    new_key = self._profile_key_from_label(label)
    new_spec = self._build_live_profile_spec(
        label=label,
        description=entered_description.strip(),
        base_spec=template_spec,
    )
    make_default = messagebox.askyesno(
        "Nouveau profil",
        "Définir ce nouveau profil comme profil par défaut ?",
        parent=self.root,
    )
    save_runtime_profile(
        new_key,
        new_spec,
        Path(self.profiles_path.get()),
        set_default=make_default,
    )
    self._refresh_processing_profiles(preferred_key=new_key)
    messagebox.showinfo("Profils", f"Profil enregistré : {label}", parent=self.root)
def _save_selected_processing_profile(self):
    """Save the current editor state over the selected profile.

    When the selected profile is a built-in one, the user must confirm that a
    local override will be written. Built-in detection goes through
    ``_builtin_processing_profile_keys`` so it stays consistent with the
    delete action and the profile-kind label instead of relying on a
    hard-coded list of keys.
    """
    profile_key = self._selected_processing_profile_key()
    if not profile_key:
        messagebox.showwarning(
            "Profils",
            "Aucun profil sélectionné. Créez d'abord un nouveau profil.",
            parent=self.root,
        )
        return
    base_spec = self._selected_processing_profile_spec()
    profile_label = str(base_spec.get("label") or profile_key)
    if profile_key in self._builtin_processing_profile_keys():
        confirmed = messagebox.askyesno(
            "Profils",
            "Vous allez enregistrer une surcharge locale sur un profil fourni par défaut.\n\n"
            f"Continuer pour « {profile_label} » ?",
            parent=self.root,
        )
        if not confirmed:
            return
    profile_spec = self._build_live_profile_spec(base_spec=base_spec)
    save_runtime_profile(
        profile_key,
        profile_spec,
        Path(self.profiles_path.get()),
    )
    self._refresh_processing_profiles(preferred_key=profile_key)
    messagebox.showinfo(
        "Profils",
        f"Profil mis à jour : {profile_label}",
        parent=self.root,
    )
def _refresh_processing_profiles(self, preferred_key: Optional[str] = None):
    """Reload the profiles, repopulate the combobox and re-apply a selection.

    Args:
        preferred_key: Profile key to select after reloading. When omitted,
            the currently selected key is kept; when that key no longer
            exists, the default profile key is used.

    Labels are the only handle the combobox offers, so two profiles sharing
    the same label would otherwise collapse into a single entry and make one
    of them unselectable. Colliding labels are therefore suffixed with the
    (unique) profile key. Labels are also coerced to ``str`` so they can be
    matched against the combobox string value.
    """
    profiles_file = Path(self.profiles_path.get())
    ensure_runtime_profiles_config(profiles_file)
    current_key = preferred_key or self._selected_processing_profile_key()
    profiles = list_effective_profiles(profiles_file)
    self._processing_profiles = profiles

    labels_to_keys: Dict[str, str] = {}
    for key, spec in profiles.items():
        base_label = str(spec.get("label") or key)
        display = base_label
        if display in labels_to_keys:
            display = f"{base_label} ({key})"
        counter = 2
        while display in labels_to_keys:
            # Extremely unlikely: another profile is literally named "<label> (<key>)".
            display = f"{base_label} ({key}) #{counter}"
            counter += 1
        labels_to_keys[display] = key
    self._processing_profile_labels_to_keys = labels_to_keys

    labels = list(labels_to_keys)
    self._profile_combo.configure(values=labels)

    selected_key = current_key
    if not selected_key or selected_key not in profiles:
        selected_key = get_default_profile_key(profiles_file)
    selected_label = next(
        (label for label, key in labels_to_keys.items() if key == selected_key),
        labels[0] if labels else "",
    )
    if selected_label:
        self.processing_profile_label_var.set(selected_label)
    self._apply_selected_processing_profile()
def _apply_selected_processing_profile(self):
    """Push the selected profile spec into the editor widgets.

    With no spec available, the description is cleared and nothing else is
    touched. Otherwise the description, the two option flags, the parameter
    lists (from the profile when it carries them, else from the general
    configuration) and the preferred mask template are applied.

    ``_on_profile_editor_change`` already refreshes the description, the
    manual-mask hint and the capture summary, so those refreshers are not
    invoked a second time here.
    """
    spec = self._selected_processing_profile_spec()
    if not spec:
        self._profile_base_description = ""
        self.profile_description_var.set("")
        self._profile_description.configure(text="")
        return
    self._profile_base_description = str(spec.get("description") or "")
    self.profile_description_var.set(self._profile_base_description)
    self.profile_require_manual_mask_var.set(bool(spec.get("require_manual_mask")))
    self.profile_force_disable_vlm_var.set(bool(spec.get("force_disable_vlm")))
    if spec.get("has_param_lists"):
        self._apply_param_lists_to_widgets(spec.get("param_lists") or {})
    else:
        self._load_params()
    self._select_manual_mask_template_from_setting(
        spec.get("preferred_manual_mask_template") or ""
    )
    self._on_profile_editor_change()
    self._refresh_profile_kind_label()
def _apply_processing_profile_gui_state(self):
    """Enable or disable the VLM checkbox according to the profile flag.

    Does nothing until the ``_vlm_check`` widget exists. When the profile
    forces VLM off, the option is unchecked, marked unavailable and the
    checkbox is disabled; otherwise the checkbox is re-enabled and a leftover
    "disabled by profile" status text is cleared.
    """
    vlm_forced_off = bool(self.profile_force_disable_vlm_var.get())
    if not hasattr(self, "_vlm_check"):
        return
    if not vlm_forced_off:
        self._vlm_check.configure(state=tk.NORMAL)
        if (
            hasattr(self, "_vlm_status_lbl")
            and self._vlm_status_lbl.cget("text") == "Désactivé par profil"
        ):
            self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
    else:
        self.use_vlm.set(False)
        self._vlm_available = False
        self._vlm_check.configure(state=tk.DISABLED)
        if hasattr(self, "_vlm_status_lbl"):
            self._vlm_status_lbl.configure(text="Désactivé par profil", fg=CLR_TEXT_SECONDARY)
    self._refresh_manual_mask_hint()
def _selected_manual_mask_template_path(self) -> Optional[Path]:
    """Return the path mapped to the selected mask label, or ``None``."""
    chosen_label = self.manual_mask_template_var.get()
    return self._manual_mask_templates.get(chosen_label)
def _refresh_manual_mask_templates(self):
    """Rescan the mask templates and rebuild the combobox choices.

    The previously selected template is re-selected when it is still listed;
    otherwise the "none" entry is selected.
    """
    previous_path = self._selected_manual_mask_template_path()
    found = list_mask_templates(_exe_dir())
    choices: Dict[str, Optional[Path]] = {MANUAL_MASK_NONE_LABEL: None}
    for template_path in found:
        choices[mask_template_label(template_path, _exe_dir())] = template_path
    self._manual_mask_templates = choices

    names = list(choices)
    self._manual_mask_combo.configure(values=names)
    if hasattr(self, "_profile_manual_mask_combo"):
        self._profile_manual_mask_combo.configure(values=names)

    if previous_path is None:
        restored = MANUAL_MASK_NONE_LABEL
    else:
        restored = next(
            (name for name, candidate in choices.items() if candidate == previous_path),
            MANUAL_MASK_NONE_LABEL,
        )
    self.manual_mask_template_var.set(restored)
    self._refresh_manual_mask_hint()
    self._refresh_profile_capture_summary()
def _refresh_manual_mask_hint(self):
    """Update the hint under the mask selector, then refresh the capture summary.

    The message depends on whether a template is selected, whether the
    profile requires one, and whether any template is registered besides the
    "none" entry.
    """
    chosen = self._selected_manual_mask_template_path()
    mask_required = bool(self.profile_require_manual_mask_var.get())
    if chosen is not None:
        hint = (
            f"Masque sélectionné : {chosen.name}. "
            "Il sera appliqué à tous les PDF du lot avant l'anonymisation automatique."
        )
    elif mask_required:
        hint = (
            "Le profil sélectionné exige un masque manuel. "
            "Choisissez un modèle avant de lancer le traitement."
        )
    elif len(self._manual_mask_templates) == 1:
        # Only the "none" entry is present: no template has been saved yet.
        hint = (
            "Aucun modèle enregistré. Crée un masque avec l'éditeur PDF, "
            "puis clique sur « Actualiser les modèles »."
        )
    else:
        hint = "Aucun masque manuel sélectionné pour ce lancement."
    self._manual_mask_hint.configure(text=hint)
    self._refresh_profile_capture_summary()
def _load_manual_mask_template(self, path: Path):
    """Load a mask template from a YAML (``.yml``/``.yaml``) or JSON file.

    Raises:
        RuntimeError: when the template library could not be imported.
    """
    if load_template_yaml is None or Template is None:
        raise RuntimeError("bibliothèque de templates PDF indisponible")
    extension = path.suffix.lower()
    if extension in (".yml", ".yaml"):
        return load_template_yaml(path)
    # Any other extension is parsed as JSON.
    payload = json.loads(path.read_text(encoding="utf-8"))
    return Template.from_dict(payload)
def _open_manual_mask_templates_dir(self):
    """Open the manual-mask templates directory via ``open_folder``."""
    templates_dir = self._manual_mask_templates_dir()
    open_folder(templates_dir)
def _open_manual_mask_designer(self):
    """Open the PDF mask designer in a new top-level window.

    The designer is preloaded with the PDF resolved from ``self._single_file``
    when there is one; otherwise an informational tip is shown first. An
    error dialog is shown when the designer class is unavailable.
    """
    if MaskDesignerApp is None:
        messagebox.showerror(
            "Masques PDF",
            "L'éditeur de masques PDF n'a pas pu être chargé.\n"
            "Vérifiez que PyMuPDF, Pillow et PyYAML sont disponibles.",
        )
        return
    preloaded_pdf = resolve_manual_mask_pdf(getattr(self, "_single_file", None))
    designer_window = tk.Toplevel(self.root)
    if preloaded_pdf is not None:
        self.status_var.set(f"Éditeur de masques PDF ouvert pour {preloaded_pdf.name}.")
    else:
        tip = (
            "L'éditeur s'ouvre sans PDF préchargé.\n\n"
            "Astuce : choisissez d'abord un fichier PDF dans l'onglet "
            "Anonymisation pour l'ouvrir automatiquement ici."
        )
        self.status_var.set("Éditeur de masques PDF ouvert.")
        messagebox.showinfo("Masques PDF", tip)
    MaskDesignerApp(
        designer_window,
        initial_pdf=preloaded_pdf,
        templates_dir=self._manual_mask_templates_dir(),
    )
# ---------------------------------------------------------------
# Aide
# ---------------------------------------------------------------
def _show_help(self):
    """Display the "how it works" help dialog."""
    dialog_title = "Comment ça marche ?"
    help_text = (
        "1) Choisissez le dossier racine contenant vos fichiers PDF.\n\n"
        "2) Cliquez sur « Lancer la pseudonymisation ».\n\n"
        "Tous les fichiers PDF sont traités\n"
        "(recherche récursive dans les sous-dossiers).\n\n"
        "Un PDF Image (raster) est généré pour chaque fichier :\n"
        "chaque page devient une image avec les données masquées.\n"
        "Sécurité maximale, aucun texte résiduel.\n\n"
        "Les résultats sont écrits dans le dossier\n"
        "« anonymise/ » à la racine du dossier sélectionné,\n"
        "en conservant l'arborescence des sous-dossiers source.\n\n"
        "Le sous-dossier « anonymise/ » est ignoré en entrée\n"
        "pour éviter de retraiter d'anciennes sorties."
    )
    messagebox.showinfo(dialog_title, help_text)
# ---------------------------------------------------------------
# Paramètres avancés (whitelist/blacklist)
# ---------------------------------------------------------------
def _build_phrase_list(self, parent, title: str, placeholder: str, color_tag: str, on_change=None):
    """Build a phrase-list widget with add and remove controls.

    Args:
        parent: Container the widget frame is packed into.
        title: Text of the heading label.
        placeholder: Greyed hint text shown in the entry while it is empty.
        color_tag: Background colour used for the add button and the listbox.
        on_change: Optional zero-argument callable invoked after an item is
            added or after at least one item is removed.

    Returns:
        A ``(listbox, entry)`` tuple.
    """
    frame = tk.Frame(parent, bg=CLR_BG)
    frame.pack(fill=tk.X, pady=(4, 8))
    tk.Label(
        frame, text=title, font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    ).pack(fill=tk.X, pady=(0, 4))
    # Input field + add button
    input_row = tk.Frame(frame, bg=CLR_BG)
    input_row.pack(fill=tk.X, pady=(0, 4))
    entry = tk.Entry(input_row, font=self._f_small, relief=tk.GROOVE, bd=1)
    entry.insert(0, placeholder)
    entry.configure(fg="#999")

    def _on_focus_in(e):
        # Remove the placeholder as soon as the user focuses the field.
        if entry.get() == placeholder:
            entry.delete(0, tk.END)
            entry.configure(fg=CLR_TEXT)

    def _on_focus_out(e):
        # Restore the placeholder when the field is left blank.
        if not entry.get().strip():
            entry.insert(0, placeholder)
            entry.configure(fg="#999")

    entry.bind("<FocusIn>", _on_focus_in)
    entry.bind("<FocusOut>", _on_focus_out)
    entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4))

    def _add(event=None):
        # ``listbox`` is created further down; it exists by the time this runs.
        text = entry.get().strip()
        if text and text != placeholder:
            # Avoid duplicates
            items = list(listbox.get(0, tk.END))
            if text not in items:
                listbox.insert(tk.END, text)
                if on_change:
                    on_change()
            entry.delete(0, tk.END)

    add_btn = tk.Button(
        input_row, text="+ Ajouter", font=self._f_small,
        bg=color_tag, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2",
        command=_add, padx=8,
    )
    add_btn.pack(side=tk.LEFT)
    entry.bind("<Return>", _add)
    # Phrase list
    list_frame = tk.Frame(frame, bg=CLR_BG)
    list_frame.pack(fill=tk.X)
    listbox = tk.Listbox(
        list_frame, height=4, font=("Consolas", 9),
        relief=tk.GROOVE, bd=1, selectmode=tk.EXTENDED,
        bg=color_tag,
    )
    scrollbar = ttk.Scrollbar(list_frame, orient=tk.VERTICAL, command=listbox.yview)
    listbox.configure(yscrollcommand=scrollbar.set)
    listbox.pack(side=tk.LEFT, fill=tk.X, expand=True)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    # Remove button
    def _remove():
        sel = listbox.curselection()
        removed = False
        # Delete from the highest index down so remaining indices stay valid.
        for idx in reversed(sel):
            listbox.delete(idx)
            removed = True
        if removed and on_change:
            on_change()

    rm_btn = tk.Button(
        frame, text="Supprimer la sélection", font=self._f_small,
        bg="#ffcdd2", fg="#b71c1c", relief=tk.GROOVE, cursor="hand2",
        command=_remove, padx=8,
    )
    rm_btn.pack(anchor="e", pady=(2, 0))
    return listbox, entry
def _refresh_params_summary(self):
    """Show the item counts of the three listboxes, then refresh the capture summary."""
    n_keep = self._wl_listbox.size()
    n_force = self._bl_listbox.size()
    n_stop = self._sw_listbox.size()
    summary = (
        f"Listes visibles chargées : {n_keep} préservations, "
        f"{n_force} masquages forcés, {n_stop} stop-word additionnel."
    )
    self._params_summary.configure(text=summary)
    self._refresh_profile_capture_summary()
def _load_params(self):
    """Load the whitelist, blacklist and stop-word lists from the YAML configuration.

    Best effort: when the configuration file is missing nothing is loaded,
    and any error raised while loading is silently ignored.
    """
    try:
        config_file = Path(self.cfg_path.get())
        if config_file.exists():
            loaded = load_effective_param_lists(config_file)
            targets = (
                ("_wl_listbox", "whitelist_phrases"),
                ("_bl_listbox", "blacklist_force_mask_terms"),
                ("_sw_listbox", "additional_stopwords"),
            )
            # Each listbox is cleared and then refilled before moving to the next.
            for widget_attr, key in targets:
                widget = getattr(self, widget_attr)
                widget.delete(0, tk.END)
                for item in loaded[key]:
                    widget.insert(tk.END, item)
            self._refresh_params_summary()
    except Exception:
        pass
def _listbox_values(self, listbox: tk.Listbox) -> List[str]:
    """Return every entry of ``listbox`` as a new list."""
    entries = listbox.get(0, tk.END)
    return [*entries]
def _copy_param_listboxes(
    self,
    source_wl: tk.Listbox,
    source_bl: tk.Listbox,
    source_sw: tk.Listbox,
    target_wl: tk.Listbox,
    target_bl: tk.Listbox,
    target_sw: tk.Listbox,
):
    """Copy the content of three source listboxes into their matching targets."""
    pairs = (
        (source_wl, target_wl),
        (source_bl, target_bl),
        (source_sw, target_sw),
    )
    for source, target in pairs:
        self._set_listbox_values(target, self._listbox_values(source))
def _export_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Export the visible parameter lists to a JSON file chosen by the user.

    The save dialog starts in the first existing folder among ``~/Desktop``,
    ``~/Bureau`` and the home directory. Any failure is reported in an error
    dialog.
    """
    try:
        import json as _json
        from datetime import datetime

        keep_items = self._listbox_values(wl_listbox)
        force_items = self._listbox_values(bl_listbox)
        stop_items = self._listbox_values(sw_listbox)
        payload = {
            "version": APP_VERSION,
            "date_export": datetime.now().isoformat(),
            "etablissement": "",  # to be filled in by the user
            "whitelist_phrases": keep_items,
            "blacklist_force_mask_terms": force_items,
            "additional_stopwords": stop_items,
            "instructions": (
                "Ce fichier contient les paramètres d'anonymisation personnalisés. "
                "Envoyez-le par email à l'équipe technique pour mise à jour du programme."
            ),
        }
        # Offer the desktop as the default destination when it can be found.
        start_dir = next(
            (
                candidate
                for candidate in (Path.home() / "Desktop", Path.home() / "Bureau")
                if candidate.exists()
            ),
            Path.home(),
        )
        destination = filedialog.asksaveasfilename(
            title="Exporter les paramètres",
            initialdir=str(start_dir),
            initialfile="parametres_anonymisation.json",
            defaultextension=".json",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not destination:
            return
        serialized = _json.dumps(payload, ensure_ascii=False, indent=2)
        Path(destination).write_text(serialized, encoding="utf-8")
        messagebox.showinfo(
            "Export réussi",
            f"Paramètres exportés dans :\n{destination}\n\n"
            f"Vous pouvez envoyer ce fichier par email\n"
            f"à l'équipe technique.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'export :\n{e}")
def _export_params(self):
    """Export the content of the main whitelist, blacklist and stop-word listboxes."""
    widgets = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._export_param_listboxes(*widgets)
def _import_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Import parameters from a JSON file and merge them into the listboxes.

    Existing entries are kept; only terms not yet present are appended. All
    three lists are handled the same way: entries are converted with
    ``str()`` and stripped, blank or falsy entries are skipped, and a term
    repeated inside the imported file is added once. Any failure (unreadable
    file, invalid JSON, unexpected structure) is reported in an error dialog.
    """

    def _merge(listbox: tk.Listbox, raw_terms) -> int:
        """Append the new terms of ``raw_terms`` to ``listbox``; return how many were added."""
        if raw_terms is None:
            return 0
        if isinstance(raw_terms, str):
            # A bare string is one term, not a sequence of characters.
            raw_terms = [raw_terms]
        known = set(listbox.get(0, tk.END))
        added = 0
        for raw in raw_terms:
            text = str(raw).strip() if raw else ""
            if not text or text in known:
                continue
            listbox.insert(tk.END, text)
            known.add(text)
            added += 1
        return added

    try:
        src = filedialog.askopenfilename(
            title="Importer des paramètres",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not src:
            return
        data = json.loads(Path(src).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError("le fichier ne contient pas un objet JSON de paramètres")
        added_wl = _merge(wl_listbox, data.get("whitelist_phrases", []))
        added_bl = _merge(bl_listbox, data.get("blacklist_force_mask_terms", []))
        added_sw = _merge(sw_listbox, data.get("additional_stopwords", []))
        version = data.get("version", "?")
        # ``date_export`` may be missing, null or not a string in foreign files.
        date_exp = str(data.get("date_export") or "?")[:10]
        messagebox.showinfo(
            "Import réussi",
            f"Paramètres importés (v{version}, {date_exp}) :\n\n"
            f" + {added_wl} phrase(s) ajoutée(s) à la whitelist\n"
            f" + {added_bl} terme(s) ajouté(s) à la blacklist\n"
            f" + {added_sw} mot(s) ajouté(s) aux stop-words\n\n"
            f"Cliquez sur « Sauvegarder » pour appliquer.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'import :\n{e}")
def _import_params(self):
    """Import parameters into the main listboxes, then refresh the summary."""
    widgets = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._import_param_listboxes(*widgets)
    self._refresh_params_summary()
def _save_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Save the visible lists into the general YAML configuration file.

    Only the ``whitelist_phrases``, ``blacklist.force_mask_terms`` and
    ``additional_stopwords`` entries are rewritten; other keys are written
    back as loaded. A ``blacklist`` entry that is present but is not a
    mapping (for instance ``blacklist:`` left empty, which YAML loads as
    null) is replaced by a fresh mapping instead of making the save fail.
    Errors are reported in a dialog.
    """
    try:
        cfg_path = Path(self.cfg_path.get())
        if not cfg_path.exists() or yaml is None:
            messagebox.showwarning("Erreur", "Fichier de configuration introuvable.")
            return
        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
        # Whitelist phrases
        data["whitelist_phrases"] = self._listbox_values(wl_listbox)
        # Blacklist terms
        blacklist = data.get("blacklist")
        if not isinstance(blacklist, dict):
            blacklist = {}
            data["blacklist"] = blacklist
        blacklist["force_mask_terms"] = self._listbox_values(bl_listbox)
        # Additional stop-words (words never to be identified as names)
        data["additional_stopwords"] = self._listbox_values(sw_listbox)
        cfg_path.write_text(
            yaml.dump(data, allow_unicode=True, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )
        messagebox.showinfo("Paramètres", "Paramètres sauvegardés avec succès.")
    except Exception as e:
        messagebox.showerror("Erreur", f"Impossible de sauvegarder :\n{e}")
def _save_params(self):
    """Save the main listboxes to the configuration, then refresh the summary."""
    widgets = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._save_param_listboxes(*widgets)
    self._refresh_params_summary()
# ---------------------------------------------------------------
# YAML (interne)
# ---------------------------------------------------------------
def _ensure_cfg_exists(self):
    """Create the configuration file (and its parent folders) when it is missing.

    An existing file is left untouched; a missing one is written from
    ``RUNTIME_CFG_TEXT``.
    """
    config_file = Path(self.cfg_path.get())
    config_file.parent.mkdir(parents=True, exist_ok=True)
    if config_file.exists():
        return
    config_file.write_text(RUNTIME_CFG_TEXT, encoding="utf-8")
def _load_cfg(self):
    """Ensure the configuration file exists, then load it into ``self.cfg_data``.

    Loading is best effort: on any error ``self.cfg_data`` is left as it was.
    """
    self._ensure_cfg_exists()
    try:
        config_file = Path(self.cfg_path.get())
        self.cfg_data = load_effective_dictionaries_dict(config_file)
    except Exception:
        pass
# ---------------------------------------------------------------
# Audit
# ---------------------------------------------------------------
def _count_audit(self, audit_path: Path) -> Dict[str, int]:
    """Count audit entries per ``kind`` in a JSON-lines file.

    Entries without a ``kind`` key are counted under ``"?"``. Lines that
    cannot be processed are skipped, and an unreadable file yields whatever
    was counted so far (an empty dict when nothing could be read).
    """
    counts: Dict[str, int] = {}
    try:
        with open(audit_path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                try:
                    kind = json.loads(raw_line).get("kind", "?")
                    counts[kind] = counts.get(kind, 0) + 1
                except Exception:
                    continue
    except Exception:
        pass
    return counts
# ---------------------------------------------------------------
# Vérification des fuites
# ---------------------------------------------------------------
def _check_leaks(self, output_dir: Path) -> int:
    """Count suspicious matches left in the pseudonymised text files.

    Two patterns are searched: a birth-date mention and the ``CHCB`` token.
    Files that cannot be read are skipped, and any other error ends the scan
    with the count reached so far.
    """
    total = 0
    try:
        # Critical leak patterns
        import re
        detectors = (
            re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
            re.compile(r"\bCHCB\b", re.IGNORECASE),
        )
        for text_path in iter_pseudonymized_texts(output_dir):
            try:
                with open(text_path, 'r', encoding='utf-8') as handle:
                    body = handle.read()
                for detector in detectors:
                    total += len(detector.findall(body))
            except Exception:
                continue
    except Exception:
        pass
    return total
# ---------------------------------------------------------------
# Calcul des performances
# ---------------------------------------------------------------
def _calculate_performance(self, total_files: int, total_time: float) -> str:
    """Format the total processing time and the average time per document.

    Returns a "not available" message when either argument is zero.
    """
    if total_files == 0 or total_time == 0:
        return "⏱️ Temps de traitement non disponible"
    per_document = total_time / total_files
    # Pick the coarsest unit pair that fits the duration.
    if total_time >= 3600:
        whole_hours, leftover = divmod(total_time, 3600)
        elapsed = f"{int(whole_hours)}h {int(leftover // 60)}m"
    elif total_time >= 60:
        whole_minutes, leftover = divmod(total_time, 60)
        elapsed = f"{int(whole_minutes)}m {int(leftover)}s"
    else:
        elapsed = f"{total_time:.0f}s"
    return f"⏱️ Traité en {elapsed} ({per_document:.1f}s/document)"
# ---------------------------------------------------------------
# Mise à jour de l'indicateur de fuites
# ---------------------------------------------------------------
def _update_leak_indicator(self, leak_count: int):
    """Update the leak badge: green when the count is zero, red otherwise."""
    if leak_count != 0:
        plural = 's' if leak_count > 1 else ''
        self._leak_badge.configure(
            text=f"⚠️ {leak_count} fuite{plural} potentielle{plural}",
            bg=CLR_RED_LIGHT, fg=CLR_RED
        )
        return
    self._leak_badge.configure(
        text="🔒 0 fuite détectée",
        bg=CLR_GREEN_LIGHT, fg=CLR_GREEN
    )
# ---------------------------------------------------------------
# Chargement automatique NER au démarrage
# ---------------------------------------------------------------
def _auto_load_ner(self):
    """Start loading the default NER model on a daemon thread.

    Priority: EDS-Pseudo, then local CamemBERT-bio, then legacy
    DistilCamemBERT-NER. Nothing happens when no manager is available.
    """
    any_backend = self._eds_manager or self._camembert_manager or self._onnx_manager
    if not any_backend:
        return
    self.status_var.set("Chargement du modèle NER...")
    loader = threading.Thread(target=self._auto_load_ner_worker, daemon=True)
    loader.start()
def _auto_load_ner_worker(self):
    """Try the NER backends in priority order and report the outcome in the status bar.

    Order: EDS-Pseudo (with local CamemBERT-bio loaded alongside when
    possible), then local CamemBERT-bio alone, then the legacy ONNX model.
    """

    def _note(template, error):
        # Fallbacks are expected, so failures are only logged at INFO level.
        import logging
        logging.getLogger(__name__).info(template, error)

    companion_loaded = False
    # 1) EDS-Pseudo first (F1=97.4% on clinical data)
    if self._eds_manager:
        try:
            self._eds_manager.load("AP-HP/eds-pseudo-public")
            self._active_manager = self._eds_manager
            self.use_hf = True
            if self._camembert_manager:
                try:
                    self._camembert_manager.load()
                    companion_loaded = True
                except Exception as cam_err:
                    _note("CamemBERT-bio local indisponible : %s", cam_err)
            extra = " + CamemBERT-bio local" if companion_loaded else ""
            self.status_var.set(f"Prêt — EDS-Pseudo actif{extra}.")
            return
        except Exception as e:
            _note("EDS-Pseudo indisponible, fallback ONNX : %s", e)
    # 2) Embedded local fallback: CamemBERT-bio ONNX.
    #    The core uses it as a separate NER-first signal, not as the legacy
    #    HuggingFace ner_manager.
    if self._camembert_manager:
        try:
            self._camembert_manager.load()
            self.use_hf = False
            self.status_var.set("Prêt — CamemBERT-bio local actif.")
            return
        except Exception as cam_err:
            _note("CamemBERT-bio local indisponible : %s", cam_err)
    # 3) Legacy fallback: DistilCamemBERT-NER through optimum.onnxruntime.
    if self._onnx_manager:
        try:
            self._onnx_manager.load("cmarkea/distilcamembert-base-ner")
            self._active_manager = self._onnx_manager
            self.use_hf = True
            self.status_var.set("Prêt — NER ONNX legacy actif.")
        except Exception as e2:
            self.status_var.set(f"Prêt (NER legacy indisponible : {e2})")
        return
    self.status_var.set("Prêt (aucun backend NER disponible).")
# ---------------------------------------------------------------
# VLM toggle
# ---------------------------------------------------------------
def _on_vlm_toggle(self):
    """React to the user checking or unchecking the VLM checkbox.

    Unchecking marks the VLM as unavailable and clears the status label;
    checking shows a "connecting" status and starts the connection check on a
    daemon thread.
    """
    if self.use_vlm.get():
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(text="Connexion...", fg=CLR_TEXT_SECONDARY)
        checker = threading.Thread(target=self._vlm_connect_worker, daemon=True)
        checker.start()
        return
    self._vlm_available = False
    if hasattr(self, '_vlm_status_lbl'):
        self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
def _vlm_connect_worker(self):
    """Check the Ollama connection in the background and update the VLM status.

    On failure the VLM option is unchecked and the error text, truncated to
    60 characters, is shown in the status label.
    """
    try:
        manager = self._vlm_manager
        if manager is None:
            raise RuntimeError("VlmManager non disponible")
        manager.load()
        self._vlm_available = True
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN)
    except Exception as e:
        self._vlm_available = False
        self.use_vlm.set(False)
        reason = str(e)
        # Keep the status label short: 57 characters plus an ellipsis.
        reason = reason[:57] + "..." if len(reason) > 60 else reason
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(text=f"Indisponible : {reason}", fg=CLR_RED)
# ---------------------------------------------------------------
# Modèles NER (API interne)
# ---------------------------------------------------------------
def _load_model(self, model_id: Optional[str] = None):
    """Load a NER model with the manager that owns it.

    Args:
        model_id: Model identifier; defaults to
            ``"cmarkea/distilcamembert-base-ner"``.

    A model listed in the EDS manager catalogue is loaded by the EDS manager;
    any other model is loaded by the ONNX manager, and nothing happens when
    that manager is missing. On success the manager becomes the active one
    and ``use_hf`` is set; on failure ``use_hf`` is cleared.

    The former second check for a missing EDS manager was unreachable (a
    model can only be recognised as EDS when that manager exists) and has
    been removed.
    """
    mid = model_id or "cmarkea/distilcamembert-base-ner"
    eds_manager = self._eds_manager
    if eds_manager and mid in set(eds_manager.models_catalog().values()):
        manager = eds_manager
    else:
        manager = self._onnx_manager
        if not manager:
            return
    try:
        manager.load(mid)
        self._active_manager = manager
        self.use_hf = True
    except Exception:
        self.use_hf = False
def _unload_model(self):
    """Unload every available NER manager and reset the active-model state."""
    for manager_attr in ("_onnx_manager", "_eds_manager", "_camembert_manager"):
        manager = getattr(self, manager_attr)
        if manager:
            manager.unload()
    self._active_manager = None
    self.use_hf = False
# ---------------------------------------------------------------------------
# Point d'entrée
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: run the GUI; on a fatal error, write crash.log next
    # to this file, try to show an error dialog, then re-raise.
    try:
        root = tk.Tk()
        App(root)
        root.mainloop()
    except Exception as exc:
        import traceback
        import sys
        crash_report = traceback.format_exc()
        # Write the traceback to a log file next to the script/executable.
        crash_file = Path(__file__).resolve().parent / "crash.log"
        try:
            crash_file.write_text(crash_report, encoding="utf-8")
        except Exception:
            pass
        # Try to show a message box (works even without a console).
        try:
            import tkinter as _tk
            hidden_root = _tk.Tk()
            hidden_root.withdraw()
            from tkinter import messagebox as _mb
            _mb.showerror("Erreur fatale", f"L'application a planté :\n\n{exc}\n\nVoir crash.log")
            hidden_root.destroy()
        except Exception:
            pass
        raise