Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis, Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils, et les tests unitaires. Conserve les tests synthétiques (cases) intentionnels. - profile key chcb_strict → chuxx_strict - CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield, Réunion → Province Bêta, 64100/97400 → 12345, FINESS → 999999999, préfixe tél 05.59.44 → 0X.XX.XX - renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2875 lines
116 KiB
Python
2875 lines
116 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Pseudonymisation – GUI v5 (Vue unique épurée)
|
||
----------------------------------------------
|
||
- Vue unique en 2 étapes : dossier → lancer (les deux formats sont générés)
|
||
- Thème système natif (sv_ttk optionnel, fallback clam)
|
||
- Backend NER ONNX/EDS-Pseudo conservé en interne
|
||
- Pas d'onglet Avancé (NER + YAML chargés silencieusement)
|
||
|
||
Fichiers requis à côté :
|
||
- anonymizer_core_refactored_onnx.py
|
||
- ner_manager_onnx.py
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import enum
|
||
import json
|
||
import os
|
||
import platform
|
||
import queue
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
import tempfile
|
||
import threading
|
||
import unicodedata
|
||
from copy import deepcopy
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, simpledialog, ttk
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Core
|
||
# ---------------------------------------------------------------------------
|
||
try:
    import anonymizer_core_refactored_onnx as core
except Exception as e:
    # The core engine is mandatory: record the failure, show it to the user
    # if a display is available, then abort start-up.
    _err_msg = f"Impossible d'importer le core ONNX : {e}"
    # Write the error to a log file next to the script/exe.
    try:
        import traceback as _tb

        # In a frozen build __file__ may resolve inside the temporary
        # extraction directory, so the log would disappear with it; anchor
        # on the executable instead (same rule as _exe_dir()).
        if getattr(sys, "frozen", False):
            _log_dir = Path(sys.executable).resolve().parent
        else:
            _log_dir = Path(__file__).resolve().parent
        _log = _log_dir / "crash.log"
        _log.write_text(f"{_err_msg}\n\n{_tb.format_exc()}", encoding="utf-8")
    except Exception:
        pass  # best effort: logging must never mask the original failure
    try:
        _r = tk.Tk()
        _r.withdraw()
        messagebox.showerror("Erreur d'import", _err_msg)
        _r.destroy()
    except Exception:
        pass  # no display available: the SystemExit message below remains
    raise SystemExit(_err_msg)
|
||
|
||
try:
|
||
from ner_manager_onnx import NerModelManager, NerThresholds
|
||
except Exception:
|
||
NerModelManager = None # type: ignore
|
||
NerThresholds = None # type: ignore
|
||
|
||
try:
|
||
from eds_pseudo_manager import EdsPseudoManager
|
||
except Exception:
|
||
EdsPseudoManager = None # type: ignore
|
||
|
||
try:
|
||
from camembert_ner_manager import CamembertNerManager
|
||
except Exception:
|
||
CamembertNerManager = None # type: ignore
|
||
|
||
try:
|
||
from vlm_manager import VlmManager, VlmConfig
|
||
except Exception:
|
||
VlmManager = None # type: ignore
|
||
VlmConfig = None # type: ignore
|
||
|
||
try:
|
||
import yaml
|
||
except Exception:
|
||
yaml = None
|
||
|
||
from config_defaults import (
|
||
deep_merge_dict,
|
||
load_effective_dictionaries_dict,
|
||
load_effective_param_lists,
|
||
read_default_dictionaries_text,
|
||
read_runtime_dictionaries_overlay_text,
|
||
)
|
||
from gui_batch_paths import (
|
||
build_batch_output_dir,
|
||
iter_pseudonymized_texts,
|
||
list_supported_documents,
|
||
)
|
||
from manual_masking import (
|
||
append_jsonl_file,
|
||
ensure_mask_templates_dir,
|
||
list_mask_templates,
|
||
mask_template_label,
|
||
resolve_manual_mask_pdf,
|
||
)
|
||
from profile_defaults import (
|
||
delete_runtime_profile,
|
||
ensure_runtime_profiles_config,
|
||
get_default_profile_key,
|
||
list_default_profile_keys,
|
||
list_effective_profiles,
|
||
read_runtime_profiles_overlay_text,
|
||
save_runtime_profile,
|
||
set_runtime_default_profile,
|
||
)
|
||
|
||
try:
|
||
from pdf_mask_designer import (
|
||
MaskDesignerApp,
|
||
Template,
|
||
apply_template_vector,
|
||
load_template_yaml,
|
||
)
|
||
except Exception:
|
||
MaskDesignerApp = None # type: ignore
|
||
Template = None # type: ignore
|
||
apply_template_vector = None # type: ignore
|
||
load_template_yaml = None # type: ignore
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Thème optionnel
|
||
# ---------------------------------------------------------------------------
|
||
try:
|
||
import sv_ttk # type: ignore
|
||
except ImportError:
|
||
sv_ttk = None
|
||
|
||
# PIL pour charger le logo / icônes (optionnel — dégradation si absent).
|
||
try:
|
||
from PIL import Image, ImageTk
|
||
_PIL_AVAILABLE = True
|
||
except Exception:
|
||
_PIL_AVAILABLE = False
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Constantes
|
||
# ---------------------------------------------------------------------------
|
||
# Window title prefix, short version tag, and the label used in the manual
# mask selector when no mask template is chosen.
APP_TITLE = "Pseudonymisation de vos documents"
APP_VERSION = "v5.5"
MANUAL_MASK_NONE_LABEL = "Aucun masque manuel"
|
||
|
||
# Métadonnées de build — chargées depuis build_info.py (régénéré par rebuild_anon.ps1)
|
||
try:
|
||
from build_info import BUILD_DATE, BUILD_COMMIT, BUILD_BRANCH
|
||
except Exception:
|
||
BUILD_DATE = "dev"
|
||
BUILD_COMMIT = "dev"
|
||
BUILD_BRANCH = "dev"
|
||
|
||
|
||
def _version_long() -> str:
    """Return the extended version label.

    The application version always comes first. The build date and the
    commit (prefixed with ``#``) are appended only when they differ from the
    ``"dev"`` placeholder. Example: ``v5.5 · 2026-04-15 18:15 · #234137e``.
    """
    optional_parts = (
        (BUILD_DATE, BUILD_DATE),
        (BUILD_COMMIT, f"#{BUILD_COMMIT}"),
    )
    segments = [APP_VERSION]
    segments.extend(label for raw, label in optional_parts if raw != "dev")
    return " · ".join(segments)
|
||
|
||
|
||
def _asset(name: str) -> Path:
    """Resolve the path of a file inside the ``assets/`` directory.

    Args:
        name: Path of the asset relative to ``assets/``.

    Returns:
        ``<base>/assets/<name>``, where ``<base>`` is the unpacked bundle
        directory in a frozen build and this module's directory otherwise.
        The path is not checked for existence.
    """
    if getattr(sys, 'frozen', False):
        # PyInstaller publishes the bundle directory as sys._MEIPASS. Other
        # freezers set sys.frozen without it, so fall back to the
        # executable's directory instead of raising AttributeError.
        bundle_dir = getattr(sys, '_MEIPASS', None)
        if bundle_dir:
            base = Path(bundle_dir)
        else:
            base = Path(sys.executable).resolve().parent
    else:
        base = Path(__file__).resolve().parent
    return base / 'assets' / name
|
||
|
||
def _app_dir() -> Path:
    """Return the application root directory.

    Returns:
        The unpacked bundle directory in a frozen build, otherwise the
        directory containing this module.
    """
    if getattr(sys, 'frozen', False):
        # sys._MEIPASS only exists under PyInstaller. When another freezer
        # sets sys.frozen, use the executable's directory rather than
        # failing with AttributeError.
        bundle_dir = getattr(sys, '_MEIPASS', None)
        if bundle_dir:
            return Path(bundle_dir)
        return Path(sys.executable).resolve().parent
    return Path(__file__).resolve().parent
|
||
|
||
def _exe_dir() -> Path:
    """Return the directory used for persistent files (config, logs).

    In a frozen build this is the directory of ``sys.executable``; otherwise
    it is the directory containing this module.
    """
    running_frozen = getattr(sys, 'frozen', False)
    anchor = Path(sys.executable) if running_frozen else Path(__file__).resolve()
    return anchor.parent
|
||
|
||
def _resolve_config() -> Path:
    """Return the path of ``config/dictionnaires.yml`` next to the exe/script.

    When the file is missing, the ``config`` directory is created and the
    file is seeded with the text returned by
    ``read_runtime_dictionaries_overlay_text()``, so the user can edit it
    without rebuilding.
    """
    target = _exe_dir() / "config" / "dictionnaires.yml"
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(read_runtime_dictionaries_overlay_text(), encoding="utf-8")
    return target
|
||
|
||
|
||
def _resolve_profiles_config() -> Path:
    """Return the path of ``config/profiles.yml`` next to the exe/script.

    When the file is missing, the ``config`` directory is created and the
    file is seeded with the text returned by
    ``read_runtime_profiles_overlay_text()``.
    """
    target = _exe_dir() / "config" / "profiles.yml"
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(read_runtime_profiles_overlay_text(), encoding="utf-8")
    return target
|
||
|
||
# Resolved once at import time. Both resolver calls create their file next
# to the executable/script when it does not exist yet.
DEFAULT_CFG = _resolve_config()
DEFAULT_PROFILES_CFG = _resolve_profiles_config()
# Model directory, located under the application root.
MODELS_DIR = _app_dir() / "models"

# Default and runtime-overlay dictionary texts, read once at import time.
DEFAULTS_CFG_TEXT = read_default_dictionaries_text()
RUNTIME_CFG_TEXT = read_runtime_dictionaries_overlay_text()
|
||
|
||
# Palette derived from the aivanonym logo (gradient magenta -> pink -> peach -> black)
#   Logo magenta     : primary (buttons, accents)
#   Peach            : secondary (tags, highlights)
#   Black/grey       : text and neutrals
#   White/light grey : backgrounds
CLR_PRIMARY = "#E91E63"        # logo magenta (CTA, links)
CLR_PRIMARY_DARK = "#C2185B"   # hover / pressed
CLR_PRIMARY_LIGHT = "#FCE4EC"  # light background (selected cards)
CLR_ACCENT = "#FFB74D"         # logo peach (secondary tags)
CLR_ACCENT_LIGHT = "#FFF3E0"   # light accent background
CLR_GREEN = "#2E7D32"          # success
CLR_GREEN_LIGHT = "#E8F5E9"
CLR_RED = "#C62828"            # error / danger
CLR_RED_LIGHT = "#FFEBEE"
CLR_BLUE_LIGHT = "#FCE4EC"     # kept for compatibility (remapped to primary_light)
CLR_CARD_BG = "#FFFFFF"
CLR_CARD_BORDER = "#E0E0E0"
CLR_BG = "#FAFAFA"             # main background (very light grey)
CLR_TEXT = "#212121"           # near-black (from the logo)
CLR_TEXT_SECONDARY = "#757575" # medium grey
CLR_DIVIDER = "#EEEEEE"
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Messages worker → UI
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@enum.unique
class MsgType(enum.Enum):
    """Kinds of messages carried by ``UiMessage``."""

    LOG = "log"
    PROGRESS = "progress"
    DONE = "done"
|
||
|
||
|
||
@dataclass
class UiMessage:
    """Message record; only ``kind`` is required, every other field has a default."""

    kind: MsgType

    # Free text payload.
    text: str = ""

    # Progress counters and the file they refer to.
    current: int = 0
    total: int = 0
    filename: str = ""

    # Summary counters and output directory.
    ok: int = 0
    ko: int = 0
    masked: int = 0
    outdir: str = ""

    # Total processing time, in seconds.
    total_time: float = 0.0
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def open_folder(path: Path):
    """Open *path* with the platform's file manager, ignoring any failure.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    everywhere else. Nothing is returned and every exception is swallowed.
    """
    try:
        if platform.system() == "Windows":
            os.startfile(str(path))  # type: ignore
            return
        launcher = "open" if platform.system() == "Darwin" else "xdg-open"
        subprocess.Popen([launcher, str(path)])
    except Exception:
        pass
|
||
|
||
|
||
def _detect_font() -> str:
    """Return the first preferred sans-serif family that Tk reports as installed.

    The candidates are tried in order of preference. ``"TkDefaultFont"`` is
    returned when none is installed or when the family list cannot be read
    (for example when no Tk root window exists).

    The lookup uses ``tkinter.font.families()``. Reading back a widget's
    ``font`` option only echoes the requested spec, so it would match every
    candidate whether or not the font exists.
    """
    preferred = ("Noto Sans", "Ubuntu", "Cantarell", "Helvetica Neue", "Helvetica")
    try:
        import tkinter.font as tkfont

        installed = {family.casefold() for family in tkfont.families()}
    except Exception:
        return "TkDefaultFont"
    for name in preferred:
        if name.casefold() in installed:
            return name
    return "TkDefaultFont"
|
||
|
||
|
||
def _detect_dark_mode() -> bool:
    """Report whether the GNOME colour scheme setting mentions "dark".

    Runs ``gsettings get org.gnome.desktop.interface color-scheme`` with a
    2-second timeout. Returns ``False`` whenever the command fails.
    """
    query = ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"]
    try:
        completed = subprocess.run(query, capture_output=True, text=True, timeout=2)
        scheme = completed.stdout.lower()
        return "dark" in scheme
    except Exception:
        return False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# ToolTip amélioré
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class ToolTip:
    """Dark tooltip shown just below a widget after a hover delay.

    Args:
        widget: Widget the tooltip is attached to.
        text: Text displayed in the tooltip.
        delay: Delay in milliseconds between pointer entry and display.
    """

    def __init__(self, widget: tk.Widget, text: str, delay: int = 400):
        self.widget = widget
        self.text = text
        self.delay = delay
        self.tip: Optional[tk.Toplevel] = None
        self._after_id: Optional[str] = None
        # add="+" appends to any handler already bound on the widget;
        # a plain bind() would replace it.
        widget.bind("<Enter>", self._schedule, add="+")
        widget.bind("<Leave>", self.hide, add="+")

    def _schedule(self, *_):
        """(Re)start the display timer."""
        self._cancel()
        self._after_id = self.widget.after(self.delay, self._show)

    def _cancel(self):
        """Cancel the pending display timer, if any."""
        if self._after_id:
            self.widget.after_cancel(self._after_id)
            self._after_id = None

    def _show(self):
        """Create the tooltip window unless one is already displayed."""
        # The timer has fired, so its id is no longer valid.
        self._after_id = None
        if self.tip:
            return
        x = self.widget.winfo_rootx() + 20
        y = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        self.tip = tw = tk.Toplevel(self.widget)
        # Borderless window positioned in screen coordinates.
        tw.wm_overrideredirect(True)
        tw.wm_geometry(f"+{x}+{y}")
        lbl = tk.Label(
            tw, text=self.text, justify=tk.LEFT,
            background="#1f2937", foreground="#f9fafb",
            relief=tk.SOLID, borderwidth=1,
            padx=8, pady=5, wraplength=320,
        )
        lbl.pack(ipadx=1)

    def hide(self, *_):
        """Cancel any pending display and destroy the tooltip window."""
        self._cancel()
        if self.tip:
            self.tip.destroy()
            self.tip = None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Application principale
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class App:
|
||
def __init__(self, root: tk.Tk):
    """Set up the window, state variables and NER/VLM managers, then build the UI.

    Args:
        root: The Tk root window hosting the application.
    """
    self.root = root
    # Title carries the long version so the build can be identified at a
    # glance (avoids mixing up old and new executables during tests).
    self.root.title(f"{APP_TITLE} — {_version_long()}")
    self.root.geometry("780x820")
    self.root.minsize(600, 650)

    # Window icon (top-left corner + Windows taskbar).
    # In dev mode (Linux) tkinter reads a PNG via iconphoto; on Windows,
    # iconbitmap accepts .ico. Both are attempted to cover each case.
    self._icon_refs: list = []  # references kept to avoid garbage collection
    self._apply_window_icon()

    # Preload the header logo (a persistent reference is needed, otherwise
    # tkinter discards the image and the label is blank).
    self._logo_img = self._load_image_safe(_asset('logo_header.png'))

    # --- Theme ---
    self._apply_theme()

    # --- Fonts ---
    self._font_family = _detect_font()
    self._f_title = (self._font_family, 20, "bold")
    self._f_body = (self._font_family, 11)
    self._f_body_bold = (self._font_family, 11, "bold")
    self._f_button = (self._font_family, 13, "bold")
    self._f_stat = (self._font_family, 24, "bold")
    self._f_small = (self._font_family, 10)
    self._f_card_title = (self._font_family, 12, "bold")
    self._f_card_desc = (self._font_family, 10)

    # --- Variables ---
    self.dir_var = tk.StringVar()
    self.status_var = tk.StringVar(value="Prêt.")
    self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
    self.profiles_path = tk.StringVar(value=str(DEFAULT_PROFILES_CFG))
    self.processing_profile_label_var = tk.StringVar(value="")
    self.manual_mask_template_var = tk.StringVar(value=MANUAL_MASK_NONE_LABEL)
    self.profile_description_var = tk.StringVar(value="")
    self.profile_require_manual_mask_var = tk.BooleanVar(value=False)
    self.profile_force_disable_vlm_var = tk.BooleanVar(value=False)
    self.queue: "queue.Queue[UiMessage]" = queue.Queue()
    self._processing_profiles: Dict[str, Dict[str, Any]] = {}
    self._processing_profile_labels_to_keys: Dict[str, str] = {}
    # Maps a display label to a template path; the "none" label maps to None.
    self._manual_mask_templates: Dict[str, Optional[Path]] = {
        MANUAL_MASK_NONE_LABEL: None,
    }
    self._profile_base_description = ""
    self._profile_manager_win: Optional[tk.Toplevel] = None
    self._advanced_params_win: Optional[tk.Toplevel] = None

    # --- NER (internal) ---
    self.use_hf = False
    self.th_per = 0.90
    self.th_org = 0.90
    self.th_loc = 0.90
    # Each manager is instantiated only when its optional import succeeded.
    self._onnx_manager: Optional[Any] = NerModelManager(cache_dir=MODELS_DIR) if NerModelManager else None
    self._eds_manager: Optional[Any] = EdsPseudoManager(cache_dir=MODELS_DIR) if EdsPseudoManager else None
    self._camembert_manager: Optional[Any] = CamembertNerManager() if CamembertNerManager else None
    self._active_manager: Optional[Any] = None
    self.cfg_data: Dict[str, Any] = {}

    # --- VLM (optional) ---
    self.use_vlm = tk.BooleanVar(value=False)
    self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
    self._vlm_available = False

    # --- Merged model catalogue (ONNX entries first, then EDS) ---
    catalog: Dict[str, str] = {}
    if self._onnx_manager:
        catalog.update(self._onnx_manager.models_catalog())
    if self._eds_manager:
        catalog.update(self._eds_manager.models_catalog())
    self._merged_catalog = catalog

    # --- Results ---
    self._last_outdir: Optional[Path] = None

    # --- Stop control ---
    self._stop_requested = False
    # --- Single file (None = folder mode) ---
    self._single_file: Optional[Path] = None

    # --- UI construction ---
    self._build_ui()
    # Show the "anonym" tab by default.
    self._switch_tab("anonym")
    self._pump_logs()
    self._ensure_cfg_exists()
    self._load_cfg()

    # --- Automatic loading of the NER model ---
    self._auto_load_ner()
|
||
|
||
# ---------------------------------------------------------------
|
||
# Onglets custom
|
||
# ---------------------------------------------------------------
|
||
def _switch_tab(self, name: str):
    """Show the tab called *name* and restyle the tab buttons accordingly.

    Unknown names are ignored. The selected tab gets the primary colour on
    its label and underline; every other tab gets the secondary text colour
    and an underline in the background colour.
    """
    if name not in self._tab_frames:
        return

    # Only one content frame is packed at a time.
    for pane in self._tab_frames.values():
        pane.pack_forget()
    self._tab_frames[name].pack(fill=tk.BOTH, expand=True)

    for key, parts in self._tab_buttons.items():
        selected = key == name
        label_fg = CLR_PRIMARY if selected else CLR_TEXT_SECONDARY
        underline_bg = CLR_PRIMARY if selected else CLR_BG
        parts["label"].configure(fg=label_fg, bg=CLR_BG)
        parts["underline"].configure(bg=underline_bg)

    self._active_tab = name
|
||
|
||
# ---------------------------------------------------------------
|
||
# Icônes & assets
|
||
# ---------------------------------------------------------------
|
||
def _apply_window_icon(self):
    """Set the window icon: ``.ico`` on Windows when possible, PNG otherwise.

    Every failure is swallowed, because a missing icon must not block
    start-up.
    """
    try:
        ico_path = _asset('icons/app.ico')
        if sys.platform == 'win32' and ico_path.exists():
            try:
                self.root.iconbitmap(str(ico_path))
            except Exception:
                pass  # fall through to the PNG fallback
            else:
                return

        # Fallback: PNG through iconphoto (all platforms), which needs PIL.
        png_path = _asset('icons/icon_128.png')
        if not (png_path.exists() and _PIL_AVAILABLE):
            return
        photo = ImageTk.PhotoImage(Image.open(png_path))
        # Keep a reference so the image is not garbage-collected.
        self._icon_refs.append(photo)
        self.root.iconphoto(True, photo)
    except Exception:
        pass  # silent degradation: the icon is not essential
|
||
|
||
def _load_image_safe(self, path: Path):
    """Load *path* as a Tk photo image and keep a reference to it.

    Returns ``None`` when PIL is unavailable, when the file does not exist,
    or when loading fails for any reason.
    """
    if not (_PIL_AVAILABLE and path.exists()):
        return None
    try:
        rgba = Image.open(path).convert('RGBA')
        tk_image = ImageTk.PhotoImage(rgba)
        # The stored reference protects the image from garbage collection.
        self._icon_refs.append(tk_image)
    except Exception:
        return None
    return tk_image
|
||
|
||
# ---------------------------------------------------------------
|
||
# Thème
|
||
# ---------------------------------------------------------------
|
||
def _apply_theme(self):
    """Apply the sv_ttk theme when available, else fall back to ttk "clam".

    With sv_ttk, the mode is "dark" or "light" depending on
    ``_detect_dark_mode()``. Failures of the "clam" fallback are ignored.
    """
    if sv_ttk is None:
        try:
            fallback_style = ttk.Style()
            fallback_style.theme_use("clam")
        except Exception:
            pass
        return
    sv_ttk.set_theme("dark" if _detect_dark_mode() else "light")
|
||
|
||
# ---------------------------------------------------------------
|
||
# Construction de la vue unique
|
||
# ---------------------------------------------------------------
|
||
def _build_ui(self):
|
||
self.root.configure(bg=CLR_BG)
|
||
pad_x = 32
|
||
|
||
# =============================================================
|
||
# HEADER fixe (logo + titre + baseline), hors onglets
|
||
# =============================================================
|
||
header = tk.Frame(self.root, bg=CLR_BG)
|
||
header.pack(fill=tk.X, padx=pad_x, pady=(16, 8))
|
||
|
||
if self._logo_img is not None:
|
||
tk.Label(header, image=self._logo_img, bg=CLR_BG).pack(anchor="w")
|
||
else:
|
||
tk.Label(header, text="aivanonym", font=(self._font_family, 22, "bold"),
|
||
bg=CLR_BG, fg=CLR_PRIMARY).pack(anchor="w")
|
||
|
||
tk.Label(
|
||
header,
|
||
text="Pseudonymisation de documents médicaux — 100% local",
|
||
font=(self._font_family, 10),
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
).pack(fill=tk.X, pady=(4, 0))
|
||
|
||
# Ligne colorée inspirée du gradient du logo
|
||
accent_bar = tk.Frame(self.root, bg=CLR_PRIMARY, height=3)
|
||
accent_bar.pack(fill=tk.X)
|
||
|
||
# =============================================================
|
||
# ONGLETS CUSTOM (boutons uniformes — rendu pro)
|
||
# Remplace ttk.Notebook dont les onglets ont des tailles/styles
|
||
# variables selon l'état actif. Ici : tous les onglets identiques,
|
||
# seule une bordure basse magenta signale l'onglet actif.
|
||
# =============================================================
|
||
tabs_bar = tk.Frame(self.root, bg=CLR_BG)
|
||
tabs_bar.pack(fill=tk.X, padx=0, pady=(4, 0))
|
||
|
||
self._tab_frames: dict = {} # nom → frame outer
|
||
self._tab_buttons: dict = {} # nom → dict(container, label, underline)
|
||
self._active_tab: Optional[str] = None
|
||
|
||
def _make_tab_button(parent, name: str, label: str):
|
||
"""Crée un onglet cliquable uniforme (fond, texte, underline)."""
|
||
container = tk.Frame(parent, bg=CLR_BG, cursor="hand2")
|
||
container.pack(side=tk.LEFT)
|
||
|
||
txt = tk.Label(
|
||
container, text=label,
|
||
font=(self._font_family, 11, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
|
||
padx=26, pady=10, cursor="hand2",
|
||
)
|
||
txt.pack(fill=tk.X)
|
||
|
||
# Bordure basse qui devient magenta quand actif
|
||
underline = tk.Frame(container, bg=CLR_BG, height=3)
|
||
underline.pack(fill=tk.X)
|
||
|
||
def _on_click(_e=None):
|
||
self._switch_tab(name)
|
||
for w in (container, txt, underline):
|
||
w.bind("<Button-1>", _on_click)
|
||
|
||
self._tab_buttons[name] = {
|
||
"container": container, "label": txt, "underline": underline,
|
||
}
|
||
|
||
_make_tab_button(tabs_bar, "anonym", "Anonymisation")
|
||
_make_tab_button(tabs_bar, "params", "Paramètres")
|
||
_make_tab_button(tabs_bar, "profiles", "Profils")
|
||
|
||
# Séparateur gris clair sous les onglets
|
||
tk.Frame(self.root, bg=CLR_DIVIDER, height=1).pack(fill=tk.X)
|
||
|
||
# Conteneur des contenus (un seul visible à la fois)
|
||
tabs_content = tk.Frame(self.root, bg=CLR_BG)
|
||
tabs_content.pack(fill=tk.BOTH, expand=True)
|
||
|
||
tab_anonym_outer = tk.Frame(tabs_content, bg=CLR_BG)
|
||
tab_params_outer = tk.Frame(tabs_content, bg=CLR_BG)
|
||
tab_profiles_outer = tk.Frame(tabs_content, bg=CLR_BG)
|
||
self._tab_frames["anonym"] = tab_anonym_outer
|
||
self._tab_frames["params"] = tab_params_outer
|
||
self._tab_frames["profiles"] = tab_profiles_outer
|
||
|
||
# --- Scroll pour l'onglet Anonymisation ---
|
||
canvas = tk.Canvas(tab_anonym_outer, bg=CLR_BG, highlightthickness=0)
|
||
scrollbar = ttk.Scrollbar(tab_anonym_outer, orient=tk.VERTICAL, command=canvas.yview)
|
||
self._scroll_frame = tk.Frame(canvas, bg=CLR_BG)
|
||
self._scroll_frame.bind(
|
||
"<Configure>",
|
||
lambda e: canvas.configure(scrollregion=canvas.bbox("all")),
|
||
)
|
||
canvas_window = canvas.create_window((0, 0), window=self._scroll_frame, anchor="nw")
|
||
canvas.configure(yscrollcommand=scrollbar.set)
|
||
|
||
def _on_canvas_configure(event):
|
||
canvas.itemconfig(canvas_window, width=event.width)
|
||
canvas.bind("<Configure>", _on_canvas_configure)
|
||
|
||
def _on_mousewheel(event):
|
||
canvas.yview_scroll(int(-1 * (event.delta / 120)), "units")
|
||
def _on_mousewheel_linux(event):
|
||
if event.num == 4:
|
||
canvas.yview_scroll(-3, "units")
|
||
elif event.num == 5:
|
||
canvas.yview_scroll(3, "units")
|
||
canvas.bind_all("<MouseWheel>", _on_mousewheel)
|
||
canvas.bind_all("<Button-4>", _on_mousewheel_linux)
|
||
canvas.bind_all("<Button-5>", _on_mousewheel_linux)
|
||
canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# --- Scroll pour l'onglet Paramètres ---
|
||
canvas2 = tk.Canvas(tab_params_outer, bg=CLR_BG, highlightthickness=0)
|
||
scrollbar2 = ttk.Scrollbar(tab_params_outer, orient=tk.VERTICAL, command=canvas2.yview)
|
||
self._params_scroll = tk.Frame(canvas2, bg=CLR_BG)
|
||
self._params_scroll.bind(
|
||
"<Configure>",
|
||
lambda e: canvas2.configure(scrollregion=canvas2.bbox("all")),
|
||
)
|
||
canvas2_window = canvas2.create_window((0, 0), window=self._params_scroll, anchor="nw")
|
||
canvas2.configure(yscrollcommand=scrollbar2.set)
|
||
def _on_canvas2_configure(event):
|
||
canvas2.itemconfig(canvas2_window, width=event.width)
|
||
canvas2.bind("<Configure>", _on_canvas2_configure)
|
||
canvas2.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
scrollbar2.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# --- Scroll pour l'onglet Profils ---
|
||
canvas3 = tk.Canvas(tab_profiles_outer, bg=CLR_BG, highlightthickness=0)
|
||
scrollbar3 = ttk.Scrollbar(tab_profiles_outer, orient=tk.VERTICAL, command=canvas3.yview)
|
||
self._profiles_scroll = tk.Frame(canvas3, bg=CLR_BG)
|
||
self._profiles_scroll.bind(
|
||
"<Configure>",
|
||
lambda e: canvas3.configure(scrollregion=canvas3.bbox("all")),
|
||
)
|
||
canvas3_window = canvas3.create_window((0, 0), window=self._profiles_scroll, anchor="nw")
|
||
canvas3.configure(yscrollcommand=scrollbar3.set)
|
||
def _on_canvas3_configure(event):
|
||
canvas3.itemconfig(canvas3_window, width=event.width)
|
||
canvas3.bind("<Configure>", _on_canvas3_configure)
|
||
canvas3.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
scrollbar3.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# "main" pointe désormais sur le scroll de l'onglet Anonymisation.
|
||
# Tout le contenu existant (étape 1, formats, boutons, progress, résultats)
|
||
# reste inchangé — seul le parent implicite a changé.
|
||
main = self._scroll_frame
|
||
|
||
# =============================================================
|
||
# ÉTAPE 1 — Choix du dossier
|
||
# =============================================================
|
||
tk.Label(
|
||
main, text="1. Choisir les documents ou fichiers (PDF, Word, Images, Texte)", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
|
||
|
||
self._folder_zone = tk.Frame(
|
||
main, bg=CLR_CARD_BG, highlightbackground=CLR_CARD_BORDER,
|
||
highlightthickness=2, cursor="hand2",
|
||
)
|
||
self._folder_zone.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
|
||
|
||
# Contenu initial (invite à cliquer)
|
||
self._folder_inner = tk.Frame(self._folder_zone, bg=CLR_CARD_BG)
|
||
self._folder_inner.pack(fill=tk.X, padx=20, pady=18)
|
||
|
||
self._folder_icon_lbl = tk.Label(
|
||
self._folder_inner, text="\U0001f4c2", font=(self._font_family, 28),
|
||
bg=CLR_CARD_BG,
|
||
)
|
||
self._folder_icon_lbl.pack()
|
||
|
||
self._folder_text_lbl = tk.Label(
|
||
self._folder_inner,
|
||
text="Cliquez pour choisir un dossier ou un fichier",
|
||
font=self._f_body, bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._folder_text_lbl.pack(pady=(4, 0))
|
||
|
||
# Rendre toute la zone cliquable
|
||
for w in (self._folder_zone, self._folder_inner, self._folder_icon_lbl, self._folder_text_lbl):
|
||
w.bind("<Button-1>", lambda e: self._browse())
|
||
|
||
# =============================================================
|
||
# ÉTAPE 2 — Info formats générés
|
||
# =============================================================
|
||
tk.Label(
|
||
main, text="2. Formats générés", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 6))
|
||
|
||
info_frame = tk.Frame(
|
||
main, bg=CLR_BLUE_LIGHT,
|
||
highlightbackground=CLR_CARD_BORDER, highlightthickness=1,
|
||
)
|
||
info_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 18))
|
||
|
||
info_inner = tk.Frame(info_frame, bg=CLR_BLUE_LIGHT)
|
||
info_inner.pack(fill=tk.X, padx=16, pady=12)
|
||
|
||
tk.Label(
|
||
info_inner,
|
||
text="Paramètres de traitement :",
|
||
font=self._f_body_bold, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X)
|
||
|
||
tk.Label(
|
||
info_inner,
|
||
text=("\u2022 Recherche récursive de tous les documents dans les sous-dossiers\n"
|
||
"\u2022 Sortie PDF Image (raster) — sécurité maximale, aucun texte résiduel\n"
|
||
"\u2022 Résultats dans « anonymise/ » en conservant les sous-dossiers source"),
|
||
font=self._f_card_desc, bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
|
||
anchor="w", justify=tk.LEFT,
|
||
).pack(fill=tk.X, pady=(4, 0))
|
||
|
||
# --- Checkbox VLM ---
|
||
if VlmManager is not None:
|
||
vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT)
|
||
vlm_row.pack(fill=tk.X, pady=(8, 0))
|
||
self._vlm_check = tk.Checkbutton(
|
||
vlm_row, text="Analyse visuelle VLM (Ollama)",
|
||
variable=self.use_vlm, font=self._f_card_desc,
|
||
bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT,
|
||
command=self._on_vlm_toggle,
|
||
)
|
||
self._vlm_check.pack(side=tk.LEFT)
|
||
self._vlm_status_lbl = tk.Label(
|
||
vlm_row, text="", font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0))
|
||
ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.")
|
||
|
||
# =============================================================
|
||
# BOUTONS LANCER / STOPPER
|
||
# =============================================================
|
||
buttons_frame = tk.Frame(main, bg=CLR_BG)
|
||
buttons_frame.pack(fill=tk.X, padx=pad_x, pady=(0, 4))
|
||
|
||
self.btn_run = tk.Button(
|
||
buttons_frame, text="Lancer l'anonymisation",
|
||
font=self._f_button, bg=CLR_PRIMARY, fg="white",
|
||
activebackground="#1d4ed8", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._run,
|
||
)
|
||
self.btn_run.pack(fill=tk.X)
|
||
|
||
self.btn_stop = tk.Button(
|
||
buttons_frame, text="Arrêter le traitement",
|
||
font=self._f_button, bg=CLR_RED, fg="white",
|
||
activebackground="#b91c1c", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._stop,
|
||
)
|
||
# NE PAS pack — sera affiché pendant le traitement
|
||
|
||
# Lien aide
|
||
help_lbl = tk.Label(
|
||
main, text="Comment ça marche ?", font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
|
||
)
|
||
help_lbl.pack(pady=(0, 8))
|
||
help_lbl.bind("<Button-1>", lambda e: self._show_help())
|
||
|
||
# =============================================================
|
||
# ONGLET "PARAMÈTRES" — contenu monté dans self._params_scroll
|
||
# =============================================================
|
||
self._params_frame = self._params_scroll
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text="Personnaliser le masquage",
|
||
font=(self._font_family, 14, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(20, 4))
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text=("Ces listes complètent les détections automatiques du programme. "
|
||
"Utile pour gérer les spécificités de votre établissement."),
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 4))
|
||
|
||
self._params_summary = tk.Label(
|
||
self._params_frame,
|
||
text="",
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
)
|
||
self._params_summary.pack(fill=tk.X, padx=pad_x, pady=(0, 4))
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text=("Les listes ci-dessous ne montrent que les paramètres manuels éditables. "
|
||
"Le moteur applique aussi des règles automatiques non listées ici "
|
||
"(regex, gazetteers FINESS/INSEE, dictionnaires et règles admin)."),
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 16))
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text="Masques PDF réutilisables",
|
||
font=(self._font_family, 12, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 4))
|
||
|
||
tk.Label(
|
||
self._params_frame,
|
||
text=(
|
||
"Pour les formulaires toujours mis en page de la même façon, "
|
||
"ouvrez l'éditeur de masques PDF, dessinez les zones à caviarder "
|
||
"puis enregistrez un modèle réutilisable."
|
||
),
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 8))
|
||
|
||
manual_mask_row = tk.Frame(self._params_frame, bg=CLR_BG)
|
||
manual_mask_row.pack(fill=tk.X, padx=pad_x, pady=(0, 16))
|
||
|
||
manual_mask_btn = tk.Button(
|
||
manual_mask_row, text="Ouvrir l'éditeur de masques PDF",
|
||
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._open_manual_mask_designer,
|
||
)
|
||
manual_mask_btn.pack(side=tk.LEFT)
|
||
|
||
self._manual_mask_combo = ttk.Combobox(
|
||
manual_mask_row,
|
||
textvariable=self.manual_mask_template_var,
|
||
state="readonly",
|
||
width=34,
|
||
)
|
||
self._manual_mask_combo.pack(side=tk.LEFT, padx=(6, 0))
|
||
self._manual_mask_combo.bind("<<ComboboxSelected>>", lambda _e: self._refresh_manual_mask_hint())
|
||
|
||
refresh_templates_btn = tk.Button(
|
||
manual_mask_row, text="Actualiser les modèles",
|
||
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._refresh_manual_mask_templates,
|
||
)
|
||
refresh_templates_btn.pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
templates_btn = tk.Button(
|
||
manual_mask_row, text="Ouvrir le dossier des modèles",
|
||
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._open_manual_mask_templates_dir,
|
||
)
|
||
templates_btn.pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
self._manual_mask_hint = tk.Label(
|
||
self._params_frame,
|
||
text="",
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
)
|
||
self._manual_mask_hint.pack(fill=tk.X, padx=pad_x, pady=(0, 12))
|
||
|
||
# Conteneur interne visible : réglages manuels éditables.
|
||
params_inner = tk.Frame(self._params_frame, bg=CLR_BG)
|
||
params_inner.pack(fill=tk.X, padx=pad_x, pady=(0, 12))
|
||
|
||
# --- Whitelist (phrases à ne pas anonymiser) ---
|
||
self._wl_listbox, self._wl_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u2705 Phrases à ne PAS anonymiser :",
|
||
placeholder="Ajouter une phrase à protéger...",
|
||
color_tag=CLR_GREEN_LIGHT,
|
||
on_change=self._refresh_params_summary,
|
||
)
|
||
|
||
# --- Blacklist (phrases à toujours masquer) ---
|
||
self._bl_listbox, self._bl_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u26d4 Mots/phrases à TOUJOURS masquer :",
|
||
placeholder="Ajouter un mot ou phrase à masquer...",
|
||
color_tag=CLR_PRIMARY_LIGHT,
|
||
on_change=self._refresh_params_summary,
|
||
)
|
||
|
||
# --- Stop-words additionnels (mots à ne jamais identifier comme noms) ---
|
||
# Différent de la whitelist : agit en amont, pour les sigles, acronymes,
|
||
# termes métier locaux qui ressemblent à des noms mais n'en sont pas.
|
||
self._sw_listbox, self._sw_entry = self._build_phrase_list(
|
||
params_inner,
|
||
title="\u26a0 Mots à ne jamais identifier comme noms (sigles, acronymes...) :",
|
||
placeholder="Ajouter un mot (ex: sigle local, acronyme métier)...",
|
||
color_tag=CLR_ACCENT_LIGHT,
|
||
on_change=self._refresh_params_summary,
|
||
)
|
||
|
||
# Boutons sauvegarder + exporter
|
||
btn_row = tk.Frame(params_inner, bg=CLR_BG)
|
||
btn_row.pack(fill=tk.X, pady=(12, 12))
|
||
|
||
export_btn = tk.Button(
|
||
btn_row, text="\u2709 Exporter pour envoi",
|
||
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._export_params,
|
||
)
|
||
export_btn.pack(side=tk.LEFT)
|
||
|
||
import_btn = tk.Button(
|
||
btn_row, text="\u2B07 Importer",
|
||
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._import_params,
|
||
)
|
||
import_btn.pack(side=tk.LEFT, padx=(4, 0))
|
||
|
||
save_btn = tk.Button(
|
||
btn_row, text="Sauvegarder",
|
||
font=self._f_small, bg=CLR_PRIMARY, fg="white",
|
||
activebackground=CLR_PRIMARY_DARK, activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", padx=14, pady=6,
|
||
command=self._save_params,
|
||
)
|
||
save_btn.pack(side=tk.RIGHT)
|
||
|
||
# Charger les valeurs initiales depuis la config
|
||
self._load_params()
|
||
self._refresh_manual_mask_templates()
|
||
|
||
# =============================================================
|
||
# ONGLET "PROFILS"
|
||
# =============================================================
|
||
self._profiles_frame = self._profiles_scroll
|
||
|
||
tk.Label(
|
||
self._profiles_frame,
|
||
text="Profils métier",
|
||
font=(self._font_family, 14, "bold"),
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, padx=pad_x, pady=(20, 4))
|
||
|
||
tk.Label(
|
||
self._profiles_frame,
|
||
text=(
|
||
"Un profil mémorise les réglages courants de l'application. "
|
||
"Utilise cet onglet pour choisir le profil actif, modifier sa description, "
|
||
"et enregistrer un nouveau profil utilisateur."
|
||
),
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=700,
|
||
).pack(fill=tk.X, padx=pad_x, pady=(0, 12))
|
||
|
||
profile_card = tk.Frame(
|
||
self._profiles_frame,
|
||
bg=CLR_CARD_BG,
|
||
highlightbackground=CLR_CARD_BORDER,
|
||
highlightthickness=1,
|
||
)
|
||
profile_card.pack(fill=tk.X, padx=pad_x, pady=(0, 16))
|
||
|
||
profile_card_inner = tk.Frame(profile_card, bg=CLR_CARD_BG)
|
||
profile_card_inner.pack(fill=tk.X, padx=16, pady=14)
|
||
profile_card_inner.columnconfigure(0, weight=3)
|
||
profile_card_inner.columnconfigure(1, weight=2)
|
||
|
||
profile_left = tk.Frame(profile_card_inner, bg=CLR_CARD_BG)
|
||
profile_left.grid(row=0, column=0, sticky="nsew", padx=(0, 10))
|
||
|
||
profile_right = tk.Frame(profile_card_inner, bg=CLR_BLUE_LIGHT)
|
||
profile_right.grid(row=0, column=1, sticky="nsew")
|
||
|
||
tk.Label(
|
||
profile_left,
|
||
text="Profil actif",
|
||
font=self._f_body_bold,
|
||
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 4))
|
||
|
||
profile_select_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
|
||
profile_select_row.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
self._profile_combo = ttk.Combobox(
|
||
profile_select_row,
|
||
textvariable=self.processing_profile_label_var,
|
||
state="readonly",
|
||
width=34,
|
||
)
|
||
self._profile_combo.pack(side=tk.LEFT)
|
||
self._profile_combo.bind("<<ComboboxSelected>>", lambda _e: self._apply_selected_processing_profile())
|
||
|
||
refresh_profiles_btn = tk.Button(
|
||
profile_select_row, text="Actualiser",
|
||
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._refresh_processing_profiles,
|
||
)
|
||
refresh_profiles_btn.pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
self._profile_kind_label = tk.Label(
|
||
profile_left,
|
||
text="",
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
)
|
||
self._profile_kind_label.pack(fill=tk.X, pady=(0, 8))
|
||
|
||
tk.Label(
|
||
profile_left,
|
||
text="Description",
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 4))
|
||
|
||
self._profile_description_entry = tk.Entry(
|
||
profile_left,
|
||
textvariable=self.profile_description_var,
|
||
font=self._f_small,
|
||
relief=tk.GROOVE,
|
||
bd=1,
|
||
)
|
||
self._profile_description_entry.pack(fill=tk.X, pady=(0, 10))
|
||
self.profile_description_var.trace_add("write", self._on_profile_description_change)
|
||
|
||
flags_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
|
||
flags_row.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
self._profile_require_manual_mask_check = tk.Checkbutton(
|
||
flags_row,
|
||
text="Masque manuel obligatoire",
|
||
variable=self.profile_require_manual_mask_var,
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG,
|
||
activebackground=CLR_CARD_BG,
|
||
command=self._on_profile_editor_change,
|
||
)
|
||
self._profile_require_manual_mask_check.pack(side=tk.LEFT)
|
||
|
||
self._profile_force_disable_vlm_check = tk.Checkbutton(
|
||
flags_row,
|
||
text="Désactiver le VLM",
|
||
variable=self.profile_force_disable_vlm_var,
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG,
|
||
activebackground=CLR_CARD_BG,
|
||
command=self._on_profile_editor_change,
|
||
)
|
||
self._profile_force_disable_vlm_check.pack(side=tk.LEFT, padx=(12, 0))
|
||
|
||
tk.Label(
|
||
profile_left,
|
||
text="Masque PDF mémorisé par ce profil",
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 4))
|
||
|
||
profile_mask_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
|
||
profile_mask_row.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
self._profile_manual_mask_combo = ttk.Combobox(
|
||
profile_mask_row,
|
||
textvariable=self.manual_mask_template_var,
|
||
state="readonly",
|
||
width=34,
|
||
)
|
||
self._profile_manual_mask_combo.pack(side=tk.LEFT)
|
||
self._profile_manual_mask_combo.bind(
|
||
"<<ComboboxSelected>>",
|
||
lambda _e: self._refresh_manual_mask_hint(),
|
||
)
|
||
|
||
tk.Button(
|
||
profile_mask_row, text="Actualiser les modèles",
|
||
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._refresh_manual_mask_templates,
|
||
).pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
self._profile_mask_explainer = tk.Label(
|
||
profile_left,
|
||
text=(
|
||
"Ce choix est enregistré dans le profil. "
|
||
"Quand tu recharges ce profil, ce masque est re-sélectionné automatiquement."
|
||
),
|
||
font=self._f_small,
|
||
bg=CLR_CARD_BG, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=420,
|
||
)
|
||
self._profile_mask_explainer.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
profile_actions_row = tk.Frame(profile_left, bg=CLR_CARD_BG)
|
||
profile_actions_row.pack(fill=tk.X)
|
||
|
||
tk.Button(
|
||
profile_actions_row, text="Nouveau profil...",
|
||
font=self._f_small, bg=CLR_PRIMARY_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._create_processing_profile,
|
||
).pack(side=tk.LEFT)
|
||
|
||
tk.Button(
|
||
profile_actions_row, text="Enregistrer",
|
||
font=self._f_small, bg=CLR_PRIMARY, fg="white",
|
||
activebackground=CLR_PRIMARY_DARK, activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", padx=10, pady=6,
|
||
command=self._save_selected_processing_profile,
|
||
).pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
tk.Button(
|
||
profile_actions_row, text="Renommer...",
|
||
font=self._f_small, bg=CLR_CARD_BG, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._rename_selected_processing_profile,
|
||
).pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
tk.Button(
|
||
profile_actions_row, text="Définir par défaut",
|
||
font=self._f_small, bg=CLR_ACCENT_LIGHT, fg=CLR_TEXT,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._set_selected_processing_profile_default,
|
||
).pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
tk.Button(
|
||
profile_actions_row, text="Supprimer",
|
||
font=self._f_small, bg=CLR_RED_LIGHT, fg=CLR_RED,
|
||
relief=tk.GROOVE, cursor="hand2", padx=10, pady=6,
|
||
command=self._delete_selected_processing_profile,
|
||
).pack(side=tk.LEFT, padx=(6, 0))
|
||
|
||
profile_right_inner = tk.Frame(profile_right, bg=CLR_BLUE_LIGHT)
|
||
profile_right_inner.pack(fill=tk.BOTH, expand=True, padx=14, pady=14)
|
||
|
||
tk.Label(
|
||
profile_right_inner,
|
||
text="Résumé du profil",
|
||
font=self._f_body_bold,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 6))
|
||
|
||
self._profile_description = tk.Label(
|
||
profile_right_inner,
|
||
text="",
|
||
font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
|
||
)
|
||
self._profile_description.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
self._profile_capture_summary = tk.Label(
|
||
profile_right_inner,
|
||
text="",
|
||
font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT, anchor="w", justify=tk.LEFT, wraplength=300,
|
||
)
|
||
self._profile_capture_summary.pack(fill=tk.X, pady=(0, 10))
|
||
|
||
tk.Label(
|
||
profile_right_inner,
|
||
text=(
|
||
"Sens de « masque manuel obligatoire » : le profil n'impose pas un masque précis, "
|
||
"mais il bloque le lancement si aucun masque PDF n'est sélectionné."
|
||
),
|
||
font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
|
||
).pack(fill=tk.X, pady=(0, 10))
|
||
|
||
tk.Label(
|
||
profile_right_inner,
|
||
text=(
|
||
"Lien profil ↔ masque : le masque actuellement choisi dans cet onglet "
|
||
"est mémorisé dans le profil lors de l'enregistrement."
|
||
),
|
||
font=self._f_small,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY, anchor="w", justify=tk.LEFT, wraplength=300,
|
||
).pack(fill=tk.X)
|
||
|
||
self._refresh_processing_profiles()
|
||
|
||
# Retour dans l'onglet Anonymisation
|
||
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(0, 8))
|
||
|
||
# =============================================================
|
||
# BARRE DE PROGRESSION (masquée)
|
||
# =============================================================
|
||
self._progress_frame = tk.Frame(main, bg=CLR_BG)
|
||
# NE PAS pack — sera affiché dynamiquement
|
||
|
||
self._progressbar = ttk.Progressbar(
|
||
self._progress_frame, orient=tk.HORIZONTAL, mode="determinate",
|
||
)
|
||
self._progressbar.pack(fill=tk.X, padx=0, pady=(0, 4))
|
||
|
||
self._progress_label = tk.Label(
|
||
self._progress_frame, text="", font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
)
|
||
self._progress_label.pack(fill=tk.X)
|
||
|
||
# =============================================================
|
||
# SECTION RÉSULTATS (masquée)
|
||
# =============================================================
|
||
self._results_frame = tk.Frame(main, bg=CLR_BG)
|
||
# NE PAS pack
|
||
|
||
tk.Label(
|
||
self._results_frame, text="Résultats", font=self._f_body_bold,
|
||
bg=CLR_BG, fg=CLR_TEXT, anchor="w",
|
||
).pack(fill=tk.X, pady=(0, 8))
|
||
|
||
stats_row = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
stats_row.pack(fill=tk.X, pady=(0, 12))
|
||
stats_row.columnconfigure(0, weight=1)
|
||
stats_row.columnconfigure(1, weight=1)
|
||
stats_row.columnconfigure(2, weight=1)
|
||
|
||
self._stat_files = self._make_stat_card(stats_row, "0", "fichiers traités", CLR_GREEN, CLR_GREEN_LIGHT, 0)
|
||
self._stat_masked = self._make_stat_card(stats_row, "0", "données masquées", CLR_PRIMARY, CLR_PRIMARY_LIGHT, 1)
|
||
self._stat_errors = self._make_stat_card(stats_row, "0", "erreurs", CLR_TEXT_SECONDARY, "#f3f4f6", 2)
|
||
|
||
# Indicateurs de qualité et sécurité
|
||
quality_row = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
quality_row.pack(fill=tk.X, pady=(0, 12))
|
||
|
||
# Badge de fuites
|
||
self._leak_badge = tk.Label(
|
||
quality_row,
|
||
text="🔒 Vérification en cours...",
|
||
font=self._f_body_bold,
|
||
bg=CLR_BLUE_LIGHT, fg=CLR_PRIMARY,
|
||
padx=12, pady=6,
|
||
)
|
||
self._leak_badge.pack(side=tk.LEFT, padx=(0, 8))
|
||
|
||
# Temps de traitement
|
||
self._perf_label = tk.Label(
|
||
quality_row,
|
||
text="⏱️ Calcul en cours...",
|
||
font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY,
|
||
)
|
||
self._perf_label.pack(side=tk.LEFT)
|
||
|
||
self.btn_open_out = tk.Button(
|
||
self._results_frame, text="Ouvrir le dossier de résultats",
|
||
font=self._f_button, bg=CLR_GREEN, fg="white",
|
||
activebackground="#15803d", activeforeground="white",
|
||
relief=tk.FLAT, cursor="hand2", pady=10,
|
||
command=self._open_out,
|
||
)
|
||
self.btn_open_out.pack(fill=tk.X, pady=(0, 8))
|
||
|
||
# Toggle journal
|
||
self._log_visible = False
|
||
self._log_toggle = tk.Label(
|
||
self._results_frame, text="Voir le journal détaillé \u25BC",
|
||
font=self._f_small, bg=CLR_BG, fg=CLR_PRIMARY, cursor="hand2",
|
||
)
|
||
self._log_toggle.pack(pady=(0, 4))
|
||
self._log_toggle.bind("<Button-1>", lambda e: self._toggle_log())
|
||
|
||
self._log_frame = tk.Frame(self._results_frame, bg=CLR_BG)
|
||
# NE PAS pack
|
||
|
||
self.txt = tk.Text(
|
||
self._log_frame, height=14, font=self._f_small,
|
||
bg="#f3f4f6", fg=CLR_TEXT, relief=tk.FLAT, wrap=tk.WORD,
|
||
state=tk.DISABLED,
|
||
)
|
||
log_scrollbar = ttk.Scrollbar(self._log_frame, command=self.txt.yview)
|
||
self.txt.configure(yscrollcommand=log_scrollbar.set)
|
||
self.txt.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
|
||
log_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
|
||
|
||
# =============================================================
|
||
# BARRE DE STATUT
|
||
# =============================================================
|
||
ttk.Separator(main).pack(fill=tk.X, padx=pad_x, pady=(18, 0))
|
||
|
||
status_bar = tk.Frame(main, bg=CLR_BG)
|
||
status_bar.pack(fill=tk.X, padx=pad_x, pady=(6, 12))
|
||
|
||
tk.Label(
|
||
status_bar, textvariable=self.status_var, font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="w",
|
||
).pack(side=tk.LEFT)
|
||
|
||
tk.Label(
|
||
status_bar, text=_version_long(), font=self._f_small,
|
||
bg=CLR_BG, fg=CLR_TEXT_SECONDARY, anchor="e",
|
||
).pack(side=tk.RIGHT)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Cartes de statistiques
|
||
# ---------------------------------------------------------------
|
||
def _make_stat_card(self, parent, number: str, label: str,
                    fg_color: str, bg_color: str, col: int) -> Dict[str, tk.Label]:
    """Create one statistics tile (big number + caption) in grid column *col* of *parent*.

    Args:
        parent: Container widget; the tile is gridded in its row 0.
        number: Initial text of the large figure.
        label: Caption displayed under the figure.
        fg_color: Foreground colour of the figure.
        bg_color: Background (and border) colour of the whole tile.
        col: Grid column index; it also selects the horizontal gutter.

    Returns:
        A dict with the created widgets under the keys ``"frame"``,
        ``"number"`` and ``"label"``.
    """
    # Column 0 and the last column touch the container edge; column 1 gets a
    # gutter on both sides.
    if col == 0:
        gutter = (0, 4)
    elif col == 1:
        gutter = (4, 4)
    else:
        gutter = (4, 0)

    tile = tk.Frame(
        parent,
        bg=bg_color,
        highlightbackground=bg_color,
        highlightthickness=1,
    )
    tile.grid(row=0, column=col, sticky="nsew", padx=gutter)

    figure = tk.Label(tile, text=number, font=self._f_stat, bg=bg_color, fg=fg_color)
    figure.pack(pady=(12, 2))

    caption = tk.Label(tile, text=label, font=self._f_small, bg=bg_color, fg=CLR_TEXT_SECONDARY)
    caption.pack(pady=(0, 12))

    return {"frame": tile, "number": figure, "label": caption}
|
||
|
||
def _update_stat_card(self, card: Dict[str, tk.Label], value: int,
|
||
fg_color: str, bg_color: str):
|
||
card["number"].configure(text=str(value), fg=fg_color, bg=bg_color)
|
||
card["frame"].configure(bg=bg_color, highlightbackground=bg_color)
|
||
card["label"].configure(bg=bg_color)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Actions dossier
|
||
# ---------------------------------------------------------------
|
||
def _browse(self):
    """Pop up a two-entry menu at the pointer: pick a folder or pick a single file."""
    chooser = tk.Menu(self.root, tearoff=0)
    chooser.add_command(label="Choisir un dossier", command=self._browse_folder)
    chooser.add_command(label="Choisir un fichier", command=self._browse_file)

    try:
        # The menu is posted at the current pointer position (screen coordinates).
        pointer_x = self.root.winfo_pointerx()
        pointer_y = self.root.winfo_pointery()
        chooser.tk_popup(pointer_x, pointer_y)
    finally:
        # Always release the grab taken by the popup, even if posting failed.
        chooser.grab_release()
|
||
|
||
def _browse_folder(self):
    """Ask for a directory and, unless the dialog is cancelled, make it the current selection."""
    chosen_dir = filedialog.askdirectory()
    if not chosen_dir:
        # Dialog cancelled: keep the previous selection untouched.
        return

    # Choosing a folder leaves single-file mode.
    self._single_file = None
    self.dir_var.set(chosen_dir)
    self._update_folder_display()
|
||
|
||
def _browse_file(self):
    """Ask for a single document and, unless cancelled, make it the current selection.

    The file's parent directory is stored in ``dir_var`` and the file itself
    in ``_single_file``.
    """
    try:
        from format_converter import SUPPORTED_EXTENSIONS
    except ImportError:
        # Converter module not importable: only PDF is offered.
        SUPPORTED_EXTENSIONS = {".pdf"}

    # Space-separated glob patterns for the "all supported documents" filter.
    supported_patterns = " ".join([f"*{ext}" for ext in sorted(SUPPORTED_EXTENSIONS)])
    type_filters = [
        ("Documents supportés", supported_patterns),
        ("PDF", "*.pdf"),
        ("Word", "*.docx"),
        ("Images", "*.jpg *.jpeg *.png *.tiff *.tif *.bmp"),
        ("Texte", "*.txt *.rtf *.odt *.html *.htm"),
        ("Tous", "*.*"),
    ]

    chosen_file = filedialog.askopenfilename(
        title="Choisir un document à anonymiser",
        filetypes=type_filters,
    )
    if not chosen_file:
        # Dialog cancelled.
        return

    self._single_file = Path(chosen_file)
    self.dir_var.set(str(self._single_file.parent))
    self._update_folder_display()
|
||
|
||
def _update_folder_display(self):
    """Rebuild the selection card from ``dir_var`` and ``_single_file``.

    Does nothing while no folder is set. Otherwise the content of
    ``_folder_inner`` is destroyed and recreated: an icon, the (possibly
    truncated) path or file name, a subtitle, and a "Changer" link that
    reopens the chooser. The border of ``_folder_zone`` is turned green.
    """
    selected_dir = self.dir_var.get()
    if not selected_dir:
        return

    single_file = getattr(self, '_single_file', None)
    single_mode = single_file is not None

    if single_mode:
        found = 1
        headline = single_file.name
    else:
        # Count the supported documents below the folder.
        try:
            from format_converter import SUPPORTED_EXTENSIONS
        except ImportError:
            SUPPORTED_EXTENSIONS = {".pdf"}
        try:
            found = len(list_supported_documents(Path(selected_dir), SUPPORTED_EXTENSIONS))
        except Exception:
            # Best effort: a failed scan is displayed as zero documents.
            found = 0
        headline = selected_dir

    # Wipe the previous content of the card.
    for stale in self._folder_inner.winfo_children():
        stale.destroy()

    line = tk.Frame(self._folder_inner, bg=CLR_CARD_BG)
    line.pack(fill=tk.X)

    # Page icon for a single file, open-folder icon otherwise.
    if single_mode:
        glyph = "\U0001f4c4"
    else:
        glyph = "\U0001f4c2"
    tk.Label(
        line,
        text=glyph,
        font=(self._font_family, 16),
        bg=CLR_CARD_BG,
    ).pack(side=tk.LEFT, padx=(0, 8))

    details = tk.Frame(line, bg=CLR_CARD_BG)
    details.pack(side=tk.LEFT, fill=tk.X, expand=True)

    # Keep only the tail of long paths: "..." + last 57 characters = 60.
    shown = headline if len(headline) <= 60 else "..." + headline[-57:]
    tk.Label(
        details,
        text=shown,
        font=self._f_body_bold,
        bg=CLR_CARD_BG,
        fg=CLR_TEXT,
        anchor="w",
    ).pack(fill=tk.X)

    if single_mode:
        extension = self._single_file.suffix.upper().lstrip('.')
        subtitle = f"Fichier unique — {extension}"
    else:
        if found <= 1:
            wording = "document trouvé (récursif)"
        else:
            wording = "documents trouvés (récursif)"
        subtitle = f"{found} {wording}"
    tk.Label(
        details,
        text=subtitle,
        font=self._f_small,
        bg=CLR_CARD_BG,
        fg=CLR_TEXT_SECONDARY,
        anchor="w",
    ).pack(fill=tk.X)

    change_link = tk.Label(
        line,
        text="Changer",
        font=self._f_small,
        bg=CLR_CARD_BG,
        fg=CLR_PRIMARY,
        cursor="hand2",
    )
    change_link.pack(side=tk.RIGHT, padx=(8, 0))
    change_link.bind("<Button-1>", lambda e: self._browse())

    # Green border marks a valid selection.
    self._folder_zone.configure(highlightbackground=CLR_GREEN)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Lancement
|
||
# ---------------------------------------------------------------
|
||
def _run(self):
    """Validate the current selection and start the batch in a background thread.

    Checks, in order: the selected file or folder exists and contains at
    least one supported document; a manual mask is selected when the live
    profile requires one; the selected mask template can actually be loaded.
    Each failed check shows a warning dialog and aborts. On success the run
    button is swapped for the stop button, the progress bar is shown, the
    previous results are hidden and ``_worker`` is started as a daemon thread.
    """
    is_single = getattr(self, '_single_file', None) is not None
    profile_key = self._selected_processing_profile_key()
    profile_spec = self._build_live_profile_spec()
    manual_mask_template = self._selected_manual_mask_template_path()

    if is_single:
        # Single-file mode.
        if not self._single_file.is_file():
            messagebox.showwarning("Fichier introuvable", f"{self._single_file}")
            return
        folder = self._single_file.parent
        pdfs = [self._single_file]
    else:
        # Folder mode.
        raw_folder = self.dir_var.get().strip()
        folder = Path(raw_folder)
        # Path("") normalises to ".", which *is* a directory: without the
        # explicit emptiness check an unset selection would silently process
        # the current working directory.
        if not raw_folder or not folder.is_dir():
            messagebox.showwarning(
                "Dossier invalide",
                "Choisissez un dossier ou un fichier.",
            )
            return
        try:
            from format_converter import SUPPORTED_EXTENSIONS
        except ImportError:
            SUPPORTED_EXTENSIONS = {".pdf"}
        pdfs = list_supported_documents(folder, SUPPORTED_EXTENSIONS)
        if not pdfs:
            exts = ", ".join(sorted(SUPPORTED_EXTENSIONS))
            messagebox.showwarning(
                "Aucun document",
                f"Aucun fichier supporté trouvé.\n"
                f"Formats acceptés : {exts}\n"
                f"(recherche récursive dans les sous-dossiers, hors anonymise/)",
            )
            return

    if profile_spec.get("require_manual_mask") and manual_mask_template is None:
        messagebox.showwarning(
            "Masque manuel requis",
            "Le profil sélectionné exige un masque manuel.\n"
            "Choisissez un modèle de masque avant de lancer le traitement.",
        )
        return

    if manual_mask_template is not None:
        # These names are None when the optional PDF templating support is unavailable.
        if apply_template_vector is None or Template is None or load_template_yaml is None:
            messagebox.showwarning(
                "Masque manuel indisponible",
                "Le template sélectionné ne peut pas être appliqué car "
                "la bibliothèque PDF n'est pas disponible.",
            )
            return
        if not manual_mask_template.is_file():
            messagebox.showwarning(
                "Masque manuel introuvable",
                f"Le modèle sélectionné est introuvable :\n{manual_mask_template}",
            )
            # The template list is stale: refresh it so the missing entry disappears.
            self._refresh_manual_mask_templates()
            return
        try:
            # Dry-run load: surface a broken template now rather than in the worker.
            self._load_manual_mask_template(manual_mask_template)
        except Exception as e:
            messagebox.showwarning(
                "Masque manuel invalide",
                f"Impossible de charger le modèle sélectionné :\n{e}",
            )
            return

    self._stop_requested = False
    self.btn_run.pack_forget()
    self.btn_stop.pack(fill=tk.X)
    self._show_progress(total=len(pdfs))
    self._hide_results()
    threading.Thread(
        target=self._worker,
        args=(folder, pdfs, manual_mask_template, profile_key, profile_spec),
        daemon=True,
    ).start()
|
||
|
||
def _stop(self):
|
||
"""Demande l'arrêt du traitement en cours."""
|
||
self._stop_requested = True
|
||
self.btn_stop.config(state=tk.DISABLED, bg="#fca5a5", text="Arrêt en cours...")
|
||
self.status_var.set("Arrêt demandé, fin du document en cours...")
|
||
|
||
def _worker(
    self,
    folder: Path,
    pdfs: List[Path],
    manual_mask_template_path: Optional[Path],
    profile_key: str,
    profile_spec: Dict[str, Any],
):
    """Process every document of the batch and report to the UI through ``self.queue``.

    The method only communicates with the UI by putting ``UiMessage`` objects
    (LOG, PROGRESS, DONE) on ``self.queue``.

    Args:
        folder: Root of the batch; results are written below ``folder / "anonymise"``.
        pdfs: Documents to process (despite the name, not only PDFs).
        manual_mask_template_path: Optional mask template; it is applied to
            ``.pdf`` inputs only and ignored (with a log line) for other formats.
        profile_key: Profile identifier, used as a fallback label in the log.
        profile_spec: Live profile; the keys ``"param_lists"``,
            ``"dictionaries_overlay"`` and ``"label"`` are read here.

    NOTE(review): ``self.cfg_path`` and ``self.use_vlm`` are read with
    ``.get()`` from this method; if they are Tk variables and this runs on a
    worker thread, confirm that this access is safe.
    """
    import time

    # Monotonic clock: the reported duration must not jump if the system
    # clock is adjusted while the batch is running.
    start_time = time.monotonic()
    manual_mask_template = None
    temp_profile_cfg_path: Optional[Path] = None

    try:
        # --- Effective configuration = base dictionaries + profile lists + overlay ---
        config_path = Path(self.cfg_path.get())
        merged_cfg = load_effective_dictionaries_dict(config_path)
        param_lists = profile_spec.get("param_lists") or {}
        if isinstance(param_lists, dict):
            merged_cfg["whitelist_phrases"] = list(param_lists.get("whitelist_phrases", []))
            if not isinstance(merged_cfg.get("blacklist"), dict):
                merged_cfg["blacklist"] = {}
            merged_cfg["blacklist"]["force_mask_terms"] = list(
                param_lists.get("blacklist_force_mask_terms", [])
            )
            merged_cfg["additional_stopwords"] = list(
                param_lists.get("additional_stopwords", [])
            )
        profile_overlay = profile_spec.get("dictionaries_overlay") or {}
        if profile_overlay:
            merged_cfg = deep_merge_dict(merged_cfg, profile_overlay)
        if yaml is not None:
            # The core takes a config *path*: dump the merged config to a
            # temporary YAML file next to the real one and point to it.
            fd, temp_name = tempfile.mkstemp(
                prefix="profile_",
                suffix=".yml",
                dir=str(config_path.parent),
            )
            os.close(fd)
            temp_profile_cfg_path = Path(temp_name)
            temp_profile_cfg_path.write_text(
                yaml.safe_dump(
                    merged_cfg,
                    allow_unicode=True,
                    default_flow_style=False,
                    sort_keys=False,
                ),
                encoding="utf-8",
            )
            config_path = temp_profile_cfg_path

        if profile_spec:
            label = profile_spec.get("label") or profile_key
            self.queue.put(
                UiMessage(
                    kind=MsgType.LOG,
                    text=f"~ profil métier actif : {label}",
                )
            )

        if manual_mask_template_path is not None:
            manual_mask_template = self._load_manual_mask_template(manual_mask_template_path)
            self.queue.put(
                UiMessage(
                    kind=MsgType.LOG,
                    text=f"~ masque manuel actif : {manual_mask_template_path.name}",
                )
            )
        outdir = folder / "anonymise"
        outdir.mkdir(exist_ok=True)
        ok = ko = 0
        global_counts: Dict[str, int] = {}

        for i, pdf in enumerate(pdfs, start=1):
            # Honour a stop request between two documents.
            if self._stop_requested:
                self.queue.put(UiMessage(kind=MsgType.LOG, text="\n⚠️ Arrêt demandé par l'utilisateur"))
                break

            # Show the path relative to the batch root when the document lies below it.
            display_name = pdf.name
            if folder in pdf.parents:
                display_name = str(pdf.relative_to(folder))

            self.queue.put(UiMessage(
                kind=MsgType.PROGRESS, current=i, total=len(pdfs),
                filename=display_name,
            ))

            try:
                source_doc = pdf
                temp_dir_ctx = None
                manual_mask_audit = None
                if manual_mask_template is not None:
                    if pdf.suffix.lower() == ".pdf":
                        # Apply the mask into a scratch copy; the masked copy
                        # becomes the input of the automatic pipeline.
                        temp_dir_ctx = tempfile.TemporaryDirectory(prefix="manual-mask-")
                        temp_dir = Path(temp_dir_ctx.name)
                        source_doc = temp_dir / pdf.name
                        manual_mask_audit = temp_dir / f"{pdf.stem}.manual_mask.audit.jsonl"
                        apply_template_vector(pdf, source_doc, manual_mask_template, manual_mask_audit)
                        self.queue.put(
                            UiMessage(
                                kind=MsgType.LOG,
                                text=f"  ~ masque manuel appliqué : {manual_mask_template.name}",
                            )
                        )
                    else:
                        self.queue.put(
                            UiMessage(
                                kind=MsgType.LOG,
                                text="  ~ masque manuel ignoré : format non PDF",
                            )
                        )

                # NER back-ends are re-evaluated for every document.
                active = self._active_manager
                use_ner = bool(active and self.use_hf and hasattr(active, 'is_loaded') and active.is_loaded())
                camembert_active = (
                    self._camembert_manager
                    if self._camembert_manager
                    and hasattr(self._camembert_manager, "is_loaded")
                    and self._camembert_manager.is_loaded()
                    else None
                )
                thresholds = None
                # No explicit thresholds are passed when the active manager is EDS-Pseudo.
                if use_ner and NerThresholds and not (EdsPseudoManager and isinstance(active, EdsPseudoManager)):
                    thresholds = NerThresholds(self.th_per, self.th_org, self.th_loc, 0.85)

                # The OGC number is the prefix of the parent directory name,
                # e.g. "257_23209962" -> OGC = "257".
                parent_name = pdf.parent.name
                ogc = parent_name.split("_")[0] if "_" in parent_name else None

                # VLM
                vlm_active = bool(
                    self.use_vlm.get()
                    and self._vlm_available
                    and self._vlm_manager
                    and self._vlm_manager.is_loaded()
                )

                # Prefer process_document (multi-format) when the core has it,
                # otherwise fall back to process_pdf (PDF only).
                _process_fn = getattr(core, 'process_document', None) or core.process_pdf
                _path_key = "doc_path" if _process_fn.__name__ == "process_document" else "pdf_path"
                doc_outdir = build_batch_output_dir(folder, outdir, pdf)
                doc_outdir.mkdir(parents=True, exist_ok=True)
                outputs = _process_fn(
                    **{_path_key: source_doc},
                    out_dir=doc_outdir,
                    make_vector_redaction=False,
                    also_make_raster_burn=True,
                    config_path=config_path,
                    use_hf=use_ner,
                    ner_manager=active,
                    ner_thresholds=thresholds,
                    ogc_label=ogc,
                    vlm_manager=self._vlm_manager if vlm_active else None,
                    camembert_manager=camembert_active,
                )
                # Merge the manual-mask audit trail into the document's audit file.
                if manual_mask_audit is not None and "audit" in outputs:
                    append_jsonl_file(Path(outputs["audit"]), manual_mask_audit)
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {display_name}"))
                for k, v in outputs.items():
                    self.queue.put(UiMessage(kind=MsgType.LOG, text=f"  - {k}: {v}"))

                audit_path = Path(outputs.get("audit", ""))
                counts = self._count_audit(audit_path)
                if counts:
                    self.queue.put(UiMessage(
                        kind=MsgType.LOG,
                        text="  ~ résumé : " + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())),
                    ))
                    for k, v in counts.items():
                        global_counts[k] = global_counts.get(k, 0) + v
                ok += 1
            except Exception as e:
                # One failing document must not abort the batch.
                self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2717 {display_name} \u2192 ERREUR: {e}"))
                ko += 1
            finally:
                if temp_dir_ctx is not None:
                    temp_dir_ctx.cleanup()

        total_time = time.monotonic() - start_time
        total_masked = sum(global_counts.values())

        # An interrupted batch gets a different closing message.
        if self._stop_requested:
            self.queue.put(UiMessage(
                kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
                outdir=str(outdir) if ok > 0 else "", total_time=total_time,
            ))
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text=f"⚠️ TRAITEMENT INTERROMPU : {ok} fichiers traités, {len(pdfs) - ok - ko} ignorés",
            ))
        else:
            self.queue.put(UiMessage(
                kind=MsgType.DONE, ok=ok, ko=ko, masked=total_masked,
                outdir=str(outdir), total_time=total_time,
            ))

        if ok and global_counts:
            self.queue.put(UiMessage(
                kind=MsgType.LOG,
                text="RÉSUMÉ DU LOT : " + ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items())),
            ))
    except Exception as e:
        # Failure outside the per-document handling: the whole batch is reported as failed.
        self.queue.put(UiMessage(kind=MsgType.LOG, text=f"Erreur fatale : {e}"))
        total_time = time.monotonic() - start_time
        self.queue.put(UiMessage(kind=MsgType.DONE, ok=0, ko=len(pdfs), masked=0, outdir="", total_time=total_time))
    finally:
        if temp_profile_cfg_path is not None:
            try:
                temp_profile_cfg_path.unlink()
            except OSError:
                # Best effort: a leftover temp file must not mask the batch result.
                pass
|
||
|
||
# ---------------------------------------------------------------
|
||
# Pompe de messages
|
||
# ---------------------------------------------------------------
|
||
def _pump_logs(self):
|
||
try:
|
||
while True:
|
||
msg = self.queue.get_nowait()
|
||
if msg.kind == MsgType.LOG:
|
||
self._append_log(msg.text)
|
||
elif msg.kind == MsgType.PROGRESS:
|
||
self._update_progress(msg.current, msg.total, msg.filename)
|
||
elif msg.kind == MsgType.DONE:
|
||
self._on_done(msg)
|
||
except queue.Empty:
|
||
pass
|
||
finally:
|
||
self.root.after(60, self._pump_logs)
|
||
|
||
def _append_log(self, text: str):
    """Append *text* followed by a newline to the journal widget and scroll to its end."""
    journal = self.txt
    # The widget is switched to NORMAL only for the duration of the write.
    journal.configure(state=tk.NORMAL)
    journal.insert(tk.END, text + "\n")
    journal.see(tk.END)
    journal.configure(state=tk.DISABLED)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Progression
|
||
# ---------------------------------------------------------------
|
||
def _show_progress(self, total: int):
    """Reset the progress bar to 0 out of *total* and pack the progress frame.

    When the results frame is currently managed by a geometry manager, the
    progress frame is packed before it; otherwise no ``before`` anchor is used.
    """
    self._progressbar.configure(maximum=total, value=0)
    self._progress_label.configure(text="")
    results = self._results_frame
    anchor = results if results.winfo_manager() else None
    self._progress_frame.pack(fill=tk.X, padx=32, pady=(0, 18), before=anchor)
|
||
|
||
def _hide_progress(self):
    """Remove the progress frame from the pack layout."""
    frame = self._progress_frame
    frame.pack_forget()
|
||
|
||
def _update_progress(self, current: int, total: int, filename: str):
    """Move the progress bar to *current* and show ``current/total — filename`` in both captions."""
    caption = f"{current}/{total} — {filename}"
    self._progressbar.configure(value=current)
    # The same caption goes to the progress label and to the status bar.
    self._progress_label.configure(text=caption)
    self.status_var.set(caption)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Résultats
|
||
# ---------------------------------------------------------------
|
||
def _show_results(self, ok: int, ko: int, masked: int):
    """Fill the three stat cards (files, masked items, errors) and pack the results frame.

    The error card uses the red palette only when *ko* is greater than zero.
    """
    self._update_stat_card(self._stat_files, ok, CLR_GREEN, CLR_GREEN_LIGHT)
    self._update_stat_card(self._stat_masked, masked, CLR_PRIMARY, CLR_PRIMARY_LIGHT)

    if ko > 0:
        err_fg, err_bg = CLR_RED, CLR_RED_LIGHT
    else:
        err_fg, err_bg = CLR_TEXT_SECONDARY, "#f3f4f6"
    self._update_stat_card(self._stat_errors, ko, err_fg, err_bg)

    self._results_frame.pack(fill=tk.X, padx=32, pady=(0, 12))
|
||
|
||
def _hide_results(self):
    """Hide the results and journal frames, reset the toggle caption and empty the journal."""
    for frame in (self._results_frame, self._log_frame):
        frame.pack_forget()
    self._log_visible = False
    self._log_toggle.configure(text="Voir le journal détaillé \u25BC")

    # Empty the journal; the widget is writable only while it is being cleared.
    journal = self.txt
    journal.configure(state=tk.NORMAL)
    journal.delete("1.0", tk.END)
    journal.configure(state=tk.DISABLED)
|
||
|
||
def _on_done(self, msg: UiMessage):
    """Handle the end-of-batch message: restore the idle UI and show the outcome.

    Args:
        msg: message carrying ``ok``, ``ko``, ``masked``, ``outdir`` and
            ``total_time`` for the finished (or interrupted) batch.
    """
    # Swap the stop button back for the run button.
    self._hide_progress()
    self.btn_stop.pack_forget()
    self.btn_stop.config(state=tk.NORMAL, bg=CLR_RED, text="Arrêter le traitement")
    self.btn_run.pack(fill=tk.X)

    if self._stop_requested:
        self.status_var.set(f"Interrompu : {msg.ok} traités, {msg.ko} erreurs.")
    else:
        self.status_var.set(f"Terminé : {msg.ok} OK, {msg.ko} erreurs.")

    # An empty ``outdir`` means no output directory was produced. ``Path("")``
    # is the current working directory, so scanning it would report leaks
    # from unrelated files; report zero instead.
    if msg.outdir:
        self._last_outdir = Path(msg.outdir)
        leak_count = self._check_leaks(self._last_outdir)
    else:
        leak_count = 0
    self._update_leak_indicator(leak_count)

    perf_string = self._calculate_performance(msg.ok, msg.total_time)
    self._perf_label.configure(text=perf_string)

    self._show_results(msg.ok, msg.ko, msg.masked)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Toggle journal
|
||
# ---------------------------------------------------------------
|
||
def _toggle_log(self):
    """Show the journal frame when hidden, hide it when shown, and flip ``_log_visible``."""
    currently_shown = self._log_visible
    if not currently_shown:
        self._log_frame.pack(fill=tk.BOTH, expand=True, pady=(4, 0))
        self._log_toggle.configure(text="Masquer le journal \u25B2")
    else:
        self._log_frame.pack_forget()
        self._log_toggle.configure(text="Voir le journal détaillé \u25BC")
    self._log_visible = not currently_shown
|
||
|
||
# ---------------------------------------------------------------
|
||
# Ouvrir dossier résultats
|
||
# ---------------------------------------------------------------
|
||
def _open_out(self):
    """Pass the last recorded output directory to ``open_folder``; do nothing when none is set."""
    target = self._last_outdir
    if not target:
        return
    open_folder(target)
|
||
|
||
def _manual_mask_templates_dir(self) -> Path:
    """Return the mask-templates directory obtained from ``ensure_mask_templates_dir`` for the executable directory."""
    base_dir = _exe_dir()
    return ensure_mask_templates_dir(base_dir)
|
||
|
||
def _selected_processing_profile_key(self) -> str:
|
||
label = self.processing_profile_label_var.get()
|
||
return self._processing_profile_labels_to_keys.get(label, "")
|
||
|
||
def _selected_processing_profile_spec(self) -> Dict[str, Any]:
|
||
key = self._selected_processing_profile_key()
|
||
return self._processing_profiles.get(key, {})
|
||
|
||
def _set_listbox_values(self, listbox: tk.Listbox, values: List[str]):
    """Replace the whole content of *listbox* with *values*, in order."""
    listbox.delete(0, tk.END)
    position = 0
    while position < len(values):
        listbox.insert(tk.END, values[position])
        position += 1
|
||
|
||
def _current_param_lists(self) -> Dict[str, List[str]]:
    """Snapshot the three parameter listboxes into a dict keyed by parameter name."""
    whitelist = list(self._wl_listbox.get(0, tk.END))
    blacklist = list(self._bl_listbox.get(0, tk.END))
    stopwords = list(self._sw_listbox.get(0, tk.END))
    snapshot: Dict[str, List[str]] = {}
    snapshot["whitelist_phrases"] = whitelist
    snapshot["blacklist_force_mask_terms"] = blacklist
    snapshot["additional_stopwords"] = stopwords
    return snapshot
|
||
|
||
def _apply_param_lists_to_widgets(self, param_lists: Dict[str, List[str]]):
    """Load *param_lists* into the three listboxes, then refresh the summary line.

    A key missing from *param_lists* empties the corresponding listbox.
    """
    targets = (
        (self._wl_listbox, "whitelist_phrases"),
        (self._bl_listbox, "blacklist_force_mask_terms"),
        (self._sw_listbox, "additional_stopwords"),
    )
    for box, key in targets:
        self._set_listbox_values(box, list(param_lists.get(key, [])))
    self._refresh_params_summary()
|
||
|
||
def _current_manual_mask_template_setting(self) -> str:
    """Return the label of the selected mask template, or ``""`` when none is selected."""
    chosen = self._selected_manual_mask_template_path()
    return "" if chosen is None else mask_template_label(chosen, _exe_dir())
|
||
|
||
def _select_manual_mask_template_from_setting(self, template_name: str):
    """Select the mask template whose path is ``<templates dir>/<template_name>``.

    Falls back to the "none" label when the name is blank or matches no known template.
    """
    wanted = str(template_name or "").strip()
    if not wanted:
        self.manual_mask_template_var.set(MANUAL_MASK_NONE_LABEL)
        return
    target = self._manual_mask_templates_dir() / wanted
    matching = next(
        (label for label, path in self._manual_mask_templates.items() if path == target),
        MANUAL_MASK_NONE_LABEL,
    )
    self.manual_mask_template_var.set(matching)
|
||
|
||
def _build_live_profile_spec(
|
||
self,
|
||
*,
|
||
label: Optional[str] = None,
|
||
description: Optional[str] = None,
|
||
base_spec: Optional[Dict[str, Any]] = None,
|
||
) -> Dict[str, Any]:
|
||
spec = dict(base_spec or self._selected_processing_profile_spec())
|
||
return {
|
||
"label": str(label if label is not None else spec.get("label") or self.processing_profile_label_var.get() or "Profil"),
|
||
"description": str(
|
||
description
|
||
if description is not None
|
||
else self.profile_description_var.get() or spec.get("description") or ""
|
||
),
|
||
"require_manual_mask": bool(self.profile_require_manual_mask_var.get()),
|
||
"force_disable_vlm": bool(self.profile_force_disable_vlm_var.get()),
|
||
"dictionaries_overlay": deepcopy(spec.get("dictionaries_overlay") or {}),
|
||
"param_lists": self._current_param_lists(),
|
||
"has_param_lists": True,
|
||
"preferred_manual_mask_template": self._current_manual_mask_template_setting(),
|
||
"has_preferred_manual_mask_template": True,
|
||
}
|
||
|
||
def _profile_key_from_label(self, label: str) -> str:
|
||
ascii_label = unicodedata.normalize("NFKD", label).encode("ascii", "ignore").decode("ascii")
|
||
slug = re.sub(r"[^a-zA-Z0-9]+", "_", ascii_label.lower()).strip("_") or "profil"
|
||
existing = set(self._processing_profiles.keys())
|
||
candidate = slug
|
||
index = 2
|
||
while candidate in existing:
|
||
candidate = f"{slug}_{index}"
|
||
index += 1
|
||
return candidate
|
||
|
||
def _refresh_profile_description(self):
    """Show the profile description, followed by a line listing the active options, if any."""
    description = self.profile_description_var.get().strip()
    flags = (
        (self.profile_require_manual_mask_var.get(), "masque manuel requis"),
        (self.profile_force_disable_vlm_var.get(), "VLM désactivé"),
        (
            self._selected_processing_profile_spec().get("dictionaries_overlay"),
            "règles de masquage renforcées",
        ),
    )
    hints: list[str] = [text for active, text in flags if active]
    if hints:
        options_line = f"Options actives : {', '.join(hints)}."
        description = f"{description}\n{options_line}" if description else options_line
    self._profile_description.configure(text=description)
|
||
|
||
def _on_profile_editor_change(self):
    """Re-apply the profile-driven widget state, then refresh every dependent caption."""
    steps = (
        self._apply_processing_profile_gui_state,
        self._refresh_profile_description,
        self._refresh_manual_mask_hint,
        self._refresh_profile_capture_summary,
    )
    for step in steps:
        step()
|
||
|
||
def _on_profile_description_change(self, *_args):
    """Callback that refreshes the description label; any positional arguments are ignored."""
    self._refresh_profile_description()
|
||
|
||
def _builtin_processing_profile_keys(self) -> set[str]:
    """Return the keys reported by ``list_default_profile_keys``."""
    builtin_keys = list_default_profile_keys()
    return builtin_keys
|
||
|
||
def _open_profile_manager(self):
    """Switch the UI to the ``"profiles"`` tab."""
    tab_name = "profiles"
    self._switch_tab(tab_name)
|
||
|
||
def _refresh_profile_capture_summary(self):
    """Update the label summarising what saving the current profile would record.

    Does nothing while the summary label has not been created yet.
    """
    if not hasattr(self, "_profile_capture_summary"):
        return
    profile_key = self._selected_processing_profile_key()
    param_lists = self._current_param_lists()
    wl_count, bl_count, sw_count = (
        len(param_lists.get(name, []))
        for name in ("whitelist_phrases", "blacklist_force_mask_terms", "additional_stopwords")
    )
    mask_label = self.manual_mask_template_var.get()
    default_key = get_default_profile_key(Path(self.profiles_path.get()))
    if profile_key and profile_key == default_key:
        default_text = "profil par défaut"
    else:
        default_text = "profil secondaire"
    summary = (
        f"Ce profil enregistrera : {wl_count} préservations, {bl_count} masquages forcés, "
        f"{sw_count} stop-word additionnel. Masque PDF courant : {mask_label}. "
        f"Statut : {default_text}."
    )
    self._profile_capture_summary.configure(text=summary)
|
||
|
||
def _refresh_profile_kind_label(self):
    """Show whether the selected profile is a provided one or a user one.

    Does nothing while the label has not been created; clears it when no profile is selected.
    """
    if not hasattr(self, "_profile_kind_label"):
        return
    profile_key = self._selected_processing_profile_key()
    if not profile_key:
        self._profile_kind_label.configure(text="")
        return
    if profile_key in self._builtin_processing_profile_keys():
        profile_kind = "profil fourni"
    else:
        profile_kind = "profil utilisateur"
    self._profile_kind_label.configure(text=f"Type : {profile_kind} ({profile_key})")
|
||
|
||
def _rename_selected_processing_profile(self):
    """Ask for a new visible name and save the selected profile under it.

    The profile key is unchanged; the saved spec is rebuilt from the current
    editor widgets with the new label.
    """
    profile_key = self._selected_processing_profile_key()
    if not profile_key:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
        return

    base_spec = self._selected_processing_profile_spec()
    current_label_text = str(base_spec.get("label") or profile_key)
    answer = simpledialog.askstring(
        "Renommer le profil",
        "Nouveau nom visible du profil :",
        initialvalue=current_label_text,
        parent=self.root,
    )
    if answer is None:
        # Dialog cancelled.
        return
    new_label = answer.strip()
    if not new_label:
        messagebox.showwarning("Profils", "Le nom du profil ne peut pas être vide.")
        return

    updated_spec = self._build_live_profile_spec(label=new_label, base_spec=base_spec)
    save_runtime_profile(profile_key, updated_spec, Path(self.profiles_path.get()))
    self._refresh_processing_profiles(preferred_key=profile_key)
    messagebox.showinfo("Profils", f"Profil renommé : {new_label}")
|
||
|
||
def _set_selected_processing_profile_default(self):
    """Record the selected profile as the default one and reload the profile list."""
    profile_key = self._selected_processing_profile_key()
    if profile_key:
        profiles_file = Path(self.profiles_path.get())
        set_runtime_default_profile(profile_key, profiles_file)
        self._refresh_processing_profiles(preferred_key=profile_key)
        messagebox.showinfo("Profils", "Profil par défaut mis à jour.")
    else:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
|
||
|
||
def _delete_selected_processing_profile(self):
    """Delete the selected user profile after confirmation.

    Provided (built-in) profiles are refused with a warning.
    """
    profile_key = self._selected_processing_profile_key()
    selected_spec = self._selected_processing_profile_spec()
    profile_label = str(selected_spec.get("label") or profile_key)

    if not profile_key:
        messagebox.showwarning("Profils", "Aucun profil sélectionné.")
        return

    is_builtin = profile_key in self._builtin_processing_profile_keys()
    if is_builtin:
        refusal = (
            "Les profils fournis par défaut ne peuvent pas être supprimés.\n"
            "Crée un profil utilisateur si tu veux un profil spécifique."
        )
        messagebox.showwarning("Profils", refusal)
        return

    question = f"Supprimer définitivement le profil utilisateur « {profile_label} » ?"
    if not messagebox.askyesno("Supprimer le profil", question, parent=self.root):
        return

    delete_runtime_profile(profile_key, Path(self.profiles_path.get()))
    self._refresh_processing_profiles()
    messagebox.showinfo("Profils", f"Profil supprimé : {profile_label}")
|
||
|
||
def _create_processing_profile(self):
    """Create a new profile from the current editor state.

    Prompts for a name (cancel or blank aborts), an optional description
    (cancel keeps the base profile's description) and whether to make the new
    profile the default, then saves it under a key derived from the name.
    """
    base_spec = self._selected_processing_profile_spec()
    base_description = str(base_spec.get("description") or "")

    suggested = f"{base_spec.get('label') or 'Profil'} copie"
    label = simpledialog.askstring(
        "Nouveau profil",
        "Nom du nouveau profil :",
        initialvalue=suggested,
        parent=self.root,
    )
    if label is None:
        return
    label = label.strip()
    if not label:
        messagebox.showwarning("Profils", "Le nom du profil ne peut pas être vide.")
        return

    description = simpledialog.askstring(
        "Nouveau profil",
        "Description du profil (optionnelle) :",
        initialvalue=base_description,
        parent=self.root,
    )
    if description is None:
        # Cancelling the description dialog keeps the inherited description.
        description = base_description

    profile_key = self._profile_key_from_label(label)
    profile_spec = self._build_live_profile_spec(
        label=label,
        description=description.strip(),
        base_spec=base_spec,
    )
    set_default = messagebox.askyesno(
        "Nouveau profil",
        "Définir ce nouveau profil comme profil par défaut ?",
        parent=self.root,
    )
    profiles_file = Path(self.profiles_path.get())
    save_runtime_profile(profile_key, profile_spec, profiles_file, set_default=set_default)
    self._refresh_processing_profiles(preferred_key=profile_key)
    messagebox.showinfo("Profils", f"Profil enregistré : {label}", parent=self.root)
|
||
|
||
def _save_selected_processing_profile(self):
    """Save the current editor state into the selected profile.

    Saving over a provided (built-in) profile writes a local override and
    therefore asks for confirmation first. The built-in set comes from
    ``_builtin_processing_profile_keys()``, the same source used by the
    delete action and the kind label, so the three cannot drift apart.
    """
    profile_key = self._selected_processing_profile_key()
    if not profile_key:
        messagebox.showwarning(
            "Profils",
            "Aucun profil sélectionné. Créez d'abord un nouveau profil.",
            parent=self.root,
        )
        return

    base_spec = self._selected_processing_profile_spec()
    profile_label = str(base_spec.get("label") or profile_key)

    if profile_key in self._builtin_processing_profile_keys():
        confirmed = messagebox.askyesno(
            "Profils",
            "Vous allez enregistrer une surcharge locale sur un profil fourni par défaut.\n\n"
            f"Continuer pour « {profile_label} » ?",
            parent=self.root,
        )
        if not confirmed:
            return

    profile_spec = self._build_live_profile_spec(base_spec=base_spec)
    save_runtime_profile(
        profile_key,
        profile_spec,
        Path(self.profiles_path.get()),
    )
    self._refresh_processing_profiles(preferred_key=profile_key)
    messagebox.showinfo(
        "Profils",
        f"Profil mis à jour : {profile_label}",
        parent=self.root,
    )
|
||
|
||
def _refresh_processing_profiles(self, preferred_key: Optional[str] = None):
    """Reload the profile list and reselect a profile.

    Args:
        preferred_key: key to select after reloading. When falsy, the
            currently selected key is kept; when that key no longer exists,
            the configured default profile is selected instead.
    """
    profiles_file = Path(self.profiles_path.get())
    ensure_runtime_profiles_config(profiles_file)
    current_key = preferred_key or self._selected_processing_profile_key()
    profiles = list_effective_profiles(profiles_file)
    self._processing_profiles = profiles

    # Labels are the combo entries, so they must be unique. Two profiles with
    # the same visible name would otherwise collapse into one entry and leave
    # one of them unselectable; later duplicates get their key appended.
    labels_to_keys: Dict[str, str] = {}
    for key, spec in profiles.items():
        label = spec.get("label") or key
        if label in labels_to_keys:
            label = f"{label} ({key})"
        labels_to_keys[label] = key
    self._processing_profile_labels_to_keys = labels_to_keys

    labels = list(labels_to_keys)
    self._profile_combo.configure(values=labels)

    selected_key = current_key
    if not selected_key or selected_key not in profiles:
        selected_key = get_default_profile_key(profiles_file)
    selected_label = next(
        (label for label, key in labels_to_keys.items() if key == selected_key),
        labels[0] if labels else "",
    )
    if selected_label:
        self.processing_profile_label_var.set(selected_label)
    self._apply_selected_processing_profile()
|
||
|
||
def _apply_selected_processing_profile(self):
    """Load the selected profile's spec into the editor widgets.

    With no spec available, the description fields are cleared and nothing
    else is touched.
    """
    spec = self._selected_processing_profile_spec()
    if not spec:
        self._profile_base_description = ""
        self.profile_description_var.set("")
        self._profile_description.configure(text="")
        return

    self._profile_base_description = str(spec.get("description") or "")
    self.profile_description_var.set(self._profile_base_description)
    self.profile_require_manual_mask_var.set(bool(spec.get("require_manual_mask")))
    self.profile_force_disable_vlm_var.set(bool(spec.get("force_disable_vlm")))

    # A profile without its own lists falls back to the general configuration.
    if spec.get("has_param_lists"):
        self._apply_param_lists_to_widgets(spec.get("param_lists") or {})
    else:
        self._load_params()

    self._select_manual_mask_template_from_setting(
        spec.get("preferred_manual_mask_template") or ""
    )

    # _on_profile_editor_change() already refreshes the description, the
    # manual-mask hint and the capture summary, so they are not refreshed
    # again here; only the kind label remains.
    self._on_profile_editor_change()
    self._refresh_profile_kind_label()
|
||
|
||
def _apply_processing_profile_gui_state(self):
    """Enable or disable the VLM checkbox according to the profile's "force disable VLM" flag.

    Returns early, before refreshing the manual-mask hint, while the VLM
    checkbox has not been created yet.
    """
    force_disable_vlm = bool(self.profile_force_disable_vlm_var.get())
    if not hasattr(self, "_vlm_check"):
        return

    has_status_label = hasattr(self, "_vlm_status_lbl")
    if not force_disable_vlm:
        self._vlm_check.configure(state=tk.NORMAL)
        # Only clear the status text when this method wrote it.
        if has_status_label and self._vlm_status_lbl.cget("text") == "Désactivé par profil":
            self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
    else:
        self.use_vlm.set(False)
        self._vlm_available = False
        self._vlm_check.configure(state=tk.DISABLED)
        if has_status_label:
            self._vlm_status_lbl.configure(text="Désactivé par profil", fg=CLR_TEXT_SECONDARY)

    self._refresh_manual_mask_hint()
|
||
|
||
def _selected_manual_mask_template_path(self) -> Optional[Path]:
    """Return the path mapped to the selected mask label; ``None`` for the "none" entry or an unknown label."""
    chosen_label = self.manual_mask_template_var.get()
    return self._manual_mask_templates.get(chosen_label)
|
||
|
||
def _refresh_manual_mask_templates(self):
    """Rebuild the mask-template choices and keep the previous selection when it still exists."""
    previous_path = self._selected_manual_mask_template_path()

    options: Dict[str, Optional[Path]] = {MANUAL_MASK_NONE_LABEL: None}
    for template_path in list_mask_templates(_exe_dir()):
        options[mask_template_label(template_path, _exe_dir())] = template_path
    self._manual_mask_templates = options

    labels = list(options.keys())
    self._manual_mask_combo.configure(values=labels)
    if hasattr(self, "_profile_manual_mask_combo"):
        self._profile_manual_mask_combo.configure(values=labels)

    if previous_path is None:
        selected_label = MANUAL_MASK_NONE_LABEL
    else:
        selected_label = next(
            (label for label, path in options.items() if path == previous_path),
            MANUAL_MASK_NONE_LABEL,
        )
    self.manual_mask_template_var.set(selected_label)
    self._refresh_manual_mask_hint()
    self._refresh_profile_capture_summary()
|
||
|
||
def _refresh_manual_mask_hint(self):
    """Update the hint under the mask selector, then refresh the capture summary.

    The text depends on whether a mask is selected, whether the profile
    requires one, and whether any template exists besides the "none" entry.
    """
    selected = self._selected_manual_mask_template_path()
    manual_mask_required = bool(self.profile_require_manual_mask_var.get())

    if selected is not None:
        text = (
            f"Masque sélectionné : {selected.name}. "
            "Il sera appliqué à tous les PDF du lot avant l'anonymisation automatique."
        )
    elif manual_mask_required:
        text = (
            "Le profil sélectionné exige un masque manuel. "
            "Choisissez un modèle avant de lancer le traitement."
        )
    elif len(self._manual_mask_templates) == 1:
        # Only the "none" entry is present.
        text = (
            "Aucun modèle enregistré. Crée un masque avec l'éditeur PDF, "
            "puis clique sur « Actualiser les modèles »."
        )
    else:
        text = "Aucun masque manuel sélectionné pour ce lancement."

    self._manual_mask_hint.configure(text=text)
    self._refresh_profile_capture_summary()
|
||
|
||
def _load_manual_mask_template(self, path: Path):
    """Load a mask template from *path*.

    ``.yml``/``.yaml`` files go through ``load_template_yaml``; any other
    suffix is read as UTF-8 JSON and passed to ``Template.from_dict``.

    Raises:
        RuntimeError: when ``load_template_yaml`` or ``Template`` is ``None``.
    """
    if load_template_yaml is None or Template is None:
        raise RuntimeError("bibliothèque de templates PDF indisponible")
    suffix = path.suffix.lower()
    if suffix in (".yml", ".yaml"):
        return load_template_yaml(path)
    payload = json.loads(path.read_text(encoding="utf-8"))
    return Template.from_dict(payload)
|
||
|
||
def _open_manual_mask_templates_dir(self):
    """Pass the mask-templates directory to ``open_folder``."""
    templates_dir = self._manual_mask_templates_dir()
    open_folder(templates_dir)
|
||
|
||
def _open_manual_mask_designer(self):
    """Open the PDF mask designer in a new top-level window.

    Shows an error and returns when ``MaskDesignerApp`` is ``None``. When no
    initial PDF can be resolved from ``self._single_file``, the designer still
    opens and an informational tip is displayed first.
    """
    if MaskDesignerApp is None:
        messagebox.showerror(
            "Masques PDF",
            "L'éditeur de masques PDF n'a pas pu être chargé.\n"
            "Vérifiez que PyMuPDF, Pillow et PyYAML sont disponibles.",
        )
        return

    initial_pdf = resolve_manual_mask_pdf(getattr(self, "_single_file", None))
    win = tk.Toplevel(self.root)
    if initial_pdf is not None:
        self.status_var.set(f"Éditeur de masques PDF ouvert pour {initial_pdf.name}.")
    else:
        message = (
            "L'éditeur s'ouvre sans PDF préchargé.\n\n"
            "Astuce : choisissez d'abord un fichier PDF dans l'onglet "
            "Anonymisation pour l'ouvrir automatiquement ici."
        )
        self.status_var.set("Éditeur de masques PDF ouvert.")
        messagebox.showinfo("Masques PDF", message)

    MaskDesignerApp(
        win,
        initial_pdf=initial_pdf,
        templates_dir=self._manual_mask_templates_dir(),
    )
|
||
|
||
# ---------------------------------------------------------------
|
||
# Aide
|
||
# ---------------------------------------------------------------
|
||
def _show_help(self):
    """Display the "how it works" help text in an information dialog."""
    help_text = (
        "1) Choisissez le dossier racine contenant vos fichiers PDF.\n\n"
        "2) Cliquez sur « Lancer la pseudonymisation ».\n\n"
        "Tous les fichiers PDF sont traités\n"
        "(recherche récursive dans les sous-dossiers).\n\n"
        "Un PDF Image (raster) est généré pour chaque fichier :\n"
        "chaque page devient une image avec les données masquées.\n"
        "Sécurité maximale, aucun texte résiduel.\n\n"
        "Les résultats sont écrits dans le dossier\n"
        "« anonymise/ » à la racine du dossier sélectionné,\n"
        "en conservant l'arborescence des sous-dossiers source.\n\n"
        "Le sous-dossier « anonymise/ » est ignoré en entrée\n"
        "pour éviter de retraiter d'anciennes sorties."
    )
    messagebox.showinfo("Comment ça marche ?", help_text)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Paramètres avancés (whitelist/blacklist)
|
||
# ---------------------------------------------------------------
|
||
def _build_phrase_list(self, parent, title: str, placeholder: str, color_tag: str, on_change=None):
    """Build an editable phrase list: title, entry with placeholder, add button, listbox, remove button.

    Args:
        parent: container the widget group is packed into.
        title: caption shown above the entry.
        placeholder: grey hint text shown while the entry is empty.
        color_tag: background colour of the add button and the listbox.
        on_change: optional callable invoked after an item is added or removed.

    Returns:
        ``(listbox, entry)``.
    """
    container = tk.Frame(parent, bg=CLR_BG)
    container.pack(fill=tk.X, pady=(4, 8))

    caption = tk.Label(
        container, text=title, font=self._f_small,
        bg=CLR_BG, fg=CLR_TEXT, anchor="w",
    )
    caption.pack(fill=tk.X, pady=(0, 4))

    # Entry row: text field plus "add" button.
    input_row = tk.Frame(container, bg=CLR_BG)
    input_row.pack(fill=tk.X, pady=(0, 4))

    entry = tk.Entry(input_row, font=self._f_small, relief=tk.GROOVE, bd=1)
    entry.insert(0, placeholder)
    entry.configure(fg="#999")

    def _clear_placeholder(_event):
        # Remove the hint as soon as the field gets focus.
        if entry.get() == placeholder:
            entry.delete(0, tk.END)
            entry.configure(fg=CLR_TEXT)

    def _restore_placeholder(_event):
        # Put the hint back when the field is left blank.
        if not entry.get().strip():
            entry.insert(0, placeholder)
            entry.configure(fg="#999")

    entry.bind("<FocusIn>", _clear_placeholder)
    entry.bind("<FocusOut>", _restore_placeholder)
    entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 4))

    def _add(event=None):
        candidate = entry.get().strip()
        if not candidate or candidate == placeholder:
            return
        # Skip duplicates; the entry is cleared either way.
        if candidate not in list(listbox.get(0, tk.END)):
            listbox.insert(tk.END, candidate)
            if on_change:
                on_change()
        entry.delete(0, tk.END)

    add_btn = tk.Button(
        input_row, text="+ Ajouter", font=self._f_small,
        bg=color_tag, fg=CLR_TEXT, relief=tk.GROOVE, cursor="hand2",
        command=_add, padx=8,
    )
    add_btn.pack(side=tk.LEFT)
    entry.bind("<Return>", _add)

    # Phrase list with its scrollbar.
    list_frame = tk.Frame(container, bg=CLR_BG)
    list_frame.pack(fill=tk.X)

    listbox = tk.Listbox(
        list_frame, height=4, font=("Consolas", 9),
        relief=tk.GROOVE, bd=1, selectmode=tk.EXTENDED,
        bg=color_tag,
    )
    scrollbar = ttk.Scrollbar(list_frame, orient=tk.VERTICAL, command=listbox.yview)
    listbox.configure(yscrollcommand=scrollbar.set)
    listbox.pack(side=tk.LEFT, fill=tk.X, expand=True)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

    def _remove():
        picked = listbox.curselection()
        # Delete from the bottom up so earlier indices stay valid.
        for idx in reversed(picked):
            listbox.delete(idx)
        if picked and on_change:
            on_change()

    rm_btn = tk.Button(
        container, text="Supprimer la sélection", font=self._f_small,
        bg="#ffcdd2", fg="#b71c1c", relief=tk.GROOVE, cursor="hand2",
        command=_remove, padx=8,
    )
    rm_btn.pack(anchor="e", pady=(2, 0))

    return listbox, entry
|
||
|
||
def _refresh_params_summary(self):
    """Show the item counts of the three listboxes, then refresh the profile capture summary."""
    wl_count, bl_count, sw_count = (
        box.size() for box in (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    )
    summary = (
        f"Listes visibles chargées : {wl_count} préservations, "
        f"{bl_count} masquages forcés, {sw_count} stop-word additionnel."
    )
    self._params_summary.configure(text=summary)
    self._refresh_profile_capture_summary()
|
||
|
||
def _load_params(self):
    """Fill the three listboxes from the YAML configuration.

    Best effort: any exception is swallowed and leaves the widgets in
    whatever state they had reached.
    """
    try:
        cfg_path = Path(self.cfg_path.get())
        if cfg_path.exists():
            param_lists = load_effective_param_lists(cfg_path)
            targets = (
                (self._wl_listbox, "whitelist_phrases"),
                (self._bl_listbox, "blacklist_force_mask_terms"),
                (self._sw_listbox, "additional_stopwords"),
            )
            for box, key in targets:
                box.delete(0, tk.END)
                for item in param_lists[key]:
                    box.insert(tk.END, item)
        self._refresh_params_summary()
    except Exception:
        pass
|
||
|
||
def _listbox_values(self, listbox: tk.Listbox) -> List[str]:
    """Return every item of *listbox* as a new list."""
    items = listbox.get(0, tk.END)
    return list(items)
|
||
|
||
def _copy_param_listboxes(
    self,
    source_wl: tk.Listbox,
    source_bl: tk.Listbox,
    source_sw: tk.Listbox,
    target_wl: tk.Listbox,
    target_bl: tk.Listbox,
    target_sw: tk.Listbox,
):
    """Replace each target listbox's content with that of its matching source listbox."""
    pairs = (
        (source_wl, target_wl),
        (source_bl, target_bl),
        (source_sw, target_sw),
    )
    for source, target in pairs:
        self._set_listbox_values(target, self._listbox_values(source))
|
||
|
||
def _export_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Export the visible parameter lists to a user-chosen JSON file.

    The save dialog starts in ``~/Desktop``, else ``~/Bureau``, else the home
    directory. Any failure is reported in an error dialog.
    """
    try:
        import json as _json
        from datetime import datetime

        payload = {
            "version": APP_VERSION,
            "date_export": datetime.now().isoformat(),
            "etablissement": "",  # left for the user to fill in
            "whitelist_phrases": self._listbox_values(wl_listbox),
            "blacklist_force_mask_terms": self._listbox_values(bl_listbox),
            "additional_stopwords": self._listbox_values(sw_listbox),
            "instructions": (
                "Ce fichier contient les paramètres d'anonymisation personnalisés. "
                "Envoyez-le par email à l'équipe technique pour mise à jour du programme."
            ),
        }

        # Default destination: the desktop folder under its English or French name.
        home = Path.home()
        start_dir = next(
            (folder for folder in (home / "Desktop", home / "Bureau") if folder.exists()),
            home,
        )

        dest = filedialog.asksaveasfilename(
            title="Exporter les paramètres",
            initialdir=str(start_dir),
            initialfile="parametres_anonymisation.json",
            defaultextension=".json",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not dest:
            return
        serialized = _json.dumps(payload, ensure_ascii=False, indent=2)
        Path(dest).write_text(serialized, encoding="utf-8")
        messagebox.showinfo(
            "Export réussi",
            f"Paramètres exportés dans :\n{dest}\n\n"
            "Vous pouvez envoyer ce fichier par email\n"
            "à l'équipe technique.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'export :\n{e}")
|
||
|
||
def _export_params(self):
    """Export the main whitelist, blacklist and stop-word listboxes."""
    boxes = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._export_param_listboxes(*boxes)
|
||
|
||
def _import_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Merge parameter lists from a user-chosen JSON file into the given listboxes.

    Existing items are kept; only new, non-blank entries are appended.
    Entries are coerced with ``str()`` and stripped for all three lists, and
    an entry repeated inside the imported file is added once. A key that is
    missing or does not hold a list contributes nothing. The file is
    validated as a JSON object before any listbox is modified. Any failure
    is reported in an error dialog.
    """

    def _merge(listbox: tk.Listbox, incoming: Any) -> int:
        """Append unseen, non-blank entries of *incoming*; return how many were added."""
        if not isinstance(incoming, (list, tuple)):
            return 0
        seen = set(listbox.get(0, tk.END))
        added = 0
        for raw in incoming:
            if not raw:
                continue
            text = str(raw).strip()
            if not text or text in seen:
                continue
            listbox.insert(tk.END, text)
            # Track insertions so a duplicate later in the file is skipped.
            seen.add(text)
            added += 1
        return added

    try:
        src = filedialog.askopenfilename(
            title="Importer des paramètres",
            filetypes=[("JSON", "*.json"), ("Tous", "*.*")],
        )
        if not src:
            return

        data = json.loads(Path(src).read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            raise ValueError("le fichier ne contient pas un objet JSON de paramètres")

        # Read the metadata first so a malformed value cannot abort the
        # import after the listboxes were already modified.
        version = data.get("version", "?")
        date_exp = str(data.get("date_export") or "?")[:10]

        added_wl = _merge(wl_listbox, data.get("whitelist_phrases"))
        added_bl = _merge(bl_listbox, data.get("blacklist_force_mask_terms"))
        added_sw = _merge(sw_listbox, data.get("additional_stopwords"))

        messagebox.showinfo(
            "Import réussi",
            f"Paramètres importés (v{version}, {date_exp}) :\n\n"
            f"  + {added_wl} phrase(s) ajoutée(s) à la whitelist\n"
            f"  + {added_bl} terme(s) ajouté(s) à la blacklist\n"
            f"  + {added_sw} mot(s) ajouté(s) aux stop-words\n\n"
            f"Cliquez sur « Sauvegarder » pour appliquer.",
        )
    except Exception as e:
        messagebox.showerror("Erreur", f"Erreur à l'import :\n{e}")
|
||
|
||
def _import_params(self):
    """Import into the main listboxes, then refresh the summary line."""
    boxes = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._import_param_listboxes(*boxes)
    self._refresh_params_summary()
|
||
|
||
def _save_param_listboxes(self, wl_listbox: tk.Listbox, bl_listbox: tk.Listbox, sw_listbox: tk.Listbox):
    """Write the visible lists into the general YAML configuration file.

    Other keys already present in the file are kept. A missing PyYAML and a
    missing configuration file are reported with distinct warnings. Any
    other failure is reported in an error dialog.
    """
    try:
        if yaml is None:
            # Previously reported as "file not found", which was misleading.
            messagebox.showwarning(
                "Erreur",
                "PyYAML est indisponible : impossible d'écrire la configuration.",
            )
            return
        cfg_path = Path(self.cfg_path.get())
        if not cfg_path.exists():
            messagebox.showwarning("Erreur", "Fichier de configuration introuvable.")
            return

        data = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}

        data["whitelist_phrases"] = self._listbox_values(wl_listbox)

        # ``blacklist:`` with no value loads as None; replace anything that
        # is not a mapping so the assignment below cannot fail.
        blacklist = data.get("blacklist")
        if not isinstance(blacklist, dict):
            blacklist = {}
            data["blacklist"] = blacklist
        blacklist["force_mask_terms"] = self._listbox_values(bl_listbox)

        # Additional stop-words: words never to be identified as names.
        data["additional_stopwords"] = self._listbox_values(sw_listbox)

        cfg_path.write_text(
            yaml.dump(data, allow_unicode=True, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )
        messagebox.showinfo("Paramètres", "Paramètres sauvegardés avec succès.")
    except Exception as e:
        messagebox.showerror("Erreur", f"Impossible de sauvegarder :\n{e}")
|
||
|
||
def _save_params(self):
    """Save the main listboxes to the configuration, then refresh the summary line."""
    boxes = (self._wl_listbox, self._bl_listbox, self._sw_listbox)
    self._save_param_listboxes(*boxes)
    self._refresh_params_summary()
|
||
|
||
# ---------------------------------------------------------------
|
||
# YAML (interne)
|
||
# ---------------------------------------------------------------
|
||
def _ensure_cfg_exists(self):
|
||
p = Path(self.cfg_path.get())
|
||
p.parent.mkdir(parents=True, exist_ok=True)
|
||
if not p.exists():
|
||
p.write_text(RUNTIME_CFG_TEXT, encoding="utf-8")
|
||
|
||
def _load_cfg(self):
    """Load the effective configuration into ``self.cfg_data``.

    The configuration file is created first if it does not exist. The
    result of ``load_effective_dictionaries_dict`` is stored on the
    instance. Loading stays best-effort: on failure ``self.cfg_data`` is
    left as it was and no exception is raised, but the error is logged
    so that a broken configuration does not go unnoticed.
    """
    self._ensure_cfg_exists()
    cfg_file = Path(self.cfg_path.get())
    try:
        self.cfg_data = load_effective_dictionaries_dict(cfg_file)
    except Exception as exc:
        import logging

        logging.getLogger(__name__).warning(
            "Chargement de la configuration impossible (%s) : %s", cfg_file, exc
        )
|
||
|
||
# ---------------------------------------------------------------
|
||
# Audit
|
||
# ---------------------------------------------------------------
|
||
def _count_audit(self, audit_path: Path) -> Dict[str, int]:
|
||
d: Dict[str, int] = {}
|
||
try:
|
||
with open(audit_path, "r", encoding="utf-8") as f:
|
||
for line in f:
|
||
try:
|
||
obj = json.loads(line)
|
||
k = obj.get("kind", "?")
|
||
d[k] = d.get(k, 0) + 1
|
||
except Exception:
|
||
pass
|
||
except Exception:
|
||
pass
|
||
return d
|
||
|
||
# ---------------------------------------------------------------
|
||
# Vérification des fuites
|
||
# ---------------------------------------------------------------
|
||
def _check_leaks(self, output_dir: Path) -> int:
    """Count potential leaks in the anonymised texts.

    Every file yielded by ``iter_pseudonymized_texts(output_dir)`` is read
    as UTF-8 and scanned, case-insensitively, for two patterns: a birth
    date introduced by "né le" / "née le" / "DDN", and the whole word
    "CHUXX". The return value is the total number of matches. Files that
    cannot be read are skipped, and any other error ends the scan with
    the count reached so far.
    """
    total = 0
    try:
        import re

        # Critical leak patterns
        detectors = {
            "date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
            "force_term": re.compile(r"\bCHUXX\b", re.IGNORECASE),
        }

        for text_path in iter_pseudonymized_texts(output_dir):
            try:
                with open(text_path, 'r', encoding='utf-8') as handle:
                    body = handle.read()
                for detector in detectors.values():
                    total += len(detector.findall(body))
            except Exception:
                # Unreadable file: move on to the next one.
                continue
    except Exception:
        pass

    return total
|
||
|
||
# ---------------------------------------------------------------
|
||
# Calcul des performances
|
||
# ---------------------------------------------------------------
|
||
def _calculate_performance(self, total_files: int, total_time: float) -> str:
|
||
"""Calcule et formate les performances de traitement."""
|
||
if total_files == 0 or total_time == 0:
|
||
return "⏱️ Temps de traitement non disponible"
|
||
|
||
avg_time = total_time / total_files
|
||
|
||
# Formater le temps total
|
||
if total_time < 60:
|
||
time_str = f"{total_time:.0f}s"
|
||
elif total_time < 3600:
|
||
minutes = int(total_time // 60)
|
||
seconds = int(total_time % 60)
|
||
time_str = f"{minutes}m {seconds}s"
|
||
else:
|
||
hours = int(total_time // 3600)
|
||
minutes = int((total_time % 3600) // 60)
|
||
time_str = f"{hours}h {minutes}m"
|
||
|
||
return f"⏱️ Traité en {time_str} ({avg_time:.1f}s/document)"
|
||
|
||
# ---------------------------------------------------------------
|
||
# Mise à jour de l'indicateur de fuites
|
||
# ---------------------------------------------------------------
|
||
def _update_leak_indicator(self, leak_count: int):
    """Refresh the leak badge text and colours from ``leak_count``.

    A count of zero shows the green "no leak" badge. Any other count
    shows the red badge with the number, pluralised when above one.
    """
    if leak_count == 0:
        caption = "🔒 0 fuite détectée"
        background, foreground = CLR_GREEN_LIGHT, CLR_GREEN
    else:
        plural = 's' if leak_count > 1 else ''
        caption = f"⚠️ {leak_count} fuite{plural} potentielle{plural}"
        background, foreground = CLR_RED_LIGHT, CLR_RED
    self._leak_badge.configure(text=caption, bg=background, fg=foreground)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Chargement automatique NER au démarrage
|
||
# ---------------------------------------------------------------
|
||
def _auto_load_ner(self):
|
||
"""Charge le modèle NER par défaut en arrière-plan.
|
||
Priorité : EDS-Pseudo → CamemBERT-bio local → DistilCamemBERT-NER legacy.
|
||
"""
|
||
if not self._eds_manager and not self._camembert_manager and not self._onnx_manager:
|
||
return
|
||
self.status_var.set("Chargement du modèle NER...")
|
||
threading.Thread(target=self._auto_load_ner_worker, daemon=True).start()
|
||
|
||
def _auto_load_ner_worker(self):
|
||
camembert_loaded = False
|
||
|
||
# 1) Essayer EDS-Pseudo en priorité (F1=97.4% sur données cliniques)
|
||
if self._eds_manager:
|
||
try:
|
||
self._eds_manager.load("AP-HP/eds-pseudo-public")
|
||
self._active_manager = self._eds_manager
|
||
self.use_hf = True
|
||
if self._camembert_manager:
|
||
try:
|
||
self._camembert_manager.load()
|
||
camembert_loaded = True
|
||
except Exception as cam_err:
|
||
import logging
|
||
logging.getLogger(__name__).info("CamemBERT-bio local indisponible : %s", cam_err)
|
||
suffix = " + CamemBERT-bio local" if camembert_loaded else ""
|
||
self.status_var.set(f"Prêt — EDS-Pseudo actif{suffix}.")
|
||
return
|
||
except Exception as e:
|
||
import logging
|
||
logging.getLogger(__name__).info("EDS-Pseudo indisponible, fallback ONNX : %s", e)
|
||
|
||
# 2) Fallback local embarqué : CamemBERT-bio ONNX.
|
||
# Il est utilisé par le core comme signal NER-first séparé, pas comme
|
||
# ner_manager HuggingFace legacy.
|
||
if self._camembert_manager:
|
||
try:
|
||
self._camembert_manager.load()
|
||
self.use_hf = False
|
||
self.status_var.set("Prêt — CamemBERT-bio local actif.")
|
||
return
|
||
except Exception as cam_err:
|
||
import logging
|
||
logging.getLogger(__name__).info("CamemBERT-bio local indisponible : %s", cam_err)
|
||
|
||
# 3) Fallback legacy : DistilCamemBERT-NER via optimum.onnxruntime.
|
||
if self._onnx_manager:
|
||
try:
|
||
self._onnx_manager.load("cmarkea/distilcamembert-base-ner")
|
||
self._active_manager = self._onnx_manager
|
||
self.use_hf = True
|
||
self.status_var.set("Prêt — NER ONNX legacy actif.")
|
||
return
|
||
except Exception as e2:
|
||
self.status_var.set(f"Prêt (NER legacy indisponible : {e2})")
|
||
return
|
||
|
||
self.status_var.set("Prêt (aucun backend NER disponible).")
|
||
|
||
# ---------------------------------------------------------------
|
||
# VLM toggle
|
||
# ---------------------------------------------------------------
|
||
def _on_vlm_toggle(self):
|
||
"""Appelé quand l'utilisateur coche/décoche la checkbox VLM."""
|
||
if not self.use_vlm.get():
|
||
self._vlm_available = False
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
|
||
return
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="Connexion...", fg=CLR_TEXT_SECONDARY)
|
||
threading.Thread(target=self._vlm_connect_worker, daemon=True).start()
|
||
|
||
def _vlm_connect_worker(self):
|
||
"""Vérifie la connexion Ollama en arrière-plan."""
|
||
try:
|
||
if self._vlm_manager is None:
|
||
raise RuntimeError("VlmManager non disponible")
|
||
self._vlm_manager.load()
|
||
self._vlm_available = True
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN)
|
||
except Exception as e:
|
||
self._vlm_available = False
|
||
self.use_vlm.set(False)
|
||
err = str(e)
|
||
if len(err) > 60:
|
||
err = err[:57] + "..."
|
||
if hasattr(self, '_vlm_status_lbl'):
|
||
self._vlm_status_lbl.configure(text=f"Indisponible : {err}", fg=CLR_RED)
|
||
|
||
# ---------------------------------------------------------------
|
||
# Modèles NER (API interne)
|
||
# ---------------------------------------------------------------
|
||
def _load_model(self, model_id: Optional[str] = None):
|
||
mid = model_id or "cmarkea/distilcamembert-base-ner"
|
||
is_eds = False
|
||
if self._eds_manager:
|
||
eds_ids = set(self._eds_manager.models_catalog().values())
|
||
if mid in eds_ids:
|
||
is_eds = True
|
||
if is_eds:
|
||
if not self._eds_manager:
|
||
return
|
||
manager = self._eds_manager
|
||
else:
|
||
if not self._onnx_manager:
|
||
return
|
||
manager = self._onnx_manager
|
||
try:
|
||
manager.load(mid)
|
||
self._active_manager = manager
|
||
self.use_hf = True
|
||
except Exception:
|
||
self.use_hf = False
|
||
|
||
def _unload_model(self):
|
||
if self._onnx_manager:
|
||
self._onnx_manager.unload()
|
||
if self._eds_manager:
|
||
self._eds_manager.unload()
|
||
if self._camembert_manager:
|
||
self._camembert_manager.unload()
|
||
self._active_manager = None
|
||
self.use_hf = False
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Point d'entrée
|
||
# ---------------------------------------------------------------------------
|
||
if __name__ == "__main__":
    # Start the GUI. Any exception that escapes is written to crash.log,
    # shown in a message box when possible, and then re-raised.
    try:
        root = tk.Tk()
        App(root)
        root.mainloop()
    except Exception as exc:
        import sys
        import traceback

        err = traceback.format_exc()
        # Write the log next to the executable. In a frozen bundle (e.g.
        # PyInstaller) __file__ points inside the temporary extraction
        # directory, so use the executable's own folder in that case.
        if getattr(sys, "frozen", False):
            log_dir = Path(sys.executable).resolve().parent
        else:
            log_dir = Path(__file__).resolve().parent
        log_path = log_dir / "crash.log"
        try:
            log_path.write_text(err, encoding="utf-8")
        except Exception:
            # Best effort: a read-only location must not hide the crash.
            pass
        # Try to show a message box (even without a console)
        try:
            import tkinter as _tk
            _r = _tk.Tk()
            _r.withdraw()
            from tkinter import messagebox as _mb
            _mb.showerror("Erreur fatale", f"L'application a planté :\n\n{exc}\n\nVoir crash.log")
            _r.destroy()
        except Exception:
            # No display available: the re-raise below still reports it.
            pass
        raise
|