- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles - Core ONNX : anonymisation regex + NER optionnel - Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR) - Génération simultanée PDF Image + PDF Anonymisé (structure préservée) - Build Windows via Nuitka (script batch + GitHub Actions CI) - install.sh pour setup/run Linux Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
408 lines
21 KiB
Python
408 lines
21 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Pseudonymisation – GUI v4 (Gestionnaire de modèles ONNX + mode Simple/Avancé)
|
||
-----------------------------------------------------------------------------
|
||
- Onglet Simple : parcours en 3 clics + choix "PDF anonymisé (léger)" / "PDF image (très sûr)"
|
||
- Onglet Avancé : gestion des règles YAML + Créateur de règle + Gestionnaire de modèles ONNX
|
||
- Chargement paresseux du modèle NER (CamemBERT family, ONNX Runtime via Optimum)
|
||
- Application du NER uniquement au narratif, avec seuils par type
|
||
|
||
Fichiers requis à côté :
|
||
- anonymizer_core_refactored_onnx.py
|
||
- ner_manager_onnx.py
|
||
"""
|
||
from __future__ import annotations
|
||
import json
|
||
import os
|
||
import platform
|
||
import queue
|
||
import re
|
||
import threading
|
||
from pathlib import Path
|
||
from typing import Any, Dict
|
||
|
||
import tkinter as tk
|
||
from tkinter import filedialog, messagebox, ttk
|
||
|
||
# Core
|
||
try:
|
||
import anonymizer_core_refactored_onnx as core
|
||
except Exception as e:
|
||
raise SystemExit(f"Impossible d'importer le core ONNX : {e}")
|
||
|
||
# NER manager
|
||
try:
|
||
from ner_manager_onnx import NerModelManager, NerThresholds
|
||
except Exception as e:
|
||
NerModelManager = None # type: ignore
|
||
NerThresholds = None # type: ignore
|
||
|
||
try:
|
||
from eds_pseudo_manager import EdsPseudoManager
|
||
except Exception:
|
||
EdsPseudoManager = None # type: ignore
|
||
|
||
try:
|
||
import yaml
|
||
except Exception:
|
||
yaml = None
|
||
|
||
APP_TITLE = "Pseudonymisation de PDF"
|
||
DEFAULT_CFG = Path("config/dictionnaires.yml")
|
||
|
||
DEFAULTS_CFG_TEXT = r"""
|
||
# dictionnaires.yml – valeurs par défaut (bloc littéral pour les regex)
|
||
version: 1
|
||
encoding: "utf-8"
|
||
normalization: "NFKC"
|
||
whitelist:
|
||
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
|
||
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
|
||
org_gpe_keep: true
|
||
blacklist:
|
||
force_mask_terms: []
|
||
force_mask_regex: []
|
||
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
|
||
regex_overrides:
|
||
- name: OGC_court
|
||
pattern: |-
|
||
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
|
||
placeholder: '[OGC]'
|
||
flags: [IGNORECASE]
|
||
flags:
|
||
case_insensitive: true
|
||
unicode_word_boundaries: true
|
||
regex_engine: "python"
|
||
"""
|
||
|
||
|
||
class ToolTip:
    """Hover tooltip attached to a Tk widget.

    A borderless top-level window holding the text is created when the
    pointer enters the widget and destroyed when the pointer leaves it.
    """

    def __init__(self, widget, text: str):
        """Store *widget* and *text* and bind the enter/leave handlers."""
        self.widget = widget
        self.text = text
        self.tip = None  # Toplevel currently displayed, or None
        widget.bind("<Enter>", self.show)
        widget.bind("<Leave>", self.hide)

    def show(self, *_):
        """Display the tooltip below the widget; no-op if already shown."""
        if self.tip:
            return
        left = self.widget.winfo_rootx() + 20
        top = self.widget.winfo_rooty() + self.widget.winfo_height() + 4
        popup = tk.Toplevel(self.widget)
        self.tip = popup
        popup.wm_overrideredirect(True)  # no window-manager decorations
        popup.wm_geometry(f"+{left}+{top}")
        label = tk.Label(
            popup,
            text=self.text,
            justify=tk.LEFT,
            relief=tk.SOLID,
            borderwidth=1,
            padx=6,
            pady=4,
        )
        label.pack(ipadx=1)

    def hide(self, *_):
        """Destroy the tooltip window if one is displayed."""
        if not self.tip:
            return
        self.tip.destroy()
        self.tip = None
|
||
|
||
def open_folder(path: Path):
    """Open *path* in the platform's file manager (best effort).

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. The external command receives the path as a separate
    argument (no shell), so quotes or other shell metacharacters in the
    path cannot break or alter the command.

    Failures to launch the opener are ignored on purpose: this is only a
    convenience action.
    """
    import subprocess  # local import: only needed outside Windows

    target = str(path)
    system = platform.system()
    try:
        if system == "Windows":
            os.startfile(target)  # type: ignore[attr-defined]
        elif system == "Darwin":
            subprocess.run(["open", target], check=False)
        else:
            subprocess.run(["xdg-open", target], check=False)
    except (OSError, ValueError):
        # Opener missing, path not found, or path not representable.
        pass
|
||
|
||
class App:
|
||
def __init__(self, root: tk.Tk):
    """Initialise the application state, build the UI and load the rules.

    Args:
        root: the Tk root window that hosts the application.
    """
    self.root = root
    self.root.title(APP_TITLE)
    self.root.geometry("1280x900")

    # General UI state
    self.dir_var = tk.StringVar()
    self.status_var = tk.StringVar(value="Prêt.")
    self.cfg_path = tk.StringVar(value=str(DEFAULT_CFG))
    self.queue: "queue.Queue[str]" = queue.Queue()  # log messages awaiting display
    self.format_var = tk.StringVar(value="raster")

    # NER state
    self.use_hf = tk.BooleanVar(value=False)
    self.model_choice = tk.StringVar(value="DistilCamemBERT-NER (ONNX)")
    self.model_id = tk.StringVar(value="")
    self.th_per = tk.DoubleVar(value=0.90)
    self.th_org = tk.DoubleVar(value=0.90)
    self.th_loc = tk.DoubleVar(value=0.90)
    self.model_status = tk.StringVar(value="Aucun modèle chargé.")

    # A manager stays None when its class could not be imported.
    if NerModelManager:
        self._onnx_manager: NerModelManager | None = NerModelManager(cache_dir=Path("models"))
    else:
        self._onnx_manager = None
    if EdsPseudoManager:
        self._eds_manager: EdsPseudoManager | None = EdsPseudoManager(cache_dir=Path("models"))
    else:
        self._eds_manager = None
    self._active_manager = None  # the manager currently loaded

    self.cfg_data: Dict[str, Any] = {}

    self._build_ui()
    self._pump_logs()
    self._ensure_cfg_exists()
    self._load_cfg()
|
||
|
||
def _build_ui(self):
    """Build the notebook with its "Simple" and "Avancé" tabs.

    The "Simple" tab holds the folder selector, the output-format choice,
    the action buttons, the report text area and the status line. The
    "Avancé" tab holds the YAML rules controls, the quick rule creator and
    the NER model controls.
    """
    outer = tk.Frame(self.root, padx=10, pady=10)
    outer.pack(fill=tk.BOTH, expand=True)
    notebook = ttk.Notebook(outer)
    notebook.pack(fill=tk.BOTH, expand=True)

    # --- Simple tab ---
    tab_simple = tk.Frame(notebook, padx=12, pady=12)
    notebook.add(tab_simple, text="Simple")

    dir_row = tk.Frame(tab_simple)
    dir_row.pack(fill=tk.X)
    tk.Label(dir_row, text="Répertoire documents :").pack(side=tk.LEFT)
    tk.Entry(dir_row, textvariable=self.dir_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=6)
    tk.Button(dir_row, text="Choisir…", command=self._browse).pack(side=tk.LEFT, padx=3)

    fmt_box = tk.LabelFrame(tab_simple, text="Format du document final")
    fmt_box.pack(fill=tk.X, pady=10)
    rb_raster = tk.Radiobutton(
        fmt_box,
        text="PDF image (très sûr — recommandé)",
        variable=self.format_var,
        value="raster",
    )
    rb_raster.pack(anchor="w", padx=6)
    ToolTip(rb_raster, "Convertit chaque page en image avec boîtes noires. Aucun texte résiduel. Fichier plus lourd, non sélectionnable.")
    rb_vector = tk.Radiobutton(
        fmt_box,
        text="PDF anonymisé (léger)",
        variable=self.format_var,
        value="vector",
    )
    rb_vector.pack(anchor="w", padx=6)
    ToolTip(rb_vector, "⚠ Le texte sous-jacent reste potentiellement récupérable par copier-coller. Utilisez le mode image pour une sécurité maximale.")

    btn_row = tk.Frame(tab_simple)
    btn_row.pack(fill=tk.X, pady=(6, 2))
    self.btn_run = tk.Button(btn_row, text="Anonymiser", command=self._run)
    self.btn_run.pack(side=tk.LEFT)
    tk.Button(btn_row, text="Aide (2 min)", command=self._show_help).pack(side=tk.LEFT, padx=6)
    self.btn_open_out = tk.Button(
        btn_row,
        text="Ouvrir le dossier de résultats",
        command=self._open_out,
        state=tk.DISABLED,
    )
    self.btn_open_out.pack(side=tk.RIGHT)

    tk.Label(tab_simple, text="Rapport d’exécution :").pack(anchor="w")
    self.txt = tk.Text(tab_simple, height=22)
    self.txt.pack(fill=tk.BOTH, expand=True, pady=(2, 0))
    tk.Label(tab_simple, textvariable=self.status_var, anchor="w").pack(fill=tk.X, pady=(4, 0))

    # --- Advanced tab ---
    tab_adv = tk.Frame(notebook, padx=12, pady=12)
    notebook.add(tab_adv, text="Avancé")

    # YAML rules file
    yaml_box = tk.LabelFrame(tab_adv, text="Règles & dictionnaires (YAML)", padx=8, pady=8)
    yaml_box.pack(fill=tk.X, pady=6)
    tk.Label(yaml_box, text="Fichier YAML :").grid(row=0, column=0, sticky="w")
    tk.Entry(yaml_box, textvariable=self.cfg_path, width=60).grid(row=0, column=1, sticky="we", padx=6)
    tk.Button(yaml_box, text="Parcourir", command=self._cfg_browse).grid(row=0, column=2)
    tk.Button(yaml_box, text="Créer/Charger", command=self._load_cfg).grid(row=0, column=3, padx=4)
    tk.Button(yaml_box, text="Sauver", command=self._save_cfg).grid(row=0, column=4)
    tk.Button(yaml_box, text="Recharger", command=self._reload_cfg).grid(row=0, column=5, padx=4)
    tk.Button(yaml_box, text="Restaurer défauts", command=self._restore_defaults).grid(row=0, column=6)
    yaml_box.grid_columnconfigure(1, weight=1)

    # Quick rule creator
    rule_box = tk.LabelFrame(tab_adv, text="Créer rapidement une règle", padx=8, pady=8)
    rule_box.pack(fill=tk.X, pady=6)
    tk.Label(rule_box, text="Exemple (copiez une ligne du PDF) :").grid(row=0, column=0, sticky="w")
    self.rule_example = tk.Entry(rule_box, width=80)
    self.rule_example.grid(row=0, column=1, columnspan=4, sticky="we", padx=6)
    tk.Label(rule_box, text="Type :").grid(row=1, column=0, sticky="e")
    self.rule_type = ttk.Combobox(
        rule_box,
        values=["Mot exact", "Forme proche", "Modèle avancé"],
        state="readonly",
    )
    self.rule_type.set("Mot exact")
    self.rule_type.grid(row=1, column=1, sticky="w")
    tk.Label(rule_box, text="Remplacer par :").grid(row=1, column=2, sticky="e")
    self.rule_placeholder = tk.Entry(rule_box, width=18)
    self.rule_placeholder.insert(0, "[MASK]")
    self.rule_placeholder.grid(row=1, column=3, sticky="w")
    tk.Label(rule_box, text="Où :").grid(row=1, column=4, sticky="e")
    self.rule_scope = ttk.Combobox(
        rule_box,
        values=["partout", "narratif", "tables_valeur", "entetes_pieds"],
        state="readonly",
    )
    self.rule_scope.set("partout")
    self.rule_scope.grid(row=1, column=5, sticky="w")
    self.flag_ic = tk.BooleanVar(value=True)
    self.flag_bow = tk.BooleanVar(value=True)
    tk.Checkbutton(rule_box, text="Ignorer la casse (A=a)", variable=self.flag_ic).grid(row=2, column=1, sticky="w")
    tk.Checkbutton(rule_box, text="Respecter les mots entiers", variable=self.flag_bow).grid(row=2, column=2, sticky="w")
    tk.Button(rule_box, text="Prévisualiser", command=self._preview_rule).grid(row=2, column=4)
    tk.Button(rule_box, text="Enregistrer la règle", command=self._save_rule).grid(row=2, column=5)

    # NER model manager
    ner_box = tk.LabelFrame(tab_adv, text="Renforcement NER (ONNX – narratif uniquement)", padx=8, pady=8)
    ner_box.pack(fill=tk.X, pady=6)
    tk.Checkbutton(ner_box, text="Activer le renforcement NER", variable=self.use_hf).grid(row=0, column=0, sticky="w")
    tk.Label(ner_box, text="Modèle :").grid(row=1, column=0, sticky="e")

    # Merge the catalogs of every available manager (ONNX first, then EDS-Pseudo).
    catalog = {}
    if self._onnx_manager:
        catalog.update(self._onnx_manager.models_catalog())
    if self._eds_manager:
        catalog.update(self._eds_manager.models_catalog())
    self._merged_catalog = catalog

    self.model_combo = ttk.Combobox(ner_box, values=list(catalog.keys()), state="readonly")
    if self.model_combo["values"]:
        # Preselect the first catalog entry.
        self.model_combo.set(self.model_combo["values"][0])
    self.model_combo.grid(row=1, column=1, sticky="w")
    tk.Label(ner_box, text="ou ID/chemin :").grid(row=1, column=2, sticky="e")
    tk.Entry(ner_box, textvariable=self.model_id, width=36).grid(row=1, column=3, sticky="w")
    tk.Button(ner_box, text="Charger", command=self._load_model).grid(row=1, column=4, padx=4)
    tk.Button(ner_box, text="Décharger", command=self._unload_model).grid(row=1, column=5)
    tk.Label(ner_box, textvariable=self.model_status).grid(row=2, column=0, columnspan=6, sticky="w", pady=(4, 2))
    ToolTip(ner_box, "Le modèle détecte les noms propres dans le texte libre. Les tableaux (clé : valeur) ne sont pas modifiés.")

    # Per-entity-type thresholds
    tk.Label(ner_box, text="Seuils (0–1)").grid(row=3, column=0, sticky="e")
    tk.Label(ner_box, text="PERSON").grid(row=3, column=1, sticky="w")
    tk.Entry(ner_box, textvariable=self.th_per, width=6).grid(row=3, column=2, sticky="w")
    tk.Label(ner_box, text="ORG").grid(row=3, column=3, sticky="w")
    tk.Entry(ner_box, textvariable=self.th_org, width=6).grid(row=3, column=4, sticky="w")
    tk.Label(ner_box, text="LOC").grid(row=3, column=5, sticky="w")
    tk.Entry(ner_box, textvariable=self.th_loc, width=6).grid(row=3, column=6, sticky="w")

    ner_box.grid_columnconfigure(1, weight=1)
|
||
|
||
# YAML helpers
|
||
def _ensure_cfg_exists(self):
|
||
p = Path(self.cfg_path.get()); p.parent.mkdir(parents=True, exist_ok=True)
|
||
if not p.exists(): p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
|
||
def _cfg_browse(self):
    """Ask for a YAML file path and store it as the rules path if one is chosen."""
    chosen = filedialog.asksaveasfilename(
        defaultextension=".yml",
        filetypes=[("YAML", "*.yml *.yaml"), ("Tous", "*.*")],
    )
    if not chosen:
        return  # dialog cancelled
    self.cfg_path.set(chosen)
|
||
def _load_cfg(self):
    """Load the YAML rules file into ``self.cfg_data``.

    The file is created with the default content first if it does not
    exist. An empty document is loaded as ``{}``. A document that cannot
    be read or parsed, or whose root is not a mapping, is reported in an
    error dialog and the previously loaded rules are kept: the rest of
    the class reads ``cfg_data`` as a dictionary.
    """
    if yaml is None:
        messagebox.showerror("PyYAML manquant", "Installez PyYAML (pip install pyyaml).")
        return
    self._ensure_cfg_exists()
    cfg_file = self.cfg_path.get()
    try:
        loaded = yaml.safe_load(Path(cfg_file).read_text(encoding="utf-8")) or {}
    except Exception as e:
        messagebox.showerror("Fichier de règles invalide", str(e))
        return
    if not isinstance(loaded, dict):
        # e.g. a top-level list or scalar: not usable as a rules dictionary.
        messagebox.showerror(
            "Fichier de règles invalide",
            "La racine du fichier YAML doit être un dictionnaire (clé : valeur).",
        )
        return
    self.cfg_data = loaded
    self._log(f"Règles chargées: {cfg_file}")
|
||
def _save_cfg(self):
    """Write the current rules to the configured YAML file.

    When ``self.cfg_data`` is empty, the parsed default configuration is
    written instead. Any failure is reported in an error dialog.
    """
    if yaml is None:
        messagebox.showerror("PyYAML manquant", "Installez PyYAML (pip install pyyaml).")
        return
    try:
        destination = Path(self.cfg_path.get())
        payload = self.cfg_data or yaml.safe_load(DEFAULTS_CFG_TEXT)
        serialized = yaml.safe_dump(payload, allow_unicode=True, sort_keys=False)
        destination.write_text(serialized, encoding="utf-8")
        self._log("Règles sauvegardées.")
    except Exception as e:
        messagebox.showerror("Erreur", f"Impossible d'écrire le YAML: {e}")
|
||
def _reload_cfg(self): self._load_cfg(); self._log("Règles rechargées.")
|
||
def _restore_defaults(self):
    """Overwrite the rules file with the default content and reload it.

    A failure is reported in an error dialog.
    """
    try:
        destination = Path(self.cfg_path.get())
        destination.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
        self._log("CFG par défaut écrit.")
        self._load_cfg()
    except Exception as e:
        messagebox.showerror("Erreur", f"Impossible d'écrire le YAML par défaut: {e}")
|
||
|
||
# Règles rapides (résumé)
|
||
def _build_simple_regex(self, sample: str, bow: bool) -> str:
|
||
s = sample.strip(); s = re.sub(r"\s+", r"\\s+", re.escape(s))
|
||
return rf"\b{s}\b" if bow else s
|
||
def _preview_rule(self):
    """Count how many times the rule being edited matches the first PDF.

    The pattern is the sample itself for "Modèle avancé" and is otherwise
    built with ``_build_simple_regex``. It is compiled first (an invalid
    pattern is reported in a dialog), then searched in the text extracted
    from the first PDF, in sorted order, of the selected folder. The
    result is written to the log.

    An empty folder field means "no folder": it is not interpreted as the
    current working directory. The ``.pdf`` extension is matched without
    regard to case.
    """
    sample = self.rule_example.get().strip()
    if not sample:
        messagebox.showinfo("Info", "Exemple vide.")
        return
    rtype = self.rule_type.get()
    ic = self.flag_ic.get()
    bow = self.flag_bow.get()
    if rtype == "Modèle avancé":
        pattern = sample
    else:
        pattern = self._build_simple_regex(sample, bow)
    try:
        rx = re.compile(pattern, re.IGNORECASE if ic else 0)
    except Exception as e:
        messagebox.showerror("Modèle invalide", str(e))
        return

    raw_dir = self.dir_var.get().strip()
    pdfs = []
    if raw_dir and Path(raw_dir).is_dir():
        pdfs = sorted(
            p for p in Path(raw_dir).iterdir()
            if p.is_file() and p.name.lower().endswith(".pdf")
        )
    if not pdfs:
        messagebox.showinfo("Info", "Aucun PDF pour prévisualiser.")
        return

    first = pdfs[0]
    try:
        pages_text, tables_lines = core.extract_text_three_passes(first)
        text = "\n".join(pages_text) + "\n\n" + "\n".join("\n".join(r) for r in tables_lines)
        hits = len(rx.findall(text))
        self._log(f"Prévisualisation: {hits} occurences sur {first.name}")
    except Exception as e:
        self._log(f"Prévisualisation indisponible: {e}")
|
||
def _save_rule(self):
    """Add the rule described in the quick-rule form to the YAML rules.

    - "Mot exact": the sample is appended to ``blacklist.force_mask_terms``.
    - "Forme proche": the regex built by ``_build_simple_regex`` is
      appended to ``blacklist.force_mask_regex``.
    - otherwise: a ``regex_overrides`` entry is appended with the sample
      as pattern, plus the placeholder, flags and scope from the form.

    Duplicates are not added to the two blacklist lists. Sections that are
    missing or null in the loaded YAML (e.g. a bare ``blacklist:`` key) are
    recreated instead of crashing. An advanced pattern is compiled before
    being stored so that an invalid regex is reported here rather than
    written to the rules file.
    """
    if yaml is None:
        messagebox.showerror("PyYAML manquant", "Installez PyYAML (pip install pyyaml).")
        return
    sample = self.rule_example.get().strip()
    if not sample:
        messagebox.showinfo("Info", "Exemple vide.")
        return
    rtype = self.rule_type.get()
    ic = self.flag_ic.get()
    bow = self.flag_bow.get()
    placeholder = self.rule_placeholder.get().strip() or "[MASK]"
    scope = self.rule_scope.get()

    def ensure(container, key, kind):
        """Return container[key], replacing it with an empty *kind* if it has another type."""
        value = container.get(key)
        if not isinstance(value, kind):
            value = container[key] = kind()
        return value

    cfg = self.cfg_data if isinstance(self.cfg_data, dict) else {}
    blacklist = ensure(cfg, "blacklist", dict)
    overrides = ensure(cfg, "regex_overrides", list)

    if rtype == "Mot exact":
        terms = ensure(blacklist, "force_mask_terms", list)
        if sample not in terms:
            terms.append(sample)
    elif rtype == "Forme proche":
        pattern = self._build_simple_regex(sample, bow)
        patterns = ensure(blacklist, "force_mask_regex", list)
        if pattern not in patterns:
            patterns.append(pattern)
    else:
        try:
            re.compile(sample, re.IGNORECASE if ic else 0)
        except re.error as e:
            messagebox.showerror("Modèle invalide", str(e))
            return
        overrides.append({
            "name": f"custom_{len(overrides) + 1}",
            "pattern": sample,
            "placeholder": placeholder,
            "flags": ["IGNORECASE"] if ic else [],
            "scope": scope,
        })

    self.cfg_data = cfg
    self._save_cfg()
    self._log("Règle ajoutée au YAML.")
|
||
|
||
# Gestionnaire de modèles
|
||
def _load_model(self):
|
||
choice = self.model_combo.get().strip()
|
||
mid = self.model_id.get().strip()
|
||
model_id = self._merged_catalog.get(choice) if choice else None
|
||
model_id = mid or model_id or "cmarkea/distilcamembert-base-ner"
|
||
# Déterminer quel manager utiliser
|
||
is_eds = False
|
||
if self._eds_manager:
|
||
eds_ids = set(self._eds_manager.models_catalog().values())
|
||
if model_id in eds_ids:
|
||
is_eds = True
|
||
if is_eds:
|
||
if not self._eds_manager:
|
||
messagebox.showerror("edsnlp indisponible", "Installez : pip install 'edsnlp[ml]>=0.12.0'"); return
|
||
manager = self._eds_manager
|
||
else:
|
||
if not self._onnx_manager:
|
||
messagebox.showerror("ONNX indisponible", "Installez 'onnxruntime' et 'optimum'."); return
|
||
manager = self._onnx_manager
|
||
try:
|
||
self.model_status.set("Chargement du modèle…")
|
||
self.root.update_idletasks()
|
||
manager.load(model_id)
|
||
self._active_manager = manager
|
||
label = "EDS-Pseudo" if is_eds else "ONNX"
|
||
self.model_status.set(f"Modèle chargé ({label}) : {model_id}")
|
||
self.use_hf.set(True)
|
||
except Exception as e:
|
||
self.model_status.set(f"Échec : {e}")
|
||
self.use_hf.set(False)
|
||
|
||
def _unload_model(self):
|
||
if self._onnx_manager:
|
||
self._onnx_manager.unload()
|
||
if self._eds_manager:
|
||
self._eds_manager.unload()
|
||
self._active_manager = None
|
||
self.model_status.set("Aucun modèle chargé.")
|
||
self.use_hf.set(False)
|
||
|
||
# Actions
|
||
def _browse(self):
    """Ask for a directory and store it as the documents folder if one is chosen."""
    chosen = filedialog.askdirectory()
    if not chosen:
        return  # dialog cancelled
    self.dir_var.set(chosen)
|
||
|
||
def _run(self):
|
||
folder = Path(self.dir_var.get().strip())
|
||
if not folder.is_dir(): messagebox.showwarning("Dossier invalide", "Choisissez un dossier contenant des PDF."); return
|
||
self.btn_run.config(state=tk.DISABLED)
|
||
threading.Thread(target=self._worker, args=(folder,), daemon=True).start()
|
||
|
||
def _worker(self, folder: Path):
|
||
try:
|
||
pdfs = sorted([p for p in folder.glob("*.pdf") if p.is_file()])
|
||
if not pdfs: self._log("Aucun PDF trouvé."); return
|
||
outdir = folder / "pseudonymise"; outdir.mkdir(exist_ok=True)
|
||
ok = ko = 0; global_counts: Dict[str,int] = {}
|
||
for i, pdf in enumerate(pdfs, start=1):
|
||
self.status_var.set(f"{i}/{len(pdfs)} — {pdf.name}")
|
||
make_vec = (self.format_var.get() == "vector"); make_ras = (self.format_var.get() == "raster")
|
||
try:
|
||
active = self._active_manager
|
||
use_ner = bool(active and self.use_hf.get() and active.is_loaded())
|
||
thresholds = NerThresholds(self.th_per.get(), self.th_org.get(), self.th_loc.get(), 0.85) if (use_ner and NerThresholds and not (EdsPseudoManager and isinstance(active, EdsPseudoManager))) else None
|
||
outputs = core.process_pdf(
|
||
pdf_path=pdf,
|
||
out_dir=outdir,
|
||
make_vector_redaction=make_vec,
|
||
also_make_raster_burn=make_ras,
|
||
config_path=Path(self.cfg_path.get()),
|
||
use_hf=use_ner,
|
||
ner_manager=active,
|
||
ner_thresholds=thresholds,
|
||
)
|
||
self._log("✓ " + pdf.name)
|
||
for k, v in outputs.items(): self._log(f" - {k}: {v}")
|
||
# Résumé
|
||
audit_path = Path(outputs.get("audit", ""))
|
||
counts = self._count_audit(audit_path)
|
||
if counts:
|
||
self._log(" ~ résumé : " + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())))
|
||
for k,v in counts.items(): global_counts[k] = global_counts.get(k,0)+v
|
||
ok += 1
|
||
except Exception as e:
|
||
self._log(f"✗ {pdf.name} → ERREUR: {e}"); ko += 1
|
||
self.status_var.set(f"Terminé : {ok} OK, {ko} erreurs. Sortie: {outdir}")
|
||
if ok: self.btn_open_out.config(state=tk.NORMAL); self._last_outdir = outdir
|
||
if ok: self._log("RÉSUMÉ DU LOT : " + ", ".join(f"{k}={v}" for k, v in sorted(global_counts.items())))
|
||
finally:
|
||
self.btn_run.config(state=tk.NORMAL)
|
||
|
||
def _count_audit(self, audit_path: Path) -> Dict[str,int]:
|
||
d: Dict[str,int] = {}
|
||
try:
|
||
with open(audit_path, "r", encoding="utf-8") as f:
|
||
for line in f:
|
||
try:
|
||
obj = json.loads(line); k = obj.get("kind", "?"); d[k] = d.get(k,0)+1
|
||
except Exception: pass
|
||
except Exception: pass
|
||
return d
|
||
|
||
def _open_out(self):
|
||
p = getattr(self, "_last_outdir", None)
|
||
if p: open_folder(p)
|
||
|
||
def _pump_logs(self):
|
||
try:
|
||
while True:
|
||
msg = self.queue.get_nowait(); self.txt.insert(tk.END, msg + "\n"); self.txt.see(tk.END)
|
||
except queue.Empty:
|
||
pass
|
||
finally:
|
||
self.root.after(60, self._pump_logs)
|
||
def _log(self, msg: str): self.queue.put(msg)
|
||
|
||
def _show_help(self):
    """Show the short usage guide in an information dialog."""
    help_lines = (
        "1) Choisissez un dossier avec vos PDF.\n",
        "2) Choisissez le format du document final.\n",
        "   - PDF anonymisé (léger) : texte supprimé + boîtes noires (sélection possible).\n",
        "   - PDF image (très sûr) : chaque page en image, aucun texte résiduel.\n",
        "3) (Option) Chargez un modèle pour renforcer la détection des noms dans le texte libre.\n",
        "4) Cliquez sur Anonymiser, puis ouvrez le dossier de résultats.",
    )
    messagebox.showinfo("Aide (2 minutes)", "".join(help_lines))
|
||
|
||
if __name__ == "__main__":
    # Script entry point: create the root window, attach the app, run the event loop.
    root = tk.Tk()
    App(root)
    root.mainloop()
|