feat(gui): apply WIP profils+masques+build-windows from stash (2026-04-27)

Application du stash@{0} resté en WIP depuis le 27/04 :
  "On main: wip-gui-profils-masque-manuel-build-windows-2026-04-27"

## Apport

- Pseudonymisation_Gui_V5.py (+1208 lignes) : profils, panneau paramètres
  avancés, éditeur de masques intégré, gestion whitelist/blacklist
- launcher.py (+315) : splash natif PyInstaller, single-instance,
  téléchargement modèles
- anonymisation_onefile.spec : config PyInstaller mise à jour
- pdf_mask_designer.py (+114) : éditeur de masques amélioré
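- config_defaults.py (+23) : helpers `_normalize_string_list` et
  `load_effective_param_lists` (listes de paramètres effectives affichées
  dans la GUI)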
- tests/unit/test_config_externalization.py (+12) : tests config
- .gitignore (+5)

## Pourquoi

La version courante de la GUI sur la branche feature manquait :
- L'éditeur de masques
- Les profils
- Le panneau paramètres avancés
- Le splash natif au démarrage

Aucun conflit avec mes 10 commits Q-1 (pas de chevauchement de fichiers).

## Validation

75 passed, 10 xfailed sur pytest tests/unit/.

## Note

Le stash reste disponible dans `git stash list` jusqu'à drop explicite.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 11:09:46 +02:00
parent 5d89eaf8dc
commit 380e520013
7 changed files with 1664 additions and 179 deletions

5
.gitignore vendored
View File

@@ -6,10 +6,12 @@ __pycache__/
*.egg
dist/
build/
release/
*.whl
# === Virtual environments ===
.venv/
.venv_build_win/
venv/
venv_*/
env/
@@ -66,6 +68,9 @@ Thumbs.db
# === Secrets ===
.env
*.env
*.pfx
*.p12
build_signing.local.ps1
credentials.json
token.pickle

File diff suppressed because it is too large Load Diff

View File

@@ -1,90 +1,128 @@
import os
block_cipher = None
app_dir = 'C:\\Users\\dom\\ai\\anonymisation'
from pathlib import Path
datas = [
(os.path.join(app_dir, 'config'), 'config'),
(os.path.join(app_dir, 'data', 'bdpm'), os.path.join('data', 'bdpm')),
(os.path.join(app_dir, 'data', 'finess'), os.path.join('data', 'finess')),
(os.path.join(app_dir, 'data', 'insee'), os.path.join('data', 'insee')),
(os.path.join(app_dir, 'models', 'camembert-bio-deid', 'onnx'), os.path.join('models', 'camembert-bio-deid', 'onnx')),
(os.path.join(app_dir, 'detectors'), 'detectors'),
(os.path.join(app_dir, 'scripts'), 'scripts'),
# Assets UI : logo (header + splash), icônes fenêtre, splash image.
# Le launcher et la GUI y accèdent via _asset(name) qui résout sous
# sys._MEIPASS/assets en mode frozen.
(os.path.join(app_dir, 'assets'), 'assets'),
]
# Fichiers directs dans data/ — IMPÉRATIF pour fonctionnement correct du core.
# Sans eux : stop-words/villes/DPI labels/companion blacklist sont des sets vides,
# ce qui dégrade la qualité d'anonymisation et peut masquer/laisser passer des faux-positifs.
for data_file in [
'stopwords_manuels.txt',
'villes_blacklist.txt',
'dpi_labels_blacklist.txt',
'companion_blacklist.txt',
block_cipher = None
project_dir = Path(globals().get("SPECPATH", os.getcwd())).resolve()
def _data_entry(relative_path: str, target_dir: str | None = None):
    """Build one PyInstaller ``datas`` tuple for a path under the project root.

    Returns ``(absolute_source, destination)`` when ``relative_path`` exists
    below ``project_dir``; the destination is ``target_dir`` when given (and
    non-empty), otherwise ``relative_path`` itself. Returns ``None`` when the
    source is missing, so callers can skip it.
    """
    source = project_dir / relative_path
    if source.exists():
        destination = target_dir if target_dir else relative_path
        return (str(source), destination)
    return None
datas = []
for relative_path, target_dir in [
("config", "config"),
("data/bdpm", "data/bdpm"),
("data/finess", "data/finess"),
("data/insee", "data/insee"),
("models/camembert-bio-deid/onnx", "models/camembert-bio-deid/onnx"),
("detectors", "detectors"),
("scripts", "scripts"),
("assets", "assets"),
]:
src = os.path.join(app_dir, 'data', data_file)
if os.path.exists(src):
datas.append((src, 'data'))
for pyfile in ['anonymizer_core_refactored_onnx.py', 'eds_pseudo_manager.py',
'gliner_manager.py', 'camembert_ner_manager.py',
'Pseudonymisation_Gui_V5.py', 'build_info.py']:
datas.append((os.path.join(app_dir, pyfile), '.'))
entry = _data_entry(relative_path, target_dir)
if entry is not None:
datas.append(entry)
# Fichiers directs sous data/ requis par le core.
for relative_path in [
"data/stopwords_manuels.txt",
"data/villes_blacklist.txt",
"data/dpi_labels_blacklist.txt",
"data/companion_blacklist.txt",
]:
entry = _data_entry(relative_path, "data")
if entry is not None:
datas.append(entry)
hiddenimports = [
"Pseudonymisation_Gui_V5",
"anonymizer_core_refactored_onnx",
"admin_rules",
"config_defaults",
"profile_defaults",
"gui_batch_paths",
"manual_masking",
"pdf_mask_designer",
"format_converter",
"ner_manager_onnx",
"camembert_ner_manager",
"eds_pseudo_manager",
"gliner_manager",
"vlm_manager",
"build_info",
"doctr",
"doctr.io",
"doctr.models",
"doctr.models.detection",
"doctr.models.recognition",
"cv2",
"torchvision",
"edsnlp",
"edsnlp.pipes",
"edsnlp.pipes.ner",
"edsnlp.pipes.ner.pseudo",
"spacy",
"spacy.lang.fr",
"gliner",
"onnxruntime",
"transformers",
"tokenizers",
"torch",
"pdfplumber",
"fitz",
"PIL",
"yaml",
"loguru",
"regex",
"optimum",
"optimum.onnxruntime",
"optimum.pipelines",
"optimum.modeling_base",
"optimum.exporters.onnx",
]
a = Analysis(
[os.path.join(app_dir, 'launcher.py')],
pathex=[app_dir],
[str(project_dir / "launcher.py")],
pathex=[str(project_dir)],
datas=datas,
hiddenimports=[
'anonymizer_core_refactored_onnx', 'eds_pseudo_manager',
'gliner_manager', 'camembert_ner_manager', 'Pseudonymisation_Gui_V5',
'edsnlp', 'edsnlp.pipes', 'edsnlp.pipes.ner', 'edsnlp.pipes.ner.pseudo',
'spacy', 'spacy.lang.fr', 'gliner', 'onnxruntime',
'transformers', 'tokenizers', 'torch', 'pdfplumber',
'ahocorasick', 'sklearn', 'scipy', 'pydantic', 'yaml', 'PIL',
'loguru', 'regex',
# optimum : utilisé par ner_manager_onnx.py (fallback NER legacy).
# Sans ça, la GUI affiche "NER indisponible : optimum.onnxruntime introuvable"
# si EDS-Pseudo échoue. Le pipeline principal (CamemBERT-bio ONNX +
# EDS-Pseudo + GLiNER) n'en dépend pas — mais l'absence du hiddenimport
# crée un message d'erreur cosmétique gênant.
'optimum', 'optimum.onnxruntime', 'optimum.pipelines',
'optimum.modeling_base', 'optimum.exporters.onnx',
],
hiddenimports=hiddenimports,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
# Splash natif PyInstaller : image affichée AU LANCEMENT DE L'EXE,
# avant même que Python démarre. Couvre les ~15-30 s de décompression
# du bundle --onefile dans %TEMP% qui laissaient l'écran vide auparavant.
# Le launcher ferme le splash via pyi_splash.close() une fois la GUI prête.
splash = Splash(
os.path.join(app_dir, 'assets', 'splash.png'),
str(project_dir / "assets" / "splash.png"),
binaries=a.binaries,
datas=a.datas,
# Texte dynamique PyInstaller positionné dans la zone libre du PNG
# (y=170-235). text_pos correspond au coin haut-gauche du texte.
text_pos=(60, 195),
text_size=10,
text_color='white',
text_color="white",
minify_script=True,
always_on_top=False,
)
exe = EXE(
pyz, a.scripts,
splash, # image affichée immédiatement
splash.binaries, # bootloader splash
a.binaries, a.zipfiles, a.datas, [],
name='Anonymisation',
pyz,
a.scripts,
splash,
splash.binaries,
a.binaries,
a.zipfiles,
a.datas,
[],
name="Anonymisation",
debug=False,
strip=False,
upx=False,
console=False,
# Icône du fichier .exe visible dans l'Explorateur Windows et la taskbar
# (dérivée du logo aivanonym, multi-résolution 16→256 dans le .ico).
icon=os.path.join(app_dir, 'assets', 'icons', 'app.ico'),
icon=str(project_dir / "assets" / "icons" / "app.ico"),
)

View File

@@ -153,6 +153,29 @@ def load_effective_dictionaries_dict(path: Path | None = None) -> Dict[str, Any]
)
def _normalize_string_list(values: Any) -> list[str]:
if not isinstance(values, list):
return []
normalized: list[str] = []
for value in values:
text = str(value).strip()
if text:
normalized.append(text)
return normalized
def load_effective_param_lists(path: Path | None = None) -> Dict[str, list[str]]:
    """Return the effective parameter lists shown in the GUI.

    Reads the effective dictionaries via ``load_effective_dictionaries_dict``
    and extracts three normalized string lists:

    - ``whitelist_phrases``
    - ``blacklist_force_mask_terms`` (from ``blacklist.force_mask_terms``)
    - ``additional_stopwords``

    Missing or malformed sections yield empty lists rather than raising.
    """
    data = load_effective_dictionaries_dict(path)

    # A present-but-null (or otherwise non-mapping) "blacklist" section must
    # not break the GUI: treat it as empty instead of calling .get() on it.
    blacklist = data.get("blacklist")
    if not isinstance(blacklist, dict):
        blacklist = {}

    return {
        "whitelist_phrases": _normalize_string_list(data.get("whitelist_phrases")),
        "blacklist_force_mask_terms": _normalize_string_list(
            blacklist.get("force_mask_terms")
        ),
        "additional_stopwords": _normalize_string_list(data.get("additional_stopwords")),
    }
def deep_merge_dict(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
merged = deepcopy(base)
for key, value in (override or {}).items():

View File

@@ -8,6 +8,8 @@ from tkinter import ttk, messagebox
from pathlib import Path
import threading
import logging
import contextlib
import time
# pyi_splash : module injecté par PyInstaller quand --splash est utilisé.
# Permet d'actualiser / fermer le splash natif affiché au démarrage de l'exe
@@ -38,6 +40,216 @@ def _splash_close() -> None:
except Exception:
pass
class BrandedSplash:
    """In-process startup splash: existing artwork plus detailed progress.

    PyInstaller first shows its native splash while the onefile bundle is
    extracted. Once Python is running, this Tk window takes over to show
    readable steps and a short loading log.

    The splash is best-effort. Construction never raises: if the window
    cannot be built the instance stays disabled (``enabled`` is False) and
    the public methods only forward text to ``_splash_update``. Any Tk
    failure after construction disables the window instead of propagating.
    """

    def __init__(self, total_steps: int = 6):
        """Build and show the splash window.

        Args:
            total_steps: number of ``step()`` calls expected; values below 1
                are clamped to 1 and used as the progress bar maximum.
        """
        self.total_steps = max(total_steps, 1)
        self.current_step = 0
        self.enabled = False
        self.root = None
        self.status_var = None
        self.progress = None
        self.log_box = None
        self._image = None  # kept as an attribute so Tk does not lose the image
        self._lines = []

        try:
            self.root = tk.Tk()
            self.root.withdraw()
            self.root.title("aivanonym")
            self.root.resizable(False, False)
            self.root.overrideredirect(True)
            self.root.configure(bg="white")

            container = tk.Frame(
                self.root,
                bg="white",
                highlightthickness=1,
                highlightbackground="#d8d8d8",
            )
            container.pack(fill="both", expand=True)

            self._build_banner(container)
            self._build_progress_area(container)

            self._center()
            self.root.deiconify()
            self.root.lift()
            self.root.update_idletasks()
            self.root.update()
            self.enabled = True

            # The native PyInstaller splash only has one line of text. Once
            # this window is ready it takes over without changing the visual.
            _splash_close()
        except Exception as exc:
            try:
                if self.root is not None:
                    self.root.destroy()
            except Exception:
                pass
            # Drop every widget reference: they point at a destroyed window
            # and touching them later would raise TclError.
            self._forget_widgets()
            log.warning(f"Branded splash unavailable: {exc}")

    def _build_banner(self, container) -> None:
        """Show ``assets/splash.png`` or, if missing/unreadable, a text banner."""
        splash_path = APP_DIR / "assets" / "splash.png"
        if splash_path.exists():
            try:
                self._image = tk.PhotoImage(file=str(splash_path))
            except tk.TclError as exc:
                # An unreadable image should not cost us the whole splash.
                self._image = None
                log.warning(f"Splash image unreadable, using text banner: {exc}")

        if self._image is not None:
            tk.Label(container, image=self._image, bg="white", bd=0).pack()
            return

        fallback = tk.Frame(container, bg="white", width=500, height=170)
        fallback.pack_propagate(False)
        fallback.pack()
        tk.Frame(fallback, bg="#cc0000", height=4).pack(fill="x")
        tk.Label(
            fallback,
            text="aivanonym",
            bg="white",
            fg="#222222",
            font=("Segoe UI", 28),
        ).pack(expand=True)

    def _build_progress_area(self, container) -> None:
        """Create the status line, the progress bar and the loading log."""
        body = tk.Frame(container, bg="white", padx=24, pady=14)
        body.pack(fill="x")

        self.status_var = tk.StringVar(value="Initialisation...")
        tk.Label(
            body,
            textvariable=self.status_var,
            bg="white",
            fg="#222222",
            font=("Segoe UI", 10, "bold"),
            anchor="w",
        ).pack(fill="x")

        self.progress = ttk.Progressbar(
            body,
            mode="determinate",
            maximum=self.total_steps,
            length=452,
        )
        self.progress.pack(fill="x", pady=(8, 10))

        tk.Label(
            body,
            text="Chargements en cours",
            bg="white",
            fg="#666666",
            font=("Segoe UI", 8),
            anchor="w",
        ).pack(fill="x")

        self.log_box = tk.Listbox(
            body,
            height=5,
            activestyle="none",
            bg="#f7f7f7",
            fg="#333333",
            bd=0,
            highlightthickness=1,
            highlightbackground="#e7e7e7",
            font=("Consolas", 8),
        )
        self.log_box.pack(fill="x", pady=(4, 0))

    def _forget_widgets(self) -> None:
        """Mark the splash as unusable and release all Tk references."""
        self.root = None
        self.status_var = None
        self.progress = None
        self.log_box = None
        self._image = None
        self.enabled = False

    def _center(self) -> None:
        """Center the window on screen using its requested size."""
        if self.root is None:
            return
        self.root.update_idletasks()
        width = self.root.winfo_reqwidth()
        height = self.root.winfo_reqheight()
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()
        x = max(0, int((screen_width - width) / 2))
        y = max(0, int((screen_height - height) / 2))
        self.root.geometry(f"{width}x{height}+{x}+{y}")

    def step(self, message: str) -> None:
        """Advance to the next step and show ``[current/total] message``.

        The counter saturates at ``total_steps``. Never raises on Tk errors.
        """
        self.current_step = min(self.current_step + 1, self.total_steps)
        status = f"[{self.current_step}/{self.total_steps}] {message}"
        self.message(status)
        # Only touch the bar while the window is known to be alive; a widget
        # of a destroyed window raises TclError on item assignment.
        if not self.enabled or self.progress is None:
            return
        try:
            self.progress["value"] = self.current_step
        except Exception:
            self.enabled = False
            return
        self._pump()

    def message(self, message: str) -> None:
        """Set the status line (and forward the text to the native splash)."""
        _splash_update(message)
        if not self.enabled or self.status_var is None:
            return
        try:
            self.status_var.set(message)
        except Exception:
            self.enabled = False
            return
        self._pump()

    def detail(self, message: str) -> None:
        """Append one line to the loading log.

        Whitespace is collapsed, blank messages are ignored, lines longer
        than 150 characters are truncated with ``...`` and only the 7 most
        recent lines are kept.
        """
        _splash_update(message)
        clean = " ".join(str(message).split())
        if not clean:
            return
        if len(clean) > 150:
            clean = clean[:147] + "..."
        if not self.enabled or self.log_box is None:
            return
        self._lines.append(clean)
        self._lines = self._lines[-7:]
        try:
            self.log_box.delete(0, tk.END)
            for line in self._lines:
                self.log_box.insert(tk.END, line)
            self.log_box.see(tk.END)
        except Exception:
            self.enabled = False
            return
        self._pump()

    def close(self) -> None:
        """Close the native splash (if any) and destroy this window."""
        _splash_close()
        if self.root is not None:
            try:
                self.root.destroy()
            except Exception:
                pass
        self._forget_widgets()

    def _pump(self) -> None:
        """Process pending Tk events so the window repaints.

        There is no ``mainloop()`` here: the window is only refreshed when
        this method runs, i.e. from ``step``/``message``/``detail``.
        """
        if self.root is None:
            return
        try:
            self.root.update_idletasks()
            self.root.update()
        except Exception:
            self.enabled = False
class ModelProgressStream:
    """Write-only, file-like object forwarding tqdm-style output to a UI callback.

    Text is buffered until a line ends; carriage returns count as line ends
    so progress bars that redraw with ``\\r`` produce one callback per
    redraw. Each emitted line has its whitespace collapsed and is passed to
    ``callback`` as ``"<prefix> : <line>"``. Lines shorter than 3 characters
    are dropped, and a line identical to the previous one is suppressed if it
    arrives less than one second later.
    """

    def __init__(self, callback, prefix: str):
        """Store the sink.

        Args:
            callback: callable taking the formatted line as a single string.
            prefix: label prepended to every forwarded line.
        """
        self.callback = callback
        self.prefix = prefix
        self.buffer = ""
        self.last_line = ""
        self.last_emit = 0.0

    def write(self, data) -> int:
        """Buffer *data* and emit every completed line; return its length."""
        text = str(data)
        self.buffer += text.replace("\r", "\n")
        while "\n" in self.buffer:
            line, self.buffer = self.buffer.split("\n", 1)
            self._emit(line)
        return len(text)

    def flush(self) -> None:
        """Emit whatever partial line is still buffered."""
        if self.buffer:
            self._emit(self.buffer)
            self.buffer = ""

    def isatty(self) -> bool:
        """Report that this stream is not a terminal.

        The object stands in for ``sys.stdout``/``sys.stderr``; code that
        probes ``stream.isatty()`` would otherwise fail with AttributeError.
        """
        return False

    def _emit(self, line: str) -> None:
        """Normalize one line, apply the filters, and call the callback."""
        clean = " ".join(line.split())
        if len(clean) < 3:
            return
        now = time.monotonic()
        # Throttle repeated identical lines (e.g. a stalled progress bar).
        if clean == self.last_line and now - self.last_emit < 1.0:
            return
        self.last_line = clean
        self.last_emit = now
        self.callback(f"{self.prefix} : {clean}")
# ---------------------------------------------------------------------------
# Single-instance guard (lock file in user's temp directory)
# ---------------------------------------------------------------------------
@@ -105,23 +317,10 @@ def check_models_ready():
def launch_gui():
"""Launch the main GUI — étapes de chargement affichées DANS le splash natif.
Le splash natif PyInstaller (image avec logo + texte dynamique) reste
visible pendant TOUTE la phase de chargement. On intercepte les log.info()
du core via un logging.Handler et on pousse chaque étape traduite dans
le splash natif via pyi_splash.update_text(). L'utilisateur voit défiler
sous le logo :
"Chargement des prénoms français (INSEE)…"
"Chargement des noms de famille (INSEE)…"
"Chargement des numéros FINESS…"
Puis le splash se ferme et la GUI s'ouvre — pas de fenêtre intermédiaire.
En mode dev (pas frozen), pyi_splash n'existe pas ; on ajoute un
mini-splash tkinter temporaire pour voir le même rendu pendant le test.
"""
"""Launch the main GUI with visible startup progress."""
log.info("Launching GUI...")
progress = BrandedSplash(total_steps=5)
progress.step("Préparation de l'environnement")
# Traductions log.info() → libellés "prod" lisibles pour l'utilisateur.
_LOG_TRANSLATIONS = [
@@ -158,7 +357,7 @@ def launch_gui():
class _SplashHandler(logging.Handler):
def emit(self, record):
try:
_splash_update(_translate(record.getMessage()))
progress.detail(_translate(record.getMessage()))
except Exception:
pass
@@ -167,17 +366,24 @@ def launch_gui():
logging.getLogger().addHandler(_handler)
# Afficher tout de suite un message initial sous le logo
_splash_update("Démarrage…")
progress.detail("Démarrage du moteur applicatif")
# Import of the core and the GUI (synchronous). The BrandedSplash window
# lives in this same thread and is only repainted when step()/detail()
# pump Tk events, so it may look frozen during a long import.
result = {"error": None}
try:
_splash_update("Chargement des dictionnaires médicaux")
progress.step("Chargement des dictionnaires médicaux")
import anonymizer_core_refactored_onnx # noqa
log.info("Core imported OK")
progress.step("Chargement du moteur d'anonymisation")
import Pseudonymisation_Gui_V5 # noqa
log.info("GUI module imported OK")
progress.step("Vérification des modèles locaux")
if check_models_ready():
progress.detail("CamemBERT-bio ONNX local disponible")
else:
progress.detail("CamemBERT-bio ONNX non trouvé dans le bundle")
progress.step("Ouverture de l'interface")
except Exception as e:
result["error"] = f"{e}\n{traceback.format_exc()}"
log.error(f"Import error: {result['error']}")
@@ -188,8 +394,8 @@ def launch_gui():
except Exception:
pass
# Fermer le splash natif maintenant que tout est prêt
_splash_close()
# Fermer le splash maintenant que tout est prêt
progress.close()
if result["error"]:
try:
@@ -239,12 +445,19 @@ class SetupWindow:
def __init__(self):
self.root = tk.Tk()
self.root.title("Anonymisation — Configuration initiale")
self.root.geometry("620x450")
self.root.geometry("660x700")
self.root.resizable(False, False)
self._logo_image = None
self._log_lines = []
frame = ttk.Frame(self.root, padding=20)
frame = ttk.Frame(self.root, padding=18)
frame.pack(fill="both", expand=True)
splash_path = APP_DIR / "assets" / "splash.png"
if splash_path.exists():
self._logo_image = tk.PhotoImage(file=str(splash_path))
ttk.Label(frame, image=self._logo_image).pack(pady=(0, 8))
ttk.Label(frame, text="Préparation des modèles d'intelligence artificielle",
font=("", 13, "bold")).pack(pady=(0, 4))
ttk.Label(
@@ -278,6 +491,22 @@ class SetupWindow:
font=("", 8)).pack(side="left")
self.step_labels[key] = icon
log_frame = ttk.LabelFrame(frame, text=" Détail du chargement ", padding=8)
log_frame.pack(fill="x", pady=(0, 12))
self.log_text = tk.Text(
log_frame,
height=7,
wrap="word",
state="disabled",
bg="#f7f7f7",
fg="#333333",
bd=0,
padx=8,
pady=6,
font=("Consolas", 8),
)
self.log_text.pack(fill="x")
# Bouton relance (caché au début)
self.btn = ttk.Button(frame, text="Relancer", command=self.start_download)
self.btn.pack(pady=6)
@@ -321,43 +550,54 @@ class SetupWindow:
try:
# 1. EDS-Pseudo
self._update("Téléchargement d'EDS-Pseudo… (modèle CamemBERT clinique)")
self._append_log("EDS-Pseudo : téléchargement/chargement du modèle AP-HP")
self._set_step("eds_pseudo", "running")
log.info("Downloading EDS-Pseudo...")
try:
from eds_pseudo_manager import EdsPseudoManager
mgr = EdsPseudoManager()
mgr.load()
with self._capture_model_output("EDS-Pseudo"):
mgr.load()
self._set_step("eds_pseudo", "ok")
self._append_log("EDS-Pseudo : modèle prêt")
log.info("EDS-Pseudo OK")
except Exception as e:
self._set_step("eds_pseudo", "fail")
self._append_log(f"EDS-Pseudo : échec - {e}")
failures.append(("EDS-Pseudo", str(e)))
log.warning(f"EDS-Pseudo failed: {e}")
self._advance()
# 2. GLiNER
self._update("Téléchargement de GLiNER… (détection zero-shot)")
self._append_log("GLiNER : téléchargement/chargement du modèle PII")
self._set_step("gliner", "running")
log.info("Downloading GLiNER...")
try:
from gliner_manager import GlinerManager
mgr = GlinerManager()
mgr.load()
with self._capture_model_output("GLiNER"):
mgr.load()
self._set_step("gliner", "ok")
self._append_log("GLiNER : modèle prêt")
log.info("GLiNER OK")
except Exception as e:
self._set_step("gliner", "fail")
self._append_log(f"GLiNER : échec - {e}")
failures.append(("GLiNER", str(e)))
log.warning(f"GLiNER failed: {e}")
self._advance()
# 3. CamemBERT-bio ONNX
self._update("Vérification CamemBERT-bio ONNX (modèle embarqué)…")
self._append_log("CamemBERT-bio ONNX : vérification du modèle embarqué")
self._set_step("camembert_onnx", "running")
if check_models_ready():
self._set_step("camembert_onnx", "ok")
self._append_log("CamemBERT-bio ONNX : modèle local présent")
else:
self._set_step("camembert_onnx", "fail")
self._append_log("CamemBERT-bio ONNX : fichier ONNX introuvable")
failures.append(("CamemBERT-bio ONNX", "fichier ONNX introuvable dans le bundle"))
log.error("CamemBERT-bio ONNX not found")
self._advance()
@@ -384,6 +624,31 @@ class SetupWindow:
def _update(self, msg):
self.root.after(0, lambda: self.status_var.set(msg))
def _append_log(self, msg):
clean = " ".join(str(msg).split())
if not clean:
return
if len(clean) > 180:
clean = clean[:177] + "..."
def _apply():
self._log_lines.append(clean)
self._log_lines = self._log_lines[-80:]
self.log_text.configure(state="normal")
self.log_text.delete("1.0", tk.END)
self.log_text.insert("end", "\n".join(self._log_lines))
self.log_text.configure(state="disabled")
self.log_text.see("end")
self.root.after(0, _apply)
@contextlib.contextmanager
def _capture_model_output(self, label):
    """Route stdout/stderr into the loading log while the body runs.

    Everything written to ``sys.stdout`` / ``sys.stderr`` inside the
    ``with`` block goes through a ``ModelProgressStream`` that forwards each
    line to ``_append_log`` prefixed with ``label``.

    The trailing partial line is flushed even when the body raises, so the
    last output before a failure still reaches the log.

    NOTE: ``contextlib.redirect_stdout``/``redirect_stderr`` swap the global
    ``sys.stdout``/``sys.stderr``, so output from other threads is captured
    too while this context is active.
    """
    stream = ModelProgressStream(self._append_log, label)
    try:
        with contextlib.redirect_stdout(stream), contextlib.redirect_stderr(stream):
            yield
    finally:
        # Runs after the redirection has been undone, on success and failure.
        stream.flush()
def _finish(self):
try:
self.root.destroy()

View File

@@ -17,6 +17,7 @@ Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML
"""
from __future__ import annotations
import argparse
import io
import json
import math
@@ -31,7 +32,12 @@ from PIL import Image, ImageTk
import fitz # PyMuPDF
import yaml
APP_TITLE = "PDF Mask Designer (Standalone)"
from manual_masking import (
DEFAULT_MASK_OUTPUT_DIRNAME,
DEFAULT_MASK_PREVIEW_DIRNAME,
)
APP_TITLE = "Éditeur de masques PDF"
TEMPLATE_VERSION = 1
# ----------------------------- Data structures -----------------------------
@@ -167,7 +173,16 @@ def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int,
# ----------------------------- GUI ------------------------------
class MaskDesignerApp:
def __init__(self, root: tk.Tk):
def __init__(
self,
root: tk.Tk,
*,
initial_pdf: Optional[Path] = None,
initial_template: Optional[Path] = None,
templates_dir: Optional[Path] = None,
output_dir_name: str = DEFAULT_MASK_OUTPUT_DIRNAME,
preview_dir_name: str = DEFAULT_MASK_PREVIEW_DIRNAME,
):
self.root = root
self.root.title(APP_TITLE)
self.root.geometry("1280x900")
@@ -181,11 +196,18 @@ class MaskDesignerApp:
self.template_name = tk.StringVar(value="template_masks")
self.status = tk.StringVar(value="Prêt.")
self.raster_dpi = tk.IntVar(value=200)
self.templates_dir = templates_dir
self.output_dir_name = output_dir_name
self.preview_dir_name = preview_dir_name
self.is_drawing = False
self.start_xy: Optional[Tuple[int,int]] = None
self._build_ui()
if initial_pdf:
self.open_pdf_path(initial_pdf)
if initial_template:
self.load_template_path(initial_template)
# UI layout
def _build_ui(self):
@@ -228,14 +250,17 @@ class MaskDesignerApp:
def open_pdf(self):
path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")])
if not path: return
self.open_pdf_path(Path(path))
def open_pdf_path(self, path: Path):
try:
self.doc = fitz.open(path)
self.doc = fitz.open(str(path))
self.doc_path = Path(path)
self.curr_page = 0
self.masks.clear()
self.template_name.set(self.doc_path.stem + "_template")
self.refresh()
self.status.set(f"PDF ouvert : {Path(path).name}{len(self.doc)} page(s)")
self.status.set(f"PDF ouvert : {self.doc_path.name}{len(self.doc)} page(s)")
except Exception as e:
messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}")
@@ -244,7 +269,7 @@ class MaskDesignerApp:
img = page_pix(self.doc, self.curr_page, self.zoom)
# overlay current page masks
rects = self.masks.get(self.curr_page, [])
img_o = draw_overlay(img, rects, 1.0, self.curr_page)
img_o = draw_overlay(img, rects, self.zoom, self.curr_page)
self.curr_image = img_o
self.tk_image = ImageTk.PhotoImage(img_o)
self.canvas.delete("all")
@@ -269,19 +294,25 @@ class MaskDesignerApp:
def on_down(self, ev):
if not self.doc: return
self.is_drawing = True
self.start_xy = (ev.x, ev.y)
self._preview_rect = self.canvas.create_rectangle(ev.x, ev.y, ev.x, ev.y, outline="#000", width=2)
x = self.canvas.canvasx(ev.x)
y = self.canvas.canvasy(ev.y)
self.start_xy = (x, y)
self._preview_rect = self.canvas.create_rectangle(x, y, x, y, outline="#000", width=2)
def on_drag(self, ev):
if not self.doc or not self.is_drawing: return
sx, sy = self.start_xy
self.canvas.coords(self._preview_rect, sx, sy, ev.x, ev.y)
x = self.canvas.canvasx(ev.x)
y = self.canvas.canvasy(ev.y)
self.canvas.coords(self._preview_rect, sx, sy, x, y)
def on_up(self, ev):
if not self.doc or not self.is_drawing: return
self.is_drawing = False
sx, sy = self.start_xy
x0, y0, x1, y1 = rect_norm(sx, sy, ev.x, ev.y)
x = self.canvas.canvasx(ev.x)
y = self.canvas.canvasy(ev.y)
x0, y0, x1, y1 = rect_norm(sx, sy, x, y)
# convert screen px to PDF points
page = self.doc[self.curr_page]
# we rendered with zoom, but here current image is at display resolution (zoom applied in page_pix)
@@ -311,9 +342,12 @@ class MaskDesignerApp:
tpl = self._current_template()
except Exception as e:
messagebox.showwarning("Info", str(e)); return
path = filedialog.asksaveasfilename(defaultextension=".yml",
filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
initialfile=f"{tpl.name}.yml")
path = filedialog.asksaveasfilename(
defaultextension=".yml",
filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
initialdir=str(self._template_initialdir()),
initialfile=f"{tpl.name}.yml",
)
if not path: return
p = Path(path)
try:
@@ -326,8 +360,14 @@ class MaskDesignerApp:
messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}")
def load_template(self):
path = filedialog.askopenfilename(filetypes=[("YAML/JSON", "*.yml *.yaml *.json")])
path = filedialog.askopenfilename(
filetypes=[("YAML/JSON", "*.yml *.yaml *.json")],
initialdir=str(self._template_initialdir()),
)
if not path: return
self.load_template_path(Path(path))
def load_template_path(self, path: Path):
p = Path(path)
try:
if p.suffix.lower() in (".yml", ".yaml"):
@@ -351,6 +391,14 @@ class MaskDesignerApp:
self.refresh()
self.status.set(f"Masques de la page {self.curr_page+1} supprimés.")
def _template_initialdir(self) -> Path:
if self.templates_dir is not None:
self.templates_dir.mkdir(parents=True, exist_ok=True)
return self.templates_dir
if self.doc_path is not None:
return self.doc_path.parent
return Path.cwd()
# Preview / Apply
def _build_template_from_state(self) -> Optional[Template]:
if not self.doc:
@@ -365,7 +413,7 @@ class MaskDesignerApp:
if not samp: return
for i, s in enumerate(samp[:2], start=1):
pdf_in = Path(s)
out_dir = pdf_in.parent / "masked_preview"
out_dir = pdf_in.parent / self.preview_dir_name
out_dir.mkdir(exist_ok=True)
pdf_out = out_dir / f"{pdf_in.stem}.preview_vector.pdf"
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
@@ -373,7 +421,10 @@ class MaskDesignerApp:
apply_template_vector(pdf_in, pdf_out, tpl, audit)
except Exception as e:
messagebox.showerror("Erreur", f"Prévisualisation vectorielle échouée sur {pdf_in.name} : {e}")
messagebox.showinfo("Prévisualisation", "Terminé (vectoriel). Ouvrez le dossier 'masked_preview'.")
messagebox.showinfo(
"Prévisualisation",
f"Terminé (vectoriel). Ouvrez le dossier '{self.preview_dir_name}'.",
)
def preview_raster(self):
tpl = self._build_template_from_state()
@@ -383,7 +434,7 @@ class MaskDesignerApp:
dpi = int(self.raster_dpi.get())
for i, s in enumerate(samp[:2], start=1):
pdf_in = Path(s)
out_dir = pdf_in.parent / "masked_preview"
out_dir = pdf_in.parent / self.preview_dir_name
out_dir.mkdir(exist_ok=True)
pdf_out = out_dir / f"{pdf_in.stem}.preview_raster.pdf"
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
@@ -391,7 +442,10 @@ class MaskDesignerApp:
apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit)
except Exception as e:
messagebox.showerror("Erreur", f"Prévisualisation raster échouée sur {pdf_in.name} : {e}")
messagebox.showinfo("Prévisualisation", "Terminé (raster). Ouvrez le dossier 'masked_preview'.")
messagebox.showinfo(
"Prévisualisation",
f"Terminé (raster). Ouvrez le dossier '{self.preview_dir_name}'.",
)
def apply_vector_batch(self):
tpl = self._build_template_from_state()
@@ -400,7 +454,7 @@ class MaskDesignerApp:
if not files: return
for s in files:
pdf_in = Path(s)
out_dir = pdf_in.parent / "masked"
out_dir = pdf_in.parent / self.output_dir_name
out_dir.mkdir(exist_ok=True)
pdf_out = out_dir / f"{pdf_in.stem}.masked_vector.pdf"
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
@@ -418,7 +472,7 @@ class MaskDesignerApp:
dpi = int(self.raster_dpi.get())
for s in files:
pdf_in = Path(s)
out_dir = pdf_in.parent / "masked"
out_dir = pdf_in.parent / self.output_dir_name
out_dir.mkdir(exist_ok=True)
pdf_out = out_dir / f"{pdf_in.stem}.masked_raster.pdf"
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
@@ -430,9 +484,27 @@ class MaskDesignerApp:
# ----------------------------- Main ------------------------------
def main():
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the mask editor.

    All options are optional: ``--pdf`` and ``--template`` name files to load
    at startup, ``--templates-dir`` sets the default template folder, and
    ``--output-dir-name`` / ``--preview-dir-name`` override the names of the
    output folders (defaults come from ``manual_masking``).
    """
    cli = argparse.ArgumentParser(description="Editeur de masques PDF reutilisables")
    option_specs = (
        ("--pdf", {"type": Path, "help": "PDF de reference a ouvrir au demarrage"}),
        ("--template", {"type": Path, "help": "Template YAML/JSON a charger au demarrage"}),
        (
            "--templates-dir",
            {"type": Path, "help": "Dossier par defaut pour sauver/charger les templates"},
        ),
        (
            "--output-dir-name",
            {
                "default": DEFAULT_MASK_OUTPUT_DIRNAME,
                "help": "Nom du dossier de sortie pour l'application des masques",
            },
        ),
        (
            "--preview-dir-name",
            {
                "default": DEFAULT_MASK_PREVIEW_DIRNAME,
                "help": "Nom du dossier de sortie pour les previsualisations",
            },
        ),
    )
    # Declaration order is kept so the generated --help lists options as before.
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli
def main(argv: Optional[List[str]] = None):
args = build_arg_parser().parse_args(argv)
root = tk.Tk()
app = MaskDesignerApp(root)
app = MaskDesignerApp(
root,
initial_pdf=args.pdf,
initial_template=args.template,
templates_dir=args.templates_dir,
output_dir_name=args.output_dir_name,
preview_dir_name=args.preview_dir_name,
)
root.mainloop()
if __name__ == "__main__":

View File

@@ -9,6 +9,7 @@ from config_defaults import (
deep_merge_dict,
ensure_runtime_dictionaries_config,
load_effective_dictionaries_dict,
load_effective_param_lists,
read_default_dictionaries_text,
read_runtime_dictionaries_overlay_text,
)
@@ -90,3 +91,14 @@ def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
effective = load_effective_dictionaries_dict(cfg_path)
assert "CHCB" in effective["blacklist"]["force_mask_terms"]
assert "LOCAL_SIGLE" in effective["blacklist"]["force_mask_terms"]
def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path: Path):
    """An empty overlay file must still expose the packaged default lists."""
    overlay_file = tmp_path / "dictionnaires.yml"
    overlay_file.write_text("{}\n", encoding="utf-8")

    effective = load_effective_param_lists(overlay_file)

    # Defaults shine through the empty overlay...
    assert "classification internationale" in effective["whitelist_phrases"]
    assert "CHCB" in effective["blacklist_force_mask_terms"]
    # ...and a list with no default content stays empty.
    assert effective["additional_stopwords"] == []