feat(gui): apply WIP profils+masques+build-windows from stash (2026-04-27)
Application du stash@{0} resté en WIP depuis le 27/04 :
"On main: wip-gui-profils-masque-manuel-build-windows-2026-04-27"
## Apport
- Pseudonymisation_Gui_V5.py (+1208 lignes) : profils, panneau paramètres
avancés, éditeur de masques intégré, gestion whitelist/blacklist
- launcher.py (+315) : splash natif PyInstaller, single-instance,
téléchargement modèles
- anonymisation_onefile.spec : config PyInstaller mise à jour
- pdf_mask_designer.py (+114) : éditeur de masques amélioré
- config_defaults.py (+23) : constantes nouvelles
- tests/unit/test_config_externalization.py (+12) : tests config
- .gitignore (+5)
## Pourquoi
La version courante de la GUI sur la branche feature manquait :
- L'éditeur de masques
- Les profils
- Le panneau paramètres avancés
- Le splash natif au démarrage
Aucun conflit avec mes 10 commits Q-1 (pas de chevauchement de fichiers).
## Validation
75 passed, 10 xfailed sur pytest tests/unit/.
## Note
Le stash reste disponible dans `git stash list` jusqu'à drop explicite.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -6,10 +6,12 @@ __pycache__/
|
||||
*.egg
|
||||
dist/
|
||||
build/
|
||||
release/
|
||||
*.whl
|
||||
|
||||
# === Virtual environments ===
|
||||
.venv/
|
||||
.venv_build_win/
|
||||
venv/
|
||||
venv_*/
|
||||
env/
|
||||
@@ -66,6 +68,9 @@ Thumbs.db
|
||||
# === Secrets ===
|
||||
.env
|
||||
*.env
|
||||
*.pfx
|
||||
*.p12
|
||||
build_signing.local.ps1
|
||||
credentials.json
|
||||
token.pickle
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,90 +1,128 @@
|
||||
import os
|
||||
block_cipher = None
|
||||
app_dir = 'C:\\Users\\dom\\ai\\anonymisation'
|
||||
from pathlib import Path
|
||||
|
||||
datas = [
|
||||
(os.path.join(app_dir, 'config'), 'config'),
|
||||
(os.path.join(app_dir, 'data', 'bdpm'), os.path.join('data', 'bdpm')),
|
||||
(os.path.join(app_dir, 'data', 'finess'), os.path.join('data', 'finess')),
|
||||
(os.path.join(app_dir, 'data', 'insee'), os.path.join('data', 'insee')),
|
||||
(os.path.join(app_dir, 'models', 'camembert-bio-deid', 'onnx'), os.path.join('models', 'camembert-bio-deid', 'onnx')),
|
||||
(os.path.join(app_dir, 'detectors'), 'detectors'),
|
||||
(os.path.join(app_dir, 'scripts'), 'scripts'),
|
||||
# Assets UI : logo (header + splash), icônes fenêtre, splash image.
|
||||
# Le launcher et la GUI y accèdent via _asset(name) qui résout sous
|
||||
# sys._MEIPASS/assets en mode frozen.
|
||||
(os.path.join(app_dir, 'assets'), 'assets'),
|
||||
]
|
||||
# Fichiers directs dans data/ — IMPÉRATIF pour fonctionnement correct du core.
|
||||
# Sans eux : stop-words/villes/DPI labels/companion blacklist sont des sets vides,
|
||||
# ce qui dégrade la qualité d'anonymisation et peut masquer/laisser passer des faux-positifs.
|
||||
for data_file in [
|
||||
'stopwords_manuels.txt',
|
||||
'villes_blacklist.txt',
|
||||
'dpi_labels_blacklist.txt',
|
||||
'companion_blacklist.txt',
|
||||
|
||||
block_cipher = None
|
||||
|
||||
project_dir = Path(globals().get("SPECPATH", os.getcwd())).resolve()
|
||||
|
||||
|
||||
def _data_entry(relative_path: str, target_dir: str | None = None):
    """Build one PyInstaller ``datas`` tuple for a path under the project root.

    Args:
        relative_path: Path of the file or folder, relative to ``project_dir``.
        target_dir: Destination inside the bundle. When omitted (or empty),
            ``relative_path`` itself is used as the destination.

    Returns:
        ``(absolute_source, destination)`` when the source exists on disk,
        otherwise ``None`` so the caller can leave the entry out.
    """
    source = project_dir / relative_path
    if source.exists():
        destination = target_dir if target_dir else relative_path
        return (str(source), destination)
    return None
|
||||
|
||||
|
||||
datas = []
|
||||
for relative_path, target_dir in [
|
||||
("config", "config"),
|
||||
("data/bdpm", "data/bdpm"),
|
||||
("data/finess", "data/finess"),
|
||||
("data/insee", "data/insee"),
|
||||
("models/camembert-bio-deid/onnx", "models/camembert-bio-deid/onnx"),
|
||||
("detectors", "detectors"),
|
||||
("scripts", "scripts"),
|
||||
("assets", "assets"),
|
||||
]:
|
||||
src = os.path.join(app_dir, 'data', data_file)
|
||||
if os.path.exists(src):
|
||||
datas.append((src, 'data'))
|
||||
for pyfile in ['anonymizer_core_refactored_onnx.py', 'eds_pseudo_manager.py',
|
||||
'gliner_manager.py', 'camembert_ner_manager.py',
|
||||
'Pseudonymisation_Gui_V5.py', 'build_info.py']:
|
||||
datas.append((os.path.join(app_dir, pyfile), '.'))
|
||||
entry = _data_entry(relative_path, target_dir)
|
||||
if entry is not None:
|
||||
datas.append(entry)
|
||||
|
||||
# Fichiers directs sous data/ requis par le core.
|
||||
for relative_path in [
|
||||
"data/stopwords_manuels.txt",
|
||||
"data/villes_blacklist.txt",
|
||||
"data/dpi_labels_blacklist.txt",
|
||||
"data/companion_blacklist.txt",
|
||||
]:
|
||||
entry = _data_entry(relative_path, "data")
|
||||
if entry is not None:
|
||||
datas.append(entry)
|
||||
|
||||
|
||||
hiddenimports = [
|
||||
"Pseudonymisation_Gui_V5",
|
||||
"anonymizer_core_refactored_onnx",
|
||||
"admin_rules",
|
||||
"config_defaults",
|
||||
"profile_defaults",
|
||||
"gui_batch_paths",
|
||||
"manual_masking",
|
||||
"pdf_mask_designer",
|
||||
"format_converter",
|
||||
"ner_manager_onnx",
|
||||
"camembert_ner_manager",
|
||||
"eds_pseudo_manager",
|
||||
"gliner_manager",
|
||||
"vlm_manager",
|
||||
"build_info",
|
||||
"doctr",
|
||||
"doctr.io",
|
||||
"doctr.models",
|
||||
"doctr.models.detection",
|
||||
"doctr.models.recognition",
|
||||
"cv2",
|
||||
"torchvision",
|
||||
"edsnlp",
|
||||
"edsnlp.pipes",
|
||||
"edsnlp.pipes.ner",
|
||||
"edsnlp.pipes.ner.pseudo",
|
||||
"spacy",
|
||||
"spacy.lang.fr",
|
||||
"gliner",
|
||||
"onnxruntime",
|
||||
"transformers",
|
||||
"tokenizers",
|
||||
"torch",
|
||||
"pdfplumber",
|
||||
"fitz",
|
||||
"PIL",
|
||||
"yaml",
|
||||
"loguru",
|
||||
"regex",
|
||||
"optimum",
|
||||
"optimum.onnxruntime",
|
||||
"optimum.pipelines",
|
||||
"optimum.modeling_base",
|
||||
"optimum.exporters.onnx",
|
||||
]
|
||||
|
||||
|
||||
a = Analysis(
|
||||
[os.path.join(app_dir, 'launcher.py')],
|
||||
pathex=[app_dir],
|
||||
[str(project_dir / "launcher.py")],
|
||||
pathex=[str(project_dir)],
|
||||
datas=datas,
|
||||
hiddenimports=[
|
||||
'anonymizer_core_refactored_onnx', 'eds_pseudo_manager',
|
||||
'gliner_manager', 'camembert_ner_manager', 'Pseudonymisation_Gui_V5',
|
||||
'edsnlp', 'edsnlp.pipes', 'edsnlp.pipes.ner', 'edsnlp.pipes.ner.pseudo',
|
||||
'spacy', 'spacy.lang.fr', 'gliner', 'onnxruntime',
|
||||
'transformers', 'tokenizers', 'torch', 'pdfplumber',
|
||||
'ahocorasick', 'sklearn', 'scipy', 'pydantic', 'yaml', 'PIL',
|
||||
'loguru', 'regex',
|
||||
# optimum : utilisé par ner_manager_onnx.py (fallback NER legacy).
|
||||
# Sans ça, la GUI affiche "NER indisponible : optimum.onnxruntime introuvable"
|
||||
# si EDS-Pseudo échoue. Le pipeline principal (CamemBERT-bio ONNX +
|
||||
# EDS-Pseudo + GLiNER) n'en dépend pas — mais l'absence du hiddenimport
|
||||
# crée un message d'erreur cosmétique gênant.
|
||||
'optimum', 'optimum.onnxruntime', 'optimum.pipelines',
|
||||
'optimum.modeling_base', 'optimum.exporters.onnx',
|
||||
],
|
||||
hiddenimports=hiddenimports,
|
||||
cipher=block_cipher,
|
||||
noarchive=False,
|
||||
)
|
||||
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
||||
|
||||
# Splash natif PyInstaller : image affichée AU LANCEMENT DE L'EXE,
|
||||
# avant même que Python démarre. Couvre les ~15-30 s de décompression
|
||||
# du bundle --onefile dans %TEMP% qui laissaient l'écran vide auparavant.
|
||||
# Le launcher ferme le splash via pyi_splash.close() une fois la GUI prête.
|
||||
splash = Splash(
|
||||
os.path.join(app_dir, 'assets', 'splash.png'),
|
||||
str(project_dir / "assets" / "splash.png"),
|
||||
binaries=a.binaries,
|
||||
datas=a.datas,
|
||||
# Texte dynamique PyInstaller positionné dans la zone libre du PNG
|
||||
# (y=170-235). text_pos correspond au coin haut-gauche du texte.
|
||||
text_pos=(60, 195),
|
||||
text_size=10,
|
||||
text_color='white',
|
||||
text_color="white",
|
||||
minify_script=True,
|
||||
always_on_top=False,
|
||||
)
|
||||
|
||||
exe = EXE(
|
||||
pyz, a.scripts,
|
||||
splash, # image affichée immédiatement
|
||||
splash.binaries, # bootloader splash
|
||||
a.binaries, a.zipfiles, a.datas, [],
|
||||
name='Anonymisation',
|
||||
pyz,
|
||||
a.scripts,
|
||||
splash,
|
||||
splash.binaries,
|
||||
a.binaries,
|
||||
a.zipfiles,
|
||||
a.datas,
|
||||
[],
|
||||
name="Anonymisation",
|
||||
debug=False,
|
||||
strip=False,
|
||||
upx=False,
|
||||
console=False,
|
||||
# Icône du fichier .exe visible dans l'Explorateur Windows et la taskbar
|
||||
# (dérivée du logo aivanonym, multi-résolution 16→256 dans le .ico).
|
||||
icon=os.path.join(app_dir, 'assets', 'icons', 'app.ico'),
|
||||
icon=str(project_dir / "assets" / "icons" / "app.ico"),
|
||||
)
|
||||
|
||||
@@ -153,6 +153,29 @@ def load_effective_dictionaries_dict(path: Path | None = None) -> Dict[str, Any]
|
||||
)
|
||||
|
||||
|
||||
def _normalize_string_list(values: Any) -> list[str]:
|
||||
if not isinstance(values, list):
|
||||
return []
|
||||
normalized: list[str] = []
|
||||
for value in values:
|
||||
text = str(value).strip()
|
||||
if text:
|
||||
normalized.append(text)
|
||||
return normalized
|
||||
|
||||
|
||||
def load_effective_param_lists(path: Path | None = None) -> Dict[str, list[str]]:
    """Return the effective parameter lists shown in the GUI.

    Args:
        path: Optional location forwarded unchanged to
            ``load_effective_dictionaries_dict``.

    Returns:
        A dict with exactly three keys, each holding a normalized list of
        strings: ``whitelist_phrases``, ``blacklist_force_mask_terms`` (read
        from ``blacklist.force_mask_terms``) and ``additional_stopwords``.
    """
    data = load_effective_dictionaries_dict(path)
    # The "blacklist" key may be present but null (or not a mapping at all);
    # chaining .get() on it directly would raise AttributeError.
    blacklist = data.get("blacklist")
    if not isinstance(blacklist, dict):
        blacklist = {}
    return {
        "whitelist_phrases": _normalize_string_list(data.get("whitelist_phrases", [])),
        "blacklist_force_mask_terms": _normalize_string_list(
            blacklist.get("force_mask_terms", [])
        ),
        "additional_stopwords": _normalize_string_list(data.get("additional_stopwords", [])),
    }
|
||||
|
||||
|
||||
def deep_merge_dict(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
||||
merged = deepcopy(base)
|
||||
for key, value in (override or {}).items():
|
||||
|
||||
315
launcher.py
315
launcher.py
@@ -8,6 +8,8 @@ from tkinter import ttk, messagebox
|
||||
from pathlib import Path
|
||||
import threading
|
||||
import logging
|
||||
import contextlib
|
||||
import time
|
||||
|
||||
# pyi_splash : module injecté par PyInstaller quand --splash est utilisé.
|
||||
# Permet d'actualiser / fermer le splash natif affiché au démarrage de l'exe
|
||||
@@ -38,6 +40,216 @@ def _splash_close() -> None:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
class BrandedSplash:
    """Application splash reusing the existing artwork, with detailed progress.

    PyInstaller first shows its native splash while the onefile bundle is
    extracted. Once Python is running, this window takes over to display
    readable steps and a short loading log.

    The splash is purely cosmetic: if the Tk window cannot be built, or if it
    stops responding later, ``enabled`` becomes ``False`` and the public
    methods only forward their text to ``_splash_update``.
    """

    def __init__(self, total_steps: int = 6):
        """Build and show the splash window.

        Args:
            total_steps: Number of ``step()`` calls expected; values below 1
                are raised to 1. Used as the progress bar maximum.
        """
        self.total_steps = max(total_steps, 1)
        self.current_step = 0
        self.enabled = False
        self.root = None
        self.status_var = None
        self.progress = None
        self.log_box = None
        self._image = None  # keeps the PhotoImage referenced while displayed
        self._lines = []

        try:
            self.root = tk.Tk()
            # Stay hidden until the layout is complete and centred.
            self.root.withdraw()
            self.root.title("aivanonym")
            self.root.resizable(False, False)
            self.root.overrideredirect(True)
            self.root.configure(bg="white")

            container = tk.Frame(
                self.root,
                bg="white",
                highlightthickness=1,
                highlightbackground="#d8d8d8",
            )
            container.pack(fill="both", expand=True)

            splash_path = APP_DIR / "assets" / "splash.png"
            if splash_path.exists():
                self._image = tk.PhotoImage(file=str(splash_path))
                tk.Label(container, image=self._image, bg="white", bd=0).pack()
            else:
                # No artwork available: draw a plain text banner instead.
                fallback = tk.Frame(container, bg="white", width=500, height=170)
                fallback.pack_propagate(False)
                fallback.pack()
                tk.Frame(fallback, bg="#cc0000", height=4).pack(fill="x")
                tk.Label(
                    fallback,
                    text="aivanonym",
                    bg="white",
                    fg="#222222",
                    font=("Segoe UI", 28),
                ).pack(expand=True)

            body = tk.Frame(container, bg="white", padx=24, pady=14)
            body.pack(fill="x")

            self.status_var = tk.StringVar(value="Initialisation...")
            tk.Label(
                body,
                textvariable=self.status_var,
                bg="white",
                fg="#222222",
                font=("Segoe UI", 10, "bold"),
                anchor="w",
            ).pack(fill="x")

            self.progress = ttk.Progressbar(
                body,
                mode="determinate",
                maximum=self.total_steps,
                length=452,
            )
            self.progress.pack(fill="x", pady=(8, 10))

            tk.Label(
                body,
                text="Chargements en cours",
                bg="white",
                fg="#666666",
                font=("Segoe UI", 8),
                anchor="w",
            ).pack(fill="x")
            self.log_box = tk.Listbox(
                body,
                height=5,
                activestyle="none",
                bg="#f7f7f7",
                fg="#333333",
                bd=0,
                highlightthickness=1,
                highlightbackground="#e7e7e7",
                font=("Consolas", 8),
            )
            self.log_box.pack(fill="x", pady=(4, 0))

            self._center()
            self.root.deiconify()
            self.root.lift()
            self.root.update_idletasks()
            self.root.update()
            self.enabled = True

            # The native PyInstaller splash only has one line of text. Once
            # this window is ready it takes over without changing the visual.
            _splash_close()
        except Exception as exc:
            try:
                if self.root is not None:
                    self.root.destroy()
            except Exception:
                pass
            self.root = None
            # Widgets created before the failure belong to the destroyed
            # root; drop them so later step()/detail() calls never touch them.
            self._forget_widgets()
            log.warning(f"Branded splash unavailable: {exc}")

    def _forget_widgets(self) -> None:
        """Disable the splash and drop every reference to its Tk objects."""
        self.enabled = False
        self.status_var = None
        self.progress = None
        self.log_box = None
        self._image = None

    def _center(self) -> None:
        """Place the window in the middle of the screen at its requested size."""
        if self.root is None:
            return
        self.root.update_idletasks()
        width = self.root.winfo_reqwidth()
        height = self.root.winfo_reqheight()
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()
        x = max(0, int((screen_width - width) / 2))
        y = max(0, int((screen_height - height) / 2))
        self.root.geometry(f"{width}x{height}+{x}+{y}")

    def step(self, message: str) -> None:
        """Advance the step counter (capped at ``total_steps``) and show it.

        Args:
            message: Label of the step; displayed as ``[current/total] message``.
        """
        self.current_step = min(self.current_step + 1, self.total_steps)
        status = f"[{self.current_step}/{self.total_steps}] {message}"
        self.message(status)
        # Only touch the progress bar while the window is known to be alive;
        # writing to a destroyed widget raises TclError.
        if self.enabled and self.progress is not None:
            try:
                self.progress["value"] = self.current_step
            except Exception:
                self.enabled = False
        self._pump()

    def message(self, message: str) -> None:
        """Show ``message`` as the current status line.

        The text is always forwarded to ``_splash_update``; the Tk label is
        only updated while the splash is enabled.
        """
        _splash_update(message)
        if self.enabled and self.status_var is not None:
            try:
                self.status_var.set(message)
            except Exception:
                self.enabled = False
                return
            self._pump()

    def detail(self, message: str) -> None:
        """Append one line to the loading log.

        Whitespace runs are collapsed, blank messages are ignored and lines
        longer than 150 characters are truncated with ``...``. Only the last
        7 lines are kept.
        """
        _splash_update(message)
        clean = " ".join(str(message).split())
        if not clean:
            return
        if len(clean) > 150:
            clean = clean[:147] + "..."
        if self.enabled and self.log_box is not None:
            self._lines.append(clean)
            self._lines = self._lines[-7:]
            try:
                self.log_box.delete(0, tk.END)
                for line in self._lines:
                    self.log_box.insert(tk.END, line)
                self.log_box.see(tk.END)
            except Exception:
                self.enabled = False
                return
            self._pump()

    def close(self) -> None:
        """Close the native splash and destroy this window (safe to repeat)."""
        _splash_close()
        if self.root is not None:
            try:
                self.root.destroy()
            except Exception:
                pass
        self.root = None
        self._forget_widgets()

    def _pump(self) -> None:
        """Process pending Tk events so the window repaints.

        No mainloop is running while the splash is displayed, so redraws
        only happen when this method is called.
        """
        if self.root is None:
            return
        try:
            self.root.update_idletasks()
            self.root.update()
        except Exception:
            self.enabled = False
|
||||
|
||||
|
||||
class ModelProgressStream:
    """Redirect tqdm-style output to a UI callback, one line at a time.

    Instances expose ``write`` and ``flush`` so they can stand in for
    ``sys.stdout`` / ``sys.stderr``. Each complete line is whitespace-collapsed
    and passed to the callback as ``"<prefix> : <line>"``.
    """

    def __init__(self, callback, prefix: str):
        """Store the sink and label.

        Args:
            callback: Callable receiving one formatted string per emitted line.
            prefix: Label placed in front of every emitted line.
        """
        self.callback = callback
        self.prefix = prefix
        self.buffer = ""
        self.last_line = ""
        self.last_emit = 0.0

    def write(self, data) -> int:
        """Buffer ``data`` and emit every completed line.

        Returns:
            The length of ``str(data)``, as file-like ``write`` is expected to.
        """
        text = str(data)
        # Progress bars redraw with carriage returns; treat them as line ends.
        self.buffer += text.replace("\r", "\n")
        *complete, self.buffer = self.buffer.split("\n")
        for line in complete:
            self._emit(line)
        return len(text)

    def flush(self) -> None:
        """Emit whatever partial line is still buffered, then clear it."""
        pending, self.buffer = self.buffer, ""
        if pending:
            self._emit(pending)

    def _emit(self, line: str) -> None:
        """Forward one line to the callback unless it is noise or a repeat."""
        clean = " ".join(line.split())
        if len(clean) < 3:
            return
        now = time.monotonic()
        # Drop an identical line repeated within one second.
        if clean == self.last_line and now - self.last_emit < 1.0:
            return
        self.last_line = clean
        self.last_emit = now
        try:
            self.callback(f"{self.prefix} : {clean}")
        except Exception:
            # This object replaces stdout/stderr for third-party code: a
            # failing UI callback must not surface as an error inside
            # whatever library happened to print.
            pass
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single-instance guard (lock file in user's temp directory)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -105,23 +317,10 @@ def check_models_ready():
|
||||
|
||||
|
||||
def launch_gui():
|
||||
"""Launch the main GUI — étapes de chargement affichées DANS le splash natif.
|
||||
|
||||
Le splash natif PyInstaller (image avec logo + texte dynamique) reste
|
||||
visible pendant TOUTE la phase de chargement. On intercepte les log.info()
|
||||
du core via un logging.Handler et on pousse chaque étape traduite dans
|
||||
le splash natif via pyi_splash.update_text(). L'utilisateur voit défiler
|
||||
sous le logo :
|
||||
"Chargement des prénoms français (INSEE)…"
|
||||
"Chargement des noms de famille (INSEE)…"
|
||||
"Chargement des numéros FINESS…"
|
||||
…
|
||||
Puis le splash se ferme et la GUI s'ouvre — pas de fenêtre intermédiaire.
|
||||
|
||||
En mode dev (pas frozen), pyi_splash n'existe pas ; on ajoute un
|
||||
mini-splash tkinter temporaire pour voir le même rendu pendant le test.
|
||||
"""
|
||||
"""Launch the main GUI with visible startup progress."""
|
||||
log.info("Launching GUI...")
|
||||
progress = BrandedSplash(total_steps=5)
|
||||
progress.step("Préparation de l'environnement")
|
||||
|
||||
# Traductions log.info() → libellés "prod" lisibles pour l'utilisateur.
|
||||
_LOG_TRANSLATIONS = [
|
||||
@@ -158,7 +357,7 @@ def launch_gui():
|
||||
class _SplashHandler(logging.Handler):
|
||||
def emit(self, record):
|
||||
try:
|
||||
_splash_update(_translate(record.getMessage()))
|
||||
progress.detail(_translate(record.getMessage()))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -167,17 +366,24 @@ def launch_gui():
|
||||
logging.getLogger().addHandler(_handler)
|
||||
|
||||
# Afficher tout de suite un message initial sous le logo
|
||||
_splash_update("Démarrage…")
|
||||
progress.detail("Démarrage du moteur applicatif")
|
||||
|
||||
# Import the core and the GUI synchronously, on the main thread: no worker
# thread is needed because the splash is refreshed explicitly by each
# progress.step() / progress.detail() call between the imports.
|
||||
result = {"error": None}
|
||||
try:
|
||||
_splash_update("Chargement des dictionnaires médicaux…")
|
||||
progress.step("Chargement des dictionnaires médicaux")
|
||||
import anonymizer_core_refactored_onnx # noqa
|
||||
log.info("Core imported OK")
|
||||
progress.step("Chargement du moteur d'anonymisation")
|
||||
import Pseudonymisation_Gui_V5 # noqa
|
||||
log.info("GUI module imported OK")
|
||||
progress.step("Vérification des modèles locaux")
|
||||
if check_models_ready():
|
||||
progress.detail("CamemBERT-bio ONNX local disponible")
|
||||
else:
|
||||
progress.detail("CamemBERT-bio ONNX non trouvé dans le bundle")
|
||||
progress.step("Ouverture de l'interface")
|
||||
except Exception as e:
|
||||
result["error"] = f"{e}\n{traceback.format_exc()}"
|
||||
log.error(f"Import error: {result['error']}")
|
||||
@@ -188,8 +394,8 @@ def launch_gui():
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fermer le splash natif maintenant que tout est prêt
|
||||
_splash_close()
|
||||
# Fermer le splash maintenant que tout est prêt
|
||||
progress.close()
|
||||
|
||||
if result["error"]:
|
||||
try:
|
||||
@@ -239,12 +445,19 @@ class SetupWindow:
|
||||
def __init__(self):
|
||||
self.root = tk.Tk()
|
||||
self.root.title("Anonymisation — Configuration initiale")
|
||||
self.root.geometry("620x450")
|
||||
self.root.geometry("660x700")
|
||||
self.root.resizable(False, False)
|
||||
self._logo_image = None
|
||||
self._log_lines = []
|
||||
|
||||
frame = ttk.Frame(self.root, padding=20)
|
||||
frame = ttk.Frame(self.root, padding=18)
|
||||
frame.pack(fill="both", expand=True)
|
||||
|
||||
splash_path = APP_DIR / "assets" / "splash.png"
|
||||
if splash_path.exists():
|
||||
self._logo_image = tk.PhotoImage(file=str(splash_path))
|
||||
ttk.Label(frame, image=self._logo_image).pack(pady=(0, 8))
|
||||
|
||||
ttk.Label(frame, text="Préparation des modèles d'intelligence artificielle",
|
||||
font=("", 13, "bold")).pack(pady=(0, 4))
|
||||
ttk.Label(
|
||||
@@ -278,6 +491,22 @@ class SetupWindow:
|
||||
font=("", 8)).pack(side="left")
|
||||
self.step_labels[key] = icon
|
||||
|
||||
log_frame = ttk.LabelFrame(frame, text=" Détail du chargement ", padding=8)
|
||||
log_frame.pack(fill="x", pady=(0, 12))
|
||||
self.log_text = tk.Text(
|
||||
log_frame,
|
||||
height=7,
|
||||
wrap="word",
|
||||
state="disabled",
|
||||
bg="#f7f7f7",
|
||||
fg="#333333",
|
||||
bd=0,
|
||||
padx=8,
|
||||
pady=6,
|
||||
font=("Consolas", 8),
|
||||
)
|
||||
self.log_text.pack(fill="x")
|
||||
|
||||
# Bouton relance (caché au début)
|
||||
self.btn = ttk.Button(frame, text="Relancer", command=self.start_download)
|
||||
self.btn.pack(pady=6)
|
||||
@@ -321,43 +550,54 @@ class SetupWindow:
|
||||
try:
|
||||
# 1. EDS-Pseudo
|
||||
self._update("Téléchargement d'EDS-Pseudo… (modèle CamemBERT clinique)")
|
||||
self._append_log("EDS-Pseudo : téléchargement/chargement du modèle AP-HP")
|
||||
self._set_step("eds_pseudo", "running")
|
||||
log.info("Downloading EDS-Pseudo...")
|
||||
try:
|
||||
from eds_pseudo_manager import EdsPseudoManager
|
||||
mgr = EdsPseudoManager()
|
||||
mgr.load()
|
||||
with self._capture_model_output("EDS-Pseudo"):
|
||||
mgr.load()
|
||||
self._set_step("eds_pseudo", "ok")
|
||||
self._append_log("EDS-Pseudo : modèle prêt")
|
||||
log.info("EDS-Pseudo OK")
|
||||
except Exception as e:
|
||||
self._set_step("eds_pseudo", "fail")
|
||||
self._append_log(f"EDS-Pseudo : échec - {e}")
|
||||
failures.append(("EDS-Pseudo", str(e)))
|
||||
log.warning(f"EDS-Pseudo failed: {e}")
|
||||
self._advance()
|
||||
|
||||
# 2. GLiNER
|
||||
self._update("Téléchargement de GLiNER… (détection zero-shot)")
|
||||
self._append_log("GLiNER : téléchargement/chargement du modèle PII")
|
||||
self._set_step("gliner", "running")
|
||||
log.info("Downloading GLiNER...")
|
||||
try:
|
||||
from gliner_manager import GlinerManager
|
||||
mgr = GlinerManager()
|
||||
mgr.load()
|
||||
with self._capture_model_output("GLiNER"):
|
||||
mgr.load()
|
||||
self._set_step("gliner", "ok")
|
||||
self._append_log("GLiNER : modèle prêt")
|
||||
log.info("GLiNER OK")
|
||||
except Exception as e:
|
||||
self._set_step("gliner", "fail")
|
||||
self._append_log(f"GLiNER : échec - {e}")
|
||||
failures.append(("GLiNER", str(e)))
|
||||
log.warning(f"GLiNER failed: {e}")
|
||||
self._advance()
|
||||
|
||||
# 3. CamemBERT-bio ONNX
|
||||
self._update("Vérification CamemBERT-bio ONNX (modèle embarqué)…")
|
||||
self._append_log("CamemBERT-bio ONNX : vérification du modèle embarqué")
|
||||
self._set_step("camembert_onnx", "running")
|
||||
if check_models_ready():
|
||||
self._set_step("camembert_onnx", "ok")
|
||||
self._append_log("CamemBERT-bio ONNX : modèle local présent")
|
||||
else:
|
||||
self._set_step("camembert_onnx", "fail")
|
||||
self._append_log("CamemBERT-bio ONNX : fichier ONNX introuvable")
|
||||
failures.append(("CamemBERT-bio ONNX", "fichier ONNX introuvable dans le bundle"))
|
||||
log.error("CamemBERT-bio ONNX not found")
|
||||
self._advance()
|
||||
@@ -384,6 +624,31 @@ class SetupWindow:
|
||||
def _update(self, msg):
|
||||
self.root.after(0, lambda: self.status_var.set(msg))
|
||||
|
||||
def _append_log(self, msg):
|
||||
clean = " ".join(str(msg).split())
|
||||
if not clean:
|
||||
return
|
||||
if len(clean) > 180:
|
||||
clean = clean[:177] + "..."
|
||||
|
||||
def _apply():
|
||||
self._log_lines.append(clean)
|
||||
self._log_lines = self._log_lines[-80:]
|
||||
self.log_text.configure(state="normal")
|
||||
self.log_text.delete("1.0", tk.END)
|
||||
self.log_text.insert("end", "\n".join(self._log_lines))
|
||||
self.log_text.configure(state="disabled")
|
||||
self.log_text.see("end")
|
||||
|
||||
self.root.after(0, _apply)
|
||||
|
||||
@contextlib.contextmanager
def _capture_model_output(self, label):
    """Route stdout and stderr to the log panel while the ``with`` body runs.

    Args:
        label: Prefix given to every captured line (passed to
            ``ModelProgressStream``).

    Any partial line still buffered is flushed when the body exits, whether
    it finished normally or raised, so the last output before a failure is
    not lost.
    """
    stream = ModelProgressStream(self._append_log, label)
    try:
        with contextlib.redirect_stdout(stream), contextlib.redirect_stderr(stream):
            yield
    finally:
        stream.flush()
|
||||
|
||||
def _finish(self):
|
||||
try:
|
||||
self.root.destroy()
|
||||
|
||||
@@ -17,6 +17,7 @@ Dépendances : PyMuPDF (pymupdf), Pillow, PyYAML
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import io
|
||||
import json
|
||||
import math
|
||||
@@ -31,7 +32,12 @@ from PIL import Image, ImageTk
|
||||
import fitz # PyMuPDF
|
||||
import yaml
|
||||
|
||||
APP_TITLE = "PDF Mask Designer (Standalone)"
|
||||
from manual_masking import (
|
||||
DEFAULT_MASK_OUTPUT_DIRNAME,
|
||||
DEFAULT_MASK_PREVIEW_DIRNAME,
|
||||
)
|
||||
|
||||
APP_TITLE = "Éditeur de masques PDF"
|
||||
TEMPLATE_VERSION = 1
|
||||
|
||||
# ----------------------------- Data structures -----------------------------
|
||||
@@ -167,7 +173,16 @@ def apply_template_raster(pdf_in: Path, pdf_out: Path, tpl: Template, dpi: int,
|
||||
# ----------------------------- GUI ------------------------------
|
||||
|
||||
class MaskDesignerApp:
|
||||
def __init__(self, root: tk.Tk):
|
||||
def __init__(
|
||||
self,
|
||||
root: tk.Tk,
|
||||
*,
|
||||
initial_pdf: Optional[Path] = None,
|
||||
initial_template: Optional[Path] = None,
|
||||
templates_dir: Optional[Path] = None,
|
||||
output_dir_name: str = DEFAULT_MASK_OUTPUT_DIRNAME,
|
||||
preview_dir_name: str = DEFAULT_MASK_PREVIEW_DIRNAME,
|
||||
):
|
||||
self.root = root
|
||||
self.root.title(APP_TITLE)
|
||||
self.root.geometry("1280x900")
|
||||
@@ -181,11 +196,18 @@ class MaskDesignerApp:
|
||||
self.template_name = tk.StringVar(value="template_masks")
|
||||
self.status = tk.StringVar(value="Prêt.")
|
||||
self.raster_dpi = tk.IntVar(value=200)
|
||||
self.templates_dir = templates_dir
|
||||
self.output_dir_name = output_dir_name
|
||||
self.preview_dir_name = preview_dir_name
|
||||
|
||||
self.is_drawing = False
|
||||
self.start_xy: Optional[Tuple[int,int]] = None
|
||||
|
||||
self._build_ui()
|
||||
if initial_pdf:
|
||||
self.open_pdf_path(initial_pdf)
|
||||
if initial_template:
|
||||
self.load_template_path(initial_template)
|
||||
|
||||
# UI layout
|
||||
def _build_ui(self):
|
||||
@@ -228,14 +250,17 @@ class MaskDesignerApp:
|
||||
def open_pdf(self):
|
||||
path = filedialog.askopenfilename(filetypes=[("PDF", "*.pdf")])
|
||||
if not path: return
|
||||
self.open_pdf_path(Path(path))
|
||||
|
||||
def open_pdf_path(self, path: Path):
|
||||
try:
|
||||
self.doc = fitz.open(path)
|
||||
self.doc = fitz.open(str(path))
|
||||
self.doc_path = Path(path)
|
||||
self.curr_page = 0
|
||||
self.masks.clear()
|
||||
self.template_name.set(self.doc_path.stem + "_template")
|
||||
self.refresh()
|
||||
self.status.set(f"PDF ouvert : {Path(path).name} — {len(self.doc)} page(s)")
|
||||
self.status.set(f"PDF ouvert : {self.doc_path.name} — {len(self.doc)} page(s)")
|
||||
except Exception as e:
|
||||
messagebox.showerror("Erreur", f"Impossible d'ouvrir le PDF : {e}")
|
||||
|
||||
@@ -244,7 +269,7 @@ class MaskDesignerApp:
|
||||
img = page_pix(self.doc, self.curr_page, self.zoom)
|
||||
# overlay current page masks
|
||||
rects = self.masks.get(self.curr_page, [])
|
||||
img_o = draw_overlay(img, rects, 1.0, self.curr_page)
|
||||
img_o = draw_overlay(img, rects, self.zoom, self.curr_page)
|
||||
self.curr_image = img_o
|
||||
self.tk_image = ImageTk.PhotoImage(img_o)
|
||||
self.canvas.delete("all")
|
||||
@@ -269,19 +294,25 @@ class MaskDesignerApp:
|
||||
def on_down(self, ev):
|
||||
if not self.doc: return
|
||||
self.is_drawing = True
|
||||
self.start_xy = (ev.x, ev.y)
|
||||
self._preview_rect = self.canvas.create_rectangle(ev.x, ev.y, ev.x, ev.y, outline="#000", width=2)
|
||||
x = self.canvas.canvasx(ev.x)
|
||||
y = self.canvas.canvasy(ev.y)
|
||||
self.start_xy = (x, y)
|
||||
self._preview_rect = self.canvas.create_rectangle(x, y, x, y, outline="#000", width=2)
|
||||
|
||||
def on_drag(self, ev):
|
||||
if not self.doc or not self.is_drawing: return
|
||||
sx, sy = self.start_xy
|
||||
self.canvas.coords(self._preview_rect, sx, sy, ev.x, ev.y)
|
||||
x = self.canvas.canvasx(ev.x)
|
||||
y = self.canvas.canvasy(ev.y)
|
||||
self.canvas.coords(self._preview_rect, sx, sy, x, y)
|
||||
|
||||
def on_up(self, ev):
|
||||
if not self.doc or not self.is_drawing: return
|
||||
self.is_drawing = False
|
||||
sx, sy = self.start_xy
|
||||
x0, y0, x1, y1 = rect_norm(sx, sy, ev.x, ev.y)
|
||||
x = self.canvas.canvasx(ev.x)
|
||||
y = self.canvas.canvasy(ev.y)
|
||||
x0, y0, x1, y1 = rect_norm(sx, sy, x, y)
|
||||
# convert screen px to PDF points
|
||||
page = self.doc[self.curr_page]
|
||||
# we rendered with zoom, but here current image is at display resolution (zoom applied in page_pix)
|
||||
@@ -311,9 +342,12 @@ class MaskDesignerApp:
|
||||
tpl = self._current_template()
|
||||
except Exception as e:
|
||||
messagebox.showwarning("Info", str(e)); return
|
||||
path = filedialog.asksaveasfilename(defaultextension=".yml",
|
||||
filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
|
||||
initialfile=f"{tpl.name}.yml")
|
||||
path = filedialog.asksaveasfilename(
|
||||
defaultextension=".yml",
|
||||
filetypes=[("YAML", "*.yml *.yaml"), ("JSON", "*.json")],
|
||||
initialdir=str(self._template_initialdir()),
|
||||
initialfile=f"{tpl.name}.yml",
|
||||
)
|
||||
if not path: return
|
||||
p = Path(path)
|
||||
try:
|
||||
@@ -326,8 +360,14 @@ class MaskDesignerApp:
|
||||
messagebox.showerror("Erreur", f"Impossible d'écrire le template : {e}")
|
||||
|
||||
def load_template(self):
|
||||
path = filedialog.askopenfilename(filetypes=[("YAML/JSON", "*.yml *.yaml *.json")])
|
||||
path = filedialog.askopenfilename(
|
||||
filetypes=[("YAML/JSON", "*.yml *.yaml *.json")],
|
||||
initialdir=str(self._template_initialdir()),
|
||||
)
|
||||
if not path: return
|
||||
self.load_template_path(Path(path))
|
||||
|
||||
def load_template_path(self, path: Path):
|
||||
p = Path(path)
|
||||
try:
|
||||
if p.suffix.lower() in (".yml", ".yaml"):
|
||||
@@ -351,6 +391,14 @@ class MaskDesignerApp:
|
||||
self.refresh()
|
||||
self.status.set(f"Masques de la page {self.curr_page+1} supprimés.")
|
||||
|
||||
def _template_initialdir(self) -> Path:
|
||||
if self.templates_dir is not None:
|
||||
self.templates_dir.mkdir(parents=True, exist_ok=True)
|
||||
return self.templates_dir
|
||||
if self.doc_path is not None:
|
||||
return self.doc_path.parent
|
||||
return Path.cwd()
|
||||
|
||||
# Preview / Apply
|
||||
def _build_template_from_state(self) -> Optional[Template]:
|
||||
if not self.doc:
|
||||
@@ -365,7 +413,7 @@ class MaskDesignerApp:
|
||||
if not samp: return
|
||||
for i, s in enumerate(samp[:2], start=1):
|
||||
pdf_in = Path(s)
|
||||
out_dir = pdf_in.parent / "masked_preview"
|
||||
out_dir = pdf_in.parent / self.preview_dir_name
|
||||
out_dir.mkdir(exist_ok=True)
|
||||
pdf_out = out_dir / f"{pdf_in.stem}.preview_vector.pdf"
|
||||
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
|
||||
@@ -373,7 +421,10 @@ class MaskDesignerApp:
|
||||
apply_template_vector(pdf_in, pdf_out, tpl, audit)
|
||||
except Exception as e:
|
||||
messagebox.showerror("Erreur", f"Prévisualisation vectorielle échouée sur {pdf_in.name} : {e}")
|
||||
messagebox.showinfo("Prévisualisation", "Terminé (vectoriel). Ouvrez le dossier 'masked_preview'.")
|
||||
messagebox.showinfo(
|
||||
"Prévisualisation",
|
||||
f"Terminé (vectoriel). Ouvrez le dossier '{self.preview_dir_name}'.",
|
||||
)
|
||||
|
||||
def preview_raster(self):
|
||||
tpl = self._build_template_from_state()
|
||||
@@ -383,7 +434,7 @@ class MaskDesignerApp:
|
||||
dpi = int(self.raster_dpi.get())
|
||||
for i, s in enumerate(samp[:2], start=1):
|
||||
pdf_in = Path(s)
|
||||
out_dir = pdf_in.parent / "masked_preview"
|
||||
out_dir = pdf_in.parent / self.preview_dir_name
|
||||
out_dir.mkdir(exist_ok=True)
|
||||
pdf_out = out_dir / f"{pdf_in.stem}.preview_raster.pdf"
|
||||
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
|
||||
@@ -391,7 +442,10 @@ class MaskDesignerApp:
|
||||
apply_template_raster(pdf_in, pdf_out, tpl, dpi, audit)
|
||||
except Exception as e:
|
||||
messagebox.showerror("Erreur", f"Prévisualisation raster échouée sur {pdf_in.name} : {e}")
|
||||
messagebox.showinfo("Prévisualisation", "Terminé (raster). Ouvrez le dossier 'masked_preview'.")
|
||||
messagebox.showinfo(
|
||||
"Prévisualisation",
|
||||
f"Terminé (raster). Ouvrez le dossier '{self.preview_dir_name}'.",
|
||||
)
|
||||
|
||||
def apply_vector_batch(self):
|
||||
tpl = self._build_template_from_state()
|
||||
@@ -400,7 +454,7 @@ class MaskDesignerApp:
|
||||
if not files: return
|
||||
for s in files:
|
||||
pdf_in = Path(s)
|
||||
out_dir = pdf_in.parent / "masked"
|
||||
out_dir = pdf_in.parent / self.output_dir_name
|
||||
out_dir.mkdir(exist_ok=True)
|
||||
pdf_out = out_dir / f"{pdf_in.stem}.masked_vector.pdf"
|
||||
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
|
||||
@@ -418,7 +472,7 @@ class MaskDesignerApp:
|
||||
dpi = int(self.raster_dpi.get())
|
||||
for s in files:
|
||||
pdf_in = Path(s)
|
||||
out_dir = pdf_in.parent / "masked"
|
||||
out_dir = pdf_in.parent / self.output_dir_name
|
||||
out_dir.mkdir(exist_ok=True)
|
||||
pdf_out = out_dir / f"{pdf_in.stem}.masked_raster.pdf"
|
||||
audit = out_dir / f"{pdf_in.stem}.audit.jsonl"
|
||||
@@ -430,9 +484,27 @@ class MaskDesignerApp:
|
||||
|
||||
# ----------------------------- Main ------------------------------
|
||||
|
||||
def main():
|
||||
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the mask designer.

    Registers three optional path arguments (``--pdf``, ``--template``,
    ``--templates-dir``), converted with ``Path``, followed by two
    directory-name options whose defaults come from the module constants
    ``DEFAULT_MASK_OUTPUT_DIRNAME`` and ``DEFAULT_MASK_PREVIEW_DIRNAME``.
    """
    cli = argparse.ArgumentParser(description="Editeur de masques PDF reutilisables")

    # (flag, help) pairs for the options parsed as filesystem paths.
    path_options = (
        ("--pdf", "PDF de reference a ouvrir au demarrage"),
        ("--template", "Template YAML/JSON a charger au demarrage"),
        ("--templates-dir", "Dossier par defaut pour sauver/charger les templates"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, type=Path, help=help_text)

    # (flag, default, help) triples for the plain-string folder names.
    name_options = (
        (
            "--output-dir-name",
            DEFAULT_MASK_OUTPUT_DIRNAME,
            "Nom du dossier de sortie pour l'application des masques",
        ),
        (
            "--preview-dir-name",
            DEFAULT_MASK_PREVIEW_DIRNAME,
            "Nom du dossier de sortie pour les previsualisations",
        ),
    )
    for flag, default, help_text in name_options:
        cli.add_argument(flag, default=default, help=help_text)

    return cli
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None):
    """Parse command-line options and run the mask designer GUI.

    Args:
        argv: Argument list handed to the parser built by
            ``build_arg_parser()``. With ``None``, argparse reads the
            process arguments itself.
    """
    args = build_arg_parser().parse_args(argv)
    root = tk.Tk()
    # Exactly one application instance, configured from the CLI. The flattened
    # diff also carried the superseded bare ``MaskDesignerApp(root)`` call,
    # which would have built a second app on the same root window.
    app = MaskDesignerApp(
        root,
        initial_pdf=args.pdf,
        initial_template=args.template,
        templates_dir=args.templates_dir,
        output_dir_name=args.output_dir_name,
        preview_dir_name=args.preview_dir_name,
    )
    root.mainloop()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -9,6 +9,7 @@ from config_defaults import (
|
||||
deep_merge_dict,
|
||||
ensure_runtime_dictionaries_config,
|
||||
load_effective_dictionaries_dict,
|
||||
load_effective_param_lists,
|
||||
read_default_dictionaries_text,
|
||||
read_runtime_dictionaries_overlay_text,
|
||||
)
|
||||
@@ -90,3 +91,14 @@ def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
|
||||
effective = load_effective_dictionaries_dict(cfg_path)
|
||||
assert "CHCB" in effective["blacklist"]["force_mask_terms"]
|
||||
assert "LOCAL_SIGLE" in effective["blacklist"]["force_mask_terms"]
|
||||
|
||||
|
||||
def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path: Path):
    """Check the effective parameter lists built from an empty overlay.

    The overlay file contains only ``{}``; the lists returned by
    ``load_effective_param_lists`` must still contain the entries asserted
    below, and the additional stopword list must be empty.
    """
    overlay_file = tmp_path / "dictionnaires.yml"
    overlay_file.write_text("{}\n", encoding="utf-8")

    effective = load_effective_param_lists(overlay_file)
    whitelist = effective["whitelist_phrases"]
    forced_terms = effective["blacklist_force_mask_terms"]
    extra_stopwords = effective["additional_stopwords"]

    assert "classification internationale" in whitelist
    assert "CHCB" in forced_terms
    assert extra_stopwords == []
|
||||
|
||||
Reference in New Issue
Block a user