Externalize dictionaries and add anonymization review corpus

This commit is contained in:
2026-04-21 10:32:57 +02:00
parent 39db675052
commit 34dcf8f360
99 changed files with 1805 additions and 805 deletions

View File

@@ -20,7 +20,6 @@ import os
import platform
import queue
import re
import shutil
import subprocess
import sys
import threading
@@ -75,6 +74,11 @@ try:
except Exception:
yaml = None
from config_defaults import (
read_default_dictionaries_text,
read_runtime_dictionaries_overlay_text,
)
# ---------------------------------------------------------------------------
# Optional theme
# ---------------------------------------------------------------------------
@@ -142,47 +146,19 @@ def _resolve_config() -> Path:
pour que l'utilisateur puisse la modifier sans recompiler.
"""
exe_cfg = _exe_dir() / "config" / "dictionnaires.yml"
app_cfg = _app_dir() / "config" / "dictionnaires.yml"
if exe_cfg.exists():
return exe_cfg
# Premier lancement : copier la config embarquée à côté de l'exe
if app_cfg.exists():
exe_cfg.parent.mkdir(parents=True, exist_ok=True)
import shutil
shutil.copy2(str(app_cfg), str(exe_cfg))
return exe_cfg
return app_cfg # fallback
exe_cfg.parent.mkdir(parents=True, exist_ok=True)
exe_cfg.write_text(read_runtime_dictionaries_overlay_text(), encoding="utf-8")
return exe_cfg
# Path of the dictionaries configuration file, resolved once at import time
# by calling _resolve_config().
DEFAULT_CFG = _resolve_config()
# "models" directory located under the directory returned by _app_dir().
MODELS_DIR = _app_dir() / "models"
# Inline YAML defaults for dictionnaires.yml. Kept as a raw string so the
# backslashes in the regex pattern are stored verbatim.
# NOTE(review): DEFAULTS_CFG_TEXT is assigned again further down from
# read_default_dictionaries_text(); this literal looks like the pre-change
# side of the diff -- confirm which assignment is live before relying on it.
DEFAULTS_CFG_TEXT = r"""
# dictionnaires.yml valeurs par défaut (bloc littéral pour les regex)
version: 1
encoding: "utf-8"
normalization: "NFKC"
whitelist:
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
org_gpe_keep: true
blacklist:
force_mask_terms: []
force_mask_regex: []
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
regex_overrides:
- name: OGC_court
pattern: |-
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
placeholder: '[OGC]'
flags: [IGNORECASE]
flags:
case_insensitive: true
unicode_word_boundaries: true
regex_engine: "python"
"""
# Default dictionaries text, obtained at import time from the
# config_defaults helper read_default_dictionaries_text().
DEFAULTS_CFG_TEXT = read_default_dictionaries_text()
# Runtime overlay text, obtained at import time from the config_defaults
# helper read_runtime_dictionaries_overlay_text().
# NOTE(review): presumably this is the content seeded into a missing
# user-editable config file -- confirm against the write_text call sites.
RUNTIME_CFG_TEXT = read_runtime_dictionaries_overlay_text()
# Palette derived from the aivanonym logo (gradient magenta → pink → peach → black)
# Logo magenta: primary color (buttons, accents)
@@ -1593,7 +1569,7 @@ class App:
p = Path(self.cfg_path.get())
p.parent.mkdir(parents=True, exist_ok=True)
if not p.exists():
p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
p.write_text(RUNTIME_CFG_TEXT, encoding="utf-8")
def _load_cfg(self):
if yaml is None: