Externalize dictionaries and add anonymization review corpus

This commit is contained in:
2026-04-21 10:32:57 +02:00
parent 39db675052
commit 34dcf8f360
99 changed files with 1805 additions and 805 deletions

View File

@@ -37,33 +37,18 @@ try:
except Exception:
yaml = None
APP_TITLE = "Pseudonymisation de PDF"
DEFAULT_CFG = Path("config/dictionnaires.yml")
from config_defaults import (
RUNTIME_DICTIONARIES_CONFIG_PATH,
read_default_dictionaries_text,
read_runtime_dictionaries_overlay_text,
)
# YAML par défaut (patterns en bloc littéral pour éviter les échappements)
DEFAULTS_CFG_TEXT = """# dictionnaires.yml valeurs par défaut
version: 1
encoding: "utf-8"
normalization: "NFKC"
whitelist:
sections_titres: [DIM, GHM, GHS, RUM, COMPTE, RENDU, DIAGNOSTIC]
noms_maj_excepts: ["Médecin DIM", "Praticien conseil"]
org_gpe_keep: true
blacklist:
force_mask_terms: []
force_mask_regex: []
kv_labels_preserve: [FINESS, IPP, "N° OGC", Etablissement]
regex_overrides:
- name: OGC_court
pattern: |-
\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
placeholder: '[OGC]'
flags: [IGNORECASE]
flags:
case_insensitive: true
unicode_word_boundaries: true
regex_engine: "python"
"""
APP_TITLE = "Pseudonymisation de PDF"
DEFAULT_CFG = RUNTIME_DICTIONARIES_CONFIG_PATH
# YAML par défaut externalisé dans config/dictionnaires.default.yml
DEFAULTS_CFG_TEXT = read_default_dictionaries_text()
RUNTIME_CFG_TEXT = read_runtime_dictionaries_overlay_text()
# ---------- util : ToolTip & helpers ----------
class ToolTip:
@@ -211,7 +196,7 @@ class App:
p = Path(self.cfg_path.get())
p.parent.mkdir(parents=True, exist_ok=True)
if not p.exists():
p.write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
p.write_text(RUNTIME_CFG_TEXT, encoding="utf-8")
def _cfg_browse(self):
d = filedialog.asksaveasfilename(defaultextension=".yml", filetypes=[("YAML","*.yml *.yaml"), ("Tous","*.*")])
@@ -248,7 +233,7 @@ class App:
return
try:
with open(self.cfg_path.get(), "w", encoding="utf-8") as f:
yaml.safe_dump(self.cfg_data or yaml.safe_load(DEFAULTS_CFG_TEXT), f, allow_unicode=True, sort_keys=False)
yaml.safe_dump(self.cfg_data or {}, f, allow_unicode=True, sort_keys=False)
self._log("Règles sauvegardées.")
except Exception as e:
messagebox.showerror("Erreur", f"Impossible d'écrire le fichier de règles: {e}")
@@ -258,8 +243,8 @@ class App:
def _restore_defaults(self):
try:
Path(self.cfg_path.get()).write_text(DEFAULTS_CFG_TEXT, encoding="utf-8")
self._log("Règles restaurées aux valeurs par défaut.")
Path(self.cfg_path.get()).write_text(RUNTIME_CFG_TEXT, encoding="utf-8")
self._log("Surcharge locale réinitialisée.")
self._load_cfg()
except Exception as e:
messagebox.showerror("Erreur", f"Impossible d'écrire le YAML par défaut: {e}")