#!/usr/bin/env python3
"""Shared helpers for the dictionaries configuration.

The effective configuration is the packaged default template
(``config/dictionnaires.default.yml``) deep-merged with an optional local
overlay (``config/dictionnaires.yml``). Every loader is best-effort: a missing
file, a missing PyYAML install or an unparsable document falls back to built-in
defaults instead of raising.
"""

from __future__ import annotations

from copy import deepcopy
from pathlib import Path
from typing import Any, Dict

try:
    import yaml
except Exception:
    # PyYAML is optional; the loaders below check ``yaml is None`` and fall
    # back to the built-in defaults when it cannot be imported.
    yaml = None

PROJECT_DIR = Path(__file__).resolve().parent
CONFIG_DIR = PROJECT_DIR / "config"
DEFAULT_DICTIONARIES_CONFIG_PATH = CONFIG_DIR / "dictionnaires.default.yml"
RUNTIME_DICTIONARIES_CONFIG_PATH = CONFIG_DIR / "dictionnaires.yml"

# Content written to a freshly created runtime overlay file. The trailing
# ``{}`` makes the document parse to an empty mapping.
_RUNTIME_DICTIONARIES_OVERLAY_TEXT = """# Surcharge locale chargée par défaut par l'application.
# Seuls les écarts par rapport à config/dictionnaires.default.yml sont nécessaires ici.
# Si ce fichier est vide, les valeurs du template par défaut s'appliquent.
#
# Exemples :
# blacklist:
#   force_mask_terms:
#     - VOTRE_SIGLE
# additional_stopwords:
#   - votre_terme
{}
"""

# YAML text returned when the default template file cannot be read.
_FALLBACK_DEFAULT_DICTIONARIES_TEXT = """version: 1
encoding: utf-8
normalization: NFKC
whitelist:
  sections_titres:
    - DIM
    - GHM
    - GHS
    - RUM
    - COMPTE
    - RENDU
    - DIAGNOSTIC
  noms_maj_excepts:
    - Médecin DIM
    - Praticien conseil
  org_gpe_keep: false
blacklist:
  force_mask_terms: []
  force_mask_regex: []
kv_labels_preserve:
  - FINESS
  - IPP
  - N° OGC
  - Etablissement
regex_overrides:
  - name: OGC_court
    pattern: \\b(?:N°\\s*)?OGC\\s*[:\\-]?\\s*([A-Za-z0-9\\-]{1,3})\\b
    placeholder: '[OGC]'
    flags:
      - IGNORECASE
whitelist_phrases: []
additional_stopwords: []
additional_villes_blacklist: []
additional_dpi_labels: []
additional_companion_blacklist: []
flags:
  case_insensitive: true
  unicode_word_boundaries: true
  regex_engine: python
"""

# Python mirror of the fallback YAML above, used when the text cannot be
# parsed (PyYAML missing, or the document is invalid / not a mapping).
_FALLBACK_DEFAULT_DICTIONARIES_DICT: Dict[str, Any] = {
    "version": 1,
    "encoding": "utf-8",
    "normalization": "NFKC",
    "whitelist": {
        "sections_titres": ["DIM", "GHM", "GHS", "RUM", "COMPTE", "RENDU", "DIAGNOSTIC"],
        "noms_maj_excepts": ["Médecin DIM", "Praticien conseil"],
        "org_gpe_keep": False,
    },
    "blacklist": {
        "force_mask_terms": [],
        "force_mask_regex": [],
    },
    "kv_labels_preserve": ["FINESS", "IPP", "N° OGC", "Etablissement"],
    "regex_overrides": [
        {
            "name": "OGC_court",
            "pattern": r"\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b",
            "placeholder": "[OGC]",
            "flags": ["IGNORECASE"],
        }
    ],
    "whitelist_phrases": [],
    "additional_stopwords": [],
    "additional_villes_blacklist": [],
    "additional_dpi_labels": [],
    "additional_companion_blacklist": [],
    "flags": {
        "case_insensitive": True,
        "unicode_word_boundaries": True,
        "regex_engine": "python",
    },
}


def read_default_dictionaries_text() -> str:
    """Return the default template as YAML text.

    Reads ``DEFAULT_DICTIONARIES_CONFIG_PATH``; if that fails for any reason
    the built-in fallback text is returned instead.
    """
    try:
        return DEFAULT_DICTIONARIES_CONFIG_PATH.read_text(encoding="utf-8")
    except Exception:
        # Deliberate best-effort: any read failure falls back to the
        # built-in template.
        return _FALLBACK_DEFAULT_DICTIONARIES_TEXT


def read_runtime_dictionaries_overlay_text() -> str:
    """Return the text used to initialise a new runtime overlay file."""
    return _RUNTIME_DICTIONARIES_OVERLAY_TEXT


def load_default_dictionaries_dict() -> Dict[str, Any]:
    """Return the default configuration as a dict.

    The default template text is parsed with PyYAML when available. If PyYAML
    is missing, parsing fails, or the document is not a mapping, a deep copy of
    the built-in fallback dict is returned.
    """
    text = read_default_dictionaries_text()
    if yaml is not None:
        try:
            loaded = yaml.safe_load(text) or {}
            if isinstance(loaded, dict):
                return loaded
        except Exception:
            # Deliberate best-effort: an unparsable template falls through to
            # the built-in defaults below.
            pass
    # Copy so callers can mutate the result without touching the constant.
    return deepcopy(_FALLBACK_DEFAULT_DICTIONARIES_DICT)


def load_runtime_dictionaries_overlay_dict(path: Path | None = None) -> Dict[str, Any]:
    """Return the local overlay as a dict, or ``{}`` when it is unusable.

    Args:
        path: Overlay file to read; defaults to
            ``RUNTIME_DICTIONARIES_CONFIG_PATH``.

    Returns:
        The parsed mapping, or an empty dict if the file does not exist,
        PyYAML is unavailable, reading/parsing fails, or the document is not a
        mapping.
    """
    target = Path(path) if path is not None else RUNTIME_DICTIONARIES_CONFIG_PATH
    if not target.exists():
        return {}
    if yaml is None:
        return {}
    try:
        loaded = yaml.safe_load(target.read_text(encoding="utf-8")) or {}
        if isinstance(loaded, dict):
            return loaded
    except Exception:
        # Deliberate best-effort: a broken overlay must not prevent the
        # defaults from being used.
        pass
    return {}


def load_effective_dictionaries_dict(path: Path | None = None) -> Dict[str, Any]:
    """Return the defaults deep-merged with the runtime overlay at *path*."""
    return deep_merge_dict(
        load_default_dictionaries_dict(),
        load_runtime_dictionaries_overlay_dict(path),
    )


def _normalize_string_list(values: Any) -> list[str]:
    """Return *values* as a list of stripped, non-empty strings.

    Anything that is not a list yields ``[]``. ``None`` items (an empty YAML
    list entry parses to ``None``) are skipped so they do not become the
    literal string ``"None"``.
    """
    if not isinstance(values, list):
        return []
    stripped = (str(value).strip() for value in values if value is not None)
    return [text for text in stripped if text]


def load_effective_param_lists(path: Path | None = None) -> Dict[str, list[str]]:
    """Return the effective parameter lists shown in the GUI.

    Args:
        path: Overlay file to merge over the defaults; defaults to the
            runtime overlay path.

    Returns:
        A dict with the keys ``whitelist_phrases``,
        ``blacklist_force_mask_terms`` and ``additional_stopwords``, each
        mapped to a normalized list of strings.
    """
    data = load_effective_dictionaries_dict(path)
    # An overlay may set ``blacklist`` to null or a scalar, which replaces the
    # default mapping during the merge; treat that as "no blacklist entries"
    # rather than failing on ``.get``.
    blacklist = data.get("blacklist")
    if not isinstance(blacklist, dict):
        blacklist = {}
    return {
        "whitelist_phrases": _normalize_string_list(data.get("whitelist_phrases", [])),
        "blacklist_force_mask_terms": _normalize_string_list(
            blacklist.get("force_mask_terms", [])
        ),
        "additional_stopwords": _normalize_string_list(data.get("additional_stopwords", [])),
    }


def deep_merge_dict(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new dict made of *base* with *override* merged on top.

    Merge rules per key:
      * dict over dict: merged recursively;
      * list over list: override items not already present are appended,
        preserving order;
      * anything else: the override value replaces the base value.

    Neither input is mutated; merged values are deep-copied. A falsy
    *override* (``None`` or ``{}``) yields a deep copy of *base*.
    """
    merged = deepcopy(base)
    for key, value in (override or {}).items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge_dict(merged[key], value)
        elif isinstance(value, list) and isinstance(merged.get(key), list):
            combined = list(merged[key])
            for item in value:
                # Membership test on the list (not a set) because items may be
                # unhashable, e.g. regex override mappings.
                if item not in combined:
                    combined.append(deepcopy(item))
            merged[key] = combined
        else:
            merged[key] = deepcopy(value)
    return merged


def ensure_runtime_dictionaries_config(path: Path | None = None) -> Path:
    """Create the runtime overlay file if it does not exist and return its path.

    Args:
        path: Overlay location; defaults to
            ``RUNTIME_DICTIONARIES_CONFIG_PATH``.

    Returns:
        The overlay path. An existing file is left untouched; otherwise parent
        directories are created and the overlay template text is written.
    """
    target = Path(path) if path is not None else RUNTIME_DICTIONARIES_CONFIG_PATH
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(read_runtime_dictionaries_overlay_text(), encoding="utf-8")
    return target