178 lines
4.9 KiB
Python
178 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Helpers partagés pour la config dictionnaires.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from copy import deepcopy
|
|
from pathlib import Path
|
|
from typing import Any, Dict
|
|
|
|
try:
|
|
import yaml
|
|
except Exception:
|
|
yaml = None
|
|
|
|
|
|
# Directory that contains this module (symlinks resolved).
PROJECT_DIR = Path(__file__).resolve().parent
# All dictionary configuration files live in a "config" folder next to this module.
CONFIG_DIR = PROJECT_DIR.joinpath("config")
# Default template that local overlays are merged on top of.
DEFAULT_DICTIONARIES_CONFIG_PATH = CONFIG_DIR.joinpath("dictionnaires.default.yml")
# Local overlay file, used whenever no explicit path is given to the loaders.
RUNTIME_DICTIONARIES_CONFIG_PATH = CONFIG_DIR.joinpath("dictionnaires.yml")
|
|
|
|
# Template written into a newly created runtime overlay file.
# It contains only YAML comments plus an empty mapping (`{}`), so a fresh file
# parses to "no overrides". The commented example mirrors the nesting of the
# default configuration: `force_mask_terms` lives under `blacklist`, while
# `additional_stopwords` is a top-level key, so uncommenting it as-is yields a
# correctly structured overlay.
_RUNTIME_DICTIONARIES_OVERLAY_TEXT = """# Surcharge locale chargée par défaut par l'application.
# Seuls les écarts par rapport à config/dictionnaires.default.yml sont nécessaires ici.
# Si ce fichier est vide, les valeurs du template par défaut s'appliquent.
#
# Exemples :
# blacklist:
#   force_mask_terms:
#     - VOTRE_SIGLE
# additional_stopwords:
#   - votre_terme
{}
"""
|
|
|
|
# YAML text returned when the default configuration file cannot be read.
# Its nesting must describe the same structure as
# _FALLBACK_DEFAULT_DICTIONARIES_DICT, so both fallbacks yield the same data.
# Backslashes are doubled because this is a regular (non-raw) string literal;
# the resulting YAML contains single backslashes in the regex pattern.
_FALLBACK_DEFAULT_DICTIONARIES_TEXT = """version: 1
encoding: utf-8
normalization: NFKC
whitelist:
  sections_titres:
    - DIM
    - GHM
    - GHS
    - RUM
    - COMPTE
    - RENDU
    - DIAGNOSTIC
  noms_maj_excepts:
    - Médecin DIM
    - Praticien conseil
  org_gpe_keep: false
blacklist:
  force_mask_terms: []
  force_mask_regex: []
kv_labels_preserve:
  - FINESS
  - IPP
  - N° OGC
  - Etablissement
regex_overrides:
  - name: OGC_court
    pattern: \\b(?:N°\\s*)?OGC\\s*[:\\-]?\\s*([A-Za-z0-9\\-]{1,3})\\b
    placeholder: '[OGC]'
    flags:
      - IGNORECASE
whitelist_phrases: []
additional_stopwords: []
additional_villes_blacklist: []
additional_dpi_labels: []
additional_companion_blacklist: []
flags:
  case_insensitive: true
  unicode_word_boundaries: true
  regex_engine: python
"""
|
|
|
|
# Built-in default configuration as a Python mapping.
# load_default_dictionaries_dict() returns a deep copy of it when the default
# YAML cannot be parsed (or PyYAML is not installed).
_FALLBACK_DEFAULT_DICTIONARIES_DICT: Dict[str, Any] = {
    "version": 1,
    "encoding": "utf-8",
    "normalization": "NFKC",
    "whitelist": {
        "sections_titres": [
            "DIM",
            "GHM",
            "GHS",
            "RUM",
            "COMPTE",
            "RENDU",
            "DIAGNOSTIC",
        ],
        "noms_maj_excepts": [
            "Médecin DIM",
            "Praticien conseil",
        ],
        "org_gpe_keep": False,
    },
    "blacklist": {
        "force_mask_terms": [],
        "force_mask_regex": [],
    },
    "kv_labels_preserve": [
        "FINESS",
        "IPP",
        "N° OGC",
        "Etablissement",
    ],
    "regex_overrides": [
        {
            "name": "OGC_court",
            # Raw string: the backslashes belong to the regex itself.
            "pattern": r"\b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b",
            "placeholder": "[OGC]",
            "flags": ["IGNORECASE"],
        },
    ],
    "whitelist_phrases": [],
    "additional_stopwords": [],
    "additional_villes_blacklist": [],
    "additional_dpi_labels": [],
    "additional_companion_blacklist": [],
    "flags": {
        "case_insensitive": True,
        "unicode_word_boundaries": True,
        "regex_engine": "python",
    },
}
|
|
|
|
|
|
def read_default_dictionaries_text() -> str:
    """Return the text of the default dictionaries configuration.

    Returns:
        The UTF-8 decoded content of ``DEFAULT_DICTIONARIES_CONFIG_PATH``, or
        the built-in fallback YAML text when that file cannot be read for any
        reason.
    """
    try:
        content = DEFAULT_DICTIONARIES_CONFIG_PATH.read_text(encoding="utf-8")
    except Exception:
        # Best-effort: any read failure falls back to the embedded defaults.
        content = _FALLBACK_DEFAULT_DICTIONARIES_TEXT
    return content
|
|
|
|
|
|
def read_runtime_dictionaries_overlay_text() -> str:
    """Return the built-in template text for the runtime overlay file.

    The text is the module-level constant; no file is read.
    """
    overlay_template = _RUNTIME_DICTIONARIES_OVERLAY_TEXT
    return overlay_template
|
|
|
|
|
|
def load_default_dictionaries_dict() -> Dict[str, Any]:
    """Return the default dictionaries configuration as a mapping.

    The default text is parsed with ``yaml.safe_load`` when PyYAML is
    available. An empty document counts as an empty mapping.

    Returns:
        The parsed mapping, or a deep copy of the built-in fallback dict when
        PyYAML is missing, parsing fails, or the document root is not a
        mapping.
    """
    raw_text = read_default_dictionaries_text()
    parsed = None
    if yaml is not None:
        try:
            parsed = yaml.safe_load(raw_text) or {}
        except Exception:
            # Best-effort: an unparsable document falls through to the fallback.
            parsed = None
    if isinstance(parsed, dict):
        return parsed
    # Copy so callers can mutate the result without touching the module constant.
    return deepcopy(_FALLBACK_DEFAULT_DICTIONARIES_DICT)
|
|
|
|
|
|
def load_runtime_dictionaries_overlay_dict(path: Path | None = None) -> Dict[str, Any]:
    """Load the local overlay of the dictionaries configuration.

    Loading is best-effort and never raises because of the overlay content.
    Unlike a missing file, an overlay that exists but cannot be used is
    reported through a logging warning so that ignored local overrides do not
    go unnoticed.

    Args:
        path: Overlay file to read. Defaults to
            ``RUNTIME_DICTIONARIES_CONFIG_PATH`` when ``None``.

    Returns:
        The parsed mapping, or ``{}`` when the file does not exist, PyYAML is
        not installed, the file cannot be read or parsed, or the document root
        is not a mapping. An empty document also yields ``{}``.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    target = Path(path) if path is not None else RUNTIME_DICTIONARIES_CONFIG_PATH
    if not target.exists():
        # No overlay file simply means "no local overrides".
        return {}
    if yaml is None:
        log.warning(
            "PyYAML is not available; ignoring dictionaries overlay %s", target
        )
        return {}
    try:
        loaded = yaml.safe_load(target.read_text(encoding="utf-8")) or {}
    except Exception:
        # Deliberately broad: a broken overlay must not crash the caller.
        log.warning(
            "Could not load dictionaries overlay %s; ignoring it",
            target,
            exc_info=True,
        )
        return {}
    if isinstance(loaded, dict):
        return loaded
    log.warning(
        "Dictionaries overlay %s is not a mapping (got %s); ignoring it",
        target,
        type(loaded).__name__,
    )
    return {}
|
|
|
|
|
|
def load_effective_dictionaries_dict(path: Path | None = None) -> Dict[str, Any]:
    """Return the defaults merged with the local overlay.

    Args:
        path: Overlay file passed through to
            ``load_runtime_dictionaries_overlay_dict``.

    Returns:
        The result of ``deep_merge_dict(defaults, overlay)``.
    """
    defaults = load_default_dictionaries_dict()
    overlay = load_runtime_dictionaries_overlay_dict(path)
    return deep_merge_dict(defaults, overlay)
|
|
|
|
|
|
def deep_merge_dict(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new mapping made of ``base`` with ``override`` merged on top.

    Merge rules, applied per key of ``override``:

    * dict over dict: merged recursively;
    * list over list: items of the override not already present are appended,
      in order (existing items and their order are kept);
    * anything else: the override value replaces the base value.

    Neither argument is mutated; the result shares no mutable object with
    them.

    Args:
        base: Starting mapping. ``None`` is treated as an empty mapping.
        override: Mapping whose entries take precedence. ``None`` is treated
            as an empty mapping.

    Returns:
        The merged mapping.
    """
    # Tolerate a missing base the same way a missing override is tolerated.
    merged: Dict[str, Any] = {} if base is None else deepcopy(base)
    for key, value in (override or {}).items():
        current = merged.get(key)
        if isinstance(value, dict) and isinstance(current, dict):
            merged[key] = deep_merge_dict(current, value)
        elif isinstance(value, list) and isinstance(current, list):
            # `current` already belongs to our private copy, so extending it
            # in place cannot leak into `base`. A linear membership test is
            # used because items may be unhashable (e.g. dicts).
            for item in value:
                if item not in current:
                    current.append(deepcopy(item))
        else:
            merged[key] = deepcopy(value)
    return merged
|
|
|
|
|
|
def ensure_runtime_dictionaries_config(path: Path | None = None) -> Path:
    """Make sure the runtime overlay file exists and return its path.

    An existing file is left untouched. Otherwise the parent directories are
    created as needed and the file is written with the built-in overlay
    template, encoded as UTF-8.

    Args:
        path: Overlay file location. Defaults to
            ``RUNTIME_DICTIONARIES_CONFIG_PATH`` when ``None``.

    Returns:
        The overlay file path as a ``Path``.
    """
    destination = RUNTIME_DICTIONARIES_CONFIG_PATH if path is None else Path(path)
    if destination.exists():
        return destination
    destination.parent.mkdir(parents=True, exist_ok=True)
    template = read_runtime_dictionaries_overlay_text()
    destination.write_text(template, encoding="utf-8")
    return destination
|