Initial commit — Pseudonymisation de PDF v5
- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles
- Core ONNX : anonymisation regex + NER optionnel
- Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR)
- Génération simultanée PDF Image + PDF Anonymisé (structure préservée)
- Build Windows via Nuitka (script batch + GitHub Actions CI)
- install.sh pour setup/run Linux

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
37
config/dictionnaires.yml
Normal file
37
config/dictionnaires.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
version: 1
|
||||
encoding: utf-8
|
||||
normalization: NFKC
|
||||
whitelist:
|
||||
sections_titres:
|
||||
- DIM
|
||||
- GHM
|
||||
- GHS
|
||||
- RUM
|
||||
- COMPTE
|
||||
- RENDU
|
||||
- DIAGNOSTIC
|
||||
noms_maj_excepts:
|
||||
- Médecin DIM
|
||||
- Praticien conseil
|
||||
org_gpe_keep: true
|
||||
blacklist:
|
||||
force_mask_terms:
|
||||
- CENTRE HOSPITALIER COTE BASQUE
|
||||
- 'Dates du séjour :'
|
||||
- CONCERTATION
|
||||
force_mask_regex: []
|
||||
kv_labels_preserve:
|
||||
- FINESS
|
||||
- IPP
|
||||
- N° OGC
|
||||
- Etablissement
|
||||
regex_overrides:
|
||||
- name: OGC_court
|
||||
pattern: \b(?:N°\s*)?OGC\s*[:\-]?\s*([A-Za-z0-9\-]{1,3})\b
|
||||
placeholder: '[OGC]'
|
||||
flags:
|
||||
- IGNORECASE
|
||||
flags:
|
||||
case_insensitive: true
|
||||
unicode_word_boundaries: true
|
||||
regex_engine: python
|
||||
Reference in New Issue
Block a user