chore(rgpd): replace CHCB/Bayonne/Saint-Denis/Réunion refs in source + configs (D-12)

Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis, Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils, et les tests unitaires. Conserve les tests synthétiques (cases) intentionnels.

- profile key chcb_strict → chuxx_strict
- CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield, Réunion → Province Bêta, 64100/97400 → 12345, FINESS → 999999999, préfixe tél 05.59.44 → 0X.XX.XX
- renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 14:39:21 +02:00
parent 7b09b06065
commit 92557d4e74
37 changed files with 819 additions and 128 deletions
--- a/Pseudonymisation_Gui_V5.py
+++ b/Pseudonymisation_Gui_V5.py
@@ -2136,7 +2136,7 @@ class App:
            return
        base_spec = self._selected_processing_profile_spec()
        profile_label = str(base_spec.get("label") or profile_key)
-        if profile_key in {"standard_local", "chcb_strict", "partage_recherche", "dossier_audit", "demo"}:
+        if profile_key in {"standard_local", "chuxx_strict", "partage_recherche", "dossier_audit", "demo"}:
            confirmed = messagebox.askyesno(
                "Profils",
                "Vous allez enregistrer une surcharge locale sur un profil fourni par défaut.\n\n"
@@ -2656,7 +2656,7 @@ class App:
            import re
            patterns = {
                "date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
-                "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
+                "force_term": re.compile(r"\bCHUXX\b", re.IGNORECASE),
            }
            
            for txt_file in iter_pseudonymized_texts(output_dir):
--- a/config/admin_rules.default.yml
+++ b/config/admin_rules.default.yml
@@ -12,15 +12,15 @@ defaults:
    - structured
    - table
 rules:
-  - id: rule_chcb_exact_mask
-    label: Masquer le sigle CHCB
+  - id: rule_chuxx_exact_mask
+    label: Masquer le sigle CHUXX
    description: Sigle local a masquer dans tous les contextes documentaires.
    type: exact_term
    action: mask
    placeholder: "[MASK]"
    status: active
    match:
-      exact_value: CHCB
+      exact_value: CHUXX
    normalization:
      case_insensitive: true
      whole_word: true
--- a/config/dictionnaires.default.yml
+++ b/config/dictionnaires.default.yml
@@ -22,7 +22,7 @@ blacklist:
  # nationaux (FINESS / INSEE / BDPM). Évitez d'ajouter ici des noms d'hôpitaux,
  # villes, codes postaux ou numéros FINESS — ils sont déjà détectés automatiquement.
  force_mask_terms:
-  - CHCB
+  - CHUXX
  - 'Dates du séjour :'
  - CONCERTATION
  - LABORATOIRE de BIOLOGIE MEDICALE
--- a/config/hospital_stopwords.yml
+++ b/config/hospital_stopwords.yml
@@ -13,47 +13,47 @@ hospital_addresses:

 # Codes postaux d'établissements (avec CEDEX)
 hospital_postal_codes:
-  - "64109 BAYONNE CEDEX"
-  - "64109 BAYONNE Cedex"
+  - "12345 CHICAGO CEDEX"
+  - "12345 CHICAGO Cedex"
  - "33076 BORDEAUX CEDEX"

 # Villes avec CEDEX (indique un établissement)
 hospital_cities:
-  - "BAYONNE CEDEX"
+  - "CHICAGO CEDEX"
  - "BORDEAUX CEDEX"

-# Téléphones d'hôpitaux (préfixes 05 59 44 = CH Côte Basque)
+# Téléphones d'hôpitaux (préfixes 0X XX XX = CHUXX générique)
 hospital_phones:
-  - "05 59 44 35 35"
-  - "05 59 63 35 88"
-  - "05.59.44.37.33"
-  - "05.59.44.37.32"
-  - "05.59.44.37.42"
-  - "05.59.44.38.62"
-  - "05.59.44.37.74"
-  - "05.33.78.81.89"
-  - "05.59.44.35.49"
-  - "05.59.44.37.25"
-  - "05.59.44.37.22"
-  - "05.59.44.37.29"
-  - "05.59.44.37.23"
-  - "05.59.44.38.44"
-  - "05.59.44.35.69"
-  - "05.59.44.35.30"
-  - "05.59.44.35.06"
-  - "05.59.44.39.24"
-  - "05.59.44.37.07"
-  - "05.59.44.31.39"
-  - "05.59.44.37.35"
-  - "05.59.44.37.46"
-  - "05.59.44.37.39"
-  - "05.59.44.35.05"
-  - "0559443674"
+  - "0X XX XX 35 35"
+  - "0X XX XX 35 88"
+  - "0X.XX.XX.37.33"
+  - "0X.XX.XX.37.32"
+  - "0X.XX.XX.37.42"
+  - "0X.XX.XX.38.62"
+  - "0X.XX.XX.37.74"
+  - "0X.XX.XX.81.89"
+  - "0X.XX.XX.35.49"
+  - "0X.XX.XX.37.25"
+  - "0X.XX.XX.37.22"
+  - "0X.XX.XX.37.29"
+  - "0X.XX.XX.37.23"
+  - "0X.XX.XX.38.44"
+  - "0X.XX.XX.35.69"
+  - "0X.XX.XX.35.30"
+  - "0X.XX.XX.35.06"
+  - "0X.XX.XX.39.24"
+  - "0X.XX.XX.37.07"
+  - "0X.XX.XX.31.39"
+  - "0X.XX.XX.37.35"
+  - "0X.XX.XX.37.46"
+  - "0X.XX.XX.37.39"
+  - "0X.XX.XX.35.05"
+  - "0XXXXXXX74"

 # Patterns de téléphones hospitaliers (regex)
 hospital_phone_patterns:
-  - "^05\\.?59\\.?44\\.?"  # CH Côte Basque
-  - "^05\\.?33\\.?78\\.?"  # Autre établissement
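+  - "^0X\\.?XX\\.?XX\\.?"  # CHUXX générique (placeholder anonymisé : « X » littéral, ne correspond à aucun numéro composé de chiffres)
+  - "^0X\\.?XX\\.?XX\\.?"  # Autre établissement (placeholder anonymisé, identique à la ligne précédente)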

 # Termes médicaux/anatomiques souvent confondus avec des villes
 anatomical_terms:
--- a/config/profiles.default.yml
+++ b/config/profiles.default.yml
@@ -0,0 +1,48 @@
+version: 1
+default_profile: standard_local
+
+profiles:
+  standard_local:
+    label: Standard local
+    description: Profil par défaut pour les traitements internes sur poste bureautique.
+    require_manual_mask: false
+    force_disable_vlm: false
+    dictionaries_overlay: {}
+
+  chuxx_strict:
+    label: CHUXX strict
+    description: Profil conservateur pour les échanges prudents du CHUXX.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay:
+      blacklist:
+        force_mask_terms:
+          - CHUXX
+          - Centre Hospitalier Universitaire XX
+          - CENTRE HOSPITALIER UNIVERSITAIRE XX
+
+  partage_recherche:
+    label: Partage recherche
+    description: Profil externe strict. Le masque manuel est recommandé pour les documents formatés.
+    require_manual_mask: true
+    force_disable_vlm: true
+    dictionaries_overlay:
+      blacklist:
+        force_mask_terms:
+          - CHUXX
+          - Centre Hospitalier Universitaire XX
+          - CENTRE HOSPITALIER UNIVERSITAIRE XX
+
+  dossier_audit:
+    label: Dossier audit
+    description: Profil orienté traçabilité et reproductibilité des traitements.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay: {}
+
+  demo:
+    label: Démo
+    description: Profil léger pour démonstration interne sur machine de bureau.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay: {}
--- a/config/profiles.yml
+++ b/config/profiles.yml
@@ -0,0 +1,77 @@
+# Surcharge locale des profils métier.
+# Source de vérité : config/profiles.default.yml
+# Les profils créés depuis la GUI sont enregistrés ici.
+
+profiles:
+  standard_local_copie:
+    label: Standard local copie
+    description: Profil par défaut pour les traitements internes sur poste bureautique.
+    require_manual_mask: false
+    force_disable_vlm: false
+    dictionaries_overlay: {}
+    param_lists:
+      whitelist_phrases:
+      - classification internationale
+      - prise en charge
+      - bas de contention
+      - date de naissance
+      - lieu de naissance
+      - ville de résidence
+      - date de sortie
+      - date d'admission
+      - code postal
+      blacklist_force_mask_terms:
+      - CHUXX
+      - 'Dates du séjour :'
+      - CONCERTATION
+      - LABORATOIRE de BIOLOGIE MEDICALE
+      additional_stopwords: []
+    preferred_manual_mask_template: ''
+  standard_local_copie_copie:
+    label: Standard local copie copie
+    description: Profil par défaut pour les traitements internes sur poste bureautique.
+    require_manual_mask: false
+    force_disable_vlm: false
+    dictionaries_overlay: {}
+    param_lists:
+      whitelist_phrases:
+      - classification internationale
+      - prise en charge
+      - bas de contention
+      - date de naissance
+      - lieu de naissance
+      - ville de résidence
+      - date de sortie
+      - date d'admission
+      - code postal
+      blacklist_force_mask_terms:
+      - CHUXX
+      - 'Dates du séjour :'
+      - CONCERTATION
+      - LABORATOIRE de BIOLOGIE MEDICALE
+      additional_stopwords: []
+    preferred_manual_mask_template: ''
+  standard_local_copie_2:
+    label: Standard local copie
+    description: Profil par défaut pour les traitements internes sur poste bureautique.
+    require_manual_mask: false
+    force_disable_vlm: false
+    dictionaries_overlay: {}
+    param_lists:
+      whitelist_phrases:
+      - classification internationale
+      - prise en charge
+      - bas de contention
+      - date de naissance
+      - lieu de naissance
+      - ville de résidence
+      - date de sortie
+      - date d'admission
+      - code postal
+      blacklist_force_mask_terms:
+      - CHUXX
+      - 'Dates du séjour :'
+      - CONCERTATION
+      - LABORATOIRE de BIOLOGIE MEDICALE
+      additional_stopwords: []
+    preferred_manual_mask_template: ''
--- a/detectors/hospital_filter.py
+++ b/detectors/hospital_filter.py
@@ -214,9 +214,9 @@ if __name__ == "__main__":
        # ADRESSE, CODE_POSTAL, VILLE, TEL : ne sont plus filtrés (identifient le patient)
        ("ADRESSE", "13, Avenue de l'Interne J", "", -1, False),
        ("ADRESSE", "22 LOT MENDI ALDE", "", -1, False),
-        ("CODE_POSTAL", "64109 BAYONNE CEDEX", "", -1, False),
-        ("CODE_POSTAL", "64130", "", -1, False),
-        ("VILLE", "BAYONNE CEDEX", "", -1, False),
+        ("CODE_POSTAL", "12345 CHICAGO CEDEX", "", -1, False),
+        ("CODE_POSTAL", "12345", "", -1, False),
+        ("VILLE", "CHICAGO CEDEX", "", -1, False),
        ("VILLE", "CHERAUTE", "", -1, False),
        ("VILLE", "DROIT", "", -1, False),
        ("TEL", "05 59 44 35 35", "", -1, False),
--- a/installer/Anonymisation.iss
+++ b/installer/Anonymisation.iss
@@ -0,0 +1,43 @@
+#define MyAppName "Anonymisation"
+#define MyAppPublisher "CHUXX"
+#define MyAppExeName "Anonymisation.exe"
+#ifndef AppVersion
+#define AppVersion "1.0.0"
+#endif
+
+[Setup]
+AppId={{6D11E4F8-26D8-4CFB-9F19-5A81E0637F56}
+AppName={#MyAppName}
+AppVersion={#AppVersion}
+AppPublisher={#MyAppPublisher}
+DefaultDirName={localappdata}\Programs\{#MyAppName}
+DefaultGroupName={#MyAppName}
+DisableDirPage=no
+DisableProgramGroupPage=no
+PrivilegesRequired=lowest
+OutputDir=..\release
+OutputBaseFilename=Anonymisation-Setup
+SetupIconFile=..\assets\icons\app.ico
+UninstallDisplayIcon={app}\{#MyAppExeName}
+Compression=lzma2
+SolidCompression=yes
+WizardStyle=modern
+ArchitecturesAllowed=x64compatible
+ArchitecturesInstallIn64BitMode=x64compatible
+
+[Languages]
+Name: "french"; MessagesFile: "compiler:Languages\French.isl"
+
+[Tasks]
+Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: checkedonce
+
+[Files]
+Source: "..\release\Anonymisation-Windows\Anonymisation.exe"; DestDir: "{app}"; Flags: ignoreversion
+Source: "..\release\Anonymisation-Windows\README.txt"; DestDir: "{app}"; Flags: ignoreversion skipifsourcedoesntexist
+
+[Icons]
+Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"
+Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon
+
+[Run]
+Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent
--- a/profile_defaults.py
+++ b/profile_defaults.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env python3
+"""
+Helpers partagés pour les profils métier.
+"""
+from __future__ import annotations
+
+from copy import deepcopy
+from pathlib import Path
+from typing import Any, Dict
+
+try:
+    import yaml
+except Exception:
+    yaml = None
+
+from config_defaults import CONFIG_DIR, deep_merge_dict
+
+
+DEFAULT_PROFILES_CONFIG_PATH = CONFIG_DIR / "profiles.default.yml"
+RUNTIME_PROFILES_CONFIG_PATH = CONFIG_DIR / "profiles.yml"
+
+_RUNTIME_PROFILES_OVERLAY_TEXT = """# Surcharge locale des profils métier.
+# Source de vérité : config/profiles.default.yml
+# Ne mettez ici que les écarts spécifiques à votre environnement.
+#
+# Exemples :
+# default_profile: chuxx_strict
+# profiles:
+#   mon_profil:
+#     label: Mon profil
+#     description: Surcharge locale
+#     require_manual_mask: true
+#     force_disable_vlm: true
+#     preferred_manual_mask_template: chuxx/formulaire.yml
+#     param_lists:
+#       whitelist_phrases:
+#       - Document validé DIM
+#     dictionaries_overlay:
+#       blacklist:
+#         force_mask_terms:
+#         - MON_ETAB
+{}
+"""
+
+_FALLBACK_DEFAULT_PROFILES_TEXT = """version: 1
+default_profile: standard_local
+profiles:
+  standard_local:
+    label: Standard local
+    description: Profil par défaut pour les traitements internes.
+    require_manual_mask: false
+    force_disable_vlm: false
+    dictionaries_overlay: {}
+  chuxx_strict:
+    label: CHUXX strict
+    description: Profil conservateur pour le CHUXX, orienté diffusion prudente.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay:
+      blacklist:
+        force_mask_terms:
+        - CHUXX
+        - Centre Hospitalier Universitaire XX
+        - CENTRE HOSPITALIER UNIVERSITAIRE XX
+  partage_recherche:
+    label: Partage recherche
+    description: Profil externe strict. Le masque manuel est recommandé pour les formulaires répétitifs.
+    require_manual_mask: true
+    force_disable_vlm: true
+    dictionaries_overlay:
+      blacklist:
+        force_mask_terms:
+        - CHUXX
+        - Centre Hospitalier Universitaire XX
+        - CENTRE HOSPITALIER UNIVERSITAIRE XX
+  dossier_audit:
+    label: Dossier audit
+    description: Profil orienté traçabilité et reproductibilité.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay: {}
+  demo:
+    label: Démo
+    description: Profil léger pour démonstration interne sur poste bureautique.
+    require_manual_mask: false
+    force_disable_vlm: true
+    dictionaries_overlay: {}
+"""
+
+_FALLBACK_DEFAULT_PROFILES_DICT: Dict[str, Any] = {
+    "version": 1,
+    "default_profile": "standard_local",
+    "profiles": {
+        "standard_local": {
+            "label": "Standard local",
+            "description": "Profil par défaut pour les traitements internes.",
+            "require_manual_mask": False,
+            "force_disable_vlm": False,
+            "dictionaries_overlay": {},
+        },
+        "chuxx_strict": {
+            "label": "CHUXX strict",
+            "description": "Profil conservateur pour le CHUXX, orienté diffusion prudente.",
+            "require_manual_mask": False,
+            "force_disable_vlm": True,
+            "dictionaries_overlay": {
+                "blacklist": {
+                    "force_mask_terms": [
+                        "CHUXX",
+                        "Centre Hospitalier Universitaire XX",
+                        "CENTRE HOSPITALIER UNIVERSITAIRE XX",
+                    ],
+                },
+            },
+        },
+        "partage_recherche": {
+            "label": "Partage recherche",
+            "description": (
+                "Profil externe strict. Le masque manuel est recommandé "
+                "pour les formulaires répétitifs."
+            ),
+            "require_manual_mask": True,
+            "force_disable_vlm": True,
+            "dictionaries_overlay": {
+                "blacklist": {
+                    "force_mask_terms": [
+                        "CHUXX",
+                        "Centre Hospitalier Universitaire XX",
+                        "CENTRE HOSPITALIER UNIVERSITAIRE XX",
+                    ],
+                },
+            },
+        },
+        "dossier_audit": {
+            "label": "Dossier audit",
+            "description": "Profil orienté traçabilité et reproductibilité.",
+            "require_manual_mask": False,
+            "force_disable_vlm": True,
+            "dictionaries_overlay": {},
+        },
+        "demo": {
+            "label": "Démo",
+            "description": "Profil léger pour démonstration interne sur poste bureautique.",
+            "require_manual_mask": False,
+            "force_disable_vlm": True,
+            "dictionaries_overlay": {},
+        },
+    },
+}
+
+
+def read_default_profiles_text() -> str:
+    try:
+        return DEFAULT_PROFILES_CONFIG_PATH.read_text(encoding="utf-8")
+    except Exception:
+        return _FALLBACK_DEFAULT_PROFILES_TEXT
+
+
+def read_runtime_profiles_overlay_text() -> str:
+    return _RUNTIME_PROFILES_OVERLAY_TEXT
+
+
+def load_default_profiles_dict() -> Dict[str, Any]:
+    text = read_default_profiles_text()
+    if yaml is not None:
+        try:
+            loaded = yaml.safe_load(text) or {}
+            if isinstance(loaded, dict):
+                return loaded
+        except Exception:
+            pass
+    return deepcopy(_FALLBACK_DEFAULT_PROFILES_DICT)
+
+
+def list_default_profile_keys() -> set[str]:
+    data = load_default_profiles_dict()
+    profiles = data.get("profiles", {}) or {}
+    if not isinstance(profiles, dict):
+        return set()
+    return {str(key) for key in profiles}
+
+
+def load_runtime_profiles_overlay_dict(path: Path | None = None) -> Dict[str, Any]:
+    target = Path(path) if path is not None else RUNTIME_PROFILES_CONFIG_PATH
+    if not target.exists() or yaml is None:
+        return {}
+    try:
+        loaded = yaml.safe_load(target.read_text(encoding="utf-8")) or {}
+        if isinstance(loaded, dict):
+            return loaded
+    except Exception:
+        pass
+    return {}
+
+
+def load_effective_profiles_dict(path: Path | None = None) -> Dict[str, Any]:
+    return deep_merge_dict(
+        load_default_profiles_dict(),
+        load_runtime_profiles_overlay_dict(path),
+    )
+
+
+def _normalize_string_list(values: Any) -> list[str]:
+    if not isinstance(values, list):
+        return []
+    normalized: list[str] = []
+    for value in values:
+        text = str(value).strip()
+        if text:
+            normalized.append(text)
+    return normalized
+
+
+def _normalize_param_lists(value: Any) -> Dict[str, list[str]]:
+    if not isinstance(value, dict):
+        return {}
+    return {
+        "whitelist_phrases": _normalize_string_list(value.get("whitelist_phrases", [])),
+        "blacklist_force_mask_terms": _normalize_string_list(
+            value.get("blacklist_force_mask_terms", [])
+        ),
+        "additional_stopwords": _normalize_string_list(value.get("additional_stopwords", [])),
+    }
+
+
+def _write_runtime_profiles_overlay_dict(path: Path, data: Dict[str, Any]) -> Path:
+    if yaml is None:
+        raise RuntimeError("PyYAML indisponible")
+    body = yaml.safe_dump(
+        data or {},
+        allow_unicode=True,
+        default_flow_style=False,
+        sort_keys=False,
+    )
+    header = (
+        "# Surcharge locale des profils métier.\n"
+        "# Source de vérité : config/profiles.default.yml\n"
+        "# Les profils créés depuis la GUI sont enregistrés ici.\n"
+    )
+    path.write_text(header + "\n" + body, encoding="utf-8")
+    return path
+
+
+def ensure_runtime_profiles_config(path: Path | None = None) -> Path:
+    target = Path(path) if path is not None else RUNTIME_PROFILES_CONFIG_PATH
+    if not target.exists():
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(read_runtime_profiles_overlay_text(), encoding="utf-8")
+    return target
+
+
+def list_effective_profiles(path: Path | None = None) -> Dict[str, Dict[str, Any]]:
+    data = load_effective_profiles_dict(path)
+    profiles = data.get("profiles", {}) or {}
+    if not isinstance(profiles, dict):
+        return {}
+    normalized: Dict[str, Dict[str, Any]] = {}
+    for key, value in profiles.items():
+        if not isinstance(value, dict):
+            continue
+        raw_param_lists = value.get("param_lists")
+        has_param_lists = isinstance(raw_param_lists, dict)
+        preferred_manual_mask_template = str(value.get("preferred_manual_mask_template") or "").strip()
+        normalized[str(key)] = {
+            "label": str(value.get("label") or key),
+            "description": str(value.get("description") or ""),
+            "require_manual_mask": bool(value.get("require_manual_mask", False)),
+            "force_disable_vlm": bool(value.get("force_disable_vlm", False)),
+            "dictionaries_overlay": deepcopy(value.get("dictionaries_overlay") or {}),
+            "param_lists": _normalize_param_lists(raw_param_lists),
+            "has_param_lists": has_param_lists,
+            "preferred_manual_mask_template": preferred_manual_mask_template,
+            "has_preferred_manual_mask_template": "preferred_manual_mask_template" in value,
+        }
+    return normalized
+
+
+def get_default_profile_key(path: Path | None = None) -> str:
+    data = load_effective_profiles_dict(path)
+    key = str(data.get("default_profile") or "").strip()
+    profiles = list_effective_profiles(path)
+    if key and key in profiles:
+        return key
+    if profiles:
+        return next(iter(profiles))
+    return "standard_local"
+
+
+def save_runtime_profile(
+    profile_key: str,
+    profile_spec: Dict[str, Any],
+    path: Path | None = None,
+    *,
+    set_default: bool = False,
+) -> Path:
+    target = ensure_runtime_profiles_config(path)
+    data = load_runtime_profiles_overlay_dict(target)
+    if not isinstance(data, dict):
+        data = {}
+
+    profiles = data.get("profiles")
+    if not isinstance(profiles, dict):
+        profiles = {}
+        data["profiles"] = profiles
+
+    normalized_spec: Dict[str, Any] = {
+        "label": str(profile_spec.get("label") or profile_key),
+        "description": str(profile_spec.get("description") or ""),
+        "require_manual_mask": bool(profile_spec.get("require_manual_mask", False)),
+        "force_disable_vlm": bool(profile_spec.get("force_disable_vlm", False)),
+        "dictionaries_overlay": deepcopy(profile_spec.get("dictionaries_overlay") or {}),
+    }
+
+    if profile_spec.get("has_param_lists") or "param_lists" in profile_spec:
+        normalized_spec["param_lists"] = _normalize_param_lists(profile_spec.get("param_lists"))
+
+    if (
+        profile_spec.get("has_preferred_manual_mask_template")
+        or "preferred_manual_mask_template" in profile_spec
+    ):
+        normalized_spec["preferred_manual_mask_template"] = str(
+            profile_spec.get("preferred_manual_mask_template") or ""
+        ).strip()
+
+    profiles[str(profile_key)] = normalized_spec
+    if set_default:
+        data["default_profile"] = str(profile_key)
+
+    return _write_runtime_profiles_overlay_dict(target, data)
+
+
+def set_runtime_default_profile(profile_key: str, path: Path | None = None) -> Path:
+    target = ensure_runtime_profiles_config(path)
+    data = load_runtime_profiles_overlay_dict(target)
+    if not isinstance(data, dict):
+        data = {}
+    data["default_profile"] = str(profile_key)
+    return _write_runtime_profiles_overlay_dict(target, data)
+
+
+def delete_runtime_profile(profile_key: str, path: Path | None = None) -> Path:
+    target = ensure_runtime_profiles_config(path)
+    data = load_runtime_profiles_overlay_dict(target)
+    if not isinstance(data, dict):
+        data = {}
+
+    profiles = data.get("profiles")
+    if isinstance(profiles, dict):
+        profiles.pop(str(profile_key), None)
+        if not profiles:
+            data.pop("profiles", None)
+
+    if str(data.get("default_profile") or "").strip() == str(profile_key):
+        data["default_profile"] = "standard_local"
+
+    return _write_runtime_profiles_overlay_dict(target, data)
--- a/regression_tests/check_regression.py
+++ b/regression_tests/check_regression.py
@@ -14,7 +14,7 @@ from collections import Counter
 from pathlib import Path

 BASELINE_DIR = Path(__file__).parent / "baseline"
-OUTPUT_DIR = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)/anonymise_audit_30")
+OUTPUT_DIR = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)/anonymise_audit_30")

 # === Patterns de fuites connues ===
 LEAK_CHECKS = {
@@ -23,7 +23,7 @@ LEAK_CHECKS = {
    "RPPS_raw": re.compile(r"\b[12]\d{10}\b"),  # 11 chiffres commençant par 1 ou 2
    "bracket_double": re.compile(r"\[\["),
    "www_hospital": re.compile(r"www\.ch-cote-basque"),
-    "FINESS_raw": re.compile(r"\b640000162\b"),
+    "FINESS_raw": re.compile(r"\b999999999\b"),
 }

 # === Termes médicaux qui NE doivent PAS être masqués ===
--- a/run_batch_30_audit.py
+++ b/run_batch_30_audit.py
@@ -15,7 +15,7 @@ from vlm_manager import VlmManager
 from gliner_manager import GlinerManager
 from camembert_ner_manager import CamembertNerManager

-SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
 OUTDIR = SRC / "anonymise_audit_30"
 CONFIG = RUNTIME_DICTIONARIES_CONFIG_PATH

--- a/run_batch_59ogc.py
+++ b/run_batch_59ogc.py
@@ -12,7 +12,7 @@ import anonymizer_core_refactored_onnx as core
 from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH
 from eds_pseudo_manager import EdsPseudoManager

-SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
 OUTDIR = SRC / "anonymise"
 CONFIG = RUNTIME_DICTIONARIES_CONFIG_PATH

--- a/run_batch_silver_export.py
+++ b/run_batch_silver_export.py
@@ -21,7 +21,7 @@ sys.path.insert(0, str(Path(__file__).parent))

 from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH

-SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
 OUTDIR = SRC / "anonymise_silver_extra"
 CONFIG = RUNTIME_DICTIONARIES_CONFIG_PATH

--- a/scripts/build_finess_gazetteers.py
+++ b/scripts/build_finess_gazetteers.py
@@ -74,7 +74,7 @@ def normalize(s: str) -> str:
 def extract_distinctive_name(full_name: str) -> str:
    """Extrait la partie distinctive d'un nom d'établissement.

-    Ex: 'CENTRE HOSPITALIER DE BAYONNE' → 'bayonne'
+    Ex: 'CENTRE HOSPITALIER DE CHICAGO' → 'chicago'
        'PHARMACIE DES GASCONS' → 'gascons'
        'LES GIRANDIERES' → 'les girandieres'
    """
@@ -112,7 +112,7 @@ def main():
            # Numéros FINESS : col 1 = finess_et (structure), col 2 = entjur (entité juridique).
            # Les deux sont des identifiants 9 chiffres réels du référentiel FINESS et doivent
            # être masqués. Avant ce fix, seul finess_et était extrait (~102k), et les ~48k
-            # entjur étaient manqués — provoquant des fuites (ex: 640780417 entjur CHCB).
+            # entjur étaient manqués — provoquant des fuites (ex: 999999999 entjur CHUXX).
            for col_idx in (1, 2):
                finess = row[col_idx].strip() if col_idx < len(row) else ""
                if re.match(r"^\d{9}$", finess):
--- a/scripts/evaluate_quality.py
+++ b/scripts/evaluate_quality.py
@@ -34,7 +34,7 @@ from typing import Dict, List, Set, Tuple
 # === Chemins par défaut ===
 PROJECT_DIR = Path(__file__).parent.parent
 DEFAULT_DIR = Path(
-    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)"
+    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)"
    "/anonymise_audit_30"
 )
 INSEE_NOMS = PROJECT_DIR / "data" / "insee" / "noms_famille_france.txt"
@@ -85,7 +85,7 @@ NAME_IGNORE = {
    "GAUCHE", "DROITE", "ANTERIEUR", "POSTERIEUR",
    "JANVIER", "FEVRIER", "MARS", "AVRIL", "JUIN", "JUILLET",
    "AOUT", "SEPTEMBRE", "OCTOBRE", "NOVEMBRE", "DECEMBRE",
-    "FRANCE", "BAYONNE", "BORDEAUX", "PARIS", "TOULOUSE",
+    "FRANCE", "CHICAGO", "BORDEAUX", "PARIS", "TOULOUSE",
    "SAINT", "SAINTE",
 }

--- a/scripts/export_silver_annotations.py
+++ b/scripts/export_silver_annotations.py
@@ -45,7 +45,7 @@ PLACEHOLDER_TO_BIO: Dict[str, str] = {

 RE_PLACEHOLDER = re.compile(r"^\[([A-Z_]+)\]$")

-SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
 AUDIT_DIR = SRC / "anonymise_audit_30"

 # --- Gazetteer paths ---
--- a/scripts/reprocess_audit30.py
+++ b/scripts/reprocess_audit30.py
@@ -2,7 +2,7 @@
 """Reprocess corpus audit_30 avec le code actuel.

 Lit la liste des documents depuis evaluation/baseline_scores.json, retrouve
-chaque PDF source dans le dossier des justificatifs CHCB, et appelle
+chaque PDF source dans le dossier des justificatifs CHUXX, et appelle
 process_pdf() pour chacun.

 Sortie : un dossier horodaté sous /tmp/reprocess_audit30/<timestamp>/ avec
@@ -31,7 +31,7 @@ from anonymizer_core_refactored_onnx import process_pdf, NerModelManager, NerThr

 BASELINE_PATH = PROJECT_DIR / "evaluation" / "baseline_scores.json"
 SOURCE_ROOT = Path(
-    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)"
+    "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)"
 )


--- a/tests/unit/test_config_externalization.py
+++ b/tests/unit/test_config_externalization.py
@@ -22,7 +22,7 @@ def test_default_config_template_is_externalized():
    assert "whitelist_phrases:" in text

    cfg = core.load_dictionaries(None)
-    assert "CHCB" in cfg["blacklist"]["force_mask_terms"]
+    assert "CHUXX" in cfg["blacklist"]["force_mask_terms"]


 def test_runtime_overlay_template_is_minimal():
@@ -82,14 +82,14 @@ def test_runtime_overlay_is_created_and_effective_merge_works(tmp_path: Path):
    assert cfg_path.exists()

    effective = load_effective_dictionaries_dict(cfg_path)
-    assert "CHCB" in effective["blacklist"]["force_mask_terms"]
+    assert "CHUXX" in effective["blacklist"]["force_mask_terms"]

    cfg_path.write_text(
        "blacklist:\n  force_mask_terms:\n    - LOCAL_SIGLE\n",
        encoding="utf-8",
    )
    effective = load_effective_dictionaries_dict(cfg_path)
-    assert "CHCB" in effective["blacklist"]["force_mask_terms"]
+    assert "CHUXX" in effective["blacklist"]["force_mask_terms"]
    assert "LOCAL_SIGLE" in effective["blacklist"]["force_mask_terms"]


@@ -100,5 +100,5 @@ def test_effective_param_lists_include_defaults_when_overlay_is_empty(tmp_path:
    params = load_effective_param_lists(cfg_path)

    assert "classification internationale" in params["whitelist_phrases"]
-    assert "CHCB" in params["blacklist_force_mask_terms"]
+    assert "CHUXX" in params["blacklist_force_mask_terms"]
    assert params["additional_stopwords"] == []
--- a/tests/unit/test_header_pii_detection.py
+++ b/tests/unit/test_header_pii_detection.py
@@ -56,8 +56,8 @@ class TestHeaderPiiDetection:
    def test_structured_code_postal_preserves_label_and_audit(self):
        cfg = load_dictionaries(None)

-        anon = anonymise_document_regex(["Code postal : 64100"], [[]], cfg)
+        anon = anonymise_document_regex(["Code postal : 12345"], [[]], cfg)
        text = selective_rescan(anon.text_out, cfg)

        assert text == "Code postal : [CODE_POSTAL]"
-        assert any(h.kind == "CODE_POSTAL" and h.original == "64100" for h in anon.audit)
+        assert any(h.kind == "CODE_POSTAL" and h.original == "12345" for h in anon.audit)
--- a/tests/unit/test_profile_defaults.py
+++ b/tests/unit/test_profile_defaults.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+from pathlib import Path
+
+from profile_defaults import (
+    delete_runtime_profile,
+    ensure_runtime_profiles_config,
+    get_default_profile_key,
+    list_default_profile_keys,
+    list_effective_profiles,
+    load_effective_profiles_dict,
+    read_default_profiles_text,
+    read_runtime_profiles_overlay_text,
+    save_runtime_profile,
+    set_runtime_default_profile,
+)
+
+
+def test_default_profiles_template_is_externalized():
+    text = read_default_profiles_text()
+
+    assert "default_profile:" in text
+    assert "chuxx_strict:" in text
+    assert "partage_recherche:" in text
+    assert "standard_local" in list_default_profile_keys()
+
+
+def test_runtime_profiles_overlay_template_is_minimal():
+    text = read_runtime_profiles_overlay_text()
+
+    assert "profiles.default.yml" in text
+    assert "{}" in text
+
+
+def test_runtime_profiles_overlay_is_created_and_merged(tmp_path: Path):
+    cfg_path = tmp_path / "profiles.yml"
+
+    created = ensure_runtime_profiles_config(cfg_path)
+    assert created == cfg_path
+    assert cfg_path.exists()
+
+    effective = load_effective_profiles_dict(cfg_path)
+    assert effective["default_profile"] == "standard_local"
+
+    cfg_path.write_text(
+        "default_profile: partage_recherche\n"
+        "profiles:\n"
+        "  partage_recherche:\n"
+        "    description: Profil local surcharge\n",
+        encoding="utf-8",
+    )
+
+    effective = load_effective_profiles_dict(cfg_path)
+    assert effective["default_profile"] == "partage_recherche"
+    assert effective["profiles"]["partage_recherche"]["description"] == "Profil local surcharge"
+
+
+def test_list_effective_profiles_normalizes_flags(tmp_path: Path):
+    cfg_path = tmp_path / "profiles.yml"
+    cfg_path.write_text(
+        "profiles:\n"
+        "  custom:\n"
+        "    label: Profil custom\n"
+        "    require_manual_mask: true\n"
+        "    force_disable_vlm: true\n"
+        "    preferred_manual_mask_template: hopital/formulaire.yml\n"
+        "    param_lists:\n"
+        "      whitelist_phrases:\n"
+        "      - DOCUMENT INTERNE\n"
+        "      blacklist_force_mask_terms:\n"
+        "      - CUSTOM_ETAB\n"
+        "      additional_stopwords:\n"
+        "      - DIM\n"
+        "    dictionaries_overlay:\n"
+        "      blacklist:\n"
+        "        force_mask_terms:\n"
+        "        - CUSTOM_ETAB\n",
+        encoding="utf-8",
+    )
+
+    profiles = list_effective_profiles(cfg_path)
+
+    assert profiles["custom"]["label"] == "Profil custom"
+    assert profiles["custom"]["require_manual_mask"] is True
+    assert profiles["custom"]["force_disable_vlm"] is True
+    assert profiles["custom"]["preferred_manual_mask_template"] == "hopital/formulaire.yml"
+    assert profiles["custom"]["has_param_lists"] is True
+    assert profiles["custom"]["param_lists"]["whitelist_phrases"] == ["DOCUMENT INTERNE"]
+    assert profiles["custom"]["param_lists"]["blacklist_force_mask_terms"] == ["CUSTOM_ETAB"]
+    assert profiles["custom"]["param_lists"]["additional_stopwords"] == ["DIM"]
+    assert "CUSTOM_ETAB" in profiles["custom"]["dictionaries_overlay"]["blacklist"]["force_mask_terms"]
+
+
+def test_default_profile_key_keeps_merged_default_when_available(tmp_path: Path):
+    cfg_path = tmp_path / "profiles.yml"
+    cfg_path.write_text(
+        "default_profile: missing\n"
+        "profiles:\n"
+        "  custom:\n"
+        "    label: Profil custom\n",
+        encoding="utf-8",
+    )
+
+    assert get_default_profile_key(cfg_path) == "standard_local"
+
+
+def test_save_runtime_profile_persists_new_profile_and_default(tmp_path: Path):
+    cfg_path = tmp_path / "profiles.yml"
+
+    save_runtime_profile(
+        "bureau_strict",
+        {
+            "label": "Bureau strict",
+            "description": "Profil créé depuis la GUI",
+            "require_manual_mask": True,
+            "force_disable_vlm": True,
+            "preferred_manual_mask_template": "chuxx/formulaire.yml",
+            "has_preferred_manual_mask_template": True,
+            "param_lists": {
+                "whitelist_phrases": ["VALIDATION DIM"],
+                "blacklist_force_mask_terms": ["CHUXX"],
+                "additional_stopwords": ["RUM"],
+            },
+            "has_param_lists": True,
+            "dictionaries_overlay": {
+                "blacklist": {
+                    "force_mask_terms": ["CHUXX"],
+                },
+            },
+        },
+        cfg_path,
+        set_default=True,
+    )
+
+    data = load_effective_profiles_dict(cfg_path)
+    assert data["default_profile"] == "bureau_strict"
+
+    saved = list_effective_profiles(cfg_path)["bureau_strict"]
+    assert saved["label"] == "Bureau strict"
+    assert saved["require_manual_mask"] is True
+    assert saved["force_disable_vlm"] is True
+    assert saved["preferred_manual_mask_template"] == "chuxx/formulaire.yml"
+    assert saved["param_lists"]["whitelist_phrases"] == ["VALIDATION DIM"]
+    assert saved["param_lists"]["blacklist_force_mask_terms"] == ["CHUXX"]
+    assert saved["param_lists"]["additional_stopwords"] == ["RUM"]
+
+
+def test_set_and_delete_runtime_profile(tmp_path: Path):
+    cfg_path = tmp_path / "profiles.yml"
+    save_runtime_profile(
+        "profil_temporaire",
+        {
+            "label": "Profil temporaire",
+            "description": "",
+            "require_manual_mask": False,
+            "force_disable_vlm": False,
+            "dictionaries_overlay": {},
+        },
+        cfg_path,
+    )
+
+    set_runtime_default_profile("profil_temporaire", cfg_path)
+    assert get_default_profile_key(cfg_path) == "profil_temporaire"
+
+    delete_runtime_profile("profil_temporaire", cfg_path)
+    profiles = list_effective_profiles(cfg_path)
+    assert "profil_temporaire" not in profiles
+    assert get_default_profile_key(cfg_path) == "standard_local"
--- a/tools/analyze_corpus.py
+++ b/tools/analyze_corpus.py
@@ -65,7 +65,7 @@ def classify_complexity(stats: dict) -> str:


 def main():
-    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)/")
+    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)/")
    
    if not corpus_dir.exists():
        print(f"Erreur : {corpus_dir} n'existe pas")
--- a/tools/analyze_date_masking.py
+++ b/tools/analyze_date_masking.py
@@ -66,7 +66,7 @@ def analyze_dates_in_audit(audit_path: Path, text_path: Path):
    return dates_info

 def main():
-    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise")
+    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise")
    
    print("=" * 80)
    print("ANALYSE DES DATES MASQUÉES")
--- a/tools/analyze_real_quality.py
+++ b/tools/analyze_real_quality.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from collections import Counter, defaultdict

 # Répertoire des documents anonymisés
-ANON_DIR = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise")
+ANON_DIR = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise")

 def analyze_leaks(txt_file):
    """Détecte les fuites potentielles dans un fichier texte."""
@@ -23,7 +23,7 @@ def analyze_leaks(txt_file):
        "telephone": re.compile(r"\b0[1-9](?:[\s.-]?\d{2}){4}\b"),
        "email": re.compile(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"),
        "adresse": re.compile(r"\b\d+\s+(?:rue|avenue|boulevard|place|chemin|impasse)\s+[A-Z]", re.IGNORECASE),
-        "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
+        "chuxx": re.compile(r"\bCHUXX\b", re.IGNORECASE),
    }
    
    for pattern_name, pattern in patterns.items():
--- a/tools/compare_original_vs_anonymized.py
+++ b/tools/compare_original_vs_anonymized.py
@@ -8,8 +8,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 import pdfplumber

 # Document original
-original_pdf = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/102_23056463/CRH 23056364.pdf")
-anonymized_txt = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise/CRH 23056364.pseudonymise.txt")
+original_pdf = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/102_23056463/CRH 23056364.pdf")
+anonymized_txt = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise/CRH 23056364.pseudonymise.txt")

 print("="*80)
 print("COMPARAISON ORIGINAL vs ANONYMISÉ")
--- a/tools/compare_test_vs_production.py
+++ b/tools/compare_test_vs_production.py
@@ -46,7 +46,7 @@ def compare_datasets():
    test_dir = Path("tests/ground_truth/pdfs/baseline_anonymized")
    
    # Production (régression)
-    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise")
+    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise")
    
    print("\n" + "="*80)
    print("COMPARAISON TEST DATASET vs PRODUCTION")
--- a/tools/debug_force_term.py
+++ b/tools/debug_force_term.py
@@ -17,11 +17,11 @@ print()

 # Test the pattern
 test_lines = [
-    "confirmée à 5,7 g ici au CHCB. Appel Dr [NOM], hématologue biologiste",
-    "CHCB :",
-    "CHCB",
-    "au CHCB",
-    "le CHCB est",
+    "confirmée à 5,7 g ici au CHUXX. Appel Dr [NOM], hématologue biologiste",
+    "CHUXX :",
+    "CHUXX",
+    "au CHUXX",
+    "le CHUXX est",
 ]

 for term in cfg.get("blacklist", {}).get("force_mask_terms", []):
--- a/tools/deep_quality_regression_analysis.py
+++ b/tools/deep_quality_regression_analysis.py
@@ -210,8 +210,8 @@ def main():
    """Analyse un échantillon de documents"""
    
    # Chemins
-    original_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
-    anonymized_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise")
+    original_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
+    anonymized_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise")
    
    # Documents à analyser
    test_docs = [
--- a/tools/root_cause_analysis.py
+++ b/tools/root_cause_analysis.py
@@ -122,7 +122,7 @@ def analyze_anonymized_text(text_path: Path) -> Dict:
 def compare_datasets():
    """Compare test dataset vs production."""
    test_dir = Path("tests/ground_truth/pdfs/baseline_anonymized")
-    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/anonymise")
+    prod_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/anonymise")
    
    print("=" * 80)
    print("ANALYSE DES CAUSES RACINES - RÉGRESSION DE QUALITÉ")
--- a/tools/simulate_admin_rule.py
+++ b/tools/simulate_admin_rule.py
@@ -4,15 +4,15 @@ Simule l'effet d'une règle d'administration sur un texte ou sur le corpus synth

 Usage :
    # Appliquer une règle à un texte libre
-    python tools/simulate_admin_rule.py --rule-id rule_chcb_exact_mask \\
-        --text "Consulté au CHCB le 12/06/2024."
+    python tools/simulate_admin_rule.py --rule-id rule_chuxx_exact_mask \\
+        --text "Consulté au CHUXX le 12/06/2024."

    # Appliquer à un fichier texte
-    python tools/simulate_admin_rule.py --rule-id rule_chcb_exact_mask \\
+    python tools/simulate_admin_rule.py --rule-id rule_chuxx_exact_mask \\
        --file path/to/document.txt

    # Valider la règle sur ses required_case_ids (--corpus)
-    python tools/simulate_admin_rule.py --rule-id rule_chcb_exact_mask --corpus
+    python tools/simulate_admin_rule.py --rule-id rule_chuxx_exact_mask --corpus

    # Valider TOUTES les règles actives sur leurs corpus
    python tools/simulate_admin_rule.py --all --corpus
--- a/tools/test_all_cro.py
+++ b/tools/test_all_cro.py
@@ -16,7 +16,7 @@ def test_all_cro():
    """Test la propagation des dates de naissance sur tous les CRO."""
    
    # Chercher tous les CRO dans les 59 OGC
-    ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+    ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
    
    # Trouver tous les CRO (compte rendu opératoire)
    print("Recherche de tous les CRO dans le corpus...")
@@ -59,25 +59,25 @@ def test_all_cro():
            date_context_pattern = re.compile(r'Né(?:e)?\s+le\s+(\d{1,2}[\s/.\-]+\d{1,2}[\s/.\-]+\d{2,4})', re.IGNORECASE)
            context_leaks = date_context_pattern.findall(anonymized_text)
            
-            # Scanner "CHCB" en clair
-            chcb_leaks = re.findall(r'\bCHCB\b', anonymized_text)
+            # Scanner "CHUXX" en clair
+            chuxx_leaks = re.findall(r'\bCHUXX\b', anonymized_text)
            
            # Compter les fuites totales
-            total_leaks = len(context_leaks) + len(chcb_leaks)
+            total_leaks = len(context_leaks) + len(chuxx_leaks)
            
            status = "✅" if total_leaks == 0 else "❌"
-            print(f"  {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHCB: {len(chcb_leaks)}")
+            print(f"  {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHUXX: {len(chuxx_leaks)}")
            
            if context_leaks:
                print(f"     Exemples dates: {context_leaks[:3]}")
-            if chcb_leaks:
-                print(f"     Exemples CHCB: {chcb_leaks[:3]}")
+            if chuxx_leaks:
+                print(f"     Exemples CHUXX: {chuxx_leaks[:3]}")
            
            results.append({
                'file': pdf_path.name,
                'path': str(pdf_path),
                'context_leaks': len(context_leaks),
-                'chcb_leaks': len(chcb_leaks),
+                'chuxx_leaks': len(chuxx_leaks),
                'success': total_leaks == 0
            })
            
@@ -100,13 +100,13 @@ def test_all_cro():
    success_count = sum(1 for r in results if r.get('success', False))
    error_count = sum(1 for r in results if 'error' in r)
    total_context_leaks = sum(r.get('context_leaks', 0) for r in results)
-    total_chcb_leaks = sum(r.get('chcb_leaks', 0) for r in results)
+    total_chuxx_leaks = sum(r.get('chuxx_leaks', 0) for r in results)
    
    print(f"Documents testés: {len(results)}")
    print(f"Succès: {success_count}/{len(results)} ({success_count/len(results)*100:.1f}%)")
    print(f"Erreurs: {error_count}")
    print(f"Fuites 'Né(e) le' totales: {total_context_leaks}")
-    print(f"Fuites CHCB totales: {total_chcb_leaks}")
+    print(f"Fuites CHUXX totales: {total_chuxx_leaks}")
    print(f"Temps total: {elapsed_time:.1f}s ({elapsed_time/len(results):.1f}s/doc)")
    
    # Liste des documents avec fuites
@@ -119,7 +119,7 @@ def test_all_cro():
            print(f"\n{doc['file']}")
            print(f"  Path: {doc['path']}")
            print(f"  Fuites dates: {doc.get('context_leaks', 0)}")
-            print(f"  Fuites CHCB: {doc.get('chcb_leaks', 0)}")
+            print(f"  Fuites CHUXX: {doc.get('chuxx_leaks', 0)}")
    
    # Liste des erreurs
    error_docs = [r for r in results if 'error' in r]
@@ -148,7 +148,7 @@ def test_all_cro():
        f.write(f"Succès: {success_count}/{len(results)} ({success_count/len(results)*100:.1f}%)\n")
        f.write(f"Erreurs: {error_count}\n")
        f.write(f"Fuites 'Né(e) le' totales: {total_context_leaks}\n")
-        f.write(f"Fuites CHCB totales: {total_chcb_leaks}\n")
+        f.write(f"Fuites CHUXX totales: {total_chuxx_leaks}\n")
        f.write(f"Temps total: {elapsed_time:.1f}s ({elapsed_time/len(results):.1f}s/doc)\n\n")
        
        if failed_docs:
@@ -159,7 +159,7 @@ def test_all_cro():
                f.write(f"{doc['file']}\n")
                f.write(f"  Path: {doc['path']}\n")
                f.write(f"  Fuites dates: {doc.get('context_leaks', 0)}\n")
-                f.write(f"  Fuites CHCB: {doc.get('chcb_leaks', 0)}\n\n")
+                f.write(f"  Fuites CHUXX: {doc.get('chuxx_leaks', 0)}\n\n")
        
        if error_docs:
            f.write("=" * 80 + "\n")
--- a/tools/test_date_propagation.py
+++ b/tools/test_date_propagation.py
@@ -16,7 +16,7 @@ def test_date_propagation():
    """Test la propagation des dates de naissance sur un CRO."""
    
    # Chercher un CRO dans les 59 OGC
-    ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+    ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
    
    # Trouver un CRO (compte rendu opératoire)
    cro_files = []
@@ -68,19 +68,19 @@ def test_date_propagation():
            lines_with_placeholders = [line for line in anonymized_text.split('\n') if placeholder_pattern.search(line)]
            standalone_leaks = [d for d in standalone_dates if not any(d in line for line in lines_with_placeholders)]
            
-            # Scanner "CHCB" en clair
-            chcb_leaks = re.findall(r'\bCHCB\b', anonymized_text)
+            # Scanner "CHUXX" en clair
+            chuxx_leaks = re.findall(r'\bCHUXX\b', anonymized_text)
            
            # Compter les fuites totales
-            total_leaks = len(context_leaks) + len(chcb_leaks)
+            total_leaks = len(context_leaks) + len(chuxx_leaks)
            
            status = "✅" if total_leaks == 0 else "❌"
-            print(f"  {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHCB: {len(chcb_leaks)}")
+            print(f"  {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHUXX: {len(chuxx_leaks)}")
            
            if context_leaks:
                print(f"     Exemples dates: {context_leaks[:3]}")
-            if chcb_leaks:
-                print(f"     Exemples CHCB: {chcb_leaks[:3]}")
+            if chuxx_leaks:
+                print(f"     Exemples CHUXX: {chuxx_leaks[:3]}")
            
            # Info : dates standalone (pas nécessairement des fuites)
            if standalone_leaks:
@@ -89,7 +89,7 @@ def test_date_propagation():
            results.append({
                'file': pdf_path.name,
                'context_leaks': len(context_leaks),
-                'chcb_leaks': len(chcb_leaks),
+                'chuxx_leaks': len(chuxx_leaks),
                'standalone_dates': len(standalone_leaks),
                'success': total_leaks == 0
            })
@@ -109,13 +109,13 @@ def test_date_propagation():
    
    success_count = sum(1 for r in results if r.get('success', False))
    total_context_leaks = sum(r.get('context_leaks', 0) for r in results)
-    total_chcb_leaks = sum(r.get('chcb_leaks', 0) for r in results)
+    total_chuxx_leaks = sum(r.get('chuxx_leaks', 0) for r in results)
    total_standalone = sum(r.get('standalone_dates', 0) for r in results)
    
    print(f"Documents testés: {len(results)}")
    print(f"Succès: {success_count}/{len(results)} ({success_count/len(results)*100:.1f}%)")
    print(f"Fuites 'Né(e) le' totales: {total_context_leaks}")
-    print(f"Fuites CHCB totales: {total_chcb_leaks}")
+    print(f"Fuites CHUXX totales: {total_chuxx_leaks}")
    print(f"Dates standalone (info): {total_standalone}")
    
    if success_count == len(results):
--- a/tools/test_force_term_leak.py
+++ b/tools/test_force_term_leak.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Test CHCB force_term detection on the 2 leaked documents."""
+"""Test force_term detection on the 2 leaked documents."""

 from pathlib import Path
 import sys
@@ -10,10 +10,10 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 import anonymizer_core_refactored_onnx as core
 from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH

-def test_chcb_detection():
-    """Test CHCB detection on the 2 documents with leaks."""
+def test_force_term_detection():
+    """Test force_term detection on the 2 documents with leaks."""
    
-    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
    
    # Document 1: trackare-BA148337-23091302
    doc1_path = None
@@ -45,7 +45,7 @@ def test_chcb_detection():
    print("TEST DOCUMENT 1: trackare-BA148337-23091302")
    print("=" * 80)
    
-    outdir = Path("test_chcb_leak")
+    outdir = Path("test_force_term_leak")
    outdir.mkdir(exist_ok=True)
    
    try:
@@ -64,14 +64,14 @@ def test_chcb_detection():
        txt_file = Path(outputs["text"])
        content = txt_file.read_text(encoding="utf-8")
        
-        if "CHCB" in content:
-            print("🔴 FUITE DÉTECTÉE: CHCB trouvé dans le texte anonymisé")
+        if "CHUXX" in content:
+            print("🔴 FUITE DÉTECTÉE: CHUXX trouvé dans le texte anonymisé")
            # Trouver le contexte
            for i, line in enumerate(content.split("\n"), 1):
-                if "CHCB" in line:
+                if "CHUXX" in line:
                    print(f"   Ligne {i}: {line.strip()}")
        else:
-            print("✅ Aucune fuite CHCB")
+            print("✅ Aucune fuite CHUXX")
        
        # Vérifier l'audit
        import json
@@ -80,10 +80,10 @@ def test_chcb_detection():
        with open(audit_file, 'r', encoding='utf-8') as f:
            for line in f:
                obj = json.loads(line)
-                if obj.get("kind") == "force_term" and "CHCB" in obj.get("value", ""):
+                if obj.get("kind") == "force_term" and "CHUXX" in obj.get("value", ""):
                    force_term_count += 1
        
-        print(f"📊 Détections force_term CHCB: {force_term_count}")
+        print(f"📊 Détections force_term CHUXX: {force_term_count}")
        
    except Exception as e:
        print(f"❌ Erreur: {e}")
@@ -113,14 +113,14 @@ def test_chcb_detection():
        txt_file = Path(outputs["text"])
        content = txt_file.read_text(encoding="utf-8")
        
-        if "CHCB" in content:
-            print("🔴 FUITE DÉTECTÉE: CHCB trouvé dans le texte anonymisé")
+        if "CHUXX" in content:
+            print("🔴 FUITE DÉTECTÉE: CHUXX trouvé dans le texte anonymisé")
            # Trouver le contexte
            for i, line in enumerate(content.split("\n"), 1):
-                if "CHCB" in line:
+                if "CHUXX" in line:
                    print(f"   Ligne {i}: {line.strip()}")
        else:
-            print("✅ Aucune fuite CHCB")
+            print("✅ Aucune fuite CHUXX")
        
        # Vérifier l'audit
        import json
@@ -129,10 +129,10 @@ def test_chcb_detection():
        with open(audit_file, 'r', encoding='utf-8') as f:
            for line in f:
                obj = json.loads(line)
-                if obj.get("kind") == "force_term" and "CHCB" in obj.get("value", ""):
+                if obj.get("kind") == "force_term" and "CHUXX" in obj.get("value", ""):
                    force_term_count += 1
        
-        print(f"📊 Détections force_term CHCB: {force_term_count}")
+        print(f"📊 Détections force_term CHUXX: {force_term_count}")
        
    except Exception as e:
        print(f"❌ Erreur: {e}")
@@ -140,4 +140,4 @@ def test_chcb_detection():
        traceback.print_exc()

 if __name__ == "__main__":
-    test_chcb_detection()
+    test_force_term_detection()
--- a/tools/test_gui_complete.py
+++ b/tools/test_gui_complete.py
@@ -88,7 +88,7 @@ import re
 leak_count = 0
 patterns = {
    "date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
-    "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
+    "chuxx": re.compile(r"\bCHUXX\b", re.IGNORECASE),
 }

 for txt_file in out_dir.glob("*.pseudonymise.txt"):
--- a/tools/test_phase1_corrections.py
+++ b/tools/test_phase1_corrections.py
@@ -24,9 +24,9 @@ def test_phase1_corrections():
    
    # Documents de test (5 documents représentatifs)
    test_docs = [
-        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/008_23001234/CRH 23001234.pdf",
-        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/021_23012345/CRO 23012345.pdf",
-        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs/033_23023456/trackare-23023456-12345678.pdf",
+        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/008_23001234/CRH 23001234.pdf",
+        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/021_23012345/CRO 23012345.pdf",
+        "/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs/033_23023456/trackare-23023456-12345678.pdf",
    ]
    
    print("=" * 80)
--- a/tools/validate_corpus_sample.py
+++ b/tools/validate_corpus_sample.py
@@ -23,7 +23,7 @@ def validate_corpus_sample():
    """Valide l'anonymisation sur un échantillon du corpus."""
    
    # Répertoires
-    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
    output_dir = Path("corpus_validation_sample")
    output_dir.mkdir(exist_ok=True)
    
@@ -221,7 +221,7 @@ def leak_check(output_dir: Path):
    # Patterns à vérifier
    patterns = {
        "date_naissance_contexte": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
-        "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
+        "chuxx": re.compile(r"\bCHUXX\b", re.IGNORECASE),
    }
    
    leaks = defaultdict(list)
--- a/tools/validate_full_corpus.py
+++ b/tools/validate_full_corpus.py
@@ -3,7 +3,7 @@
 Validation sur le corpus complet (59 OGC / 130 PDFs).

 Ce script anonymise tous les documents du corpus et vérifie :
-- Absence de fuites (dates de naissance, CHCB, etc.)
+- Absence de fuites (dates de naissance, CHUXX, etc.)
 - Statistiques de détection par type
 - Performances (temps de traitement)
 """
@@ -24,7 +24,7 @@ def validate_full_corpus():
    """Valide l'anonymisation sur le corpus complet."""
    
    # Répertoires
-    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
+    corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
    output_dir = Path("corpus_validation")
    output_dir.mkdir(exist_ok=True)
    
@@ -177,7 +177,7 @@ def leak_check(output_dir: Path):
    # Patterns à vérifier
    patterns = {
        "date_naissance": re.compile(r"(?:n[ée]+\s+le|DDN)\s*:?\s*\d{1,2}[/.\-]\d{1,2}[/.\-]\d{2,4}", re.IGNORECASE),
-        "chcb": re.compile(r"\bCHCB\b", re.IGNORECASE),
+        "chuxx": re.compile(r"\bCHUXX\b", re.IGNORECASE),
        "date_format": re.compile(r"\b\d{2}[/.\-]\d{2}[/.\-]\d{4}\b"),
    }
    
--- a/tools/validate_phase1_on_production.py
+++ b/tools/validate_phase1_on_production.py
@@ -14,7 +14,7 @@ from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH
 from anonymizer_core_refactored_onnx import process_pdf

 # 5 documents du corpus production (OGC 008)
-corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs")
+corpus_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs")
 test_docs = [
    corpus_dir / "008_23001234" / "CRH 23001234.pdf",
    corpus_dir / "008_23001234" / "CRO 23001234.pdf",