Fix 62 fuites résiduelles : DATE_NAISSANCE global, CHCB, Centre Hospitalier de la Côte Basque

- RE_HOPITAL_VILLE : ajout articles (la/le/l'/les) après prépositions
- DATE_NAISSANCE + force_term + force_regex : propagation globale cross-pages
- Config : org_gpe_keep=false, CHCB + variantes Centre Hospitalier en force_mask
- Audit 130 fichiers : 0 résidu (était 36 DATE_NAISS + 26 ETAB)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 12:07:58 +01:00
parent 0684b77d68
commit 96d55584c7
2 changed files with 10 additions and 6 deletions

View File

@@ -528,9 +528,9 @@ RE_HOPITAL_VILLE = re.compile(
 r"\b((?:[Hh]ôpital|[Cc]linique|[Pp]olyclinique|[Cc]entre\s+[Hh]ospitalier"
 r"|[Cc]entre\s+[Dd]e\s+[Ss]oins|[Mm]aison\s+[Dd]e\s+[Ss]anté"
 r"|[Mm]aison\s+[Dd]e\s+[Rr]etraite|[Rr]ésidence|[Ff]oyer)"
-r"\s+(?:de\s+|d['']\s*|du\s+|des\s+)?"
+r"\s+(?:de\s+|d['']\s*|du\s+|des\s+)?(?:la\s+|le\s+|l['']\s*|les\s+)?"
 r"(?:[A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇ][A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇa-zéèàùâêîôûäëïöüç\-']+)"
-r"(?:\s+(?:de\s+|d['']\s*|du\s+|des\s+)?"
+r"(?:\s+(?:de\s+|d['']\s*|du\s+|des\s+)?(?:la\s+|le\s+|l['']\s*|les\s+)?"
 r"[A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇ][A-ZÉÈÀÙÂÊÎÔÛÄËÏÖÜÇa-zéèàùâêîôûäëïöüç\-']+)*)",
 )
 RE_SERVICE = re.compile(
@@ -1950,7 +1950,8 @@ def process_pdf(
 _global_pii: Dict[str, set] = {}
 for h in anon.audit:
 if h.kind in {"TEL", "EMAIL", "ADRESSE", "CODE_POSTAL", "EPISODE", "RPPS", "VILLE", "ETAB",
-"VLM_SERVICE", "VLM_ETAB"}:
+"VLM_SERVICE", "VLM_ETAB", "DATE_NAISSANCE",
+"force_term", "force_regex"}:
 _global_pii.setdefault(h.kind, set()).add(h.original.strip())
 for kind, values in _global_pii.items():
 placeholder = PLACEHOLDERS.get(kind, PLACEHOLDERS["MASK"])
@@ -1958,7 +1959,7 @@ def process_pdf(
 anon.audit.append(PiiHit(page=-1, kind=f"{kind}_GLOBAL", original=val, placeholder=placeholder))
 # 4e) Appliquer les tokens globaux sur le texte pseudonymisé
-_GLOBAL_SKIP_KINDS = {"EDS_DATE_GLOBAL", "DATE_NAISSANCE_GLOBAL"}
+_GLOBAL_SKIP_KINDS = {"EDS_DATE_GLOBAL"}
 for h in anon.audit:
 if h.page != -1:
 continue