chore(rgpd): replace CHCB/Bayonne/Saint-Denis/Réunion refs in source + configs (D-12)

Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis,
Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils
et les tests unitaires. Les cas de test synthétiques (cases) intentionnels sont
conservés.

- profile key chcb_strict → chuxx_strict
- CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield,
  Réunion → Province Bêta, codes postaux 64100/97400 → 12345,
  n° FINESS → 999999999, préfixe tél. 05.59.44 → 0X.XX.XX
- renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-02 14:39:21 +02:00
parent a1ef2225d5
commit 1c44a26eb3
37 changed files with 819 additions and 128 deletions

View File

@@ -16,7 +16,7 @@ def test_date_propagation():
"""Test la propagation des dates de naissance sur un CRO."""
# Chercher un CRO dans les 59 OGC
ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
ogc_dir = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHUXX_DocJustificatifs (1)")
# Trouver un CRO (compte rendu opératoire)
cro_files = []
@@ -68,19 +68,19 @@ def test_date_propagation():
lines_with_placeholders = [line for line in anonymized_text.split('\n') if placeholder_pattern.search(line)]
standalone_leaks = [d for d in standalone_dates if not any(d in line for line in lines_with_placeholders)]
# Scanner "CHCB" en clair
chcb_leaks = re.findall(r'\bCHCB\b', anonymized_text)
# Scanner "CHUXX" en clair
chuxx_leaks = re.findall(r'\bCHUXX\b', anonymized_text)
# Compter les fuites totales
total_leaks = len(context_leaks) + len(chcb_leaks)
total_leaks = len(context_leaks) + len(chuxx_leaks)
status = "" if total_leaks == 0 else ""
print(f" {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHCB: {len(chcb_leaks)}")
print(f" {status} Fuites 'Né(e) le': {len(context_leaks)}, Fuites CHUXX: {len(chuxx_leaks)}")
if context_leaks:
print(f" Exemples dates: {context_leaks[:3]}")
if chcb_leaks:
print(f" Exemples CHCB: {chcb_leaks[:3]}")
if chuxx_leaks:
print(f" Exemples CHUXX: {chuxx_leaks[:3]}")
# Info : dates standalone (pas nécessairement des fuites)
if standalone_leaks:
@@ -89,7 +89,7 @@ def test_date_propagation():
results.append({
'file': pdf_path.name,
'context_leaks': len(context_leaks),
'chcb_leaks': len(chcb_leaks),
'chuxx_leaks': len(chuxx_leaks),
'standalone_dates': len(standalone_leaks),
'success': total_leaks == 0
})
@@ -109,13 +109,13 @@ def test_date_propagation():
success_count = sum(1 for r in results if r.get('success', False))
total_context_leaks = sum(r.get('context_leaks', 0) for r in results)
total_chcb_leaks = sum(r.get('chcb_leaks', 0) for r in results)
total_chuxx_leaks = sum(r.get('chuxx_leaks', 0) for r in results)
total_standalone = sum(r.get('standalone_dates', 0) for r in results)
print(f"Documents testés: {len(results)}")
print(f"Succès: {success_count}/{len(results)} ({success_count/len(results)*100:.1f}%)")
print(f"Fuites 'Né(e) le' totales: {total_context_leaks}")
print(f"Fuites CHCB totales: {total_chcb_leaks}")
print(f"Fuites CHUXX totales: {total_chuxx_leaks}")
print(f"Dates standalone (info): {total_standalone}")
if success_count == len(results):