Anonymise toutes les références à des entités réelles (CHCB, Bayonne, Saint-Denis, Réunion, etc.) dans le code source, les configurations YAML, les scripts/outils, et les tests unitaires. Conserve les tests synthétiques (cases) intentionnels. - profile key chcb_strict → chuxx_strict - CHCB → CHUXX, Bayonne → Chicago, Saint-Denis → Springfield, Réunion → Province Bêta, 64100/97400 → 12345, FINESS → 999999999, préfixe tél 05.59.44 → 0X.XX.XX - renomme tools/test_chcb_leak.py → tools/test_force_term_leak.py Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Debug force_term mechanism."""
|
|
|
|
import re
|
|
|
|
from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH, load_effective_dictionaries_dict
|
|
|
|
# Resolve the effective dictionaries configuration from its runtime path.
cfg_path = RUNTIME_DICTIONARIES_CONFIG_PATH
cfg = load_effective_dictionaries_dict(cfg_path)

# Banner plus a dump of the configured force-mask terms.
banner = "=" * 80
print(banner)
print("CONFIG LOADED")
print(banner)
print(f"force_mask_terms: {cfg.get('blacklist', {}).get('force_mask_terms', [])}")
print()

# Sample lines the whole-word pattern is exercised against.
test_lines = [
    "confirmée à 5,7 g ici au CHUXX. Appel Dr [NOM], hématologue biologiste",
    "CHUXX :",
    "CHUXX",
    "au CHUXX",
    "le CHUXX est",
]

forced_terms = cfg.get("blacklist", {}).get("force_mask_terms", [])

# filter(None, ...) drops falsy entries (empty strings, None) before testing.
for term in filter(None, forced_terms):
    print(f"Testing term: '{term}'")

    # Case-insensitive match of the escaped term between word boundaries.
    term_pattern = re.compile(rf"\b{re.escape(term)}\b", re.IGNORECASE)

    for line in test_lines:
        match = term_pattern.search(line)
        if match is None:
            print(f" ❌ NO MATCH: '{line}'")
        else:
            print(f" ✅ MATCH: '{line}'")
            print(f" → Matched: '{match.group()}'")

    # Blank separator after each term's results.
    print()