# File: anonymisation/tools/debug_force_term.py
# (42 lines, 1020 B, Python)

#!/usr/bin/env python3
"""Debug force_term mechanism."""
import re
import yaml
from pathlib import Path
# Config file holding the blacklist section. The path is relative, so it is
# resolved against the current working directory.
CONFIG_PATH = Path("config/dictionnaires.yml")

# Sample lines against which every configured term is tested.
TEST_LINES = [
    "confirmée à 5,7 g ici au CHCB. Appel Dr [NOM], hématologue biologiste",
    "CHCB :",
    "CHCB",
    "au CHCB",
    "le CHCB est",
]


def get_force_mask_terms(config):
    """Return the ``blacklist.force_mask_terms`` entries of *config*.

    ``dict.get(key, default)`` only falls back to the default when the key is
    absent, so an empty YAML file (``None``), a ``blacklist:`` key with no
    value, or a ``force_mask_terms:`` key with no value would otherwise raise.
    All of those cases yield an empty list here.

    Args:
        config: The object returned by ``yaml.safe_load`` (may be ``None``).

    Returns:
        The configured terms, or ``[]`` when the section is missing or empty.
    """
    blacklist = config.get("blacklist") if isinstance(config, dict) else None
    terms = (
        blacklist.get("force_mask_terms")
        if isinstance(blacklist, dict)
        else None
    )
    return terms or []


def compile_term_pattern(term):
    """Compile the case-insensitive, word-bounded regex used to test *term*.

    The term is escaped, so it is matched literally between ``\\b`` anchors.
    """
    return re.compile(rf"\b{re.escape(term)}\b", re.IGNORECASE)


def main():
    """Load the config, print the terms and report matches per sample line."""
    cfg = yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8"))
    terms = get_force_mask_terms(cfg)

    print("=" * 80)
    print("CONFIG LOADED")
    print("=" * 80)
    print(f"force_mask_terms: {terms}")
    print()

    for term in terms:
        # Skip blank entries (e.g. an empty list item in the YAML file).
        if not term:
            continue
        print(f"Testing term: '{term}'")
        word_rx = compile_term_pattern(term)
        for line in TEST_LINES:
            match = word_rx.search(line)
            if match:
                print(f" ✅ MATCH: '{line}'")
                print(f" → Matched: '{match.group()}'")
            else:
                print(f" ❌ NO MATCH: '{line}'")
        print()


if __name__ == "__main__":
    main()