#!/usr/bin/env python3
"""Debug force_term mechanism.

Reads ``config/dictionnaires.yml``, prints the ``blacklist.force_mask_terms``
list, then, for every non-empty term, builds a case-insensitive word-bounded
regex and reports whether it matches each of a few sample lines.
"""

import re
from pathlib import Path

import yaml

# Load config
cfg_path = Path("config/dictionnaires.yml")
# yaml.safe_load returns None for an empty document; fall back to an empty
# mapping so the lookups below do not fail with AttributeError.
cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}

# A key that is present but left empty in the YAML parses as None, which the
# default argument of dict.get does not cover -- hence the explicit `or`.
force_mask_terms = (cfg.get("blacklist") or {}).get("force_mask_terms") or []

print("=" * 80)
print("CONFIG LOADED")
print("=" * 80)
print(f"force_mask_terms: {force_mask_terms}")
print()

# Test the pattern
test_lines = [
    "confirmée à 5,7 g ici au CHCB. Appel Dr [NOM], hématologue biologiste",
    "CHCB :",
    "CHCB",
    "au CHCB",
    "le CHCB est",
]

for term in force_mask_terms:
    # Skip empty / null entries in the configured list.
    if not term:
        continue

    print(f"Testing term: '{term}'")
    # NOTE: \b only matches between a word character and a non-word
    # character, so a term that starts or ends with punctuation cannot match
    # at that edge. The pattern is left untouched on purpose -- presumably it
    # mirrors the one used by the masking code (TODO confirm).
    word_rx = re.compile(rf"\b{re.escape(term)}\b", re.IGNORECASE)

    for line in test_lines:
        match = word_rx.search(line)
        if match:
            print(f" ✅ MATCH: '{line}'")
            print(f" → Matched: '{match.group()}'")
        else:
            print(f" ❌ NO MATCH: '{line}'")
    # Blank separator line between terms.
    print()