docs: Analyse finale validation corpus - système fonctionnel
This commit is contained in:
41
tools/debug_force_term.py
Normal file
41
tools/debug_force_term.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Debug force_term mechanism."""
|
||||
|
||||
import re
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
|
||||
# Load the YAML configuration. The path is relative, so it is resolved
# against the current working directory of the process.
cfg_path = Path("config/dictionnaires.yml")
# yaml.safe_load returns None for an empty document; fall back to an empty
# mapping so the .get() lookups on cfg do not raise AttributeError.
cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}

# Banner plus the raw list of terms read from blacklist.force_mask_terms.
print("=" * 80)
print("CONFIG LOADED")
print("=" * 80)
print(f"force_mask_terms: {cfg.get('blacklist', {}).get('force_mask_terms', [])}")
print()
|
||||
|
||||
# Sample lines each term pattern is searched in. They all contain the token
# "CHCB" in a different surrounding context.
test_lines = [
    # Inside a full sentence, followed by a period.
    "confirmée à 5,7 g ici au CHCB. Appel Dr [NOM], hématologue biologiste",
    # Followed by a space and a colon.
    "CHCB :",
    # The token alone.
    "CHCB",
    # At the end of the line, after another word.
    "au CHCB",
    # Between two words.
    "le CHCB est",
]
|
||||
|
||||
# For each configured force-mask term, compile a whole-word, case-insensitive
# pattern and report which of the sample lines it matches.
terms = cfg.get("blacklist", {}).get("force_mask_terms", [])
for term in filter(None, terms):  # falsy entries (empty string, None) are skipped
    print(f"Testing term: '{term}'")
    # re.escape keeps any regex metacharacters in the term literal.
    pattern = re.compile(rf"\b{re.escape(term)}\b", re.IGNORECASE)

    for line in test_lines:
        match = pattern.search(line)
        if match is None:
            print(f" ❌ NO MATCH: '{line}'")
            continue
        print(f" ✅ MATCH: '{line}'")
        print(f" → Matched: '{match.group()}'")

    # Blank separator line after each term's results.
    print()
|
||||
Reference in New Issue
Block a user