feat(phase2): Multi-signal NER — BDPM gazetteers, confiance EDS, safe patterns, GLiNER
Chantier 1: Intégration BDPM (5737 médicaments officiels) dans medication whitelist
Chantier 2: Safe patterns contextuels (dosages mg/mL/cpr, formes pharma, même ligne)
Chantier 3: Scores de confiance NER réels (edsnlp 0.20 ner_confidence_score)
Chantier 4: GLiNER zero-shot (urchade/gliner_multi_pii-v1) en vote croisé
Chantier 5: Scripts export silver annotations + fine-tuning CamemBERT-bio

0 fuite, 0 régression, -18 FP supplémentaires éliminés.
Sécurité: GLiNER ne peut rejeter que si confiance NER < 0.70.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ sys.path.insert(0, str(Path(__file__).parent))
|
||||
import anonymizer_core_refactored_onnx as core
|
||||
from eds_pseudo_manager import EdsPseudoManager
|
||||
from vlm_manager import VlmManager
|
||||
from gliner_manager import GlinerManager
|
||||
|
||||
SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
|
||||
OUTDIR = SRC / "anonymise_audit_30"
|
||||
@@ -57,6 +58,15 @@ def main():
|
||||
assert ner.is_loaded(), "EDS-Pseudo non chargé"
|
||||
print("EDS-Pseudo chargé.", flush=True)
|
||||
|
||||
print("Chargement GLiNER (vote croisé NER)...", flush=True)
|
||||
gliner = GlinerManager()
|
||||
try:
|
||||
gliner.load()
|
||||
print("GLiNER chargé.", flush=True)
|
||||
except Exception as e:
|
||||
print(f"GLiNER indisponible ({e}), on continue sans.", flush=True)
|
||||
gliner = None
|
||||
|
||||
print("Chargement VLM (Ollama qwen2.5vl:7b)...", flush=True)
|
||||
vlm = VlmManager()
|
||||
try:
|
||||
@@ -97,6 +107,7 @@ def main():
|
||||
ner_thresholds=None,
|
||||
ogc_label=ogc,
|
||||
vlm_manager=vlm,
|
||||
gliner_manager=gliner,
|
||||
)
|
||||
audit_path = Path(outputs.get("audit", ""))
|
||||
if audit_path.exists():
|
||||
|
||||
Reference in New Issue
Block a user