feat(phase2): Intégration CamemBERT-bio ONNX comme 3e signal NER (vote triple)
- camembert_ner_manager.py : inférence ONNX CPU (~10 ms), predict/predict_long/validate_eds_entities
- Vote triple NER : EDS-Pseudo (confiance) + GLiNER (zero-shot) + CamemBERT-bio (fine-tuné F1=89%)
- CamemBERT-bio peut sauver un vrai nom à basse confiance EDS (camembert_confirmed=True)
- CamemBERT-bio confirme le rejet des FP médicaux (Paracétamol, Tramadol → False)
- Intégré dans process_pdf via le paramètre camembert_manager
- run_batch_30_audit.py mis à jour pour charger le modèle

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import anonymizer_core_refactored_onnx as core
|
||||
from eds_pseudo_manager import EdsPseudoManager
|
||||
from vlm_manager import VlmManager
|
||||
from gliner_manager import GlinerManager
|
||||
from camembert_ner_manager import CamembertNerManager
|
||||
|
||||
SRC = Path("/home/dom/Téléchargements/II-1 Ctrl_T2A_2025_CHCB_DocJustificatifs (1)")
|
||||
OUTDIR = SRC / "anonymise_audit_30"
|
||||
@@ -67,6 +68,15 @@ def main():
|
||||
print(f"GLiNER indisponible ({e}), on continue sans.", flush=True)
|
||||
gliner = None
|
||||
|
||||
print("Chargement CamemBERT-bio ONNX (vote croisé NER)...", flush=True)
|
||||
camembert = CamembertNerManager()
|
||||
try:
|
||||
camembert.load()
|
||||
print("CamemBERT-bio ONNX chargé.", flush=True)
|
||||
except Exception as e:
|
||||
print(f"CamemBERT-bio indisponible ({e}), on continue sans.", flush=True)
|
||||
camembert = None
|
||||
|
||||
print("Chargement VLM (Ollama qwen2.5vl:7b)...", flush=True)
|
||||
vlm = VlmManager()
|
||||
try:
|
||||
@@ -108,6 +118,7 @@ def main():
|
||||
ogc_label=ogc,
|
||||
vlm_manager=vlm,
|
||||
gliner_manager=gliner,
|
||||
camembert_manager=camembert,
|
||||
)
|
||||
audit_path = Path(outputs.get("audit", ""))
|
||||
if audit_path.exists():
|
||||
|
||||
Reference in New Issue
Block a user