Externalize dictionaries and add anonymization review corpus

This commit is contained in:
2026-04-21 10:32:57 +02:00
parent 39db675052
commit 34dcf8f360
99 changed files with 1805 additions and 805 deletions

View File

@@ -29,6 +29,8 @@ from typing import Optional
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import JSONResponse
+from config_defaults import RUNTIME_DICTIONARIES_CONFIG_PATH
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
@@ -86,7 +88,7 @@ def _load_models():
"""Charge tous les modèles NER une seule fois au démarrage."""
global _eds_manager, _camembert_manager, _gliner_manager, _vlm_manager, _cfg
-_cfg = load_dictionaries(Path(__file__).parent / "config" / "dictionnaires.yml")
+_cfg = load_dictionaries(RUNTIME_DICTIONARIES_CONFIG_PATH)
# EDS-Pseudo (F1=0.97)
if EdsPseudoManager is not None:
@@ -288,7 +290,7 @@ async def anonymize_pdf(
out_dir=out_dir,
make_vector_redaction=vector_redaction,
also_make_raster_burn=raster_redaction,
-config_path=Path(__file__).parent / "config" / "dictionnaires.yml",
+config_path=RUNTIME_DICTIONARIES_CONFIG_PATH,
use_hf=use_ner and ner_mgr is not None,
ner_manager=ner_mgr,
gliner_manager=_gliner_manager if use_ner else None,