# Condition du GO-CONDITIONNEL Qwen sur le lot engine capabilities
# (cb3b767/890edb3/5e5f0bd) : un profil YAML forçant enable_eds/enable_gliner
# ne doit pas déclencher un chargement voué à l'échec silencieux.
# NerManagers.ensure_loaded() applique désormais un garde-fou via la sonde
# engine_capabilities.capabilities_map() (injectable) AVANT toute tentative de
# load EDS/GLiNER : si le moteur optionnel demandé est indisponible dans le
# build courant → warning + désactivation forcée dans les réglages runtime.
# Best-effort (sonde en échec ⇒ réglages inchangés, les try/except de load
# protègent déjà). Sonde légère (find_spec), aucun import lourd. CamemBERT
# (requis) inchangé. Diff limité au garde-fou + tests cibles. TDD : 4 tests
# (test_gui_v6_engine_bridge.py) — eds/gliner indisponibles désactivés et
# jamais chargés, moteur disponible conservé, fail-safe sonde. 282 unit passed.
# Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
#
# 234 lines · 8.0 KiB · Python
"""Pont GUI V6 → moteur d'anonymisation (G3-A).

Construit les kwargs d'appel du moteur (``process_document``) au plus proche de
la V5 / du CLI de production, et charge les managers NER **paresseusement** :

- aucun manager n'est importé ni instancié à l'import de ce module ;
- le chargement réel n'a lieu qu'au premier traitement (``ensure_loaded``) ;
- les factories sont injectables pour les tests (aucun modèle, aucun réseau).

Mapping moteur (identique au CLI validé `scripts/anonymize_cli.py`) :
- ``camembert_manager`` ← CamembertNerManager (NER local principal)
- ``ner_manager`` ← EdsPseudoManager (optionnel)
- ``gliner_manager`` ← GlinerManager (optionnel)
- ``use_hf`` ← True si au moins un manager NER est chargé

Aucune logique de détection ici : on orchestre uniquement.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, Optional

from engine_capabilities import capabilities_map

log = logging.getLogger(__name__)

# Signature of the processing callable: (document path, output directory) -> dict.
ProcessFn = Callable[[Path, Path], dict]
# Zero-argument factory returning a manager object.
ManagerFactory = Callable[[], Any]
# Capabilities provider: () -> {key: object exposing `.available`/`.reason`}.
CapsProvider = Callable[[], Dict[str, Any]]


class ManagerState(str, Enum):
    """Loading state reported by the NER manager container.

    The ``str`` mix-in makes each member compare equal to its plain string
    value (e.g. ``ManagerState.READY == "ready"``).
    """

    # Nothing loaded yet, or local NER is disabled in the settings.
    NOT_LOADED = "not_loaded"
    # A load attempt is in progress.
    LOADING = "loading"
    # The mandatory manager loaded (optional ones may still be absent).
    READY = "ready"
    # The mandatory manager could not be loaded.
    UNAVAILABLE = "unavailable"


@dataclass
class EngineSettings:
    """Engine call settings exposed by the Configuration tab."""

    # Forwarded verbatim to the engine as keyword arguments of the same name.
    make_vector_redaction: bool = False
    also_make_raster_burn: bool = True
    config_path: Optional[Path] = None
    # Master switch: when False no NER manager is loaded and ``use_hf`` is False.
    use_local_ner: bool = True
    # Optional engines; the capability guard may force these back to False at
    # runtime when the engine is not available in the current build.
    enable_eds: bool = False
    enable_gliner: bool = False
    # Forwarded verbatim to the engine as ``ogc_label``.
    ogc_label: Optional[str] = None
    # NOTE(review): not read anywhere in this module — presumably the name of
    # the YAML profile that produced these settings; confirm against callers.
    profile: Optional[str] = None


def _default_factories() -> dict[str, ManagerFactory]:
    """Return the default manager factories (lazy import, real load).

    The factories are defined inside this function so that no manager module
    is imported when this module itself is imported: each ``import`` statement
    only runs when the corresponding factory is called.

    Returns:
        A mapping with the keys ``"camembert"``, ``"eds"`` and ``"gliner"``;
        each value is a zero-argument callable that imports the manager class,
        instantiates it, calls ``load()`` on it and returns the instance.
    """

    # The imports are kept as plain static statements (rather than a generic
    # importlib helper) so each dependency stays visible at its point of use.

    def camembert() -> Any:
        from camembert_ner_manager import CamembertNerManager

        manager = CamembertNerManager()
        manager.load()
        return manager

    def eds() -> Any:
        from eds_pseudo_manager import EdsPseudoManager

        manager = EdsPseudoManager()
        manager.load()
        return manager

    def gliner() -> Any:
        from gliner_manager import GlinerManager

        manager = GlinerManager()
        manager.load()
        return manager

    return {"camembert": camembert, "eds": eds, "gliner": gliner}


class NerManagers:
    """Lazily loaded container for the NER managers handed to the engine.

    Construction neither imports nor instantiates any manager: the factories
    only run when ``ensure_loaded`` is called. The ``settings`` object is kept
    by reference, and the capability guard may switch ``enable_eds`` /
    ``enable_gliner`` off on it.
    """

    # (capabilities key, human-readable label, EngineSettings flag) for each
    # optional engine covered by the capability guard.
    _OPTIONAL_ENGINES = (
        ("eds", "EDS-Pseudo", "enable_eds"),
        ("gliner", "GLiNER", "enable_gliner"),
    )

    def __init__(
        self,
        settings: EngineSettings,
        factories: Optional[dict[str, ManagerFactory]] = None,
        caps_provider: Optional[CapsProvider] = None,
    ) -> None:
        """Store the settings and the injectable collaborators.

        Args:
            settings: Runtime engine settings; read on every load and possibly
                mutated by the capability guard.
            factories: Mapping looked up with the keys ``"camembert"``,
                ``"eds"`` and ``"gliner"``, each a zero-argument callable
                returning a manager. Defaults to ``_default_factories()``.
            caps_provider: Zero-argument callable returning the capabilities
                mapping. Defaults to ``capabilities_map``.
        """
        self._settings = settings
        self._factories = factories if factories is not None else _default_factories()
        # Probe for the engines actually shipped in this build (injectable for tests).
        self._caps_provider = caps_provider if caps_provider is not None else capabilities_map
        self._camembert: Any = None
        self._eds: Any = None
        self._gliner: Any = None
        self._state = ManagerState.NOT_LOADED

    @property
    def state(self) -> ManagerState:
        """Current loading state."""
        return self._state

    @property
    def use_hf(self) -> bool:
        """True when at least one of the three managers is set and truthy."""
        return bool(self._camembert or self._eds or self._gliner)

    def as_kwargs(self) -> dict:
        """Return the managers under the keyword names used for the engine call.

        EDS-Pseudo is passed as ``ner_manager``; managers that are not loaded
        are passed as ``None``.
        """
        return {
            "ner_manager": self._eds,
            "gliner_manager": self._gliner,
            "camembert_manager": self._camembert,
        }

    def _apply_capability_guard(self) -> None:
        """Disable a requested optional engine that is reported unavailable.

        Prevents a configuration forcing ``enable_eds`` / ``enable_gliner``
        from triggering a load that can only fail: when the capabilities
        mapping reports the engine as not available, a warning is logged and
        the matching flag is forced to ``False`` on the runtime settings.

        Best-effort: if the probe raises, the settings are left untouched (the
        load itself is already protected against failures). An engine missing
        from the mapping is also left enabled.
        """
        requested = [
            engine
            for engine in self._OPTIONAL_ENGINES
            if getattr(self._settings, engine[2])
        ]
        if not requested:
            # Nothing optional was asked for: do not even run the probe.
            return

        try:
            caps = self._caps_provider()
        except Exception:  # noqa: BLE001 — best-effort, never block the load
            # Keep a trace of the failed probe instead of swallowing it silently.
            log.debug(
                "Sonde engine_capabilities en échec — réglages inchangés",
                exc_info=True,
            )
            return

        for key, label, flag in requested:
            cap = caps.get(key) if hasattr(caps, "get") else None
            if cap is None or getattr(cap, "available", False):
                continue
            log.warning(
                "%s demandé par la configuration mais non embarqué dans "
                "cette version — désactivation forcée (%s)",
                label,
                getattr(cap, "reason", ""),
            )
            setattr(self._settings, flag, False)

    def _load_optional(self, key: str, label: str) -> Any:
        """Run the factory of an optional engine; return ``None`` if it fails."""
        try:
            return self._factories[key]()
        except Exception:  # noqa: BLE001 — optional engine: absence is tolerated
            log.warning(
                "Chargement de %s impossible — moteur optionnel ignoré",
                label,
                exc_info=True,
            )
            return None

    def ensure_loaded(self) -> ManagerState:
        """Load the required managers if needed and return the resulting state.

        Does nothing once the state is ``READY``. Failures of the factories are
        logged and never propagated.

        Returns:
            ``NOT_LOADED`` when local NER is disabled in the settings,
            ``UNAVAILABLE`` when the mandatory CamemBERT manager failed to
            load, ``READY`` otherwise (even if an optional engine failed).
        """
        if not self._settings.use_local_ner:
            self._state = ManagerState.NOT_LOADED
            return self._state
        if self._state == ManagerState.READY:
            return self._state

        self._state = ManagerState.LOADING
        # Guard: never attempt to load an optional engine that is not shipped.
        self._apply_capability_guard()
        try:
            # CamemBERT is the main local NER (mandatory when NER is active).
            self._camembert = self._factories["camembert"]()
        except Exception:  # noqa: BLE001 — reported through the state, not raised
            log.exception("Chargement de CamemBERT impossible — NER local indisponible")
            self._state = ManagerState.UNAVAILABLE
            return self._state

        if self._settings.enable_eds:
            self._eds = self._load_optional("eds", "EDS-Pseudo")
        if self._settings.enable_gliner:
            self._gliner = self._load_optional("gliner", "GLiNER")

        self._state = ManagerState.READY
        return self._state


def build_engine_kwargs(
    settings: EngineSettings, managers: Optional[NerManagers] = None
) -> dict:
    """Build the keyword-argument dict passed to the engine.

    Args:
        settings: Source of the rendering options and of the ``use_local_ner``
            switch.
        managers: Optional manager container. Its managers are only included
            when it is provided and ``settings.use_local_ner`` is true.

    Returns:
        A dict with ``make_vector_redaction``, ``also_make_raster_burn``,
        ``config_path``, ``ogc_label`` and ``use_hf``; plus the three manager
        entries from ``managers.as_kwargs()`` when NER is active. ``use_hf`` is
        ``managers.use_hf`` in that case and ``False`` otherwise.
    """
    kwargs: dict = {
        "make_vector_redaction": settings.make_vector_redaction,
        "also_make_raster_burn": settings.also_make_raster_burn,
        "config_path": settings.config_path,
        "ogc_label": settings.ogc_label,
    }
    ner_active = managers is not None and settings.use_local_ner
    if ner_active:
        kwargs.update(managers.as_kwargs())
    kwargs["use_hf"] = managers.use_hf if ner_active else False
    return kwargs


def make_process_fn(
    settings: EngineSettings,
    managers: Optional[NerManagers] = None,
    engine: Optional[Callable[..., dict]] = None,
) -> ProcessFn:
    """Return a ``process_fn(doc, out_dir)`` wired to the engine.

    Args:
        settings: Engine settings, read again on every call of the returned
            function.
        managers: Manager container to use; a new ``NerManagers(settings)`` is
            created when omitted.
        engine: Callable invoked as ``engine(doc_path, out_dir, **kwargs)``.
            Injectable for tests; by default ``process_document`` is imported
            lazily on each call, so the engine is not loaded when this module
            is imported.

    Returns:
        A callable taking the document path and the output directory and
        returning whatever the engine returns.
    """
    managers = managers if managers is not None else NerManagers(settings)

    def process_fn(doc_path: Path, out_dir: Path) -> dict:
        # Managers are only loaded on the first actual processing request.
        if settings.use_local_ner:
            managers.ensure_loaded()
        kwargs = build_engine_kwargs(settings, managers)
        run = engine
        if run is None:
            from anonymizer_core_refactored_onnx import process_document

            run = process_document
        return run(doc_path, out_dir, **kwargs)

    return process_fn
|