feat(cli): charger les moteurs optionnels depuis les modèles embarqués

This commit is contained in:
2026-06-17 19:52:29 +02:00
parent 9b40fc0a85
commit ea1752d4a7
5 changed files with 120 additions and 9 deletions

View File

@@ -1,6 +1,8 @@
import os
from pathlib import Path
from PyInstaller.utils.hooks import collect_all, copy_metadata
# Spec CLI frozen — EXE de PRODUCTION (anonymisation fichier unique sans GUI).
# Même moteur / mêmes datas que anonymisation_onefile.spec, mais :
# - entrypoint = scripts/anonymize_cli.py (CLI production, pas launcher.py)
@@ -22,6 +24,7 @@ def _data_entry(relative_path: str, target_dir: str | None = None):
return (str(src), target_dir or relative_path)
binaries = []
datas = []
for relative_path, target_dir in [
("config", "config"),
@@ -95,9 +98,41 @@ hiddenimports = [
]
def _collect_optional_package(package_name: str):
    """Bundle an optional package into the frozen build on a best-effort basis.

    Everything ``collect_all`` reports for *package_name* is appended to the
    module-level ``datas``, ``binaries`` and ``hiddenimports`` lists, then the
    distribution metadata returned by ``copy_metadata`` is appended to
    ``datas``.

    A failure never aborts the build, because the package is optional. It is
    reported on stderr instead of being swallowed silently, so the build log
    shows why an engine is missing from the executable.

    Args:
        package_name: Importable name of the package to collect.
    """
    import sys  # local import: only needed for the diagnostics below

    # Keep the try body minimal: only the collection call itself is expected
    # to fail when the package is absent or its hook misbehaves.
    try:
        package_datas, package_binaries, package_hiddenimports = collect_all(package_name)
    except Exception as exc:
        print(
            f"[spec] optional package {package_name!r} not collected: {exc!r}",
            file=sys.stderr,
        )
        return

    datas.extend(package_datas)
    binaries.extend(package_binaries)
    hiddenimports.extend(package_hiddenimports)

    # Metadata is a nice-to-have on top of the package files; its absence
    # must not undo the collection performed above.
    try:
        datas.extend(copy_metadata(package_name, recursive=True))
    except Exception as exc:
        print(
            f"[spec] metadata for optional package {package_name!r} not copied: {exc!r}",
            file=sys.stderr,
        )
# Packages handed one by one to _collect_optional_package.
_OPTIONAL_PACKAGES = (
    "edsnlp",
    "spacy",
    "thinc",
    "blis",
    "srsly",
    "catalogue",
    "confection",
    "cymem",
    "preshed",
    "murmurhash",
    "gliner",
    "loguru",
)
for _package_name in _OPTIONAL_PACKAGES:
    _collect_optional_package(_package_name)
a = Analysis(
[str(project_dir / "scripts" / "anonymize_cli.py")],
pathex=[str(project_dir)],
binaries=binaries,
datas=datas,
hiddenimports=hiddenimports,
cipher=block_cipher,

View File

@@ -9,6 +9,7 @@ Mapping des 13 labels EDS-Pseudo vers les clés PLACEHOLDERS du core d'anonymisa
Dépendance : pip install 'edsnlp[ml]>=0.12.0'
"""
from __future__ import annotations
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -41,6 +42,26 @@ EDS_MODELS_CATALOG: Dict[str, str] = {
"EDS-Pseudo AP-HP (edsnlp)": "AP-HP/eds-pseudo-public",
}
# Default model identifier. load() substitutes a bundled local copy only
# when it is asked for exactly this value.
DEFAULT_MODEL = "AP-HP/eds-pseudo-public"
# Directory name under which a local copy of the default model is looked up
# by _bundled_model_path().
BUNDLED_MODEL_DIR = "eds-pseudo-public"
def _app_dir() -> Path:
    """Return the base directory used to locate bundled resources.

    When ``sys.frozen`` is truthy, this is ``sys._MEIPASS`` if that attribute
    exists, otherwise the directory containing ``sys.executable``. When not
    frozen, it is the directory containing this module.
    """
    if not getattr(sys, "frozen", False):
        return Path(__file__).resolve().parent
    executable_dir = Path(sys.executable).parent
    return Path(getattr(sys, "_MEIPASS", executable_dir))
def _bundled_model_path(cache_dir: Optional[Path] = None) -> Optional[Path]:
    """Locate a local copy of the default model, if one exists.

    A ``BUNDLED_MODEL_DIR`` directory is searched first inside *cache_dir*
    (when given), then inside ``<app dir>/models``.

    Returns:
        The first candidate that is an existing directory, or ``None`` when
        neither location contains the model.
    """
    search_roots = [] if cache_dir is None else [Path(cache_dir)]
    search_roots.append(_app_dir() / "models")
    model_dirs = (root / BUNDLED_MODEL_DIR for root in search_roots)
    return next((model_dir for model_dir in model_dirs if model_dir.is_dir()), None)
class EdsPseudoManager:
"""Gestionnaire pour le modèle EDS-Pseudo (edsnlp). Même interface que NerModelManager."""
@@ -54,16 +75,21 @@ class EdsPseudoManager:
def is_loaded(self) -> bool:
    """Tell whether a model is loaded: the flag is set and a pipeline object is held."""
    loaded_flag = self._loaded
    if not loaded_flag:
        return loaded_flag
    return self._nlp is not None
def load(self, model_id_or_path: str = "AP-HP/eds-pseudo-public") -> None:
def load(self, model_id_or_path: str = DEFAULT_MODEL) -> None:
if not _EDSNLP_AVAILABLE:
raise RuntimeError("edsnlp non disponible. Installez : pip install 'edsnlp[ml]>=0.12.0'")
self.unload()
self.model_id = model_id_or_path
path = Path(model_id_or_path)
source = model_id_or_path
if model_id_or_path == DEFAULT_MODEL:
bundled = _bundled_model_path(self.cache_dir)
if bundled is not None:
source = str(bundled)
self.model_id = source
path = Path(source)
if path.is_dir():
self._nlp = edsnlp.load(path)
else:
self._nlp = edsnlp.load(model_id_or_path)
self._nlp = edsnlp.load(source)
# Activer les scores de confiance NER (edsnlp >= 0.16)
try:
ner_pipe = self._nlp.get_pipe('ner')

View File

@@ -73,6 +73,14 @@ def _camembert_model_path() -> Path:
return _app_dir() / "models" / "camembert-bio-deid" / "onnx" / "model.onnx"
def _eds_model_path() -> Path:
    """Return the expected location of the ``eds-pseudo-public`` model directory."""
    return _app_dir().joinpath("models", "eds-pseudo-public")
def _gliner_model_path() -> Path:
    """Return the expected location of the ``gliner_multi_pii-v1`` model directory."""
    return _app_dir().joinpath("models", "gliner_multi_pii-v1")
def _probe_camembert() -> "tuple[bool, str]":
if not _has_module("onnxruntime"):
return False, "onnxruntime non embarqué dans cette version"
@@ -85,13 +93,17 @@ def _probe_eds() -> "tuple[bool, str]":
missing = [m for m in ("edsnlp", "spacy") if not _has_module(m)]
if missing:
return False, "non embarqué dans cette version (manque : " + ", ".join(missing) + ")"
return True, "edsnlp + spacy disponibles"
if not _eds_model_path().is_dir():
return False, "dépendances disponibles, modèle AP-HP eds-pseudo-public non embarqué"
return True, "edsnlp + spacy + modèle AP-HP embarqués"
def _probe_gliner() -> "tuple[bool, str]":
    """Report whether the GLiNER engine is usable.

    Returns:
        ``(available, reason)``. ``available`` is ``True`` only when the
        ``gliner`` module is present and the local model directory exists;
        ``reason`` is a human-readable explanation in every case.
    """
    if not _has_module("gliner"):
        return False, "non embarqué dans cette version (manque : gliner)"
    # The module alone is not enough: without the local model directory the
    # engine must be reported as unavailable.
    if not _gliner_model_path().is_dir():
        return False, "dépendance disponible, modèle GLiNER non embarqué"
    return True, "gliner + modèle local embarqués"
def _default_probes() -> Dict[str, Probe]:

View File

@@ -13,6 +13,8 @@ Version compatible : gliner==0.2.18 (pas plus récent, casse optimum-onnx)
from __future__ import annotations
import logging
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
log = logging.getLogger(__name__)
@@ -56,6 +58,18 @@ GLINER_LABEL_MAP: Dict[str, str] = {
}
# Default model identifier. The bundled local copy is substituted only when
# the requested model id equals this value.
DEFAULT_MODEL = "urchade/gliner_multi_pii-v1"
# Directory name looked up under "<app dir>/models" by _bundled_model_path().
BUNDLED_MODEL_DIR = "gliner_multi_pii-v1"
def _app_dir() -> Path:
    """Return the base directory used to locate bundled resources.

    When ``sys.frozen`` is truthy, the result is ``sys._MEIPASS`` if present,
    else the directory of ``sys.executable``. Otherwise it is the directory
    containing this module.
    """
    is_frozen = getattr(sys, "frozen", False)
    if is_frozen:
        fallback_dir = Path(sys.executable).parent
        return Path(getattr(sys, "_MEIPASS", fallback_dir))
    module_file = Path(__file__).resolve()
    return module_file.parent
def _bundled_model_path() -> Optional[Path]:
    """Return ``<app dir>/models/<BUNDLED_MODEL_DIR>`` if it is a directory, else ``None``."""
    model_dir = _app_dir() / "models" / BUNDLED_MODEL_DIR
    if not model_dir.is_dir():
        return None
    return model_dir
class GlinerManager:
@@ -73,10 +87,15 @@ class GlinerManager:
if not _GLINER_AVAILABLE:
raise RuntimeError("gliner non disponible. Installez : pip install 'gliner==0.2.18'")
self.unload()
self.model_id = model_id
self._model = GLiNER.from_pretrained(model_id)
source = model_id
if model_id == DEFAULT_MODEL:
bundled = _bundled_model_path()
if bundled is not None:
source = str(bundled)
self.model_id = source
self._model = GLiNER.from_pretrained(source)
self._loaded = True
log.info(f"GLiNER chargé: {model_id}")
log.info(f"GLiNER chargé: {source}")
def unload(self) -> None:
self._model = None

View File

@@ -64,3 +64,22 @@ def test_default_probes_run_without_crash_and_are_consistent():
for cap in caps.values():
assert isinstance(cap.available, bool)
assert isinstance(cap.reason, str) and cap.reason
def test_optional_engines_require_bundled_models(monkeypatch, tmp_path):
    """EDS and GLiNER are reported available only once their model folders exist."""
    present_modules = {"onnxruntime", "edsnlp", "spacy", "gliner"}
    monkeypatch.setattr(ec, "_has_module", lambda name: name in present_modules)
    monkeypatch.setattr(ec, "_app_dir", lambda: tmp_path)

    models_root = tmp_path / "models"
    onnx_dir = models_root / "camembert-bio-deid" / "onnx"
    onnx_dir.mkdir(parents=True)
    (onnx_dir / "model.onnx").write_bytes(b"fake")

    # Modules are reported present, but neither model folder exists yet.
    caps_before = ec.capabilities_map()
    for engine in ("eds", "gliner"):
        assert caps_before[engine].available is False
        assert "modèle" in caps_before[engine].reason

    # Creating the two model folders must flip both engines to available.
    for folder in ("eds-pseudo-public", "gliner_multi_pii-v1"):
        (models_root / folder).mkdir()

    caps_after = ec.capabilities_map()
    for engine in ("eds", "gliner"):
        assert caps_after[engine].available is True