feat(cli): charger les moteurs optionnels depuis les modeles embarques
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from PyInstaller.utils.hooks import collect_all, copy_metadata
|
||||
|
||||
# Spec CLI frozen — EXE de PRODUCTION (anonymisation fichier unique sans GUI).
|
||||
# Même moteur / mêmes datas que anonymisation_onefile.spec, mais :
|
||||
# - entrypoint = scripts/anonymize_cli.py (CLI production, pas launcher.py)
|
||||
@@ -22,6 +24,7 @@ def _data_entry(relative_path: str, target_dir: str | None = None):
|
||||
return (str(src), target_dir or relative_path)
|
||||
|
||||
|
||||
binaries = []
|
||||
datas = []
|
||||
for relative_path, target_dir in [
|
||||
("config", "config"),
|
||||
@@ -95,9 +98,41 @@ hiddenimports = [
|
||||
]
|
||||
|
||||
|
||||
def _collect_optional_package(package_name: str):
    """Bundle an optional package into the frozen build, best-effort.

    Calls PyInstaller's ``collect_all`` for *package_name* and appends the
    returned datas, binaries and hidden imports to the module-level
    ``datas``, ``binaries`` and ``hiddenimports`` lists. It then tries to
    add the distribution metadata of the package (recursively) to ``datas``.

    The package is optional, so a failure never aborts the build. Instead of
    being swallowed silently, every failure is reported as a warning so that
    a build missing an engine can be diagnosed from the build log.

    Args:
        package_name: Importable name of the package to collect.
    """
    import logging

    build_log = logging.getLogger("spec.optional_packages")

    # Only the collection call is guarded: if it fails, nothing is added.
    try:
        package_datas, package_binaries, package_hiddenimports = collect_all(package_name)
    except Exception as exc:
        build_log.warning("Optional package %r not collected: %s", package_name, exc)
        return

    datas.extend(package_datas)
    binaries.extend(package_binaries)
    hiddenimports.extend(package_hiddenimports)

    # Metadata is a bonus on top of the collected files; keep what was
    # already gathered even when the metadata lookup fails.
    try:
        datas.extend(copy_metadata(package_name, recursive=True))
    except Exception as exc:
        build_log.warning("Metadata for optional package %r not copied: %s", package_name, exc)
|
||||
|
||||
|
||||
# Optional packages to bundle when they can be collected; each one is
# handled best-effort by _collect_optional_package.
for _package_name in (
    "edsnlp",
    "spacy",
    "thinc",
    "blis",
    "srsly",
    "catalogue",
    "confection",
    "cymem",
    "preshed",
    "murmurhash",
    "gliner",
    "loguru",
):
    _collect_optional_package(_package_name)
|
||||
|
||||
|
||||
a = Analysis(
|
||||
[str(project_dir / "scripts" / "anonymize_cli.py")],
|
||||
pathex=[str(project_dir)],
|
||||
binaries=binaries,
|
||||
datas=datas,
|
||||
hiddenimports=hiddenimports,
|
||||
cipher=block_cipher,
|
||||
|
||||
@@ -9,6 +9,7 @@ Mapping des 13 labels EDS-Pseudo vers les clés PLACEHOLDERS du core d'anonymisa
|
||||
Dépendance : pip install 'edsnlp[ml]>=0.12.0'
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
@@ -41,6 +42,26 @@ EDS_MODELS_CATALOG: Dict[str, str] = {
|
||||
"EDS-Pseudo AP-HP (edsnlp)": "AP-HP/eds-pseudo-public",
|
||||
}
|
||||
|
||||
DEFAULT_MODEL = "AP-HP/eds-pseudo-public"
|
||||
BUNDLED_MODEL_DIR = "eds-pseudo-public"
|
||||
|
||||
|
||||
def _app_dir() -> Path:
    """Return the directory that serves as the application root.

    When ``sys.frozen`` is set, this is ``sys._MEIPASS`` if present, otherwise
    the directory containing ``sys.executable``. When not frozen, it is the
    directory of this source file.
    """
    if not getattr(sys, "frozen", False):
        return Path(__file__).resolve().parent
    executable_dir = Path(sys.executable).parent
    return Path(getattr(sys, "_MEIPASS", executable_dir))
|
||||
|
||||
|
||||
def _bundled_model_path(cache_dir: Optional[Path] = None) -> Optional[Path]:
    """Locate a local copy of the bundled model directory.

    Looks for ``BUNDLED_MODEL_DIR`` first under *cache_dir* (when given),
    then under ``<app dir>/models``.

    Returns:
        The first candidate that is an existing directory, or ``None`` when
        no candidate exists.
    """
    search_roots = [] if cache_dir is None else [Path(cache_dir)]
    search_roots.append(_app_dir() / "models")
    locations = [root / BUNDLED_MODEL_DIR for root in search_roots]
    return next((location for location in locations if location.is_dir()), None)
|
||||
|
||||
|
||||
class EdsPseudoManager:
|
||||
"""Gestionnaire pour le modèle EDS-Pseudo (edsnlp). Même interface que NerModelManager."""
|
||||
@@ -54,16 +75,21 @@ class EdsPseudoManager:
|
||||
def is_loaded(self) -> bool:
    """Tell whether the manager is flagged as loaded and holds a pipeline object."""
    pipeline_present = self._nlp is not None
    return self._loaded and pipeline_present
|
||||
|
||||
def load(self, model_id_or_path: str = "AP-HP/eds-pseudo-public") -> None:
|
||||
def load(self, model_id_or_path: str = DEFAULT_MODEL) -> None:
|
||||
if not _EDSNLP_AVAILABLE:
|
||||
raise RuntimeError("edsnlp non disponible. Installez : pip install 'edsnlp[ml]>=0.12.0'")
|
||||
self.unload()
|
||||
self.model_id = model_id_or_path
|
||||
path = Path(model_id_or_path)
|
||||
source = model_id_or_path
|
||||
if model_id_or_path == DEFAULT_MODEL:
|
||||
bundled = _bundled_model_path(self.cache_dir)
|
||||
if bundled is not None:
|
||||
source = str(bundled)
|
||||
self.model_id = source
|
||||
path = Path(source)
|
||||
if path.is_dir():
|
||||
self._nlp = edsnlp.load(path)
|
||||
else:
|
||||
self._nlp = edsnlp.load(model_id_or_path)
|
||||
self._nlp = edsnlp.load(source)
|
||||
# Activer les scores de confiance NER (edsnlp >= 0.16)
|
||||
try:
|
||||
ner_pipe = self._nlp.get_pipe('ner')
|
||||
|
||||
@@ -73,6 +73,14 @@ def _camembert_model_path() -> Path:
|
||||
return _app_dir() / "models" / "camembert-bio-deid" / "onnx" / "model.onnx"
|
||||
|
||||
|
||||
def _eds_model_path() -> Path:
    """Return the expected location of the EDS-Pseudo model under the app's ``models`` folder."""
    models_root = _app_dir() / "models"
    return models_root / "eds-pseudo-public"
|
||||
|
||||
|
||||
def _gliner_model_path() -> Path:
    """Return the expected location of the GLiNER model under the app's ``models`` folder."""
    models_root = _app_dir() / "models"
    return models_root / "gliner_multi_pii-v1"
|
||||
|
||||
|
||||
def _probe_camembert() -> "tuple[bool, str]":
|
||||
if not _has_module("onnxruntime"):
|
||||
return False, "onnxruntime non embarqué dans cette version"
|
||||
@@ -85,13 +93,17 @@ def _probe_eds() -> "tuple[bool, str]":
|
||||
missing = [m for m in ("edsnlp", "spacy") if not _has_module(m)]
|
||||
if missing:
|
||||
return False, "non embarqué dans cette version (manque : " + ", ".join(missing) + ")"
|
||||
return True, "edsnlp + spacy disponibles"
|
||||
if not _eds_model_path().is_dir():
|
||||
return False, "dépendances disponibles, modèle AP-HP eds-pseudo-public non embarqué"
|
||||
return True, "edsnlp + spacy + modèle AP-HP embarqués"
|
||||
|
||||
|
||||
def _probe_gliner() -> "tuple[bool, str]":
    """Probe whether the GLiNER engine can be used in this build.

    The engine is reported available only when both conditions hold: the
    ``gliner`` module is present and the local model directory exists. A
    stale early ``return True`` placed before the model check has been
    removed, because it made the model check unreachable.

    Returns:
        An ``(available, reason)`` pair; ``reason`` is the user-facing
        explanation of the outcome.
    """
    if not _has_module("gliner"):
        return False, "non embarqué dans cette version (manque : gliner)"
    # The dependency alone is not enough: the model must ship with the build.
    if not _gliner_model_path().is_dir():
        return False, "dépendance disponible, modèle GLiNER non embarqué"
    return True, "gliner + modèle local embarqués"
|
||||
|
||||
|
||||
def _default_probes() -> Dict[str, Probe]:
|
||||
|
||||
@@ -13,6 +13,8 @@ Version compatible : gliner==0.2.18 (pas plus récent, casse optimum-onnx)
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -56,6 +58,18 @@ GLINER_LABEL_MAP: Dict[str, str] = {
|
||||
}
|
||||
|
||||
DEFAULT_MODEL = "urchade/gliner_multi_pii-v1"
|
||||
BUNDLED_MODEL_DIR = "gliner_multi_pii-v1"
|
||||
|
||||
|
||||
def _app_dir() -> Path:
    """Resolve the application root directory.

    Frozen run (``sys.frozen`` set): ``sys._MEIPASS`` when defined, else the
    folder holding ``sys.executable``. Otherwise: the folder of this file.
    """
    is_frozen = getattr(sys, "frozen", False)
    if is_frozen:
        next_to_executable = Path(sys.executable).parent
        bundle_root = getattr(sys, "_MEIPASS", next_to_executable)
        return Path(bundle_root)
    return Path(__file__).resolve().parent
|
||||
|
||||
|
||||
def _bundled_model_path() -> Optional[Path]:
    """Return ``<app dir>/models/<BUNDLED_MODEL_DIR>`` if that directory exists, else ``None``."""
    model_dir = _app_dir() / "models" / BUNDLED_MODEL_DIR
    if model_dir.is_dir():
        return model_dir
    return None
|
||||
|
||||
|
||||
class GlinerManager:
|
||||
@@ -73,10 +87,15 @@ class GlinerManager:
|
||||
if not _GLINER_AVAILABLE:
|
||||
raise RuntimeError("gliner non disponible. Installez : pip install 'gliner==0.2.18'")
|
||||
self.unload()
|
||||
self.model_id = model_id
|
||||
self._model = GLiNER.from_pretrained(model_id)
|
||||
source = model_id
|
||||
if model_id == DEFAULT_MODEL:
|
||||
bundled = _bundled_model_path()
|
||||
if bundled is not None:
|
||||
source = str(bundled)
|
||||
self.model_id = source
|
||||
self._model = GLiNER.from_pretrained(source)
|
||||
self._loaded = True
|
||||
log.info(f"GLiNER chargé: {model_id}")
|
||||
log.info(f"GLiNER chargé: {source}")
|
||||
|
||||
def unload(self) -> None:
|
||||
self._model = None
|
||||
|
||||
@@ -64,3 +64,22 @@ def test_default_probes_run_without_crash_and_are_consistent():
|
||||
for cap in caps.values():
|
||||
assert isinstance(cap.available, bool)
|
||||
assert isinstance(cap.reason, str) and cap.reason
|
||||
|
||||
|
||||
def test_optional_engines_require_bundled_models(monkeypatch, tmp_path):
    """EDS and GLiNER are reported available only once their model folders exist."""
    importable = {"onnxruntime", "edsnlp", "spacy", "gliner"}
    monkeypatch.setattr(ec, "_has_module", lambda name: name in importable)
    monkeypatch.setattr(ec, "_app_dir", lambda: tmp_path)

    models_root = tmp_path / "models"
    onnx_dir = models_root / "camembert-bio-deid" / "onnx"
    onnx_dir.mkdir(parents=True)
    (onnx_dir / "model.onnx").write_bytes(b"fake")

    # Modules are present but neither optional model folder exists yet.
    without_models = ec.capabilities_map()
    for engine in ("eds", "gliner"):
        assert without_models[engine].available is False
        assert "modèle" in without_models[engine].reason

    # Creating the model folders flips both engines to available.
    for folder in ("eds-pseudo-public", "gliner_multi_pii-v1"):
        (models_root / folder).mkdir()
    with_models = ec.capabilities_map()
    assert with_models["eds"].available is True
    assert with_models["gliner"].available is True
|
||||
|
||||
Reference in New Issue
Block a user