feat(cli): add Windows single-file anonymization entrypoint
CLI de production sans GUI pour anonymiser un fichier unique, validé GO par Qwen (revue indépendante contrat/packaging/modèles) sur de vrais PDF. - scripts/anonymize_cli.py (NOUVEAU) : contrat positionnel `Anonymisation-CLI.exe <fichier> <dossier_sortie>` (+ --out compat), chemins espaces/accents, codes retour 0/1/2/3/4. Chargement modèles fail-closed : CamemBERT-bio ONNX OBLIGATOIRE (code 3 si absent, aucun mode dégradé silencieux) ; EDS-Pseudo + GLiNER optionnels, tracés au log ; --no-ner = regex seul assumé. Résolution _MEIPASS frozen alignée sur launcher.py. Sortie burn raster identique GUI v5. - anonymisation_cli_onefile.spec : entrypoint basculé vers anonymize_cli.py (le harnais perf D-19 anonymize_batch_cli.py reste hors build). - docs/build-windows-oneclick.md : section « CLI Windows (sans GUI) » (build, usage, codes retour, modèles, limitations). Tests Linux (vrais PDF) : --help OK, fichier manquant→2, --no-ner accents→0, NER complet→0 (CamemBERT-bio + EDS-Pseudo chargés), modèle déplacé→3. Build/smoke Windows à suivre (séparé). Commit CLI-only strict, distinct du P0. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
120
anonymisation_cli_onefile.spec
Normal file
120
anonymisation_cli_onefile.spec
Normal file
@@ -0,0 +1,120 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Spec CLI frozen — EXE de PRODUCTION (anonymisation fichier unique sans GUI).
|
||||
# Même moteur / mêmes datas que anonymisation_onefile.spec, mais :
|
||||
# - entrypoint = scripts/anonymize_cli.py (CLI production, pas launcher.py)
|
||||
# Contrat : Anonymisation-CLI.exe <fichier> <dossier_sortie>
|
||||
# Modèle CamemBERT-bio ONNX OBLIGATOIRE (fail-closed, code 3 si absent).
|
||||
# - console=True (CLI), pas de Splash
|
||||
# - name = Anonymisation-CLI -> ne remplace pas dist/Anonymisation.exe
|
||||
# (Le harnais perf D-19 reste scripts/anonymize_batch_cli.py, non buildé ici.)
|
||||
|
||||
block_cipher = None
|
||||
|
||||
project_dir = Path(globals().get("SPECPATH", os.getcwd())).resolve()
|
||||
|
||||
|
||||
def _data_entry(relative_path: str, target_dir: str | None = None):
|
||||
src = project_dir / relative_path
|
||||
if not src.exists():
|
||||
return None
|
||||
return (str(src), target_dir or relative_path)
|
||||
|
||||
|
||||
datas = []
|
||||
for relative_path, target_dir in [
|
||||
("config", "config"),
|
||||
("data/bdpm", "data/bdpm"),
|
||||
("data/finess", "data/finess"),
|
||||
("data/insee", "data/insee"),
|
||||
("models/camembert-bio-deid/onnx", "models/camembert-bio-deid/onnx"),
|
||||
("detectors", "detectors"),
|
||||
("scripts", "scripts"),
|
||||
("assets", "assets"),
|
||||
]:
|
||||
entry = _data_entry(relative_path, target_dir)
|
||||
if entry is not None:
|
||||
datas.append(entry)
|
||||
|
||||
for relative_path in [
|
||||
"data/stopwords_manuels.txt",
|
||||
"data/villes_blacklist.txt",
|
||||
"data/dpi_labels_blacklist.txt",
|
||||
"data/companion_blacklist.txt",
|
||||
]:
|
||||
entry = _data_entry(relative_path, "data")
|
||||
if entry is not None:
|
||||
datas.append(entry)
|
||||
|
||||
|
||||
hiddenimports = [
|
||||
"anonymizer_core_refactored_onnx",
|
||||
"admin_rules",
|
||||
"config_defaults",
|
||||
"profile_defaults",
|
||||
"gui_batch_paths",
|
||||
"manual_masking",
|
||||
"pdf_mask_designer",
|
||||
"format_converter",
|
||||
"ner_manager_onnx",
|
||||
"camembert_ner_manager",
|
||||
"eds_pseudo_manager",
|
||||
"gliner_manager",
|
||||
"vlm_manager",
|
||||
"build_info",
|
||||
"doctr",
|
||||
"doctr.io",
|
||||
"doctr.models",
|
||||
"doctr.models.detection",
|
||||
"doctr.models.recognition",
|
||||
"cv2",
|
||||
"torchvision",
|
||||
"edsnlp",
|
||||
"edsnlp.pipes",
|
||||
"edsnlp.pipes.ner",
|
||||
"edsnlp.pipes.ner.pseudo",
|
||||
"spacy",
|
||||
"spacy.lang.fr",
|
||||
"gliner",
|
||||
"onnxruntime",
|
||||
"transformers",
|
||||
"tokenizers",
|
||||
"torch",
|
||||
"pdfplumber",
|
||||
"fitz",
|
||||
"PIL",
|
||||
"yaml",
|
||||
"loguru",
|
||||
"regex",
|
||||
"optimum",
|
||||
"optimum.onnxruntime",
|
||||
"optimum.pipelines",
|
||||
"optimum.modeling_base",
|
||||
"optimum.exporters.onnx",
|
||||
]
|
||||
|
||||
|
||||
a = Analysis(
|
||||
[str(project_dir / "scripts" / "anonymize_cli.py")],
|
||||
pathex=[str(project_dir)],
|
||||
datas=datas,
|
||||
hiddenimports=hiddenimports,
|
||||
cipher=block_cipher,
|
||||
noarchive=False,
|
||||
)
|
||||
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
||||
|
||||
exe = EXE(
|
||||
pyz,
|
||||
a.scripts,
|
||||
a.binaries,
|
||||
a.zipfiles,
|
||||
a.datas,
|
||||
[],
|
||||
name="Anonymisation-CLI",
|
||||
debug=False,
|
||||
strip=False,
|
||||
upx=False,
|
||||
console=True,
|
||||
)
|
||||
Reference in New Issue
Block a user