Files
anonymisation/anonymisation_gui_v6_onefile.spec
Domi31tls 8d683bc6d8 feat(ocr): migrer l'OCR de docTR (PyTorch) vers OnnxTR (ONNX Runtime)
OnnxTR exécute les MÊMES modèles que docTR (db_resnet50 + crnn_vgg16_bn) sur
ONNX Runtime, sans PyTorch. Corrige le crash torch/oneDNN « could not create a
primitive » sur CPU contraint (VM 2 cœurs collaborateur : OCR scan impossible →
quarantaine). Qualité identique validée empiriquement (CER 0,10-0,23 % vs docTR,
2 validations indépendantes Claude+Qwen), OCR ~2-3× plus rapide CPU.

- core : import OnnxTR, _get_ocr_model(), _OCR_AVAILABLE, boucle OCR inchangée
  (API miroir) ; ONNXTR_CACHE_DIR pour le frozen ; bandeau de logs ENV au démarrage
  (OS, CPU+AVX, cœurs, RAM, versions, providers) pour retours terrain auto-suffisants.
- 3 .spec : embarquent les poids ONNX OnnxTR (fail-closed) + hiddenimports onnxtr.
- requirements : onnxtr[cpu] (python-doctr conservé transitoirement).
- inclut le correctif quarantaine-visible du runner (GO Qwen).

Tests : test_ocr_onnxtr.py (RED→GREEN), 95 unit passed, e2e scan client OK
(OCR 5/5, PDF produit, plus de crash). Retrait torch du frozen + rebuild Windows
= étapes suivantes (gates Dom).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 17:07:00 +02:00

184 lines
4.5 KiB
Python

# PyInstaller spec — GUI V6 (build-prep G3-D).
#
# Produit `Anonymisation.exe` (V6), source de l'installateur Inno
# `installer/Anonymisation.iss` qui génère la cible finale `Anonymisation-Setup.exe`.
#
# Entrée directe : Pseudonymisation_Gui_V6.py (expose main() + --self-test).
# Ne construit AUCUN artefact ici : la génération réelle se fait sur Windows.
import os
from pathlib import Path
block_cipher = None
project_dir = Path(globals().get("SPECPATH", os.getcwd())).resolve()
def _data_entry(relative_path: str, target_dir: str | None = None):
src = project_dir / relative_path
if not src.exists():
return None
return (str(src), target_dir or relative_path)
datas = []
for relative_path, target_dir in [
("config", "config"),
("data/bdpm", "data/bdpm"),
("data/finess", "data/finess"),
("data/insee", "data/insee"),
("models/camembert-bio-deid/onnx", "models/camembert-bio-deid/onnx"),
("detectors", "detectors"),
("scripts", "scripts"),
("assets", "assets"),
]:
entry = _data_entry(relative_path, target_dir)
if entry is not None:
datas.append(entry)
for relative_path in [
"data/stopwords_manuels.txt",
"data/villes_blacklist.txt",
"data/dpi_labels_blacklist.txt",
"data/companion_blacklist.txt",
]:
entry = _data_entry(relative_path, "data")
if entry is not None:
datas.append(entry)
onnxtr_cache_dir = Path(os.environ.get("ONNXTR_CACHE_DIR", Path.home() / ".cache" / "onnxtr"))
required_onnxtr_weights = [
"db_resnet50-69ba0015.onnx",
"crnn_vgg16_bn-743599aa.onnx",
]
missing_onnxtr_weights = []
for filename in required_onnxtr_weights:
src = onnxtr_cache_dir / "models" / filename
if src.exists():
datas.append((str(src), "models/onnxtr/models"))
else:
missing_onnxtr_weights.append(str(src))
if missing_onnxtr_weights:
raise FileNotFoundError(
"Poids OCR OnnxTR manquants pour le build frozen : "
+ ", ".join(missing_onnxtr_weights)
+ ". Précharger OnnxTR (lancer une OCR une fois) ou définir ONNXTR_CACHE_DIR avant PyInstaller."
)
hiddenimports = [
# Entrée + package GUI V6
"Pseudonymisation_Gui_V6",
"gui_v6",
"gui_v6.app",
"gui_v6.theme",
"gui_v6.license_client",
"gui_v6.license_store",
"gui_v6.machine_id",
"gui_v6.engine_bridge",
"gui_v6.config_state",
"gui_v6.processing_runner",
"gui_v6.tabs",
"gui_v6.tabs.tab_about",
"gui_v6.tabs.tab_config",
"gui_v6.tabs.tab_usage",
# UI customtkinter
"customtkinter",
"darkdetect",
# Réseau licence
"requests",
# Moteur + modules support (inchangés vs V5)
"anonymizer_core_refactored_onnx",
"admin_mode",
"admin_rules",
"config_defaults",
"profile_defaults",
"gui_batch_paths",
"manual_masking",
"pdf_mask_designer",
"format_converter",
"ner_manager_onnx",
"camembert_ner_manager",
"eds_pseudo_manager",
"gliner_manager",
"vlm_manager",
"build_info",
"doctr",
"doctr.io",
"doctr.models",
"doctr.models.detection",
"doctr.models.recognition",
# OCR OnnxTR (remplace docTR — ONNX Runtime, sans torch)
"onnxtr",
"onnxtr.io",
"onnxtr.models",
"onnxtr.models.detection",
"onnxtr.models.recognition",
"onnxtr.utils",
"onnxtr.utils.data",
"cv2",
"torchvision",
"edsnlp",
"edsnlp.pipes",
"edsnlp.pipes.ner",
"edsnlp.pipes.ner.pseudo",
"spacy",
"spacy.lang.fr",
"gliner",
"onnxruntime",
"transformers",
"tokenizers",
"torch",
"pdfplumber",
"fitz",
"PIL",
"yaml",
"loguru",
"regex",
"optimum",
"optimum.onnxruntime",
"optimum.pipelines",
"optimum.modeling_base",
"optimum.exporters.onnx",
]
a = Analysis(
[str(project_dir / "Pseudonymisation_Gui_V6.py")],
pathex=[str(project_dir)],
datas=datas,
hiddenimports=hiddenimports,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
splash = Splash(
str(project_dir / "assets" / "splash.png"),
binaries=a.binaries,
datas=a.datas,
text_pos=(60, 195),
text_size=10,
text_color="white",
minify_script=True,
always_on_top=False,
)
exe = EXE(
pyz,
a.scripts,
splash,
splash.binaries,
a.binaries,
a.zipfiles,
a.datas,
[],
name="Anonymisation",
debug=False,
strip=False,
upx=False,
console=False,
icon=str(project_dir / "assets" / "icons" / "app.ico"),
)