From 4b7c8db9a83152a5cf07584ed526e3b166af8e4f Mon Sep 17 00:00:00 2001
From: Domi31tls
Date: Mon, 22 Jun 2026 16:49:03 +0200
Subject: [PATCH] =?UTF-8?q?build:=20retirer=20torch/docTR=20du=20frozen=20?=
 =?UTF-8?q?+=20hiddenimports=20OnnxTR=20(pr=C3=A9-audit=20Qwen=20GO)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Suite à la migration OCR docTR→OnnxTR (8d683bc) et au verdict pré-audit
Qwen (GO technique, 2026-06-21), préparation des 3 specs PyInstaller pour
le prochain rebuild Windows :

- Retrait de "torch", "torchvision", "doctr.*" des hiddenimports des
  3 specs (anonymisation_onefile, _cli_onefile, _gui_v6_onefile)
  → environ -2 Go attendus sur l'EXE, suppression définitive de la classe
  de bug oneDNN sur CPU contraint.
- Ajout des hiddenimports transitifs OnnxTR manquants (réserve R1 Qwen) :
  "pyclipper", "scipy.cluster.hierarchy", "scipy.special"
  → anti-omission PyInstaller (évite un crash OCR en frozen).
- Retrait de python-doctr[torch]>=0.9.0 de requirements.txt
  (transitoire levé).

_configure_torch_threads() conservé en code (lazy import torch sous
try/except, no-op si torch absent) pour future réactivation
EDS-Pseudo/GLiNER.

Aucun rebuild ni diffusion (gate Dom). 3 specs compilent (py_compile).

Co-Authored-By: Claude Opus 4.8 (1M context) --- anonymisation_cli_onefile.spec | 13 +++++-------- anonymisation_gui_v6_onefile.spec | 13 +++++-------- anonymisation_onefile.spec | 13 +++++-------- requirements.txt | 3 +-- 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/anonymisation_cli_onefile.spec b/anonymisation_cli_onefile.spec index 5fcfad1..2e2a4e1 100644 --- a/anonymisation_cli_onefile.spec +++ b/anonymisation_cli_onefile.spec @@ -85,12 +85,7 @@ hiddenimports = [ "gliner_manager", "vlm_manager", "build_info", - "doctr", - "doctr.io", - "doctr.models", - "doctr.models.detection", - "doctr.models.recognition", - # OCR OnnxTR (remplace docTR — ONNX Runtime, sans torch) + # OCR OnnxTR (ONNX Runtime, remplace docTR — sans torch ni doctr) "onnxtr", "onnxtr.io", "onnxtr.models", @@ -98,8 +93,11 @@ hiddenimports = [ "onnxtr.models.recognition", "onnxtr.utils", "onnxtr.utils.data", + # Dépendances transitives OnnxTR (hiddenimports défensifs vs omission PyInstaller) + "pyclipper", + "scipy.cluster.hierarchy", + "scipy.special", "cv2", - "torchvision", "edsnlp", "edsnlp.pipes", "edsnlp.pipes.ner", @@ -110,7 +108,6 @@ hiddenimports = [ "onnxruntime", "transformers", "tokenizers", - "torch", "pdfplumber", "fitz", "PIL", diff --git a/anonymisation_gui_v6_onefile.spec b/anonymisation_gui_v6_onefile.spec index d0a9fe3..894a64d 100644 --- a/anonymisation_gui_v6_onefile.spec +++ b/anonymisation_gui_v6_onefile.spec @@ -104,12 +104,7 @@ hiddenimports = [ "gliner_manager", "vlm_manager", "build_info", - "doctr", - "doctr.io", - "doctr.models", - "doctr.models.detection", - "doctr.models.recognition", - # OCR OnnxTR (remplace docTR — ONNX Runtime, sans torch) + # OCR OnnxTR (ONNX Runtime, remplace docTR — sans torch ni doctr) "onnxtr", "onnxtr.io", "onnxtr.models", @@ -117,8 +112,11 @@ hiddenimports = [ "onnxtr.models.recognition", "onnxtr.utils", "onnxtr.utils.data", + # Dépendances transitives OnnxTR (hiddenimports défensifs vs omission PyInstaller) + 
"pyclipper", + "scipy.cluster.hierarchy", + "scipy.special", "cv2", - "torchvision", "edsnlp", "edsnlp.pipes", "edsnlp.pipes.ner", @@ -129,7 +127,6 @@ hiddenimports = [ "onnxruntime", "transformers", "tokenizers", - "torch", "pdfplumber", "fitz", "PIL", diff --git a/anonymisation_onefile.spec b/anonymisation_onefile.spec index 83339ea..7778ea4 100644 --- a/anonymisation_onefile.spec +++ b/anonymisation_onefile.spec @@ -76,12 +76,7 @@ hiddenimports = [ "gliner_manager", "vlm_manager", "build_info", - "doctr", - "doctr.io", - "doctr.models", - "doctr.models.detection", - "doctr.models.recognition", - # OCR OnnxTR (remplace docTR — ONNX Runtime, sans torch) + # OCR OnnxTR (ONNX Runtime, remplace docTR — sans torch ni doctr) "onnxtr", "onnxtr.io", "onnxtr.models", @@ -89,8 +84,11 @@ hiddenimports = [ "onnxtr.models.recognition", "onnxtr.utils", "onnxtr.utils.data", + # Dépendances transitives OnnxTR (hiddenimports défensifs vs omission PyInstaller) + "pyclipper", + "scipy.cluster.hierarchy", + "scipy.special", "cv2", - "torchvision", "edsnlp", "edsnlp.pipes", "edsnlp.pipes.ner", @@ -101,7 +99,6 @@ hiddenimports = [ "onnxruntime", "transformers", "tokenizers", - "torch", "pdfplumber", "fitz", "PIL", diff --git a/requirements.txt b/requirements.txt index 83dab2b..8775a32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,9 +21,8 @@ pyahocorasick>=2.1.0,<3 # --- OCR pour PDF scannés --- # OnnxTR = mêmes modèles docTR (db_resnet50 + crnn_vgg16_bn) sur ONNX Runtime, SANS torch. # Remplace docTR pour l'OCR (supprime le crash torch/oneDNN sur CPU contraint). +# docTR/torch retirés du build frozen (verdict pré-audit Qwen 2026-06-21, GO Dom) : -~2 Go EXE. onnxtr[cpu]>=0.8.1 -# python-doctr conservé en transitoire (retrait avec torch = étape séparée) : -python-doctr[torch]>=0.9.0 # (optionnel – NER clinique EDS-Pseudo AP-HP, activer manuellement) # edsnlp[ml]>=0.12.0