Initial commit — Pseudonymisation de PDF v5
- GUI v5 : vue unique épurée (tkinter), 2 étapes visuelles
- Core ONNX : anonymisation regex + NER optionnel
- Extraction globale des noms depuis champs structurés (Patient, Rédigé par, MME/Madame, DR)
- Génération simultanée PDF Image + PDF Anonymisé (structure préservée)
- Build Windows via Nuitka (script batch + GitHub Actions CI)
- install.sh pour setup/run Linux

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
216
setup_env_and_build.bat
Executable file
216
setup_env_and_build.bat
Executable file
@@ -0,0 +1,216 @@
|
||||
@echo off
REM Setup, run and build helper for the pseudonymisation tool.
REM Delayed expansion is enabled for the whole script.
setlocal EnableExtensions EnableDelayedExpansion

REM ======== PERSISTENT WINDOW ========
REM When started without the /keep flag, relaunch this same script in a new
REM console opened with cmd /k, which stays open once the script has ended,
REM then leave this first instance.
if /I not "%~1"=="/keep" (
    start "" cmd /k "%~f0" /keep
    goto :eof
)
REM The ampersand is escaped with a caret. Unescaped, it ended the title
REM command after the word Setup and the rest of the line was run as a
REM separate command named Build.
title Setup ^& Build Pseudonymiseur (Robuste) - PERSISTANT
|
||||
|
||||
REM ======== CONFIG ========
REM Application entry script, name of the generated executable, build log.
set "ENTRY=pseudonymisation_pipeline_robuste.py"
set "EXENAME=PseudonymiseurMedical"
set "LOG=build_log.txt"

REM Python launcher command and virtual environment folder.
set "PY=py -3.11"
set "VENV=.venv"

REM Local spaCy model folder and the wheel used as download fallback.
set "MODEL_DIR=models\fr_core_news_lg"
set "FR_WHEEL_URL=https://github.com/explosion/spacy-models/releases/download/fr_core_news_lg-3.7.0/fr_core_news_lg-3.7.0-py3-none-any.whl"

REM Stays at 1 until the sentencepiece install step succeeds.
set "SPM_MISSING=1"
|
||||
|
||||
REM ======== PRE-BUILD CLEANUP ========
REM Removes artefacts of earlier builds before anything is installed.
REM Every command is best effort: its errors are silenced on purpose.
REM Fixed: the first line used to print a lone dot instead of a blank line.
echo.
echo [CLEAN] Nettoyage de l'environnement...
REM Delete anything named Build or BUILD that del can remove.
if exist "Build" del /f /q "Build" >nul 2>&1
if exist "BUILD" del /f /q "BUILD" >nul 2>&1
REM Output folders.
if exist ".\build" rmdir /s /q ".\build" >nul 2>&1
if exist ".\dist" rmdir /s /q ".\dist" >nul 2>&1
if exist ".\out" rmdir /s /q ".\out" >nul 2>&1
REM Spec files and compiled Python files in the current folder.
del /f /q *.spec *.pyc 2>nul
REM Tool caches in the current folder.
for /d %%D in (__pycache__ .pytest_cache .mypy_cache) do if exist "%%D" rmdir /s /q "%%D" 2>nul
echo [CLEAN] OK
|
||||
|
||||
REM ======== STEP 0: PYTHON CHECK ========
REM Verifies that the configured launcher starts a 64-bit Python 3.11 and
REM prints its version and architecture. On failure, jumps to the menu.
REM Fixed: the 64-bit requirement announced by the messages is now asserted;
REM previously only the version number was checked.
echo.
echo [0] Verif Python 3.11 x64
%PY% -c "import sys,platform;assert sys.version_info[:2]==(3,11);assert platform.architecture()[0]=='64bit';print(sys.version);print(platform.architecture())"
if errorlevel 1 (
    echo [ERREUR] Python 3.11 x64 requis.
    goto MENU
)
|
||||
|
||||
REM ======== STEP 1: VIRTUAL ENVIRONMENT ========
REM Creates the virtual environment when its interpreter is missing, then
REM activates it. Either failure prints an error and jumps to the menu.
echo.
echo [1] Environnement virtuel
if exist "%VENV%\Scripts\python.exe" goto VENV_CREATED
%PY% -m venv "%VENV%"
:VENV_CREATED
REM The exit code is tested whether or not the creation command ran.
if not errorlevel 1 goto VENV_ACTIVATE
echo [ERREUR] Creation venv impossible.
goto MENU
:VENV_ACTIVATE
call "%VENV%\Scripts\activate"
if not errorlevel 1 goto VENV_READY
echo [ERREUR] Activation venv impossible.
goto MENU
:VENV_READY
|
||||
|
||||
REM ======== STEP 2: DEPENDENCIES ========
REM Upgrades pip and wheel, which starts a fresh log file, then installs
REM requirements.txt, appending to the same log. A failure of either command
REM prints an error and jumps to the log viewer.
echo.
echo [2] Installation des dependances (voir %LOG%)
python -m pip install -U pip wheel > "%LOG%" 2>&1
if not errorlevel 1 goto DEPS_REQUIREMENTS
echo [ERREUR] Upgrade pip/wheel a echoue. Voir %LOG%.
goto VIEW_LOG
:DEPS_REQUIREMENTS
pip install -r requirements.txt >> "%LOG%" 2>&1
if not errorlevel 1 goto DEPS_SENTENCEPIECE
echo [ERREUR] Installation requirements a echoue. Voir %LOG%.
goto VIEW_LOG
:DEPS_SENTENCEPIECE

REM ======== STEP 2a: SENTENCEPIECE ========
REM Installs the pinned sentencepiece release from a binary wheel only.
REM A failure is not fatal: SPM_MISSING is cleared only on success.
echo.
echo [2a] sentencepiece (necessaire pour CamemBERT/DrBERT)
pip install --only-binary=:all: sentencepiece==0.1.99 >> "%LOG%" 2>&1
if errorlevel 1 goto DEPS_DONE
set "SPM_MISSING=0"
:DEPS_DONE
|
||||
|
||||
REM ======== STEP 2b: IMPORT SMOKE TEST ========
REM Imports the core packages in one Python process and reports whether
REM sentencepiece can be found. A failed import jumps to the log viewer.
echo.
echo [2b] Test imports (core)
python -c "import pdfplumber,spacy,requests,transformers,torch,tokenizers,huggingface_hub,yaml,PyInstaller,sys,importlib.util as u; print('Core imports OK. sentencepiece=', bool(u.find_spec('sentencepiece')))"
if not errorlevel 1 goto IMPORTS_OK
echo [ERREUR] Echec imports Python de base. Voir %LOG%.
goto VIEW_LOG
:IMPORTS_OK
|
||||
|
||||
REM ======== STEP 3: SPACY MODEL ========
REM Uses the local model folder when it already holds a config.cfg file.
REM Otherwise tries the spaCy downloader first and the wheel URL second.
REM A failure of both attempts only prints a warning.
echo.
echo [3] Modele spaCy fr_core_news_lg
if not exist "%MODEL_DIR%\config.cfg" goto SPACY_DOWNLOAD
echo [OK] Modele local detecte: %MODEL_DIR%
goto SPACY_DONE

:SPACY_DOWNLOAD
echo [INFO] Tentative A: python -m spacy download fr_core_news_lg
python -m spacy download fr_core_news_lg >> "%LOG%" 2>&1
if errorlevel 1 goto SPACY_WHEEL
echo [OK] Modele telecharge via spacy.
goto SPACY_DONE

:SPACY_WHEEL
echo [INFO] Tentative B: pip install wheel officiel
pip install "%FR_WHEEL_URL%" >> "%LOG%" 2>&1
if errorlevel 1 (
    echo [WARN] Echec installation du modele spaCy. Vous pourrez le telecharger via l'UI.
) else (
    echo [OK] Modele installe via wheel.
)
:SPACY_DONE
|
||||
|
||||
REM ======== STEP 3bis: HUGGINGFACE PRE-CACHE ========
REM Downloads the tokenizers and models ahead of the first run, through a
REM temporary Python script. Skipped when sentencepiece is not installed.
REM
REM This step is written with labels instead of one parenthesised block.
REM Inside such a block, variables set in the block expanded to empty values
REM because percent expansion happens when the block is parsed, and each
REM closing parenthesis in the echoed Python lines ended the block early.
echo.
echo [3bis] Pre-cache HuggingFace (accelere le 1er usage)
if "%SPM_MISSING%"=="0" goto PRECACHE_RUN
echo [INFO] Pre-cache HF saute (sentencepiece manquant).
goto PRECACHE_DONE

:PRECACHE_RUN
REM HF_HOME stays set for the rest of this session.
set "HF_CACHE=%LOCALAPPDATA%\Pseudonymiseur\models\hf_cache"
set "HF_HOME=%HF_CACHE%"
echo Cache: %HF_CACHE%

REM Write the helper script line by line. The redirection comes first on
REM each line.
set "HF_PRECACHE=%TEMP%\hf_precache.py"
> "%HF_PRECACHE%" echo import os
>>"%HF_PRECACHE%" echo os.environ['HF_HOME']=r'%HF_CACHE%'
>>"%HF_PRECACHE%" echo from transformers import AutoTokenizer, AutoModelForTokenClassification, AutoModel
>>"%HF_PRECACHE%" echo # Tokenizers
>>"%HF_PRECACHE%" echo AutoTokenizer.from_pretrained('Jean-Baptiste/camembert-ner')
>>"%HF_PRECACHE%" echo AutoTokenizer.from_pretrained('almanach/camembert-base-bio')
>>"%HF_PRECACHE%" echo AutoTokenizer.from_pretrained('Dr-BERT/DrBERT-7GB')
>>"%HF_PRECACHE%" echo # Models
>>"%HF_PRECACHE%" echo AutoModelForTokenClassification.from_pretrained('Jean-Baptiste/camembert-ner')
>>"%HF_PRECACHE%" echo AutoModel.from_pretrained('almanach/camembert-base-bio')
>>"%HF_PRECACHE%" echo AutoModel.from_pretrained('Dr-BERT/DrBERT-7GB')

python "%HF_PRECACHE%" >> "%LOG%" 2>&1
REM Report the Python exit code before deleting the helper script. The old
REM order tested the exit code of del, so the warning could never appear.
if errorlevel 1 (echo [WARN] Pre-cache HF partiel. Voir %LOG%.) else (echo [OK] Pre-cache HF)
del /f /q "%HF_PRECACHE%" >nul 2>&1
:PRECACHE_DONE
|
||||
|
||||
|
||||
:MENU
REM Interactive menu: prints the choices, reads one answer and jumps to the
REM matching label. Any other answer prints a message and shows the menu again.
echo.
echo ================== MENU ==================
echo [A] Lancer l'application (UI)
echo [B] Builder EXE onefile (sans console)
echo [C] Builder EXE onedir (dev rapide)
echo [X] Nettoyer (build/dist/spec/caches/logs)
echo [V] Voir les 80 dernieres lignes du log
echo [Q] Quitter (fenetre persiste)
REM Clear the previous answer first. The prompt leaves the variable unchanged
REM when the user only presses Enter, which silently repeated the last action.
set "CHOIX="
set /p CHOIX="Votre choix ? "
REM Delayed expansion is used for the comparisons so that quotes or other
REM special characters typed by the user cannot break the if syntax.
if /I "!CHOIX!"=="A" goto RUN
if /I "!CHOIX!"=="B" goto BUILD_ONEFILE
if /I "!CHOIX!"=="C" goto BUILD_ONEDIR
if /I "!CHOIX!"=="X" goto CLEAN_AGAIN
if /I "!CHOIX!"=="V" goto VIEW_LOG
if /I "!CHOIX!"=="Q" goto END
echo Choix invalide.
goto MENU
|
||||
|
||||
:RUN
REM Starts the application entry script with the python found on the path,
REM waits for it to end, then returns to the menu after a key press.
echo(
echo [RUN] Lancement de l'UI...
python "%ENTRY%"
echo(
echo [INFO] L'UI s'est fermee. Retour menu.
pause
goto MENU
|
||||
|
||||
:BUILD_ONEFILE
REM Builds a single-file, console-less executable with PyInstaller.
REM Build output is appended to the log. A failure jumps to the log viewer,
REM a success returns to the menu after a key press.
echo.
echo [BUILD] EXE onefile (sans console)
REM Stop any running copy of the executable and drop earlier outputs.
taskkill /IM %EXENAME%.exe /F >nul 2>&1
rmdir /s /q build dist out 2>nul
set "PYI_COMMON=--clean --noconfirm --onefile --noconsole --name %EXENAME% --hidden-import=pdfplumber --hidden-import=pdfminer --hidden-import=pdfminer.six --hidden-import=cffi --hidden-import=_cffi_backend --hidden-import=cryptography --hidden-import=cryptography.hazmat.bindings._rust --hidden-import=sentencepiece --collect-binaries cryptography --collect-binaries cffi --collect-binaries sentencepiece --collect-data cryptography --collect-data pdfminer --collect-data pdfplumber --collect-data spacy --collect-all transformers --collect-all tokenizers --collect-all huggingface_hub --collect-data torch"
REM Bundle the local spaCy model folder only when it exists.
set "PYI_MODEL="
if exist "%MODEL_DIR%" set "PYI_MODEL=--add-data ""%MODEL_DIR%;%MODEL_DIR%"""
echo [CMD] python -m PyInstaller %PYI_COMMON% %PYI_MODEL% "%ENTRY%"
python -m PyInstaller %PYI_COMMON% %PYI_MODEL% "%ENTRY%" >> "%LOG%" 2>&1
if not errorlevel 1 goto ONEFILE_OK
echo [ERREUR] Build onefile. Voir %LOG% ci-dessous:
goto VIEW_LOG
:ONEFILE_OK
echo [OK] EXE : dist\%EXENAME%.exe
pause
goto MENU
|
||||
|
||||
:BUILD_ONEDIR
REM Builds a one-folder, console-less executable with PyInstaller, named
REM after the executable with a _dev suffix. Build output is appended to the
REM log. A failure jumps to the log viewer, a success returns to the menu
REM after a key press.
echo.
echo [BUILD] EXE onedir (dev rapide)
REM Bundle the local spaCy model folder only when it exists.
set "PYI_MODEL="
if exist "%MODEL_DIR%" set "PYI_MODEL=--add-data ""%MODEL_DIR%;%MODEL_DIR%"""
python -m PyInstaller --clean --noconfirm --onedir --noconsole --name %EXENAME%_dev %PYI_MODEL% --hidden-import=pdfplumber --hidden-import=pdfminer --hidden-import=pdfminer.six --hidden-import=cffi --hidden-import=_cffi_backend --hidden-import=cryptography --hidden-import=cryptography.hazmat.bindings._rust --hidden-import=sentencepiece --collect-binaries cryptography --collect-binaries cffi --collect-binaries sentencepiece --collect-data cryptography --collect-data pdfminer --collect-data pdfplumber --collect-data spacy --collect-all transformers --collect-all tokenizers --collect-all huggingface_hub --collect-data torch "%ENTRY%" >> "%LOG%" 2>&1
if not errorlevel 1 goto ONEDIR_OK
echo [ERREUR] Build onedir. Voir %LOG% ci-dessous:
goto VIEW_LOG
:ONEDIR_OK
echo [OK] Dossier : dist\%EXENAME%_dev
pause
goto MENU
|
||||
|
||||
:CLEAN_AGAIN
REM Menu action: removes build outputs, spec files, tool caches and the
REM build log, then returns to the menu after a key press. Every removal is
REM best effort: its errors are silenced on purpose.
echo.
echo [CLEAN] Suppression build/dist/out/*.spec/caches/logs
for %%D in (build dist out) do if exist ".\%%D" rmdir /s /q ".\%%D" >nul 2>&1
REM The log is deleted through the configured LOG variable rather than a
REM hard-coded copy of its name, so both always agree.
del /f /q *.spec "%LOG%" 2>nul
for /d %%D in (__pycache__ .pytest_cache .mypy_cache) do if exist "%%D" rmdir /s /q "%%D" 2>nul
echo [CLEAN] OK
pause
goto MENU
|
||||
|
||||
:VIEW_LOG
REM Prints the last 80 lines of the build log through PowerShell, or a
REM placeholder when no log exists yet, then returns to the menu after a
REM key press.
echo.
echo ===== Dernieres lignes de %LOG% =====
if exist "%LOG%" (
    powershell -NoLogo -NoProfile -Command "Get-Content -Path '%LOG%' -Tail 80"
) else (
    REM Parentheses are escaped with carets: unescaped, the closing one
    REM ended this else block early and was dropped from the message.
    echo ^(pas de log pour l'instant^)
)
echo =====================================
pause
goto MENU
|
||||
|
||||
:END
REM Final message. The script ends by reaching the end of the file.
echo(
echo Fin du script. La fenetre reste ouverte (mode persistant).
|
||||
Reference in New Issue
Block a user