Sécurité VLM : format JSON forcé, modèle local uniquement, fix logging critique

- vlm_manager: ajout format:json dans payload Ollama (élimine hallucinations JSON)
- vlm_manager: retour modèle local qwen2.5vl:7b (sécurité données médicales)
- anonymizer_core: ajout import logging (fix NameError silencieux qui tuait le VLM)
- anonymizer_core: masquage direct pages manuscrites (suppression rotation inutile)
- GUI: intégration checkbox VLM + auto-load EDS-Pseudo prioritaire

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 02:38:30 +01:00
parent 125ac82f4f
commit 86274b3b2a
4 changed files with 117 additions and 37 deletions

View File

@@ -63,6 +63,12 @@ try:
except Exception:
EdsPseudoManager = None # type: ignore
try:
from vlm_manager import VlmManager, VlmConfig
except Exception:
VlmManager = None # type: ignore
VlmConfig = None # type: ignore
try:
import yaml
except Exception:
@@ -280,6 +286,11 @@ class App:
self._active_manager: Optional[Any] = None
self.cfg_data: Dict[str, Any] = {}
# --- VLM (optionnel) ---
self.use_vlm = tk.BooleanVar(value=False)
self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
self._vlm_available = False
# --- Fusion catalogue modèles ---
catalog: Dict[str, str] = {}
if self._onnx_manager:
@@ -440,6 +451,24 @@ class App:
anchor="w", justify=tk.LEFT,
).pack(fill=tk.X, pady=(4, 0))
# --- Checkbox VLM ---
if VlmManager is not None:
vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT)
vlm_row.pack(fill=tk.X, pady=(8, 0))
self._vlm_check = tk.Checkbutton(
vlm_row, text="Analyse visuelle VLM (Ollama)",
variable=self.use_vlm, font=self._f_card_desc,
bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT,
command=self._on_vlm_toggle,
)
self._vlm_check.pack(side=tk.LEFT)
self._vlm_status_lbl = tk.Label(
vlm_row, text="", font=self._f_small,
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
)
self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0))
ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.")
# =============================================================
# BOUTON LANCER
# =============================================================
@@ -687,6 +716,14 @@ class App:
parent_name = pdf.parent.name
ogc = parent_name.split("_")[0] if "_" in parent_name else None
# VLM
vlm_active = bool(
self.use_vlm.get()
and self._vlm_available
and self._vlm_manager
and self._vlm_manager.is_loaded()
)
outputs = core.process_pdf(
pdf_path=pdf,
out_dir=outdir,
@@ -697,6 +734,8 @@ class App:
ner_manager=active,
ner_thresholds=thresholds,
ogc_label=ogc,
use_vlm=vlm_active,
vlm_manager=self._vlm_manager if vlm_active else None,
)
self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}"))
for k, v in outputs.items():
@@ -882,21 +921,72 @@ class App:
# Chargement automatique NER au démarrage
# ---------------------------------------------------------------
def _auto_load_ner(self):
    """Load the default NER model in the background.

    Priority: EDS-Pseudo (best on clinical data) -> DistilCamemBERT-NER
    ONNX as fallback; the actual loading happens in a worker thread so
    the UI stays responsive.

    NOTE(review): the stripped diff had left the pre-change guard
    (`if not self._onnx_manager:`) interleaved here; only the combined
    guard below belongs to the post-commit version.
    """
    # Nothing to do when neither NER backend was imported successfully.
    if not self._eds_manager and not self._onnx_manager:
        return
    self.status_var.set("Chargement du modèle NER...")
    # Daemon thread: never blocks application shutdown.
    threading.Thread(target=self._auto_load_ner_worker, daemon=True).start()
def _auto_load_ner_worker(self):
    """Background worker: load a NER backend, preferring EDS-Pseudo.

    Tries EDS-Pseudo first (reported F1=97.4% on clinical data), then
    falls back to the DistilCamemBERT ONNX model, and reports the
    outcome through the status bar.
    """
    # Attempt 1: EDS-Pseudo, the preferred clinical-text model.
    eds = self._eds_manager
    if eds:
        try:
            eds.load("AP-HP/eds-pseudo-public")
            self._active_manager = eds
            self.use_hf = True
            self.status_var.set("Prêt — EDS-Pseudo actif.")
            return
        except Exception as exc:
            # Not fatal: log and fall through to the ONNX backend.
            import logging
            logging.getLogger(__name__).info(
                "EDS-Pseudo indisponible, fallback ONNX : %s", exc)
    # Attempt 2: DistilCamemBERT-NER via ONNX runtime.
    onnx = self._onnx_manager
    if not onnx:
        # No backend could be imported at all.
        self.status_var.set("Prêt (aucun backend NER disponible).")
        return
    try:
        onnx.load("cmarkea/distilcamembert-base-ner")
        self._active_manager = onnx
        self.use_hf = True
        self.status_var.set("Prêt — NER ONNX actif.")
    except Exception as exc:
        self.status_var.set(f"Prêt (NER indisponible : {exc})")
# ---------------------------------------------------------------
# VLM toggle
# ---------------------------------------------------------------
def _on_vlm_toggle(self):
    """React to the user checking/unchecking the VLM checkbox."""
    if not self.use_vlm.get():
        # Unchecked: VLM is no longer available; clear the status label.
        self._vlm_available = False
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
        return
    # Checked: give immediate feedback, then probe Ollama off the UI
    # thread so the GUI does not freeze while connecting.
    if hasattr(self, '_vlm_status_lbl'):
        self._vlm_status_lbl.configure(
            text="Connexion...", fg=CLR_TEXT_SECONDARY)
    threading.Thread(target=self._vlm_connect_worker, daemon=True).start()
def _vlm_connect_worker(self):
    """Background worker: verify the Ollama/VLM connection.

    Sets ``self._vlm_available`` and updates the status label next to
    the checkbox; on failure the checkbox is unchecked again.

    NOTE(review): the stripped diff had fused removed lines of the old
    ``_auto_load_ner_worker`` (ONNX model load, status-bar writes) into
    this method; they do not belong to the VLM connection check and are
    dropped here.
    """
    try:
        if self._vlm_manager is None:
            raise RuntimeError("VlmManager non disponible")
        self._vlm_manager.load()
        self._vlm_available = True
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN)
    except Exception as e:
        # Connection failed: revert the checkbox and surface a short
        # (truncated to fit the label) error message to the user.
        self._vlm_available = False
        self.use_vlm.set(False)
        err = str(e)
        if len(err) > 60:
            err = err[:57] + "..."
        if hasattr(self, '_vlm_status_lbl'):
            self._vlm_status_lbl.configure(
                text=f"Indisponible : {err}", fg=CLR_RED)
# ---------------------------------------------------------------
# Modèles NER (API interne)