Sécurité VLM : format JSON forcé, modèle local uniquement, fix logging critique
- vlm_manager: ajout format:json dans payload Ollama (élimine hallucinations JSON)
- vlm_manager: retour modèle local qwen2.5vl:7b (sécurité données médicales)
- anonymizer_core: ajout import logging (fix NameError silencieux qui tuait le VLM)
- anonymizer_core: masquage direct pages manuscrites (suppression rotation inutile)
- GUI: intégration checkbox VLM + auto-load EDS-Pseudo prioritaire

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -63,6 +63,12 @@ try:
|
||||
except Exception:
|
||||
EdsPseudoManager = None # type: ignore
|
||||
|
||||
try:
|
||||
from vlm_manager import VlmManager, VlmConfig
|
||||
except Exception:
|
||||
VlmManager = None # type: ignore
|
||||
VlmConfig = None # type: ignore
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except Exception:
|
||||
@@ -280,6 +286,11 @@ class App:
|
||||
self._active_manager: Optional[Any] = None
|
||||
self.cfg_data: Dict[str, Any] = {}
|
||||
|
||||
# --- VLM (optionnel) ---
|
||||
self.use_vlm = tk.BooleanVar(value=False)
|
||||
self._vlm_manager: Optional[Any] = VlmManager() if VlmManager else None
|
||||
self._vlm_available = False
|
||||
|
||||
# --- Fusion catalogue modèles ---
|
||||
catalog: Dict[str, str] = {}
|
||||
if self._onnx_manager:
|
||||
@@ -440,6 +451,24 @@ class App:
|
||||
anchor="w", justify=tk.LEFT,
|
||||
).pack(fill=tk.X, pady=(4, 0))
|
||||
|
||||
# --- Checkbox VLM ---
|
||||
if VlmManager is not None:
|
||||
vlm_row = tk.Frame(info_inner, bg=CLR_BLUE_LIGHT)
|
||||
vlm_row.pack(fill=tk.X, pady=(8, 0))
|
||||
self._vlm_check = tk.Checkbutton(
|
||||
vlm_row, text="Analyse visuelle VLM (Ollama)",
|
||||
variable=self.use_vlm, font=self._f_card_desc,
|
||||
bg=CLR_BLUE_LIGHT, activebackground=CLR_BLUE_LIGHT,
|
||||
command=self._on_vlm_toggle,
|
||||
)
|
||||
self._vlm_check.pack(side=tk.LEFT)
|
||||
self._vlm_status_lbl = tk.Label(
|
||||
vlm_row, text="", font=self._f_small,
|
||||
bg=CLR_BLUE_LIGHT, fg=CLR_TEXT_SECONDARY,
|
||||
)
|
||||
self._vlm_status_lbl.pack(side=tk.LEFT, padx=(8, 0))
|
||||
ToolTip(self._vlm_check, "Envoie chaque page comme image à un VLM local (Ollama)\npour détecter les noms que le regex a pu manquer.")
|
||||
|
||||
# =============================================================
|
||||
# BOUTON LANCER
|
||||
# =============================================================
|
||||
@@ -687,6 +716,14 @@ class App:
|
||||
parent_name = pdf.parent.name
|
||||
ogc = parent_name.split("_")[0] if "_" in parent_name else None
|
||||
|
||||
# VLM
|
||||
vlm_active = bool(
|
||||
self.use_vlm.get()
|
||||
and self._vlm_available
|
||||
and self._vlm_manager
|
||||
and self._vlm_manager.is_loaded()
|
||||
)
|
||||
|
||||
outputs = core.process_pdf(
|
||||
pdf_path=pdf,
|
||||
out_dir=outdir,
|
||||
@@ -697,6 +734,8 @@ class App:
|
||||
ner_manager=active,
|
||||
ner_thresholds=thresholds,
|
||||
ogc_label=ogc,
|
||||
use_vlm=vlm_active,
|
||||
vlm_manager=self._vlm_manager if vlm_active else None,
|
||||
)
|
||||
self.queue.put(UiMessage(kind=MsgType.LOG, text=f"\u2713 {pdf.name}"))
|
||||
for k, v in outputs.items():
|
||||
@@ -882,21 +921,72 @@ class App:
|
||||
# Chargement automatique NER au démarrage
|
||||
# ---------------------------------------------------------------
|
||||
def _auto_load_ner(self):
|
||||
"""Charge le modèle NER par défaut en arrière-plan."""
|
||||
if not self._onnx_manager:
|
||||
"""Charge le modèle NER par défaut en arrière-plan.
|
||||
Priorité : EDS-Pseudo (meilleur sur données cliniques) → DistilCamemBERT-NER (fallback).
|
||||
"""
|
||||
if not self._eds_manager and not self._onnx_manager:
|
||||
return
|
||||
self.status_var.set("Chargement du modèle NER...")
|
||||
threading.Thread(target=self._auto_load_ner_worker, daemon=True).start()
|
||||
|
||||
def _auto_load_ner_worker(self):
|
||||
# 1) Essayer EDS-Pseudo en priorité (F1=97.4% sur données cliniques)
|
||||
if self._eds_manager:
|
||||
try:
|
||||
self._eds_manager.load("AP-HP/eds-pseudo-public")
|
||||
self._active_manager = self._eds_manager
|
||||
self.use_hf = True
|
||||
self.status_var.set("Prêt — EDS-Pseudo actif.")
|
||||
return
|
||||
except Exception as e:
|
||||
import logging
|
||||
logging.getLogger(__name__).info("EDS-Pseudo indisponible, fallback ONNX : %s", e)
|
||||
|
||||
# 2) Fallback : DistilCamemBERT-NER ONNX
|
||||
if self._onnx_manager:
|
||||
try:
|
||||
self._onnx_manager.load("cmarkea/distilcamembert-base-ner")
|
||||
self._active_manager = self._onnx_manager
|
||||
self.use_hf = True
|
||||
self.status_var.set("Prêt — NER ONNX actif.")
|
||||
return
|
||||
except Exception as e2:
|
||||
self.status_var.set(f"Prêt (NER indisponible : {e2})")
|
||||
return
|
||||
|
||||
self.status_var.set("Prêt (aucun backend NER disponible).")
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# VLM toggle
|
||||
# ---------------------------------------------------------------
|
||||
def _on_vlm_toggle(self):
|
||||
"""Appelé quand l'utilisateur coche/décoche la checkbox VLM."""
|
||||
if not self.use_vlm.get():
|
||||
self._vlm_available = False
|
||||
if hasattr(self, '_vlm_status_lbl'):
|
||||
self._vlm_status_lbl.configure(text="", fg=CLR_TEXT_SECONDARY)
|
||||
return
|
||||
if hasattr(self, '_vlm_status_lbl'):
|
||||
self._vlm_status_lbl.configure(text="Connexion...", fg=CLR_TEXT_SECONDARY)
|
||||
threading.Thread(target=self._vlm_connect_worker, daemon=True).start()
|
||||
|
||||
def _vlm_connect_worker(self):
|
||||
"""Vérifie la connexion Ollama en arrière-plan."""
|
||||
try:
|
||||
default_model = "cmarkea/distilcamembert-base-ner"
|
||||
self._onnx_manager.load(default_model)
|
||||
self._active_manager = self._onnx_manager
|
||||
self.use_hf = True
|
||||
self.status_var.set("Prêt — NER actif.")
|
||||
if self._vlm_manager is None:
|
||||
raise RuntimeError("VlmManager non disponible")
|
||||
self._vlm_manager.load()
|
||||
self._vlm_available = True
|
||||
if hasattr(self, '_vlm_status_lbl'):
|
||||
self._vlm_status_lbl.configure(text="Connecté", fg=CLR_GREEN)
|
||||
except Exception as e:
|
||||
self.status_var.set(f"Prêt (NER indisponible : {e})")
|
||||
self._vlm_available = False
|
||||
self.use_vlm.set(False)
|
||||
err = str(e)
|
||||
if len(err) > 60:
|
||||
err = err[:57] + "..."
|
||||
if hasattr(self, '_vlm_status_lbl'):
|
||||
self._vlm_status_lbl.configure(text=f"Indisponible : {err}", fg=CLR_RED)
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Modèles NER (API interne)
|
||||
|
||||
Reference in New Issue
Block a user