feat(anonymisation): blur PII côté serveur via EDS-NLP + VLM local-first

Blur PII server-side (core/anonymisation/pii_blur.py) :
- Pipeline OCR (docTR) → NER (EDS-NLP + fallback regex)
- Détection ciblée noms/prénoms/adresses/NIR/téléphone/email
- Protection explicite CIM-10, CCAM, montants €, dates, IDs techniques
- Dual-storage : shot_XXXX_full.png (brut) + _blurred.png (affichage)
- 18 tests

Client :
- RPA_BLUR_SENSITIVE=false par défaut (blur serveur uniquement)
- Zéro overhead côté poste utilisateur

VLM config :
- vlm_config.py : gemma4:latest, fallbacks qwen3-vl:8b + UI-TARS
- think=false auto pour gemma4 (bug Ollama 0.20.x)
- VLM provider VWB : local-first (Ollama), cloud opt-in via VLM_ALLOW_CLOUD

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 16:48:23 +02:00
parent a9a99953dd
commit f7b8cddd2b
10 changed files with 1283 additions and 65 deletions
--- a/core/detection/ollama_client.py
+++ b/core/detection/ollama_client.py
@@ -23,9 +23,9 @@ class OllamaClient:
    Permet d'envoyer des images et prompts à un VLM via l'API Ollama.
    """
    
-    def __init__(self, 
+    def __init__(self,
                 endpoint: str = "http://localhost:11434",
-                 model: str = "qwen3-vl:8b",
+                 model: str = None,
                 timeout: int = 180):
        """
        Initialiser le client Ollama
@@ -36,7 +36,12 @@ class OllamaClient:
            timeout: Timeout en secondes
        """
        self.endpoint = endpoint.rstrip('/')
-        self.model = model
+        # Résolution du modèle : paramètre explicite > config centralisée
+        if model is not None:
+            self.model = model
+        else:
+            from core.detection.vlm_config import get_vlm_model
+            self.model = get_vlm_model(endpoint=self.endpoint)
        self.timeout = timeout
        self._check_connection()
    
@@ -126,7 +131,12 @@ class OllamaClient:
            messages.append(user_message)

            # Déterminer si le modèle est un modèle thinking (qwen3)
-            is_thinking_model = "qwen3" in self.model.lower()
+            # Les modèles non-thinking (gemma4, qwen2.5vl) n'ont pas besoin
+            # du workaround prefill et supportent le rôle system natif.
+            from core.detection.vlm_config import is_thinking_model as _is_thinking
+            from core.detection.vlm_config import needs_think_false as _needs_think_false
+            is_thinking_model = _is_thinking(self.model)
+            requires_think_false = _needs_think_false(self.model)

            # WORKAROUND Ollama 0.18.x : think=false est ignoré par le
            # renderer qwen3-vl-thinking. On utilise un assistant prefill
@@ -168,9 +178,9 @@ class OllamaClient:
                }
            }

-            # Garder think=false au cas où une future version d'Ollama le
-            # corrige — le prefill reste le mécanisme principal
-            if is_thinking_model:
+            # think=false : requis pour qwen3 (prefill reste le mécanisme
+            # principal) ET pour gemma4 (sinon tokens vides sur Ollama >=0.20)
+            if is_thinking_model or requires_think_false:
                payload["think"] = False

            if force_json:
@@ -575,7 +585,7 @@ Your answer:"""
 # Fonctions utilitaires
 # ============================================================================

-def create_ollama_client(model: str = "qwen3-vl:8b",
+def create_ollama_client(model: str = None,
                        endpoint: str = "http://localhost:11434") -> OllamaClient:
    """
    Créer un client Ollama