feat(anonymisation): server-side PII blur via EDS-NLP + local-first VLM

Server-side PII blur (core/anonymisation/pii_blur.py):
- OCR pipeline (docTR) → NER (EDS-NLP + regex fallback)
- Targeted detection of surnames/first names/addresses/NIR/phone numbers/emails
- Explicit protection of CIM-10 and CCAM codes, € amounts, dates, technical IDs
- Dual storage: shot_XXXX_full.png (raw) + _blurred.png (display)
- 18 tests

Client:
- RPA_BLUR_SENSITIVE=false by default (server-side blur only)
- Zero overhead on the user workstation

VLM config:
- vlm_config.py: gemma4:latest, fallbacks qwen3-vl:8b + UI-TARS
- think=false applied automatically for gemma4 (Ollama 0.20.x bug)
- VWB VLM provider: local-first (Ollama), cloud opt-in via VLM_ALLOW_CLOUD

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
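The server-side blur pipeline itself (core/anonymisation/pii_blur.py) is not part of the diff below. As a rough illustration of the flow described above, here is a minimal, hypothetical sketch of the regex-fallback pass and the dual-storage write, assuming Pillow is available; the Word type, helper names, and patterns are illustrative assumptions, and the real pipeline runs docTR OCR and EDS-NLP NER upstream of this step.

# Hypothetical sketch, not the repository's pii_blur.py.
import re
from dataclasses import dataclass
from pathlib import Path
from PIL import Image, ImageFilter

@dataclass
class Word:
    text: str
    box: tuple  # (left, top, right, bottom) in pixels, from the OCR step

# Regex fallback for structured identifiers (names/addresses come from the NER pass)
PII_PATTERNS = {
    "nir": re.compile(r"\b[12]\d{2}(0[1-9]|1[0-2])\d{2}\d{3}\d{3}\d{2}\b"),
    "phone": re.compile(r"\b0[1-9](\s?\d{2}){4}\b"),
    "email": re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.]+\b"),
}
# Never blur these (illustrative patterns for billing codes, amounts, dates)
PROTECTED = [
    re.compile(r"\b[A-TV-Z]\d{2}(\.\d{1,2})?\b"),  # CIM-10-like codes
    re.compile(r"\b\d+([.,]\d{2})?\s?€"),          # € amounts
    re.compile(r"\b\d{2}/\d{2}/\d{4}\b"),          # dates
]

def is_pii(text: str) -> bool:
    # Protected categories win over PII matches
    if any(p.search(text) for p in PROTECTED):
        return False
    return any(p.search(text) for p in PII_PATTERNS.values())

def blur_pii(full_png: Path, words: list[Word], radius: int = 12) -> Path:
    """Write shot_XXXX_blurred.png next to the untouched shot_XXXX_full.png."""
    img = Image.open(full_png).convert("RGB")
    for w in words:
        if is_pii(w.text):
            region = img.crop(w.box).filter(ImageFilter.GaussianBlur(radius))
            img.paste(region, w.box)
    out = full_png.with_name(full_png.name.replace("_full", "_blurred"))
    img.save(out)
    return out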
@@ -18,22 +18,29 @@ for path in env_paths:
         break

 class VLMProvider:
-    """Multi-provider Semantic Vision Hub (OpenAI, Gemini, Anthropic, Ollama)"""
+    """Semantic Vision Hub - local Ollama first, cloud opt-in.
+
+    By default only local Ollama is used (100% local, no cloud).
+    To enable the cloud APIs as a fallback, set VLM_ALLOW_CLOUD=true
+    in the environment.
+    """

     def __init__(self):
-        # API keys
-        self.openai_key = os.getenv("OPENAI_API_KEY")
-        self.gemini_key = os.getenv("GOOGLE_API_KEY")
-        self.anthropic_key = os.getenv("ANTHROPIC_API_KEY")
-        self.deepseek_key = os.getenv("DEEPSEEK_API_KEY")
-
-        # Local Ollama configuration
-        self.ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
-        self.local_model = os.getenv("VLM_MODEL", "qwen3-vl:8b")
+        # Cloud is opt-in only (set VLM_ALLOW_CLOUD=true to enable)
+        self.allow_cloud = os.getenv("VLM_ALLOW_CLOUD", "").lower() in ("true", "1", "yes")

-        # Default priority
-        self.preferred_cloud = "openai"  # gpt-4o is the UI reference
-        print(f"🔧 [VLM Hub] Initialisé. OpenAI: {bool(self.openai_key)}, Gemini: {bool(self.gemini_key)}, Anthropic: {bool(self.anthropic_key)}")
+        # API keys (loaded but unused unless cloud is allowed)
+        self.openai_key = os.getenv("OPENAI_API_KEY") if self.allow_cloud else None
+        self.gemini_key = os.getenv("GOOGLE_API_KEY") if self.allow_cloud else None
+        self.anthropic_key = os.getenv("ANTHROPIC_API_KEY") if self.allow_cloud else None
+        self.deepseek_key = os.getenv("DEEPSEEK_API_KEY") if self.allow_cloud else None
+
+        # Local Ollama configuration (always takes priority)
+        self.ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
+        self.local_model = os.getenv("RPA_VLM_MODEL", os.getenv("VLM_MODEL", "gemma4:latest"))
+
+        cloud_status = f"OpenAI: {bool(self.openai_key)}, Gemini: {bool(self.gemini_key)}, Anthropic: {bool(self.anthropic_key)}" if self.allow_cloud else "désactivé (VLM_ALLOW_CLOUD non défini)"
+        print(f"[VLM Hub] Ollama local: {self.ollama_url} ({self.local_model}), Cloud: {cloud_status}")

     def _to_base64(self, image_input) -> str:
         """Converts any image input to a plain base64 string"""
@@ -51,25 +58,28 @@ class VLMProvider:
         return base64.b64encode(image_input).decode("utf-8")

     def detect_ui_element(self, screenshot, anchor_image=None, description: str = "") -> Optional[Dict[str, Any]]:
-        """Tries to locate the element by querying each provider in order of quality"""
-
-        # 1. Try OpenAI (UI vision reference)
-        if self.openai_key:
-            res = self._call_openai(screenshot, anchor_image, description)
-            if res and res.get('found'): return res
+        """Locates the element - local Ollama first, cloud as an opt-in fallback."""

-        # 2. Try Gemini (excellent vision backup)
-        if self.gemini_key:
-            res = self._call_gemini(screenshot, anchor_image, description)
-            if res and res.get('found'): return res
+        # 1. Local Ollama (always first - 100% local)
+        res = self._call_ollama_local(screenshot, anchor_image, description)
+        if res and res.get('found'):
+            return res

-        # 3. Try Anthropic (logical precision)
-        if self.anthropic_key:
-            res = self._call_anthropic(screenshot, anchor_image, description)
-            if res and res.get('found'): return res
+        # 2-4. Cloud fallback (only if VLM_ALLOW_CLOUD=true)
+        if self.allow_cloud:
+            if self.openai_key:
+                res = self._call_openai(screenshot, anchor_image, description)
+                if res and res.get('found'): return res

-        # 4. Local fallback (Ollama) - crucial for the DGX Spark
-        return self._call_ollama_local(screenshot, anchor_image, description)
+            if self.gemini_key:
+                res = self._call_gemini(screenshot, anchor_image, description)
+                if res and res.get('found'): return res

+            if self.anthropic_key:
+                res = self._call_anthropic(screenshot, anchor_image, description)
+                if res and res.get('found'): return res

+        return res  # Return the last result (Ollama or cloud)

     def _call_openai(self, screenshot, anchor_image, description):
         try:
@@ -137,28 +147,36 @@ class VLMProvider:
             return None

     def _call_ollama_local(self, screenshot, anchor_image, description):
-        """Call to local Ollama (DGX Spark / offline mode)"""
+        """Call to local Ollama (primary path - 100% local)"""
        try:
            import requests
-            print(f"🏠 [Hub] Fallback Local Ollama ({self.local_model})...")
-            prompt = f"Localise l'élément '{description}'. Retourne JSON: {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax] (0-1000)}}"
-
+            print(f"[Hub] Ollama local ({self.local_model})...")
+            prompt = f"Localise l'element '{description}'. Retourne JSON: {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax] (0-1000)}}"
+
+            images = [self._to_base64(screenshot)]
+            if anchor_image:
+                images.append(self._to_base64(anchor_image))
+
+            messages = [{"role": "user", "content": prompt, "images": images}]
+
            payload = {
                "model": self.local_model,
-                "prompt": prompt,
-                "images": [self._to_base64(screenshot)],
+                "messages": messages,
                "stream": False,
                "format": "json"
            }
-            if anchor_image:
-                payload["images"].append(self._to_base64(anchor_image))
-
-            response = requests.post(f"{self.ollama_url}/api/generate", json=payload, timeout=60)
+            # gemma4 requires think=false (otherwise empty tokens on Ollama >= 0.20)
+            if "gemma4" in self.local_model.lower():
+                payload["think"] = False
+
+            response = requests.post(f"{self.ollama_url}/api/chat", json=payload, timeout=60)
            if response.status_code == 200:
-                return json.loads(response.json().get('response', '{}'))
+                content = response.json().get("message", {}).get("content", "{}")
+                return json.loads(content)
            return None
        except Exception as e:
-            print(f"❌ [Hub] Local Ollama Error: {e}")
+            print(f"[Hub] Ollama local erreur: {e}")
            return {"found": False, "error": str(e)}

 # Single instance
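For reference, a hypothetical configuration/usage sketch of the provider changed above. The environment variables OLLAMA_URL, RPA_VLM_MODEL, VLM_MODEL and VLM_ALLOW_CLOUD come straight from the diff; the module name vlm_provider and the screen.png file are assumptions, since the file path is not shown here.

# Hypothetical usage sketch of the local-first provider.
import os

# Default: 100% local, Ollama only (these must be set before instantiation,
# because the keys and model are read in __init__)
os.environ.setdefault("OLLAMA_URL", "http://localhost:11434")
os.environ.setdefault("RPA_VLM_MODEL", "gemma4:latest")  # takes precedence over VLM_MODEL

# Opt in to the cloud fallback chain (OpenAI -> Gemini -> Anthropic):
# os.environ["VLM_ALLOW_CLOUD"] = "true"
# os.environ["OPENAI_API_KEY"] = "..."

from vlm_provider import VLMProvider  # module name assumed, not shown in the diff

hub = VLMProvider()
screenshot_bytes = open("screen.png", "rb").read()  # _to_base64 accepts raw bytes
result = hub.detect_ui_element(screenshot=screenshot_bytes,
                               description="Validate button")
if result and result.get("found"):
    print("bbox (0-1000 normalised):", result["bbox"])

With the default gemma4:latest model the provider adds think=false to the /api/chat payload itself, so the Ollama 0.20.x workaround needs no extra configuration.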