Files
Dom f7b8cddd2b feat(anonymisation): blur PII côté serveur via EDS-NLP + VLM local-first
Blur PII server-side (core/anonymisation/pii_blur.py) :
- Pipeline OCR (docTR) → NER (EDS-NLP + fallback regex)
- Détection ciblée noms/prénoms/adresses/NIR/téléphone/email
- Protection explicite CIM-10, CCAM, montants €, dates, IDs techniques
- Dual-storage : shot_XXXX_full.png (brut) + _blurred.png (affichage)
- 18 tests

Client :
- RPA_BLUR_SENSITIVE=false par défaut (blur serveur uniquement)
- Zéro overhead côté poste utilisateur

VLM config :
- vlm_config.py : gemma4:latest, fallbacks qwen3-vl:8b + UI-TARS
- think=false auto pour gemma4 (bug Ollama 0.20.x)
- VLM provider VWB : local-first (Ollama), cloud opt-in via VLM_ALLOW_CLOUD

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 16:48:23 +02:00

184 lines
8.3 KiB
Python

import os
import io
import json
import base64
from typing import Optional, Dict, Any, List
from PIL import Image
from dotenv import load_dotenv
# Load environment variables from the first .env.local found among the
# known candidate locations (cwd, packaged subdir, then three levels up
# from this file). Only the first existing file is loaded.
env_paths = [
    os.path.join(os.getcwd(), ".env.local"),
    os.path.join(os.getcwd(), "rpa_vision_v3/.env.local"),
    os.path.join(os.path.dirname(__file__), "../../../.env.local"),
]
for candidate in env_paths:
    if not os.path.exists(candidate):
        continue
    load_dotenv(candidate, override=True)
    break
class VLMProvider:
"""Hub de Vision Sémantique — Ollama local prioritaire, cloud opt-in.
Par défaut, seul Ollama local est utilisé (100% local, pas de cloud).
Pour activer les APIs cloud en fallback, définir VLM_ALLOW_CLOUD=true
dans l'environnement.
"""
def __init__(self):
# Cloud opt-in uniquement (VLM_ALLOW_CLOUD=true pour activer)
self.allow_cloud = os.getenv("VLM_ALLOW_CLOUD", "").lower() in ("true", "1", "yes")
# Clés API (chargées mais pas utilisées sauf si cloud autorisé)
self.openai_key = os.getenv("OPENAI_API_KEY") if self.allow_cloud else None
self.gemini_key = os.getenv("GOOGLE_API_KEY") if self.allow_cloud else None
self.anthropic_key = os.getenv("ANTHROPIC_API_KEY") if self.allow_cloud else None
self.deepseek_key = os.getenv("DEEPSEEK_API_KEY") if self.allow_cloud else None
# Configuration Ollama Local (toujours prioritaire)
self.ollama_url = os.getenv("OLLAMA_URL", "http://localhost:11434")
self.local_model = os.getenv("RPA_VLM_MODEL", os.getenv("VLM_MODEL", "gemma4:latest"))
cloud_status = f"OpenAI: {bool(self.openai_key)}, Gemini: {bool(self.gemini_key)}, Anthropic: {bool(self.anthropic_key)}" if self.allow_cloud else "désactivé (VLM_ALLOW_CLOUD non défini)"
print(f"[VLM Hub] Ollama local: {self.ollama_url} ({self.local_model}), Cloud: {cloud_status}")
def _to_base64(self, image_input) -> str:
"""Convertit n'importe quel input image en base64 pur"""
if isinstance(image_input, Image.Image):
buffer = io.BytesIO()
image_input.save(buffer, format="PNG")
return base64.b64encode(buffer.getvalue()).decode("utf-8")
elif isinstance(image_input, str):
if image_input.startswith("data:image"):
return image_input.split(",", 1)[1]
elif os.path.exists(image_input):
with open(image_input, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
return image_input # Base64 brut supposé
return base64.b64encode(image_input).decode("utf-8")
def detect_ui_element(self, screenshot, anchor_image=None, description: str = "") -> Optional[Dict[str, Any]]:
"""Localise l'élément — Ollama local en priorité, cloud en fallback opt-in."""
# 1. Ollama local (toujours prioritaire — 100% local)
res = self._call_ollama_local(screenshot, anchor_image, description)
if res and res.get('found'):
return res
# 2-4. Fallback cloud (uniquement si VLM_ALLOW_CLOUD=true)
if self.allow_cloud:
if self.openai_key:
res = self._call_openai(screenshot, anchor_image, description)
if res and res.get('found'): return res
if self.gemini_key:
res = self._call_gemini(screenshot, anchor_image, description)
if res and res.get('found'): return res
if self.anthropic_key:
res = self._call_anthropic(screenshot, anchor_image, description)
if res and res.get('found'): return res
return res # Retourner le dernier résultat (Ollama ou cloud)
def _call_openai(self, screenshot, anchor_image, description):
try:
from openai import OpenAI
client = OpenAI(api_key=self.openai_key)
prompt = f"Expert UI: Localise précisément '{description}'. Retourne JSON: {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax] (0-1000), 'confidence': float}}"
content = [{"type": "text", "text": prompt}]
content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{self._to_base64(screenshot)}"}})
if anchor_image:
content.append({"type": "text", "text": "Ancre de référence:"})
content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{self._to_base64(anchor_image)}"}})
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": content}],
response_format={"type": "json_object"},
temperature=0
)
return json.loads(response.choices[0].message.content)
except Exception as e:
print(f"⚠️ [Hub] OpenAI Error: {e}")
return None
def _call_gemini(self, screenshot, anchor_image, description):
try:
from google import genai
client = genai.Client(api_key=self.gemini_key)
prompt = f"Expert UI: Localise précisément '{description}'. Retourne JSON: {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax] (0-1000), 'confidence': float}}"
contents = [prompt, Image.open(io.BytesIO(base64.b64decode(self._to_base64(screenshot))))]
if anchor_image:
contents.append(Image.open(io.BytesIO(base64.b64decode(self._to_base64(anchor_image)))))
response = client.models.generate_content(
model="gemini-1.5-flash",
contents=contents,
config={"response_mime_type": "application/json"}
)
return json.loads(response.text)
except Exception as e:
print(f"⚠️ [Hub] Gemini Error: {e}")
return None
def _call_anthropic(self, screenshot, anchor_image, description):
try:
import anthropic
client = anthropic.Anthropic(api_key=self.anthropic_key)
# Claude 3.5 Sonnet supporte la vision mais pas le format JSON strict en sortie nativement via config
# On utilise un prompt renforcé
prompt = f"Localise '{description}'. Réponds UNIQUEMENT en JSON : {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax], 'confidence': float}}"
content = [{"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": self._to_base64(screenshot)}},
{"type": "text", "text": prompt}]
response = client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1000,
messages=[{"role": "user", "content": content}]
)
text = response.content[0].text
return json.loads(text[text.find('{'):text.rfind('}')+1])
except Exception as e:
print(f"⚠️ [Hub] Anthropic Error: {e}")
return None
def _call_ollama_local(self, screenshot, anchor_image, description):
"""Appel a Ollama local (prioritaire — 100% local)"""
try:
import requests
print(f"[Hub] Ollama local ({self.local_model})...")
prompt = f"Localise l'element '{description}'. Retourne JSON: {{'found': bool, 'bbox': [ymin, xmin, ymax, xmax] (0-1000)}}"
images = [self._to_base64(screenshot)]
if anchor_image:
images.append(self._to_base64(anchor_image))
messages = [{"role": "user", "content": prompt, "images": images}]
payload = {
"model": self.local_model,
"messages": messages,
"stream": False,
"format": "json"
}
# gemma4 necessite think=false (sinon tokens vides sur Ollama >=0.20)
if "gemma4" in self.local_model.lower():
payload["think"] = False
response = requests.post(f"{self.ollama_url}/api/chat", json=payload, timeout=60)
if response.status_code == 200:
content = response.json().get("message", {}).get("content", "{}")
return json.loads(content)
return None
except Exception as e:
print(f"[Hub] Ollama local erreur: {e}")
return {"found": False, "error": str(e)}
# Module-level singleton: created once at import time and shared by all
# importers of this module (its __init__ prints the active configuration).
vlm_hub = VLMProvider()