feat: replay visuel VLM-first, worker séparé, package Léa, AZERTY, sécurité HTTPS
Pipeline replay visuel : - VLM-first : l'agent appelle Ollama directement pour trouver les éléments - Template matching en fallback (seuil strict 0.90) - Stop immédiat si élément non trouvé (pas de clic blind) - Replay depuis session brute (/replay-session) sans attendre le VLM - Vérification post-action (screenshot hash avant/après) - Gestion des popups (Enter/Escape/Tab+Enter) Worker VLM séparé : - run_worker.py : process distinct du serveur HTTP - Communication par fichiers (_worker_queue.txt + _replay_active.lock) - Le serveur HTTP ne fait plus jamais de VLM → toujours réactif - Service systemd rpa-worker.service Capture clavier : - raw_keys (vk + press/release) pour replay exact indépendant du layout - Fix AZERTY : ToUnicodeEx + AltGr detection - Enter capturé comme \n, Tab comme \t - Filtrage modificateurs seuls (Ctrl/Alt/Shift parasites) - Fusion text_input consécutifs, dédup key_combo Sécurité & Internet : - HTTPS Let's Encrypt (lea.labs + vwb.labs.laurinebazin.design) - Token API fixe dans .env.local - HTTP Basic Auth sur VWB - Security headers (HSTS, CSP, nosniff) - CORS domaines publics, plus de wildcard Infrastructure : - DPI awareness (SetProcessDpiAwareness) Python + Rust - Métadonnées système (dpi_scale, window_bounds, monitors, os_theme) - Template matching multi-scale [0.5, 2.0] - Résolution dynamique (plus de hardcode 1920x1080) - VLM prefill fix (47x speedup, 3.5s au lieu de 180s) Modules : - core/auth/ : credential vault (Fernet AES), TOTP (RFC 6238), auth handler - core/federation/ : LearningPack export/import anonymisé, FAISS global - deploy/ : package Léa (config.txt, Lea.bat, install.bat, LISEZMOI.txt) UX : - Filtrage OS (VWB + Chat montrent que les workflows de l'OS courant) - Bibliothèque persistante (cache local + SQLite) - Clustering hybride (titre fenêtre + DBSCAN) - EdgeConstraints + PostConditions peuplés - GraphBuilder compound actions (toutes les frappes) Agent Rust : - Token Bearer auth (network.rs) - sysinfo.rs (DPI, résolution, 
window bounds via Win32 API) - config.txt lu automatiquement - Support Chrome/Brave/Firefox (pas que Edge) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
195
agent_v0/agent_v1/vision/system_info.py
Normal file
195
agent_v0/agent_v1/vision/system_info.py
Normal file
@@ -0,0 +1,195 @@
|
||||
# agent_v1/vision/system_info.py
|
||||
"""
|
||||
Capture des metadonnees systeme pour enrichir les evenements.
|
||||
|
||||
Collecte DPI, resolution, fenetre active, moniteur, theme OS et langue.
|
||||
Les fonctions Windows (ctypes.windll, winreg) ont des fallbacks gracieux
|
||||
pour Linux/Mac.
|
||||
"""
|
||||
|
||||
import platform
|
||||
import locale
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache du systeme d'exploitation pour eviter les appels repetes
|
||||
_SYSTEM = platform.system()
|
||||
|
||||
|
||||
def get_dpi_scale() -> int:
    """Return the system DPI scaling factor as a percentage.

    100 means no scaling (96 DPI); 150 corresponds to 144 DPI, and so on.

    Windows: the value is derived from ``user32.GetDpiForSystem()``.
    Any other platform, or any failure of the Windows call: 100.

    NOTE: the original author states that the process must already be
    DPI-aware (``SetProcessDpiAwareness(2)``, done in config.py) for
    ``GetDpiForSystem`` to report the real DPI.
    """
    if _SYSTEM != "Windows":
        return 100  # Linux/Mac fallback

    try:
        import ctypes

        system_dpi = ctypes.windll.user32.GetDpiForSystem()
        # 96 DPI is the Windows baseline, i.e. 100 %.
        percent = round(system_dpi * 100 / 96)
    except Exception as exc:
        logger.debug(f"Impossible de lire le DPI Windows : {exc}")
        return 100
    return percent
|
||||
|
||||
|
||||
def _parse_xdotool_geometry(output: str) -> Optional[List[int]]:
    """Parse ``xdotool getwindowgeometry --shell`` output into [x, y, w, h].

    Only the X, Y, WIDTH and HEIGHT entries are read; any other
    ``KEY=VALUE`` line is ignored so an unexpected extra field cannot
    invalidate an otherwise usable geometry. Returns None when one of the
    four entries is missing or is not an integer.
    """
    wanted = ("X", "Y", "WIDTH", "HEIGHT")
    values: Dict[str, int] = {}
    for line in output.splitlines():
        key, sep, raw = line.partition("=")
        key = key.strip()
        if not sep or key not in wanted:
            continue
        try:
            values[key] = int(raw.strip())
        except ValueError:
            return None
    if all(name in values for name in wanted):
        return [values[name] for name in wanted]
    return None


def get_window_bounds() -> Optional[List[int]]:
    """Return ``[x, y, width, height]`` of the active window, or None.

    Windows: ``GetWindowRect(GetForegroundWindow())`` through ctypes.
    Linux: best effort through the ``xdotool`` command-line tool.
    Other platforms, or any failure: None.

    Failures never propagate: they are logged at DEBUG level and turned
    into a None result.
    """
    if _SYSTEM == "Windows":
        try:
            import ctypes
            import ctypes.wintypes

            user32 = ctypes.windll.user32
            hwnd = user32.GetForegroundWindow()
            if not hwnd:
                return None
            rect = ctypes.wintypes.RECT()
            # GetWindowRect returns 0 on failure; in that case ``rect`` is
            # still zero-filled and must not be reported as real bounds.
            if not user32.GetWindowRect(hwnd, ctypes.byref(rect)):
                logger.debug("Impossible de lire les bounds fenetre : GetWindowRect a echoue")
                return None
            return [
                rect.left,
                rect.top,
                rect.right - rect.left,
                rect.bottom - rect.top,
            ]
        except Exception as e:
            logger.debug(f"Impossible de lire les bounds fenetre : {e}")
            return None

    # Linux: best effort via xdotool
    if _SYSTEM == "Linux":
        try:
            import subprocess

            # A timeout keeps a stuck X server / xdotool from blocking the
            # caller indefinitely; TimeoutExpired is handled below.
            wid = subprocess.check_output(
                ["xdotool", "getactivewindow"],
                stderr=subprocess.DEVNULL,
                timeout=2,
            ).decode().strip()
            if not wid:
                return None
            geom = subprocess.check_output(
                ["xdotool", "getwindowgeometry", "--shell", wid],
                stderr=subprocess.DEVNULL,
                timeout=2,
            ).decode()
            return _parse_xdotool_geometry(geom)
        except Exception as e:
            logger.debug(f"Impossible de lire les bounds fenetre (xdotool) : {e}")

    return None
|
||||
|
||||
|
||||
def _pick_active_monitor(
    bounds: Optional[List[int]],
    monitors: List[Dict[str, int]],
) -> int:
    """Return the index of the monitor that shows most of the window.

    ``bounds`` is ``[x, y, width, height]``; each monitor is a dict with
    ``x``, ``y``, ``width`` and ``height``. The monitor with the largest
    overlap area wins. A window with no positive overlap (e.g. zero size)
    falls back to the monitor containing its top-left corner. Returns 0
    when nothing matches or when ``bounds`` is missing.
    """
    if not bounds or len(bounds) < 4:
        return 0

    wx, wy, ww, wh = bounds[:4]
    best_index = 0
    best_area = 0
    for i, mon in enumerate(monitors):
        overlap_w = min(wx + ww, mon["x"] + mon["width"]) - max(wx, mon["x"])
        overlap_h = min(wy + wh, mon["y"] + mon["height"]) - max(wy, mon["y"])
        if overlap_w > 0 and overlap_h > 0 and overlap_w * overlap_h > best_area:
            best_area = overlap_w * overlap_h
            best_index = i
    if best_area > 0:
        return best_index

    # Degenerate rectangle: use the top-left corner instead.
    for i, mon in enumerate(monitors):
        if (mon["x"] <= wx < mon["x"] + mon["width"]
                and mon["y"] <= wy < mon["y"] + mon["height"]):
            return i
    return 0


def get_monitor_info() -> Tuple[int, List[Dict[str, int]]]:
    """Return ``(monitor_index, monitors)``.

    ``monitors`` lists every physical monitor reported by ``mss`` as
    ``{width, height, x, y}``. When ``mss`` cannot be imported or fails, a
    single 1920x1080 monitor at (0, 0) is returned instead.

    ``monitor_index`` is the index, within ``monitors``, of the monitor
    showing the active window (0 when it cannot be determined). It is
    chosen by largest overlap rather than by the window's top-left corner:
    a maximized window on Windows has its corner a few pixels outside its
    own monitor, which made the corner test select a neighbouring monitor.
    """
    monitors: List[Dict[str, int]] = []
    active_index = 0

    try:
        import mss

        with mss.mss() as sct:
            # Index 0 is the virtual "all monitors" screen; skip it.
            monitors = [
                {
                    "width": mon["width"],
                    "height": mon["height"],
                    "x": mon["left"],
                    "y": mon["top"],
                }
                for mon in sct.monitors[1:]
            ]
    except Exception as e:
        logger.debug(f"mss indisponible, resolution par defaut : {e}")
        monitors = [{"width": 1920, "height": 1080, "x": 0, "y": 0}]

    # The window lookup is only needed when there is a choice to make.
    if len(monitors) > 1:
        active_index = _pick_active_monitor(get_window_bounds(), monitors)

    return active_index, monitors
|
||||
|
||||
|
||||
def _theme_from_color_scheme(raw: str) -> str:
    """Map a GNOME ``color-scheme`` value to 'light', 'dark' or 'unknown'.

    ``raw`` is the text printed by ``gsettings get``; surrounding
    whitespace and quotes are ignored. Values containing "dark" map to
    'dark'; values containing "light" or "default" map to 'light'.
    """
    value = raw.strip().strip("'\"").lower()
    if "dark" in value:
        return "dark"
    if "light" in value or "default" in value:
        return "light"
    return "unknown"


def get_os_theme() -> str:
    """Return the OS colour theme: 'light', 'dark' or 'unknown'.

    Windows: reads ``AppsUseLightTheme`` under the current user's
    ``Themes\\Personalize`` registry key (1 means light, anything else dark).
    Linux: best effort through ``gsettings`` (GNOME ``color-scheme``).
    Other platforms, or any failure: 'unknown'.

    Failures never propagate; they are logged at DEBUG level.
    """
    if _SYSTEM == "Windows":
        try:
            import winreg

            # The context manager closes the registry handle even when
            # QueryValueEx raises (e.g. the value does not exist).
            with winreg.OpenKey(
                winreg.HKEY_CURRENT_USER,
                r"Software\Microsoft\Windows\CurrentVersion\Themes\Personalize",
            ) as key:
                value, _ = winreg.QueryValueEx(key, "AppsUseLightTheme")
            return "light" if value == 1 else "dark"
        except Exception as e:
            logger.debug(f"Impossible de lire le theme Windows : {e}")
            return "unknown"

    # Linux: best effort via gsettings (GNOME)
    if _SYSTEM == "Linux":
        try:
            import subprocess

            # A timeout keeps a stuck gsettings/dconf call from blocking
            # the caller; TimeoutExpired is handled below.
            result = subprocess.check_output(
                ["gsettings", "get", "org.gnome.desktop.interface", "color-scheme"],
                stderr=subprocess.DEVNULL,
                timeout=2,
            ).decode()
            return _theme_from_color_scheme(result)
        except Exception as e:
            logger.debug(f"Impossible de lire le theme Linux (gsettings) : {e}")

    return "unknown"
|
||||
|
||||
|
||||
def get_os_language() -> str:
    """Return the OS language as a lowercase language code (fr, en, de, ...).

    The code is the language part of the default locale name, i.e. the
    text before the first ``_``, ``-``, ``.`` or ``@`` ('fr_FR' -> 'fr',
    'ast_ES' -> 'ast'). Returns 'unknown' when no locale can be determined
    or when the language part is not a 2- or 3-letter alphabetic code.
    """
    lang = None

    # locale.getdefaultlocale() is deprecated since Python 3.11. Resolve it
    # defensively and only fall back to getlocale() once it is gone, so the
    # result on current interpreters stays what it has always been.
    default_locale = getattr(locale, "getdefaultlocale", None)
    try:
        if default_locale is not None:
            lang = default_locale()[0]  # e.g. 'fr_FR'
        else:
            lang = locale.getlocale()[0]
    except Exception:
        lang = None

    if not lang:
        return "unknown"

    # Keep the whole language part instead of slicing two characters, so
    # 3-letter codes are not truncated into a different language.
    code = lang
    for sep in ("_", "-", ".", "@"):
        code = code.partition(sep)[0]
    code = code.lower()

    if code.isalpha() and 2 <= len(code) <= 3:
        return code
    return "unknown"
|
||||
|
||||
|
||||
def get_screen_metadata() -> Dict[str, Any]:
    """Collect every system metadata field in a single call.

    Per the original author, this is called once at startup and again on
    every focus change, and the result is attached to the events sent to
    the server.

    Returned keys: ``dpi_scale``, ``monitor_index``, ``monitors``,
    ``screen_resolution`` (width/height of the first listed monitor, or
    1920x1080 when the list is empty), ``window_bounds``, ``os_theme`` and
    ``os_language``.
    """
    active_monitor, all_monitors = get_monitor_info()

    if all_monitors:
        first = all_monitors[0]
    else:
        first = {"width": 1920, "height": 1080}

    # Fields are filled in the same order as they are reported.
    metadata: Dict[str, Any] = {}
    metadata["dpi_scale"] = get_dpi_scale()
    metadata["monitor_index"] = active_monitor
    metadata["monitors"] = all_monitors
    metadata["screen_resolution"] = [first["width"], first["height"]]
    metadata["window_bounds"] = get_window_bounds()
    metadata["os_theme"] = get_os_theme()
    metadata["os_language"] = get_os_language()
    return metadata
|
||||
Reference in New Issue
Block a user