backup: snapshot post-démo GHT 2026-05-19

Backup état complet après enregistrement vidéo démo de bout en bout. À utiliser comme point de référence pour la consolidation post-démo. Changements majeurs de la session 18-19 mai : - AIVA-URGENCE : page autonome avec preset URL + auto-focus chain - Workflow Demo_urgence_3_db : merge linux_db + steps AIVA + pause humaine NoMachine - Bypass LLM (static_result / static_text) dans replay_engine pour démos déterministes sans appel Ollama - Fix api_stream:3013 — replay_paused au premier polling /next - dag_execute : lift duration_ms vers top-level pour wait runtime - NPM bypass auth /aiva-urgence/ via location ^~ (proxy_host/10.conf hors git) - scripts/cancel-replays.sh — workaround Stop VWB qui ne purge pas la queue Anchors visuels (468) forcés dans le commit pour garantir restorabilité. DB workflows actuelle + ~12 .bak DB de la journée incluses. Sujets identifiés pour consolidation post-démo (TODO) : 1. Bug VWB recapture anchor ne régénère pas le PNG 2. Léa client accumule état mémoire (restart périodique requis) 3. Stop VWB ne purge pas la queue serveur (lien manquant vers /replay/cancel) 4. Bug coord client mss tronqué 2560x60 → mapping Y cassé 5. delay_before/delay_after ignorés au runtime (fix partiel duration_ms) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-19 14:55:06 +02:00
parent f2212e77e3
commit 5ea4960e65
627 changed files with 211348 additions and 169 deletions
--- a/core/grounding/infigui_server.py
+++ b/core/grounding/infigui_server.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+core/grounding/infigui_server.py — Service grounding persistant InfiGUI
+
+Charge InfiGUI-G1-3B en 4-bit une fois (~2.4 GB VRAM), puis sert les requêtes
+de grounding via un Unix socket. Évite le coût de chargement (~10s) à chaque
+appel que paie le subprocess one-shot.
+
+Protocole (length-prefixed JSON) :
+    Requête  : [4 octets uint32 BE = longueur] + payload JSON UTF-8
+    Réponse  : [4 octets uint32 BE = longueur] + payload JSON UTF-8
+
+Opérations supportées (champ "op", défaut "ground") :
+    - "ping"     → {"ok": true, "vram_gb": float, "uptime_s": float, "requests": int}
+    - "ground"   → {"x": int|None, "y": int|None, "confidence": float, ...}
+    - "shutdown" → {"ok": true} puis arrêt propre du serveur
+
+Le payload "ground" reprend exactement le format de l'ancien worker one-shot :
+    {"target": str, "description": str, "image_path": str, "anchor_image_path": str}
+
+Les images restent passées via fichiers (/run/rpa/... en production, /tmp/... en dev) — pas de bytes sur le socket.
+
+Lancement (manuel ou via systemd user unit rpa-grounding.service) :
+    cd ~/ai/rpa_vision_v3
+    .venv/bin/python -m core.grounding.infigui_server
+
+Variables d'environnement :
+    RPA_GROUNDING_SOCKET   chemin du socket (défaut: /run/rpa/grounding.sock si /run/rpa existe,
+                           sinon $XDG_RUNTIME_DIR/rpa-grounding.sock, sinon /tmp/rpa-grounding.sock)
+    RPA_GROUNDING_BACKLOG  taille listen backlog (défaut: 4)
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import signal
+import socket
+import struct
+import sys
+import threading
+import time
+import traceback
+from typing import Any, Dict, Optional
+
+# Réutilise la logique de chargement et d'inférence du worker one-shot.
+# load_model() et infer() sont conçus pour être appelés en process indépendant ;
+# on les appelle ici dans un process unique de longue durée.
+from core.grounding.infigui_worker import infer, load_model
+
+
+# ── Configuration ────────────────────────────────────────────────────────
+
+
+def _default_socket_path() -> str:
+    """Return the socket path to use when RPA_GROUNDING_SOCKET is unset.
+
+    Candidates, in order of preference:
+
+    1. ``/run/rpa/grounding.sock`` when ``/run/rpa`` exists — the "production"
+       convention (RuntimeDirectory=rpa shared between the systemd services),
+       kept consistent with ``ui_tars_grounder._default_socket_path``.
+    2. ``$XDG_RUNTIME_DIR/rpa-grounding.sock`` when that variable names an
+       existing directory.
+    3. ``/tmp/rpa-grounding.sock`` otherwise.
+    """
+    if os.path.isdir("/run/rpa"):
+        return "/run/rpa/grounding.sock"
+    xdg_dir = os.environ.get("XDG_RUNTIME_DIR")
+    if not xdg_dir or not os.path.isdir(xdg_dir):
+        return "/tmp/rpa-grounding.sock"
+    return os.path.join(xdg_dir, "rpa-grounding.sock")
+
+
+# Socket path: RPA_GROUNDING_SOCKET wins when set to a non-empty value,
+# otherwise the path computed by _default_socket_path() is used.
+SOCKET_PATH = os.environ.get("RPA_GROUNDING_SOCKET") or _default_socket_path()
+# Listen backlog; the environment value must parse as an integer.
+LISTEN_BACKLOG = int(os.environ.get("RPA_GROUNDING_BACKLOG", "4"))
+
+# Reasonable upper bound for a JSON payload (a request only carries paths and
+# short text — 4 MB is more than enough and guards against a buggy client).
+MAX_PAYLOAD_BYTES = 4 * 1024 * 1024
+
+
+# ── Protocole length-prefixed ────────────────────────────────────────────
+
+
+def _recv_exact(conn: socket.socket, n: int) -> Optional[bytes]:
+    """Read exactly ``n`` bytes from ``conn``.
+
+    Returns the bytes read, or None if the peer closes the connection before
+    ``n`` bytes have arrived. A request for zero bytes returns ``b""`` without
+    touching the socket.
+    """
+    buffer = bytearray()
+    while len(buffer) < n:
+        piece = conn.recv(n - len(buffer))
+        if not piece:
+            # An empty read means the peer closed: the message is truncated.
+            return None
+        buffer.extend(piece)
+    return bytes(buffer)
+
+
+def recv_message(conn: socket.socket) -> Optional[Dict[str, Any]]:
+    """Read one length-prefixed JSON message from ``conn``.
+
+    Wire format: a 4-byte big-endian unsigned length followed by that many
+    bytes of UTF-8 encoded JSON.
+
+    Returns:
+        The decoded JSON object, or None if the peer closed the connection
+        before a complete message was received.
+
+    Raises:
+        ValueError: if the announced length is 0 or exceeds MAX_PAYLOAD_BYTES,
+            if the payload is not valid UTF-8 / JSON (UnicodeDecodeError and
+            json.JSONDecodeError are both ValueError subclasses), or if the
+            decoded JSON value is not an object.
+    """
+    header = _recv_exact(conn, 4)
+    if header is None:
+        return None
+    (length,) = struct.unpack(">I", header)
+    if length == 0 or length > MAX_PAYLOAD_BYTES:
+        raise ValueError(f"Longueur payload invalide: {length}")
+    payload = _recv_exact(conn, length)
+    if payload is None:
+        return None
+    message = json.loads(payload.decode("utf-8"))
+    # Only JSON objects are valid messages. Without this check a literal
+    # `null` payload would come back as None and be mistaken for a closed
+    # connection, and arrays/scalars would break callers that expect a dict.
+    if not isinstance(message, dict):
+        raise ValueError(
+            f"Payload JSON invalide: objet attendu, reçu {type(message).__name__}"
+        )
+    return message
+
+
+def send_message(conn: socket.socket, obj: Dict[str, Any]) -> None:
+    """Serialize ``obj`` as UTF-8 JSON and send it with a 4-byte length prefix.
+
+    The prefix is the payload length as a big-endian unsigned 32-bit integer.
+    Prefix and payload go out in a single ``sendall`` call.
+    """
+    body = json.dumps(obj, ensure_ascii=False).encode("utf-8")
+    prefix = struct.pack(">I", len(body))
+    conn.sendall(prefix + body)
+
+
+# ── Serveur ──────────────────────────────────────────────────────────────
+
+
+class InfiGUIServer:
+    """Daemon that serves grounding requests over a Unix socket.
+
+    The model is loaded once by start() and reused for every request. Each
+    accepted connection is handled in its own daemon thread; calls to infer()
+    are serialized through a lock.
+    """
+
+    def __init__(self, socket_path: str = SOCKET_PATH):
+        """Initialise server state; nothing is loaded or opened until start().
+
+        Args:
+            socket_path: Filesystem path of the Unix socket to listen on.
+        """
+        self.socket_path = socket_path
+        self._server_sock: Optional[socket.socket] = None
+        self._stop = threading.Event()
+        # CUDA is not thread-safe on a shared model; serialize the inferences.
+        self._infer_lock = threading.Lock()
+        self._model = None
+        self._processor = None
+        self._start_time = time.time()
+        self._request_count = 0
+
+    # ── Lifecycle ────────────────────────────────────────────────────────
+
+    def start(self) -> None:
+        """Load the model, open the socket and serve until asked to stop.
+
+        Blocks in the accept loop until SIGTERM/SIGINT is received or a client
+        sends a "shutdown" request. Once the model is loaded, the listening
+        socket is closed and its file removed on every exit path, including a
+        failure while setting the socket up.
+
+        Raises:
+            OSError: if the socket cannot be set up, or if accept() fails
+                while the server is not stopping.
+
+        Any exception raised by load_model() propagates unchanged.
+        """
+        # 1. Load the model BEFORE opening the socket: if loading fails, no
+        #    half-working endpoint is ever exposed to clients.
+        print("[infigui-server] Chargement InfiGUI-G1-3B...")
+        self._model, self._processor = load_model()
+
+        # 2. Remove a stale socket file left behind by a previous run.
+        try:
+            os.unlink(self.socket_path)
+        except FileNotFoundError:
+            pass
+
+        try:
+            self._server_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+            self._server_sock.bind(self.socket_path)
+            # Owner-only permissions (rw-------), applied before listen() so
+            # no connection can be accepted with looser permissions.
+            os.chmod(self.socket_path, 0o600)
+            self._server_sock.listen(LISTEN_BACKLOG)
+            # Short accept timeout so the loop re-checks the stop flag.
+            self._server_sock.settimeout(1.0)
+
+            print(f"[infigui-server] Écoute sur {self.socket_path}")
+
+            # 3. Clean-shutdown signals.
+            signal.signal(signal.SIGTERM, self._on_signal)
+            signal.signal(signal.SIGINT, self._on_signal)
+
+            # 4. Accept loop.
+            while not self._stop.is_set():
+                try:
+                    conn, _ = self._server_sock.accept()
+                except socket.timeout:
+                    continue
+                except OSError:
+                    # Closing the listening socket is how a stop request
+                    # breaks out of accept(); anything else is a real error.
+                    if self._stop.is_set():
+                        break
+                    raise
+                # The connection is handled in a thread so that a slow client
+                # cannot block the accept loop; the inference itself remains
+                # serialized by self._infer_lock.
+                threading.Thread(
+                    target=self._handle_client,
+                    args=(conn,),
+                    daemon=True,
+                ).start()
+        finally:
+            self._cleanup()
+
+    def _request_stop(self) -> None:
+        """Set the stop flag and close the listening socket to unblock accept()."""
+        self._stop.set()
+        try:
+            if self._server_sock is not None:
+                self._server_sock.close()
+        except OSError:
+            pass
+
+    def _on_signal(self, signum, _frame) -> None:
+        """Signal handler for SIGTERM/SIGINT: log and request a stop."""
+        print(f"[infigui-server] Signal {signum} reçu, arrêt...")
+        self._request_stop()
+
+    def _cleanup(self) -> None:
+        """Close the listening socket, remove the socket file and log a summary."""
+        try:
+            if self._server_sock is not None:
+                self._server_sock.close()
+        except OSError:
+            pass
+        try:
+            os.unlink(self.socket_path)
+        except FileNotFoundError:
+            pass
+        print(f"[infigui-server] Arrêté ({self._request_count} requêtes traitées)")
+
+    # ── Connection handling ──────────────────────────────────────────────
+
+    def _handle_client(self, conn: socket.socket) -> None:
+        """Serve requests on one client connection until it ends.
+
+        One connection carries any number of requests (keep-alive), so a
+        client can send several "ground" requests without reconnecting. The
+        connection ends when the client closes it, sends an invalid request,
+        or asks for a shutdown. The stop flag is only re-checked between
+        requests. ``conn`` is always closed on exit.
+        """
+        try:
+            while not self._stop.is_set():
+                try:
+                    req = recv_message(conn)
+                except ValueError as e:
+                    # Covers a bad length prefix, invalid UTF-8 and invalid
+                    # JSON (UnicodeDecodeError and json.JSONDecodeError are
+                    # both ValueError subclasses).
+                    self._safe_send(conn, {"error": f"requête invalide: {e}"})
+                    return
+                if req is None:
+                    return  # client closed the connection cleanly
+                if not isinstance(req, dict):
+                    # Valid JSON that is not an object (array, string, ...):
+                    # answer explicitly instead of failing on req.get below.
+                    self._safe_send(
+                        conn, {"error": "requête invalide: objet JSON attendu"}
+                    )
+                    return
+
+                op = req.get("op", "ground")
+                if op == "ping":
+                    self._safe_send(conn, self._do_ping())
+                elif op == "shutdown":
+                    # Acknowledge first, then stop the accept loop.
+                    self._safe_send(conn, {"ok": True})
+                    self._request_stop()
+                    return
+                elif op == "ground":
+                    self._safe_send(conn, self._do_ground(req))
+                else:
+                    self._safe_send(conn, {"error": f"op inconnue: {op}"})
+        except Exception as e:
+            # Last-resort boundary for the connection thread: log the error
+            # and report it to the client if it is still there.
+            traceback.print_exc()
+            self._safe_send(conn, {"error": str(e)})
+        finally:
+            try:
+                conn.close()
+            except OSError:
+                pass
+
+    def _safe_send(self, conn: socket.socket, obj: Dict[str, Any]) -> None:
+        """Send ``obj`` to the client, ignoring OSError (client already gone)."""
+        try:
+            send_message(conn, obj)
+        except OSError:
+            pass
+
+    # ── Operations ───────────────────────────────────────────────────────
+
+    def _do_ping(self) -> Dict[str, Any]:
+        """Build the "ping" response.
+
+        Returns:
+            A dict with "ok", "vram_gb" (CUDA memory allocated, in units of
+            1e9 bytes, 0.0 when torch or CUDA is unavailable), "uptime_s"
+            (seconds since the server object was created) and "requests"
+            (number of "ground" requests handled so far).
+        """
+        try:
+            import torch
+
+            vram_gb = round(torch.cuda.memory_allocated() / 1e9, 2) if torch.cuda.is_available() else 0.0
+        except Exception:
+            # Best effort: a ping must answer even if torch cannot be queried.
+            vram_gb = 0.0
+        return {
+            "ok": True,
+            "vram_gb": vram_gb,
+            "uptime_s": round(time.time() - self._start_time, 1),
+            "requests": self._request_count,
+        }
+
+    def _do_ground(self, req: Dict[str, Any]) -> Dict[str, Any]:
+        """Run one inference under the inference lock.
+
+        Args:
+            req: The decoded request, passed unchanged to infer().
+
+        Returns:
+            Whatever infer() returns, or {"x": None, "y": None, "error": str}
+            if infer() raises.
+        """
+        with self._infer_lock:
+            self._request_count += 1
+            try:
+                return infer(self._model, self._processor, req)
+            except Exception as e:
+                traceback.print_exc()
+                return {
+                    "x": None,
+                    "y": None,
+                    "error": str(e),
+                }
+
+
+def main() -> int:
+    """Run the grounding server and return a process exit code.
+
+    Returns 0 when the server stops normally or on KeyboardInterrupt, and 1
+    after logging any other exception raised by start().
+    """
+    exit_code = 0
+    daemon = InfiGUIServer()
+    try:
+        daemon.start()
+    except KeyboardInterrupt:
+        # Treated as a normal stop.
+        pass
+    except Exception as exc:
+        print(f"[infigui-server] Erreur fatale: {exc}")
+        traceback.print_exc()
+        exit_code = 1
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/core/grounding/ui_tars_grounder.py
+++ b/core/grounding/ui_tars_grounder.py
@@ -1,34 +1,127 @@
 """
-core/grounding/ui_tars_grounder.py — Grounding via script one-shot InfiGUI
+core/grounding/ui_tars_grounder.py — Grounding InfiGUI (service persistant + fallback subprocess)

-Chaque appel lance un subprocess Python qui charge le modèle, infère, et quitte.
-Lent (~15s) mais fiable — pas de crash CUDA en process persistant.
+Stratégie d'appel (avril 2026) :
+1. Tenter d'abord le service persistant via Unix socket (rpa-grounding.service).
+   → ~1-2 s par requête (modèle déjà en VRAM).
+2. Si le socket n'est pas disponible (service down, env dev sans systemd),
+   retomber sur l'ancien subprocess one-shot (~15 s, mais fiable).
+
+Le fallback subprocess est conservé en filet de sécurité pendant 2-3 semaines
+le temps de valider la stabilité du service en production.
 """

 from __future__ import annotations

 import json
 import os
+import socket
+import struct
 import subprocess
 import sys
 import threading
 import time
-from typing import Optional
+from typing import Any, Dict, Optional

 from core.grounding.target import GroundingResult

-_instance: Optional[UITarsGrounder] = None
+_instance: Optional["UITarsGrounder"] = None
 _instance_lock = threading.Lock()


+# ── Configuration du client socket ──────────────────────────────────────
+
+
+def _default_socket_path() -> str:
+    """Locate the grounding service socket when RPA_GROUNDING_SOCKET is unset.
+
+    ``/run/rpa`` is the "production" convention (RuntimeDirectory=rpa shared
+    between the systemd services). It is preferred over XDG_RUNTIME_DIR so
+    that a client started from a CLI on the same machine finds the service's
+    socket. Next comes ``$XDG_RUNTIME_DIR`` when it names an existing
+    directory, and finally ``/tmp``.
+    """
+    if os.path.isdir("/run/rpa"):
+        return "/run/rpa/grounding.sock"
+    # An unset or empty variable becomes "", which is never a directory.
+    xdg_runtime = os.environ.get("XDG_RUNTIME_DIR") or ""
+    if os.path.isdir(xdg_runtime):
+        return os.path.join(xdg_runtime, "rpa-grounding.sock")
+    return "/tmp/rpa-grounding.sock"
+
+
+# Short connect timeout: if the service is down, switch to the subprocess quickly.
+SOCKET_CONNECT_TIMEOUT = 0.5
+# Larger response timeout: inference can take 1-3 s.
+SOCKET_REQUEST_TIMEOUT = 30.0
+
+
+def _image_dir() -> str:
+    """Return the directory used to share images between client and server.
+
+    Under systemd in production, every service involved runs with
+    PrivateTmp=true, so each one has an isolated /tmp and files written there
+    are not visible to the grounding server. The workaround is /run/rpa,
+    provided by RuntimeDirectory=rpa and shared between the services that
+    declare it. In development the fallback is /tmp.
+
+    RPA_GROUNDING_IMG_DIR takes precedence when it names an existing directory.
+    """
+    override = os.environ.get("RPA_GROUNDING_IMG_DIR")
+    for directory in (override, "/run/rpa"):
+        if directory and os.path.isdir(directory):
+            return directory
+    return "/tmp"
+
+
+def _send_socket_request(req: Dict[str, Any], socket_path: str) -> Optional[Dict[str, Any]]:
+    """Send one request to the persistent service and return its response.
+
+    The exchange is length-prefixed JSON in both directions: a 4-byte
+    big-endian unsigned length followed by a UTF-8 JSON payload.
+
+    Args:
+        req: Request to serialize as JSON.
+        socket_path: Filesystem path of the service's Unix socket.
+
+    Returns:
+        The decoded JSON response, or None when the service cannot be reached
+        or does not answer completely (socket file missing, connection
+        refused, timeout, connection closed mid-response, any other OSError,
+        or an unexpected error such as an undecodable response).
+    """
+    if not os.path.exists(socket_path):
+        return None
+    try:
+        # The context manager closes the socket on every exit path, including
+        # the early returns and the exceptions below.
+        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
+            sock.settimeout(SOCKET_CONNECT_TIMEOUT)
+            sock.connect(socket_path)
+            sock.settimeout(SOCKET_REQUEST_TIMEOUT)
+
+            payload = json.dumps(req, ensure_ascii=False).encode("utf-8")
+            sock.sendall(struct.pack(">I", len(payload)) + payload)
+
+            # Read the 4-byte length header of the response.
+            header = b""
+            while len(header) < 4:
+                chunk = sock.recv(4 - len(header))
+                if not chunk:
+                    return None
+                header += chunk
+            (length,) = struct.unpack(">I", header)
+
+            # Read the body in chunks of at most 64 KiB.
+            chunks = []
+            received = 0
+            while received < length:
+                chunk = sock.recv(min(65536, length - received))
+                if not chunk:
+                    return None
+                chunks.append(chunk)
+                received += len(chunk)
+            body = b"".join(chunks)
+
+        return json.loads(body.decode("utf-8"))
+    except OSError:
+        # socket.timeout, ConnectionRefusedError and FileNotFoundError are all
+        # OSError subclasses: the service is treated as unreachable.
+        return None
+    except Exception as e:  # pragma: no cover
+        print(f"⚠️ [InfiGUI/socket] Erreur inattendue: {e}")
+        return None
+
+
 class UITarsGrounder:
-    """Grounding via script one-shot InfiGUI."""
+    """Grounding InfiGUI : socket persistant en priorité, subprocess en secours."""

     def __init__(self):
         self._lock = threading.Lock()
         self._project_root = os.path.abspath(
             os.path.join(os.path.dirname(__file__), "..", "..")
         )
+        self._socket_path = os.environ.get("RPA_GROUNDING_SOCKET") or _default_socket_path()
+        # After a failed attempt the service is flagged as absent, so a session
+        # without the service does not pay the "probe the socket" cost on every
+        # call. It is re-tested after 30 s in case the service was restarted.
+        self._service_unavailable_until: float = 0.0

    @classmethod
    def get_instance(cls) -> UITarsGrounder:
@@ -64,28 +157,41 @@ class UITarsGrounder:

        try:
            with self._lock:
+                # Répertoire partagé client/serveur (cf. _image_dir)
+                img_dir = _image_dir()
                # Sauver l'image principale
-                image_path = "/tmp/infigui_screen.png"
+                image_path = os.path.join(img_dir, "infigui_screen.png")
                if screen_pil is not None:
                    screen_pil.save(image_path)

                # Sauver l'image d'ancre (mode fusionné)
                anchor_image_path = ""
                if anchor_pil is not None:
-                    anchor_image_path = "/tmp/infigui_anchor.png"
+                    anchor_image_path = os.path.join(img_dir, "infigui_anchor.png")
                    anchor_pil.save(anchor_image_path)

-                # Construire la requête JSON
-                req = json.dumps({
+                req_dict = {
                    "target": target_text,
                    "description": target_description,
                    "image_path": image_path,
                    "anchor_image_path": anchor_image_path,
-                })
+                }

                mode_str = "fused" if anchor_pil is not None else "text"
                label_short = target_text[:30] if target_text else "<crop only>"
-                print(f"🎯 [InfiGUI] Lancement one-shot [{mode_str}]: '{label_short}'")
+
+                # ── 1) Tenter le service persistant (socket Unix) ─────────
+                if time.time() >= self._service_unavailable_until:
+                    sock_result = _send_socket_request(req_dict, self._socket_path)
+                    if sock_result is not None:
+                        return self._build_result(sock_result, anchor_pil, t0, source="server")
+                    # Service down : on note pour 30s et on bascule subprocess
+                    self._service_unavailable_until = time.time() + 30.0
+                    print(f"ℹ️ [InfiGUI] Service indisponible, fallback subprocess pour 30s")
+
+                # ── 2) Fallback subprocess one-shot ──────────────────────
+                req = json.dumps(req_dict)
+                print(f"🎯 [InfiGUI/subprocess] Lancement one-shot [{mode_str}]: '{label_short}'")

                # Lancer le script one-shot
                # IMPORTANT: depuis un service systemd où le parent a déjà chargé CUDA,
@@ -137,21 +243,7 @@ class UITarsGrounder:
                    print(f"⚠️ [InfiGUI] Pas de réponse JSON dans la sortie")
                    return None

-            dt = (time.time() - t0) * 1000
-
-            if data.get("x") is not None:
-                method_name = "infigui_fused" if anchor_pil is not None else "infigui"
-                print(f"🎯 [InfiGUI/{method_name}] ({data['x']}, {data['y']}) "
-                      f"conf={data.get('confidence', 0):.2f} ({dt:.0f}ms)")
-                return GroundingResult(
-                    x=data["x"], y=data["y"],
-                    method=method_name,
-                    confidence=data.get("confidence", 0.90),
-                    time_ms=dt,
-                )
-            else:
-                print(f"⚠️ [InfiGUI] Pas trouvé ({dt:.0f}ms)")
-                return None
+            return self._build_result(data, anchor_pil, t0, source="subprocess")

        except subprocess.TimeoutExpired:
            print(f"⚠️ [InfiGUI] Timeout 60s")
@@ -159,3 +251,38 @@ class UITarsGrounder:
        except Exception as e:
            print(f"⚠️ [InfiGUI] Erreur: {e}")
            return None
+
+    # ──────────────────────────────────────────────────────────────────
+    # Helpers internes
+    # ──────────────────────────────────────────────────────────────────
+
+    def _build_result(
+        self,
+        data: Dict[str, Any],
+        anchor_pil: Any,
+        t0: float,
+        source: str,
+    ) -> Optional[GroundingResult]:
+        """Convert a JSON response (server or subprocess) into a GroundingResult.
+
+        Args:
+            data: Decoded response; the keys read are "error", "x", "y" and
+                "confidence".
+            anchor_pil: Anchor image of the request, or None. Only compared
+                against None, to label the method "infigui_fused" or "infigui".
+            t0: time.time() value taken when the request started; used to
+                compute the elapsed time in milliseconds.
+            source: Label used in the log lines ("server" or "subprocess").
+
+        Returns:
+            A GroundingResult, or None when the response carries a truthy
+            "error" or has no "x" coordinate.
+        """
+        dt = (time.time() - t0) * 1000
+
+        if data.get("error"):
+            print(f"⚠️ [InfiGUI/{source}] Erreur: {data['error']} ({dt:.0f}ms)")
+            return None
+
+        if data.get("x") is None:
+            print(f"⚠️ [InfiGUI/{source}] Pas trouvé ({dt:.0f}ms)")
+            return None
+
+        method_name = "infigui_fused" if anchor_pil is not None else "infigui"
+        # Resolve the confidence once so that the log line and the returned
+        # result agree when the response omits it (default 0.90).
+        confidence = data.get("confidence", 0.90)
+        print(
+            f"🎯 [InfiGUI/{source}/{method_name}] ({data['x']}, {data['y']}) "
+            f"conf={confidence:.2f} ({dt:.0f}ms)"
+        )
+        return GroundingResult(
+            x=data["x"],
+            y=data["y"],
+            method=method_name,
+            confidence=confidence,
+            time_ms=dt,
+        )