fix(p0): secure agent revocation and R6 worker queue

This commit is contained in:
Dom
2026-06-02 15:52:35 +02:00
parent 2dd306724c
commit 7a1a5cb6fd
11 changed files with 2869 additions and 109 deletions

View File

@@ -173,6 +173,9 @@ class AgentRegistry:
# Deja enrolle et actif -> conflit explicite
raise AgentAlreadyEnrolledError(dict(existing))
if existing["uninstall_reason"] == "admin_revoke":
raise AgentRevokedError(dict(existing))
# Agent desinstalle : reactivation si autorise (defaut)
if not allow_reactivate:
raise AgentAlreadyEnrolledError(dict(existing))
@@ -273,13 +276,15 @@ class AgentRegistry:
"""Met a jour last_seen_at (appel depuis le stream / heartbeat).
Silencieux si l'agent est inconnu (evite les erreurs sur vieux clients).
Ne reactive jamais un agent desinstalle/revoque.
"""
if not machine_id:
return
now = _utc_now_iso()
with _DB_LOCK, self._connect() as conn:
conn.execute(
"UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?",
"UPDATE enrolled_agents SET last_seen_at = ? "
"WHERE machine_id = ? AND status = 'active'",
(now, machine_id),
)
conn.commit()
@@ -294,3 +299,14 @@ class AgentAlreadyEnrolledError(Exception):
f"machine_id={existing_row.get('machine_id')} deja enrole "
f"(status={existing_row.get('status')})"
)
class AgentRevokedError(Exception):
    """Raised when an administrator has revoked this machine_id.

    The registry row that triggered the error is kept on ``self.existing`` so
    that callers can inspect it.
    """

    def __init__(self, existing_row: Dict[str, Any]):
        self.existing = existing_row
        machine_id = existing_row.get("machine_id")
        reason = existing_row.get("uninstall_reason")
        super().__init__(f"machine_id={machine_id} revoque (reason={reason})")

File diff suppressed because it is too large Load Diff

View File

@@ -25,6 +25,7 @@ Le worker :
5. Se suspend quand un replay est actif (libère le GPU)
"""
import json
import logging
import os
import signal
@@ -67,6 +68,7 @@ class VLMWorker:
self._running = False
self._processor = None # Initialisé au premier besoin (lazy loading GPU)
self._current_session: Optional[str] = None
self._started_at: str = datetime.now().isoformat()
# Stats
self._stats: Dict[str, int] = {
@@ -83,7 +85,10 @@ class VLMWorker:
if self._processor is None:
logger.info("Initialisation du StreamProcessor (chargement GPU)...")
from .stream_processor import StreamProcessor
self._processor = StreamProcessor(data_dir=str(LIVE_SESSIONS_DIR))
self._processor = StreamProcessor(
data_dir=str(DATA_DIR),
enable_vlm=True,
)
logger.info("StreamProcessor initialisé.")
return self._processor
@@ -98,6 +103,11 @@ class VLMWorker:
logger.info(" Sessions dir : %s", LIVE_SESSIONS_DIR)
logger.info(" Poll interval : %ds", POLL_INTERVAL)
# N2 + N3 : santé initiale + signal READY systemd dès le démarrage
# (avant tout chargement GPU, pour ne pas dépasser le timeout de start).
self._write_health("healthy")
self._sd_notify("READY=1")
while self._running:
try:
# Vérifier si un replay est actif
@@ -110,6 +120,7 @@ class VLMWorker:
if session_id:
self._process_session(session_id)
else:
self._write_health("healthy") # N2 : cycle idle
time.sleep(POLL_INTERVAL)
except KeyboardInterrupt:
@@ -119,6 +130,7 @@ class VLMWorker:
logger.error("Erreur dans la boucle principale : %s", e, exc_info=True)
time.sleep(5) # Éviter une boucle d'erreurs rapide
self._write_health("stopped") # N2 : santé finale
logger.info("VLM Worker arrêté.")
def stop(self):
@@ -126,6 +138,103 @@ class VLMWorker:
self._running = False
logger.info("Arrêt demandé.")
# =========================================================================
# N2 — Health file (_worker_health.json)
# =========================================================================
#
# Garde-fou anti-blocage silencieux : expose l'état de santé du worker sur
# disque pour qu'un superviseur (humain, dashboard, watchdog) détecte un
# worker dégradé sans avoir à fouiller les logs. Écriture atomique.
#
# CONFIDENTIALITÉ (HDS) : n'écrit AUCUNE donnée patient — uniquement des
# identifiants techniques (session_id), des compteurs et des booléens de
# composants. Jamais d'OCR, de noms de fichiers screenshots, ni de contenu
# de session.
def _sd_notify(self, state: str) -> bool:
"""Notifie systemd via $NOTIFY_SOCKET, sans dépendance `systemd.daemon`.
Implémentation pure socket (AF_UNIX SOCK_DGRAM) : fonctionne sous systemd
`Type=notify` pour `READY=1` et le heartbeat `WATCHDOG=1`. No-op silencieux
hors systemd (variable absente) ou en cas d'erreur — jamais bloquant.
Retourne True si le message a été émis.
"""
addr = os.environ.get("NOTIFY_SOCKET")
if not addr:
return False
try:
import socket
# Namespace abstrait systemd : '@' → octet nul de préfixe
connect_addr = "\0" + addr[1:] if addr.startswith("@") else addr
with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock:
sock.connect(connect_addr)
sock.sendall(state.encode("utf-8"))
return True
except Exception as e:
logger.debug("sd_notify(%s) échoué : %s", state, e)
return False
def _health_components(self) -> Dict[str, bool]:
"""Statut booléen de chaque composant lourd, dérivé du processor."""
proc = self._processor
return {
"screen_analyzer": proc is not None and getattr(proc, "_screen_analyzer", None) is not None,
"clip_embedder": proc is not None and getattr(proc, "_clip_embedder", None) is not None,
"faiss_manager": proc is not None and getattr(proc, "_faiss_manager", None) is not None,
"state_embedding_builder": proc is not None and getattr(proc, "_state_embedding_builder", None) is not None,
}
def _write_health(self, status: str) -> None:
    """Write ``DATA_DIR/_worker_health.json`` atomically, then ping the watchdog.

    The payload holds the pid, start time, time of this write, current
    session id, number of non-blank lines in ``_worker_queue.txt``, the
    per-component booleans and a copy of the stats counters.

    Args:
        status: Requested status; expected values are ``healthy``, ``busy``,
            ``degraded`` or ``stopped``. When the processor runs with VLM
            enabled but has no ScreenAnalyzer, the status is forced to
            ``degraded`` whatever was requested.

    Any failure while building or writing the file is logged as a warning and
    swallowed: the health file is a safeguard, never a point of failure.
    """
    try:
        components = self._health_components()
        proc = self._processor
        vlm_mode = proc is not None and getattr(proc, "_enable_vlm", False)
        if vlm_mode and not components["screen_analyzer"]:
            status = "degraded"

        queue_path = DATA_DIR / "_worker_queue.txt"
        try:
            queue_length = len(
                [ln for ln in queue_path.read_text(encoding="utf-8").splitlines() if ln.strip()]
            ) if queue_path.exists() else 0
        except Exception:
            # Best effort: an unreadable queue file is reported as empty.
            queue_length = 0

        payload = {
            "pid": os.getpid(),
            "started_at": self._started_at,
            "last_cycle": datetime.now().isoformat(),
            "current_session": self._current_session,
            "queue_length": queue_length,
            "components": components,
            "stats": dict(self._stats),
            "status": status,
        }

        health_path = DATA_DIR / "_worker_health.json"
        tmp_path = health_path.with_suffix(".json.tmp")
        tmp_path.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        # Path.replace overwrites an existing target on every platform,
        # whereas Path.rename raises FileExistsError on Windows once the
        # health file already exists.
        tmp_path.replace(health_path)
    except Exception as e:
        logger.warning("Écriture health file échouée : %s", e)

    # Every health write doubles as a systemd watchdog heartbeat, except the
    # final "stopped" write.
    if status != "stopped":
        self._sd_notify("WATCHDOG=1")
# =========================================================================
# Queue management (fichier _worker_queue.txt)
# =========================================================================
@@ -206,6 +315,9 @@ class VLMWorker:
REPLAY_WAIT_TIMEOUT,
)
break
# N3 : heartbeat pendant la pause replay (peut durer jusqu'à 120s,
# sinon le watchdog tuerait un worker pourtant sain et en attente).
self._sd_notify("WATCHDOG=1")
time.sleep(REPLAY_CHECK_INTERVAL)
elapsed = time.time() - start
@@ -220,6 +332,7 @@ class VLMWorker:
"""Traite une session complète (analyse VLM + construction workflow)."""
self._current_session = session_id
logger.info("=== Début traitement session %s ===", session_id)
self._write_health("busy") # N2 : début de session
start_time = time.time()
try:
@@ -331,6 +444,7 @@ class VLMWorker:
finally:
self._current_session = None
self._write_health("healthy") # N2 : fin de session (ou degraded auto)
logger.info("=== Fin traitement session %s ===", session_id)
@@ -347,6 +461,8 @@ class VLMWorker:
f" ({shot_id})" if shot_id else "",
)
self._write_health("busy") # N2 : heartbeat à chaque screenshot
# Vérifier si un replay est devenu actif pendant le traitement
if self._is_replay_active():
logger.info(

View File

@@ -20,6 +20,15 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
try:
from agent_v0.agent_v1.ui.message_contract import (
coerce_supervised_pause_message,
warn_visible_message,
)
except Exception: # pragma: no cover - fallback for partial server deployments
coerce_supervised_pause_message = None
warn_visible_message = None
@dataclass
class PausePayload:
@@ -50,8 +59,25 @@ def build_pause_payload(
last_screenshot: Optional[str],
) -> PausePayload:
"""Construit le payload de pause enrichi pour une action pause_for_human."""
params = action.get("parameters") or {}
message = params.get("message", "Validation requise")
params = dict(action.get("parameters") or {})
for key in ("message", "safety_level", "safety_checks", "pause_reason"):
if key not in params or params.get(key) in (None, "", []):
if action.get(key) not in (None, "", []):
params[key] = action.get(key)
raw_message = (
params.get("message")
or action.get("message")
or action.get("intention")
or ""
)
message = _coerce_pause_message(
raw_message,
intention=params.get("intention") or action.get("intention") or action.get("description"),
attendu=params.get("attendu") or params.get("expected") or action.get("expected"),
vu=params.get("vu") or params.get("observed") or action.get("observed"),
demande=params.get("demande") or params.get("request"),
)
safety_level = params.get("safety_level")
declarative = params.get("safety_checks") or []
@@ -90,11 +116,60 @@ def build_pause_payload(
return PausePayload(
checks=checks,
pause_reason="",
pause_reason=params.get("pause_reason", ""),
message=message,
)
def _coerce_pause_message(
    message: Any = "",
    *,
    intention: Any = "",
    attendu: Any = "",
    vu: Any = "",
    demande: Any = "",
) -> str:
    """Build the message shown to the human for a supervised pause.

    When ``coerce_supervised_pause_message`` was imported successfully, the
    work is delegated to it. Otherwise a four-line French message is built
    locally, with a default wording for every missing part. If
    ``warn_visible_message`` is available, it is called on the raw message
    and again on the final result.
    """

    def _report(text: Any, source: str, supervised: bool) -> None:
        # The warning hook is optional; it is None on partial deployments.
        if warn_visible_message is not None:
            warn_visible_message(
                text,
                source=source,
                supervised_pause=supervised,
            )

    _report(message, "safety_checks_provider._coerce_pause_message.raw", False)

    if coerce_supervised_pause_message is None:
        fallback_request = "indiquer si je peux continuer ou corriger l'action attendue"
        parts = [
            f"J'essaie de : {intention or 'continuer une etape supervisee'}",
            f"J'attendais : {attendu or 'un accord humain clair avant de continuer'}",
            f"Je vois : {vu or 'je suis sur une etape qui demande une verification humaine'}",
            f"Peux-tu : {demande or message or fallback_request}",
        ]
        result = "\n".join(parts)
        final_source = "safety_checks_provider._coerce_pause_message.final_fallback"
    else:
        result = coerce_supervised_pause_message(
            message,
            intention=intention,
            attendu=attendu,
            vu=vu,
            demande=demande,
        )
        final_source = "safety_checks_provider._coerce_pause_message.final"

    _report(result, final_source, True)
    return result
def _call_llm_for_contextual_checks(
action: Dict[str, Any],
replay_state: Dict[str, Any],

View File

@@ -37,6 +37,11 @@ _MODIFIER_ONLY_KEYS = {
"meta", "meta_l", "meta_r", "super", "super_l", "super_r",
}
# Key names (Windows / Cmd / Meta / Super variants) that are treated as a
# meaningful gesture when pressed alone; membership is tested by
# _is_standalone_system_key on the lower-cased key name.
_STANDALONE_SYSTEM_KEYS = {
"win", "win_l", "win_r", "cmd", "cmd_l", "cmd_r",
"windows", "meta", "meta_l", "meta_r", "super", "super_l", "super_r",
}
# Mapping numpad vk codes → caractères (layout-indépendant)
_NUMPAD_VK_MAP = {
96: '0', 97: '1', 98: '2', 99: '3', 100: '4',
@@ -69,6 +74,18 @@ def _is_modifier_only(keys: list) -> bool:
return all(k.lower() in _MODIFIER_ONLY_KEYS for k in keys)
def _is_standalone_system_key(keys: list) -> bool:
"""True pour les touches système seules qui sont des gestes utiles."""
if len(keys) != 1:
return False
return str(keys[0]).lower() in _STANDALONE_SYSTEM_KEYS
def _is_ignorable_modifier_only(keys: list) -> bool:
    """Return True for lone modifiers that must not become a replay step.

    A modifier-only key list is ignorable unless it is a standalone system
    key, which is kept.
    """
    modifier_only = _is_modifier_only(keys)
    if not modifier_only:
        return modifier_only
    return not _is_standalone_system_key(keys)
def _sanitize_keys(keys: list) -> list:
"""Nettoyer une liste de touches : convertir les caractères de contrôle."""
cleaned = []
@@ -94,7 +111,7 @@ def _is_parasitic_event(event_data: Dict[str, Any]) -> bool:
if event_type in ("key_press", "key_combo"):
keys = event_data.get("keys", event_data.get("data", {}).get("keys", []))
if not keys or _is_modifier_only(keys):
if not keys or _is_ignorable_modifier_only(keys):
return True
elif event_type == "text_input":
@@ -203,7 +220,7 @@ def _filter_parasitic_steps(steps: list) -> list:
s for s in steps
if not (
s.get("type") in ("key_combo", "key_press")
and _is_modifier_only(s.get("keys", []))
and _is_ignorable_modifier_only(s.get("keys", []))
)
]
@@ -266,7 +283,7 @@ def clean_enriched_actions(actions: list) -> list:
# key_combo : sanitiser les touches, puis filtrer les modificateurs seuls
if atype == 'key_combo':
keys = _sanitize_keys(a.get('keys', []))
if _is_modifier_only(keys):
if _is_ignorable_modifier_only(keys):
continue
if not keys:
continue
@@ -328,6 +345,12 @@ _IGNORED_EVENT_TYPES = frozenset({
_POST_COMBO_WAITS = {
# (tuple de touches normalisées, triées en minuscule) -> wait_ms
# NB : les tuples sont sorted() alphabétiquement
('win',): 2000, # Win seul → menu/recherche Windows
('cmd',): 2000,
('s', 'win'): 2000, # Win+S → Recherche Windows
('cmd', 's'): 2000,
('escape',): 800, # Escape → fermeture menu/dialogue
('esc',): 800,
('r', 'win'): 3000, # Win+R → Exécuter
('r', 'super'): 3000,
('meta', 'r'): 3000,
@@ -956,8 +979,32 @@ def enrich_click_from_screenshot(
vlm_description = ", ".join(vlm_parts) if vlm_parts else ""
# ── 4. SomEngine : identifier l'élément cliqué ──
# C2d-bis (2026-05-25) court-circuits :
# Niveau A : si vision_info.text déjà présent, le code priorise vision_info.text
# ligne 974-981 de toute façon → SomEngine redondant (économie ~1.2s/clic CPU).
# Niveau B : flag RPA_SKIP_BUILD_VISION=true (alias RPA_SKIP_BUILD_VLM)
# skip total SomEngine + gemma4 (économie ~4s/clic). Défaut OFF
# pour préserver comportement historique.
has_vision_text = bool(isinstance(vision_info, dict) and vision_info.get("text"))
_skip_flag_raw = (
os.environ.get("RPA_SKIP_BUILD_VISION")
or os.environ.get("RPA_SKIP_BUILD_VLM")
or "0"
)
skip_build_vision = _skip_flag_raw.strip().lower() in ("1", "true", "yes")
som_elem = None
if session_dir and screenshot_id:
if skip_build_vision:
logger.debug(
"[PERF] vision.skip_som reason=RPA_SKIP_BUILD_VISION click=(%d,%d)",
click_x, click_y,
)
elif has_vision_text:
logger.debug(
"[PERF] vision.skip_som reason=vision_info.text click=(%d,%d) text=%r",
click_x, click_y, vision_info.get("text", "")[:40],
)
elif session_dir and screenshot_id:
# Appeler _som_identify_clicked_element via un event_data minimal
fake_event = {
"screenshot_id": screenshot_id,
@@ -981,10 +1028,15 @@ def enrich_click_from_screenshot(
text_source = "ocr"
# ── 5b. Gemma4 : identifier l'élément cliqué via le screenshot fenêtre ──
# Quand l'OCR et SomEngine ne trouvent pas de texte, gemma4 (port 11435)
# reçoit le screenshot fenêtre + la position du clic et décrit l'élément.
# Un seul appel, une seule fois, pendant l'enregistrement.
if not element_text:
# Quand l'OCR et SomEngine ne trouvent pas de texte, gemma4 reçoit le
# screenshot fenêtre + la position du clic et décrit l'élément.
# Skippé si RPA_SKIP_BUILD_VISION actif (Niveau B C2d-bis).
if not element_text and skip_build_vision:
logger.debug(
"[PERF] vision.skip_gemma4 reason=RPA_SKIP_BUILD_VISION click=(%d,%d)",
click_x, click_y,
)
elif not element_text:
# Essayer avec le screenshot fenêtre (contexte complet)
win_screenshot = None
if session_dir and screenshot_id:
@@ -1320,6 +1372,157 @@ def _infer_close_tab_target(
return None
def _is_notepad_title(title: str) -> bool:
"""Retourne True pour les fenêtres Bloc-notes modernes/françaises."""
lowered = str(title or "").casefold()
return "bloc-notes" in lowered or "notepad" in lowered
def _infer_save_dialog_primary_button_target(
raw_events: list,
click_event: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Détecter le bouton primaire du dialogue Windows ``Enregistrer sous``.
Pattern réel ``sess_20260520T102916_066851`` :
- clic dans la fenêtre ``Enregistrer sous`` en bas de la boîte ;
- focus immédiat de retour vers ``... Bloc-notes``.
Quand OCR/SomEngine sont skippés au build, ce clic restait seulement
décrit par position + crop. Le template matching pouvait alors dériver.
On encode donc l'intention UI stable : bouton ``Enregistrer``.
"""
if click_event.get("type") != "mouse_click":
return None
window = click_event.get("window", {})
if not isinstance(window, dict):
return None
from_title = str(window.get("title", "") or "").strip()
app_name = str(window.get("app_name", "") or "").strip().lower()
if from_title.casefold() != "enregistrer sous":
return None
if app_name and "notepad" not in app_name:
return None
window_capture = click_event.get("window_capture", {})
if not isinstance(window_capture, dict):
return None
click_relative = window_capture.get("click_relative")
window_size = window_capture.get("window_size")
if not (
isinstance(click_relative, list)
and len(click_relative) == 2
and isinstance(window_size, list)
and len(window_size) == 2
):
return None
try:
rel_y = float(click_relative[1])
win_h = float(window_size[1])
except (TypeError, ValueError):
return None
if win_h <= 0 or rel_y / win_h < 0.78:
# Boutons Enregistrer/Annuler en bas de dialogue.
return None
click_ts = click_event.get("timestamp")
click_pos = click_event.get("pos") or []
match_idx = None
for idx, raw_evt in enumerate(raw_events):
event_data = raw_evt.get("event", raw_evt)
if event_data.get("type") != "mouse_click":
continue
if event_data.get("timestamp") != click_ts:
continue
if (event_data.get("pos") or []) != click_pos:
continue
match_idx = idx
break
if match_idx is None:
return None
for follow_evt in raw_events[match_idx + 1: match_idx + 6]:
follow_data = follow_evt.get("event", follow_evt)
follow_type = follow_data.get("type", "")
if follow_type in {"mouse_click", "text_input", "key_press", "key_combo"}:
return None
if follow_type != "window_focus_change":
continue
to_info = follow_data.get("to", {})
if not isinstance(to_info, dict):
continue
to_title = str(to_info.get("title", "") or "").strip()
to_app = str(to_info.get("app_name", "") or "").strip().lower()
if "notepad" not in to_app or not _is_notepad_title(to_title):
continue
follow_ts = follow_data.get("timestamp")
if (
isinstance(click_ts, (int, float))
and isinstance(follow_ts, (int, float))
and follow_ts - click_ts > 3.0
):
break
return {
"by_text": "Enregistrer",
"by_role": "button",
"window_title": "Enregistrer sous",
"context_hints": {
"window_title": "Enregistrer sous",
"interaction": "save_dialog_primary_button",
"expected_after_window": to_title,
},
"vlm_description": (
"Dans la fenêtre 'Enregistrer sous', le bouton principal "
"'Enregistrer' en bas de la boîte de dialogue"
),
}
return None
def _is_post_save_out_of_window_click(event_data: dict) -> bool:
"""Vrai pour un clic parasite hors fenêtre juste après sauvegarde Notepad."""
if event_data.get("type") != "mouse_click":
return False
window = event_data.get("window", {})
if not isinstance(window, dict):
return False
if not _is_notepad_title(str(window.get("title", "") or "")):
return False
window_capture = event_data.get("window_capture", {})
if not isinstance(window_capture, dict):
return False
if window_capture.get("click_inside_window") is False:
return True
click_relative = window_capture.get("click_relative")
window_size = window_capture.get("window_size")
if not (
isinstance(click_relative, list)
and len(click_relative) == 2
and isinstance(window_size, list)
and len(window_size) == 2
):
return False
try:
rel_x = float(click_relative[0])
rel_y = float(click_relative[1])
win_w = float(window_size[0])
win_h = float(window_size[1])
except (TypeError, ValueError):
return False
return win_w > 0 and win_h > 0 and (
rel_x < 0 or rel_y < 0 or rel_x > win_w or rel_y > win_h
)
def _attach_expected_window_before(actions: list, raw_events: list) -> None:
"""Attacher la fenêtre attendue AVANT chaque clic en rejouant les
raw events et en conservant le dernier ``window_focus_change.to.title``.
@@ -1463,6 +1666,17 @@ def _enrich_actions_with_intentions(
"""
import requests as _requests
skip_flag = (
os.environ.get("RPA_SKIP_INTENTION_ENRICHMENT")
or os.environ.get("RPA_SKIP_ENRICHMENT")
or ""
)
if skip_flag.strip().lower() in {"1", "true", "yes", "on"}:
logger.info(
"Enrichissement intentions désactivé par RPA_SKIP_INTENTION_ENRICHMENT"
)
return
gemma4_port = os.environ.get("GEMMA4_PORT", _GEMMA4_PORT)
gemma4_url = f"http://localhost:{gemma4_port}/api/chat"
@@ -1659,6 +1873,21 @@ def build_replay_from_raw_events(
if not events:
return []
# C2b 2026-05-25 : instrumentation [PERF] des étapes de build_replay
# (décomposition des ~22s restantes après skip enrichissement gemma4).
# Préfixe [PERF] cohérent avec arbitrage Codex D3 10:19. Pas de flag :
# spans build hors boucle chaude, info permanente OK.
import time as _time_perf
_perf_t_step = _time_perf.perf_counter()
_perf_t_total = _perf_t_step
def _perf_log(step: str) -> None:
nonlocal _perf_t_step
now = _time_perf.perf_counter()
elapsed_ms = (now - _perf_t_step) * 1000
logger.info("[PERF] build.%s session=%s elapsed_ms=%.0f", step, session_id, elapsed_ms)
_perf_t_step = now
# Résoudre le répertoire de session pour les crops visuels
session_dir_path = Path(session_dir) if session_dir else None
if session_dir_path and not session_dir_path.is_dir():
@@ -1675,6 +1904,8 @@ def build_replay_from_raw_events(
bool(session_dir_path),
)
_perf_log("step1_extract_resolution")
# ── 2. Filtrer et normaliser les événements ──
actionable_events = []
saw_save_combo = False # Tracker Ctrl+S / Ctrl+Shift+S pour la coupure systray
@@ -1714,8 +1945,19 @@ def build_replay_from_raw_events(
)
break
if _is_post_save_out_of_window_click(event_data):
logger.debug(
"Coupure du replay : clic post-save hors fenêtre applicative "
"(window=%s, click_relative=%s)",
(event_data.get("window") or {}).get("title", ""),
(event_data.get("window_capture") or {}).get("click_relative"),
)
break
actionable_events.append(event_data)
_perf_log("step2_filter_normalize")
# ── 3. Fusionner les text_input consécutifs ──
# Tous les text_input consécutifs sont fusionnés en un seul, indépendamment
# du gap temporel. L'utilisateur tape lettre par lettre mais on veut un
@@ -1854,6 +2096,8 @@ def build_replay_from_raw_events(
original[:50],
)
_perf_log("step3_merge_text_input")
# ── 4. Convertir en actions replay normalisées ──
actions = []
last_ts = 0.0
@@ -1977,6 +2221,22 @@ def build_replay_from_raw_events(
target_spec["context_hints"] = context_hints
action["visual_mode"] = True
save_dialog_target = _infer_save_dialog_primary_button_target(events, evt)
if save_dialog_target:
target_spec = action.setdefault("target_spec", {})
target_spec["by_text"] = save_dialog_target["by_text"]
target_spec["by_text_source"] = "heuristic"
target_spec["by_role"] = save_dialog_target["by_role"]
target_spec["window_title"] = save_dialog_target["window_title"]
target_spec["vlm_description"] = save_dialog_target["vlm_description"]
context_hints = dict(target_spec.get("context_hints") or {})
context_hints.update(save_dialog_target["context_hints"])
target_spec["context_hints"] = context_hints
expected_after_window = context_hints.get("expected_after_window")
if expected_after_window:
action["expected_window_title"] = expected_after_window
action["visual_mode"] = True
elif evt_type == "text_input":
text = evt.get("text", "")
if not text:
@@ -2027,8 +2287,11 @@ def build_replay_from_raw_events(
actions.append(action)
_perf_log("step4_convert_actions_and_crops")
# ── 5. Nettoyage global (dédup combos, sanitize, merge texte, waits) ──
actions = clean_enriched_actions(actions)
_perf_log("step5_clean_enriched_actions")
# ── 6. Insérer des waits contextuels après raccourcis critiques ──
final_actions = []
@@ -2043,6 +2306,8 @@ def build_replay_from_raw_events(
"duration_ms": post_wait,
})
_perf_log("step6_insert_contextual_waits")
# ── 7. Dernier nettoyage des waits consécutifs ──
result = []
for a in final_actions:
@@ -2055,12 +2320,16 @@ def build_replay_from_raw_events(
continue
result.append(a)
_perf_log("step7_cleanup_consecutive_waits")
# ── 8. Attacher les screenshots de référence (état attendu après action) ──
# Les screenshots res_shot_XXXX.png capturés 1s après chaque action pendant
# l'enregistrement servent de référence pour le contrôle visuel.
if session_dir_path:
_attach_expected_screenshots(result, events, session_dir_path)
_perf_log("step8_attach_screenshots")
# ── 9. Enrichir avec expected_window_title (titre fenêtre attendu après le clic) ──
# Pour la vérification post-action : le titre de la fenêtre APRÈS le clic
# est le window_title du PROCHAIN clic dans la séquence.
@@ -2087,6 +2356,8 @@ def build_replay_from_raw_events(
# il prime sur target_spec.window_title obsolète.
_attach_expected_window_before(result, events)
_perf_log("step9_expected_window_title")
# ── 10. Enrichir avec intention + expected_result via gemma4 (Critic) ──
# gemma4 analyse chaque action dans son contexte pour produire :
# - intention : ce que l'utilisateur veut accomplir
@@ -2099,6 +2370,8 @@ def build_replay_from_raw_events(
if session_dir_path:
_enrich_actions_with_intentions(result, session_dir_path)
_perf_log("step10_enrich_intentions_gemma4")
# ── 11. Consolider avec les apprentissages passés ──
# Les replays précédents ont enregistré quelles méthodes marchent
# pour quels éléments. On réinjecte ces connaissances dans le workflow.
@@ -2115,6 +2388,10 @@ def build_replay_from_raw_events(
except Exception as e:
logger.debug("Consolidation apprentissage échouée : %s", e)
_perf_log("step11_replay_learner_consolidation")
_total_ms = (_time_perf.perf_counter() - _perf_t_total) * 1000
logger.info("[PERF] build.TOTAL session=%s total_ms=%.0f", session_id, _total_ms)
# Stats visual replay
visual_clicks = sum(
1 for a in result
@@ -2148,8 +2425,9 @@ class StreamProcessor:
4. finalize_session() — construit le Workflow via GraphBuilder (DBSCAN)
"""
def __init__(self, data_dir: str = "data/training"):
def __init__(self, data_dir: str = "data/training", enable_vlm: bool = False):
self.data_dir = Path(data_dir)
self._enable_vlm = enable_vlm
persist_dir = str(self.data_dir / "streaming_sessions")
live_sessions_dir = str(self.data_dir / "live_sessions")
self.session_manager = LiveSessionManager(
@@ -2290,10 +2568,12 @@ class StreamProcessor:
"""
if self._initialized:
return
# Marquer comme initialisé SANS charger les composants GPU
self._initialized = True
logger.info("StreamProcessor initialisé en mode LÉGER (pas de GPU, pas de VLM)")
return
if not self._enable_vlm:
# Marquer comme initialisé SANS charger les composants GPU. Le serveur
# HTTP reste en mode léger ; le worker dédié active enable_vlm=True.
self._initialized = True
logger.info("StreamProcessor initialisé en mode LÉGER (pas de GPU, pas de VLM)")
return
with self._lock:
if self._initialized:
@@ -2357,6 +2637,20 @@ class StreamProcessor:
logger.error(f" Erreur init FAISSManager: {e}")
self._faiss_manager = None
# N1 anti-poison : en mode VLM, un ScreenAnalyzer absent rend le worker
# incapable d'enrichir le moindre screenshot. Ne PAS figer
# _initialized=True dans ce cas, sinon l'échec (souvent transitoire :
# contention GPU au boot, OOM passager) est mis en cache pour toute la
# vie du process — c'est précisément ce qui a provoqué le blocage R6 de
# 5 jours (worker vivant mais 0 enrichissement, sans alarme). On laisse
# _initialized à False pour réessayer au screenshot / cycle suivant.
if self._screen_analyzer is None:
logger.critical(
"Worker VLM DÉGRADÉ : ScreenAnalyzer indisponible après init "
"(_initialized laissé à False, retry au prochain cycle)."
)
return
self._initialized = True
logger.info("Composants core initialisés.")
@@ -3115,7 +3409,7 @@ class StreamProcessor:
# pour que ScreenAnalyzer crée des ScreenStates avec les bons titres de fenêtre
self._restore_window_events(session_id, session_dir)
# Restaurer les événements utilisateur (mouse_click, text_input, key_press)
# Restaurer les événements utilisateur (mouse_click, text_input, key_press, key_combo)
# depuis live_events.jsonl → session.events, pour que to_raw_session()
# puisse les passer au GraphBuilder (construction des edges/actions)
self._restore_user_events(session_id, session_dir)
@@ -3377,7 +3671,7 @@ class StreamProcessor:
def _restore_user_events(self, session_id: str, session_dir: Path):
"""Restaurer les événements utilisateur depuis live_events.jsonl.
Charge les événements d'action (mouse_click, text_input, key_press)
Charge les événements d'action (mouse_click, text_input, key_press, key_combo)
dans session.events via session_manager.add_event().
Sans cela, to_raw_session() retourne une liste d'events vide,
et le GraphBuilder ne peut pas construire les actions des edges.
@@ -3423,7 +3717,7 @@ class StreamProcessor:
evt_type = event_data.get("type", "")
ts = float(event_data.get("timestamp", raw.get("timestamp", 0)))
if evt_type not in ("mouse_click", "text_input", "key_press"):
if evt_type not in ("mouse_click", "text_input", "key_press", "key_combo"):
continue
# Construire le dict d'événement pour add_event()
@@ -3438,8 +3732,11 @@ class StreamProcessor:
evt_dict["button"] = event_data.get("button", "left")
elif evt_type == "text_input":
evt_dict["text"] = event_data.get("text", "")
elif evt_type == "key_press":
elif evt_type in ("key_press", "key_combo"):
evt_dict["keys"] = event_data.get("keys", [])
raw_keys = event_data.get("raw_keys")
if raw_keys:
evt_dict["raw_keys"] = raw_keys
# Copier window info si disponible
window = event_data.get("window")

View File

@@ -34,8 +34,16 @@ class StreamWorker:
self.running = False
self.processed_files: Set[str] = set()
# StreamProcessor partagé (créé si non fourni)
self.processor = processor or StreamProcessor(data_dir=str(self.live_dir))
# StreamProcessor partagé (créé si non fourni). En mode standalone,
# live_dir pointe normalement vers data/training/live_sessions ; le
# processor doit garder data/training comme racine pour workflows/.
processor_data_dir = (
self.live_dir.parent if self.live_dir.name == "live_sessions" else self.live_dir
)
self.processor = processor or StreamProcessor(
data_dir=str(processor_data_dir),
enable_vlm=True,
)
self._thread: threading.Thread = None

View File

@@ -15,13 +15,20 @@ if str(ROOT) not in sys.path:
@pytest.fixture
def isolated_replay_state(monkeypatch):
def isolated_replay_state(monkeypatch, tmp_path):
monkeypatch.setenv("RPA_API_TOKEN", "test_replay_single_inflight_token")
from agent_v0.server_v1 import api_stream
from agent_v0.server_v1.agent_registry import AgentRegistry
monkeypatch.setattr(api_stream, "API_TOKEN", "test_replay_single_inflight_token")
# Isoler le registre pour que _agent_registry_has_entries() retourne False
# (mode dev, aucun agent enrolle) — sinon le garde fleet bloque les tests
original_registry = api_stream.agent_registry
empty_registry = AgentRegistry(db_path=str(tmp_path / "empty_agents.db"))
monkeypatch.setattr(api_stream, "agent_registry", empty_registry)
if api_stream._replay_lock.locked():
pytest.fail(
"_replay_lock is already held at fixture setup — a previous test "
@@ -53,6 +60,7 @@ def isolated_replay_state(monkeypatch):
api_stream._machine_replay_target.update(saved_targets)
api_stream._last_heartbeat.clear()
api_stream._last_heartbeat.update(saved_heartbeat)
monkeypatch.setattr(api_stream, "agent_registry", original_registry)
def _running_replay_state(

View File

@@ -11,6 +11,7 @@ import shutil
import sys
import tempfile
import threading
import types
from pathlib import Path
from unittest.mock import MagicMock, patch
@@ -171,6 +172,271 @@ class TestLiveSessionManager:
class TestStreamProcessor:
def test_default_initialization_stays_light(self, temp_dir):
    """By default, initialization does not load the VLM/GPU components."""
    from agent_v0.server_v1.stream_processor import StreamProcessor

    light_processor = StreamProcessor(data_dir=temp_dir)
    light_processor._ensure_initialized()

    assert light_processor._initialized is True
    # None of the heavy components may have been created in light mode.
    for heavy_attr in ("_screen_analyzer", "_clip_embedder", "_faiss_manager"):
        assert getattr(light_processor, heavy_attr) is None
def test_enable_vlm_initialization_loads_components(self, temp_dir, monkeypatch):
    """The VLM worker can explicitly load ScreenAnalyzer/CLIP/FAISS.

    The four heavy modules are replaced in ``sys.modules`` by stub modules
    exposing fake classes, then the test checks that a processor built with
    ``enable_vlm=True`` instantiates each of them.
    """
    from agent_v0.server_v1.stream_processor import StreamProcessor

    class FakeScreenAnalyzer:
        def __init__(self, session_id=""):
            self.session_id = session_id

    class FakeCLIPEmbedder:
        pass

    class FakeStateEmbeddingBuilder:
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs

    class FakeFAISSManager:
        def __init__(self, *args, **kwargs):
            self.args = args
            self.kwargs = kwargs
            self.index = MagicMock(ntotal=0)

    # (module path, exported name, stand-in class)
    stubs = (
        ("core.pipeline.screen_analyzer", "ScreenAnalyzer", FakeScreenAnalyzer),
        ("core.embedding.clip_embedder", "CLIPEmbedder", FakeCLIPEmbedder),
        (
            "core.embedding.state_embedding_builder",
            "StateEmbeddingBuilder",
            FakeStateEmbeddingBuilder,
        ),
        ("core.embedding.faiss_manager", "FAISSManager", FakeFAISSManager),
    )
    for module_path, export_name, fake_cls in stubs:
        stub_module = types.ModuleType(module_path)
        setattr(stub_module, export_name, fake_cls)
        monkeypatch.setitem(sys.modules, module_path, stub_module)

    vlm_processor = StreamProcessor(data_dir=temp_dir, enable_vlm=True)
    vlm_processor._ensure_initialized()

    assert vlm_processor._initialized is True
    assert isinstance(vlm_processor._screen_analyzer, FakeScreenAnalyzer)
    assert isinstance(vlm_processor._clip_embedder, FakeCLIPEmbedder)
    assert isinstance(
        vlm_processor._state_embedding_builder, FakeStateEmbeddingBuilder
    )
    assert isinstance(vlm_processor._faiss_manager, FakeFAISSManager)
def test_enable_vlm_screen_analyzer_failure_does_not_cache_broken_state(
    self, temp_dir, monkeypatch, caplog
):
    """N1 anti-poison: a failed ScreenAnalyzer init must stay retry-able.

    In VLM mode, if ScreenAnalyzer fails during init, the processor must NOT
    latch ``_initialized=True`` (otherwise the worker stays broken for life,
    cf. the 5-day R6 blockage). It must log at CRITICAL level and allow a
    retry on the next cycle.
    """
    import logging
    from agent_v0.server_v1.stream_processor import StreamProcessor

    class BrokenScreenAnalyzer:
        def __init__(self, session_id=""):
            raise RuntimeError("CUDA indisponible au démarrage du worker")

    class HealedScreenAnalyzer:
        def __init__(self, session_id=""):
            self.session_id = session_id

    class FakeCLIPEmbedder:
        pass

    class FakeStateEmbeddingBuilder:
        def __init__(self, *args, **kwargs):
            pass

    class FakeFAISSManager:
        def __init__(self, *args, **kwargs):
            self.index = MagicMock(ntotal=0)

    screen_path = "core.pipeline.screen_analyzer"
    exports = {
        screen_path: ("ScreenAnalyzer", BrokenScreenAnalyzer),
        "core.embedding.clip_embedder": ("CLIPEmbedder", FakeCLIPEmbedder),
        "core.embedding.state_embedding_builder": (
            "StateEmbeddingBuilder",
            FakeStateEmbeddingBuilder,
        ),
        "core.embedding.faiss_manager": ("FAISSManager", FakeFAISSManager),
    }
    stub_modules = {}
    for module_path, (export_name, fake_cls) in exports.items():
        stub_module = types.ModuleType(module_path)
        setattr(stub_module, export_name, fake_cls)
        monkeypatch.setitem(sys.modules, module_path, stub_module)
        stub_modules[module_path] = stub_module

    vlm_processor = StreamProcessor(data_dir=temp_dir, enable_vlm=True)
    with caplog.at_level(logging.CRITICAL):
        vlm_processor._ensure_initialized()

    # No lifelong caching: the state must remain retry-able.
    assert vlm_processor._initialized is False
    assert vlm_processor._screen_analyzer is None
    critical_logged = any(
        record.levelno == logging.CRITICAL for record in caplog.records
    )
    assert critical_logged, (
        "un log critical doit signaler le worker VLM dégradé"
    )

    # Retry on the next cycle: ScreenAnalyzer is repaired, so init succeeds.
    stub_modules[screen_path].ScreenAnalyzer = HealedScreenAnalyzer
    vlm_processor._ensure_initialized()
    assert vlm_processor._initialized is True
    assert isinstance(vlm_processor._screen_analyzer, HealedScreenAnalyzer)
def test_worker_writes_health_file_with_component_status(self, tmp_path, monkeypatch):
    """N2: the worker writes _worker_health.json with its component status.

    The file must carry the component status derived from the processor, the
    pid, the stats and the global status.
    """
    from agent_v0.server_v1 import run_worker

    training_root = tmp_path / "data" / "training"
    training_root.mkdir(parents=True)
    monkeypatch.setattr(run_worker, "DATA_DIR", training_root)

    class FakeProc:
        _enable_vlm = True
        _screen_analyzer = object()
        _clip_embedder = object()
        _faiss_manager = object()
        _state_embedding_builder = object()

    vlm_worker = run_worker.VLMWorker()
    vlm_worker._processor = FakeProc()
    vlm_worker._stats["sessions_processed"] = 1
    vlm_worker._stats["total_screenshots_analyzed"] = 7
    vlm_worker._write_health("healthy")

    health_file = training_root / "_worker_health.json"
    assert health_file.exists()

    report = json.loads(health_file.read_text(encoding="utf-8"))
    assert report["status"] == "healthy"
    assert report["pid"] == os.getpid()
    assert report["components"] == dict.fromkeys(
        (
            "screen_analyzer",
            "clip_embedder",
            "faiss_manager",
            "state_embedding_builder",
        ),
        True,
    )
    stats = report["stats"]
    assert stats["sessions_processed"] == 1
    assert stats["total_screenshots_analyzed"] == 7
def test_worker_health_degraded_when_screen_analyzer_missing(self, tmp_path, monkeypatch):
    """N2: a VLM worker without ScreenAnalyzer reports status 'degraded'.

    This holds even when the caller asks for 'healthy'.
    """
    from agent_v0.server_v1 import run_worker

    training_root = tmp_path / "data" / "training"
    training_root.mkdir(parents=True)
    monkeypatch.setattr(run_worker, "DATA_DIR", training_root)

    class DegradedProc:
        _enable_vlm = True
        _screen_analyzer = None
        _clip_embedder = object()
        _faiss_manager = object()
        _state_embedding_builder = None

    vlm_worker = run_worker.VLMWorker()
    vlm_worker._processor = DegradedProc()
    vlm_worker._write_health("healthy")

    raw_report = (training_root / "_worker_health.json").read_text(encoding="utf-8")
    report = json.loads(raw_report)
    assert report["status"] == "degraded"
    assert report["components"]["screen_analyzer"] is False
def test_worker_health_file_contains_no_patient_data(self, tmp_path, monkeypatch):
    """N2 confidentiality: the health file only holds allowed keys.

    No patient data (OCR, screenshot file names, session content) may appear
    at the top level of the written JSON.
    """
    from agent_v0.server_v1 import run_worker

    training_root = tmp_path / "data" / "training"
    training_root.mkdir(parents=True)
    monkeypatch.setattr(run_worker, "DATA_DIR", training_root)

    vlm_worker = run_worker.VLMWorker()
    vlm_worker._current_session = "sess_20260529T154427_f95956"
    vlm_worker._write_health("busy")

    raw_report = (training_root / "_worker_health.json").read_text(encoding="utf-8")
    report = json.loads(raw_report)

    allowed_top = {
        "pid", "started_at", "last_cycle", "current_session",
        "queue_length", "components", "stats", "status",
    }
    unexpected = set(report) - allowed_top
    assert not unexpected, f"clés inattendues: {unexpected}"
    # current_session carries only the identifier, no session content.
    assert report["current_session"] == "sess_20260529T154427_f95956"
def test_sd_notify_noop_without_socket(self, monkeypatch):
    """N3: outside systemd (no NOTIFY_SOCKET), _sd_notify is a silent no-op.

    It must return False and never raise.
    """
    from agent_v0.server_v1 import run_worker

    monkeypatch.delenv("NOTIFY_SOCKET", raising=False)

    notified = run_worker.VLMWorker()._sd_notify("WATCHDOG=1")
    assert notified is False
def test_sd_notify_sends_watchdog_to_socket(self, tmp_path, monkeypatch):
    """N3: under systemd, _sd_notify writes the raw state to $NOTIFY_SOCKET.

    A datagram UNIX socket bound under ``tmp_path`` stands in for the systemd
    notification socket. The test checks that ``_sd_notify`` returns True and
    that the exact payload is received on that socket.
    """
    import socket
    from agent_v0.server_v1 import run_worker

    sock_path = str(tmp_path / "notify.sock")
    # The context manager closes the listener on every exit path, including a
    # failing bind().
    with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as listener:
        listener.bind(sock_path)
        listener.settimeout(2)  # never hang the suite if nothing is sent
        monkeypatch.setenv("NOTIFY_SOCKET", sock_path)

        worker = run_worker.VLMWorker()
        assert worker._sd_notify("WATCHDOG=1") is True

        received = listener.recv(64)
        assert received == b"WATCHDOG=1"
def test_vlm_worker_uses_training_root_data_dir(self, tmp_path, monkeypatch):
    """The R6 worker must produce workflows/embeddings under data/training."""
    from agent_v0.server_v1 import run_worker

    training_root = tmp_path / "data" / "training"
    live_root = training_root / "live_sessions"
    for const_name, location in (
        ("DATA_DIR", training_root),
        ("LIVE_SESSIONS_DIR", live_root),
    ):
        monkeypatch.setattr(run_worker, const_name, location)

    worker_processor = run_worker.VLMWorker()._get_processor()

    assert worker_processor.data_dir == training_root
    assert worker_processor.session_manager._live_sessions_dir == live_root
    assert worker_processor._enable_vlm is True
def test_stream_worker_standalone_uses_training_root_data_dir(self, tmp_path):
    """The standalone StreamWorker also keeps data/training as its root."""
    from agent_v0.server_v1.worker_stream import StreamWorker

    live_root = tmp_path / "data" / "training" / "live_sessions"
    standalone_processor = StreamWorker(live_dir=str(live_root)).processor

    assert standalone_processor.data_dir == live_root.parent
    assert standalone_processor.session_manager._live_sessions_dir == live_root
    assert standalone_processor._enable_vlm is True
def test_process_event(self, processor):
result = processor.process_event("sess_010", {
"type": "mouse_click",
@@ -181,6 +447,49 @@ class TestStreamProcessor:
session = processor.session_manager.get_session("sess_010")
assert session.last_window_info["title"] == "Chrome"
def test_restore_user_events_keeps_key_combo(self, processor, tmp_path):
    """Restoring from live_events.jsonl keeps the key_combo event intact.

    A session that already holds one in-memory event is restored from a
    two-record JSONL file. Afterwards the session must expose exactly the
    file's ``key_combo`` and ``text_input`` events, with the combo's ``keys``,
    ``raw_keys`` and ``screenshot_id`` preserved.
    """
    session_id = "sess_restore_combo"
    session_dir = tmp_path / session_id
    session_dir.mkdir()

    combo_record = {
        "session_id": session_id,
        "timestamp": 1779900720.0,
        "event": {
            "type": "key_combo",
            "keys": ["win", "s"],
            "raw_keys": [
                {"action": "release", "kind": "vk", "vk": 83, "char": "s"},
                {"action": "release", "kind": "key", "name": "cmd"},
            ],
            "timestamp": 1779900719.5,
            "window": {"title": "Rechercher", "app_name": "SearchHost.exe"},
            "screenshot_id": "shot_0001",
        },
    }
    text_record = {
        "session_id": session_id,
        "timestamp": 1779900725.0,
        "event": {
            "type": "text_input",
            "text": "test",
            "timestamp": 1779900725.0,
            "window": {"title": "Rechercher", "app_name": "SearchHost.exe"},
        },
    }
    # One JSON document per line, each line newline-terminated.
    jsonl_payload = "".join(
        json.dumps(record) + "\n" for record in (combo_record, text_record)
    )
    (session_dir / "live_events.jsonl").write_text(jsonl_payload, encoding="utf-8")

    processor.session_manager.add_event(session_id, {"type": "text_input", "text": "old"})
    processor._restore_user_events(session_id, session_dir)

    restored = processor.session_manager.get_session(session_id)
    restored_types = [event["type"] for event in restored.events]
    assert restored_types == ["key_combo", "text_input"]

    combo_event = restored.events[0]
    assert combo_event["keys"] == ["win", "s"]
    assert combo_event["raw_keys"][0]["vk"] == 83
    assert combo_event["screenshot_id"] == "shot_0001"
def test_process_crop(self, processor):
result = processor.process_crop("sess_011", "shot_001_crop", "/tmp/crop.png")
assert result["status"] == "crop_stored"
@@ -479,6 +788,156 @@ class TestStreamProcessor:
assert first_hints.get("active_tab_label") == "test"
assert "fermer l'onglet actif 'test'" in first_spec.get("vlm_description", "")
def test_build_replay_save_as_button_gets_semantic_target(
    self, tmp_path, monkeypatch,
):
    """The Save As 'Enregistrer' click must not stay anchor-only/positional.

    Live regression 2026-05-25: with RPA_SKIP_BUILD_VISION, the Save As
    action was described only by position + crop, then resolved by template
    matching too high/left. The builder must encode the stable primary
    button ``Enregistrer``.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def positional_enrichment(*args, **kwargs):
        # Simulates an enrichment that yields no semantic hint at all.
        return {
            "anchor_image_base64": "abc123",
            "by_text": "",
            "by_role": "",
            "vlm_description": "positionnel",
        }

    def do_nothing(*args, **kwargs):
        return None

    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: "abc123")
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", positional_enrichment)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, do_nothing)

    save_click = {"event": {
        "type": "mouse_click",
        "timestamp": 1.0,
        "pos": [1329, 1265],
        "button": "left",
        "screenshot_id": "shot_006",
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
        "window_capture": {
            "rect": [332, 522, 1613, 1323],
            "click_relative": [997, 743],
            "window_size": [1281, 801],
            "click_inside_window": True,
        },
    }}
    back_to_notepad = {"event": {
        "type": "window_focus_change",
        "timestamp": 1.2,
        "from": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
        "to": {"title": "*test Bloc-notes", "app_name": "Notepad.exe"},
    }}

    actions = sp.build_replay_from_raw_events(
        [save_click, back_to_notepad],
        session_id="sess_save_as_button",
        session_dir=str(session_dir),
    )

    click_actions = [action for action in actions if action.get("type") == "click"]
    assert len(click_actions) == 1

    click_action = click_actions[0]
    spec = click_action.get("target_spec", {})
    hints = spec.get("context_hints") or {}
    assert spec.get("by_text") == "Enregistrer"
    assert spec.get("by_text_source") == "heuristic"
    assert spec.get("by_role") == "button"
    assert spec.get("window_title") == "Enregistrer sous"
    assert hints.get("interaction") == "save_dialog_primary_button"
    assert hints.get("expected_after_window") == "*test Bloc-notes"
    assert click_action.get("expected_window_title") == "*test Bloc-notes"
def test_build_replay_cuts_post_save_out_of_window_click(
    self, tmp_path, monkeypatch,
):
    """The out-of-window click after returning to Notepad is a parasite.

    This is the former final action 17/18: bottom-right coordinates with
    ``click_inside_window=false``. It is not part of the core
    "type and save" sequence.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def anchor_only_enrichment(*args, **kwargs):
        return {"anchor_image_base64": "abc123"}

    def do_nothing(*args, **kwargs):
        return None

    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: "abc123")
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", anchor_only_enrichment)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, do_nothing)

    save_click = {"event": {
        "type": "mouse_click",
        "timestamp": 1.0,
        "pos": [1329, 1265],
        "button": "left",
        "screenshot_id": "shot_006",
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
        "window_capture": {
            "rect": [332, 522, 1613, 1323],
            "click_relative": [997, 743],
            "window_size": [1281, 801],
            "click_inside_window": True,
        },
    }}
    back_to_notepad = {"event": {
        "type": "window_focus_change",
        "timestamp": 1.2,
        "from": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
        "to": {"title": "*test Bloc-notes", "app_name": "Notepad.exe"},
    }}
    # Recorded with click_inside_window=False: the click expected to be cut.
    stray_click = {"event": {
        "type": "mouse_click",
        "timestamp": 1.5,
        "pos": [2248, 1577],
        "button": "left",
        "screenshot_id": "shot_007",
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": {
            "title": "http192.168.1.408765dossier.htmlid=.txt Bloc-notes",
            "app_name": "Notepad.exe",
        },
        "window_capture": {
            "rect": [323, 522, 2243, 1638],
            "click_relative": [1925, 1055],
            "window_size": [1920, 1116],
            "click_inside_window": False,
        },
    }}

    actions = sp.build_replay_from_raw_events(
        [save_click, back_to_notepad, stray_click],
        session_id="sess_post_save_cut",
        session_dir=str(session_dir),
    )

    click_actions = [action for action in actions if action.get("type") == "click"]
    assert len(click_actions) == 1

    kept_click = click_actions[0]
    assert kept_click.get("target_spec", {}).get("by_text") == "Enregistrer"
    assert kept_click.get("expected_window_title") == "*test Bloc-notes"
# =========================================================================
# StreamWorker

View File

@@ -52,12 +52,12 @@ class TestImageEndpointNotPublic:
mod = _reload_api_stream()
assert "/health" in mod._PUBLIC_PATHS
def test_replay_next_still_public(self, monkeypatch):
"""/replay/next reste public (legacy agent Rust polling)."""
def test_replay_next_removed_from_public_paths(self, monkeypatch):
"""/replay/next distribue des actions et exige desormais un Bearer."""
monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
mod = _reload_api_stream()
assert "/api/v1/traces/stream/replay/next" in mod._PUBLIC_PATHS
assert "/api/v1/traces/stream/replay/next" not in mod._PUBLIC_PATHS
# ---------------------------------------------------------------------------
@@ -157,6 +157,23 @@ class TestFailClosedTokenP0C:
asyncio.get_event_loop().run_until_complete(mod._verify_token(req))
assert exc_info.value.status_code == 401
def test_verify_token_rejects_replay_next_without_bearer(self, monkeypatch):
    """P0 revocation: GET /replay/next is no longer public.

    A request to the replay polling path that carries no headers at all must
    make ``_verify_token`` raise an ``HTTPException`` with status 401.
    """
    import asyncio
    from unittest.mock import MagicMock
    from fastapi import HTTPException

    monkeypatch.setenv("RPA_API_TOKEN", "validtoken" * 4)
    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
    mod = _reload_api_stream()

    req = MagicMock()
    req.url.path = "/api/v1/traces/stream/replay/next"
    req.headers = {}

    # Drive the coroutine on a private loop: asyncio.get_event_loop() is
    # deprecated when no loop is running, and a private loop leaves the
    # process-wide loop state untouched for the other tests.
    loop = asyncio.new_event_loop()
    try:
        with pytest.raises(HTTPException) as exc_info:
            loop.run_until_complete(mod._verify_token(req))
    finally:
        loop.close()
    assert exc_info.value.status_code == 401
@pytest.fixture(autouse=True)
def _cleanup(monkeypatch):

View File

@@ -0,0 +1,350 @@
"""
Tests for the fleet revocation gaps in agent_v0/server_v1/api_stream.py.
Covers:
1. test_result_guard_without_pending — the guard is applied on /replay/result
   even without a _retry_pending entry (unconditional guard).
2. test_finalize_revoked_agent — enroll + revoke + finalize → 403
3. test_finalize_unknown_machine_registered — registry with agents →
   unknown machine_id → 403
4. test_guard_default_machine_id_registered — registry with agents →
   machine_id="default" → 403
5. test_replay_session_revoked_agent — enroll + revoke + replay-session → 403
6. test_replay_session_unknown_machine_registered — registry with agents →
   unknown machine_id on replay-session → 403
"""
from __future__ import annotations
import sys
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
ROOT = str(Path(__file__).resolve().parents[2])
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
@pytest.fixture
def isolated_fleet_state(monkeypatch, tmp_path):
    """Isolate the AgentRegistry and the replay structures for one test.

    Yields:
        A ``(api_stream, test_registry, auth_headers)`` tuple: the patched
        module, a registry backed by a database file under ``tmp_path``, and
        the Bearer headers matching the patched API token.
    """
    monkeypatch.setenv("RPA_API_TOKEN", "test_revocation_gaps_token")
    from agent_v0.server_v1 import api_stream
    from agent_v0.server_v1.agent_registry import AgentRegistry

    # Align the token expected by the middleware.
    monkeypatch.setattr(api_stream, "API_TOKEN", "test_revocation_gaps_token")

    # Swap the global registry for a test-dedicated instance. monkeypatch
    # restores the original attribute on teardown, so no manual restore is
    # needed.
    test_registry = AgentRegistry(db_path=str(tmp_path / "test_agents.db"))
    monkeypatch.setattr(api_stream, "agent_registry", test_registry)

    # Save and empty the replay structures; the dict is mutated in place so
    # its identity is preserved.
    saved_states = dict(api_stream._replay_states)
    api_stream._replay_states.clear()

    auth_headers = {"Authorization": "Bearer test_revocation_gaps_token"}
    yield api_stream, test_registry, auth_headers

    # Restore the replay structures.
    api_stream._replay_states.clear()
    api_stream._replay_states.update(saved_states)
# ---------------------------------------------------------------------------
# Test 1 : /replay/result — le garde est appliqué sans _retry_pending
# ---------------------------------------------------------------------------
def test_result_guard_without_pending(isolated_fleet_state, monkeypatch):
    """The /replay/result guard applies even without a _retry_pending entry.

    Scenario: an enrolled, active agent reports the result of an action that
    is not in _retry_pending (nominal case, not a retry). The guard must run
    and let the report through because the agent is active. Once the agent
    is revoked, the same kind of report must be blocked (403).
    """
    api_stream, registry, auth_headers = isolated_fleet_state
    machine_id = "test-machine-result-guard"
    session_id = "sess-result-guard"

    # Enroll an active agent.
    registry.enroll(
        machine_id=machine_id,
        user_name="Test User",
        hostname="TEST-HOST",
    )

    # Forge a session carrying the machine_id so the guard can resolve it.
    from agent_v0.server_v1.live_session_manager import LiveSessionState
    forged_session = LiveSessionState(
        session_id=session_id,
        machine_id=machine_id,
    )

    def fake_get_session(sid):
        if sid == "sess-result-guard":
            return forged_session
        return None

    monkeypatch.setattr(
        api_stream.processor.session_manager, "get_session", fake_get_session
    )

    from fastapi.testclient import TestClient
    client = TestClient(api_stream.app, raise_server_exceptions=False)

    def post_result(action_id):
        return client.post(
            "/api/v1/traces/stream/replay/result",
            json={
                "session_id": session_id,
                "action_id": action_id,
                "success": True,
            },
            headers=auth_headers,
        )

    # Report without _retry_pending: the guard must still run, and let the
    # request through because the agent is active.
    resp = post_result("act-no-retry")
    assert resp.status_code != 403, (
        f"/replay/result a été bloqué (403) pour un agent actif sans retry. "
        f"Body: {resp.text}"
    )

    # Now revoke the agent.
    registry.uninstall(machine_id=machine_id, reason="admin_revoke")

    # The same kind of report must now be blocked.
    resp = post_result("act-no-retry-2")
    assert resp.status_code == 403, (
        f"/replay/result DOIT être bloqué (403) pour un agent révoqué. "
        f"Body: {resp.text}"
    )
# ---------------------------------------------------------------------------
# Test 2 : /finalize — enroll + revoke + finalize → 403
# ---------------------------------------------------------------------------
def test_finalize_revoked_agent(isolated_fleet_state, monkeypatch):
    """An agent that was enrolled then revoked must be blocked on /finalize."""
    api_stream, registry, auth_headers = isolated_fleet_state
    machine_id = "test-machine-revoked-finalize"
    session_id = "sess-revoked-finalize"

    # Enroll the agent, then revoke it.
    registry.enroll(
        machine_id=machine_id,
        user_name="Test User",
        hostname="TEST-HOST",
    )
    registry.uninstall(machine_id=machine_id, reason="admin_revoke")

    # Forge a session so that finalize can find it.
    from agent_v0.server_v1.live_session_manager import LiveSessionState
    forged_session = LiveSessionState(
        session_id=session_id,
        machine_id=machine_id,
    )

    def fake_get_session(sid):
        if sid == "sess-revoked-finalize":
            return forged_session
        return None

    session_manager = api_stream.processor.session_manager
    monkeypatch.setattr(session_manager, "get_session", fake_get_session)
    # The endpoint also calls finalize() on the session manager: stub it out
    # to avoid I/O.
    monkeypatch.setattr(session_manager, "finalize", lambda sid: None)
    monkeypatch.setattr(api_stream.processor, "_find_session_dir", lambda sid: None)

    from fastapi.testclient import TestClient
    client = TestClient(api_stream.app, raise_server_exceptions=False)
    resp = client.post(
        "/api/v1/traces/stream/finalize",
        params={"session_id": session_id},
        headers=auth_headers,
    )

    assert resp.status_code == 403, (
        f"/finalize DOIT être bloqué (403) pour un agent révoqué. "
        f"Body: {resp.text}"
    )
# ---------------------------------------------------------------------------
# Test 3 : /finalize — machine_id inconnu avec registre non vide → 403
# ---------------------------------------------------------------------------
def test_finalize_unknown_machine_registered(isolated_fleet_state, monkeypatch):
    """With at least one agent in the registry, an unknown machine_id → 403."""
    api_stream, registry, auth_headers = isolated_fleet_state
    session_id = "sess-unknown-finalize"

    # Enroll one agent so the registry is not empty.
    registry.enroll(
        machine_id="known-machine-xyz",
        user_name="Known User",
        hostname="KNOWN-HOST",
    )

    # Forge a session whose machine_id is absent from the registry.
    from agent_v0.server_v1.live_session_manager import LiveSessionState
    forged_session = LiveSessionState(
        session_id=session_id,
        machine_id="unknown-machine-abc",
    )

    def fake_get_session(sid):
        if sid == "sess-unknown-finalize":
            return forged_session
        return None

    session_manager = api_stream.processor.session_manager
    monkeypatch.setattr(session_manager, "get_session", fake_get_session)
    monkeypatch.setattr(session_manager, "finalize", lambda sid: None)
    monkeypatch.setattr(api_stream.processor, "_find_session_dir", lambda sid: None)

    from fastapi.testclient import TestClient
    client = TestClient(api_stream.app, raise_server_exceptions=False)
    resp = client.post(
        "/api/v1/traces/stream/finalize",
        params={"session_id": session_id},
        headers=auth_headers,
    )

    assert resp.status_code == 403, (
        f"/finalize DOIT être bloqué (403) pour un machine_id inconnu "
        f"quand le registre contient des agents. Body: {resp.text}"
    )
    body = resp.json()
    reported_error = body.get("detail", {}).get("error")
    assert reported_error == "agent_unknown", (
        f"Erreur attendue 'agent_unknown', obtenu: {body}"
    )
# ---------------------------------------------------------------------------
# Test 4 : _guard_agent_registry_access — machine_id="default" avec registre → 403
# ---------------------------------------------------------------------------
def test_guard_default_machine_id_registered(isolated_fleet_state):
    """With agents in the registry, machine_id='default' → 403.

    The same rejection is expected for an empty-string and a ``None``
    machine_id.
    """
    api_stream, registry, _auth_headers = isolated_fleet_state

    # Enroll one agent so the registry is not empty.
    registry.enroll(
        machine_id="some-enrolled-agent",
        user_name="Enrolled User",
        hostname="ENROLLED-HOST",
    )

    from fastapi import HTTPException

    # Direct call of the guard with machine_id="default".
    with pytest.raises(HTTPException) as exc_info:
        api_stream._guard_agent_registry_access(
            "default",
            endpoint="/api/v1/traces/stream/finalize",
        )
    assert exc_info.value.status_code == 403
    detail = exc_info.value.detail
    assert detail.get("error") == "agent_enrollment_required", (
        f"Erreur attendue 'agent_enrollment_required', obtenu: {detail}"
    )

    # Same outcome for an empty machine_id, then for None.
    for missing_machine_id in ("", None):
        with pytest.raises(HTTPException) as exc_info:
            api_stream._guard_agent_registry_access(
                missing_machine_id,
                endpoint="/api/v1/traces/stream/event",
            )
        raised = exc_info.value
        assert raised.status_code == 403
        assert raised.detail.get("error") == "agent_enrollment_required"
# ---------------------------------------------------------------------------
# Test 5 : /replay-session — enroll + revoke → 403
# ---------------------------------------------------------------------------
def test_replay_session_revoked_agent(isolated_fleet_state, monkeypatch):
    """A revoked agent must not be able to start a replay-session."""
    api_stream, registry, auth_headers = isolated_fleet_state
    machine_id = "test-machine-replay-session"

    registry.enroll(
        machine_id=machine_id,
        user_name="Test User",
        hostname="TEST-HOST",
    )
    registry.uninstall(machine_id=machine_id, reason="admin_revoke")

    from fastapi.testclient import TestClient
    client = TestClient(api_stream.app, raise_server_exceptions=False)
    query = {
        "session_id": "sess-some-replay",
        "machine_id": machine_id,
    }
    resp = client.post(
        "/api/v1/traces/stream/replay-session",
        params=query,
        headers=auth_headers,
    )

    assert resp.status_code == 403, (
        f"/replay-session DOIT être bloqué (403) pour un agent révoqué. "
        f"Body: {resp.text}"
    )
def test_replay_session_unknown_machine_registered(isolated_fleet_state):
    """With agents in the registry, an unknown machine_id on replay-session → 403."""
    api_stream, registry, auth_headers = isolated_fleet_state

    # Enroll one agent so the registry is not empty.
    registry.enroll(
        machine_id="known-machine-replay",
        user_name="Known User",
        hostname="KNOWN-HOST",
    )

    from fastapi.testclient import TestClient
    client = TestClient(api_stream.app, raise_server_exceptions=False)
    query = {
        "session_id": "sess-some-replay",
        "machine_id": "unknown-machine-replay",
    }
    resp = client.post(
        "/api/v1/traces/stream/replay-session",
        params=query,
        headers=auth_headers,
    )

    assert resp.status_code == 403
    reported_error = resp.json().get("detail", {}).get("error")
    assert reported_error == "agent_unknown"

View File

@@ -26,6 +26,15 @@ def test_only_declarative_when_no_safety_level():
assert payload.checks[0]["source"] == "declarative"
def test_default_pause_message_is_structured_not_validation_required():
    """Human fallback: never a bare 'Validation requise'."""
    bare_action = {"type": "pause_for_human", "parameters": {}}
    payload = build_pause_payload(bare_action, {}, last_screenshot=None)

    message_lines = payload.message.splitlines()
    assert len(message_lines) == 4
    opening_line = message_lines[0]
    assert opening_line.startswith("J'essaie de :")
    assert "Validation requise" not in payload.message
def test_hybrid_appends_llm_checks_on_medical_critical(monkeypatch):
"""safety_level=medical_critical → LLM appelé, checks concaténés."""
decl = [{"id": "c1", "label": "Vérifier IPP", "required": True}]