snapshot: WIP 5j replay reliability (B1 watchdog + dialog handlers + grounding drift)
Snapshot avant correction du blocage relance Léa (3 incidents 24h: SSH refusé, polls morts ×2). Point de rollback stable. Contenu: - agent_v1/core/executor.py: 5 patchs dialog handling (saveas drift, close_tab hotkey fallback, confirm_save Unicode apostrophe, foreground dialog recontextualization, runtime_dialog in-loop) + helpers normalize_window_hint, requires_post_verify_window_transition - agent_v1/core/grounding.py: garde drift template fix (fallback_x/y plumbed) - server_v1/replay_watchdog.py (NEW): orphan watchdog B1, scan 10s timeout 30s - server_v1/api_stream.py: dispatched_action plumbing, watchdog lifespan, metrics endpoint - server_v1/replay_engine.py: _schedule_retry préserve original_action + dispatched_action - stream_processor.py: gardes _infer_tab_switch_target (no false switch_tab on save_as dialog open) + _attach_expected_window_before - tests/integration: test_replay_watchdog.py (8 cas), test_stream_processor.py - tests/unit: test_executor_verify_window_guard.py (start_button, close_tab, runtime_dialog, post_verify, transition fallbacks) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -74,6 +74,142 @@ class GroundingEngine:
|
||||
"""
|
||||
self._executor = executor
|
||||
|
||||
@staticmethod
|
||||
def _should_scope_to_active_window(target_spec: Dict[str, Any]) -> bool:
|
||||
"""Déterminer si le grounding doit être limité à la fenêtre active."""
|
||||
if str(target_spec.get("screen_scope", "")).strip().lower() == "full_screen":
|
||||
return False
|
||||
|
||||
by_role = str(target_spec.get("by_role", "")).strip().lower()
|
||||
if by_role in {"start_button"}:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _targets_lea_window(target_spec: Dict[str, Any]) -> bool:
|
||||
"""Déterminer si la cible pointe explicitement vers l'UI de Léa."""
|
||||
try:
|
||||
from ..ui.messages import est_fenetre_lea
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
hints = [
|
||||
target_spec.get("window_title", ""),
|
||||
context_hints.get("window_title", ""),
|
||||
target_spec.get("vlm_description", ""),
|
||||
target_spec.get("by_text", ""),
|
||||
]
|
||||
return any(est_fenetre_lea(str(hint)) for hint in hints if hint)
|
||||
|
||||
@staticmethod
|
||||
def _is_plausible_window_rect(
|
||||
rect: Optional[List[int]],
|
||||
title: str,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
) -> bool:
|
||||
"""Valider qu'un rect actif ressemble à une vraie fenêtre utilisable.
|
||||
|
||||
Rejette explicitement les zones système "bar-like" (taskbar, systray)
|
||||
et les titres inconnus/bruités. Le grounding ne doit jamais se
|
||||
contraindre à une zone non validée.
|
||||
"""
|
||||
if not rect or len(rect) != 4:
|
||||
return False
|
||||
|
||||
try:
|
||||
from ..ui.messages import est_fenetre_bruit
|
||||
except Exception:
|
||||
def est_fenetre_bruit(_title: str) -> bool:
|
||||
return not _title or _title.strip().lower() == "unknown_window"
|
||||
|
||||
w = rect[2] - rect[0]
|
||||
h = rect[3] - rect[1]
|
||||
title_clean = str(title or "").strip()
|
||||
if w <= 50 or h <= 50:
|
||||
return False
|
||||
title_lower = title_clean.lower()
|
||||
is_unknown_title = not title_clean or title_lower == "unknown_window"
|
||||
if not is_unknown_title and est_fenetre_bruit(title_clean):
|
||||
return False
|
||||
|
||||
# Une zone très plate, surtout en bas d'écran et très large, est
|
||||
# typiquement une barre des tâches / systray, pas une vraie fenêtre.
|
||||
# On réduit le seuil de hauteur à 120px pour ne pas rejeter les petits modaux.
|
||||
is_bar_like = (
|
||||
h < 120
|
||||
or (w > 0.9 * screen_width and h < 0.15 * screen_height)
|
||||
)
|
||||
|
||||
# Exception : si le titre contient un mot-clé de dialogue connu,
|
||||
# on considère que c'est plausible même si c'est petit.
|
||||
keywords = ["enregistrer sous", "save as", "voulez-vous", "confirm", "attention", "error", "erreur"]
|
||||
if any(k in title_lower for k in keywords):
|
||||
return h >= 80 # Un dialogue fait au moins 80px (titre + bouton)
|
||||
|
||||
return not is_bar_like
|
||||
|
||||
@staticmethod
|
||||
def _visual_scope_hints(target_spec: Dict[str, Any]) -> List[str]:
|
||||
"""Construire des indices textuels à chercher dans le crop fenêtre."""
|
||||
hints: List[str] = []
|
||||
raw_hints = [
|
||||
target_spec.get("window_title", ""),
|
||||
(target_spec.get("context_hints") or {}).get("window_title", ""),
|
||||
target_spec.get("by_text", ""),
|
||||
]
|
||||
for raw in raw_hints:
|
||||
text = str(raw or "").strip()
|
||||
if not text:
|
||||
continue
|
||||
text = text.lstrip("*").strip()
|
||||
variants = [text]
|
||||
for sep in (" – ", " - ", " — "):
|
||||
if sep in text:
|
||||
variants.extend(part.strip().lstrip("*") for part in text.split(sep))
|
||||
for variant in variants:
|
||||
if variant and len(variant) >= 3 and variant not in hints:
|
||||
hints.append(variant)
|
||||
return hints
|
||||
|
||||
def _window_crop_matches_target_visually(
|
||||
self,
|
||||
screenshot_b64: str,
|
||||
target_spec: Dict[str, Any],
|
||||
) -> bool:
|
||||
"""Vérifier visuellement qu'un crop contraint contient la bonne cible.
|
||||
|
||||
Principe: ne jamais faire confiance au rect système seul. Si aucun
|
||||
indice textuel n'est disponible, on laisse passer le crop plausible
|
||||
pour ne pas sur-bloquer les cibles purement iconiques.
|
||||
"""
|
||||
hints = self._visual_scope_hints(target_spec)
|
||||
if not hints:
|
||||
return True
|
||||
|
||||
finder = getattr(self._executor, "_find_text_on_screen", None)
|
||||
if not callable(finder):
|
||||
return True
|
||||
|
||||
for hint in hints:
|
||||
try:
|
||||
if finder(screenshot_b64, hint):
|
||||
logger.info(
|
||||
"Grounding fenêtre validé visuellement via '%s'",
|
||||
hint,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("Validation visuelle du crop échouée pour '%s': %s", hint, e)
|
||||
logger.info(
|
||||
"Grounding plein écran : crop fenêtre rejeté par validation visuelle "
|
||||
"(hints=%s)",
|
||||
hints,
|
||||
)
|
||||
return False
|
||||
|
||||
def locate(
|
||||
self,
|
||||
server_url: str,
|
||||
@@ -128,35 +264,63 @@ class GroundingEngine:
|
||||
|
||||
t_start = time.time()
|
||||
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
window_rect = None
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
# Validation : fenêtre visible et pas minuscule
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
if w > 50 and h > 50:
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
active_title = ""
|
||||
if self._should_scope_to_active_window(target_spec):
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
from ..ui.messages import est_fenetre_lea
|
||||
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
active_title = str(win_info.get("title", "") or "")
|
||||
if est_fenetre_lea(active_title) and not self._targets_lea_window(target_spec):
|
||||
logger.info(
|
||||
"Grounding plein écran : fenêtre active Léa ignorée pour "
|
||||
"cible externe (%s)",
|
||||
target_spec.get("by_text", "") or target_spec.get("by_role", ""),
|
||||
)
|
||||
win_info = None
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
if self._is_plausible_window_rect(r, active_title, screen_width, screen_height):
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Grounding plein écran : rect actif rejeté "
|
||||
"(title='%s', rect=%s)",
|
||||
active_title,
|
||||
r,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
else:
|
||||
logger.info(
|
||||
"Grounding plein écran pour by_role='%s'",
|
||||
target_spec.get("by_role", ""),
|
||||
)
|
||||
|
||||
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||
if window_rect and screenshot_b64:
|
||||
if not self._window_crop_matches_target_visually(screenshot_b64, target_spec):
|
||||
window_rect = None
|
||||
screenshot_b64 = self._capture_window_or_screen(None)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
@@ -186,6 +350,18 @@ class GroundingEngine:
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
if target_spec.get("allow_position_fallback"):
|
||||
if 0.0 <= fallback_x <= 1.0 and 0.0 <= fallback_y <= 1.0:
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=fallback_x,
|
||||
y_pct=fallback_y,
|
||||
method="position_fallback",
|
||||
score=0.2,
|
||||
detail="fallback positionnel explicite",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
return GroundingResult(
|
||||
found=False,
|
||||
detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
|
||||
@@ -258,7 +434,12 @@ class GroundingEngine:
|
||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||
if anchor_b64:
|
||||
raw = self._executor._template_match_anchor(
|
||||
screenshot_b64, anchor_b64, screen_width, screen_height,
|
||||
screenshot_b64,
|
||||
anchor_b64,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x,
|
||||
fallback_y_pct=fallback_y,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
|
||||
39
agent_v0/agent_v1/finalize_contract.py
Normal file
39
agent_v0/agent_v1/finalize_contract.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Dispatch léger du contrat enrichi de /finalize côté agent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def dispatch_finalize_result(ui: Any, payload: Dict[str, Any], replay_name: str) -> None:
    """Route the /finalize result to the appropriate agent UI surface.

    Nothing happens when ``payload`` is not a dict, when the server already
    reports the replay as ``"started"``, or when the payload is not
    ``replay_ready`` / carries no ``replay_request``. Otherwise the UI's
    ``offer_finalize_replay`` is called with the request and the task name
    (a generic French label when ``replay_name`` is empty). A ``"failed"``
    server launch is logged before falling back to this manual offer.

    Args:
        ui: Object expected to expose ``offer_finalize_replay``; ``None``
            or an object without it is logged and ignored.
        payload: JSON body returned by /finalize.
        replay_name: Human-readable name of the recorded task.
    """
    if not isinstance(payload, dict):
        return

    request = payload.get("replay_request") or {}
    launch = payload.get("replay_launch") or {}
    launch_status = launch.get("status")

    if launch_status == "started":
        logger.info("Replay direct déjà lancé par le serveur après finalize")
        return

    if not (payload.get("replay_ready") and request):
        return

    if launch_status == "failed":
        logger.warning(
            "Auto-replay serveur échoué après finalize, proposition manuelle"
        )

    ui_can_offer = ui is not None and hasattr(ui, "offer_finalize_replay")
    if not ui_can_offer:
        logger.info("UI indisponible pour proposer un test immédiat")
        return

    task_label = replay_name or "la tâche que vous venez d'enregistrer"
    ui.offer_finalize_replay(request, task_label)
|
||||
@@ -28,6 +28,7 @@ from .ui.chat_window import ChatWindow
|
||||
from .ui.capture_server import CaptureServer
|
||||
from .session.storage import SessionStorage
|
||||
from .vision.capturer import VisionCapturer
|
||||
from .finalize_contract import dispatch_finalize_result
|
||||
|
||||
# Import optionnel du client serveur (pour le chat et les workflows)
|
||||
# Deux chemins : relatif (depuis agent_v0.agent_v1) ou absolu (depuis C:\rpa_vision\agent_v1)
|
||||
@@ -80,6 +81,7 @@ class AgentV1:
|
||||
self._executor = None
|
||||
# Flag pour indiquer qu'un replay est en cours (eviter les conflits)
|
||||
self._replay_active = False
|
||||
self._last_recording_name = ""
|
||||
|
||||
# Etat partage entre systray et chat (source de verite unique)
|
||||
self._state = AgentState()
|
||||
@@ -210,12 +212,14 @@ class AgentV1:
|
||||
time.sleep(30) # Vérifier toutes les 30s
|
||||
|
||||
def start_session(self, workflow_name):
|
||||
self._last_recording_name = workflow_name
|
||||
self.session_id = f"sess_{time.strftime('%Y%m%dT%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
self.session_dir = self.storage.get_session_dir(self.session_id)
|
||||
|
||||
self.vision = VisionCapturer(str(self.session_dir))
|
||||
|
||||
self.streamer = TraceStreamer(self.session_id, machine_id=self.machine_id)
|
||||
self.streamer.set_on_finalize_result(self._on_finalize_result)
|
||||
self.captor = EventCaptorV1(self._on_event_bridge)
|
||||
|
||||
# Initialiser l'executeur partage
|
||||
@@ -325,6 +329,15 @@ class AgentV1:
|
||||
# pour enchainer les actions du workflow
|
||||
time.sleep(0.2)
|
||||
else:
|
||||
if getattr(self._executor, "_replay_paused", False):
|
||||
if not self._replay_active:
|
||||
self._replay_active = True
|
||||
self.ui.set_replay_active(True)
|
||||
self._state.set_replay_active(True)
|
||||
poll_delay = getattr(self._executor, '_poll_backoff', REPLAY_POLL_INTERVAL)
|
||||
time.sleep(max(poll_delay, REPLAY_POLL_INTERVAL))
|
||||
continue
|
||||
|
||||
# Pas d'action en attente — utiliser le backoff de l'executor
|
||||
# (augmente si le serveur est indisponible, reset a 1s sinon)
|
||||
if self._replay_active:
|
||||
@@ -429,6 +442,11 @@ class AgentV1:
|
||||
f"agent_{self.user_id}"
|
||||
)
|
||||
|
||||
def _on_finalize_result(self, payload: dict) -> None:
    """Handle the /finalize payload on the agent side.

    Forwards the payload to ``dispatch_finalize_result`` together with the
    UI and the last recording name; a generic French label is used when no
    recording name is stored.
    """
    default_label = "la tâche que vous venez d'enregistrer"
    task_label = self._last_recording_name or default_label
    dispatch_finalize_result(self.ui, payload, task_label)
|
||||
|
||||
_last_heartbeat_hash: str = ""
|
||||
|
||||
def _heartbeat_loop(self):
|
||||
|
||||
@@ -30,6 +30,7 @@ import os
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
@@ -95,6 +96,11 @@ class TraceStreamer:
|
||||
# Initialisé paresseusement pour ne pas payer le coût SQLite en dehors
|
||||
# d'un streaming actif.
|
||||
self._buffer: PersistentBuffer | None = None
|
||||
self._on_finalize_result: Optional[Callable[[dict], None]] = None
|
||||
|
||||
def set_on_finalize_result(self, callback: Optional[Callable[[dict], None]]) -> None:
    """Register the callback invoked with the JSON payload of /finalize.

    Passing ``None`` clears any previously registered callback.
    """
    self._on_finalize_result = callback
|
||||
|
||||
def _get_buffer(self) -> PersistentBuffer:
|
||||
"""Retourne le buffer persistant, en l'initialisant au besoin."""
|
||||
@@ -621,6 +627,14 @@ class TraceStreamer:
|
||||
if resp.ok:
|
||||
result = resp.json()
|
||||
logger.info(f"Session finalisée: {result}")
|
||||
if self._on_finalize_result is not None:
|
||||
try:
|
||||
self._on_finalize_result(result)
|
||||
except Exception as cb_error:
|
||||
logger.warning(
|
||||
"Callback finalize ignoré après erreur: %s",
|
||||
cb_error,
|
||||
)
|
||||
else:
|
||||
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
||||
except Exception as e:
|
||||
|
||||
@@ -158,14 +158,25 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
"""Capture l'ecran principal et le renvoie en base64 JPEG."""
|
||||
t0 = time.perf_counter()
|
||||
try:
|
||||
import mss
|
||||
from PIL import Image
|
||||
from ..vision.capturer import (
|
||||
capture_foreground_window_image,
|
||||
capture_screen_image,
|
||||
)
|
||||
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1] # ecran principal
|
||||
raw = sct.grab(monitor)
|
||||
|
||||
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
|
||||
_monitor, img, meta = capture_screen_image()
|
||||
if img is None:
|
||||
img, win_meta = capture_foreground_window_image()
|
||||
meta.update(win_meta)
|
||||
if img is None:
|
||||
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||
logger.error("Erreur capture : aucun backend exploitable (%s)", meta)
|
||||
self._send_json(503, {
|
||||
"error": "capture_unavailable",
|
||||
"source": meta.get("backend", "unknown"),
|
||||
"capture_ms": round(elapsed_ms),
|
||||
"diagnostics": meta,
|
||||
})
|
||||
return
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
@@ -180,15 +191,22 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
img_b64 = base64.b64encode(buf.getvalue()).decode()
|
||||
|
||||
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||
logger.info(f"Capture {img.width}x{img.height} en {elapsed_ms:.0f}ms")
|
||||
logger.info(
|
||||
"Capture %sx%s via %s en %.0fms",
|
||||
img.width,
|
||||
img.height,
|
||||
meta.get("backend", "unknown"),
|
||||
elapsed_ms,
|
||||
)
|
||||
|
||||
self._send_json(200, {
|
||||
"image": img_b64,
|
||||
"width": img.width,
|
||||
"height": img.height,
|
||||
"format": "jpeg",
|
||||
"source": "windows_live",
|
||||
"source": meta.get("backend", "windows_live"),
|
||||
"capture_ms": round(elapsed_ms),
|
||||
"diagnostics": meta,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
|
||||
@@ -894,6 +894,34 @@ class ChatWindow:
|
||||
except Exception:
|
||||
logger.debug("clear chat history silenced", exc_info=True)
|
||||
|
||||
@staticmethod
|
||||
def _compute_paused_bubble_height(reason_str: str) -> tuple:
|
||||
"""Calcule la hauteur du Text (en lignes) + si une scrollbar est
|
||||
nécessaire pour le message d'une bulle paused.
|
||||
|
||||
Patch 22 mai 2026 — fix troncature : on prend en compte les \\n
|
||||
explicites (les `reason` serveur peuvent lister plusieurs
|
||||
candidats avec un saut de ligne par item) en plus de la longueur
|
||||
en caractères, et on active la scrollbar dès que le cap est
|
||||
atteint pour éviter que du contenu disparaisse silencieusement.
|
||||
|
||||
Retourne ``(height_lines, needs_scrollbar)``.
|
||||
"""
|
||||
if not reason_str:
|
||||
return 2, False
|
||||
text = str(reason_str)
|
||||
# Estimation : ~60 chars/ligne effectifs avec wraplength.
|
||||
wrapped_lines = (len(text) // 60) + 1
|
||||
explicit_lines = text.count("\n") + 1
|
||||
estimated = max(wrapped_lines, explicit_lines)
|
||||
cap = 12
|
||||
height = max(2, min(cap, estimated))
|
||||
# Scrollbar dès que le cap est atteint OU contenu long (filet
|
||||
# textuel : ≥ 200 chars implique souvent un débordement visuel
|
||||
# même quand les lignes brutes sont peu nombreuses).
|
||||
needs_scroll = (estimated >= cap) or (len(text) > 200)
|
||||
return height, needs_scroll
|
||||
|
||||
def _render_paused_bubble(self, payload: Dict[str, Any]) -> None:
|
||||
tk = self._tk
|
||||
if getattr(self, "_msg_frame", None) is None:
|
||||
@@ -923,22 +951,23 @@ class ChatWindow:
|
||||
|
||||
# Message scrollable pour les longs reasons (ex: 200+ chars depuis le serveur).
|
||||
# On utilise un Text en mode read-only avec hauteur calculée selon la longueur.
|
||||
# Au-delà de 280 chars, scrollbar interne ; sinon Text auto-fitté.
|
||||
# Patch 22 mai 2026 : prendre en compte les \n explicites (titres
|
||||
# fenêtre / patterns) et activer la scrollbar dès que le cap de
|
||||
# hauteur est atteint — sinon les bulles de pause étaient
|
||||
# tronquées visuellement sans aucun ascenseur visible.
|
||||
reason_str = str(reason)
|
||||
# Estimation simple : ~70 chars/ligne avec wraplength
|
||||
approx_lines = max(2, min(8, (len(reason_str) // 60) + 1))
|
||||
height_lines, needs_scroll = self._compute_paused_bubble_height(reason_str)
|
||||
msg_frame = tk.Frame(inner, bg=PAUSED_BG)
|
||||
msg_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
|
||||
reason_text = tk.Text(
|
||||
msg_frame, bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=FONT_MSG, wrap=tk.WORD, bd=0, height=approx_lines,
|
||||
font=FONT_MSG, wrap=tk.WORD, bd=0, height=height_lines,
|
||||
highlightthickness=0, relief=tk.FLAT, cursor="arrow",
|
||||
)
|
||||
reason_text.insert("1.0", reason_str)
|
||||
reason_text.configure(state="disabled")
|
||||
reason_text.pack(side=tk.LEFT, fill=tk.X, expand=True)
|
||||
# Scrollbar interne uniquement si le contenu déborde (long messages)
|
||||
if len(reason_str) > 280:
|
||||
if needs_scroll:
|
||||
reason_scroll = tk.Scrollbar(
|
||||
msg_frame, orient=tk.VERTICAL,
|
||||
command=reason_text.yview, width=8,
|
||||
@@ -1019,27 +1048,40 @@ class ChatWindow:
|
||||
UX fix 8 mai 2026 : on désactive les 2 boutons et on affiche un message
|
||||
de feedback dès le clic, sans attendre l'ack serveur. Le bus émet en
|
||||
arrière-plan ; si la connexion est tombée, on log un warning visible.
|
||||
|
||||
Fallback HTTP 22 mai 2026 : si le bus SocketIO est déconnecté, on
|
||||
retombe sur un POST direct ``/replay/{id}/resume`` via
|
||||
``server_client``. Si les deux échouent, on ré-active les boutons
|
||||
et on saute l'auto-hide pour permettre à l'utilisateur de
|
||||
réessayer manuellement (sinon le replay reste figé côté serveur).
|
||||
"""
|
||||
if not replay_id:
|
||||
self._update_paused_feedback("⚠ replay_id manquant — impossible de relancer")
|
||||
return
|
||||
emitted = False
|
||||
if self._bus is not None and self._bus.connected:
|
||||
emitted = self._bus.resume_replay(replay_id)
|
||||
# Feedback immédiat : disable boutons + message
|
||||
emitted, channel = self._dispatch_paused_action(
|
||||
replay_id,
|
||||
bus_method="resume_replay",
|
||||
client_method="resume_replay",
|
||||
)
|
||||
self._disable_paused_buttons()
|
||||
if emitted:
|
||||
self._update_paused_feedback("→ Reprise demandée…")
|
||||
logger.info("paused_bubble: lea:replay_resume émis pour %s", replay_id)
|
||||
else:
|
||||
self._update_paused_feedback("⚠ Bus indisponible — réessayez dans 5s")
|
||||
logger.warning("paused_bubble: bus déconnecté, resume non émis")
|
||||
# UX fix mai 2026 : minimiser la fenêtre vers le systray après 500ms
|
||||
# (laisse à l'utilisateur le temps de voir "Reprise demandée…").
|
||||
try:
|
||||
self._root.after(500, self._do_hide)
|
||||
except Exception:
|
||||
logger.debug("auto-hide on resume silenced", exc_info=True)
|
||||
logger.info(
|
||||
"paused_bubble: replay_resume émis pour %s via %s",
|
||||
replay_id, channel,
|
||||
)
|
||||
try:
|
||||
self._root.after(500, self._do_hide)
|
||||
except Exception:
|
||||
logger.debug("auto-hide on resume silenced", exc_info=True)
|
||||
return
|
||||
# Échec sur les deux canaux : laisser l'utilisateur réessayer.
|
||||
self._update_paused_feedback("⚠ Serveur injoignable — réessayez")
|
||||
self._enable_paused_buttons()
|
||||
logger.warning(
|
||||
"paused_bubble: bus et HTTP indisponibles, resume non émis "
|
||||
"pour %s", replay_id,
|
||||
)
|
||||
|
||||
def _on_paused_abort(self, replay_id: str) -> None:
|
||||
"""Bouton Annuler : émettre lea:replay_abort + fermeture locale immédiate.
|
||||
@@ -1048,17 +1090,30 @@ class ChatWindow:
|
||||
n'envoie pas de lea:resumed pour un abort, donc sans cette fermeture
|
||||
locale la bulle restait coincée — c'était la cause de "Annuler ne
|
||||
fonctionne pas" rapportée par Dom).
|
||||
|
||||
Fallback HTTP 22 mai 2026 : symétrique de ``_on_paused_resume`` —
|
||||
si le bus est déconnecté, POST direct ``/replay/{id}/cancel``.
|
||||
L'abort ferme la bulle localement quelle que soit l'issue (l'état
|
||||
serveur sera réconcilié au prochain poll /replay/next).
|
||||
"""
|
||||
emitted = False
|
||||
if self._bus is not None and self._bus.connected:
|
||||
emitted = self._bus.abort_replay(replay_id)
|
||||
emitted, channel = self._dispatch_paused_action(
|
||||
replay_id,
|
||||
bus_method="abort_replay",
|
||||
client_method="abort_replay",
|
||||
)
|
||||
self._disable_paused_buttons()
|
||||
if emitted:
|
||||
self._update_paused_feedback("✗ Annulé")
|
||||
logger.info("paused_bubble: lea:replay_abort émis pour %s", replay_id)
|
||||
logger.info(
|
||||
"paused_bubble: replay_abort émis pour %s via %s",
|
||||
replay_id, channel,
|
||||
)
|
||||
else:
|
||||
self._update_paused_feedback("✗ Annulé (bus indisponible)")
|
||||
logger.warning("paused_bubble: bus déconnecté, abort non émis")
|
||||
self._update_paused_feedback("✗ Annulé (serveur injoignable)")
|
||||
logger.warning(
|
||||
"paused_bubble: bus et HTTP indisponibles, abort non émis "
|
||||
"pour %s", replay_id,
|
||||
)
|
||||
# Fermer la bulle en local (l'abort n'a pas de lea:resumed associé)
|
||||
self._close_active_paused_bubble(reason="abort_local")
|
||||
# UX fix mai 2026 : minimiser la fenêtre après 500ms (cohérence
|
||||
@@ -1068,6 +1123,34 @@ class ChatWindow:
|
||||
except Exception:
|
||||
logger.debug("auto-hide on abort silenced", exc_info=True)
|
||||
|
||||
def _dispatch_paused_action(
|
||||
self,
|
||||
replay_id: str,
|
||||
bus_method: str,
|
||||
client_method: str,
|
||||
) -> tuple:
|
||||
"""Envoyer une action de bulle paused via bus puis fallback HTTP.
|
||||
|
||||
Retourne ``(emitted, channel)`` où ``channel`` vaut ``"bus"``,
|
||||
``"http"`` ou ``""`` (aucun chemin n'a abouti).
|
||||
"""
|
||||
if self._bus is not None and getattr(self._bus, "connected", False):
|
||||
try:
|
||||
if getattr(self._bus, bus_method)(replay_id):
|
||||
return True, "bus"
|
||||
except Exception:
|
||||
logger.debug("paused_bubble: bus %s silenced", bus_method, exc_info=True)
|
||||
if self._server_client is not None and hasattr(self._server_client, client_method):
|
||||
try:
|
||||
if getattr(self._server_client, client_method)(replay_id):
|
||||
return True, "http"
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"paused_bubble: server_client %s silenced",
|
||||
client_method, exc_info=True,
|
||||
)
|
||||
return False, ""
|
||||
|
||||
def _disable_paused_buttons(self) -> None:
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
@@ -1077,6 +1160,19 @@ class ChatWindow:
|
||||
except Exception:
|
||||
logger.debug("disable paused buttons silenced", exc_info=True)
|
||||
|
||||
def _enable_paused_buttons(self) -> None:
|
||||
"""Ré-activer les boutons Continuer/Annuler de la bulle paused
|
||||
active. Appelé quand l'envoi a échoué sur tous les canaux —
|
||||
l'utilisateur doit pouvoir réessayer manuellement.
|
||||
"""
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
try:
|
||||
self._active_paused_bubble["btn_resume"].config(state="normal")
|
||||
self._active_paused_bubble["btn_abort"].config(state="normal")
|
||||
except Exception:
|
||||
logger.debug("enable paused buttons silenced", exc_info=True)
|
||||
|
||||
def _update_paused_feedback(self, text: str) -> None:
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
|
||||
@@ -504,6 +504,100 @@ class SmartTrayV1:
|
||||
|
||||
threading.Thread(target=_replay, daemon=True).start()
|
||||
|
||||
def _launch_replay_request(
    self,
    replay_request: Dict[str, Any],
    replay_name: str,
) -> None:
    """Start a direct replay from a server-provided ``replay_request`` payload.

    Only the ``/api/v1/traces/stream/replay-session`` endpoint with a
    non-empty ``session_id`` is accepted; any other request is logged, the
    user is notified, and nothing is launched. The HTTP POST itself is
    issued from a daemon thread.

    Args:
        replay_request: Dict read for ``endpoint``, ``session_id`` and
            ``machine_id`` (``self.machine_id`` is used when the latter is
            missing or falsy). A falsy value is treated as an empty dict.
        replay_name: Task name inserted in the user notification.
    """
    endpoint = (replay_request or {}).get("endpoint", "")
    session_id = (replay_request or {}).get("session_id", "")
    machine_id = (replay_request or {}).get("machine_id") or self.machine_id

    # Refuse anything but the single supported replay endpoint.
    if endpoint != "/api/v1/traces/stream/replay-session" or not session_id:
        logger.warning("Replay request non supporté: %s", replay_request)
        self._notifier.notify(
            "Léa",
            "Je ne peux pas lancer ce test automatique pour le moment.",
        )
        return

    def _replay():
        # Worker body: without a server client there is nothing to call,
        # and the function returns without notifying the user.
        if self.server_client is None:
            return

        # NOTE(review): indentation was lost in the extracted source; the
        # nesting of _update_icon() relative to the lock is reconstructed
        # here (outside the lock) — confirm against the original file.
        with self._state_lock:
            self._replay_active = True
        self._update_icon()
        self._notifier.notify(
            "Léa",
            f"Le système d'intelligence artificielle exécute la "
            f"tâche '{replay_name}' sur votre écran.",
        )

        try:
            import requests
            auth_headers = {}
            if self.server_client is not None:
                auth_headers = self.server_client._auth_headers()
            # session_id / machine_id are sent as query parameters;
            # redirects are not followed.
            resp = requests.post(
                f"{self.server_client._stream_base}{endpoint}",
                params={
                    "session_id": session_id,
                    "machine_id": machine_id,
                },
                headers=auth_headers,
                timeout=30,
                allow_redirects=False,
            )
            if resp.ok:
                logger.info(
                    "Replay direct démarré pour session %s (machine=%s)",
                    session_id,
                    machine_id,
                )
            else:
                self._notifier.notify(
                    "Léa",
                    "Hmm, le serveur a refusé le test immédiat.",
                )
        except Exception as e:
            logger.error("Erreur lancement replay direct : %s", e)
            self._notifier.notify(
                "Léa",
                f"Oups, un problème : {e}",
            )
        finally:
            # Always clear the local "replay active" flag once the request
            # has been sent or has failed.
            with self._state_lock:
                self._replay_active = False
            self._update_icon()

    threading.Thread(target=_replay, daemon=True).start()
|
||||
|
||||
def offer_finalize_replay(
    self,
    replay_request: Dict[str, Any],
    replay_name: str,
) -> None:
    """Offer the user an immediate test run of the task that was just learned.

    Returns without doing anything when ``replay_request`` is empty or has
    no ``session_id``. Otherwise a daemon thread notifies the user, asks
    for consent through ``_ask_consent`` and, if granted, hands the request
    to ``_launch_replay_request``.

    Args:
        replay_request: Server-provided replay request payload.
        replay_name: Task name shown in the notification and the prompt.
    """
    has_session = bool(replay_request) and bool(replay_request.get("session_id"))
    if not has_session:
        return

    def _prompt_then_launch():
        self._notifier.notify(
            "Léa",
            f"J'ai compris la tâche '{replay_name}'. Voulez-vous la tester ?",
        )
        accepted = _ask_consent(
            "Léa — Test immédiat",
            f"J'ai compris la tâche '{replay_name}'. "
            "Voulez-vous la tester maintenant ?",
        )
        if accepted:
            self._launch_replay_request(replay_request, replay_name)

    threading.Thread(target=_prompt_then_launch, daemon=True).start()
|
||||
|
||||
def _on_emergency_stop(self, _icon=None, _item=None) -> None:
|
||||
"""Arret d'urgence — stoppe TOUTES les activites de l'agent immediatement.
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ import time
|
||||
import logging
|
||||
import hashlib
|
||||
import platform
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from PIL import Image, ImageFilter, ImageStat
|
||||
import mss
|
||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
||||
@@ -86,6 +86,337 @@ def _enrich_with_monitor_info(payload: dict) -> dict:
|
||||
payload["monitors_geometry"] = _get_monitors_geometry()
|
||||
return payload
|
||||
|
||||
|
||||
# Garde dimensions monitor (démo GHT 19 mai 2026) : mss.monitors[1] peut
|
||||
# retourner intermittemment des dims tronquées (cas observé 2560×60). Utiliser
|
||||
# ces dims pour normaliser des coords empoisonne la mémoire (TargetMemoryStore).
|
||||
MIN_MONITOR_WIDTH = 200
|
||||
MIN_MONITOR_HEIGHT = 200
|
||||
MONITOR_MAX_ATTEMPTS = 2
|
||||
MONITOR_RETRY_DELAY_S = 0.05
|
||||
BLACK_FRAME_MEAN_MAX = 1.0
|
||||
BLACK_FRAME_STDDEV_MAX = 1.0
|
||||
BLACK_FRAME_MAX_LUMA = 3
|
||||
|
||||
|
||||
def _is_monitor_sane(monitor) -> bool:
|
||||
"""True si les dims du monitor sont au-dessus du seuil de plausibilité."""
|
||||
if not isinstance(monitor, dict):
|
||||
return False
|
||||
w = monitor.get("width", 0) or 0
|
||||
h = monitor.get("height", 0) or 0
|
||||
return w >= MIN_MONITOR_WIDTH and h >= MIN_MONITOR_HEIGHT
|
||||
|
||||
|
||||
def _dim_str(monitor) -> str:
|
||||
"""Représentation courte WxH pour les logs (gère monitor=None)."""
|
||||
if not isinstance(monitor, dict):
|
||||
return "?x?"
|
||||
return f"{monitor.get('width', '?')}x{monitor.get('height', '?')}"
|
||||
|
||||
|
||||
def _acquire_safe_grab(max_attempts: int = MONITOR_MAX_ATTEMPTS,
                       retry_delay_s: float = MONITOR_RETRY_DELAY_S,
                       allow_secondary_fallback: bool = True):
    """Open mss and grab a monitor whose dimensions are plausible.

    Cascading strategy:
      1. On every attempt, open a fresh `mss.mss()` (may refresh its
         internal cache) and inspect monitors[1..n].
      2. Prefer monitors[1] (physical primary screen). If it is aberrant AND
         `allow_secondary_fallback=True`, take the first sane monitors[2..n]
         with an explicit WARNING.
      3. If `allow_secondary_fallback=False`, ONLY monitors[1] is accepted.
         Useful for methods that receive (x, y) coordinates in the composite
         screen system: grabbing a secondary monitor would produce a sane
         image that is shifted relative to those coordinates.
      4. If no dimension is plausible: wait `retry_delay_s` and retry.
      5. After `max_attempts` unsuccessful attempts: log an ERROR and return
         (None, None) so the caller takes an explicit error path.

    Args:
        max_attempts: number of mss attempts before giving up.
        retry_delay_s: delay between attempts.
        allow_secondary_fallback: if False, refuse monitors[2..n]
            (fail-closed for coordinate-bearing methods).

    Returns:
        Tuple (monitor_dict, PIL.Image) when a sane capture succeeded,
        (None, None) otherwise.
    """
    last_aberrant = None
    secondary_seen = False  # a sane secondary monitor was seen but refused
    for attempt in range(max_attempts):
        with mss.mss() as sct:
            monitors = list(sct.monitors) if sct.monitors else []
            chosen = None
            chosen_idx = None
            # Index 0 is skipped on purpose: scanning starts at monitors[1].
            for idx in range(1, len(monitors)):
                candidate = monitors[idx]
                if not _is_monitor_sane(candidate):
                    last_aberrant = candidate
                    logger.warning(
                        "Monitor[%d] dims aberrantes (%s, seuil %dx%d) "
                        "— attempt %d/%d",
                        idx, _dim_str(candidate),
                        MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
                        attempt + 1, max_attempts,
                    )
                    continue
                # Sane monitor found
                if idx == 1 or allow_secondary_fallback:
                    chosen = candidate
                    chosen_idx = idx
                    break
                # Otherwise: sane, but secondary monitors are forbidden for this caller
                secondary_seen = True
                logger.warning(
                    "Monitor[%d] sain (%s) mais fallback secondaire refusé "
                    "(allow_secondary_fallback=False) — capture cohérente "
                    "des coords impossible",
                    idx, _dim_str(candidate),
                )
            if chosen is not None:
                # Only log when the capture is not the nominal case
                # (first attempt on monitors[1]).
                if chosen_idx != 1 or attempt > 0:
                    logger.warning(
                        "Capture fallback : monitor[%d] dim=%s, attempt=%d",
                        chosen_idx, _dim_str(chosen), attempt + 1,
                    )
                sct_img = sct.grab(chosen)
                img = Image.frombytes(
                    "RGB", sct_img.size, sct_img.bgra, "raw", "BGRX",
                )
                return chosen, img
        # No sleep after the final attempt: fall straight through to the error log.
        if attempt < max_attempts - 1:
            time.sleep(retry_delay_s)
    if secondary_seen and not allow_secondary_fallback:
        logger.error(
            "Capture abandonnée : monitor[1] aberrant après %d tentatives "
            "(dernier vu %s) et fallback secondaire désactivé "
            "pour préserver la cohérence des coordonnées",
            max_attempts, _dim_str(last_aberrant),
        )
    else:
        logger.error(
            "Aucun monitor avec dims plausibles trouvé après %d tentatives "
            "(dernier vu : %s, seuil %dx%d) — capture abandonnée",
            max_attempts, _dim_str(last_aberrant),
            MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
        )
    return None, None
|
||||
|
||||
|
||||
def _compute_luma_stats(img: Image.Image) -> Dict[str, float | int]:
    """Compute basic luminance statistics used to diagnose a black frame.

    The image is converted to 8-bit grayscale (mode ``"L"``) first.

    Returns:
        Dict with ``mean`` and ``stddev`` (floats rounded to 2 decimals)
        and ``min`` / ``max`` (ints) of the grayscale band.
    """
    grayscale = img.convert("L")
    band_stats = ImageStat.Stat(grayscale)
    darkest, brightest = grayscale.getextrema()

    # Stat exposes per-band lists; fall back to 0.0 if a list is empty.
    mean_luma = float(band_stats.mean[0]) if band_stats.mean else 0.0
    stddev_luma = float(band_stats.stddev[0]) if band_stats.stddev else 0.0

    return {
        "mean": round(mean_luma, 2),
        "stddev": round(stddev_luma, 2),
        "min": int(darkest),
        "max": int(brightest),
    }
|
||||
|
||||
|
||||
def _is_effectively_black(img: Image.Image) -> bool:
    """Fail-closed heuristic that rejects a practically black screenshot.

    A frame is black only when its maximum luminance, mean luminance and
    standard deviation are ALL at or below their ``BLACK_FRAME_*`` thresholds.
    """
    luma = _compute_luma_stats(img)
    if not (luma["max"] <= BLACK_FRAME_MAX_LUMA):
        return False
    if not (luma["mean"] <= BLACK_FRAME_MEAN_MAX):
        return False
    return luma["stddev"] <= BLACK_FRAME_STDDEV_MAX
|
||||
|
||||
|
||||
def _capture_via_imagegrab(
    all_screens: bool = True,
) -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
    """Windows fallback capture through Pillow's ImageGrab.

    Useful when `mss` returns a black frame while the user's graphical
    session is still visible.

    Args:
        all_screens: forwarded to ``ImageGrab.grab``. True (default, the
            historical behaviour) grabs every monitor in one composite
            image; False grabs the primary screen only. Coordinate-bearing
            callers should pass False: the composite image starts at the
            virtual-screen origin, which is not (0, 0) when a monitor sits
            left of or above the primary one.

    Returns:
        ``(monitor, image, meta)``. On failure ``monitor`` and ``image`` are
        None and ``meta["error"]`` holds the reason; on success ``meta``
        carries the luminance stats of the image.
    """
    if _SYSTEM != "Windows":
        return None, None, {"backend": "imagegrab", "error": "unsupported_platform"}

    # Imported lazily: ImageGrab is only needed on this fallback path.
    try:
        from PIL import ImageGrab
    except ImportError as exc:
        return None, None, {"backend": "imagegrab", "error": str(exc)}

    try:
        img = ImageGrab.grab(all_screens=all_screens)
    except Exception as exc:
        logger.warning("ImageGrab indisponible pour le fallback capture : %s", exc)
        return None, None, {"backend": "imagegrab", "error": str(exc)}

    # NOTE(review): left/top are reported as 0 even for a composite grab
    # whose real origin may be negative — callers should not rely on them
    # when all_screens=True.
    monitor = {"left": 0, "top": 0, "width": img.width, "height": img.height}
    return monitor, img, {
        "backend": "imagegrab",
        "luma": _compute_luma_stats(img),
    }


def capture_screen_image(
    allow_secondary_fallback: bool = True,
) -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
    """Full-screen capture with black-frame diagnosis and a Windows fallback.

    The mss capture is tried first. If it is unavailable or practically
    black, ImageGrab is tried. When ``allow_secondary_fallback`` is False the
    ImageGrab fallback is restricted to the primary screen, so the fallback
    honours the same fail-closed coordinate contract as the mss path
    (a composite all-screens image could be shifted relative to the
    caller's screen coordinates).

    Args:
        allow_secondary_fallback: forwarded to ``_acquire_safe_grab``; also
            decides whether the ImageGrab fallback may grab all screens.

    Returns:
        ``(monitor, image, meta)`` where ``image`` may be None if no
        full-screen backend produced a usable image. In that case ``meta``
        records which backends failed (``mss_unavailable``,
        ``mss_black_frame``, ``imagegrab_black_frame``, ``imagegrab_error``).
    """
    monitor, img = _acquire_safe_grab(
        allow_secondary_fallback=allow_secondary_fallback
    )
    meta: Dict[str, Any] = {"backend": "mss"}

    if img is not None:
        meta["luma"] = _compute_luma_stats(img)
        if not _is_effectively_black(img):
            return monitor, img, meta
        logger.warning(
            "Capture mss quasi noire (%s) — tentative de fallback",
            meta["luma"],
        )
        meta["mss_black_frame"] = True
    else:
        meta["mss_unavailable"] = True

    # Fail-closed callers (allow_secondary_fallback=False) get a primary-only
    # grab so the image origin matches the coordinates they will crop with.
    fallback_monitor, fallback_img, fallback_meta = _capture_via_imagegrab(
        all_screens=allow_secondary_fallback
    )
    if fallback_img is not None:
        if not _is_effectively_black(fallback_img):
            logger.warning(
                "Capture fallback via ImageGrab (%sx%s)",
                fallback_img.width,
                fallback_img.height,
            )
            return fallback_monitor, fallback_img, fallback_meta
        logger.warning(
            "Capture ImageGrab quasi noire (%s)",
            fallback_meta.get("luma"),
        )
        meta["imagegrab_black_frame"] = True

    # None when ImageGrab produced an image that was merely black.
    meta["imagegrab_error"] = fallback_meta.get("error")
    return None, None, meta
|
||||
|
||||
|
||||
def _capture_window_image_windows(
    hwnd: int,
    width: int,
    height: int,
) -> Tuple[Optional[Image.Image], Dict[str, Any]]:
    """Capture a Windows window through PrintWindow.

    Useful fallback when the full-screen capture is black but the active
    window can still be printed through the Win32 API.

    Args:
        hwnd: handle of the window to print.
        width: width of the bitmap to allocate, in pixels.
        height: height of the bitmap to allocate, in pixels.

    Returns:
        ``(image, meta)``. On success ``meta`` holds the backend tag
        (``printwindow:<flag>``), the raw PrintWindow result and the
        luminance stats. On failure ``image`` is None and ``meta["error"]``
        holds the last exception text or ``"no_usable_frame"``.
    """
    if _SYSTEM != "Windows":
        return None, {"backend": "printwindow", "error": "unsupported_platform"}

    try:
        import ctypes
        import win32gui
        import win32ui
    except ImportError as exc:
        return None, {"backend": "printwindow", "error": str(exc)}

    last_error = None
    # PrintWindow flags are tried in this order; the first usable frame wins.
    # NOTE(review): presumably 3 = PW_CLIENTONLY | PW_RENDERFULLCONTENT,
    # 2 = PW_RENDERFULLCONTENT, 0 = default — confirm against the Win32 headers.
    for flag in (3, 2, 0):
        # Reset per attempt so the finally block only releases what this
        # iteration actually acquired.
        wnd_dc = None
        src_dc = None
        mem_dc = None
        bmp = None
        try:
            wnd_dc = win32gui.GetWindowDC(hwnd)
            if not wnd_dc:
                raise RuntimeError("GetWindowDC a retourné 0")
            src_dc = win32ui.CreateDCFromHandle(wnd_dc)
            mem_dc = src_dc.CreateCompatibleDC()
            bmp = win32ui.CreateBitmap()
            bmp.CreateCompatibleBitmap(src_dc, width, height)
            mem_dc.SelectObject(bmp)
            result = ctypes.windll.user32.PrintWindow(
                hwnd, mem_dc.GetSafeHdc(), flag
            )
            bits = bmp.GetBitmapBits(True)
            img = Image.frombuffer(
                "RGB", (width, height), bits, "raw", "BGRX", 0, 1
            )
            luma = _compute_luma_stats(img)
            # Accept the frame if the API reported success OR the pixels
            # are not black; otherwise try the next flag.
            if result or not _is_effectively_black(img):
                return img, {
                    "backend": f"printwindow:{flag}",
                    "printwindow_result": int(result),
                    "luma": luma,
                }
        except Exception as exc:
            last_error = str(exc)
        finally:
            # Best-effort cleanup: each resource is released independently
            # so one failing release does not leak the others.
            try:
                if bmp is not None:
                    win32gui.DeleteObject(bmp.GetHandle())
            except Exception:
                pass
            try:
                if mem_dc is not None:
                    mem_dc.DeleteDC()
            except Exception:
                pass
            try:
                if src_dc is not None:
                    src_dc.DeleteDC()
            except Exception:
                pass
            try:
                if wnd_dc is not None:
                    win32gui.ReleaseDC(hwnd, wnd_dc)
            except Exception:
                pass

    return None, {
        "backend": "printwindow",
        "error": last_error or "no_usable_frame",
    }
|
||||
|
||||
|
||||
def capture_foreground_window_image() -> Tuple[Optional[Image.Image], Dict[str, Any]]:
    """Capture the focused window through the native API when available.

    Returns:
        ``(image, meta)``. ``image`` is None when the active window cannot
        be resolved or printed; ``meta["error"]`` then explains why. On
        success ``meta`` is enriched with the window title, app name, rect,
        size and handle.
    """
    backend = "printwindow"

    try:
        from ..window_info_crossplatform import get_active_window_rect

        rect_info = get_active_window_rect()
    except Exception as exc:
        return None, {"backend": backend, "error": str(exc)}

    if not rect_info:
        return None, {"backend": backend, "error": "active_window_unavailable"}

    win_w, win_h = rect_info.get("size", [0, 0])
    hwnd = rect_info.get("hwnd")
    printable = hwnd and win_w > 0 and win_h > 0
    if not printable:
        # No handle or a degenerate size: nothing to print.
        return None, {
            "backend": backend,
            "error": "active_window_handle_unavailable",
            "title": rect_info.get("title", "unknown_window"),
        }

    img, meta = _capture_window_image_windows(hwnd, win_w, win_h)
    if img is None:
        return None, meta

    # Attach window identity so callers can contextualise the degraded capture.
    meta["title"] = rect_info.get("title", "unknown_window")
    meta["app_name"] = rect_info.get("app_name", "unknown_app")
    meta["rect"] = rect_info.get("rect")
    meta["window_size"] = rect_info.get("size")
    meta["hwnd"] = hwnd
    return img, meta
|
||||
|
||||
|
||||
class VisionCapturer:
|
||||
def __init__(self, session_dir: str):
|
||||
self.session_dir = session_dir
|
||||
@@ -103,25 +434,35 @@ class VisionCapturer:
|
||||
(utile pour le contextualisation des heartbeats côté serveur).
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||
_monitor, img, meta = capture_screen_image()
|
||||
if img is None:
|
||||
img, win_meta = capture_foreground_window_image()
|
||||
if img is None:
|
||||
logger.error(
|
||||
"Capture plein contexte indisponible (meta=%s, window=%s)",
|
||||
meta,
|
||||
win_meta,
|
||||
)
|
||||
return ""
|
||||
logger.warning(
|
||||
"Capture plein contexte dégradée via fenêtre active (%s)",
|
||||
win_meta.get("backend"),
|
||||
)
|
||||
|
||||
# Détection de changement (pour Heartbeat)
|
||||
if not force:
|
||||
current_hash = self._compute_quick_hash(img)
|
||||
if current_hash == self.last_img_hash:
|
||||
return "" # Pas de changement, on économise la fibre
|
||||
self.last_img_hash = current_hash
|
||||
# Détection de changement (pour Heartbeat)
|
||||
if not force:
|
||||
current_hash = self._compute_quick_hash(img)
|
||||
if current_hash == self.last_img_hash:
|
||||
return "" # Pas de changement, on économise la fibre
|
||||
self.last_img_hash = current_hash
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
|
||||
path = os.path.join(self.shots_dir, f"context_{int(time.time())}_{name_suffix}.png")
|
||||
img.save(path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
return path
|
||||
path = os.path.join(self.shots_dir, f"context_{int(time.time())}_{name_suffix}.png")
|
||||
img.save(path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
return path
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Context Capture: {e}")
|
||||
return ""
|
||||
@@ -145,46 +486,62 @@ class VisionCapturer:
|
||||
sont toujours retournés (fallback gracieux).
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||
|
||||
# Capture du Crop (Cœur de l'apprentissage qwen3-vl)
|
||||
crop_path = os.path.join(self.shots_dir, f"{screenshot_id}_crop.png")
|
||||
w, h = TARGETED_CROP_SIZE
|
||||
left = max(0, x - w // 2)
|
||||
top = max(0, y - h // 2)
|
||||
crop_img = img.crop((left, top, left + w, top + h))
|
||||
|
||||
if anonymize:
|
||||
crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
blur_sensitive_regions(crop_img)
|
||||
|
||||
img.save(full_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
crop_img.save(crop_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
|
||||
result = {"full": full_path, "crop": crop_path}
|
||||
|
||||
# --- Capture de la fenêtre active ---
|
||||
# Ajout non-bloquant : enrichit le résultat avec l'image
|
||||
# de la fenêtre seule + métadonnées (titre, rect, clic relatif)
|
||||
window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
|
||||
# Coords (x, y) sont en système écran composite ; cropper depuis
|
||||
# un monitor secondaire (offset ≠ 0) produirait une image saine
|
||||
# mais décalée → fail-closed sur fallback secondaire.
|
||||
_monitor, img, meta = capture_screen_image(
|
||||
allow_secondary_fallback=False
|
||||
)
|
||||
if img is None:
|
||||
window_info = self.capture_active_window(
|
||||
x, y, screenshot_id, full_img=None
|
||||
)
|
||||
if window_info:
|
||||
result["window_capture"] = window_info
|
||||
result = {"window_capture": window_info}
|
||||
_enrich_with_monitor_info(result)
|
||||
logger.warning(
|
||||
"capture_dual dégradée: fenêtre active seule (%s)",
|
||||
meta,
|
||||
)
|
||||
return result
|
||||
return {}
|
||||
|
||||
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||
_enrich_with_monitor_info(result)
|
||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||
|
||||
return result
|
||||
# Capture du Crop (Cœur de l'apprentissage qwen3-vl)
|
||||
crop_path = os.path.join(self.shots_dir, f"{screenshot_id}_crop.png")
|
||||
w, h = TARGETED_CROP_SIZE
|
||||
left = max(0, x - w // 2)
|
||||
top = max(0, y - h // 2)
|
||||
crop_img = img.crop((left, top, left + w, top + h))
|
||||
|
||||
if anonymize:
|
||||
crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
blur_sensitive_regions(crop_img)
|
||||
|
||||
img.save(full_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
crop_img.save(crop_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
|
||||
result = {"full": full_path, "crop": crop_path}
|
||||
|
||||
# --- Capture de la fenêtre active ---
|
||||
# Ajout non-bloquant : enrichit le résultat avec l'image
|
||||
# de la fenêtre seule + métadonnées (titre, rect, clic relatif)
|
||||
window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
|
||||
if window_info:
|
||||
result["window_capture"] = window_info
|
||||
|
||||
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||
_enrich_with_monitor_info(result)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Dual Capture: {e}")
|
||||
return {}
|
||||
@@ -239,33 +596,54 @@ class VisionCapturer:
|
||||
# Si le clic est en dehors de la fenêtre, on le signale mais on continue
|
||||
click_inside = (0 <= click_rel_x <= win_w and 0 <= click_rel_y <= win_h)
|
||||
|
||||
window_img = None
|
||||
|
||||
# --- Crop de la fenêtre depuis le plein écran ---
|
||||
if full_img is None:
|
||||
# Pas de screenshot fourni — en capturer un (cas standalone)
|
||||
# Pas de screenshot fourni — en capturer un (cas standalone).
|
||||
# win_rect est en coords globales ; cropper depuis un monitor
|
||||
# secondaire produirait une image décalée → fail-closed sur
|
||||
# fallback secondaire.
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
full_img = Image.frombytes(
|
||||
"RGB", sct_img.size, sct_img.bgra, "raw", "BGRX"
|
||||
)
|
||||
_monitor, full_img, _meta = capture_screen_image(
|
||||
allow_secondary_fallback=False
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur capture plein écran pour fenêtre : {e}")
|
||||
return None
|
||||
full_img = None
|
||||
|
||||
# Borner le crop aux limites de l'image plein écran
|
||||
img_w, img_h = full_img.size
|
||||
crop_left = max(0, win_left)
|
||||
crop_top = max(0, win_top)
|
||||
crop_right = min(img_w, win_right)
|
||||
crop_bottom = min(img_h, win_bottom)
|
||||
if full_img is not None and not _is_effectively_black(full_img):
|
||||
img_w, img_h = full_img.size
|
||||
crop_left = max(0, win_left)
|
||||
crop_top = max(0, win_top)
|
||||
crop_right = min(img_w, win_right)
|
||||
crop_bottom = min(img_h, win_bottom)
|
||||
|
||||
if crop_right <= crop_left or crop_bottom <= crop_top:
|
||||
logger.debug("Fenêtre hors écran — skip capture fenêtre")
|
||||
if crop_right > crop_left and crop_bottom > crop_top:
|
||||
window_img = full_img.crop(
|
||||
(crop_left, crop_top, crop_right, crop_bottom)
|
||||
)
|
||||
else:
|
||||
logger.debug("Fenêtre hors écran — fallback natif si possible")
|
||||
elif full_img is not None:
|
||||
logger.warning(
|
||||
"capture_active_window: screenshot plein écran noir, fallback natif"
|
||||
)
|
||||
|
||||
if window_img is None and rect_info.get("hwnd"):
|
||||
window_img, native_meta = _capture_window_image_windows(
|
||||
rect_info["hwnd"], win_w, win_h
|
||||
)
|
||||
if window_img is not None:
|
||||
logger.warning(
|
||||
"capture_active_window via fallback natif (%s)",
|
||||
native_meta.get("backend"),
|
||||
)
|
||||
|
||||
if window_img is None:
|
||||
logger.debug("Fenêtre hors écran ou capture native indisponible")
|
||||
return None
|
||||
|
||||
window_img = full_img.crop((crop_left, crop_top, crop_right, crop_bottom))
|
||||
|
||||
# Floutage conformité AI Act
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(window_img)
|
||||
|
||||
@@ -338,6 +338,50 @@ class LeaServerClient:
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def resume_replay(self, replay_id: str) -> bool:
    """Resume a replay held in supervised pause, over direct HTTP.

    Fallback for the SocketIO path (`lea:replay_resume` → agent_chat),
    used when the feedback bus is disconnected at the moment the user
    clicks "Continuer" in the paused bubble.

    Returns:
        True if the streaming server accepted the resume; False for an
        empty ``replay_id``, a non-OK response, or any exception (logged
        at debug level).
    """
    if replay_id:
        try:
            import requests

            url = f"{self._stream_url}/traces/stream/replay/{replay_id}/resume"
            response = requests.post(
                url,
                headers=self._auth_headers(),
                timeout=10,
            )
            return bool(response.ok)
        except Exception:
            # Deliberately best-effort: a failed fallback must not raise.
            logger.debug("resume_replay HTTP silenced", exc_info=True)
    return False
|
||||
|
||||
def abort_replay(self, replay_id: str) -> bool:
    """Cancel a replay held in supervised pause, over direct HTTP.

    Mirror of ``resume_replay``: fallback for the SocketIO path
    (`lea:replay_abort`) when the feedback bus is disconnected.
    POSTs to ``/replay/{id}/cancel`` on the streaming server.

    Returns:
        True if the server answered with an OK status; False for an empty
        ``replay_id``, a non-OK response, or any exception (logged at
        debug level).
    """
    accepted = False
    if not replay_id:
        return accepted

    try:
        import requests

        endpoint = f"{self._stream_url}/traces/stream/replay/{replay_id}/cancel"
        reply = requests.post(endpoint, headers=self._auth_headers(), timeout=10)
        accepted = bool(reply.ok)
    except Exception:
        # Deliberately best-effort: a failed fallback must not raise.
        logger.debug("abort_replay HTTP silenced", exc_info=True)
        return False
    return accepted
|
||||
|
||||
def report_action_result(
|
||||
self,
|
||||
session_id: str,
|
||||
|
||||
@@ -61,7 +61,9 @@ MAX_ACTIONS_PER_REPLAY = 500 # Max actions par requête de replay
|
||||
MAX_REPLAY_STATES = 1000 # Max entrées dans _replay_states
|
||||
REPLAY_STATE_TTL_SECONDS = 3600 # Nettoyage auto des replays terminés après 1h
|
||||
|
||||
# Actions en cours de retry : action_id -> {"action": ..., "retry_count": N, "replay_id": ...}
|
||||
# Actions in-flight / retry : action_id -> transport + retry metadata.
|
||||
# `action` remains the semantic/original action for reporting/retry logic,
|
||||
# while `dispatched_action` tracks the exact payload last sent to Lea.
|
||||
_retry_pending: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# Callbacks d'erreur par replay_id : replay_id -> callback_url
|
||||
@@ -207,12 +209,14 @@ from .replay_engine import (
|
||||
_MAX_ACTION_TEXT_LENGTH,
|
||||
_MAX_KEYS_PER_COMBO,
|
||||
_KNOWN_KEY_NAMES,
|
||||
_auto_launch_replay_after_finalize,
|
||||
_validate_replay_action,
|
||||
_APP_LAUNCH_COMMANDS,
|
||||
_APP_VISUAL_SEARCH,
|
||||
_SETUP_IGNORE_APPS,
|
||||
_extract_required_apps_from_events,
|
||||
_extract_required_apps_from_workflow,
|
||||
_trim_redundant_setup_events,
|
||||
_resolve_launch_command,
|
||||
_infer_app_from_window_titles,
|
||||
_get_visual_search_info,
|
||||
@@ -475,6 +479,19 @@ def _clear_replay_lock():
|
||||
logger.error(f"Erreur suppression replay lock : {e}")
|
||||
|
||||
|
||||
def _memory_window_title_for_action(action_meta: Dict[str, Any]) -> str:
|
||||
"""Résoudre le meilleur window_title disponible pour la mémoire persistante."""
|
||||
action_meta = action_meta or {}
|
||||
target_spec = action_meta.get("target_spec") or {}
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
return (
|
||||
action_meta.get("expected_window_before", "")
|
||||
or target_spec.get("window_title", "")
|
||||
or context_hints.get("window_title", "")
|
||||
or action_meta.get("window_title", "")
|
||||
)
|
||||
|
||||
|
||||
def _get_worker_queue_status() -> Dict[str, Any]:
|
||||
"""Retourne l'état de la queue du worker VLM (pour le monitoring)."""
|
||||
queue = []
|
||||
@@ -544,6 +561,34 @@ _machine_replay_target: Dict[str, str] = {}
|
||||
_replay_states: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
|
||||
def _remove_queued_action_duplicates(session_id: str, action_id: str) -> int:
|
||||
"""Retirer d'une queue les copies exactes d'une action déjà acquittée.
|
||||
|
||||
Le watchdog peut re-pousser une action orpheline en tête de queue. Si le
|
||||
report original arrive juste après, cette copie resend doit être jetée,
|
||||
sinon Léa ré-exécute la même action avec le même `action_id` et peut
|
||||
toggler l'état UI (ex: touche Windows qui referme Démarrer).
|
||||
"""
|
||||
if not session_id or not action_id:
|
||||
return 0
|
||||
queue = _replay_queues.get(session_id, [])
|
||||
if not queue:
|
||||
return 0
|
||||
|
||||
filtered: List[Dict[str, Any]] = []
|
||||
removed = 0
|
||||
for queued_action in queue:
|
||||
queued_id = str((queued_action or {}).get("action_id", "") or "")
|
||||
if queued_id == action_id:
|
||||
removed += 1
|
||||
continue
|
||||
filtered.append(queued_action)
|
||||
|
||||
if removed:
|
||||
_replay_queues[session_id] = filtered
|
||||
return removed
|
||||
|
||||
|
||||
class StreamEvent(BaseModel):
|
||||
session_id: str
|
||||
timestamp: float
|
||||
@@ -832,6 +877,16 @@ async def startup():
|
||||
|
||||
threading.Thread(target=_preload_easyocr, daemon=True, name="preload_easyocr").start()
|
||||
|
||||
from .replay_watchdog import get_or_create_watchdog
|
||||
|
||||
app.state.replay_watchdog = get_or_create_watchdog(
|
||||
retry_pending=_retry_pending,
|
||||
replay_queues=_replay_queues,
|
||||
async_lock_factory=_async_replay_lock,
|
||||
sse_notifier=None,
|
||||
)
|
||||
await app.state.replay_watchdog.start()
|
||||
|
||||
logger.info(
|
||||
"API Streaming démarrée — StreamProcessor, Worker et Cleanup prêts. "
|
||||
"VLM Worker dans un process séparé (run_worker.py)."
|
||||
@@ -886,6 +941,9 @@ def _load_existing_workflows():
|
||||
async def shutdown():
|
||||
global _cleanup_running
|
||||
_cleanup_running = False
|
||||
watchdog = getattr(app.state, "replay_watchdog", None)
|
||||
if watchdog is not None:
|
||||
await watchdog.stop(timeout_s=3.0)
|
||||
worker.stop()
|
||||
# Nettoyer le replay lock au shutdown (sinon le worker VLM resterait bloqué)
|
||||
_clear_replay_lock()
|
||||
@@ -1477,17 +1535,24 @@ def _process_screenshot_thread(session_id: str, shot_id: str, path: str):
|
||||
# =========================================================================
|
||||
|
||||
@app.post("/api/v1/traces/stream/finalize")
|
||||
async def finalize(session_id: str, machine_id: str = "default"):
|
||||
async def finalize(
|
||||
session_id: str,
|
||||
machine_id: str = "default",
|
||||
launch_replay: bool = False,
|
||||
):
|
||||
"""Clôture la session et place le traitement en file d'attente.
|
||||
|
||||
Ne bloque plus : marque la session comme finalisée et l'ajoute à la queue
|
||||
du worker VLM (process séparé) pour analyse + construction workflow.
|
||||
|
||||
Le client peut suivre la progression via GET /api/v1/traces/stream/processing/status.
|
||||
Optionnellement, il peut aussi déclencher immédiatement un replay direct
|
||||
depuis la session finalisée (chemin Lea-first, sans attendre le workflow VLM).
|
||||
|
||||
Args:
|
||||
session_id: Identifiant de la session à finaliser
|
||||
machine_id: Identifiant machine (informatif, le machine_id est déjà dans la session)
|
||||
launch_replay: Si vrai, tente de lancer immédiatement /replay-session
|
||||
"""
|
||||
# Vérifier que la session existe
|
||||
session = processor.session_manager.get_session(session_id)
|
||||
@@ -1501,6 +1566,10 @@ async def finalize(session_id: str, machine_id: str = "default"):
|
||||
processor.session_manager.finalize(session_id)
|
||||
logger.info(f"Session {session_id} finalisée, ajout à la queue du worker VLM")
|
||||
|
||||
resolved_machine_id = machine_id
|
||||
if resolved_machine_id == "default" and getattr(session, "machine_id", ""):
|
||||
resolved_machine_id = session.machine_id
|
||||
|
||||
# Nettoyer les structures d'enrichissement temps réel pour cette session
|
||||
with _enrichment_lock:
|
||||
keys_to_remove = [k for k in _pending_click_enrichments if k[0] == session_id]
|
||||
@@ -1521,17 +1590,70 @@ async def finalize(session_id: str, machine_id: str = "default"):
|
||||
if shots_dir.exists():
|
||||
full_shots_count = len(list(shots_dir.glob("shot_*_full.png")))
|
||||
|
||||
return {
|
||||
# Patch 2026-05-23 (brief 0902 deferred-workflow) : par défaut, on
|
||||
# ne propose plus le replay direct immédiat post-finalize — le chemin
|
||||
# produit cible est le workflow compilé par le worker VLM. Le client
|
||||
# attend la disponibilité du workflow nommé pour proposer un test.
|
||||
# Le replay direct reste accessible (smoke/debug) en activant
|
||||
# RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE=true côté serveur, OU
|
||||
# en appelant explicitement POST /api/v1/traces/stream/replay-session
|
||||
# depuis un outil de test.
|
||||
_direct_replay_enabled = _auto_launch_replay_after_finalize()
|
||||
|
||||
response = {
|
||||
"status": "queued_for_processing",
|
||||
"session_id": session_id,
|
||||
"machine_id": session.machine_id,
|
||||
"screenshots_to_analyze": full_shots_count,
|
||||
"replay_ready": _direct_replay_enabled,
|
||||
"message": (
|
||||
f"Session finalisée. {full_shots_count} screenshots seront analysés "
|
||||
"en arrière-plan. Suivez la progression via "
|
||||
"GET /api/v1/traces/stream/processing/status"
|
||||
"GET /api/v1/traces/stream/processing/status."
|
||||
),
|
||||
}
|
||||
if _direct_replay_enabled:
|
||||
response["replay_request"] = {
|
||||
"endpoint": "/api/v1/traces/stream/replay-session",
|
||||
"session_id": session_id,
|
||||
"machine_id": resolved_machine_id,
|
||||
}
|
||||
response["message"] += (
|
||||
" Le replay direct est disponible via "
|
||||
"POST /api/v1/traces/stream/replay-session"
|
||||
)
|
||||
|
||||
if not launch_replay:
|
||||
return response
|
||||
|
||||
try:
|
||||
replay_result = await replay_from_session(
|
||||
session_id=session_id,
|
||||
machine_id=resolved_machine_id,
|
||||
)
|
||||
except HTTPException as exc:
|
||||
logger.warning(
|
||||
"Finalize %s : replay direct non lancé (%s)",
|
||||
session_id,
|
||||
exc.detail,
|
||||
)
|
||||
response["replay_launch"] = {
|
||||
"status": "failed",
|
||||
"status_code": exc.status_code,
|
||||
"detail": exc.detail,
|
||||
}
|
||||
response["message"] += (
|
||||
" Le lancement automatique du replay direct a échoué ; "
|
||||
"la session reste finalisée et re-jouable manuellement."
|
||||
)
|
||||
return response
|
||||
|
||||
response["replay_launch"] = {
|
||||
"status": "started",
|
||||
"replay": replay_result,
|
||||
}
|
||||
response["message"] += " Le replay direct a été lancé immédiatement."
|
||||
return response
|
||||
|
||||
|
||||
# =========================================================================
|
||||
@@ -2262,18 +2384,39 @@ async def replay_from_session(
|
||||
if session_mem and session_mem.events:
|
||||
_merge_enrichments_into_raw_events(raw_events, session_mem.events)
|
||||
|
||||
# ── 3. Construire le replay propre depuis les events bruts ──
|
||||
# Passer le répertoire de session pour activer le visual replay (crops de référence)
|
||||
# Répertoire de session utilisé par le visual replay et les anchors setup
|
||||
session_dir = str(events_file.parent)
|
||||
|
||||
# ── 3. Préparer le setup environnement et couper le préambule source ──
|
||||
setup_actions = []
|
||||
app_info = _extract_required_apps_from_events(
|
||||
raw_events,
|
||||
session_dir=session_dir,
|
||||
)
|
||||
replay_raw_events = raw_events
|
||||
if app_info:
|
||||
setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_sess")
|
||||
if setup_actions:
|
||||
replay_raw_events = _trim_redundant_setup_events(raw_events, app_info)
|
||||
logger.info(
|
||||
"replay-session %s : %d actions de setup préparées avant le replay "
|
||||
"(app=%s, cmd=%s, raw_trim=%d→%d)",
|
||||
session_id, len(setup_actions),
|
||||
app_info.get("primary_app"), app_info.get("primary_launch_cmd"),
|
||||
len(raw_events), len(replay_raw_events),
|
||||
)
|
||||
|
||||
# ── 4. Construire le replay propre depuis les events bruts ──
|
||||
# Passer le répertoire de session pour activer le visual replay (crops de référence)
|
||||
actions = build_replay_from_raw_events(
|
||||
raw_events, session_id=session_id, session_dir=session_dir,
|
||||
replay_raw_events, session_id=session_id, session_dir=session_dir,
|
||||
)
|
||||
|
||||
if not actions:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Session '{session_id}' : aucune action exploitable après nettoyage "
|
||||
f"({len(raw_events)} événements bruts)"
|
||||
f"({len(replay_raw_events)} événements bruts)"
|
||||
)
|
||||
|
||||
# Limite de sécurité
|
||||
@@ -2305,23 +2448,10 @@ async def replay_from_session(
|
||||
if _gesture_catalog and actions:
|
||||
actions = _gesture_catalog.optimize_replay_actions(actions)
|
||||
|
||||
# ── 3b. Setup environnement — ouvrir les applications nécessaires ──
|
||||
# Analyser les événements bruts pour détecter quelles applications sont requises
|
||||
# et injecter des actions de setup en tête de la queue de replay.
|
||||
setup_actions = []
|
||||
app_info = _extract_required_apps_from_events(raw_events)
|
||||
if app_info:
|
||||
setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_sess")
|
||||
if setup_actions:
|
||||
actions = setup_actions + actions
|
||||
logger.info(
|
||||
"replay-session %s : %d actions de setup injectées avant le replay "
|
||||
"(app=%s, cmd=%s)",
|
||||
session_id, len(setup_actions),
|
||||
app_info.get("primary_app"), app_info.get("primary_launch_cmd"),
|
||||
)
|
||||
if setup_actions:
|
||||
actions = setup_actions + actions
|
||||
|
||||
# ── 4. Trouver la session de replay cible (Agent V1 actif) ──
|
||||
# ── 5. Trouver la session de replay cible (Agent V1 actif) ──
|
||||
# L'agent actif peut avoir une session différente de la session source
|
||||
target_session_id = _find_active_agent_session(machine_id=machine_id)
|
||||
if not target_session_id:
|
||||
@@ -2335,7 +2465,7 @@ async def replay_from_session(
|
||||
"Lancez l'Agent V1 sur le PC cible."
|
||||
)
|
||||
|
||||
# ── 5. Injecter dans la queue de replay ──
|
||||
# ── 6. Injecter dans la queue de replay ──
|
||||
replay_id = f"replay_sess_{uuid.uuid4().hex[:8]}"
|
||||
|
||||
async with _async_replay_lock():
|
||||
@@ -3265,11 +3395,35 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
# NE PAS écraser si _schedule_retry a déjà mis le bon retry_count
|
||||
action_id_sent = action.get("action_id", "")
|
||||
if action_id_sent and action_id_sent not in _retry_pending:
|
||||
now = time.time()
|
||||
_retry_pending[action_id_sent] = {
|
||||
"action": dict(action),
|
||||
"dispatched_action": dict(action),
|
||||
"retry_count": 0,
|
||||
"replay_id": "",
|
||||
"replay_id": owning_replay.get("replay_id", "") if owning_replay else "",
|
||||
"session_id": session_id,
|
||||
"machine_id": machine_id,
|
||||
"dispatched_at": now,
|
||||
"first_dispatched_at": now,
|
||||
"resent_count": 0,
|
||||
"last_resent_at": 0.0,
|
||||
}
|
||||
elif action_id_sent:
|
||||
existing = _retry_pending.get(action_id_sent)
|
||||
if existing is not None:
|
||||
now = time.time()
|
||||
existing.setdefault("action", dict(action))
|
||||
existing["dispatched_action"] = dict(action)
|
||||
existing["replay_id"] = existing.get("replay_id") or (
|
||||
owning_replay.get("replay_id", "") if owning_replay else ""
|
||||
)
|
||||
existing["session_id"] = session_id
|
||||
existing["machine_id"] = machine_id
|
||||
existing["dispatched_at"] = now
|
||||
if not existing.get("first_dispatched_at"):
|
||||
existing["first_dispatched_at"] = now
|
||||
existing.setdefault("resent_count", 0)
|
||||
existing.setdefault("last_resent_at", 0.0)
|
||||
|
||||
# [REPLAY] log structuré pour suivre une action à travers toute la chaîne
|
||||
# Grep facile : journalctl --user -u rpa-streaming -f | grep REPLAY
|
||||
@@ -3400,6 +3554,15 @@ async def report_action_result(report: ReplayResultReport):
|
||||
)
|
||||
return {"status": "no_active_replay", "session_id": session_id}
|
||||
|
||||
removed_dupes = _remove_queued_action_duplicates(session_id, action_id)
|
||||
if removed_dupes:
|
||||
logger.warning(
|
||||
"[REPLAY] REPORT cleanup session=%s action_id=%s removed_queue_duplicates=%d",
|
||||
session_id,
|
||||
action_id,
|
||||
removed_dupes,
|
||||
)
|
||||
|
||||
# Récupérer l'info de retry pour cette action (si c'est un retry)
|
||||
retry_info = _retry_pending.pop(action_id, None)
|
||||
retry_count = retry_info["retry_count"] if retry_info else 0
|
||||
@@ -3631,10 +3794,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
_current = _actions_meta[_idx] or {}
|
||||
if _current.get("type") == "click":
|
||||
_mem_target_spec = _current.get("target_spec") or {}
|
||||
_mem_window_title = (
|
||||
_mem_target_spec.get("window_title", "")
|
||||
or _mem_target_spec.get("expected_window_before", "")
|
||||
)
|
||||
_mem_window_title = _memory_window_title_for_action(_current)
|
||||
|
||||
if _mem_window_title:
|
||||
_mem_success = (
|
||||
@@ -3749,6 +3909,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"target_description": f"Dialogue système : {_sys_category}",
|
||||
"screenshot_b64": screenshot_after or report.screenshot,
|
||||
"target_spec": _tspec_sys,
|
||||
"original_action": dict(original_action or {}),
|
||||
"reason": "system_dialog",
|
||||
"system_dialog": _sys_info,
|
||||
"error_detail": _sys_reason or (report.error or ""),
|
||||
@@ -3814,6 +3975,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"target_description": _target_desc_ww,
|
||||
"screenshot_b64": screenshot_after or report.screenshot,
|
||||
"target_spec": _tspec_ww,
|
||||
"original_action": dict(original_action or {}),
|
||||
"reason": "wrong_window",
|
||||
"error_detail": report.error or "",
|
||||
}
|
||||
@@ -3888,6 +4050,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"target_description": _target_desc,
|
||||
"screenshot_b64": screenshot_after or report.screenshot,
|
||||
"target_spec": _tspec,
|
||||
"original_action": dict(original_action or {}),
|
||||
"reason": "no_screen_change_strict",
|
||||
"resolution_method": report.resolution_method or "",
|
||||
"resolution_score": report.resolution_score or 0,
|
||||
@@ -3947,6 +4110,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"target_description": target_desc,
|
||||
"screenshot_b64": screenshot_after or report.screenshot,
|
||||
"target_spec": report.target_spec,
|
||||
"original_action": dict(original_action or {}),
|
||||
}
|
||||
replay_state["pause_message"] = f"Je ne vois pas '{target_desc}' à l'écran"
|
||||
error_entry = {
|
||||
@@ -3989,6 +4153,7 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"target_description": target_desc,
|
||||
"screenshot_b64": screenshot_after or report.screenshot,
|
||||
"target_spec": report.target_spec,
|
||||
"original_action": dict(original_action or {}),
|
||||
}
|
||||
replay_state["pause_message"] = f"Je ne vois pas '{target_desc}' à l'écran"
|
||||
error_entry = {
|
||||
@@ -4341,8 +4506,14 @@ async def resume_replay(
|
||||
and failed_action.get("reason") != "user_request"):
|
||||
# Reconstruire l'action a partir du retry_pending ou de l'original
|
||||
original_action_id = failed_action["action_id"]
|
||||
original = failed_action.get("original_action")
|
||||
if isinstance(original, dict) and original:
|
||||
original = dict(original)
|
||||
else:
|
||||
original = None
|
||||
# Chercher l'action originale dans les retry_pending
|
||||
original = _retry_pending.pop(original_action_id, {}).get("action")
|
||||
if not original:
|
||||
original = _retry_pending.pop(original_action_id, {}).get("action")
|
||||
if not original:
|
||||
# Reconstruire un minimum depuis le failed_action context
|
||||
original = {
|
||||
@@ -4358,8 +4529,15 @@ async def resume_replay(
|
||||
# Stocker dans retry_pending pour le suivi
|
||||
_retry_pending[resume_id] = {
|
||||
"action": original,
|
||||
"dispatched_action": dict(resume_action),
|
||||
"retry_count": 0,
|
||||
"replay_id": replay_id,
|
||||
"session_id": session_id,
|
||||
"machine_id": state.get("machine_id", "default"),
|
||||
"dispatched_at": 0.0,
|
||||
"first_dispatched_at": 0.0,
|
||||
"resent_count": 0,
|
||||
"last_resent_at": 0.0,
|
||||
"reason": "resume_after_pause",
|
||||
}
|
||||
queue = _replay_queues.get(session_id, [])
|
||||
@@ -4399,6 +4577,13 @@ async def cancel_replay(replay_id: str):
|
||||
return {"status": "cancelled", "replay_id": replay_id, "session_id": session_id}
|
||||
|
||||
|
||||
@app.get("/api/v1/traces/stream/replay/watchdog/metrics")
async def watchdog_metrics():
    """Expose the replay watchdog counters over HTTP.

    Returns:
        A dict with a single ``"watchdog"`` key whose value is the snapshot
        returned by ``replay_watchdog.get_metrics_snapshot()``.
    """
    # Function-scope import: resolved when the handler runs rather than when
    # this module is imported.
    from .replay_watchdog import get_metrics_snapshot

    return {"watchdog": get_metrics_snapshot()}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Visual Replay — Résolution visuelle des cibles (module resolve_engine)
|
||||
# =========================================================================
|
||||
@@ -4545,10 +4730,13 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
# Validation qualité en sortie de cascade : seuil de score + garde
|
||||
# de proximité contre les coords enregistrées. Single point of
|
||||
# insertion, n'altère pas la cascade existante.
|
||||
# target_spec propagé pour relaxation contextuelle (switch_tab +
|
||||
# som_element calibré, cf. resolve_engine.py 2026-05-22).
|
||||
result = _validate_resolution_quality(
|
||||
result,
|
||||
request.fallback_x_pct,
|
||||
request.fallback_y_pct,
|
||||
target_spec=request.target_spec,
|
||||
)
|
||||
|
||||
# Pré-check sémantique post-cascade : OCR sur une zone autour de la
|
||||
@@ -4581,6 +4769,15 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
_by_text = (request.target_spec.get("by_text") or "").strip()
|
||||
if _by_text:
|
||||
from agent_v0.server_v1.resolve_engine import _validate_text_at_position
|
||||
# Propager la bbox SoM enregistrée (si présente) au
|
||||
# pré-check OCR : pour les éléments étroits (onglets
|
||||
# Notepad moderne, ~30-40px haut), le radius générique
|
||||
# capture du texte voisin et rejette à tort.
|
||||
# Patch 2026-05-23 — cf. inbox_codex/…_notepad-tab-ocr-precheck.
|
||||
_som_bbox = (
|
||||
(request.target_spec.get("som_element") or {})
|
||||
.get("bbox_norm")
|
||||
)
|
||||
_is_valid, _observed, _ocr_ms = _validate_text_at_position(
|
||||
tmp_path,
|
||||
float(result.get("x_pct", 0) or 0),
|
||||
@@ -4588,6 +4785,7 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
_by_text,
|
||||
effective_w,
|
||||
effective_h,
|
||||
som_bbox_norm=_som_bbox,
|
||||
)
|
||||
logger.info(
|
||||
"[REPLAY] Pre-check OCR ACTIF : '%s' attendu @ (%.4f, %.4f) "
|
||||
@@ -4600,7 +4798,16 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
_is_valid,
|
||||
_ocr_ms,
|
||||
)
|
||||
if not _is_valid:
|
||||
# Patch 2026-05-23 : rejet uniquement si OCR a effectivement
|
||||
# lu *autre chose* que la cible. Si observed est vide, l'OCR
|
||||
# n'a rien lu (crop bbox SoM trop petit / contraste faible
|
||||
# sur onglet Notepad moderne) — ambigu, on garde la
|
||||
# résolution serveur. La garde drift ANCHOR-TM côté agent
|
||||
# bloque les vrais faux positifs.
|
||||
from agent_v0.server_v1.resolve_engine import (
|
||||
_should_reject_on_text_mismatch,
|
||||
)
|
||||
if _should_reject_on_text_mismatch(_is_valid, _observed):
|
||||
logger.warning(
|
||||
"[REPLAY] Pre-check OCR REJET : '%s' attendu @ (%.4f, %.4f) "
|
||||
"via %s mais OCR voit '%s' (%.0fms)",
|
||||
@@ -4620,6 +4827,15 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
"x_pct": None,
|
||||
"y_pct": None,
|
||||
}
|
||||
elif not _is_valid:
|
||||
# observed vide → on log mais on accepte
|
||||
logger.info(
|
||||
"[REPLAY] Pre-check OCR observed='' (crop trop "
|
||||
"petit/contraste faible) — on garde la résolution "
|
||||
"via %s (score=%s), garde drift agent protège en aval",
|
||||
result.get("method", "?"),
|
||||
result.get("score"),
|
||||
)
|
||||
|
||||
# [REPLAY] log structuré de sortie résolution (après validation)
|
||||
# Note: x_pct/y_pct peuvent être None quand le pré-check OCR rejette
|
||||
|
||||
@@ -17,6 +17,20 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _infer_machine_id_from_session_id(session_id: str, fallback: str = "default") -> str:
|
||||
"""Déduire le machine_id depuis un session_id spécial si possible.
|
||||
|
||||
Les heartbeats de fond de Léa utilisent `bg_<machine_id>` comme
|
||||
identifiant de session. Lors d'un redémarrage serveur, ces sessions
|
||||
peuvent être restaurées depuis la persistance JSON avec `machine_id`
|
||||
resté à `default`. On rétablit ici l'information machine pour que les
|
||||
replays ciblés retrouvent bien la session de fond active.
|
||||
"""
|
||||
if session_id.startswith("bg_") and len(session_id) > 3:
|
||||
return session_id[3:]
|
||||
return fallback
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveSessionState:
|
||||
"""État d'une session active en mémoire."""
|
||||
@@ -86,11 +100,18 @@ class LiveSessionManager:
|
||||
def _load_persisted_sessions(self):
|
||||
"""Charger les sessions sauvegardées au démarrage (JSON state files)."""
|
||||
count = 0
|
||||
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
||||
session_files = sorted(self._persist_dir.glob("sess_*.json"))
|
||||
session_files += sorted(self._persist_dir.glob("bg_*.json"))
|
||||
for session_file in session_files:
|
||||
try:
|
||||
with open(session_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
session = LiveSessionState.from_dict(data)
|
||||
if session.machine_id == "default":
|
||||
session.machine_id = _infer_machine_id_from_session_id(
|
||||
session.session_id,
|
||||
fallback=session.machine_id,
|
||||
)
|
||||
self._sessions[session.session_id] = session
|
||||
count += 1
|
||||
except Exception as e:
|
||||
@@ -117,7 +138,7 @@ class LiveSessionManager:
|
||||
for jsonl_file in sorted(live_dir.glob("**/live_events.jsonl")):
|
||||
session_dir = jsonl_file.parent
|
||||
session_id = session_dir.name
|
||||
if not session_id.startswith("sess_"):
|
||||
if not (session_id.startswith("sess_") or session_id.startswith("bg_")):
|
||||
continue
|
||||
if session_id in self._sessions:
|
||||
continue
|
||||
@@ -125,7 +146,7 @@ class LiveSessionManager:
|
||||
# Déduire le machine_id depuis le chemin parent
|
||||
parent_name = session_dir.parent.name
|
||||
if parent_name == live_dir.name:
|
||||
machine_id = "default"
|
||||
machine_id = _infer_machine_id_from_session_id(session_id)
|
||||
else:
|
||||
machine_id = parent_name
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -188,7 +188,12 @@ class ReplayLearner:
|
||||
"""
|
||||
target_spec = action.get("target_spec", {})
|
||||
by_text = target_spec.get("by_text", "")
|
||||
window_title = target_spec.get("window_title", "")
|
||||
window_title = (
|
||||
target_spec.get("window_title", "")
|
||||
or action.get("window_title", "")
|
||||
or target_spec.get("expected_window_before", "")
|
||||
or (target_spec.get("context_hints") or {}).get("window_title", "")
|
||||
)
|
||||
x_pct = correction.get("x_pct", 0.0)
|
||||
y_pct = correction.get("y_pct", 0.0)
|
||||
|
||||
@@ -207,20 +212,36 @@ class ReplayLearner:
|
||||
|
||||
# Stocker dans target_memory.db pour le lookup futur
|
||||
try:
|
||||
from .replay_memory import get_target_memory_store
|
||||
store = get_target_memory_store()
|
||||
if store:
|
||||
store.record_success(
|
||||
screen_signature="human_correction",
|
||||
from .replay_memory import memory_record_success
|
||||
stored = False
|
||||
if window_title:
|
||||
stored = memory_record_success(
|
||||
window_title=window_title,
|
||||
target_spec=target_spec,
|
||||
resolved_position={"x_pct": x_pct, "y_pct": y_pct},
|
||||
x_pct=float(x_pct),
|
||||
y_pct=float(y_pct),
|
||||
method="human_supervised",
|
||||
score=1.0,
|
||||
confidence=1.0,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"window_title absent pour '%s'",
|
||||
by_text,
|
||||
)
|
||||
|
||||
if stored:
|
||||
logger.info(
|
||||
f"[APPRENTISSAGE] Correction stockée dans target_memory : "
|
||||
f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
|
||||
)
|
||||
elif window_title:
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"échec memory_record_success pour '%s' dans '%s'",
|
||||
by_text,
|
||||
window_title,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Learning: échec stockage target_memory: {e}")
|
||||
|
||||
|
||||
@@ -103,15 +103,53 @@ def compute_screen_sig(window_title: str) -> str:
|
||||
return hashlib.sha256(norm.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def _round_float_list(values: Any, precision: int = 4) -> Optional[tuple[float, ...]]:
|
||||
"""Normaliser une liste de coordonnées flottantes pour le hash mémoire."""
|
||||
if not isinstance(values, (list, tuple)):
|
||||
return None
|
||||
out = []
|
||||
for value in values:
|
||||
try:
|
||||
out.append(round(float(value), precision))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return tuple(out)
|
||||
|
||||
|
||||
def _int_pair(values: Any) -> Optional[tuple[int, int]]:
|
||||
"""Extraire une paire entière stable pour les hints spatiaux."""
|
||||
if not isinstance(values, (list, tuple)) or len(values) < 2:
|
||||
return None
|
||||
try:
|
||||
return int(values[0]), int(values[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _should_reuse_recorded_window_relative_coords(fp: Any) -> bool:
|
||||
"""Décider si on doit remplacer la mémoire apprise par la position source.
|
||||
|
||||
Cette réécriture n'est légitime que pour les entrées faibles de type
|
||||
`position_fallback`/`v4_unknown`, où la mémoire ne contient pas une vraie
|
||||
localisation visuelle robuste mais seulement un clic écran dépendant de la
|
||||
résolution. Pour les méthodes visuelles apprises (template, SoM, OCR...),
|
||||
réinjecter un vieux `click_relative` source crée des collisions et des
|
||||
dérives sur des boutons homonymes (`Enregistrer`, `OK`, etc.).
|
||||
"""
|
||||
method = str(getattr(fp, "etype", "") or "").strip().lower()
|
||||
return method in {"position_fallback", "v4_unknown"}
|
||||
|
||||
|
||||
class _TargetSpecLike:
|
||||
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||
|
||||
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||
|
||||
On intègre aussi `resolve_order` et `vlm_description` dans `context_hints`
|
||||
pour qu'ils entrent dans le hash — deux actions avec le même `by_text`
|
||||
mais un `resolve_order` différent doivent avoir des hashes distincts.
|
||||
On intègre aussi `resolve_order`, `vlm_description` et des indices
|
||||
spatiaux (SoM, click_relative) dans `context_hints` pour qu'ils entrent
|
||||
dans le hash. Sinon, deux actions `Enregistrer` dans la même fenêtre
|
||||
mais à des emplacements différents collisionnent.
|
||||
"""
|
||||
|
||||
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||
@@ -131,6 +169,21 @@ class _TargetSpecLike:
|
||||
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||
if d.get("anchor_hint"):
|
||||
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||
|
||||
som_element = d.get("som_element") or {}
|
||||
som_bbox = _round_float_list(som_element.get("bbox_norm"))
|
||||
if som_bbox:
|
||||
hints["_som_bbox"] = som_bbox
|
||||
som_center = _round_float_list(som_element.get("center_norm"), precision=5)
|
||||
if som_center:
|
||||
hints["_som_center"] = som_center
|
||||
|
||||
window_capture = d.get("window_capture") or {}
|
||||
click_relative = _int_pair(window_capture.get("click_relative"))
|
||||
window_size = _int_pair(window_capture.get("window_size"))
|
||||
if click_relative and window_size:
|
||||
hints["_window_rel"] = f"{click_relative[0]},{click_relative[1]}@{window_size[0]}x{window_size[1]}"
|
||||
|
||||
self.context_hints = hints
|
||||
|
||||
|
||||
@@ -176,6 +229,46 @@ def memory_lookup(
|
||||
logger.debug("memory_lookup: fingerprint bbox invalide")
|
||||
return None
|
||||
|
||||
# Quand l'entrée mémoire provient d'un simple `position_fallback`, les
|
||||
# coordonnées stockées reflètent surtout la géométrie écran source. Dans
|
||||
# ce cas précis, réutiliser la position relative enregistrée dans la
|
||||
# fenêtre source reste préférable si elle existe.
|
||||
#
|
||||
# En revanche, pour une méthode visuelle réellement apprise
|
||||
# (`anchor_template`, `som_*`, `hybrid_text_direct`, ...), remplacer les
|
||||
# coords mémorisées par un vieux `click_relative` crée des dérives sur
|
||||
# des cibles textuelles homonymes. On garde donc les coords apprises.
|
||||
window_capture = target_spec.get("window_capture") or {}
|
||||
click_relative = window_capture.get("click_relative")
|
||||
window_size = window_capture.get("window_size")
|
||||
if (
|
||||
_should_reuse_recorded_window_relative_coords(fp)
|
||||
and (
|
||||
isinstance(click_relative, (list, tuple))
|
||||
and len(click_relative) >= 2
|
||||
and isinstance(window_size, (list, tuple))
|
||||
and len(window_size) >= 2
|
||||
)
|
||||
):
|
||||
try:
|
||||
rel_x = float(click_relative[0])
|
||||
rel_y = float(click_relative[1])
|
||||
win_w = float(window_size[0])
|
||||
win_h = float(window_size[1])
|
||||
if win_w > 1 and win_h > 1:
|
||||
x_pct = rel_x / win_w
|
||||
y_pct = rel_y / win_h
|
||||
logger.info(
|
||||
"memory_lookup: coords fenêtre source réutilisées "
|
||||
"(click_relative=%s, window_size=%s) -> (%.4f, %.4f)",
|
||||
click_relative,
|
||||
window_size,
|
||||
x_pct,
|
||||
y_pct,
|
||||
)
|
||||
except (TypeError, ValueError, ZeroDivisionError):
|
||||
logger.debug("memory_lookup: window_capture invalide, fallback bbox")
|
||||
|
||||
# Sanity check : les pourcentages doivent être dans [0, 1]
|
||||
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||
logger.warning(
|
||||
|
||||
@@ -328,10 +328,11 @@ class ReplayVerifier:
|
||||
),
|
||||
)
|
||||
|
||||
# Cas 4 : Pas de changement (key_combo, wait)
|
||||
# Pour les raccourcis clavier et attentes, l'absence de changement
|
||||
# n'est pas forcément un problème (ex: Ctrl+C ne change pas l'écran)
|
||||
if action_type in ("key_combo", "wait"):
|
||||
# Cas 4 : Pas de changement (key_combo, wait, verify_screen)
|
||||
# `verify_screen` côté agent n'est qu'une temporisation de stabilisation.
|
||||
# Il ne doit pas exiger un NOUVEAU changement visuel sinon le setup
|
||||
# boucle inutilement une fois l'application déjà ouverte.
|
||||
if action_type in ("key_combo", "wait", "verify_screen"):
|
||||
return VerificationResult(
|
||||
verified=True,
|
||||
confidence=0.4,
|
||||
|
||||
329
agent_v0/server_v1/replay_watchdog.py
Normal file
329
agent_v0/server_v1/replay_watchdog.py
Normal file
@@ -0,0 +1,329 @@
|
||||
"""Replay orphan watchdog for in-flight replay actions.
|
||||
|
||||
This module watches `_retry_pending` and re-pushes actions that were
|
||||
dispatched by the server but never acknowledged by the Windows agent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str) -> bool:
|
||||
return os.environ.get(name, default).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.environ.get(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid env %s, fallback=%s", name, default)
|
||||
return default
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.environ.get(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid env %s, fallback=%s", name, default)
|
||||
return default
|
||||
|
||||
|
||||
def _env_max_resends(default: int) -> int:
|
||||
raw = os.environ.get("RPA_WATCHDOG_MAX_RESENDS")
|
||||
if raw is None or not str(raw).strip():
|
||||
raw = os.environ.get("RPA_WATCHDOG_MAX_RETRIES")
|
||||
try:
|
||||
return int(raw) if raw is not None else default
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid max resend env, fallback=%s", default)
|
||||
return default
|
||||
|
||||
|
||||
# Runtime configuration, read once at import time from RPA_WATCHDOG_* variables.
# Master switch: ReplayWatchdog.start() is a no-op when this is False.
WATCHDOG_ENABLED = _env_bool("RPA_WATCHDOG_ENABLED", "1")
# Seconds between two scans of the pending-action table.
WATCHDOG_SCAN_INTERVAL_S = _env_float("RPA_WATCHDOG_SCAN_INTERVAL_S", 10.0)
# Age (seconds since dispatch) after which an unacknowledged action is an orphan.
WATCHDOG_ORPHAN_TIMEOUT_S = _env_float("RPA_WATCHDOG_ORPHAN_TIMEOUT_S", 45.0)
# Number of re-pushes allowed per action before the watchdog gives up on it.
WATCHDOG_MAX_RESENDS = _env_max_resends(2)
# Where a re-pushed action goes in the session queue: "tail" appends; any
# other value is handled as "head" (insert at index 0) by the scan loop.
WATCHDOG_REPUSH_POSITION = (
    os.environ.get("RPA_WATCHDOG_REPUSH_POSITION", "head").strip().lower()
)
|
||||
|
||||
|
||||
_metrics_lock = asyncio.Lock()
|
||||
_metrics: Dict[str, Any] = {
|
||||
"orphans_detected_total": 0,
|
||||
"orphans_resent_total": 0,
|
||||
"orphans_giveup_total": 0,
|
||||
"scans_total": 0,
|
||||
"scans_failed_total": 0,
|
||||
"last_scan_ts": 0.0,
|
||||
"last_scan_duration_ms": 0.0,
|
||||
"current_in_flight_count": 0,
|
||||
"current_orphan_count": 0,
|
||||
}
|
||||
|
||||
|
||||
async def _bump(key: str, delta: int = 1) -> None:
|
||||
async with _metrics_lock:
|
||||
_metrics[key] = _metrics.get(key, 0) + delta
|
||||
|
||||
|
||||
def get_metrics_snapshot() -> Dict[str, Any]:
|
||||
return dict(_metrics)
|
||||
|
||||
|
||||
# Callback invoked as notifier(session_id, machine_id) after an action is re-pushed.
SseNotifier = Callable[[str, str], None]


class ReplayWatchdog:
    """Background coroutine that re-pushes orphaned replay actions.

    An entry of ``retry_pending`` whose ``dispatched_at`` timestamp is older
    than ``WATCHDOG_ORPHAN_TIMEOUT_S`` is considered orphaned: it is put back
    into its session queue, up to ``WATCHDOG_MAX_RESENDS`` times, after which
    the entry is dropped and an error is logged.
    """

    def __init__(
        self,
        retry_pending: Dict[str, Dict[str, Any]],
        replay_queues: Dict[str, List[Dict[str, Any]]],
        async_lock_factory: Callable[[], Any],
        sse_notifier: Optional[SseNotifier] = None,
    ) -> None:
        """Bind the watchdog to the shared replay state.

        Args:
            retry_pending: Live mapping ``action_id -> tracking info``; it is
                read and mutated in place.
            replay_queues: Live mapping ``session_id -> queued actions``; it
                is mutated in place when an action is re-pushed.
            async_lock_factory: Callable returning an async context manager
                that protects both mappings.
            sse_notifier: Optional callback invoked after each re-push.
        """
        self._retry_pending = retry_pending
        self._replay_queues = replay_queues
        self._async_lock = async_lock_factory
        self._sse_notifier = sse_notifier
        self._task: Optional[asyncio.Task] = None
        self._stopped = asyncio.Event()

    async def start(self) -> None:
        """Start the scan loop as a task, unless disabled or already running."""
        if not WATCHDOG_ENABLED:
            logger.info("[WATCHDOG] disabled via RPA_WATCHDOG_ENABLED=0")
            return
        if self._task is not None and not self._task.done():
            logger.warning("[WATCHDOG] already started")
            return
        self._stopped.clear()
        self._task = asyncio.create_task(self._run(), name="replay_watchdog")
        logger.info(
            "[WATCHDOG] started scan=%.1fs orphan_timeout=%.1fs max_resends=%d repush=%s",
            WATCHDOG_SCAN_INTERVAL_S,
            WATCHDOG_ORPHAN_TIMEOUT_S,
            WATCHDOG_MAX_RESENDS,
            WATCHDOG_REPUSH_POSITION,
        )

    async def stop(self, timeout_s: float = 5.0) -> None:
        """Cancel the scan loop and wait up to ``timeout_s`` seconds for it.

        Does nothing when the watchdog was never started. Errors raised while
        waiting are logged, never propagated.
        """
        if self._task is None:
            return
        self._stopped.set()
        self._task.cancel()
        try:
            await asyncio.wait_for(self._task, timeout=timeout_s)
        except asyncio.CancelledError:
            # Expected outcome of cancelling the task just above.
            pass
        except asyncio.TimeoutError:
            logger.warning("[WATCHDOG] stop timeout after %.1fs", timeout_s)
        except Exception:
            logger.exception("[WATCHDOG] unexpected stop error")
        self._task = None
        logger.info("[WATCHDOG] stopped")

    async def _run(self) -> None:
        """Scan loop: sleep one interval, scan once, repeat until stopped."""
        try:
            while not self._stopped.is_set():
                # Interruptible sleep: returns early (and exits the loop) as
                # soon as the stop event is set.
                try:
                    await asyncio.wait_for(
                        self._stopped.wait(),
                        timeout=WATCHDOG_SCAN_INTERVAL_S,
                    )
                    break
                except asyncio.TimeoutError:
                    pass

                # A failing scan must not kill the loop.
                try:
                    await self._scan_once()
                except Exception:
                    await _bump("scans_failed_total")
                    logger.exception("[WATCHDOG] scan failed")
        except asyncio.CancelledError:
            logger.info("[WATCHDOG] cancelled")
            raise
        finally:
            logger.info("[WATCHDOG] loop terminated")

    def _live_entry(
        self,
        action_id: str,
        snapshot: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Return the live pending entry if it is still the snapshotted dispatch.

        Must be called with the replay lock held. Returns ``None`` when the
        entry has been removed since the snapshot, or when its
        ``dispatched_at`` stamp changed (the action was handed out again), in
        which case the watchdog must leave it alone.
        """
        live = self._retry_pending.get(action_id)
        if live is None:
            logger.debug(
                "[WATCHDOG] %s acked between snapshot and resend; skip",
                action_id,
            )
            return None
        if live.get("dispatched_at") != snapshot.get("dispatched_at"):
            logger.debug(
                "[WATCHDOG] %s re-dispatched between snapshot and resend; skip",
                action_id,
            )
            return None
        return live

    async def _scan_once(self) -> Dict[str, int]:
        """Run one scan: detect orphans, then re-push or give up on each.

        Returns:
            Counters for this scan: ``orphans``, ``resent``, ``gaveup``,
            ``skipped`` (entries with no dispatch timestamp) and
            ``in_flight`` (entries with a dispatch timestamp).
        """
        t0 = time.time()
        await _bump("scans_total")

        resent = 0
        gaveup = 0
        skipped = 0
        in_flight = 0

        # Pass 1: snapshot the orphans under the lock.
        orphan_targets: List[Tuple[str, Dict[str, Any]]] = []
        async with self._async_lock():
            for action_id, info in list(self._retry_pending.items()):
                dispatched_at = info.get("dispatched_at", 0.0) or 0.0
                if dispatched_at <= 0:
                    # No dispatch stamp: queued but not handed out.
                    skipped += 1
                    continue
                in_flight += 1
                if t0 - dispatched_at < WATCHDOG_ORPHAN_TIMEOUT_S:
                    continue
                orphan_targets.append((action_id, dict(info)))
        orphans = len(orphan_targets)

        # Pass 2: act on each orphan. The lock was released in between, so
        # every mutation first re-validates the live entry via _live_entry().
        for action_id, info in orphan_targets:
            await _bump("orphans_detected_total")
            resent_count = int(info.get("resent_count", 0) or 0)

            if resent_count >= WATCHDOG_MAX_RESENDS:
                async with self._async_lock():
                    live = self._live_entry(action_id, info)
                    if live is not None:
                        self._retry_pending.pop(action_id, None)
                if live is None:
                    continue
                age_total = t0 - float(info.get("first_dispatched_at", t0) or t0)
                logger.error(
                    "[BUS] lea:dispatch_orphan_giveup action_id=%s resent=%d age_total=%.1fs "
                    "session=%s machine=%s replay=%s",
                    action_id,
                    resent_count,
                    age_total,
                    info.get("session_id", "?"),
                    info.get("machine_id", "?"),
                    info.get("replay_id", "?"),
                )
                gaveup += 1
                await _bump("orphans_giveup_total")
                continue

            session_id = info.get("session_id")
            machine_id = info.get("machine_id", "default")
            # Prefer the exact payload last sent to the agent.
            action = info.get("dispatched_action") or info.get("action")
            if not session_id or not isinstance(action, dict):
                logger.warning(
                    "[WATCHDOG] invalid schema for %s session_id=%r action_type=%s",
                    action_id,
                    session_id,
                    type(action).__name__,
                )
                async with self._async_lock():
                    if self._live_entry(action_id, info) is not None:
                        self._retry_pending.pop(action_id, None)
                continue

            async with self._async_lock():
                existing = self._live_entry(action_id, info)
                if existing is None:
                    continue
                queue = self._replay_queues.setdefault(session_id, [])
                if WATCHDOG_REPUSH_POSITION == "tail":
                    queue.append(dict(action))
                else:
                    queue.insert(0, dict(action))
                existing["resent_count"] = resent_count + 1
                existing["last_resent_at"] = time.time()
                # Clearing the stamp makes later scans skip this entry until
                # it is stamped again by a new dispatch.
                existing["dispatched_at"] = 0.0

            age_total = t0 - float(info.get("first_dispatched_at", t0) or t0)
            logger.warning(
                "[BUS] lea:dispatch_orphan_resent action_id=%s resent=%d/%d age=%.1fs "
                "session=%s machine=%s replay=%s",
                action_id,
                resent_count + 1,
                WATCHDOG_MAX_RESENDS,
                age_total,
                session_id,
                machine_id,
                info.get("replay_id", "?"),
            )
            resent += 1
            await _bump("orphans_resent_total")

            if self._sse_notifier is not None:
                # Best effort: a failing notifier must not abort the scan.
                try:
                    self._sse_notifier(session_id, machine_id)
                except Exception as exc:
                    logger.debug("[WATCHDOG] sse notifier failed: %s", exc)

        elapsed_ms = (time.time() - t0) * 1000.0
        async with _metrics_lock:
            _metrics["last_scan_ts"] = t0
            _metrics["last_scan_duration_ms"] = elapsed_ms
            _metrics["current_in_flight_count"] = in_flight
            _metrics["current_orphan_count"] = orphans
            scans_total = _metrics["scans_total"]

        if orphans or gaveup:
            logger.info(
                "[METRIC] watchdog scan=%d orphans=%d resent=%d gaveup=%d "
                "in_flight=%d skipped=%d elapsed_ms=%.1f",
                scans_total,
                orphans,
                resent,
                gaveup,
                in_flight,
                skipped,
                elapsed_ms,
            )

        return {
            "orphans": orphans,
            "resent": resent,
            "gaveup": gaveup,
            "skipped": skipped,
            "in_flight": in_flight,
        }
|
||||
|
||||
|
||||
_singleton: Optional[ReplayWatchdog] = None
|
||||
|
||||
|
||||
def get_or_create_watchdog(
    retry_pending: Dict[str, Dict[str, Any]],
    replay_queues: Dict[str, List[Dict[str, Any]]],
    async_lock_factory: Callable[[], Any],
    sse_notifier: Optional[SseNotifier] = None,
) -> ReplayWatchdog:
    """Return the module-level ``ReplayWatchdog``, building it on first use.

    The arguments are forwarded to the ``ReplayWatchdog`` constructor only
    when no instance has been created yet. On every later call they are
    ignored and the already-built instance is returned unchanged.

    Args:
        retry_pending: Forwarded as ``retry_pending`` to the constructor.
        replay_queues: Forwarded as ``replay_queues`` to the constructor.
        async_lock_factory: Forwarded as ``async_lock_factory``.
        sse_notifier: Optional notifier forwarded as ``sse_notifier``.

    Returns:
        The shared ``ReplayWatchdog`` instance stored in ``_singleton``.
    """
    global _singleton
    # Fast path: an instance already exists, reuse it as-is.
    if _singleton is not None:
        return _singleton

    _singleton = ReplayWatchdog(
        retry_pending=retry_pending,
        replay_queues=replay_queues,
        async_lock_factory=async_lock_factory,
        sse_notifier=sse_notifier,
    )
    return _singleton
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
async def watchdog_lifespan(
    retry_pending: Dict[str, Dict[str, Any]],
    replay_queues: Dict[str, List[Dict[str, Any]]],
    async_lock_factory: Callable[[], Any],
    sse_notifier: Optional[SseNotifier] = None,
):
    """Run the replay watchdog for the duration of an ``async with`` body.

    The watchdog is obtained through ``get_or_create_watchdog`` with the
    given arguments, started, and yielded to the caller. ``stop()`` is
    awaited when the body exits, whether normally or through an exception.

    Yields:
        The started watchdog instance.
    """
    shared_watchdog = get_or_create_watchdog(
        retry_pending=retry_pending,
        replay_queues=replay_queues,
        async_lock_factory=async_lock_factory,
        sse_notifier=sse_notifier,
    )
    # start() is awaited outside the try: if it raises, stop() is not called.
    await shared_watchdog.start()
    try:
        yield shared_watchdog
    finally:
        await shared_watchdog.stop()
|
||||
@@ -243,6 +243,168 @@ def _validate_match_context(
|
||||
return True
|
||||
|
||||
|
||||
def _has_meaningful_recorded_coords(
|
||||
fallback_x_pct: float,
|
||||
fallback_y_pct: float,
|
||||
) -> bool:
|
||||
"""Indiquer si les coordonnées fallback représentent une vraie position source."""
|
||||
return (
|
||||
fallback_x_pct > 0.001
|
||||
and fallback_y_pct > 0.001
|
||||
and not (
|
||||
abs(fallback_x_pct - 0.5) < 0.001
|
||||
and abs(fallback_y_pct - 0.5) < 0.001
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _is_close_tab_target(target_spec: Optional[Dict[str, Any]]) -> bool:
|
||||
"""Détecter une action close_tab issue du compilateur replay."""
|
||||
if not isinstance(target_spec, dict):
|
||||
return False
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
return str((context_hints.get("interaction") or "")).strip().lower() == "close_tab"
|
||||
|
||||
|
||||
def _get_expected_close_tab_coords(
    target_spec: Optional[Dict[str, Any]],
    screen_width: int,
    screen_height: int,
    fallback_x_pct: float = 0.0,
    fallback_y_pct: float = 0.0,
) -> Optional[tuple[float, float]]:
    """Find the most reliable expected position for a ``close_tab`` click.

    Sources are tried in this order, the first usable one wins:

    1. The explicit fallback coordinates, when
       ``_has_meaningful_recorded_coords`` accepts them.
    2. ``target_spec["som_element"]["center_norm"]``.
    3. ``window_capture["rect"]`` origin plus ``window_capture["click_relative"]``,
       divided by the screen size.

    Sources 2 and 3 are only accepted when both resulting coordinates lie
    in ``[0.0, 1.0]``.

    Returns:
        ``(x, y)`` as normalised floats, or ``None`` when no source yields
        a usable position.
    """
    # 1. Explicit fallback coordinates (returned without a range check).
    if _has_meaningful_recorded_coords(fallback_x_pct, fallback_y_pct):
        return float(fallback_x_pct), float(fallback_y_pct)

    if not isinstance(target_spec, dict):
        return None

    def _within_unit_square(x: float, y: float) -> bool:
        return 0.0 <= x <= 1.0 and 0.0 <= y <= 1.0

    # 2. SoM centre stored on the target spec.
    som_element = target_spec.get("som_element") or {}
    center = som_element.get("center_norm")
    if isinstance(center, (list, tuple)) and len(center) >= 2:
        try:
            candidate = (float(center[0]), float(center[1]))
        except (TypeError, ValueError):
            candidate = None
        if candidate is not None and _within_unit_square(*candidate):
            return candidate

    # 3. Window rect origin + click offset, normalised by the screen size.
    capture = target_spec.get("window_capture") or {}
    rect = capture.get("rect")
    click_relative = capture.get("click_relative")
    rect_usable = isinstance(rect, (list, tuple)) and len(rect) >= 4
    click_usable = (
        isinstance(click_relative, (list, tuple)) and len(click_relative) >= 2
    )
    if not (rect_usable and click_usable and screen_width > 0 and screen_height > 0):
        return None

    try:
        norm_x = (float(rect[0]) + float(click_relative[0])) / float(screen_width)
        norm_y = (float(rect[1]) + float(click_relative[1])) / float(screen_height)
    except (TypeError, ValueError, ZeroDivisionError):
        return None

    if _within_unit_square(norm_x, norm_y):
        return norm_x, norm_y
    return None
|
||||
|
||||
|
||||
def _is_close_tab_result_plausible(
    resolved_x: float,
    resolved_y: float,
    target_spec: Optional[Dict[str, Any]],
    screen_width: int,
    screen_height: int,
    fallback_x_pct: float = 0.0,
    fallback_y_pct: float = 0.0,
) -> bool:
    """Filter out ``close_tab`` resolutions that drifted away from the source area.

    The check only applies when ``target_spec`` is a ``close_tab`` target
    and an expected position can be derived for it. A result is accepted
    when its horizontal drift is at most 0.18 and its Euclidean distance
    to the expected position is at most 0.20 (normalised units). A
    rejection is logged as a warning.

    Returns:
        True when the guard does not apply or the result is close enough;
        False when the result is rejected.
    """
    # Not a close_tab action: this guard has nothing to say.
    if not _is_close_tab_target(target_spec):
        return True

    reference = _get_expected_close_tab_coords(
        target_spec,
        screen_width,
        screen_height,
        fallback_x_pct=fallback_x_pct,
        fallback_y_pct=fallback_y_pct,
    )
    # No reference position available: cannot judge, accept.
    if reference is None:
        return True

    exp_x, exp_y = reference
    dx = abs(float(resolved_x) - exp_x)
    dy = abs(float(resolved_y) - exp_y)
    distance = (dx ** 2 + dy ** 2) ** 0.5

    if dx <= 0.18 and distance <= 0.20:
        return True

    logger.warning(
        "close_tab guard : résultat rejeté car trop éloigné de la zone "
        "source (resolved=(%.4f, %.4f), expected=(%.4f, %.4f), "
        "drift=(%.4f, %.4f), dist=%.4f)",
        float(resolved_x),
        float(resolved_y),
        exp_x,
        exp_y,
        dx,
        dy,
        distance,
    )
    return False
|
||||
|
||||
|
||||
def _is_start_button_vlm_result_plausible(
    result: Dict[str, Any],
    fallback_x_pct: float,
    fallback_y_pct: float,
    target_spec: Dict[str, Any],
    max_distance: float = 0.20,
) -> bool:
    """Filter out VLM false positives when locating the Start button.

    The Start button is treated as a system singleton: when a real
    recorded click is available (``fallback_*``), a VLM location far from
    it is considered more likely a false positive than a genuine UI move.

    The guard only applies when ``target_spec["by_role"]`` is
    ``"start_button"`` and the fallback coordinates are meaningful. The
    decision is then delegated to ``_validate_match_context`` with
    ``max_distance``; a rejection is logged as a warning.

    Returns:
        True when the guard does not apply or the result is accepted;
        False when the result is rejected.
    """
    role = str(target_spec.get("by_role", "") or "").strip().lower()
    guard_applies = role == "start_button" and _has_meaningful_recorded_coords(
        fallback_x_pct, fallback_y_pct
    )
    if not guard_applies:
        return True

    accepted = _validate_match_context(
        result,
        fallback_x_pct,
        fallback_y_pct,
        target_spec,
        max_distance=max_distance,
    )
    if accepted:
        return True

    resolved_x = float(result.get("x_pct", 0) or 0)
    resolved_y = float(result.get("y_pct", 0) or 0)
    logger.warning(
        "Start button guard : résultat VLM rejeté car trop éloigné de la "
        "position enregistrée (resolved=(%.4f, %.4f), expected=(%.4f, %.4f), max=%.2f)",
        resolved_x,
        resolved_y,
        fallback_x_pct,
        fallback_y_pct,
        max_distance,
    )
    return False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# YOLO/OmniParser — Résolution par détection d'éléments UI
|
||||
# =========================================================================
|
||||
@@ -1109,16 +1271,66 @@ def _resolve_by_som(
|
||||
# Centre du match
|
||||
match_cx = max_loc[0] + anc_w // 2
|
||||
match_cy = max_loc[1] + anc_h // 2
|
||||
interaction = str(
|
||||
(target_spec.get("context_hints") or {}).get("interaction", "") or ""
|
||||
).strip().lower()
|
||||
|
||||
if interaction == "close_tab":
|
||||
elapsed = time.time() - t0
|
||||
cx_norm = match_cx / screen_width if screen_width > 0 else 0.0
|
||||
cy_norm = match_cy / screen_height if screen_height > 0 else 0.0
|
||||
if _is_close_tab_result_plausible(
|
||||
cx_norm,
|
||||
cy_norm,
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
):
|
||||
logger.info(
|
||||
"SoM resolve ANCHOR exact close_tab : score=%.3f "
|
||||
"centre=(%d, %d) → (%.4f, %.4f) en %.1fs",
|
||||
max_score, match_cx, match_cy, cx_norm, cy_norm, elapsed,
|
||||
)
|
||||
return {
|
||||
"resolved": True,
|
||||
"method": "som_anchor_match",
|
||||
"x_pct": round(cx_norm, 6),
|
||||
"y_pct": round(cy_norm, 6),
|
||||
"matched_element": {
|
||||
"label": "close_tab_button",
|
||||
"type": "visual_anchor",
|
||||
"role": "som_anchor_exact",
|
||||
"confidence": max_score,
|
||||
},
|
||||
"score": max_score,
|
||||
"match_box": {
|
||||
"x": int(max_loc[0]),
|
||||
"y": int(max_loc[1]),
|
||||
"width": int(anc_w),
|
||||
"height": int(anc_h),
|
||||
},
|
||||
}
|
||||
logger.warning(
|
||||
"SoM resolve ANCHOR exact close_tab rejeté : score=%.3f "
|
||||
"centre=(%d, %d) → (%.4f, %.4f), passage VLM/fallback",
|
||||
max_score, match_cx, match_cy, cx_norm, cy_norm,
|
||||
)
|
||||
# Ne pas recycler ce faux match vers l'élément SoM le plus
|
||||
# proche : pour close_tab, cela retombe facilement sur le
|
||||
# bouton de fermeture de la fenêtre.
|
||||
best_elem = None
|
||||
else:
|
||||
best_elem = None
|
||||
|
||||
# Trouver l'élément SomEngine le plus proche du centre du match
|
||||
best_elem = None
|
||||
best_dist = float("inf")
|
||||
for elem in som_result.elements:
|
||||
cx, cy = elem.center
|
||||
dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5
|
||||
if dist < best_dist:
|
||||
best_dist = dist
|
||||
best_elem = elem
|
||||
if best_elem is None and interaction != "close_tab":
|
||||
for elem in som_result.elements:
|
||||
cx, cy = elem.center
|
||||
dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5
|
||||
if dist < best_dist:
|
||||
best_dist = dist
|
||||
best_elem = elem
|
||||
|
||||
if best_elem and best_dist < 100: # Max 100px de distance
|
||||
elapsed = time.time() - t0
|
||||
@@ -1584,6 +1796,49 @@ def _resolve_target_sync(
|
||||
"fallback cascade legacy"
|
||||
)
|
||||
|
||||
# ===================================================================
|
||||
# Cas spécial : boutons de dialogue runtime ("Oui", "Non", "OK", ...)
|
||||
# ===================================================================
|
||||
# Ces boutons sont textuels, sans ancre stable, et apparaissent souvent
|
||||
# au milieu d'une action déjà en cours. Si on les laisse partir dans la
|
||||
# cascade générique (VLM -> SoM -> ScreenAnalyzer), on peut bloquer
|
||||
# l'action principale assez longtemps pour déclencher le watchdog.
|
||||
# Contrat voulu : OCR direct rapide, sinon abandon immédiat pour que le
|
||||
# client essaie son fallback local par template texte.
|
||||
dialog_role = str(target_spec.get("by_role", "") or "").strip().lower()
|
||||
dialog_text = str(target_spec.get("by_text", "") or "").strip()
|
||||
if dialog_role == "dialog_button" and dialog_text and not anchor_image_b64:
|
||||
ocr_result = _resolve_by_ocr_text(
|
||||
screenshot_path=screenshot_path,
|
||||
target_text=dialog_text,
|
||||
screen_width=screen_width,
|
||||
screen_height=screen_height,
|
||||
)
|
||||
if ocr_result and ocr_result.get("score", 0) >= 0.80:
|
||||
ocr_result["method"] = "hybrid_text_direct"
|
||||
logger.info(
|
||||
"Resolve dialog_button OCR-DIRECT : OK '%s' → (%.4f, %.4f) score=%.2f",
|
||||
dialog_text[:40],
|
||||
ocr_result.get("x_pct", 0),
|
||||
ocr_result.get("y_pct", 0),
|
||||
ocr_result.get("score", 0),
|
||||
)
|
||||
return ocr_result
|
||||
|
||||
logger.info(
|
||||
"Resolve dialog_button OCR-only : '%s' non trouvé "
|
||||
"(fenêtre='%s') — skip VLM/SoM/ScreenAnalyzer",
|
||||
dialog_text[:40],
|
||||
str(target_spec.get("window_title", "") or "")[:80],
|
||||
)
|
||||
return {
|
||||
"resolved": False,
|
||||
"method": "dialog_button_ocr_only",
|
||||
"reason": "ocr_direct_failed_dialog_button_no_vlm",
|
||||
"x_pct": fallback_x_pct,
|
||||
"y_pct": fallback_y_pct,
|
||||
}
|
||||
|
||||
# ===================================================================
|
||||
# MODE STRICT (replay sessions) — Stratégie VLM-FIRST
|
||||
# ===================================================================
|
||||
@@ -1656,13 +1911,25 @@ def _resolve_target_sync(
|
||||
screen_height=screen_height,
|
||||
)
|
||||
if grounding_result and grounding_result.get("resolved"):
|
||||
logger.info(
|
||||
"Strict resolve GROUNDING : OK (%.4f, %.4f) pour '%s'",
|
||||
grounding_result.get("x_pct", 0),
|
||||
grounding_result.get("y_pct", 0),
|
||||
by_text_strict[:50],
|
||||
if _is_close_tab_result_plausible(
|
||||
float(grounding_result.get("x_pct", 0) or 0),
|
||||
float(grounding_result.get("y_pct", 0) or 0),
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.info(
|
||||
"Strict resolve GROUNDING : OK (%.4f, %.4f) pour '%s'",
|
||||
grounding_result.get("x_pct", 0),
|
||||
grounding_result.get("y_pct", 0),
|
||||
by_text_strict[:50],
|
||||
)
|
||||
return grounding_result
|
||||
logger.warning(
|
||||
"Strict resolve GROUNDING : résultat close_tab rejeté, passage template/VLM"
|
||||
)
|
||||
return grounding_result
|
||||
|
||||
if not by_text_strict or by_text_source not in ("ocr", "vlm"):
|
||||
# Template matching pour les éléments sans texte (icônes pures)
|
||||
@@ -1690,11 +1957,23 @@ def _resolve_target_sync(
|
||||
abs_y = window_rect[1] + y_tm * tm_screen_h
|
||||
result["x_pct"] = round(abs_x / screen_width, 6)
|
||||
result["y_pct"] = round(abs_y / screen_height, 6)
|
||||
logger.info(
|
||||
"Strict resolve TEMPLATE : icon match (score=%.3f)",
|
||||
result.get("score", 0),
|
||||
if _is_close_tab_result_plausible(
|
||||
float(result.get("x_pct", 0) or 0),
|
||||
float(result.get("y_pct", 0) or 0),
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.info(
|
||||
"Strict resolve TEMPLATE : icon match (score=%.3f)",
|
||||
result.get("score", 0),
|
||||
)
|
||||
return result
|
||||
logger.warning(
|
||||
"Strict resolve TEMPLATE : résultat close_tab rejeté, passage cascade suivante"
|
||||
)
|
||||
return result
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Étape 0.5 : OCR direct (hybrid_text_direct) — chemin rapide
|
||||
@@ -1739,6 +2018,27 @@ def _resolve_target_sync(
|
||||
by_text_strict[:40],
|
||||
)
|
||||
|
||||
# Les boutons de dialogues runtime connus ("Oui", "Non", "OK", etc.)
|
||||
# ne doivent pas partir dans la cascade lente VLM -> SoM. Si l'OCR
|
||||
# direct ne les trouve pas immédiatement, on rend la main au client
|
||||
# pour son fallback local par template texte, sinon on bloque l'action
|
||||
# principale assez longtemps pour déclencher le watchdog.
|
||||
dialog_role = str(target_spec.get("by_role", "") or "").strip().lower()
|
||||
if dialog_role == "dialog_button" and by_text_strict and not anchor_image_b64:
|
||||
logger.info(
|
||||
"Strict resolve dialog_button : OCR-direct only pour '%s' "
|
||||
"(fenêtre='%s') — skip VLM/SoM/template",
|
||||
by_text_strict[:40],
|
||||
str(target_spec.get("window_title", "") or "")[:80],
|
||||
)
|
||||
return {
|
||||
"resolved": False,
|
||||
"method": "dialog_button_ocr_only",
|
||||
"reason": "ocr_direct_failed_dialog_button_no_vlm",
|
||||
"x_pct": fallback_x_pct,
|
||||
"y_pct": fallback_y_pct,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Étape 1 : VLM Quick Find (fallback, multi-image)
|
||||
# ---------------------------------------------------------------
|
||||
@@ -1750,12 +2050,29 @@ def _resolve_target_sync(
|
||||
)
|
||||
if vlm_result and vlm_result.get("resolved"):
|
||||
if vlm_result.get("score", 0) >= 0.3:
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : VLM OK (score=%.2f) pour '%s'",
|
||||
vlm_result.get("score", 0),
|
||||
vlm_description[:60] if vlm_description else "(anchor)",
|
||||
if _is_start_button_vlm_result_plausible(
|
||||
vlm_result,
|
||||
fallback_x_pct,
|
||||
fallback_y_pct,
|
||||
target_spec,
|
||||
) and _is_close_tab_result_plausible(
|
||||
float(vlm_result.get("x_pct", 0) or 0),
|
||||
float(vlm_result.get("y_pct", 0) or 0),
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : VLM OK (score=%.2f) pour '%s'",
|
||||
vlm_result.get("score", 0),
|
||||
vlm_description[:60] if vlm_description else "(anchor)",
|
||||
)
|
||||
return vlm_result
|
||||
logger.warning(
|
||||
"Strict resolve VLM-first : résultat VLM rejeté par un garde-fou, passage SoM/template"
|
||||
)
|
||||
return vlm_result
|
||||
else:
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : VLM score=%.2f trop bas, passage template",
|
||||
@@ -1782,12 +2099,24 @@ def _resolve_target_sync(
|
||||
screen_height=screen_height,
|
||||
)
|
||||
if som_result and som_result.get("resolved"):
|
||||
logger.info(
|
||||
"Strict resolve SoM+VLM : OK (score=%.2f, mark=#%s)",
|
||||
som_result.get("score", 0),
|
||||
som_result.get("matched_element", {}).get("som_id", "?"),
|
||||
if _is_close_tab_result_plausible(
|
||||
float(som_result.get("x_pct", 0) or 0),
|
||||
float(som_result.get("y_pct", 0) or 0),
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.info(
|
||||
"Strict resolve SoM+VLM : OK (score=%.2f, mark=#%s)",
|
||||
som_result.get("score", 0),
|
||||
som_result.get("matched_element", {}).get("som_id", "?"),
|
||||
)
|
||||
return som_result
|
||||
logger.warning(
|
||||
"Strict resolve SoM+VLM : résultat close_tab rejeté, passage template matching"
|
||||
)
|
||||
return som_result
|
||||
else:
|
||||
logger.info("Strict resolve SoM+VLM : échoué, passage template matching")
|
||||
|
||||
@@ -1805,12 +2134,24 @@ def _resolve_target_sync(
|
||||
score = result.get("score", 0)
|
||||
# Score >= 0.95 : match quasi-parfait, pas besoin de valider le contexte
|
||||
if score >= 0.95:
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : template matching fallback OK "
|
||||
"(score=%.3f >= 0.95, contexte skip — match quasi-parfait)",
|
||||
score,
|
||||
if _is_close_tab_result_plausible(
|
||||
float(result.get("x_pct", 0) or 0),
|
||||
float(result.get("y_pct", 0) or 0),
|
||||
target_spec,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : template matching fallback OK "
|
||||
"(score=%.3f >= 0.95, contexte skip — match quasi-parfait)",
|
||||
score,
|
||||
)
|
||||
return result
|
||||
logger.warning(
|
||||
"Strict resolve TEMPLATE : match close_tab très fort mais hors zone source, rejeté"
|
||||
)
|
||||
return result
|
||||
elif _validate_match_context(result, fallback_x_pct, fallback_y_pct, target_spec):
|
||||
logger.info(
|
||||
"Strict resolve VLM-first : template matching fallback OK "
|
||||
@@ -2189,6 +2530,37 @@ def _text_match_fuzzy(expected: str, observed: str, min_token_ratio: float = 0.6
|
||||
return matched / len(tokens) >= min_token_ratio
|
||||
|
||||
|
||||
_SOM_BBOX_OCR_PADDING_PX: int = 8
|
||||
_SOM_BBOX_MIN_DIM_PX: int = 12
|
||||
|
||||
|
||||
def _should_reject_on_text_mismatch(
|
||||
is_valid: bool,
|
||||
observed: Optional[str],
|
||||
) -> bool:
|
||||
"""Décide si le pré-check OCR doit rejeter la résolution.
|
||||
|
||||
Patch 2026-05-23 : on distingue deux cas d'échec du fuzzy match :
|
||||
|
||||
- ``observed`` contient du texte (ex: ``'9 ?'``, ``'OBS Studio…'``)
|
||||
→ mismatch confirmé, la cascade a probablement cliqué ailleurs
|
||||
→ on rejette.
|
||||
- ``observed`` est vide ou whitespace
|
||||
→ l'OCR n'a rien lu (zone trop petite, texte peu contrasté,
|
||||
modèle EasyOCR sous le seuil de détection). C'est ambigu :
|
||||
ce n'est PAS la preuve d'un faux positif, on accepte la
|
||||
résolution serveur. La garde drift ANCHOR-TM côté agent
|
||||
protège en aval contre les vrais faux positifs.
|
||||
|
||||
Si ``is_valid=True`` → jamais de rejet (cas nominal).
|
||||
"""
|
||||
if is_valid:
|
||||
return False
|
||||
if observed is None:
|
||||
return False
|
||||
return bool(str(observed).strip())
|
||||
|
||||
|
||||
def _validate_text_at_position(
|
||||
screenshot_path: str,
|
||||
x_pct: float,
|
||||
@@ -2197,9 +2569,20 @@ def _validate_text_at_position(
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
radius_px: int = 280,
|
||||
som_bbox_norm: Optional[List[float]] = None,
|
||||
) -> tuple:
|
||||
"""Pré-check sémantique : OCR sur une zone autour de (x_pct, y_pct) et
|
||||
vérifie que `expected_text` y est présent (substring ou fuzzy 50%).
|
||||
"""Pré-check sémantique : OCR sur une zone et vérifie que
|
||||
`expected_text` y est présent (substring ou fuzzy 50%).
|
||||
|
||||
Zone OCR (par priorité) :
|
||||
1. Si ``som_bbox_norm = [x1, y1, x2, y2]`` (normalisé 0..1) est
|
||||
fourni et a une largeur/hauteur > _SOM_BBOX_MIN_DIM_PX en
|
||||
pixels écran : OCR sur cette bbox élargie d'un padding court.
|
||||
Plus précis pour les éléments étroits (onglets Notepad
|
||||
moderne, ~30-40px haut) que le radius générique qui capture
|
||||
le texte voisin (status bar, etc.).
|
||||
2. Sinon : fallback historique → carré de ``radius_px`` autour
|
||||
de (x_pct, y_pct).
|
||||
|
||||
Retourne (is_valid: bool, observed_text: str, elapsed_ms: float).
|
||||
|
||||
@@ -2219,16 +2602,52 @@ def _validate_text_at_position(
|
||||
t0 = time.time()
|
||||
img = Image.open(screenshot_path).convert("RGB")
|
||||
img_w, img_h = img.size
|
||||
cx = int(x_pct * screen_width)
|
||||
cy = int(y_pct * screen_height)
|
||||
# Saturer dans les bornes de l'image (le screenshot peut être plus
|
||||
# large que la fenêtre logique — utiliser min(img_*, screen_*) en sécurité).
|
||||
max_x = min(img_w, screen_width)
|
||||
max_y = min(img_h, screen_height)
|
||||
x1 = max(0, cx - radius_px)
|
||||
y1 = max(0, cy - radius_px)
|
||||
x2 = min(max_x, cx + radius_px)
|
||||
y2 = min(max_y, cy + radius_px)
|
||||
|
||||
# --- Tentative 1 : zone OCR depuis la bbox SoM (préférée) ---
|
||||
x1 = y1 = x2 = y2 = None
|
||||
if (
|
||||
isinstance(som_bbox_norm, (list, tuple))
|
||||
and len(som_bbox_norm) == 4
|
||||
):
|
||||
try:
|
||||
bx1, by1, bx2, by2 = (float(v) for v in som_bbox_norm)
|
||||
# Tolérer ordre inversé.
|
||||
bx1, bx2 = sorted((bx1, bx2))
|
||||
by1, by2 = sorted((by1, by2))
|
||||
# Refuser les bboxes dégénérées AVANT padding : si
|
||||
# l'élément cible fait < _SOM_BBOX_MIN_DIM_PX en
|
||||
# natif, c'est probablement une bbox d'apparence
|
||||
# (curseur, séparateur 1px) — pas un label OCRable.
|
||||
raw_w = (bx2 - bx1) * screen_width
|
||||
raw_h = (by2 - by1) * screen_height
|
||||
if (
|
||||
raw_w >= _SOM_BBOX_MIN_DIM_PX
|
||||
and raw_h >= _SOM_BBOX_MIN_DIM_PX
|
||||
):
|
||||
# Conversion en pixels écran + clipping et padding.
|
||||
px1 = int(bx1 * screen_width) - _SOM_BBOX_OCR_PADDING_PX
|
||||
py1 = int(by1 * screen_height) - _SOM_BBOX_OCR_PADDING_PX
|
||||
px2 = int(bx2 * screen_width) + _SOM_BBOX_OCR_PADDING_PX
|
||||
py2 = int(by2 * screen_height) + _SOM_BBOX_OCR_PADDING_PX
|
||||
x1 = max(0, px1)
|
||||
y1 = max(0, py1)
|
||||
x2 = min(max_x, px2)
|
||||
y2 = min(max_y, py2)
|
||||
except (TypeError, ValueError):
|
||||
# Bbox malformée : fallback silencieux sur le radius.
|
||||
x1 = y1 = x2 = y2 = None
|
||||
|
||||
# --- Fallback : carré radius_px autour de (x_pct, y_pct) ---
|
||||
if x1 is None:
|
||||
cx = int(x_pct * screen_width)
|
||||
cy = int(y_pct * screen_height)
|
||||
x1 = max(0, cx - radius_px)
|
||||
y1 = max(0, cy - radius_px)
|
||||
x2 = min(max_x, cx + radius_px)
|
||||
y2 = min(max_y, cy + radius_px)
|
||||
|
||||
if x2 - x1 < 10 or y2 - y1 < 10:
|
||||
return True, "", 0.0
|
||||
crop = img.crop((x1, y1, x2, y2))
|
||||
@@ -2246,6 +2665,7 @@ def _validate_resolution_quality(
|
||||
result: Optional[Dict[str, Any]],
|
||||
fallback_x_pct: float,
|
||||
fallback_y_pct: float,
|
||||
target_spec: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Valide un résultat de résolution et le rejette s'il est peu fiable.
|
||||
|
||||
@@ -2263,6 +2683,16 @@ def _validate_resolution_quality(
|
||||
elle n'est PAS appelée par les méthodes internes de la cascade, mais
|
||||
uniquement depuis le handler HTTP `/resolve_target` après que la
|
||||
cascade a produit son meilleur candidat.
|
||||
|
||||
Argument optionnel `target_spec` : permet d'appliquer des relaxations
|
||||
contextuelles. Cas couvert (2026-05-22) : pour une cible
|
||||
`context_hints.interaction == "switch_tab"` qui dispose d'un
|
||||
`som_element.bbox_norm`, on abaisse le seuil des méthodes ``som_*``
|
||||
de 0.75 → 0.60. Justification : (1) le focus_change pré-clic
|
||||
prouve qu'on est dans la bonne fenêtre, (2) la bbox SoM a été
|
||||
calibrée à l'enregistrement et reste valide, (3) les onglets
|
||||
Notepad moderne sont visuellement quasi-identiques → score VLM
|
||||
inévitablement lower.
|
||||
"""
|
||||
if not result or not isinstance(result, dict):
|
||||
return result
|
||||
@@ -2291,6 +2721,52 @@ def _validate_resolution_quality(
|
||||
min_score = threshold
|
||||
break
|
||||
|
||||
# Relaxation contextuelle pour switch_tab + SoM calibré (2026-05-22).
|
||||
# Les onglets Notepad moderne (et apps similaires) sont visuellement
|
||||
# quasi-identiques : le grounding VLM/SoM produit fréquemment un
|
||||
# score 0.65-0.75, juste sous le seuil strict. Comme le contexte
|
||||
# `interaction=switch_tab` + bbox SoM enregistrée + focus_change
|
||||
# pré-clic confirment déjà la fenêtre et la zone, on relâche le
|
||||
# seuil des méthodes som_* à 0.60 dans CE cas précis uniquement.
|
||||
if (
|
||||
min_score is not None
|
||||
and target_spec
|
||||
and method.startswith("som_")
|
||||
):
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
is_tab_switch = (
|
||||
context_hints.get("interaction") == "switch_tab"
|
||||
and target_spec.get("by_role") == "tab"
|
||||
)
|
||||
som_element = target_spec.get("som_element") or {}
|
||||
has_calibrated_som = bool(som_element.get("bbox_norm"))
|
||||
if is_tab_switch and has_calibrated_som:
|
||||
relaxed = 0.60
|
||||
if relaxed < min_score:
|
||||
logger.info(
|
||||
"[REPLAY] switch_tab + som_element calibré → seuil "
|
||||
"som_* relâché %.2f → %.2f (cible='%s')",
|
||||
min_score, relaxed,
|
||||
target_spec.get("by_text", ""),
|
||||
)
|
||||
min_score = relaxed
|
||||
|
||||
is_close_tab = (
|
||||
method == "som_anchor_match"
|
||||
and str((context_hints.get("interaction") or "")).strip().lower() == "close_tab"
|
||||
and not str(target_spec.get("by_text", "") or "").strip()
|
||||
and bool(target_spec.get("anchor_image_base64"))
|
||||
)
|
||||
if is_close_tab:
|
||||
relaxed = 0.70
|
||||
if relaxed < min_score:
|
||||
logger.info(
|
||||
"[REPLAY] close_tab + anchor-only → seuil som_anchor_match "
|
||||
"relâché %.2f → %.2f",
|
||||
min_score, relaxed,
|
||||
)
|
||||
min_score = relaxed
|
||||
|
||||
if min_score is not None and score < min_score:
|
||||
logger.warning(
|
||||
"[REPLAY] Resolution REJETÉE (score trop bas) : method=%s score=%.3f < %.2f",
|
||||
@@ -2306,13 +2782,40 @@ def _validate_resolution_quality(
|
||||
"y_pct": fallback_y_pct,
|
||||
}
|
||||
|
||||
if _is_close_tab_target(target_spec) and not _is_close_tab_result_plausible(
|
||||
resolved_x,
|
||||
resolved_y,
|
||||
target_spec,
|
||||
0,
|
||||
0,
|
||||
fallback_x_pct=fallback_x_pct,
|
||||
fallback_y_pct=fallback_y_pct,
|
||||
):
|
||||
logger.warning(
|
||||
"[REPLAY] Resolution REJETÉE (close_tab hors zone source) : "
|
||||
"method=%s resolved=(%.3f, %.3f) expected=(%.3f, %.3f)",
|
||||
method,
|
||||
resolved_x,
|
||||
resolved_y,
|
||||
fallback_x_pct,
|
||||
fallback_y_pct,
|
||||
)
|
||||
return {
|
||||
"resolved": False,
|
||||
"method": f"rejected_close_tab_zone_{method}",
|
||||
"reason": "close_tab_out_of_recorded_zone",
|
||||
"original_method": method,
|
||||
"original_score": score,
|
||||
"x_pct": fallback_x_pct,
|
||||
"y_pct": fallback_y_pct,
|
||||
}
|
||||
|
||||
# --- Check 2 : garde de proximité ---
|
||||
# On n'applique la garde que si les coordonnées enregistrées ont un
|
||||
# sens (pas des placeholders 0.5/0.5 des plans V4 ni des 0.0/0.0).
|
||||
_has_recorded_coords = (
|
||||
fallback_x_pct > 0.001
|
||||
and fallback_y_pct > 0.001
|
||||
and not (abs(fallback_x_pct - 0.5) < 0.001 and abs(fallback_y_pct - 0.5) < 0.001)
|
||||
_has_recorded_coords = _has_meaningful_recorded_coords(
|
||||
fallback_x_pct,
|
||||
fallback_y_pct,
|
||||
)
|
||||
if _has_recorded_coords:
|
||||
dx = abs(resolved_x - fallback_x_pct)
|
||||
|
||||
@@ -1025,6 +1025,345 @@ def enrich_click_from_screenshot(
|
||||
return result
|
||||
|
||||
|
||||
def _title_to_tab_label(window_title: str) -> str:
|
||||
"""Réduire un titre de fenêtre en libellé d'onglet probable.
|
||||
|
||||
Exemples:
|
||||
- "Sans titre – Bloc-notes" -> "Sans titre"
|
||||
- "*test – Bloc-notes" -> "test"
|
||||
"""
|
||||
title = str(window_title or "").strip()
|
||||
if not title:
|
||||
return ""
|
||||
|
||||
for sep in (" – ", " - "):
|
||||
if sep in title:
|
||||
head = title.split(sep, 1)[0].strip()
|
||||
if head:
|
||||
title = head
|
||||
break
|
||||
|
||||
return title.lstrip("*").strip()
|
||||
|
||||
|
||||
def _split_window_title_head_suffix(window_title: str) -> tuple[str, str]:
|
||||
"""Découper un titre de fenêtre en ``(head, suffix)`` si possible.
|
||||
|
||||
Exemples:
|
||||
- ``Sans titre – Bloc-notes`` -> (``Sans titre``, ``Bloc-notes``)
|
||||
- ``Page 1 - Google Chrome`` -> (``Page 1``, ``Google Chrome``)
|
||||
- ``Enregistrer sous`` -> ("", "")
|
||||
"""
|
||||
title = str(window_title or "").strip()
|
||||
if not title:
|
||||
return "", ""
|
||||
|
||||
for sep in (" – ", " - "):
|
||||
if sep in title:
|
||||
head, suffix = title.split(sep, 1)
|
||||
head = head.strip()
|
||||
suffix = suffix.strip()
|
||||
if head and suffix:
|
||||
return head, suffix
|
||||
return "", ""
|
||||
|
||||
|
||||
def _looks_like_same_app_tab_switch(from_title: str, to_title: str) -> bool:
    """Tell whether a focus transition looks like a real tab change.

    Both titles must split into a non-empty head and suffix via
    ``_split_window_title_head_suffix``, and the two suffixes must be
    equal ignoring case (``casefold``). Titles without a separator, such
    as a modal ``Enregistrer sous`` dialog, therefore never qualify and
    are not compiled into ``switch_tab``.
    """
    source_parts = _split_window_title_head_suffix(from_title)
    target_parts = _split_window_title_head_suffix(to_title)

    # Any empty part means at least one title has no usable head/suffix.
    if not all((*source_parts, *target_parts)):
        return False

    _, source_app = source_parts
    _, target_app = target_parts
    return source_app.casefold() == target_app.casefold()
|
||||
|
||||
|
||||
def _infer_tab_switch_target(
|
||||
raw_events: list,
|
||||
click_event: Dict[str, Any],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Détecter un clic d'onglet à partir d'une bascule de focus dans la même app.
|
||||
|
||||
Cas réel observé:
|
||||
- fenêtre active `http...txt – Bloc-notes`
|
||||
- clic dans la barre d'onglets (y relatif ~40 px)
|
||||
- focus immédiat vers `Sans titre – Bloc-notes`
|
||||
|
||||
Dans ce cas, l'ancre image seule est trop fragile. On enrichit donc le
|
||||
target_spec avec un libellé d'onglet explicite (`by_text='Sans titre'`,
|
||||
`by_role='tab'`).
|
||||
"""
|
||||
event_type = click_event.get("type", "")
|
||||
if event_type != "mouse_click":
|
||||
return None
|
||||
|
||||
window = click_event.get("window", {})
|
||||
if not isinstance(window, dict):
|
||||
return None
|
||||
|
||||
from_title = str(window.get("title", "")).strip()
|
||||
app_name = str(window.get("app_name", "")).strip().lower()
|
||||
if not from_title or not app_name:
|
||||
return None
|
||||
|
||||
# Heuristique: on ne traite que les clics très hauts dans la fenêtre,
|
||||
# typiques d'une barre d'onglets / bouton de fermeture d'onglet.
|
||||
window_capture = click_event.get("window_capture", {})
|
||||
if not isinstance(window_capture, dict):
|
||||
return None
|
||||
click_relative = window_capture.get("click_relative")
|
||||
if not (isinstance(click_relative, list) and len(click_relative) == 2):
|
||||
return None
|
||||
try:
|
||||
rel_y = int(click_relative[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if rel_y > 90:
|
||||
return None
|
||||
|
||||
click_ts = click_event.get("timestamp")
|
||||
click_pos = click_event.get("pos") or []
|
||||
|
||||
match_idx = None
|
||||
for idx, raw_evt in enumerate(raw_events):
|
||||
event_data = raw_evt.get("event", raw_evt)
|
||||
if event_data.get("type") != "mouse_click":
|
||||
continue
|
||||
if event_data.get("timestamp") != click_ts:
|
||||
continue
|
||||
if (event_data.get("pos") or []) != click_pos:
|
||||
continue
|
||||
match_idx = idx
|
||||
break
|
||||
|
||||
if match_idx is None:
|
||||
return None
|
||||
|
||||
for follow_evt in raw_events[match_idx + 1: match_idx + 7]:
|
||||
follow_data = follow_evt.get("event", follow_evt)
|
||||
follow_type = follow_data.get("type", "")
|
||||
if follow_type in {"mouse_click", "text_input", "key_press", "key_combo"}:
|
||||
# Un autre geste utilisateur est intervenu avant le focus_change :
|
||||
# le focus observé n'est plus attribuable avec confiance à CE clic.
|
||||
return None
|
||||
if follow_type != "window_focus_change":
|
||||
continue
|
||||
|
||||
to_info = follow_data.get("to", {})
|
||||
if not isinstance(to_info, dict):
|
||||
continue
|
||||
if str(to_info.get("app_name", "")).strip().lower() != app_name:
|
||||
continue
|
||||
|
||||
to_title = str(to_info.get("title", "")).strip()
|
||||
if not to_title or to_title == from_title:
|
||||
continue
|
||||
if not _looks_like_same_app_tab_switch(from_title, to_title):
|
||||
return None
|
||||
|
||||
follow_ts = follow_data.get("timestamp")
|
||||
if (
|
||||
isinstance(click_ts, (int, float))
|
||||
and isinstance(follow_ts, (int, float))
|
||||
and follow_ts - click_ts > 3.0
|
||||
):
|
||||
break
|
||||
|
||||
tab_label = _title_to_tab_label(to_title)
|
||||
if not tab_label:
|
||||
return None
|
||||
|
||||
return {
|
||||
"by_text": tab_label,
|
||||
"by_role": "tab",
|
||||
"window_title": from_title,
|
||||
"context_hints": {
|
||||
"window_title": from_title,
|
||||
"switch_to_window_title": to_title,
|
||||
"interaction": "switch_tab",
|
||||
},
|
||||
"vlm_description": (
|
||||
f"Dans la fenêtre '{from_title}', l'onglet '{tab_label}' "
|
||||
"dans la barre d'onglets en haut"
|
||||
),
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _infer_close_tab_target(
|
||||
raw_events: list,
|
||||
click_event: Dict[str, Any],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Détecter un clic sur le bouton fermer de l'onglet actif.
|
||||
|
||||
Pattern ciblé observé sur Bloc-notes moderne :
|
||||
- clic très haut dans la barre d'onglets sur un titre ``*... – Bloc-notes``
|
||||
- un clic suivant dans la même fenêtre
|
||||
- puis focus vers ``Enregistrer sous``
|
||||
|
||||
Cela correspond à la fermeture d'un onglet modifié qui déclenche ensuite
|
||||
le flow de sauvegarde. On enrichit le clic avec un hint sémantique pour
|
||||
viser le vrai bouton ``x`` de l'onglet actif plutôt qu'un simple `yolo`.
|
||||
"""
|
||||
event_type = click_event.get("type", "")
|
||||
if event_type != "mouse_click":
|
||||
return None
|
||||
|
||||
window = click_event.get("window", {})
|
||||
if not isinstance(window, dict):
|
||||
return None
|
||||
|
||||
from_title = str(window.get("title", "")).strip()
|
||||
app_name = str(window.get("app_name", "")).strip().lower()
|
||||
if not from_title or not app_name or not from_title.startswith("*"):
|
||||
return None
|
||||
|
||||
window_capture = click_event.get("window_capture", {})
|
||||
if not isinstance(window_capture, dict):
|
||||
return None
|
||||
click_relative = window_capture.get("click_relative")
|
||||
if not (isinstance(click_relative, list) and len(click_relative) == 2):
|
||||
return None
|
||||
try:
|
||||
rel_y = int(click_relative[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if rel_y > 90:
|
||||
return None
|
||||
|
||||
click_ts = click_event.get("timestamp")
|
||||
click_pos = click_event.get("pos") or []
|
||||
match_idx = None
|
||||
for idx, raw_evt in enumerate(raw_events):
|
||||
event_data = raw_evt.get("event", raw_evt)
|
||||
if event_data.get("type") != "mouse_click":
|
||||
continue
|
||||
if event_data.get("timestamp") != click_ts:
|
||||
continue
|
||||
if (event_data.get("pos") or []) != click_pos:
|
||||
continue
|
||||
match_idx = idx
|
||||
break
|
||||
|
||||
if match_idx is None:
|
||||
return None
|
||||
|
||||
saw_follow_click_same_window = False
|
||||
for follow_evt in raw_events[match_idx + 1: match_idx + 8]:
|
||||
follow_data = follow_evt.get("event", follow_evt)
|
||||
follow_type = follow_data.get("type", "")
|
||||
|
||||
if follow_type in {"text_input", "key_press", "key_combo"}:
|
||||
return None
|
||||
|
||||
if follow_type == "mouse_click":
|
||||
follow_window = follow_data.get("window", {})
|
||||
if not isinstance(follow_window, dict):
|
||||
return None
|
||||
follow_app = str(follow_window.get("app_name", "")).strip().lower()
|
||||
follow_title = str(follow_window.get("title", "")).strip()
|
||||
if follow_app != app_name:
|
||||
return None
|
||||
if follow_title == from_title:
|
||||
saw_follow_click_same_window = True
|
||||
continue
|
||||
return None
|
||||
|
||||
if follow_type != "window_focus_change" or not saw_follow_click_same_window:
|
||||
continue
|
||||
|
||||
to_info = follow_data.get("to", {})
|
||||
if not isinstance(to_info, dict):
|
||||
continue
|
||||
if str(to_info.get("app_name", "")).strip().lower() != app_name:
|
||||
continue
|
||||
to_title = str(to_info.get("title", "")).strip()
|
||||
if to_title != "Enregistrer sous":
|
||||
continue
|
||||
|
||||
follow_ts = follow_data.get("timestamp")
|
||||
if (
|
||||
isinstance(click_ts, (int, float))
|
||||
and isinstance(follow_ts, (int, float))
|
||||
and follow_ts - click_ts > 5.0
|
||||
):
|
||||
break
|
||||
|
||||
tab_label = _title_to_tab_label(from_title)
|
||||
if not tab_label:
|
||||
return None
|
||||
|
||||
return {
|
||||
"by_text": "",
|
||||
"by_role": "tab_close_button",
|
||||
"window_title": from_title,
|
||||
"context_hints": {
|
||||
"window_title": from_title,
|
||||
"active_tab_label": tab_label,
|
||||
"interaction": "close_tab",
|
||||
},
|
||||
"vlm_description": (
|
||||
f"Dans la fenêtre '{from_title}', le bouton x pour fermer "
|
||||
f"l'onglet actif '{tab_label}' dans la barre d'onglets en haut"
|
||||
),
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _attach_expected_window_before(actions: list, raw_events: list) -> None:
|
||||
"""Attacher la fenêtre attendue AVANT chaque clic en rejouant les
|
||||
raw events et en conservant le dernier ``window_focus_change.to.title``.
|
||||
|
||||
Pourquoi : ``mouse_click.window.title`` capturé pendant
|
||||
l'enregistrement peut être obsolète si une transition de fenêtre
|
||||
se produit juste avant la capture (ex: dialog Windows qui s'ouvre
|
||||
milliseconde avant le clic suivant). Le serveur dispose pourtant
|
||||
des ``window_focus_change`` consécutifs — on s'en sert pour poser
|
||||
explicitement ``expected_window_before`` sur le clic, lu en priorité
|
||||
absolue par la pré-vérif côté agent.
|
||||
|
||||
Idempotent : si une action a déjà ``expected_window_before``, on
|
||||
ne touche pas.
|
||||
"""
|
||||
if not actions or not raw_events:
|
||||
return
|
||||
|
||||
last_focus_title = ""
|
||||
action_idx = 0
|
||||
|
||||
def _next_click_idx(start: int) -> int:
|
||||
i = start
|
||||
while i < len(actions) and actions[i].get("type") != "click":
|
||||
i += 1
|
||||
return i
|
||||
|
||||
for raw_evt in raw_events:
|
||||
ev = raw_evt.get("event", raw_evt) if isinstance(raw_evt, dict) else {}
|
||||
etype = ev.get("type", "")
|
||||
if etype == "window_focus_change":
|
||||
to_info = ev.get("to") or {}
|
||||
title = str(to_info.get("title", "") or "").strip()
|
||||
if title and title != "unknown_window":
|
||||
last_focus_title = title
|
||||
continue
|
||||
if etype != "mouse_click":
|
||||
continue
|
||||
action_idx = _next_click_idx(action_idx)
|
||||
if action_idx >= len(actions):
|
||||
return
|
||||
a = actions[action_idx]
|
||||
if last_focus_title and not a.get("expected_window_before"):
|
||||
a["expected_window_before"] = last_focus_title
|
||||
action_idx += 1
|
||||
|
||||
|
||||
def _attach_expected_screenshots(
|
||||
actions: list, raw_events: list, session_dir: Path,
|
||||
) -> None:
|
||||
@@ -1591,6 +1930,8 @@ def build_replay_from_raw_events(
|
||||
k: v for k, v in enrichment.items()
|
||||
if k != "by_position" # by_position est déjà dans x_pct/y_pct
|
||||
}
|
||||
if action.get("window_title") and not action["target_spec"].get("window_title"):
|
||||
action["target_spec"]["window_title"] = action["window_title"]
|
||||
# Ajouter les métadonnées fenêtre pour le grounding ciblé
|
||||
wc = evt.get("window_capture", {})
|
||||
if wc.get("rect"):
|
||||
@@ -1600,6 +1941,33 @@ def build_replay_from_raw_events(
|
||||
"click_relative": wc.get("click_relative"),
|
||||
}
|
||||
|
||||
tab_switch_target = _infer_tab_switch_target(events, evt)
|
||||
if tab_switch_target:
|
||||
target_spec = action.setdefault("target_spec", {})
|
||||
# Préférer une sémantique explicite d'onglet à un rôle brut
|
||||
# `yolo`/anchor-only quand le flux brut montre une vraie
|
||||
# bascule de focus dans la même application.
|
||||
if not target_spec.get("by_text"):
|
||||
target_spec["by_text"] = tab_switch_target["by_text"]
|
||||
target_spec["by_role"] = tab_switch_target["by_role"]
|
||||
target_spec["window_title"] = tab_switch_target["window_title"]
|
||||
target_spec["vlm_description"] = tab_switch_target["vlm_description"]
|
||||
context_hints = dict(target_spec.get("context_hints") or {})
|
||||
context_hints.update(tab_switch_target["context_hints"])
|
||||
target_spec["context_hints"] = context_hints
|
||||
action["visual_mode"] = True
|
||||
|
||||
close_tab_target = _infer_close_tab_target(events, evt)
|
||||
if close_tab_target:
|
||||
target_spec = action.setdefault("target_spec", {})
|
||||
target_spec["by_role"] = close_tab_target["by_role"]
|
||||
target_spec["window_title"] = close_tab_target["window_title"]
|
||||
target_spec["vlm_description"] = close_tab_target["vlm_description"]
|
||||
context_hints = dict(target_spec.get("context_hints") or {})
|
||||
context_hints.update(close_tab_target["context_hints"])
|
||||
target_spec["context_hints"] = context_hints
|
||||
action["visual_mode"] = True
|
||||
|
||||
elif evt_type == "text_input":
|
||||
text = evt.get("text", "")
|
||||
if not text:
|
||||
@@ -1695,6 +2063,21 @@ def build_replay_from_raw_events(
|
||||
if next_title:
|
||||
result[ci]["expected_window_title"] = next_title
|
||||
|
||||
# ── 9b. Pré-condition fiable : expected_window_before ──
|
||||
# Bug live 2026-05-22 (act_raw_c70976c8) : window.title d'un
|
||||
# mouse_click peut être obsolète quand une transition de fenêtre
|
||||
# (ex: ouverture dialog "Enregistrer sous") se produit juste avant
|
||||
# la capture du click. Sans correction, target_spec.window_title
|
||||
# reste sur l'ancien titre et la pré-vérif côté agent
|
||||
# (executor.py:653) déclenche une pause supervisée à tort.
|
||||
#
|
||||
# On rejoue les raw events en maintenant le dernier titre vu via
|
||||
# window_focus_change.to.title et on le pose comme
|
||||
# expected_window_before sur chaque clic qui n'en a pas déjà un.
|
||||
# Le champ est lu en priorité absolue par la pré-vérif agent, donc
|
||||
# il prime sur target_spec.window_title obsolète.
|
||||
_attach_expected_window_before(result, events)
|
||||
|
||||
# ── 10. Enrichir avec intention + expected_result via gemma4 (Critic) ──
|
||||
# gemma4 analyse chaque action dans son contexte pour produire :
|
||||
# - intention : ce que l'utilisateur veut accomplir
|
||||
|
||||
@@ -64,6 +64,31 @@ class TestStreamerEndpoints:
|
||||
_, kwargs = finalize_calls[0]
|
||||
assert kwargs["params"]["session_id"] == "sess_test_002"
|
||||
|
||||
def test_finalize_callback_receives_server_payload(self):
    """The enriched /finalize payload is forwarded to the client callback."""
    from agent_v0.agent_v1.network.streamer import TraceStreamer

    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_test_008",
        "machine_id": "pc-alpha",
    }
    server_payload = {
        "status": "queued_for_processing",
        "replay_ready": True,
        "replay_request": replay_request,
    }
    received = []

    with patch("agent_v0.agent_v1.network.streamer.requests") as requests_mock:
        # The mocked POST answers OK and returns the payload from .json().
        fake_response = MagicMock(ok=True, json=lambda: server_payload)
        requests_mock.post.return_value = fake_response

        streamer = TraceStreamer("sess_test_008")
        streamer.set_on_finalize_result(received.append)
        streamer._server_available = True
        streamer.running = False
        streamer._finalize_session()

    assert received == [server_payload]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Payload formats
|
||||
|
||||
134
tests/integration/test_finalize_replay_chain.py
Normal file
134
tests/integration/test_finalize_replay_chain.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Tests du chainage produit finalize -> replay-session."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
class TestFinalizeReplayChain:
    """Checks the /finalize response and its optional replay launch."""

    _TEST_API_TOKEN = "test_finalize_replay_chain_token_0123456789"

    @pytest.fixture(autouse=True)
    def _ensure_api_token(self, monkeypatch):
        """Set the test token in the environment and on api_stream if loaded."""
        monkeypatch.setenv("RPA_API_TOKEN", self._TEST_API_TOKEN)
        loaded = sys.modules.get("agent_v0.server_v1.api_stream")
        if loaded is None:
            return
        monkeypatch.setattr(loaded, "API_TOKEN", self._TEST_API_TOKEN)

    @pytest.fixture
    def client(self, tmp_path, monkeypatch):
        """Yield ``(client, api_stream, processor, token)`` on a temp data dir."""
        from fastapi.testclient import TestClient
        from agent_v0.server_v1 import api_stream
        from agent_v0.server_v1.stream_processor import StreamProcessor
        from agent_v0.server_v1.worker_stream import StreamWorker

        previous = (api_stream.processor, api_stream.worker)

        data_dir = str(tmp_path)
        processor = StreamProcessor(data_dir=data_dir)
        api_stream.processor = processor
        api_stream.worker = StreamWorker(live_dir=data_dir, processor=processor)
        # Replace the worker hand-off with a no-op.
        monkeypatch.setattr(api_stream, "_enqueue_to_worker", lambda session_id: None)

        http = TestClient(api_stream.app, raise_server_exceptions=False)
        yield http, api_stream, processor, api_stream.API_TOKEN

        # Restore the module-level singletons swapped above.
        api_stream.processor, api_stream.worker = previous

    @staticmethod
    def _finalize(http, token, **query):
        """POST /finalize with a bearer token and the given query params."""
        return http.post(
            "/api/v1/traces/stream/finalize",
            params=query,
            headers={"Authorization": f"Bearer {token}"},
        )

    def test_finalize_exposes_replay_request_without_launch(self, client):
        http, _, processor, token = client
        processor.session_manager.register_session("sess_final_001", machine_id="pc-alpha")

        response = self._finalize(http, token, session_id="sess_final_001")

        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "queued_for_processing"
        assert body["replay_ready"] is True
        assert body["replay_request"] == {
            "endpoint": "/api/v1/traces/stream/replay-session",
            "session_id": "sess_final_001",
            "machine_id": "pc-alpha",
        }
        assert "replay_launch" not in body

    def test_finalize_can_launch_replay_session(self, client, monkeypatch):
        http, api_stream, processor, token = client
        processor.session_manager.register_session("sess_final_002", machine_id="pc-beta")
        recorded_calls = []

        async def fake_replay_from_session(session_id: str, machine_id: str = "default"):
            recorded_calls.append((session_id, machine_id))
            return {
                "replay_id": "replay_sess_1234abcd",
                "status": "running",
                "source_session_id": session_id,
                "target_session_id": "agent_demo",
                "machine_id": machine_id,
                "total_actions": 7,
            }

        monkeypatch.setattr(api_stream, "replay_from_session", fake_replay_from_session)

        response = self._finalize(
            http, token, session_id="sess_final_002", launch_replay="true",
        )

        assert response.status_code == 200
        body = response.json()
        assert recorded_calls == [("sess_final_002", "pc-beta")]
        launch = body["replay_launch"]
        assert launch["status"] == "started"
        assert launch["replay"]["replay_id"] == "replay_sess_1234abcd"
        assert launch["replay"]["source_session_id"] == "sess_final_002"
        assert launch["replay"]["machine_id"] == "pc-beta"

    def test_finalize_remains_successful_if_auto_replay_fails(self, client, monkeypatch):
        http, api_stream, processor, token = client
        processor.session_manager.register_session("sess_final_003", machine_id="pc-gamma")

        async def fake_replay_from_session(session_id: str, machine_id: str = "default"):
            raise api_stream.HTTPException(
                status_code=404,
                detail=f"Aucune session Agent V1 active sur {machine_id}",
            )

        monkeypatch.setattr(api_stream, "replay_from_session", fake_replay_from_session)

        response = self._finalize(
            http, token, session_id="sess_final_003", launch_replay="true",
        )

        # Finalize itself still succeeds; only the launch is reported failed.
        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "queued_for_processing"
        assert body["replay_launch"] == {
            "status": "failed",
            "status_code": 404,
            "detail": "Aucune session Agent V1 active sur pc-gamma",
        }
        assert body["replay_request"]["machine_id"] == "pc-gamma"
|
||||
@@ -0,0 +1,161 @@
|
||||
"""Tests intégration : /replay/resume doit réinjecter l'action complète en pause."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
class TestReplayResumePreservesOriginalAction:
    """Checks that /replay/{id}/resume re-queues the full paused action."""

    _TEST_API_TOKEN = "test_replay_resume_preserves_original_action_token"

    # Module-level registries of api_stream that the fixture isolates.
    _REGISTRIES = ("_replay_states", "_replay_queues", "_retry_pending")

    @pytest.fixture(autouse=True)
    def _ensure_api_token(self, monkeypatch):
        """Set the test token in the environment and on api_stream if loaded."""
        monkeypatch.setenv("RPA_API_TOKEN", self._TEST_API_TOKEN)
        loaded = sys.modules.get("agent_v0.server_v1.api_stream")
        if loaded is None:
            return
        monkeypatch.setattr(loaded, "API_TOKEN", self._TEST_API_TOKEN)

    @pytest.fixture
    def client(self, monkeypatch):
        """Yield ``(client, api_stream, token)`` with emptied replay registries."""
        from fastapi.testclient import TestClient
        from agent_v0.server_v1 import api_stream

        monkeypatch.setattr(api_stream, "API_TOKEN", self._TEST_API_TOKEN)

        # Snapshot the registries, then empty them in place.
        snapshots = {
            name: dict(getattr(api_stream, name)) for name in self._REGISTRIES
        }
        for name in self._REGISTRIES:
            getattr(api_stream, name).clear()

        http = TestClient(api_stream.app, raise_server_exceptions=False)
        yield http, api_stream, self._TEST_API_TOKEN

        # Put back whatever was there before the test.
        for name in self._REGISTRIES:
            registry = getattr(api_stream, name)
            registry.clear()
            registry.update(snapshots[name])

    @staticmethod
    def _paused_state(replay_id, session_id, machine_id, original_action):
        """Build a ``paused_need_help`` replay state wrapping *original_action*."""
        return {
            "replay_id": replay_id,
            "session_id": session_id,
            "machine_id": machine_id,
            "status": "paused_need_help",
            "failed_action": {
                "action_id": original_action["action_id"],
                "type": "click",
                "reason": "wrong_window",
                "target_spec": original_action["target_spec"],
                "original_action": original_action,
            },
            "pause_message": "Replay en pause",
            "safety_checks": [],
            "checks_acknowledged": [],
            "params": {},
        }

    def test_resume_reinjects_full_original_action_from_failed_action(self, client):
        http, api_stream, token = client
        notepad_title = "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes"

        original_action = {
            "action_id": "act_raw_75272d22",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.8781,
            "y_pct": 0.9856,
            "expected_window_before": notepad_title,
            "target_spec": {
                "window_title": notepad_title,
                "by_role": "yolo",
            },
        }
        api_stream._replay_states["replay_xyz"] = self._paused_state(
            "replay_xyz", "sess_resume_xyz", "pc-alpha", original_action,
        )
        api_stream._replay_queues["sess_resume_xyz"] = []

        response = http.post(
            "/api/v1/traces/stream/replay/replay_xyz/resume",
            headers={"Authorization": f"Bearer {token}"},
        )

        assert response.status_code == 200
        assert response.json()["status"] == "resumed"

        requeued = api_stream._replay_queues["sess_resume_xyz"][0]
        assert requeued["action_id"] == "act_raw_75272d22_resume"
        assert requeued["x_pct"] == pytest.approx(0.8781)
        assert requeued["y_pct"] == pytest.approx(0.9856)
        assert requeued["expected_window_before"] == notepad_title
        assert requeued["target_spec"]["window_title"] == notepad_title

    def test_resume_dispatch_backfills_retry_pending_for_watchdog(self, client):
        http, api_stream, token = client

        original_action = {
            "action_id": "act_resume_01",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.41,
            "y_pct": 0.52,
            "target_spec": {"window_title": "test - Bloc-notes"},
        }
        api_stream._replay_states["replay_resume_watchdog"] = self._paused_state(
            "replay_resume_watchdog",
            "sess_resume_watchdog",
            "pc-watchdog",
            original_action,
        )
        api_stream._replay_queues["sess_resume_watchdog"] = []

        resume_response = http.post(
            "/api/v1/traces/stream/replay/replay_resume_watchdog/resume",
            headers={"Authorization": f"Bearer {token}"},
        )
        assert resume_response.status_code == 200

        # Poll the next action: this dispatches the re-queued one.
        next_response = http.get(
            "/api/v1/traces/stream/replay/next",
            params={"session_id": "sess_resume_watchdog", "machine_id": "pc-watchdog"},
        )
        assert next_response.status_code == 200
        dispatched = next_response.json()["action"]
        assert dispatched["action_id"] == "act_resume_01_resume"

        pending = api_stream._retry_pending["act_resume_01_resume"]
        assert pending["action"]["action_id"] == "act_resume_01"
        assert pending["dispatched_action"]["action_id"] == "act_resume_01_resume"
        assert pending["session_id"] == "sess_resume_watchdog"
        assert pending["machine_id"] == "pc-watchdog"
        assert pending["replay_id"] == "replay_resume_watchdog"
        assert pending["first_dispatched_at"] > 0
        assert pending["dispatched_at"] >= pending["first_dispatched_at"]
|
||||
151
tests/integration/test_replay_session_trim_neutral.py
Normal file
151
tests/integration/test_replay_session_trim_neutral.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""Non-régression — trim du préambule redondant pour /replay-session.
|
||||
|
||||
Bug fixé le 2026-05-20 (cf. ``docs/AUDIT_FINALIZE_CONTRACT_INTEGRATION_2026-05-20.md``
|
||||
et ``CR_AUDIT_PAUSED_RESUME_BUS_2026-05-22.md``) : sur la session source
|
||||
``sess_20260520T102916_066851``, le premier event raw rejoué après le
|
||||
setup auto Windows était un clic intra-Notepad sur la barre d'onglets
|
||||
qui basculait de ``http...txt – Bloc-notes`` vers ``Sans titre – Bloc-notes``.
|
||||
Comme le setup amène déjà Notepad dans ``Sans titre``, ce clic ne
|
||||
modifiait rien à l'écran → `retry_threshold`.
|
||||
|
||||
Ce test reproduit la chaîne complète d'``api_stream.replay-session``
|
||||
côté serveur (sans HTTP) sur une fixture synthétique correspondante,
|
||||
et vérifie que la première action utile post-setup est bien la
|
||||
saisie de texte ``test`` — pas un clic de bascule d'onglet ``Sans titre``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
import pytest
|
||||
|
||||
from agent_v0.server_v1.replay_engine import ( # noqa: E402
|
||||
_extract_required_apps_from_events,
|
||||
_generate_setup_actions,
|
||||
_trim_redundant_setup_events,
|
||||
)
|
||||
from agent_v0.server_v1.stream_processor import ( # noqa: E402
|
||||
build_replay_from_raw_events,
|
||||
)
|
||||
|
||||
|
||||
def _make_session_events() -> list:
|
||||
"""Reproduit le pattern de ``sess_20260520T102916_066851`` :
|
||||
Démarrer → Rechercher → Notepad ouvre un fichier .txt → l'utilisateur
|
||||
clique sur l'onglet ``Sans titre`` → tape ``test`` → Ctrl+S.
|
||||
|
||||
L'enregistrement initial passe par un titre non-neutre puis bascule
|
||||
sur un titre neutre — c'est le scénario qui piégeait le trim."""
|
||||
return [
|
||||
# Démarrer
|
||||
{"event": {
|
||||
"type": "window_focus_change",
|
||||
"to": {"app_name": "explorer.exe", "title": "Explorateur"},
|
||||
}},
|
||||
{"event": {
|
||||
"type": "mouse_click", "pos": [50, 1430], "timestamp": 1.0,
|
||||
"window": {"app_name": "explorer.exe", "title": "Explorateur"},
|
||||
}},
|
||||
# SearchHost
|
||||
{"event": {
|
||||
"type": "window_focus_change",
|
||||
"to": {"app_name": "SearchHost.exe", "title": "Rechercher"},
|
||||
}},
|
||||
{"event": {
|
||||
"type": "text_input", "text": "bloc", "timestamp": 2.0,
|
||||
"window": {"app_name": "SearchHost.exe", "title": "Rechercher"},
|
||||
}},
|
||||
{"event": {
|
||||
"type": "mouse_click", "pos": [681, 448], "timestamp": 2.5,
|
||||
"window": {"app_name": "SearchHost.exe", "title": "Rechercher"},
|
||||
}},
|
||||
# Notepad ouvre un fichier .txt existant (non-neutre)
|
||||
{"event": {
|
||||
"type": "window_focus_change",
|
||||
"to": {
|
||||
"app_name": "Notepad.exe",
|
||||
"title": "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes",
|
||||
},
|
||||
}},
|
||||
# Clic dans la barre d'onglets (y=40) → bascule vers Sans titre
|
||||
{"event": {
|
||||
"type": "mouse_click", "pos": [1191, 40], "timestamp": 4.0,
|
||||
"window": {
|
||||
"app_name": "Notepad.exe",
|
||||
"title": "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes",
|
||||
},
|
||||
"window_capture": {"click_relative": [1191, 40]},
|
||||
}},
|
||||
{"event": {
|
||||
"type": "window_focus_change",
|
||||
"to": {"app_name": "Notepad.exe", "title": "Sans titre – Bloc-notes"},
|
||||
}},
|
||||
# Saisie réelle de l'utilisateur — c'est la première action utile
|
||||
{"event": {
|
||||
"type": "text_input", "text": "test", "timestamp": 5.0,
|
||||
"window": {"app_name": "Notepad.exe",
|
||||
"title": "Sans titre – Bloc-notes"},
|
||||
}},
|
||||
]
|
||||
|
||||
|
||||
def test_replay_session_pipeline_skips_redundant_tab_switch(tmp_path):
    """Run setup extraction, trim and build on the synthetic session.

    The first useful action of the resulting replay must be the ``test``
    text input, not the ``Sans titre`` tab-switch click.
    """
    session_events = _make_session_events()
    required_apps = _extract_required_apps_from_events(session_events)

    # 1) The automatic setup recognises Notepad and generates its actions.
    assert required_apps.get("primary_app") == "Notepad.exe"
    setup_actions = _generate_setup_actions(required_apps, setup_id_prefix="setup_sess")
    assert setup_actions, "le setup auto doit injecter des actions Notepad"
    setup_ids = {step.get("action_id", "") for step in setup_actions}
    assert any("click_start" in aid for aid in setup_ids)
    assert any("click_result" in aid for aid in setup_ids)

    # 2) Trim: the redundant intra-Notepad click must be gone.
    trimmed = _trim_redundant_setup_events(session_events, required_apps)
    click_titles = []
    for wrapped in trimmed:
        payload = wrapped.get("event") or wrapped
        if payload.get("type") == "mouse_click":
            click_titles.append(payload.get("window", {}).get("title", ""))
    assert not any(
        "http192.168.1.40" in t for t in click_titles
    ), "le clic intra-Notepad redondant doit être coupé par le trim"

    # 3) Clean replay build: the first useful post-trim action is the
    #    'test' input, not a "Sans titre" click from _infer_tab_switch_target.
    actions = build_replay_from_raw_events(
        trimmed, session_id="sess_synthetic", session_dir=str(tmp_path),
    )
    useful_types = ("click", "type", "key_combo")
    actionable = [a for a in actions if a.get("type") in useful_types]
    assert actionable, "le replay doit contenir au moins une action utile"

    first = actionable[0]
    assert first.get("type") == "type", (
        f"première action utile doit être 'type', pas '{first.get('type')}' "
        f"(target_spec={first.get('target_spec')})"
    )
    assert first.get("text") == "test"

    # Sanity: no click action may target "Sans titre" (the inferred tab
    # switch) in the cleaned replay.
    sans_titre_clicks = []
    for candidate in actions:
        if candidate.get("type") != "click":
            continue
        label = candidate.get("target_spec", {}).get("by_text", "")
        if label.strip().lower() == "sans titre":
            sans_titre_clicks.append(candidate)
    assert not sans_titre_clicks, (
        "le replay ne doit plus contenir de click ciblant 'Sans titre' "
        f"(trouvés : {sans_titre_clicks})"
    )
|
||||
352
tests/integration/test_replay_watchdog.py
Normal file
352
tests/integration/test_replay_watchdog.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""Integration tests for the replay orphan watchdog."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import importlib
|
||||
import time
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
async def fake_lock():
    """No-op async context manager passed to ``ReplayWatchdog`` as its lock factory.

    Entering and leaving the context does nothing, so scans run without any
    real synchronisation in these tests.
    """
    yield None
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_watchdog_singleton():
    """Drop the watchdog module singleton and zero its numeric metrics before each test."""
    import agent_v0.server_v1.replay_watchdog as wd_mod

    wd_mod._singleton = None

    metrics = wd_mod._metrics
    # Only int/float entries are reset; any other metric value is left untouched.
    numeric_names = [
        name for name, value in metrics.items() if isinstance(value, (int, float))
    ]
    for name in numeric_names:
        metrics[name] = 0

    yield
|
||||
|
||||
|
||||
@pytest.fixture
def env_short_timeout(monkeypatch):
    """Reload the watchdog module with sub-second scan/timeout settings.

    The module is reloaded after the environment variables are set so that
    its module-level settings pick them up. On teardown the environment is
    restored and the module is reloaded once more, so the shortened settings
    do not leak into tests that run afterwards.
    """
    monkeypatch.setenv("RPA_WATCHDOG_ENABLED", "1")
    monkeypatch.setenv("RPA_WATCHDOG_SCAN_INTERVAL_S", "0.1")
    monkeypatch.setenv("RPA_WATCHDOG_ORPHAN_TIMEOUT_S", "0.2")
    monkeypatch.setenv("RPA_WATCHDOG_MAX_RESENDS", "2")

    import agent_v0.server_v1.replay_watchdog as wd_mod

    importlib.reload(wd_mod)
    try:
        yield
    finally:
        # This fixture finalizes before monkeypatch's own teardown, so the
        # environment must be restored explicitly before the reload; the
        # later automatic undo is then a no-op.
        monkeypatch.undo()
        importlib.reload(wd_mod)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_no_orphan_below_timeout(env_short_timeout):
    """An action dispatched just now is counted as in-flight and left untouched."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    dispatched = time.time()
    pending: Dict[str, Dict[str, Any]] = {
        "act1": {
            "action": {"action_id": "act1", "type": "click"},
            "dispatched_action": {"action_id": "act1", "type": "click"},
            "session_id": "sess1",
            "machine_id": "m1",
            "dispatched_at": dispatched,
            "first_dispatched_at": dispatched,
            "resent_count": 0,
        }
    }
    queues: Dict[str, List[Dict[str, Any]]] = {"sess1": []}

    outcome = await ReplayWatchdog(pending, queues, fake_lock)._scan_once()

    expected = dict(orphans=0, resent=0, gaveup=0, skipped=0, in_flight=1)
    assert outcome == expected
    assert queues["sess1"] == []
    assert pending["act1"]["resent_count"] == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_orphan_above_timeout_resent_in_head(env_short_timeout):
    """A stale dispatch is re-queued ahead of the actions already waiting."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    stale_action = {"action_id": "act1", "type": "click"}
    queued_action = {"action_id": "act_next", "type": "click"}
    entry = {
        "action": {"action_id": "original", "type": "click"},
        # The watchdog is expected to re-push this dispatched payload.
        "dispatched_action": stale_action,
        "session_id": "sess1",
        "machine_id": "m1",
        "dispatched_at": time.time() - 5.0,
        "first_dispatched_at": time.time() - 5.0,
        "resent_count": 0,
    }
    pending = {"act1": entry}
    queues = {"sess1": [queued_action]}

    outcome = await ReplayWatchdog(pending, queues, fake_lock)._scan_once()

    assert outcome["resent"] == 1
    assert queues["sess1"] == [stale_action, queued_action]
    assert pending["act1"]["resent_count"] == 1
    assert pending["act1"]["dispatched_at"] == 0.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_giveup_after_max_resends(env_short_timeout):
    """An orphan already resent twice (the configured maximum) is dropped, not re-queued."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    exhausted_entry = {
        "action": {"action_id": "act1", "type": "click"},
        "dispatched_action": {"action_id": "act1", "type": "click"},
        "session_id": "sess1",
        "machine_id": "m1",
        "dispatched_at": time.time() - 5.0,
        "first_dispatched_at": time.time() - 90.0,
        "resent_count": 2,
    }
    pending = {"act1": exhausted_entry}
    queues = {"sess1": []}

    outcome = await ReplayWatchdog(pending, queues, fake_lock)._scan_once()

    assert outcome["gaveup"] == 1
    assert outcome["resent"] == 0
    assert "act1" not in pending
    assert queues["sess1"] == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_race_report_arrives_during_scan(env_short_timeout):
    """An entry removed between detection and resend must not be re-queued."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    retry_pending = {
        "act1": {
            "action": {"action_id": "act1", "type": "click"},
            "dispatched_action": {"action_id": "act1", "type": "click"},
            "session_id": "sess1",
            "machine_id": "m1",
            "dispatched_at": time.time() - 5.0,
            "first_dispatched_at": time.time() - 5.0,
            "resent_count": 0,
        }
    }
    replay_queues = {"sess1": []}
    acquisitions = 0

    @contextlib.asynccontextmanager
    async def lock_that_pops_before_resend():
        # Simulate a report landing in between: the second time the lock is
        # taken, the pending entry has already been removed.
        nonlocal acquisitions
        acquisitions += 1
        if acquisitions == 2:
            retry_pending.pop("act1", None)
        yield

    watchdog = ReplayWatchdog(retry_pending, replay_queues, lock_that_pops_before_resend)
    outcome = await watchdog._scan_once()

    assert outcome["orphans"] == 1
    assert outcome["resent"] == 0
    assert replay_queues["sess1"] == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_disabled_via_env(monkeypatch):
    """With RPA_WATCHDOG_ENABLED=0, start() creates no task and stop() is still safe.

    The module is reloaded again once the environment is restored, so the
    "disabled" configuration does not leak into tests that run afterwards.
    """
    monkeypatch.setenv("RPA_WATCHDOG_ENABLED", "0")

    import agent_v0.server_v1.replay_watchdog as wd_mod

    importlib.reload(wd_mod)
    try:
        watchdog = wd_mod.ReplayWatchdog({}, {}, fake_lock)

        await watchdog.start()

        assert watchdog._task is None
        await watchdog.stop()
    finally:
        # Restore the environment first, then reload so the module-level
        # settings reflect the restored environment.
        monkeypatch.undo()
        importlib.reload(wd_mod)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lifecycle_start_stop_clean(env_short_timeout):
    """start() spawns a running task; stop() clears it after a couple of scan intervals."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    watchdog = ReplayWatchdog({}, {}, fake_lock)

    await watchdog.start()
    assert watchdog._task is not None
    assert watchdog._task.done() is False

    # Longer than the 0.1 s scan interval configured by the fixture.
    await asyncio.sleep(0.25)

    await watchdog.stop(timeout_s=2.0)
    assert watchdog._task is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_orphan_with_repush_tail(monkeypatch, env_short_timeout):
    """With RPA_WATCHDOG_REPUSH_POSITION=tail, the orphan is appended after queued actions."""
    monkeypatch.setenv("RPA_WATCHDOG_REPUSH_POSITION", "tail")

    import agent_v0.server_v1.replay_watchdog as wd_mod

    # Reload so the module sees the variable set above.
    importlib.reload(wd_mod)
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog

    stale_action = {"action_id": "act1", "type": "click"}
    queued_action = {"action_id": "act_next", "type": "click"}
    entry = {
        "action": {"action_id": "original", "type": "click"},
        "dispatched_action": stale_action,
        "session_id": "sess1",
        "machine_id": "m1",
        "dispatched_at": time.time() - 5.0,
        "first_dispatched_at": time.time() - 5.0,
        "resent_count": 0,
    }
    pending = {"act1": entry}
    queues = {"sess1": [queued_action]}

    await ReplayWatchdog(pending, queues, fake_lock)._scan_once()

    assert queues["sess1"] == [queued_action, stale_action]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_metrics_snapshot(env_short_timeout):
    """One scan over a stale entry shows up in the scan/detected/resent counters."""
    from agent_v0.server_v1.replay_watchdog import ReplayWatchdog, get_metrics_snapshot

    stale_entry = {
        "action": {"action_id": "act1", "type": "click"},
        "dispatched_action": {"action_id": "act1", "type": "click"},
        "session_id": "sess1",
        "machine_id": "m1",
        "dispatched_at": time.time() - 5.0,
        "first_dispatched_at": time.time() - 5.0,
        "resent_count": 0,
    }
    watchdog = ReplayWatchdog({"act1": stale_entry}, {"sess1": []}, fake_lock)

    await watchdog._scan_once()
    counters = get_metrics_snapshot()

    assert counters["scans_total"] >= 1
    assert counters["orphans_detected_total"] >= 1
    assert counters["orphans_resent_total"] >= 1
|
||||
|
||||
|
||||
def test_default_orphan_timeout_matches_spec(monkeypatch):
    """Without an env override, the module's orphan timeout constant is 45 seconds."""
    import agent_v0.server_v1.replay_watchdog as wd_mod

    # Make sure no override is present before the module is re-evaluated.
    monkeypatch.delenv("RPA_WATCHDOG_ORPHAN_TIMEOUT_S", raising=False)
    importlib.reload(wd_mod)

    default_timeout = wd_mod.WATCHDOG_ORPHAN_TIMEOUT_S
    assert default_timeout == 45.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_late_report_clears_resent_duplicate_from_queue(monkeypatch):
    """A report for an already re-queued action removes the duplicate from the queue.

    The module-level replay stores of ``api_stream`` are emptied for the test
    and restored to their previous content afterwards.
    """
    monkeypatch.setenv("RPA_API_TOKEN", "test_replay_watchdog_token")

    from agent_v0.server_v1 import api_stream

    monkeypatch.setattr(api_stream, "API_TOKEN", "test_replay_watchdog_token")

    store_names = ("_replay_states", "_replay_queues", "_retry_pending")
    backups = {name: dict(getattr(api_stream, name)) for name in store_names}
    for name in store_names:
        getattr(api_stream, name).clear()

    try:
        replay_id = "replay_watchdog_dup"
        session_id = "sess_watchdog_dup"
        now = time.time()

        action = {
            "action_id": "act_setup_sess_click_start",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.387891,
            "y_pct": 0.974375,
            "_setup_phase": True,
            "target_spec": {"by_role": "start_button"},
        }
        next_action = {"action_id": "act_setup_sess_wait_start", "type": "wait"}
        action_id = action["action_id"]

        replay_state = {
            "replay_id": replay_id,
            "workflow_id": "session_replay:test",
            "session_id": session_id,
            "machine_id": "pc-watchdog",
            "status": "running",
            "total_actions": 2,
            "completed_actions": 0,
            "failed_actions": 0,
            "current_action_index": 0,
            "params": {},
            "results": [],
            "actions": [action, next_action],
            "retried_actions": 0,
            "unverified_actions": 0,
            "error_log": [],
            "last_screenshot": None,
            "failed_action": None,
            "pause_message": None,
            "variables": {},
            "safety_checks": [],
            "checks_acknowledged": [],
            "pause_reason": "",
            "pause_payload": None,
        }
        pending_entry = {
            "action": dict(action),
            "dispatched_action": dict(action),
            "retry_count": 0,
            "replay_id": replay_id,
            "session_id": session_id,
            "machine_id": "pc-watchdog",
            "dispatched_at": now,
            "first_dispatched_at": now - 5.0,
            "resent_count": 1,
            "last_resent_at": now - 1.0,
        }

        api_stream._replay_states[replay_id] = replay_state
        # The queue still holds a copy of the first action alongside the next one.
        api_stream._replay_queues[session_id] = [dict(action), dict(next_action)]
        api_stream._retry_pending[action_id] = pending_entry

        report = api_stream.ReplayResultReport(
            session_id=session_id,
            action_id=action_id,
            success=True,
            warning="start_button_hotkey_fallback",
            resolution_method="semantic_start_button_hotkey",
            resolution_score=1.0,
        )

        result = await api_stream.report_action_result(report)

        assert result["status"] == "recorded"
        remaining_ids = [a["action_id"] for a in api_stream._replay_queues[session_id]]
        assert remaining_ids == ["act_setup_sess_wait_start"]
        assert action_id not in api_stream._retry_pending
        final_state = api_stream._replay_states[replay_id]
        assert final_state["completed_actions"] == 1
        assert final_state["current_action_index"] == 1
    finally:
        for name in store_names:
            live_store = getattr(api_stream, name)
            live_store.clear()
            live_store.update(backups[name])
|
||||
@@ -112,6 +112,58 @@ class TestLiveSessionManager:
|
||||
assert len(raw["screenshots"]) == 1
|
||||
assert raw["screenshots"][0]["screenshot_id"] == "shot_full_001"
|
||||
|
||||
def test_discovers_bg_session_machine_id_from_root_folder(self, tmp_path):
    """A ``bg_<machine>`` folder holding live events yields a session for that machine."""
    from agent_v0.server_v1.live_session_manager import LiveSessionManager

    session_id = "bg_DESKTOP-58D5CAC_windows"
    live_root = tmp_path / "live_sessions"
    events_file = live_root / session_id / "live_events.jsonl"
    events_file.parent.mkdir(parents=True)
    events_file.write_text("{}", encoding="utf-8")

    manager = LiveSessionManager(
        persist_dir=str(tmp_path / "persist"),
        live_sessions_dir=str(live_root),
    )
    found = manager.get_session(session_id)

    assert found is not None
    assert found.machine_id == "DESKTOP-58D5CAC_windows"
|
||||
|
||||
def test_loads_persisted_bg_session_with_machine_id_inferred(self, tmp_path):
    """A persisted bg session stored with machine_id "default" gets the id from its name."""
    from agent_v0.server_v1.live_session_manager import LiveSessionManager

    session_id = "bg_DESKTOP-58D5CAC_windows"
    persisted_payload = (
        '{"session_id":"bg_DESKTOP-58D5CAC_windows","machine_id":"default",'
        '"events":[],"shot_paths":{},"last_window_info":{"title":"Unknown","app_name":"unknown"},'
        '"created_at":"2026-05-20T14:00:00","last_activity":"2026-05-20T14:00:00",'
        '"finalized":false,"window_titles_seen":{},"app_names_seen":{}}'
    )

    persist_dir = tmp_path / "persist"
    persist_dir.mkdir()
    (persist_dir / "bg_DESKTOP-58D5CAC_windows.json").write_text(
        persisted_payload, encoding="utf-8",
    )

    manager = LiveSessionManager(persist_dir=str(persist_dir))
    loaded = manager.get_session(session_id)

    assert loaded is not None
    assert loaded.machine_id == "DESKTOP-58D5CAC_windows"
|
||||
|
||||
def test_find_active_agent_session_falls_back_to_bg_machine_session(self, tmp_path):
    """With the machine's regular session finalized, the bg session is the active one."""
    from agent_v0.server_v1.live_session_manager import LiveSessionManager
    from agent_v0.server_v1.replay_engine import _find_active_agent_session

    machine = "DESKTOP-58D5CAC_windows"
    finished_session = "sess_20260520T102916_066851"

    manager = LiveSessionManager(persist_dir=str(tmp_path / "persist"))
    manager.register_session(finished_session, machine_id=machine)
    manager.finalize(finished_session)
    # Registered without an explicit machine_id, as in the original scenario.
    manager.register_session("bg_DESKTOP-58D5CAC_windows")

    resolved = _find_active_agent_session(manager, machine_id=machine)

    assert resolved == "bg_DESKTOP-58D5CAC_windows"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# StreamProcessor
|
||||
@@ -195,6 +247,238 @@ class TestStreamProcessor:
|
||||
assert stats["total_workflows"] == 0
|
||||
assert stats["initialized"] is False
|
||||
|
||||
def test_build_replay_does_not_compile_save_dialog_open_as_switch_tab(
    self, tmp_path, monkeypatch,
):
    """A same-app `Enregistrer sous` (Save As) dialog is not a tab.

    Live regression 2026-05-23: a menu click in Notepad was recompiled as a
    false `switch_tab`, which injected a spurious click before the dialog
    was actually opened.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _noop(*args, **kwargs):
        return None

    def _fake_enrich(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    # Replace the screenshot/intention helpers with inert stand-ins.
    monkeypatch.setattr(sp, "_load_crop_for_event", _noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", _fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, _noop)

    def _window(title):
        return {"title": title, "app_name": "Notepad.exe"}

    def _click(timestamp, pos, shot_id, title, click_relative=None):
        event = {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": _window(title),
        }
        if click_relative is not None:
            event["window_capture"] = {
                "rect": [320, 520, 2240, 1636],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            }
        return {"event": event}

    events = [
        _click(1.0, [820, 630], "shot_001", "*test – Bloc-notes", [500, 110]),
        _click(1.2, [860, 562], "shot_002", "*test – Bloc-notes", [540, 40]),
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.35,
            "from": _window("*test – Bloc-notes"),
            "to": _window("Enregistrer sous"),
        }},
        # Click inside the dialog itself: no window_capture on this event.
        _click(1.6, [997, 743], "shot_003", "Enregistrer sous"),
    ]

    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_save_dialog", session_dir=str(session_dir),
    )

    clicks = [a for a in actions if a.get("type") == "click"]
    assert len(clicks) == 3

    interactions = [
        (c.get("target_spec", {}).get("context_hints") or {}).get("interaction")
        for c in clicks
    ]
    assert "switch_tab" not in interactions

    _, opening_click, dialog_click = clicks
    assert opening_click.get("expected_window_title") == "Enregistrer sous"
    assert dialog_click.get("expected_window_before") == "Enregistrer sous"
|
||||
|
||||
def test_build_replay_tab_switch_focus_belongs_to_latest_click_only(
    self, tmp_path, monkeypatch,
):
    """The tab focus change must be attached to the latest causal click only."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _noop(*args, **kwargs):
        return None

    def _fake_enrich(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    # Replace the screenshot/intention helpers with inert stand-ins.
    monkeypatch.setattr(sp, "_load_crop_for_event", _noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", _fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, _noop)

    source_title = "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes"

    def _window(title):
        return {"title": title, "app_name": "Notepad.exe"}

    def _click(timestamp, pos, shot_id, click_relative):
        return {"event": {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": _window(source_title),
            "window_capture": {
                "rect": [323, 522, 2243, 1638],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            },
        }}

    events = [
        _click(1.0, [1410, 562], "shot_001", [1087, 40]),
        _click(1.1, [1514, 562], "shot_002", [1191, 40]),
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.2,
            "from": _window(source_title),
            "to": _window("Sans titre – Bloc-notes"),
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_intervening_click",
        session_dir=str(session_dir),
    )

    assert len(actions) == 2
    earlier, latest = actions
    earlier_hints = earlier.get("target_spec", {}).get("context_hints") or {}
    latest_hints = latest.get("target_spec", {}).get("context_hints") or {}

    # Only the click immediately preceding the focus change becomes a tab switch.
    assert earlier_hints.get("interaction") != "switch_tab"
    assert latest["target_spec"]["by_text"] == "Sans titre"
    assert latest["target_spec"]["by_role"] == "tab"
    assert latest_hints.get("interaction") == "switch_tab"
|
||||
|
||||
def test_build_replay_infers_close_tab_before_save_dialog(
    self, tmp_path, monkeypatch,
):
    """A click on the active tab's "x" must be given close_tab semantics."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    def _noop(*args, **kwargs):
        return None

    def _fake_enrich(*args, **kwargs):
        return {"anchor_image_base64": "abc123", "by_role": "yolo"}

    # Replace the screenshot/intention helpers with inert stand-ins.
    monkeypatch.setattr(sp, "_load_crop_for_event", _noop)
    monkeypatch.setattr(sp, "enrich_click_from_screenshot", _fake_enrich)
    for helper_name in (
        "_attach_expected_screenshots",
        "_enrich_actions_with_intentions",
        "_unload_gemma4",
    ):
        monkeypatch.setattr(sp, helper_name, _noop)

    def _window(title):
        return {"title": title, "app_name": "Notepad.exe"}

    def _click(timestamp, pos, shot_id, click_relative):
        return {"event": {
            "type": "mouse_click",
            "timestamp": timestamp,
            "pos": pos,
            "button": "left",
            "screenshot_id": shot_id,
            "window": _window("*test – Bloc-notes"),
            "window_capture": {
                "rect": [323, 522, 2243, 1638],
                "click_relative": click_relative,
                "window_size": [1920, 1116],
            },
        }}

    events = [
        _click(1.0, [1814, 560], "shot_001", [1491, 38]),
        _click(1.3, [1183, 1156], "shot_002", [860, 634]),
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.5,
            "from": _window("*test – Bloc-notes"),
            "to": _window("Enregistrer sous"),
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_close_tab",
        session_dir=str(session_dir),
    )

    clicks = [a for a in actions if a.get("type") == "click"]
    assert len(clicks) == 2

    close_spec = clicks[0].get("target_spec", {})
    close_hints = close_spec.get("context_hints") or {}

    assert close_spec.get("by_role") == "tab_close_button"
    assert close_spec.get("by_text", "") == ""
    assert close_hints.get("interaction") == "close_tab"
    assert close_hints.get("active_tab_label") == "test"
    assert "fermer l'onglet actif 'test'" in close_spec.get("vlm_description", "")
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# StreamWorker
|
||||
|
||||
184
tests/unit/test_agent_finalize_replay_contract.py
Normal file
184
tests/unit/test_agent_finalize_replay_contract.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""Tests ciblés sur l'intégration agent du contrat finalize enrichi."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
class _ImmediateThread:
|
||||
def __init__(self, target=None, args=(), kwargs=None, daemon=None):
|
||||
self._target = target
|
||||
self._args = args
|
||||
self._kwargs = kwargs or {}
|
||||
|
||||
def start(self):
|
||||
if self._target is not None:
|
||||
self._target(*self._args, **self._kwargs)
|
||||
|
||||
|
||||
class _DummyServerClient:
|
||||
_stream_base = "http://server.test:5005"
|
||||
|
||||
def __init__(self):
|
||||
self.on_connection_change = None
|
||||
|
||||
def set_on_connection_change(self, callback):
|
||||
self.on_connection_change = callback
|
||||
|
||||
def _auth_headers(self):
|
||||
return {"Authorization": "Bearer test-token"}
|
||||
|
||||
|
||||
def _install_pystray_stub():
|
||||
pystray_stub = types.ModuleType("pystray")
|
||||
|
||||
class _DummyMenu:
|
||||
SEPARATOR = object()
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
class _DummyIcon:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def run(self):
|
||||
return None
|
||||
|
||||
def stop(self):
|
||||
return None
|
||||
|
||||
def update_menu(self):
|
||||
return None
|
||||
|
||||
pystray_stub.MenuItem = lambda *args, **kwargs: (args, kwargs)
|
||||
pystray_stub.Menu = _DummyMenu
|
||||
pystray_stub.Icon = _DummyIcon
|
||||
sys.modules["pystray"] = pystray_stub
|
||||
|
||||
|
||||
def _build_tray():
    """Create a ``SmartTrayV1`` wired to dummy callbacks, a dummy client and a mock notifier."""
    # The stub must be registered before smart_tray is imported.
    _install_pystray_stub()

    from agent_v0.agent_v1.ui.smart_tray import SmartTrayV1

    def _ignore_start(_name):
        return None

    def _ignore_stop():
        return None

    tray = SmartTrayV1(
        on_start_callback=_ignore_start,
        on_stop_callback=_ignore_stop,
        server_client=_DummyServerClient(),
    )
    # Swap in a mock so tests can assert on notifications.
    tray._notifier = MagicMock()
    return tray
|
||||
|
||||
|
||||
def test_offer_finalize_replay_requires_user_consent():
    """If the user declines, a notification is shown but no replay request is launched."""
    _install_pystray_stub()
    from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod

    tray = _build_tray()
    tray._launch_replay_request = MagicMock()

    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_001",
        "machine_id": "pc-offer",
    }
    # Threads run inline and the consent prompt answers "no".
    inline_threads = patch.object(smart_tray_mod.threading, "Thread", _ImmediateThread)
    consent_refused = patch.object(smart_tray_mod, "_ask_consent", return_value=False)

    with inline_threads, consent_refused:
        tray.offer_finalize_replay(replay_request, "Bloc-notes")

    tray._notifier.notify.assert_called_once()
    tray._launch_replay_request.assert_not_called()
|
||||
|
||||
|
||||
def test_launch_replay_request_calls_replay_session_endpoint():
    """Launching a replay posts once with session params, auth headers and no redirects."""
    _install_pystray_stub()
    from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod

    tray = _build_tray()
    replay_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_002",
        "machine_id": "pc-replay",
    }
    inline_threads = patch.object(smart_tray_mod.threading, "Thread", _ImmediateThread)

    with inline_threads, patch("requests.post") as mock_post:
        mock_post.return_value = MagicMock(ok=True)
        tray._launch_replay_request(replay_request, "Bloc-notes")

    mock_post.assert_called_once()
    sent_kwargs = mock_post.call_args[1]
    assert sent_kwargs["params"] == {
        "session_id": "sess_offer_002",
        "machine_id": "pc-replay",
    }
    assert sent_kwargs["headers"] == {"Authorization": "Bearer test-token"}
    assert sent_kwargs["allow_redirects"] is False
|
||||
|
||||
|
||||
def test_agent_finalize_result_delegates_to_tray_offer():
    """A replay-ready finalize result is forwarded to ``ui.offer_finalize_replay``."""
    from agent_v0.agent_v1.finalize_contract import dispatch_finalize_result

    ui = MagicMock()
    finalize_result = {
        "replay_ready": True,
        "replay_request": {
            "endpoint": "/api/v1/traces/stream/replay-session",
            "session_id": "sess_offer_003",
            "machine_id": "pc-main",
        },
    }

    dispatch_finalize_result(ui, finalize_result, "Saisie dossier")

    # Expected payload is spelled out separately from the input dict.
    expected_request = {
        "endpoint": "/api/v1/traces/stream/replay-session",
        "session_id": "sess_offer_003",
        "machine_id": "pc-main",
    }
    ui.offer_finalize_replay.assert_called_once_with(expected_request, "Saisie dossier")
|
||||
|
||||
|
||||
def test_agent_finalize_result_ignores_already_started_replay():
    """When the result reports the replay as already started, no offer is made."""
    from agent_v0.agent_v1.finalize_contract import dispatch_finalize_result

    ui = MagicMock()
    finalize_result = {
        "replay_ready": True,
        "replay_request": {
            "endpoint": "/api/v1/traces/stream/replay-session",
            "session_id": "sess_offer_004",
            "machine_id": "pc-main",
        },
        # A launch already in progress, as reported in the finalize result.
        "replay_launch": {
            "status": "started",
            "replay": {"replay_id": "replay_sess_1234"},
        },
    }

    dispatch_finalize_result(ui, finalize_result, "Saisie dossier")

    ui.offer_finalize_replay.assert_not_called()
|
||||
78
tests/unit/test_agent_v1_replay_pause_state.py
Normal file
78
tests/unit/test_agent_v1_replay_pause_state.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Tests ciblés sur l'état replay côté AgentV1 pendant pause supervisée."""
|
||||
|
||||
import sys
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
def _make_agent():
    """Build an ``AgentV1`` without running ``__init__``, in active replay mode.

    ``_state`` and ``ui`` are lightweight recorders: each call to their
    ``set_replay_active`` appends the argument to the object's ``calls`` list.
    """
    # Placeholder modules so importing the agent does not need the real packages.
    for module_name in ("pynput", "pynput.mouse", "pynput.keyboard", "pystray"):
        sys.modules.setdefault(module_name, MagicMock())

    from agent_v0.agent_v1.main import AgentV1

    agent = AgentV1.__new__(AgentV1)

    def _record_state(active):
        agent._state.calls.append(active)

    def _record_ui(active):
        agent.ui.calls.append(active)

    agent.user_id = "demo_user"
    agent.machine_id = "machine_test"
    agent.running = True
    agent._replay_active = True
    agent._state = SimpleNamespace(calls=[], set_replay_active=_record_state)
    agent.ui = SimpleNamespace(calls=[], set_replay_active=_record_ui)
    return agent
|
||||
|
||||
|
||||
def test_replay_pause_does_not_mark_replay_finished(monkeypatch):
    """When the executor reports replay_paused, AgentV1 must stay in replay mode."""
    agent = _make_agent()

    class _Executor:
        _poll_backoff = 1.0
        _replay_paused = True

        def poll_and_execute(self, session_id: str, server_url: str, machine_id: str = "default") -> bool:
            return False

    agent._executor = _Executor()

    def _fake_sleep(_delay):
        # The first sleep request ends the loop instead of waiting.
        agent.running = False

    monkeypatch.setattr("agent_v0.agent_v1.main.time.sleep", _fake_sleep)

    # Daemon thread: a poll loop that never exits must not keep the
    # interpreter alive once the test run is over.
    t = threading.Thread(target=agent._replay_poll_loop, daemon=True)
    t.start()
    t.join(timeout=1)

    # Fail explicitly on a hung loop rather than asserting on racing state.
    assert not t.is_alive(), "_replay_poll_loop did not exit within 1s"
    assert agent._replay_active is True
    assert agent.ui.calls == []
    assert agent._state.calls == []
|
||||
|
||||
|
||||
def test_replay_without_action_and_without_pause_marks_replay_finished(monkeypatch):
    """With no action and no pause, AgentV1 must leave replay mode."""
    agent = _make_agent()

    class _Executor:
        _poll_backoff = 1.0
        _replay_paused = False

        def poll_and_execute(self, session_id: str, server_url: str, machine_id: str = "default") -> bool:
            return False

    agent._executor = _Executor()

    def _fake_sleep(_delay):
        # The first sleep request ends the loop instead of waiting.
        agent.running = False

    monkeypatch.setattr("agent_v0.agent_v1.main.time.sleep", _fake_sleep)

    # Daemon thread: a poll loop that never exits must not keep the
    # interpreter alive once the test run is over.
    t = threading.Thread(target=agent._replay_poll_loop, daemon=True)
    t.start()
    t.join(timeout=1)

    # Fail explicitly on a hung loop rather than asserting on racing state.
    assert not t.is_alive(), "_replay_poll_loop did not exit within 1s"
    assert agent._replay_active is False
    assert agent.ui.calls == [False]
    assert agent._state.calls == [False]
|
||||
485
tests/unit/test_capturer_monitor_guard.py
Normal file
485
tests/unit/test_capturer_monitor_guard.py
Normal file
@@ -0,0 +1,485 @@
|
||||
"""Garde dimensions monitor — agent_v0/agent_v1/vision/capturer.py
|
||||
|
||||
Contexte (démo GHT 19 mai 2026) : `mss.monitors[1]` peut retourner
|
||||
intermittemment des dimensions tronquées (cas observé : 2560×60 au lieu
|
||||
de 2560×1600). Toute capture utilisant ces dims pour normaliser des
|
||||
coordonnées empoisonne ensuite la mémoire persistante (`TargetMemoryStore`).
|
||||
|
||||
Ce module teste la garde qui doit :
|
||||
- détecter une dimension aberrante avant capture
|
||||
- retenter (mss peut avoir un cache stale)
|
||||
- tomber en fallback sur un autre monitor physique si dispo
|
||||
- abandonner explicitement (logs WARNING/ERROR) sans empoisonner
|
||||
|
||||
Périmètre : capturer.py uniquement (pas executor, pas replay).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def _make_mock_mss(monitors_sequence):
|
||||
"""Construit un mock `mss.mss()` qui renvoie successivement les listes
|
||||
`monitors` fournies. Permet de simuler retry / changement de dims
|
||||
entre deux appels.
|
||||
|
||||
Args:
|
||||
monitors_sequence: liste de listes-de-monitors. Chaque entrée
|
||||
représente l'état renvoyé par `sct.monitors` à un appel
|
||||
successif de `mss.mss()`. La dernière entrée est réutilisée
|
||||
si plus d'appels ont lieu.
|
||||
|
||||
Returns:
|
||||
Un mock utilisable comme `patch(..., side_effect=mock)` côté `mss.mss`.
|
||||
"""
|
||||
call_counter = {"n": 0}
|
||||
instances = []
|
||||
|
||||
def factory():
|
||||
idx = min(call_counter["n"], len(monitors_sequence) - 1)
|
||||
call_counter["n"] += 1
|
||||
instance = MagicMock(name=f"mss_instance_{idx}")
|
||||
instance.monitors = monitors_sequence[idx]
|
||||
|
||||
# grab() renvoie un objet avec size + bgra pour passer dans PIL
|
||||
grab_result = MagicMock()
|
||||
# On simule un buffer cohérent avec les dims du monitor sain
|
||||
m = monitors_sequence[idx][1] if len(monitors_sequence[idx]) > 1 else {}
|
||||
w = m.get("width", 100)
|
||||
h = m.get("height", 100)
|
||||
grab_result.size = (w, h)
|
||||
# Une image saine ne doit pas être entièrement noire, sinon le nouveau
|
||||
# fail-closed black-frame la rejetterait.
|
||||
grab_result.bgra = b"\x80\x80\x80\x00" * (w * h)
|
||||
instance.grab = MagicMock(return_value=grab_result)
|
||||
|
||||
# context manager
|
||||
cm = MagicMock(name=f"mss_cm_{idx}")
|
||||
cm.__enter__ = MagicMock(return_value=instance)
|
||||
cm.__exit__ = MagicMock(return_value=False)
|
||||
instances.append((cm, instance))
|
||||
return cm
|
||||
|
||||
factory.instances = instances
|
||||
return factory
|
||||
|
||||
|
||||
def _vision_capturer(tmp_path):
    """Return a ``VisionCapturer`` built on ``str(tmp_path)``.

    The import is done lazily, inside the call, so that patches installed
    by the test are already active when the import runs.
    """
    from agent_v0.agent_v1.vision.capturer import VisionCapturer as _Capturer

    target_dir = str(tmp_path)
    return _Capturer(target_dir)
|
||||
|
||||
|
||||
def _solid_img(color: tuple[int, int, int], size=(320, 240)) -> Image.Image:
    """Return a single-color RGB image, used to drive the black-frame fallback tests."""
    return Image.new(mode="RGB", size=size, color=color)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 1 — Dim aberrante (height=60) refusée : capture_full_context renvoie ""
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_full_context_returns_empty_when_monitor_height_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """GHT demo case: mss.monitors[1] reports 2560x60 instead of 2560x1600.

    The capture must refuse to produce a PNG from those dimensions
    (otherwise every normalized coordinate derived from it would be off
    by a factor of about 27). Expected return value: an empty string,
    as in the existing error contract.
    """
    composite = dict(left=0, top=0, width=2560, height=1660)
    broken_primary = dict(left=0, top=0, width=2560, height=60)
    factory = _make_mock_mss([[composite, broken_primary]])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_full_context("test_aberrant")

    assert result == "", (
        f"Capture devrait retourner '' sur dim aberrante, got {result!r}"
    )

    # Sanity: none of the mss instances created during the call may have
    # grabbed a frame, since every one of them saw the aberrant monitor.
    for _, mss_instance in factory.instances:
        mss_instance.grab.assert_not_called()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 2 — Le log WARNING doit citer la dim observée (debuggabilité)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_aberrant_monitor_logs_warning_with_observed_dimensions(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """The WARNING must quote the aberrant dimensions that were observed.

    An operator has to be able to diagnose the cause from the logs alone,
    without replaying the session.

    The height check uses a digit-boundary regex rather than a plain
    substring test: "60" is a substring of "2560", so ``"60" in msg`` would
    pass as soon as the width is logged, even if the height were missing.
    """
    import re  # local import: only this test needs it

    aberrant_monitors = [
        {"left": 0, "top": 0, "width": 2560, "height": 1660},
        {"left": 0, "top": 0, "width": 2560, "height": 60},
    ]
    factory = _make_mock_mss([aberrant_monitors])

    with patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory), \
            patch("agent_v0.agent_v1.vision.capturer.time.sleep"):
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        cap = _vision_capturer(tmp_path)
        cap.capture_full_context("test")

    # Only records at exactly WARNING level are considered.
    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
    assert warnings, "Au moins un WARNING attendu sur dim aberrante"
    msg = " ".join(r.getMessage() for r in warnings)
    assert "2560" in msg, f"Largeur observée doit apparaître dans le WARNING : {msg!r}"
    # "60" must appear as a standalone number, not as the tail of "2560".
    assert re.search(r"(?<!\d)60(?!\d)", msg), (
        f"Hauteur observée doit apparaître dans le WARNING : {msg!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 3 — Retry : un 1er appel aberrant suivi d'un appel sain produit la capture
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_retries_when_first_monitor_query_is_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """A first aberrant query followed by a sane one must yield a capture.

    The observed bug is intermittent (mss may hold a stale cache), and an
    immediate retry often returns the real dimensions. The capture must
    therefore retry and succeed when the second query is sane.
    """
    first_query = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),  # aberrant
    ]
    second_query = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=1600),  # sane
    ]
    factory = _make_mock_mss([first_query, second_query])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_full_context("test_retry", force=True)

    assert result, (
        f"Capture doit réussir après retry sur dims saines, got {result!r}"
    )
    assert Path(result).exists(), "Le PNG doit être physiquement créé"

    # At least two mss.mss() calls: the aberrant one plus the retry.
    n_calls = len(factory.instances)
    assert n_calls >= 2, (
        f"Au moins 2 appels mss.mss() attendus (retry), vu {len(factory.instances)}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 4 — Fallback : monitors[1] aberrant mais monitors[2] sain → capture OK
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_falls_back_to_secondary_monitor_when_primary_aberrant(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """Multi-screen case: monitors[1] permanently broken, monitors[2] sane.

    The capture must succeed and a log message must mention the fallback.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),  # composite
        dict(left=0, top=0, width=2560, height=60),  # broken primary
        dict(left=2560, top=0, width=1920, height=1080),  # sane secondary
    ]
    # Same state on every call: a stationary fault, not an intermittent one.
    factory = _make_mock_mss([layout])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_full_context("test_fallback", force=True)

    assert result, f"Capture doit réussir via monitor[2], got {result!r}"
    msg = " ".join(record.getMessage() for record in caplog.records)
    lowered = msg.lower()
    assert "fallback" in lowered, (
        f"Un log doit signaler le fallback monitor : {msg!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 5 — capture_dual bénéficie aussi de la garde
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_dual_returns_empty_dict_when_monitor_aberrant(tmp_path: Path):
    """capture_dual must not produce PNGs from an aberrant monitor either.

    It is the same poisoning source as the full-context capture, so the
    expected result here is an empty dict.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),
    ]
    factory = _make_mock_mss([layout])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_dual(x=100, y=200, screenshot_id="shot_dual")

    assert result == {}, (
        f"capture_dual doit retourner {{}} sur dim aberrante, got {result!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 6 — capture_active_window bénéficie aussi de la garde
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_active_window_returns_none_when_monitor_aberrant(tmp_path: Path):
    """Standalone capture_active_window must refuse an aberrant monitor.

    "Standalone" means no full image is handed in: the method has to
    acquire the screen itself, and must return None here.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),
    ]
    factory = _make_mock_mss([layout])

    # get_active_window_rect is stubbed with a valid window; otherwise the
    # call would bail out before ever reaching the grab.
    fake_rect = dict(
        rect=[100, 100, 800, 600],
        size=[700, 500],
        title="Test Window",
        app_name="test_app",
    )

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    rect_patch = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=fake_rect,
    )
    with mss_patch, sleep_patch, rect_patch:
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_active_window(x=200, y=300, screenshot_id="shot_win")

    assert result is None, (
        f"capture_active_window doit retourner None sur dim aberrante, got {result!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 7 — Non-régression : dim normale produit toujours un PNG
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_full_context_succeeds_on_normal_dimensions(tmp_path: Path):
    """Non-regression: the guard does not break the nominal path."""
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=1600),
    ]
    factory = _make_mock_mss([layout])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_full_context("test_normal", force=True)

    assert result, f"Capture nominale doit produire un PNG, got {result!r}"
    assert Path(result).exists(), "PNG doit exister sur disque"
    # Exactly one mss.mss() call is expected in the normal case (no retry).
    n_calls = len(factory.instances)
    assert n_calls == 1, (
        f"Un seul appel mss.mss() attendu sur dims saines, vu {len(factory.instances)}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 8 — fail-closed : capture_dual refuse le fallback monitor secondaire
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_dual_fails_closed_when_only_secondary_monitor_sane(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """capture_dual must refuse the secondary-monitor fallback.

    capture_dual receives (x, y) in composite-screen coordinates. If
    monitors[2] (offset 2560, 0) were captured instead, a crop computed as
    ``img.crop((x, y, ...))`` would point at the wrong area because the
    coordinates are not translated. Rather than silently producing a
    shifted image, the fallback is refused for this method.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),  # broken primary
        dict(left=2560, top=0, width=1920, height=1080),  # sane secondary
    ]
    factory = _make_mock_mss([layout])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_dual(x=300, y=400, screenshot_id="shot_dual_fb")

    assert result == {}, (
        f"capture_dual doit fail-closed sur fallback secondaire, got {result!r}"
    )
    msg = " ".join(record.getMessage() for record in caplog.records).lower()
    # Any one of these keywords is accepted as an explanation of the refusal.
    explained = any(token in msg for token in ("fallback", "secondaire", "refus"))
    assert explained, (
        f"Un log doit expliquer le refus du fallback pour coords : {msg!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 9 — fail-closed : capture_active_window refuse le fallback secondaire
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_active_window_fails_closed_when_only_secondary_monitor_sane(
    tmp_path: Path,
):
    """capture_active_window must refuse the secondary-monitor fallback.

    Same reasoning as for capture_dual: cropping the monitors[2] image with
    a window rect expressed in global coordinates would select the wrong
    area, so the method must return None.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),
        dict(left=2560, top=0, width=1920, height=1080),
    ]
    factory = _make_mock_mss([layout])
    fake_rect = dict(
        rect=[100, 100, 800, 600],  # global coordinates inside monitors[1]
        size=[700, 500],
        title="Test Window",
        app_name="test_app",
    )

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    rect_patch = patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=fake_rect,
    )
    with mss_patch, sleep_patch, rect_patch:
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_active_window(x=200, y=300, screenshot_id="shot_win_fb")

    assert result is None, (
        f"capture_active_window doit fail-closed sur fallback secondaire, got {result!r}"
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 10 — mss noir : fallback ImageGrab
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_screen_image_falls_back_to_imagegrab_when_mss_is_black():
    """A black mss frame must no longer be accepted silently.

    When the ImageGrab path provides a usable image, that image (and its
    metadata) is what capture_screen_image must return.
    """
    from agent_v0.agent_v1.vision import capturer

    monitor = dict(left=0, top=0, width=320, height=240)
    black_img = _solid_img((0, 0, 0))
    fallback_img = _solid_img((210, 180, 90))
    imagegrab_meta = {
        "backend": "imagegrab",
        "luma": {"mean": 180.0, "stddev": 0.0, "min": 180, "max": 180},
    }

    # The mss path yields the black frame; the ImageGrab path yields the
    # colored one together with its metadata.
    mss_path = patch.object(
        capturer, "_acquire_safe_grab", return_value=(monitor, black_img)
    )
    imagegrab_path = patch.object(
        capturer,
        "_capture_via_imagegrab",
        return_value=(monitor, fallback_img, imagegrab_meta),
    )
    with mss_path, imagegrab_path:
        out_monitor, out_img, meta = capturer.capture_screen_image()

    assert out_monitor == monitor
    assert out_img is fallback_img
    assert meta["backend"] == "imagegrab"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 11 — capture_dual dégradé : conserver window_capture
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_dual_keeps_window_capture_when_fullscreen_is_unavailable(
    tmp_path: Path,
):
    """The window capture must survive even when full/crop are unavailable.

    This lets the server keep a useful context instead of working from a
    black screen.
    """
    fake_window = dict(
        window_image=str(tmp_path / "window_only.png"),
        window_title="Bloc-notes",
        app_name="notepad.exe",
        window_rect=[100, 100, 800, 600],
        window_size=[700, 500],
        click_in_window=[42, 24],
        click_inside_window=True,
    )

    capturer = _vision_capturer(tmp_path)
    # The full-screen acquisition reports no monitor and no image.
    no_screen = patch(
        "agent_v0.agent_v1.vision.capturer.capture_screen_image",
        return_value=(None, None, {"backend": "mss_black"}),
    )
    window_ok = patch.object(capturer, "capture_active_window", return_value=fake_window)
    with no_screen, window_ok:
        result = capturer.capture_dual(x=200, y=300, screenshot_id="shot_dual")

    assert "full" not in result
    assert "crop" not in result
    assert result["window_capture"] == fake_window
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Test 12 — non-régression : capture_full_context PEUT utiliser le fallback
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_capture_full_context_still_uses_secondary_fallback(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
):
    """capture_full_context may still use the secondary-monitor fallback.

    This capture (the heartbeat) carries no client coordinates, so any
    sane screen is good enough. Without the fallback, the heartbeat would
    fail as soon as one monitor is permanently broken.
    """
    layout = [
        dict(left=0, top=0, width=2560, height=1660),
        dict(left=0, top=0, width=2560, height=60),
        dict(left=2560, top=0, width=1920, height=1080),
    ]
    factory = _make_mock_mss([layout])

    mss_patch = patch("agent_v0.agent_v1.vision.capturer.mss.mss", side_effect=factory)
    sleep_patch = patch("agent_v0.agent_v1.vision.capturer.time.sleep")
    with mss_patch, sleep_patch:
        caplog.set_level(logging.WARNING, logger="agent_v0.agent_v1.vision.capturer")
        capturer = _vision_capturer(tmp_path)
        result = capturer.capture_full_context("test_heartbeat_fb", force=True)

    assert result, (
        f"capture_full_context doit accepter fallback (heartbeat sans coords), got {result!r}"
    )
    png_path = Path(result)
    assert png_path.exists()
|
||||
165
tests/unit/test_chat_window_paused_dispatch.py
Normal file
165
tests/unit/test_chat_window_paused_dispatch.py
Normal file
@@ -0,0 +1,165 @@
|
||||
"""Tests pour ChatWindow._dispatch_paused_action.
|
||||
|
||||
Couvre le routage bus SocketIO → fallback HTTP de la bulle paused.
|
||||
Le bug d'origine ``paused_bubble: bus déconnecté, resume non émis``
|
||||
était causé par l'absence de ce fallback (cf.
|
||||
``docs/CR_AUDIT_PAUSED_RESUME_BUS_2026-05-22.md``).
|
||||
|
||||
Les tests appellent ``ChatWindow._dispatch_paused_action`` en tant
|
||||
que fonction unbound avec un faux ``self`` (``SimpleNamespace``) pour
|
||||
éviter de démarrer Tkinter pendant les tests unitaires.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.agent_v1.ui.chat_window import ChatWindow # noqa: E402
|
||||
|
||||
|
||||
def _make_self(bus=None, server_client=None):
|
||||
return SimpleNamespace(_bus=bus, _server_client=server_client)
|
||||
|
||||
|
||||
def _call(mock_self, replay_id="replay_xyz",
          bus_method="resume_replay", client_method="resume_replay"):
    """Invoke ``ChatWindow._dispatch_paused_action`` unbound on a fake ``self``.

    Returns whatever the dispatcher returns; the tests unpack it as an
    ``(emitted, channel)`` pair.
    """
    dispatch = ChatWindow._dispatch_paused_action
    routing = {"bus_method": bus_method, "client_method": client_method}
    return dispatch(mock_self, replay_id, **routing)
|
||||
|
||||
|
||||
class TestDispatchPausedAction:
    """Routing of the paused-bubble action: SocketIO bus first, HTTP as fallback."""

    def test_bus_connected_and_emits_uses_bus(self):
        """A connected bus whose emit succeeds is used; HTTP is not touched."""
        bus = MagicMock(connected=True)
        bus.resume_replay.return_value = True
        client = MagicMock()
        client.resume_replay.return_value = True

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is True
        assert channel == "bus"
        bus.resume_replay.assert_called_once_with("replay_xyz")
        client.resume_replay.assert_not_called()

    def test_bus_disconnected_falls_back_to_http(self):
        """A disconnected bus is skipped and the HTTP client is called."""
        bus = MagicMock(connected=False)
        client = MagicMock()
        client.resume_replay.return_value = True

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is True
        assert channel == "http"
        bus.resume_replay.assert_not_called()
        client.resume_replay.assert_called_once_with("replay_xyz")

    def test_bus_emit_returns_false_falls_back_to_http(self):
        """Bus flagged connected but emit returns False (socket broken
        between connect() and send): the dispatch switches to HTTP."""
        bus = MagicMock(connected=True)
        bus.resume_replay.return_value = False
        client = MagicMock()
        client.resume_replay.return_value = True

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is True
        assert channel == "http"

    def test_bus_emit_raises_falls_back_to_http(self):
        """An exception raised by the bus emit also triggers the HTTP fallback."""
        bus = MagicMock(connected=True)
        bus.resume_replay.side_effect = RuntimeError("socket broken")
        client = MagicMock()
        client.resume_replay.return_value = True

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is True
        assert channel == "http"

    def test_no_bus_uses_http_directly(self):
        """Without any bus, the HTTP client is used straight away."""
        client = MagicMock()
        client.resume_replay.return_value = True

        fake_self = _make_self(bus=None, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is True
        assert channel == "http"

    def test_all_channels_fail_returns_false(self):
        """Critical case: bus disconnected AND HTTP unreachable. The caller's
        UI is expected to re-enable its buttons; here we only check that
        dispatch returns (False, '')."""
        bus = MagicMock(connected=False)
        client = MagicMock()
        client.resume_replay.return_value = False

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(fake_self)

        assert emitted is False
        assert channel == ""

    def test_neither_bus_nor_client_returns_false(self):
        """With neither a bus nor a client, dispatch returns (False, '')."""
        fake_self = _make_self(bus=None, server_client=None)
        emitted, channel = _call(fake_self)

        assert emitted is False
        assert channel == ""

    def test_client_method_missing_falls_through(self):
        """If server_client is an old client without resume_replay, nothing
        crashes: dispatch returns (False, '')."""
        bus = MagicMock(connected=False)
        legacy_client = SimpleNamespace()  # deliberately has no resume_replay

        fake_self = _make_self(bus=bus, server_client=legacy_client)
        emitted, channel = _call(fake_self)

        assert emitted is False
        assert channel == ""

    def test_abort_routing_symmetric(self):
        """The same mechanism covers abort: the method names requested by
        the caller are the ones actually used."""
        bus = MagicMock(connected=False)
        client = MagicMock()
        client.abort_replay.return_value = True

        fake_self = _make_self(bus=bus, server_client=client)
        emitted, channel = _call(
            fake_self,
            bus_method="abort_replay",
            client_method="abort_replay",
        )

        assert emitted is True
        assert channel == "http"
        client.abort_replay.assert_called_once_with("replay_xyz")
|
||||
|
||||
|
||||
class TestPausedBubbleHeight:
    """Covers _compute_paused_bubble_height (truncation patch, 22 May 2026)."""

    def test_empty_message_uses_minimum_height(self):
        """An empty message gets height 2 and no scrollbar."""
        height, needs_scroll = ChatWindow._compute_paused_bubble_height("")
        assert height == 2
        assert needs_scroll is False

    def test_short_message_no_scrollbar(self):
        """A short one-line message gets height 2 and no scrollbar."""
        height, needs_scroll = ChatWindow._compute_paused_bubble_height("Court message.")
        assert height == 2
        assert needs_scroll is False

    def test_long_single_line_triggers_scrollbar(self):
        """A 600-character line without newlines gets height 11 and a scrollbar."""
        # Expected by the original author: wrapped_lines = 600 // 60 + 1 = 11
        message = "x" * 600
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(message)
        assert height == 11
        assert needs_scroll is True

    def test_message_with_many_newlines_uses_explicit_count(self):
        """Original bug case: a server reason listing 6 candidates on 6 short
        lines, so the wrapped-line count is low but the explicit one is high."""
        message = "\n".join(f"option {i}" for i in range(6))
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(message)
        # 6 explicit lines outweigh the wrapped-line estimate: height is 6.
        assert height == 6
        # Below the cap and short content: no scrollbar yet.
        assert needs_scroll is False

    def test_cap_reached_triggers_scrollbar_even_if_short(self):
        """Past the cap (12 lines) the scrollbar MUST be shown, whatever the
        length in characters."""
        message = "\n".join(f"l{i}" for i in range(20))
        height, needs_scroll = ChatWindow._compute_paused_bubble_height(message)
        assert height == 12  # ceiling
        assert needs_scroll is True

    def test_long_content_triggers_scrollbar_at_200_chars(self):
        """Text safety threshold: at 200 characters or more, show the
        scrollbar even with few lines (guard against visual truncation)."""
        message = "x" * 220
        _, needs_scroll = ChatWindow._compute_paused_bubble_height(message)
        assert needs_scroll is True
|
||||
@@ -16,6 +16,7 @@ sys.path.insert(0, str(ROOT))
|
||||
from agent_v0.server_v1.api_stream import (
|
||||
_extract_required_apps_from_events,
|
||||
_extract_required_apps_from_workflow,
|
||||
_trim_redundant_setup_events,
|
||||
_resolve_launch_command,
|
||||
_infer_app_from_window_titles,
|
||||
_generate_setup_actions,
|
||||
@@ -220,6 +221,139 @@ class TestExtractRequiredAppsFromEvents:
|
||||
# Le premier app hors ignorées est Notepad
|
||||
assert result["first_window_title"] == "Bloc-notes"
|
||||
|
||||
def test_extracts_searchhost_launch_result_target(self):
    """Recover the real SearchHost click that launches the app."""

    def _win(app_name, title):
        # Fresh dict on every call, so events never share window objects.
        return {"app_name": app_name, "title": title}

    def _focus(src, dst, **extra):
        payload = {"type": "window_focus_change", "from": src, "to": dst}
        payload.update(extra)
        return {"event": payload}

    search_click = {
        "type": "mouse_click",
        "button": "left",
        "pos": [1449, 641],
        "timestamp": 10.0,
        "screen_metadata": {"screen_resolution": [2560, 1600]},
        "window": _win("SearchHost.exe", "Rechercher"),
        "window_capture": {
            "click_relative": [681, 448],
            "window_size": [1287, 1407],
        },
    }
    typed_query = {
        "type": "text_input",
        "text": "bloc",
        "window": _win("SearchHost.exe", "Rechercher"),
    }
    events = [
        _focus(None, _win("explorer.exe", "Explorateur")),
        _focus(
            _win("explorer.exe", "Explorateur"),
            _win("SearchHost.exe", "Rechercher"),
        ),
        {"event": typed_query},
        {"event": search_click},
        _focus(
            _win("SearchHost.exe", "Rechercher"),
            _win("explorer.exe", "unknown_window"),
            timestamp=10.4,
        ),
        _focus(
            _win("explorer.exe", "unknown_window"),
            _win("Notepad.exe", "Sans titre – Bloc-notes"),
            timestamp=11.1,
        ),
    ]

    result = _extract_required_apps_from_events(events)
    target = result["launch_result_target"]

    assert result["primary_app"] == "Notepad.exe"
    assert target["window_title"] == "Rechercher"
    assert target["expected_window_before"] == "Rechercher"
    # Click position expressed as a fraction of the 2560x1600 screen.
    assert target["x_pct"] == pytest.approx(1449 / 2560, rel=0, abs=1e-6)
    assert target["y_pct"] == pytest.approx(641 / 1600, rel=0, abs=1e-6)
    origin = target["original_position"]
    assert origin["x_relative"] == "au centre"
    assert origin["y_relative"] == "au milieu"
    assert target["window_capture"]["click_relative"] == [681, 448]
|
||||
|
||||
def test_extracts_start_menu_target(self):
    """Recover the real Start-button click that opens SearchHost."""

    def _win(app_name, title):
        # Fresh dict on every call, so events never share window objects.
        return {"app_name": app_name, "title": title}

    def _focus(src, dst, **extra):
        payload = {"type": "window_focus_change", "from": src, "to": dst}
        payload.update(extra)
        return {"event": payload}

    def _left_click(pos, timestamp, window):
        return {"event": {
            "type": "mouse_click",
            "button": "left",
            "pos": pos,
            "timestamp": timestamp,
            "screen_metadata": {"screen_resolution": [2560, 1600]},
            "window": window,
        }}

    events = [
        _focus(None, _win("explorer.exe", "Explorateur")),
        _left_click([993, 1559], 1.0, _win("explorer.exe", "Explorateur")),
        _focus(
            _win("explorer.exe", "Explorateur"),
            _win("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        _left_click([1449, 641], 4.0, _win("SearchHost.exe", "Rechercher")),
        _focus(
            _win("SearchHost.exe", "Rechercher"),
            _win("Notepad.exe", "Sans titre – Bloc-notes"),
            timestamp=4.4,
        ),
    ]

    result = _extract_required_apps_from_events(events)
    target = result["start_menu_target"]

    # The target must be the first click (993, 1559), not the SearchHost one.
    assert target["x_pct"] == pytest.approx(993 / 2560, rel=0, abs=1e-6)
    assert target["y_pct"] == pytest.approx(1559 / 1600, rel=0, abs=1e-6)
    origin = target["original_position"]
    assert origin["x_relative"] == "au centre"
    assert origin["y_relative"] == "en bas"
    assert "en bas" in target["position_desc"]
|
||||
|
||||
def test_extracts_start_menu_target_anchor_from_session_shot(self, tmp_path):
    """The Start click also picks up an image anchor from the source shot."""
    from PIL import Image

    session_dir = tmp_path / "sess"
    shots_dir = session_dir / "shots"
    shots_dir.mkdir(parents=True)

    # Blank full-screen capture. NOTE(review): the file name presumably maps
    # to the click's screenshot_id ("shot_start") — confirm against the
    # extractor.
    full_shot = Image.new("RGB", (2560, 1600), color="white")
    full_shot.save(shots_dir / "shot_start_full.png")

    def window(app_name, title):
        # Fresh dict per call so no two events share a mutable object.
        return {"app_name": app_name, "title": title}

    def focus_change(source, destination, timestamp=None):
        payload = {
            "type": "window_focus_change",
            "from": source,
            "to": destination,
        }
        if timestamp is not None:
            payload["timestamp"] = timestamp
        return {"event": payload}

    recorded = [
        focus_change(None, window("explorer.exe", "Explorateur")),
        {"event": {
            "type": "mouse_click",
            "button": "left",
            "pos": [993, 1559],
            "timestamp": 1.0,
            "screenshot_id": "shot_start",
            "screen_metadata": {"screen_resolution": [2560, 1600]},
            "window": window("explorer.exe", "Explorateur"),
        }},
        focus_change(
            window("explorer.exe", "Explorateur"),
            window("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        focus_change(
            window("SearchHost.exe", "Rechercher"),
            window("Notepad.exe", "Sans titre – Bloc-notes"),
            timestamp=2.0,
        ),
    ]

    app_info = _extract_required_apps_from_events(
        recorded,
        session_dir=str(session_dir),
    )
    start_target = app_info["start_menu_target"]

    # A non-empty base64 payload is all that is checked here.
    assert start_target["anchor_image_base64"]
|
||||
|
||||
def test_extracts_direct_typing_search_interaction(self):
    """Detect that no SearchHost click is needed before typing."""

    def window(app_name, title):
        # Fresh dict per call so no two events share a mutable object.
        return {"app_name": app_name, "title": title}

    def focus_change(source, destination, timestamp=None):
        payload = {
            "type": "window_focus_change",
            "from": source,
            "to": destination,
        }
        if timestamp is not None:
            payload["timestamp"] = timestamp
        return {"event": payload}

    # Start click, then text typed straight into SearchHost with no click
    # inside that window, then the application takes focus.
    recorded = [
        focus_change(None, window("explorer.exe", "Explorateur")),
        {"event": {
            "type": "mouse_click",
            "button": "left",
            "pos": [993, 1559],
            "timestamp": 1.0,
            "screen_metadata": {"screen_resolution": [2560, 1600]},
            "window": window("explorer.exe", "Explorateur"),
        }},
        focus_change(
            window("explorer.exe", "Explorateur"),
            window("SearchHost.exe", "Rechercher"),
            timestamp=1.2,
        ),
        {"event": {
            "type": "text_input",
            "text": "bloc",
            "window": window("SearchHost.exe", "Rechercher"),
            "timestamp": 2.0,
        }},
        focus_change(
            window("SearchHost.exe", "Rechercher"),
            window("Notepad.exe", "Sans titre – Bloc-notes"),
            timestamp=2.4,
        ),
    ]

    app_info = _extract_required_apps_from_events(recorded)

    assert app_info["search_box_interaction"]["mode"] == "direct_typing"
    assert app_info["search_box_interaction"]["window_title"] == "Rechercher"
|
||||
|
||||
def test_empty_events(self):
    """No events yields an empty dict."""
    extracted = _extract_required_apps_from_events([])
    assert extracted == {}
|
||||
@@ -245,6 +379,187 @@ class TestExtractRequiredAppsFromEvents:
|
||||
assert result["primary_launch_cmd"] == "calc"
|
||||
|
||||
|
||||
class TestTrimRedundantSetupEvents:
    """Tests for trimming the recorded preamble already covered by the setup."""

    # Titles reused across several scenarios.
    _URL_TITLE = "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes"
    _UNTITLED = "Sans titre – Bloc-notes"

    @staticmethod
    def _focus(app_name, title):
        """Build a window_focus_change event towards the given window."""
        return {"event": {
            "type": "window_focus_change",
            "to": {"app_name": app_name, "title": title},
        }}

    @staticmethod
    def _click(pos, app_name, title):
        """Build a mouse_click event at *pos* inside the given window."""
        return {"event": {
            "type": "mouse_click",
            "pos": pos,
            "window": {"app_name": app_name, "title": title},
        }}

    @staticmethod
    def _typed(text, app_name, title):
        """Build a text_input event typed inside the given window."""
        return {"event": {
            "type": "text_input",
            "text": text,
            "window": {"app_name": app_name, "title": title},
        }}

    def test_trims_until_first_primary_app_focus(self):
        """Everything up to the first focus on the primary app is dropped."""
        raw_events = [
            self._focus("explorer.exe", "Explorateur"),
            self._click([993, 1559], "explorer.exe", "Explorateur"),
            self._focus("SearchHost.exe", "Rechercher"),
            self._typed("bloc", "SearchHost.exe", "Rechercher"),
            self._click([1449, 641], "SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", self._URL_TITLE),
            self._click([1514, 562], "Notepad.exe", "*test – Bloc-notes"),
            self._typed("test", "Notepad.exe", "*test – Bloc-notes"),
        ]
        app_info = {
            "primary_app": "Notepad.exe",
            "first_window_title": "Bloc-notes",
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        # Only the two events recorded inside Notepad remain.
        assert len(trimmed) == 2
        first_kept, second_kept = trimmed[0]["event"], trimmed[1]["event"]
        assert first_kept["type"] == "mouse_click"
        assert first_kept["pos"] == [1514, 562]
        assert second_kept["type"] == "text_input"

    def test_keeps_events_when_no_matching_focus_found(self):
        """Without any focus on the primary app the events come back unchanged."""
        raw_events = [
            self._click([10, 10], "explorer.exe", "Explorateur"),
            self._typed("abc", "explorer.exe", "Explorateur"),
        ]
        app_info = {
            "primary_app": "Notepad.exe",
            "first_window_title": "Bloc-notes",
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert trimmed == raw_events

    def test_prefers_neutral_title_focus_after_non_neutral_first_focus(self):
        """Case observed in sess_20260520T102916_066851: the first Notepad
        focus has a non-neutral title (http...txt), followed by a click
        inside Notepad and a focus on 'Sans titre' (the neutral initial
        state that the automatic setup produces). The trim must cut up to
        the neutral focus so the redundant in-Notepad click is removed.
        """
        raw_events = [
            self._focus("SearchHost.exe", "Rechercher"),
            self._click([681, 448], "SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", self._URL_TITLE),
            self._click([1191, 40], "Notepad.exe", self._URL_TITLE),
            self._focus("Notepad.exe", self._UNTITLED),
            self._typed("test", "Notepad.exe", "*test – Bloc-notes"),
        ]
        app_info = {
            "primary_app": "Notepad.exe",
            "first_window_title": self._URL_TITLE,
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        # The in-Notepad click (event index 3) must be dropped: it switches
        # to 'Sans titre', which is already the setup state, so replaying it
        # has no visual effect and triggers retry_threshold.
        assert len(trimmed) == 1
        survivor = trimmed[0]["event"]
        assert survivor["type"] == "text_input"
        assert survivor["text"] == "test"

    def test_neutral_focus_outside_lookahead_window_is_ignored(self):
        """Safety net: a 'Sans titre' focus arriving too long after the
        first primary_app focus is not treated as the bootstrap state.
        This avoids cutting a workflow that revisits 'Sans titre' well
        after startup."""
        report_title = "rapport_final.txt – Bloc-notes"

        # 30 useful in-Notepad clicks separate the first focus from the
        # late neutral focus.
        raw_events = [self._focus("Notepad.exe", report_title)]
        raw_events.extend(
            self._click([100 + offset, 200], "Notepad.exe", report_title)
            for offset in range(30)
        )
        raw_events.append(self._focus("Notepad.exe", self._UNTITLED))
        raw_events.append(self._typed("x", "Notepad.exe", self._UNTITLED))

        app_info = {
            "primary_app": "Notepad.exe",
            "first_window_title": report_title,
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        # Must keep the 30 clicks + late focus + text_input = 32 events
        # (cut only at the first primary_app focus, the legacy behaviour).
        assert len(trimmed) == 32
        first_kept = trimmed[0]["event"]
        assert first_kept["type"] == "mouse_click"
        assert first_kept["pos"] == [100, 200]

    def test_keeps_legacy_behavior_when_first_focus_already_neutral(self):
        """Non-regression: when the first primary_app focus already has a
        neutral title (the normal case), the cut happens at that first
        focus as before, with no extra search for a neutral index."""
        raw_events = [
            self._focus("SearchHost.exe", "Rechercher"),
            self._focus("Notepad.exe", self._UNTITLED),
            self._typed("hello", "Notepad.exe", self._UNTITLED),
        ]
        app_info = {
            "primary_app": "Notepad.exe",
            "first_window_title": self._UNTITLED,
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert len(trimmed) == 1
        assert trimmed[0]["event"]["type"] == "text_input"

    def test_neutral_detection_recognizes_office_default_titles(self):
        """Word, Excel and PowerPoint use their own default titles
        (Document1, Classeur1, etc.), which the automatic setup also
        lands on."""
        opened_doc = "rapport.docx - Word"
        blank_doc = "Document1 - Word"
        raw_events = [
            self._focus("winword.exe", opened_doc),
            self._click([100, 40], "winword.exe", opened_doc),
            self._focus("winword.exe", blank_doc),
            self._typed("abc", "winword.exe", blank_doc),
        ]
        app_info = {
            "primary_app": "winword.exe",
            "first_window_title": opened_doc,
        }

        trimmed = _trim_redundant_setup_events(raw_events, app_info)

        assert len(trimmed) == 1
        assert trimmed[0]["event"]["type"] == "text_input"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests pour _extract_required_apps_from_workflow
|
||||
# =========================================================================
|
||||
@@ -304,10 +619,10 @@ class TestExtractRequiredAppsFromWorkflow:
|
||||
# =========================================================================
|
||||
|
||||
class TestGenerateSetupActions:
|
||||
"""Tests pour la génération des actions de setup 100% visuelles."""
|
||||
"""Tests pour la génération des actions de setup."""
|
||||
|
||||
def test_notepad_setup_visual(self):
|
||||
"""Génère les bonnes actions visuelles pour lancer Notepad."""
|
||||
def test_notepad_setup_uses_run_dialog(self):
|
||||
"""Bloc-notes utilise désormais le setup sémantique Win+R."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
@@ -315,74 +630,52 @@ class TestGenerateSetupActions:
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
|
||||
# 9 actions : click_start, wait, click_search, wait, type, wait, click_result, wait, verify
|
||||
assert len(actions) == 9
|
||||
assert len(actions) == 7
|
||||
|
||||
# Étape 1 : clic visuel sur le bouton Démarrer
|
||||
assert actions[0]["type"] == "click"
|
||||
assert actions[0]["visual_mode"] is True
|
||||
assert actions[0]["target_spec"]["by_role"] == "start_button"
|
||||
assert actions[0]["target_spec"]["by_text"] == "Démarrer"
|
||||
assert actions[0]["type"] == "key_combo"
|
||||
assert actions[0]["keys"] == ["win", "r"]
|
||||
assert actions[0]["_setup_step"] == "open_run_dialog"
|
||||
|
||||
# Étape 2 : attente menu Démarrer
|
||||
assert actions[1]["type"] == "wait"
|
||||
assert actions[1]["duration_ms"] == 1000
|
||||
assert actions[1]["duration_ms"] == 500
|
||||
|
||||
# Étape 3 : clic visuel sur la barre de recherche
|
||||
assert actions[2]["type"] == "click"
|
||||
assert actions[2]["visual_mode"] is True
|
||||
assert actions[2]["target_spec"]["by_role"] == "search_box"
|
||||
assert actions[2]["type"] == "type"
|
||||
assert actions[2]["text"] == "notepad"
|
||||
|
||||
# Étape 4 : attente barre de recherche active
|
||||
assert actions[3]["type"] == "wait"
|
||||
assert actions[3]["duration_ms"] == 500
|
||||
assert actions[3]["duration_ms"] == 300
|
||||
|
||||
# Étape 5 : taper le nom visuel français
|
||||
assert actions[4]["type"] == "type"
|
||||
assert actions[4]["text"] == "Bloc-notes"
|
||||
assert actions[4]["type"] == "key_combo"
|
||||
assert actions[4]["keys"] == ["enter"]
|
||||
|
||||
# Étape 6 : attente résultats
|
||||
assert actions[5]["type"] == "wait"
|
||||
assert actions[5]["duration_ms"] == 1200
|
||||
assert actions[5]["duration_ms"] == 2000
|
||||
|
||||
# Étape 7 : clic visuel sur le résultat
|
||||
assert actions[6]["type"] == "click"
|
||||
assert actions[6]["visual_mode"] is True
|
||||
assert actions[6]["target_spec"]["by_text"] == "Bloc-notes"
|
||||
assert actions[6]["target_spec"]["by_role"] == "app_icon"
|
||||
|
||||
# Étape 8 : attente lancement (app légère = 2000ms)
|
||||
assert actions[7]["type"] == "wait"
|
||||
assert actions[7]["duration_ms"] == 2000
|
||||
|
||||
# Étape 9 : vérification visuelle
|
||||
assert actions[8]["type"] == "verify_screen"
|
||||
assert actions[8]["_expected_title"] == "Sans titre – Bloc-notes"
|
||||
assert actions[6]["type"] == "verify_screen"
|
||||
assert actions[6]["expected_window_title_contains"] == ["Bloc-notes", "notepad"]
|
||||
|
||||
# Toutes les actions sont marquées comme phase setup
|
||||
for action in actions:
|
||||
assert action.get("_setup_phase") is True
|
||||
assert action.get("_setup_strategy") == "run_dialog"
|
||||
|
||||
def test_no_key_combo_in_setup(self):
|
||||
"""AUCUNE action key_combo ne doit être générée dans le setup."""
|
||||
def test_visual_setup_keeps_no_key_combo_for_word(self):
|
||||
"""Le setup visuel classique ne doit pas introduire de key_combo."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
"first_window_title": "Bloc-notes",
|
||||
"primary_app": "winword.exe",
|
||||
"primary_launch_cmd": "winword",
|
||||
"first_window_title": "Document1 - Word",
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
key_combos = [a for a in actions if a["type"] == "key_combo"]
|
||||
assert key_combos == [], (
|
||||
"Le setup 100% visuel ne doit JAMAIS contenir de key_combo. "
|
||||
f"Trouvé : {key_combos}"
|
||||
)
|
||||
assert key_combos == []
|
||||
|
||||
def test_all_clicks_are_visual(self):
|
||||
"""Tous les clics du setup doivent avoir visual_mode=True et un target_spec."""
|
||||
def test_all_clicks_are_visual_for_visual_setup(self):
|
||||
"""Tous les clics du setup visuel doivent avoir visual_mode=True."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
"first_window_title": "Bloc-notes",
|
||||
"primary_app": "winword.exe",
|
||||
"primary_launch_cmd": "winword",
|
||||
"first_window_title": "Document1 - Word",
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
clicks = [a for a in actions if a["type"] == "click"]
|
||||
@@ -402,11 +695,11 @@ class TestGenerateSetupActions:
|
||||
assert "vlm_description" in spec, f"target_spec sans vlm_description : {spec}"
|
||||
|
||||
def test_clicks_have_fallback_coordinates(self):
|
||||
"""Tous les clics visuels ont des coordonnées de fallback (x_pct, y_pct)."""
|
||||
"""Tous les clics visuels ont des coordonnées de fallback."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
"first_window_title": "Bloc-notes",
|
||||
"primary_app": "winword.exe",
|
||||
"primary_launch_cmd": "winword",
|
||||
"first_window_title": "Document1 - Word",
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
clicks = [a for a in actions if a["type"] == "click"]
|
||||
@@ -456,28 +749,130 @@ class TestGenerateSetupActions:
|
||||
click_result = [a for a in actions if a.get("_setup_step") == "click_app_result"][0]
|
||||
assert click_result["target_spec"]["by_text"] == "Microsoft Word"
|
||||
|
||||
def test_verify_screen_present_with_title(self):
|
||||
"""Un verify_screen est ajouté quand un titre de fenêtre est connu."""
|
||||
def test_prefers_recorded_searchhost_click_target(self):
    """The setup reuses the real SearchHost target when one was recorded."""
    recorded_target = {
        "x_pct": 0.566016,
        "y_pct": 0.400625,
        "window_title": "Rechercher",
        "expected_window_before": "Rechercher",
        "original_position": {
            "x_relative": "au centre",
            "y_relative": "au milieu",
        },
        "window_capture": {
            "click_relative": [681, 448],
            "window_size": [1287, 1407],
        },
        "position_desc": "au milieu au centre",
    }
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "launch_result_target": recorded_target,
    }

    actions = _generate_setup_actions(app_info)

    result_clicks = [
        action for action in actions
        if action.get("_setup_step") == "click_app_result"
    ]
    click_result = result_clicks[0]

    # Coordinates and window guard come from the recorded target.
    assert click_result["x_pct"] == pytest.approx(0.566016)
    assert click_result["y_pct"] == pytest.approx(0.400625)
    assert click_result["expected_window_before"] == "Rechercher"

    # The grounding spec targets the search result and carries the
    # recorded position hints.
    assert click_result["target_spec"]["by_text"] == "Microsoft Word"
    assert click_result["target_spec"]["by_role"] == "search_result"
    assert click_result["target_spec"]["allow_position_fallback"] is True
    assert click_result["target_spec"]["window_title"] == "Rechercher"
    assert click_result["target_spec"]["original_position"]["x_relative"] == "au centre"
    assert click_result["target_spec"]["window_capture"]["window_size"] == [1287, 1407]
    assert "résultat de recherche" in click_result["target_spec"]["vlm_description"]
|
||||
|
||||
def test_prefers_recorded_start_button_target(self):
    """The visual setup reuses the real Start click when one was recorded."""
    recorded_start = {
        "x_pct": 0.387891,
        "y_pct": 0.974375,
        "anchor_image_base64": "abc123",
        "original_position": {
            "x_relative": "au centre",
            "y_relative": "en bas",
        },
        "position_desc": "en bas au centre",
    }
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "start_menu_target": recorded_start,
    }

    actions = _generate_setup_actions(app_info)

    start_clicks = [
        action for action in actions
        if action.get("_setup_step") == "click_start_menu"
    ]
    click_start = start_clicks[0]

    # Fallback coordinates come from the recorded click.
    assert click_start["x_pct"] == pytest.approx(0.387891)
    assert click_start["y_pct"] == pytest.approx(0.974375)

    # The grounding spec is full-screen, has no text label, and carries
    # the recorded anchor image.
    assert click_start["target_spec"]["by_text"] == ""
    assert click_start["target_spec"]["by_role"] == "start_button"
    assert click_start["target_spec"]["screen_scope"] == "full_screen"
    assert click_start["target_spec"]["allow_position_fallback"] is True
    assert click_start["target_spec"]["anchor_image_base64"] == "abc123"
    assert click_start["target_spec"]["original_position"]["y_relative"] == "en bas"
    assert "icône Windows" in click_start["target_spec"]["vlm_description"]
|
||||
|
||||
def test_skips_search_click_for_direct_typing(self):
    """When the session types straight into SearchHost, click_search and
    its dedicated wait/verify are skipped. The verify_start_menu_open
    guard stays mandatory and precedes the typing step."""
    app_info = {
        "primary_app": "winword.exe",
        "primary_launch_cmd": "winword",
        "first_window_title": "Document1 - Word",
        "search_box_interaction": {
            "mode": "direct_typing",
            "window_title": "Rechercher",
        },
    }

    actions = _generate_setup_actions(app_info)
    setup_steps = [action.get("_setup_step") for action in actions]

    for skipped_step in (
        "click_search_box",
        "wait_search_ready",
        "verify_search_box_active",
    ):
        assert skipped_step not in setup_steps
    # Generic guard is kept — it is what secures the typing.
    assert "verify_start_menu_open" in setup_steps

    type_action = actions[setup_steps.index("type_app_name")]
    assert type_action["type"] == "type"
    assert type_action["text"] == "Word"
|
||||
|
||||
def test_verify_screen_final_present_with_title(self):
|
||||
"""Le setup run_dialog termine par une vérification souple sur le titre app."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
"first_window_title": "Sans titre – Bloc-notes",
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
verify = [a for a in actions if a.get("type") == "verify_screen"]
|
||||
assert len(verify) == 1
|
||||
assert verify[0]["_expected_title"] == "Sans titre – Bloc-notes"
|
||||
final_verifies = [
|
||||
a for a in actions
|
||||
if a.get("type") == "verify_screen"
|
||||
and a.get("_setup_step") == "verify_app_ready"
|
||||
]
|
||||
assert len(final_verifies) == 1
|
||||
assert "Bloc-notes" in final_verifies[0]["expected_window_title_contains"]
|
||||
|
||||
def test_no_verify_without_title(self):
|
||||
"""Pas de verify_screen si aucun titre de fenêtre n'est connu."""
|
||||
def test_run_dialog_keeps_final_verify_even_without_exact_title(self):
|
||||
"""Le setup run_dialog garde une vérification finale générique."""
|
||||
app_info = {
|
||||
"primary_app": "Notepad.exe",
|
||||
"primary_launch_cmd": "notepad",
|
||||
"first_window_title": "",
|
||||
}
|
||||
actions = _generate_setup_actions(app_info)
|
||||
verify = [a for a in actions if a.get("type") == "verify_screen"]
|
||||
assert len(verify) == 0
|
||||
# Aucun verify_screen ne doit porter _expected_title.
|
||||
final_verifies = [
|
||||
a for a in actions
|
||||
if a.get("type") == "verify_screen"
|
||||
and a.get("_setup_step") == "verify_app_ready"
|
||||
]
|
||||
assert len(final_verifies) == 1
|
||||
assert "notepad" in [p.lower() for p in final_verifies[0]["expected_window_title_contains"]]
|
||||
|
||||
def test_empty_app_info(self):
|
||||
"""Dict vide → pas d'actions."""
|
||||
@@ -537,12 +932,184 @@ class TestGenerateSetupActions:
|
||||
assert type_action["text"] == "MonAppMedical"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests des gardes visuelles du setup (verify_screen titre fenêtre)
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestSetupVisualGuards:
    """Cover the visual guards inserted between the steps of the Windows
    auto-setup (added after the live `position_fallback` blockage of
    22 May 2026).

    Without these guards, a Start click that actually hit the systray
    overflow popup let the setup type « bloc » into the wrong window, and
    only the final `click_result` surfaced the error — too late. The
    window-title `verify_screen` steps stop the run right after each
    critical step.
    """

    @staticmethod
    def _word_setup(search_box_interaction=None):
        """Generate the Word setup; return ``(actions, step_names)``.

        ``search_box_interaction`` is added to the app info only when given.
        """
        app_info = {
            "primary_app": "winword.exe",
            "primary_launch_cmd": "winword",
            "first_window_title": "Document1 - Word",
        }
        if search_box_interaction is not None:
            app_info["search_box_interaction"] = search_box_interaction
        actions = _generate_setup_actions(app_info)
        return actions, [action.get("_setup_step") for action in actions]

    def test_verify_start_menu_open_inserted_after_wait_start(self):
        """A verify_screen guard is inserted right after wait_start_menu."""
        actions, steps = self._word_setup()

        # Order: click_start_menu → wait_start_menu → verify_start_menu_open
        assert "verify_start_menu_open" in steps
        wait_pos = steps.index("wait_start_menu")
        guard_pos = steps.index("verify_start_menu_open")
        assert guard_pos == wait_pos + 1

        guard = actions[guard_pos]
        assert guard["type"] == "verify_screen"
        assert guard.get("_setup_phase") is True
        patterns = guard.get("expected_window_title_contains") or []
        assert isinstance(patterns, list) and patterns
        lowered = [pattern.lower() for pattern in patterns]
        # Must cover at least FR + EN + the SearchHost / StartMenu app
        assert any("recherch" in p for p in lowered), patterns
        assert any("search" in p for p in lowered), patterns

    def test_verify_search_box_active_inserted_when_click_then_type(self):
        """When the setup clicks the search bar and then waits, a
        verify_screen guard follows the wait so typing is blocked if the
        focus is not actually in the bar."""
        actions, steps = self._word_setup({
            "mode": "click_then_type",
            "window_title": "Rechercher",
            "x_pct": 0.10, "y_pct": 0.95,
        })

        assert "verify_search_box_active" in steps
        ready_pos = steps.index("wait_search_ready")
        guard_pos = steps.index("verify_search_box_active")
        type_pos = steps.index("type_app_name")
        # Order: wait_search_ready → verify_search_box_active → type_app_name
        assert guard_pos == ready_pos + 1
        assert type_pos == guard_pos + 1

        guard = actions[guard_pos]
        assert guard["type"] == "verify_screen"
        patterns = guard.get("expected_window_title_contains") or []
        exact_match = "Rechercher" in patterns
        assert exact_match or any(
            p.lower() == "rechercher" for p in patterns
        )

    def test_no_verify_search_box_when_direct_typing(self):
        """In direct_typing mode the bar is never clicked, so there is no
        dedicated verify_search_box_active (the verify_start_menu_open
        guard is enough; typing follows it directly)."""
        _, steps = self._word_setup({
            "mode": "direct_typing",
            "window_title": "Rechercher",
        })

        assert "verify_search_box_active" not in steps
        # The verify_start_menu_open guard stays present (covers the typing).
        assert "verify_start_menu_open" in steps
        guard_pos = steps.index("verify_start_menu_open")
        type_pos = steps.index("type_app_name")
        assert type_pos > guard_pos, (
            "type_app_name doit suivre verify_start_menu_open en direct_typing"
        )

    def test_verify_search_results_visible_inserted_before_click_result(self):
        """Last safety net: the search bar (and its results) must still be
        active right before `click_app_result`. Without this final guard,
        a focus lost during `wait_search_results` can make
        `click_app_result` click the wrong surface (observed live on
        2026-05-22 — the observed window was ``Fenêtre de dépassement de
        capacité de la barre d'état système.``)."""
        actions, steps = self._word_setup()

        assert "verify_search_results_visible" in steps
        results_pos = steps.index("wait_search_results")
        guard_pos = steps.index("verify_search_results_visible")
        click_pos = steps.index("click_app_result")
        # Order: wait_search_results → verify_search_results_visible → click_app_result
        assert guard_pos == results_pos + 1
        assert click_pos == guard_pos + 1

        guard = actions[guard_pos]
        assert guard["type"] == "verify_screen"
        patterns = guard.get("expected_window_title_contains") or []
        assert isinstance(patterns, list) and patterns
        lowered = [pattern.lower() for pattern in patterns]
        assert any("recherch" in p for p in lowered), patterns
        assert any("search" in p for p in lowered), patterns

    def test_verify_search_results_visible_present_in_direct_typing(self):
        """The final guard before click_app_result stays mandatory
        whatever the search-bar interaction mode."""
        _, steps = self._word_setup({
            "mode": "direct_typing",
            "window_title": "Rechercher",
        })

        assert "verify_search_results_visible" in steps

    def test_setup_guards_have_short_timeout(self):
        """The verify_screen guards have a short timeout (≤ 2 s) — they
        are a title check, not a long wait."""
        actions, _ = self._word_setup({
            "mode": "click_then_type",
            "window_title": "Rechercher",
        })

        guard_steps = (
            "verify_start_menu_open",
            "verify_search_box_active",
            "verify_search_results_visible",
        )
        guards = [
            action for action in actions
            if action.get("_setup_step") in guard_steps
        ]
        assert guards, "il doit exister au moins une garde verify_screen"
        for guard in guards:
            # A guard without an explicit timeout_ms counts as 5000 ms and
            # therefore fails the check.
            assert guard.get("timeout_ms", 5000) <= 2000
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests d'intégration : pipeline complet events → setup visuel
|
||||
# =========================================================================
|
||||
|
||||
class TestSetupPipeline:
|
||||
"""Tests du pipeline complet : extraction + génération visuelle."""
|
||||
"""Tests du pipeline complet : extraction + génération du setup."""
|
||||
|
||||
def test_full_pipeline_from_events(self):
|
||||
"""Pipeline complet depuis des événements bruts de type Notepad."""
|
||||
@@ -561,24 +1128,25 @@ class TestSetupPipeline:
|
||||
assert app_info["primary_app"] == "Notepad.exe"
|
||||
|
||||
actions = _generate_setup_actions(app_info)
|
||||
assert len(actions) >= 8 # Au minimum 8 actions visuelles (sans verify si pas de titre)
|
||||
assert len(actions) == 7
|
||||
|
||||
# Vérifier l'ordre logique 100% visuel
|
||||
types = [a["type"] for a in actions]
|
||||
assert types[0] == "click" # Clic Démarrer
|
||||
assert types[1] == "wait" # Attente menu
|
||||
assert types[2] == "click" # Clic barre de recherche
|
||||
assert types[3] == "wait" # Attente barre active
|
||||
assert types[4] == "type" # Taper le nom
|
||||
assert types[5] == "wait" # Attente résultats
|
||||
assert types[6] == "click" # Clic sur le résultat
|
||||
assert types[7] == "wait" # Attente lancement
|
||||
steps = [a.get("_setup_step") for a in actions]
|
||||
expected_step_order = [
|
||||
"open_run_dialog",
|
||||
"wait_run_dialog",
|
||||
"type_launch_command",
|
||||
"wait_launch_command",
|
||||
"submit_run_dialog",
|
||||
"wait_app_launch",
|
||||
"verify_app_ready",
|
||||
]
|
||||
assert steps == expected_step_order, steps
|
||||
|
||||
# AUCUN key_combo dans le pipeline
|
||||
assert "key_combo" not in types, "Le pipeline ne doit contenir aucun key_combo"
|
||||
assert types.count("key_combo") == 2
|
||||
|
||||
# Le texte tapé est le nom visuel français
|
||||
assert actions[4]["text"] == "Bloc-notes"
|
||||
idx_type = steps.index("type_launch_command")
|
||||
assert actions[idx_type]["text"] == "notepad"
|
||||
|
||||
def test_full_pipeline_from_workflow(self):
|
||||
"""Pipeline complet depuis un workflow structuré."""
|
||||
@@ -599,12 +1167,12 @@ class TestSetupPipeline:
|
||||
assert app_info["primary_app"] == "Notepad.exe"
|
||||
|
||||
actions = _generate_setup_actions(app_info)
|
||||
assert len(actions) >= 8
|
||||
assert len(actions) == 7
|
||||
|
||||
# Le texte tapé doit être le nom visuel, pas la commande shell
|
||||
# Le texte tapé doit être la commande shell pour le setup Win+R.
|
||||
type_action = [a for a in actions if a["type"] == "type"][0]
|
||||
assert type_action["text"] == "Bloc-notes"
|
||||
assert type_action["text"] == "notepad"
|
||||
|
||||
# Aucun key_combo
|
||||
# Le setup Notepad s'appuie maintenant sur deux key_combo.
|
||||
key_combos = [a for a in actions if a["type"] == "key_combo"]
|
||||
assert key_combos == []
|
||||
assert len(key_combos) == 2
|
||||
|
||||
79
tests/unit/test_executor_anchor_drift_guard.py
Normal file
79
tests/unit/test_executor_anchor_drift_guard.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Tests pour la garde drift de `_template_match_anchor`.
|
||||
|
||||
Brief Codex 2026-05-23 07:56 : faux succès live `act_raw_77db702f` où
|
||||
ANCHOR-TM matche un crop dans OBS Studio à (0.205, 0.170) score 0.842
|
||||
alors que la position enregistrée est ~(0.706, 0.348) dans Bloc-notes.
|
||||
La cascade serveur avait rejeté (`rejected_text_mismatch`) mais l'agent
|
||||
fallback ANCHOR-TM côté client sans aucune garde de position acceptait
|
||||
n'importe quel match au-dessus du seuil score.
|
||||
|
||||
Le helper statique `_anchor_match_within_drift` rejette les matchs
|
||||
loin de la position fallback enregistrée.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402
|
||||
|
||||
|
||||
class TestAnchorMatchDriftGuard:
    """Checks of ``ActionExecutorV1._anchor_match_within_drift``.

    Each test compares a matched position with the recorded fallback
    position, both expressed as screen fractions.
    """

    @staticmethod
    def _within_drift(matched, fallback, **extra):
        """Call the guard with (x, y) pairs instead of four keywords."""
        matched_x, matched_y = matched
        fallback_x, fallback_y = fallback
        return ActionExecutorV1._anchor_match_within_drift(
            matched_x_pct=matched_x, matched_y_pct=matched_y,
            fallback_x_pct=fallback_x, fallback_y_pct=fallback_y,
            **extra,
        )

    def test_match_close_to_fallback_accepted(self):
        # Match sits almost on the recorded position -> accepted.
        assert self._within_drift((0.71, 0.35), (0.706, 0.348))

    def test_match_far_from_fallback_rejected(self):
        # Exact values of the live incident.
        assert not self._within_drift((0.205, 0.170), (0.706, 0.348))

    def test_drift_at_threshold_accepted(self):
        # Drift of exactly 0.25 (the boundary) is still accepted.
        assert self._within_drift((0.5, 0.5), (0.25, 0.5))

    def test_drift_just_above_threshold_rejected(self):
        assert not self._within_drift((0.5, 0.5), (0.24, 0.5))

    def test_no_recorded_fallback_keeps_legacy_behavior(self):
        """Without a recorded fallback (0, 0) no guard is possible."""
        assert self._within_drift((0.5, 0.5), (0.0, 0.0))

    def test_custom_max_drift(self):
        """The threshold can be set by the caller."""
        matched = (0.65, 0.50)
        fallback = (0.50, 0.50)
        # With max_drift=0.10 a 0.15 drift is rejected...
        assert not self._within_drift(matched, fallback, max_drift=0.10)
        # ...but the same drift passes with the default threshold.
        assert self._within_drift(matched, fallback)

    def test_drift_y_axis(self):
        """A y drift above the threshold is rejected even if x matches."""
        assert not self._within_drift((0.50, 0.95), (0.50, 0.50))
|
||||
744
tests/unit/test_executor_verify_window_guard.py
Normal file
744
tests/unit/test_executor_verify_window_guard.py
Normal file
@@ -0,0 +1,744 @@
|
||||
"""Tests pour la garde verify_screen.expected_window_title_contains.
|
||||
|
||||
Cette garde protège les étapes du setup auto Windows contre les
|
||||
configurations où ``click_start_menu`` se trompe de cible (systray
|
||||
overflow popup, par exemple) et laisse la frappe partir dans la
|
||||
mauvaise fenêtre. Ajoutée le 22 mai 2026 — cf.
|
||||
``docs/CR_AUDIT_SETUP_VISUAL_GUARDS_2026-05-22.md``.
|
||||
|
||||
On teste deux choses :
|
||||
1. Le helper statique ``_window_title_matches_any`` (substring + case).
|
||||
2. Le routage de la garde dans ``verify_screen`` : succès si titre
|
||||
matche, bascule en mode apprentissage / pause sinon.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Helper substring matching
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestWindowTitleMatchesAny:
    """Checks of the static helper ``_window_title_matches_any``."""

    def test_substring_match(self):
        matches = ActionExecutorV1._window_title_matches_any
        assert matches("Rechercher", ["Rechercher"])

    def test_case_insensitive(self):
        matches = ActionExecutorV1._window_title_matches_any
        assert matches("RECHERCHER - Cortana", ["rechercher"])

    def test_partial_match_first_pattern(self):
        matches = ActionExecutorV1._window_title_matches_any
        candidates = ["search", "rechercher", "cortana"]
        assert matches("Cortana - Rechercher", candidates)

    def test_no_match_returns_false(self):
        matches = ActionExecutorV1._window_title_matches_any
        systray_title = (
            "Fenêtre de dépassement de capacité de la barre d'état système"
        )
        assert not matches(systray_title, ["Rechercher", "Search", "Cortana"])

    def test_empty_patterns_returns_true(self):
        """No pattern requested -> the guard is neutral."""
        matches = ActionExecutorV1._window_title_matches_any
        assert matches("X", [])
        assert matches("X", None)

    def test_empty_title_with_patterns_returns_false(self):
        matches = ActionExecutorV1._window_title_matches_any
        assert not matches("", ["X"])

    def test_ignore_empty_pattern_entries(self):
        """Empty entries in the list must not match the whole title."""
        matches = ActionExecutorV1._window_title_matches_any
        assert not matches("rien à voir", ["", None, ""])
|
||||
|
||||
|
||||
class TestKnownRuntimeDialogs:
    """Lookup of known runtime dialogs from a window title."""

    def test_match_confirm_save_overwrite_dialog(self):
        # Title written with a plain ASCII apostrophe.
        found = ActionExecutorV1._match_known_runtime_dialog(
            "Confirmer l'enregistrement"
        )
        assert found is not None
        assert found["id"] == "confirm_save_overwrite"
        assert found["button_texts"][0] == "Oui"

    def test_match_confirm_save_overwrite_dialog_with_typographic_apostrophe(self):
        # Same title, written with the typographic apostrophe.
        found = ActionExecutorV1._match_known_runtime_dialog(
            "Confirmer l’enregistrement"
        )
        assert found is not None
        assert found["id"] == "confirm_save_overwrite"

    def test_unknown_title_returns_none(self):
        found = ActionExecutorV1._match_known_runtime_dialog("Bloc-notes")
        assert found is None
|
||||
|
||||
|
||||
class TestContextualRuntimeDialogs:
    """Checks of ``_maybe_contextualize_action_to_foreground_dialog``."""

    _WINDOW_INFO = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info"
    )
    _WINDOW_RECT = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect"
    )

    @staticmethod
    def _executor_seeing_dont_save_button():
        """Build an executor whose text search only finds "Ne pas enregistrer"."""
        exe = _make_executor_skeleton()
        exe._capture_screenshot_b64 = MagicMock(return_value="shot")

        def _locate(_shot, text):
            if text == "Ne pas enregistrer":
                return (100, 100)
            return None

        exe._find_text_on_screen = MagicMock(side_effect=_locate)
        return exe

    def test_contextual_notepad_unsaved_dialog_is_detected_via_visual_evidence(self):
        exe = self._executor_seeing_dont_save_button()
        recorded_title = "*test – Bloc-notes"
        action = {
            "action_id": "act_save_from_dialog",
            "type": "click",
            "visual_mode": True,
            "target_spec": {
                "window_title": recorded_title,
                "by_text": "Enregistrer",
            },
            "expected_window_before": recorded_title,
        }
        target_spec = dict(action["target_spec"])
        foreground = {"title": "Bloc-notes", "app_name": "Notepad.exe"}
        foreground_rect = {
            "title": "Bloc-notes",
            "app_name": "Notepad.exe",
            "rect": [500, 300, 1400, 900],
        }

        with patch(
            self._WINDOW_INFO, return_value=foreground,
        ), patch(
            self._WINDOW_RECT, return_value=foreground_rect,
        ):
            adapted = exe._maybe_contextualize_action_to_foreground_dialog(
                action,
                target_spec,
            )

        assert adapted is not None
        assert adapted["dialog_spec"]["id"] == "notepad_unsaved_changes"
        assert adapted["action"]["expected_window_before"] == "Bloc-notes"
        adapted_spec = adapted["target_spec"]
        assert adapted_spec["window_title"] == "Bloc-notes"
        assert adapted_spec["context_hints"]["foreground_dialog_id"] == (
            "notepad_unsaved_changes"
        )
        assert adapted_spec["window_capture"]["rect"] == [500, 300, 1400, 900]

    def test_contextual_notepad_dialog_is_ignored_without_matching_action(self):
        exe = self._executor_seeing_dont_save_button()
        action = {
            "action_id": "act_other_button",
            "type": "click",
            "visual_mode": True,
            "target_spec": {
                "window_title": "*test – Bloc-notes",
                "by_text": "Annuler",
            },
        }
        foreground = {"title": "Bloc-notes", "app_name": "Notepad.exe"}

        with patch(self._WINDOW_INFO, return_value=foreground):
            adapted = exe._maybe_contextualize_action_to_foreground_dialog(
                action,
                dict(action["target_spec"]),
            )

        assert adapted is None
|
||||
|
||||
|
||||
class TestPostVerifyWindowTransition:
    """Checks of ``_requires_post_verify_window_transition``."""

    def test_requires_transition_when_expected_after_differs_from_source_window(self):
        source_action = {"expected_window_before": "*test – Bloc-notes"}
        needs_transition = ActionExecutorV1._requires_post_verify_window_transition(
            action=source_action,
            target_spec=None,
            expected_after="Enregistrer sous",
        )
        assert needs_transition

    def test_same_window_title_does_not_require_transition(self):
        # Expected title equals the source title minus its leading "*".
        source_action = {"expected_window_before": "*test – Bloc-notes"}
        needs_transition = ActionExecutorV1._requires_post_verify_window_transition(
            action=source_action,
            target_spec=None,
            expected_after="test – Bloc-notes",
        )
        assert not needs_transition
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Routage de la garde dans verify_screen
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _make_executor_skeleton():
    """Build an ActionExecutorV1 without running its heavy ``__init__``.

    The constructor (MouseController / KeyboardController / mss) is
    bypassed through ``__new__``; only the attributes needed by the
    tested branches are wired by hand.
    """
    exe = ActionExecutorV1.__new__(ActionExecutorV1)

    plain_attributes = {
        "_notification_manager": None,
        "_system_dialog_pause": None,
        "_chat_window_ref": None,
        "_api_token": "",
        "_poll_backoff": 1.0,
        "_poll_backoff_min": 1.0,
        "_poll_backoff_max": 30.0,
        "_poll_backoff_factor": 1.5,
    }
    for attr_name, attr_value in plain_attributes.items():
        setattr(exe, attr_name, attr_value)

    # Fake mss handle exposing a single 1920x1080 monitor.
    fake_sct = MagicMock()
    fake_sct.monitors = [None, {"width": 1920, "height": 1080}]
    exe._sct = fake_sct

    # Stub the agent-side I/O helpers.
    exe._check_and_pause_on_system_dialog = MagicMock(return_value=False)
    exe._capture_screenshot_b64 = MagicMock(return_value=None)
    return exe
|
||||
|
||||
|
||||
def _verify_action(patterns, timeout_ms=200):
|
||||
return {
|
||||
"action_id": "act_test_verify",
|
||||
"type": "verify_screen",
|
||||
"expected_node": "",
|
||||
"timeout_ms": timeout_ms,
|
||||
"expected_window_title_contains": patterns,
|
||||
}
|
||||
|
||||
|
||||
class TestVerifyScreenWindowGuard:
    """Routing of the window-title guard through ``execute_replay_action``."""

    _WINDOW_INFO = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info"
    )

    def test_matching_title_returns_success(self):
        exe = _make_executor_skeleton()
        with patch(self._WINDOW_INFO, return_value={"title": "Rechercher"}):
            outcome = exe.execute_replay_action(
                _verify_action(["Rechercher", "Search"])
            )
        assert outcome["success"] is True
        assert outcome.get("warning") != "setup_guard_window_mismatch"

    def test_mismatch_with_human_correction_returns_success_supervised(self):
        exe = _make_executor_skeleton()
        # The user performs a corrective click once learning mode
        # starts: the sequence is captured and control goes back to
        # the server.
        corrective_click = {"type": "click", "x_pct": 0.10, "y_pct": 0.95}
        exe._capture_human_correction = MagicMock(
            return_value=[corrective_click]
        )
        systray_window = {
            "title": "Fenêtre de dépassement de capacité de la barre d'état système",
        }
        with patch(self._WINDOW_INFO, return_value=systray_window):
            outcome = exe.execute_replay_action(
                _verify_action(["Rechercher", "Search"])
            )
        assert outcome["success"] is True
        assert outcome["warning"] == "setup_guard_window_mismatch"
        assert outcome["resolution_method"] == "human_supervised"
        correction = outcome["correction"]
        assert correction["trigger"] == "setup_guard_window_mismatch"
        assert correction["expected_patterns"] == ["Rechercher", "Search"]

    def test_mismatch_without_human_pauses_replay(self):
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock(return_value=[])
        wrong_window = {"title": "Notepad - Sans titre"}
        with patch(self._WINDOW_INFO, return_value=wrong_window):
            outcome = exe.execute_replay_action(_verify_action(["Rechercher"]))
        assert outcome["success"] is False
        assert outcome["warning"] == "setup_guard_window_mismatch"
        assert outcome.get("needs_human") is True
        assert "Rechercher" in outcome["error"]

    def test_verify_without_patterns_is_neutral_wait(self):
        """Without expected_window_title_contains, verify_screen only waits.

        No window check and no learning mode are expected.
        """
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock()
        neutral_verify = {
            "action_id": "act_test_verify_neutral",
            "type": "verify_screen",
            "expected_node": "node_x",
            "timeout_ms": 200,
        }
        outcome = exe.execute_replay_action(neutral_verify)
        assert outcome["success"] is True
        exe._capture_human_correction.assert_not_called()

    def test_known_runtime_dialog_is_auto_handled_before_pause(self):
        exe = _make_executor_skeleton()
        exe._capture_human_correction = MagicMock(return_value=[])
        skip_result = {
            "action_id": "act_test_click",
            "success": True,
            "warning": "runtime_dialog_handled_skip",
            "resolution_method": "runtime_dialog:confirm_save_overwrite",
            "screenshot": None,
            "visual_resolved": False,
        }
        exe._maybe_handle_runtime_dialog_before_pause = MagicMock(
            return_value=skip_result
        )
        click_action = {
            "action_id": "act_test_click",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.5,
            "y_pct": 0.5,
            "target_spec": {
                "window_title": "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes",
                "by_text": "",
            },
        }
        dialog_window = {"title": "Confirmer l'enregistrement"}
        with patch(self._WINDOW_INFO, return_value=dialog_window):
            outcome = exe.execute_replay_action(click_action)
        assert outcome["success"] is True
        assert outcome["warning"] == "runtime_dialog_handled_skip"
        exe._maybe_handle_runtime_dialog_before_pause.assert_called_once()
        exe._capture_human_correction.assert_not_called()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Skip pixel-change validation pour les actions _setup_phase
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _make_executor_with_mouse_skeleton():
    """Extend ``_make_executor_skeleton`` with mouse and keyboard mocks.

    This lets a test go through the click branch of
    ``execute_replay_action`` without touching the real desktop.
    """
    exe = _make_executor_skeleton()

    fake_mouse = MagicMock()
    fake_mouse.position = (0, 0)
    exe.mouse = fake_mouse
    exe.keyboard = MagicMock()

    # A non-empty hash string keeps the pixel-change check active.
    exe._quick_screenshot_hash = MagicMock(return_value="hash_before")
    return exe
|
||||
|
||||
|
||||
class TestSetupActionsSkipPixelChange:
    """Pixel-change validation is disabled for auto-setup actions.

    For actions flagged ``_setup_phase=True`` the following
    verify_screen guard decides instead; otherwise a click_start that
    opens the systray overflow popup would be validated merely because
    the screen changed.
    """

    def test_setup_click_skips_screen_change_check(self):
        exe = _make_executor_with_mouse_skeleton()
        exe._wait_for_screen_change = MagicMock(return_value=False)
        exe._capture_human_correction = MagicMock()
        # No visual_mode, so no real visual resolution is attempted.
        setup_click = {
            "action_id": "act_setup_click_start",
            "type": "click",
            "x_pct": 0.02,
            "y_pct": 0.98,
            "_setup_phase": True,
            "_setup_step": "click_start_menu",
        }

        outcome = exe.execute_replay_action(setup_click)

        assert outcome["success"] is True
        # _wait_for_screen_change must NOT run for setup actions...
        exe._wait_for_screen_change.assert_not_called()
        # ...and learning mode must not be triggered either.
        exe._capture_human_correction.assert_not_called()

    def test_non_setup_click_still_runs_screen_change_check(self):
        """Non-regression: a click outside setup keeps the pixel check.

        When the screen does not change, the check reports it.
        """
        exe = _make_executor_with_mouse_skeleton()
        exe._wait_for_screen_change = MagicMock(return_value=False)
        exe._capture_human_correction = MagicMock(return_value=[])
        # Deliberately no _setup_phase key.
        user_click = {
            "action_id": "act_user_click",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
        }

        outcome = exe.execute_replay_action(user_click)

        exe._wait_for_screen_change.assert_called_once()
        # No visual_mode -> plain failure branch, success=False.
        assert outcome.get("warning") == "no_screen_change"
        assert outcome["success"] is False
|
||||
|
||||
|
||||
class TestRuntimeDialogHandling:
    """Handling of known runtime dialogs before a pause and in post-verify."""

    _NOTEPAD_TITLE = "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes"

    @staticmethod
    def _handled_oui():
        """Return a fresh "dialog handled through the Oui button" payload."""
        return {
            "handled": True,
            "button_text": "Oui",
            "x_pct": 0.33,
            "y_pct": 0.66,
            "resolution_score": 0.9,
        }

    @staticmethod
    def _post_verify_executor():
        """Build an executor whose click and screen-change helpers are mocked."""
        exe = _make_executor_skeleton()
        exe._click = MagicMock()
        exe._quick_screenshot_hash = MagicMock(return_value="hash_before")
        exe._wait_for_screen_change = MagicMock(return_value=True)
        return exe

    @staticmethod
    def _replay(exe, action, **window_info_patch):
        """Run *action* with sleep neutralised and the active window patched."""
        with patch(
            "agent_v0.agent_v1.core.executor.time.sleep",
            lambda *_a, **_k: None,
        ), patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info",
            **window_info_patch,
        ):
            return exe.execute_replay_action(action)

    def _recording_handler(self, calls):
        """Return a mock dialog handler that appends each call to *calls*."""

        def _fake_handle(dialog_spec, current_title, screen_width, screen_height):
            calls.append(current_title)
            return self._handled_oui()

        return MagicMock(side_effect=_fake_handle)

    def test_handle_confirm_save_dialog_clicks_oui_via_server(self):
        exe = _make_executor_skeleton()
        exe._capture_screenshot_b64 = MagicMock(return_value="abc")
        server_answer = {
            "resolved": True,
            "x_pct": 0.25,
            "y_pct": 0.75,
            "method": "hybrid_text_direct",
            "score": 0.91,
        }
        exe._server_resolve_target = MagicMock(return_value=server_answer)
        exe._find_text_on_screen = MagicMock(return_value=None)
        exe._click = MagicMock()

        dialog_title = "Confirmer l'enregistrement"
        spec = ActionExecutorV1._match_known_runtime_dialog(dialog_title)

        with patch("agent_v0.agent_v1.config.SERVER_URL", "http://srv"):
            handled = exe._handle_known_runtime_dialog(
                spec, dialog_title, 1920, 1080
            )

        assert handled["handled"] is True
        assert handled["button_text"] == "Oui"
        exe._server_resolve_target.assert_called_once()
        # (0.25, 0.75) on a 1920x1080 screen.
        exe._click.assert_called_once_with((480, 810), "left")

    def test_runtime_dialog_before_pause_returns_skip_result(self):
        exe = _make_executor_skeleton()
        exe._check_and_pause_on_system_dialog = MagicMock(return_value=False)
        exe._handle_known_runtime_dialog = MagicMock(
            return_value=self._handled_oui()
        )
        exe._capture_screenshot_b64 = MagicMock(return_value="after")

        outcome = exe._maybe_handle_runtime_dialog_before_pause(
            action={"action_id": "act_final_click", "type": "click"},
            target_spec={},
            expected_title=self._NOTEPAD_TITLE,
            current_title="Confirmer l'enregistrement",
            screen_width=1920,
            screen_height=1080,
        )

        assert outcome["success"] is True
        assert outcome["warning"] == "runtime_dialog_handled_skip"
        assert outcome["correction"]["button_text"] == "Oui"
        assert outcome["actual_position"] == {"x_pct": 0.33, "y_pct": 0.66}

    def test_post_verify_handles_runtime_dialog_and_recovers_expected_window(self):
        exe = self._post_verify_executor()
        handled_calls = []
        exe._handle_known_runtime_dialog = self._recording_handler(handled_calls)

        action = {
            "action_id": "act_save_dialog",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_title": self._NOTEPAD_TITLE,
        }

        def _window_info():
            # The dialog stays in front until it has been handled once.
            if handled_calls:
                return {"title": self._NOTEPAD_TITLE}
            return {"title": "Confirmer l’enregistrement"}

        outcome = self._replay(exe, action, side_effect=_window_info)

        assert outcome["success"] is True
        assert outcome["warning"] == "runtime_dialog_handled_post_verify"
        assert outcome["actual_position"] == {"x_pct": 0.5, "y_pct": 0.5}
        exe._handle_known_runtime_dialog.assert_called_once()

    def test_post_verify_can_retry_same_runtime_dialog_before_recovery(self):
        exe = self._post_verify_executor()
        handled_calls = []
        exe._handle_known_runtime_dialog = self._recording_handler(handled_calls)

        action = {
            "action_id": "act_save_dialog_retry",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_title": self._NOTEPAD_TITLE,
        }

        def _window_info():
            # The dialog only disappears after the second handling.
            if len(handled_calls) >= 2:
                return {"title": self._NOTEPAD_TITLE}
            return {"title": "Confirmer l’enregistrement"}

        outcome = self._replay(exe, action, side_effect=_window_info)

        assert outcome["success"] is True
        assert outcome["warning"] == "runtime_dialog_handled_post_verify"
        assert len(handled_calls) == 2
        assert outcome["runtime_dialog"]["dialog_id"] == "confirm_save_overwrite"

    def test_post_verify_wrong_window_fails_when_dialog_transition_was_expected(self):
        exe = self._post_verify_executor()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        exe._notification_manager = MagicMock()

        action = {
            "action_id": "act_open_save_dialog",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_before": "*test – Bloc-notes",
            "expected_window_title": "Enregistrer sous",
        }
        explorer = {"title": "rpa_vision : Explorateur de fichiers"}

        outcome = self._replay(exe, action, return_value=explorer)

        assert outcome["success"] is False
        assert outcome["warning"] == "wrong_window"
        assert "Enregistrer sous" in outcome["error"]
        assert "rpa_vision : Explorateur de fichiers" in outcome["error"]
        assert outcome["needs_human"] is True
        exe._notification_manager.replay_wrong_window.assert_called_once()

    def test_post_verify_same_window_mismatch_stays_legacy_warning(self):
        exe = self._post_verify_executor()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")

        action = {
            "action_id": "act_same_window_click",
            "type": "click",
            "x_pct": 0.5,
            "y_pct": 0.5,
            "expected_window_before": "*test – Bloc-notes",
            "expected_window_title": "test – Bloc-notes",
        }
        explorer = {"title": "rpa_vision : Explorateur de fichiers"}

        outcome = self._replay(exe, action, return_value=explorer)

        assert outcome["success"] is True
        assert outcome["warning"] == (
            "post_verif_timeout:rpa_vision : Explorateur de fichiers"
        )
|
||||
|
||||
|
||||
class TestCloseTabHotkeyFallback:
    """Closing a tab falls back to Ctrl+W when the tab's X is not found."""

    def test_visual_close_tab_uses_ctrl_w_when_tab_x_is_hidden(self):
        exe = _make_executor_with_mouse_skeleton()
        exe._observe_screen = MagicMock(return_value=None)
        exe._capture_human_correction = MagicMock(return_value=[])
        exe._execute_key_combo = MagicMock()
        exe._click = MagicMock()
        exe._wait_for_screen_change = MagicMock(return_value=True)
        exe._capture_screenshot_b64 = MagicMock(return_value="after")

        notepad_title = "*test – Bloc-notes"
        close_hints = {
            "interaction": "close_tab",
            "active_tab_label": "test",
        }
        action = {
            "action_id": "act_close_tab",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.88,
            "y_pct": 0.04,
            "target_spec": {
                "window_title": notepad_title,
                "by_role": "tab_close_button",
                "context_hints": close_hints,
            },
        }

        with patch(
            "agent_v0.agent_v1.core.executor.time.sleep",
            lambda *_a, **_k: None,
        ), patch(
            "agent_v0.agent_v1.window_info_crossplatform.get_active_window_info",
            return_value={"title": notepad_title},
        ):
            outcome = exe.execute_replay_action(action)

        assert outcome["success"] is True
        assert outcome["warning"] == "close_tab_hotkey_fallback"
        assert outcome["resolution_method"] == "semantic_close_tab_hotkey"
        exe._execute_key_combo.assert_called_once_with(["ctrl", "w"])
        exe._click.assert_not_called()
        exe._capture_human_correction.assert_not_called()
|
||||
|
||||
|
||||
class TestStartButtonHotkeyFallback:
    """Setup click on the Start button: Windows-key fallback vs. mouse click."""

    @staticmethod
    def _mocked_executor():
        """Build an executor with every desktop-facing helper mocked."""
        exe = _make_executor_with_mouse_skeleton()
        exe._observe_screen = MagicMock(return_value=None)
        exe._capture_human_correction = MagicMock(return_value=[])
        exe._execute_key_combo = MagicMock()
        exe._click = MagicMock()
        exe._capture_screenshot_b64 = MagicMock(return_value="after")
        exe._wait_for_screen_change = MagicMock(return_value=True)
        return exe

    @staticmethod
    def _start_click_action():
        """Return the visual setup action that targets the Start button."""
        return {
            "action_id": "act_setup_click_start",
            "type": "click",
            "visual_mode": True,
            "x_pct": 0.387891,
            "y_pct": 0.974375,
            "_setup_phase": True,
            "_setup_step": "click_start_menu",
            "target_spec": {
                "by_role": "start_button",
                "by_text": "",
                "anchor_image_base64": "abc123",
                "allow_position_fallback": True,
                "screen_scope": "full_screen",
            },
        }

    @staticmethod
    def _replay(exe, action, grounding_result):
        """Run *action* with grounding forced to *grounding_result*."""
        with patch(
            "agent_v0.agent_v1.core.grounding.GroundingEngine.locate",
            return_value=grounding_result,
        ), patch(
            "agent_v0.agent_v1.core.executor.time.sleep",
            lambda *_a, **_k: None,
        ):
            return exe.execute_replay_action(action, server_url="http://srv")

    def test_setup_start_button_position_fallback_uses_windows_key(self):
        exe = self._mocked_executor()
        # Grounding only returns the recorded position (positional fallback).
        positional = SimpleNamespace(
            found=True,
            x_pct=0.387891,
            y_pct=0.974375,
            method="position_fallback",
            score=0.2,
            detail="fallback positionnel explicite",
            elapsed_ms=12.0,
        )

        outcome = self._replay(exe, self._start_click_action(), positional)

        assert outcome["success"] is True
        assert outcome["warning"] == "start_button_hotkey_fallback"
        assert outcome["resolution_method"] == "semantic_start_button_hotkey"
        exe._execute_key_combo.assert_called_once_with(["win"])
        exe._click.assert_not_called()
        exe._wait_for_screen_change.assert_not_called()
        exe._capture_human_correction.assert_not_called()

    def test_real_visual_start_button_match_keeps_mouse_click(self):
        exe = self._mocked_executor()
        # Grounding reports a VLM match with a high score.
        visual_match = SimpleNamespace(
            found=True,
            x_pct=0.389,
            y_pct=0.973,
            method="vlm_quick_find",
            score=0.93,
            detail="match VLM plausible",
            elapsed_ms=35.0,
        )

        outcome = self._replay(exe, self._start_click_action(), visual_match)

        assert outcome["success"] is True
        assert outcome["resolution_method"] == "vlm_quick_find"
        exe._execute_key_combo.assert_not_called()
        exe._click.assert_called_once()
        exe._wait_for_screen_change.assert_not_called()
        exe._capture_human_correction.assert_not_called()
|
||||
58
tests/unit/test_finalize_auto_replay_flag.py
Normal file
58
tests/unit/test_finalize_auto_replay_flag.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""Tests pour le flag RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE.
|
||||
|
||||
Brief Codex 2026-05-23 09:02 : le chemin produit cible est le workflow
|
||||
compilé (post worker VLM), pas le replay direct depuis raw events.
|
||||
Le flag env désactive la proposition automatique de replay direct par
|
||||
défaut. Le chemin direct reste accessible (smoke/debug) via RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE=true.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.server_v1.replay_engine import ( # noqa: E402
|
||||
_auto_launch_replay_after_finalize,
|
||||
)
|
||||
|
||||
|
||||
class TestAutoLaunchReplayFlag:
    """Behaviour of ``_auto_launch_replay_after_finalize`` for various env values."""

    # Name of the environment variable read by the helper under test.
    _ENV_FLAG = "RPA_AUTO_LAUNCH_REPLAY_AFTER_FINALIZE"

    def test_default_is_false(self, monkeypatch):
        """Without the env variable, product mode is active: no automatic
        direct-replay proposal."""
        monkeypatch.delenv(self._ENV_FLAG, raising=False)
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is False

    def test_true_value_activates(self, monkeypatch):
        """The value ``true`` turns the flag on."""
        monkeypatch.setenv(self._ENV_FLAG, "true")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is True

    def test_1_value_activates(self, monkeypatch):
        """The value ``1`` turns the flag on."""
        monkeypatch.setenv(self._ENV_FLAG, "1")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is True

    def test_yes_value_activates(self, monkeypatch):
        """The value ``yes`` turns the flag on."""
        monkeypatch.setenv(self._ENV_FLAG, "yes")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is True

    def test_false_value_deactivates(self, monkeypatch):
        """The value ``false`` leaves the flag off."""
        monkeypatch.setenv(self._ENV_FLAG, "false")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is False

    def test_empty_value_deactivates(self, monkeypatch):
        """An empty value leaves the flag off."""
        monkeypatch.setenv(self._ENV_FLAG, "")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is False

    def test_arbitrary_value_deactivates(self, monkeypatch):
        """Any non-truthy value returns False (default-deny)."""
        monkeypatch.setenv(self._ENV_FLAG, "maybe")
        enabled = _auto_launch_replay_after_finalize()
        assert enabled is False

    def test_case_insensitive(self, monkeypatch):
        """Upper-case and mixed-case truthy values are accepted as well."""
        for raw_value in ("TRUE", "Yes"):
            monkeypatch.setenv(self._ENV_FLAG, raw_value)
            assert _auto_launch_replay_after_finalize() is True
|
||||
46
tests/unit/test_grounding_engine.py
Normal file
46
tests/unit/test_grounding_engine.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.agent_v1.core.grounding import GroundingEngine # noqa: E402
|
||||
|
||||
|
||||
def test_template_strategy_passes_fallback_coords_to_anchor_drift_guard():
    """The ``template`` strategy must forward the fallback coordinates to
    ``executor._template_match_anchor`` as ``fallback_x_pct`` / ``fallback_y_pct``."""
    anchor_hit = {
        "resolved": True,
        "x_pct": 0.7,
        "y_pct": 0.35,
        "score": 0.95,
    }
    executor = MagicMock()
    executor._template_match_anchor = MagicMock(return_value=anchor_hit)

    engine = GroundingEngine(executor)
    anchor_only_spec = {"anchor_image_base64": "abc123"}

    outcome = engine._try_strategy(
        "template",
        server_url="",
        screenshot_b64="shot",
        target_spec=anchor_only_spec,
        fallback_x=0.708594,
        fallback_y=0.35,
        screen_width=2560,
        screen_height=1600,
    )

    assert outcome.found is True
    executor._template_match_anchor.assert_called_once_with(
        "shot",
        "abc123",
        2560,
        1600,
        fallback_x_pct=0.708594,
        fallback_y_pct=0.35,
    )
|
||||
@@ -111,6 +111,310 @@ class TestGroundingEngine:
|
||||
assert d["x_pct"] == 0.5
|
||||
assert d["method"] == "som"
|
||||
|
||||
def test_start_button_uses_full_screen_instead_of_active_window(self):
    """The Start button must be resolved on the whole screen."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.02,
        "y_pct": 0.98,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Démarrer"},
    }
    executor._server_resolve_target.return_value = server_hit
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

    active_window = {"rect": [100, 100, 1100, 900]}
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=active_window,
    ):
        located = engine.locate(
            "http://server",
            {"by_text": "Démarrer", "by_role": "start_button"},
            0.02,
            0.98,
            1920,
            1080,
        )

    assert located.found is True
    # A capture region of None is what the test expects for a start_button target.
    engine._capture_window_or_screen.assert_called_once_with(None)
|
||||
|
||||
def test_regular_targets_stay_scoped_to_active_window(self):
    """Ordinary application targets stay bounded to the active window."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_hit
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

    active_window = {"rect": [100, 200, 1100, 1000]}
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=active_window,
    ):
        located = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5,
            0.3,
            1920,
            1080,
        )

    assert located.found is True
    # Expected capture region: the mocked rect expressed as left/top/width/height.
    expected_region = {"left": 100, "top": 200, "width": 1000, "height": 800}
    engine._capture_window_or_screen.assert_called_once_with(expected_region)
|
||||
|
||||
def test_unknown_window_rect_falls_back_to_full_screen_on_visual_mismatch(self):
    """An unknown window title is only accepted when the crop is visually validated."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_hit
    # The text lookup finds nothing, i.e. the crop is not visually confirmed.
    executor._find_text_on_screen.return_value = None
    engine._capture_window_or_screen = MagicMock(
        side_effect=["fake_window_b64", "fake_screen_b64"]
    )

    active_window = {
        "title": "unknown_window",
        "rect": [100, 200, 1100, 1000],
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=active_window,
    ):
        located = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5,
            0.3,
            1920,
            1080,
        )

    assert located.found is True
    # First capture targets the window rect, second one is called with None.
    capture_regions = [
        call.args[0] for call in engine._capture_window_or_screen.call_args_list
    ]
    assert capture_regions == [
        {"left": 100, "top": 200, "width": 1000, "height": 800},
        None,
    ]
|
||||
|
||||
def test_taskbar_like_rect_falls_back_to_full_screen(self):
    """A taskbar/systray window must never be used as the active window."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_hit
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

    # Full-width strip at the bottom of a 2560x1600 screen.
    systray_window = {
        "title": "Fenêtre de dépassement de capacité de la barre d'état système",
        "rect": [0, 1492, 2560, 1600],
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=systray_window,
    ):
        located = engine.locate(
            "http://server",
            {"by_text": "Enregistrer", "by_role": "button"},
            0.5,
            0.3,
            2560,
            1600,
        )

    assert located.found is True
    engine._capture_window_or_screen.assert_called_once_with(None)
|
||||
|
||||
def test_visually_mismatched_window_crop_falls_back_to_full_screen(self):
    """A plausible window crop that is visually wrong gets rejected."""

    def save_button_spec():
        # Fresh dict on every call so the engine never shares state with the assertion.
        return {
            "by_text": "Enregistrer",
            "by_role": "button",
            "window_title": "Enregistrer sous",
        }

    engine, executor = self._make_engine()
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    # The text lookup finds nothing, i.e. the crop is not visually confirmed.
    executor._find_text_on_screen.return_value = None
    engine._capture_window_or_screen = MagicMock(
        side_effect=["fake_window_b64", "fake_screen_b64"]
    )

    active_window = {
        "title": "Enregistrer sous",
        "rect": [100, 200, 1100, 1000],
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=active_window,
    ):
        located = engine.locate(
            "http://server",
            save_button_spec(),
            0.5,
            0.3,
            1920,
            1080,
        )

    assert located.found is True
    capture_regions = [
        call.args[0] for call in engine._capture_window_or_screen.call_args_list
    ]
    assert capture_regions == [
        {"left": 100, "top": 200, "width": 1000, "height": 800},
        None,
    ]
    # The server resolver must have been fed the second (full-screen) capture only.
    executor._server_resolve_target.assert_called_once_with(
        "http://server",
        "fake_screen_b64",
        save_button_spec(),
        0.5,
        0.3,
        1920,
        1080,
    )
|
||||
|
||||
def test_visually_validated_window_crop_stays_scoped(self):
    """A plausible window crop that is visually validated remains allowed."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Enregistrer"},
    }
    executor._server_resolve_target.return_value = server_hit
    # The text lookup returns a position, i.e. the crop is visually confirmed.
    executor._find_text_on_screen.return_value = (321, 222)
    engine._capture_window_or_screen = MagicMock(return_value="fake_window_b64")

    active_window = {
        "title": "Enregistrer sous",
        "rect": [100, 200, 1100, 1000],
    }
    target = {
        "by_text": "Enregistrer",
        "by_role": "button",
        "window_title": "Enregistrer sous",
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=active_window,
    ):
        located = engine.locate("http://server", target, 0.5, 0.3, 1920, 1080)

    assert located.found is True
    expected_region = {"left": 100, "top": 200, "width": 1000, "height": 800}
    engine._capture_window_or_screen.assert_called_once_with(expected_region)
|
||||
|
||||
def test_lea_active_window_does_not_scope_external_target(self):
    """A foreground Léa window must never constrain an external target."""

    def notepad_result_spec():
        # Fresh dict on every call so the engine never shares state with the assertion.
        return {"by_text": "Bloc-notes", "by_role": "search_result"}

    engine, executor = self._make_engine()
    executor._server_resolve_target.return_value = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Bloc-notes"},
    }
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

    lea_window = {
        "title": "Léa — Assistante",
        "app_name": "pythonw.exe",
        "rect": [1948, 750, 2570, 1606],
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=lea_window,
    ):
        located = engine.locate(
            "http://server",
            notepad_result_spec(),
            0.2,
            0.5,
            2560,
            1600,
        )

    assert located.found is True
    engine._capture_window_or_screen.assert_called_once_with(None)
    executor._server_resolve_target.assert_called_once_with(
        "http://server",
        "fake_b64_data",
        notepad_result_spec(),
        0.2,
        0.5,
        2560,
        1600,
    )
|
||||
|
||||
def test_lea_active_window_stays_scoped_for_explicit_lea_target(self):
    """When the target explicitly names Léa, scoping to her window stays allowed."""
    engine, executor = self._make_engine()
    server_hit = {
        "resolved": True,
        "x_pct": 0.5,
        "y_pct": 0.25,
        "method": "som_text",
        "score": 0.9,
        "matched_element": {"label": "Continuer"},
    }
    executor._server_resolve_target.return_value = server_hit
    engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

    lea_window = {
        "title": "Léa — Assistante",
        "app_name": "pythonw.exe",
        "rect": [1948, 750, 2570, 1606],
    }
    lea_target = {
        "by_text": "Continuer",
        "by_role": "button",
        "window_title": "Léa — Assistante",
    }
    with patch(
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect",
        return_value=lea_window,
    ):
        located = engine.locate("http://server", lea_target, 0.5, 0.3, 3000, 2000)

    assert located.found is True
    # Expected capture region: the mocked rect expressed as left/top/width/height.
    expected_region = {"left": 1948, "top": 750, "width": 622, "height": 856}
    engine._capture_window_or_screen.assert_called_once_with(expected_region)
|
||||
|
||||
def test_allow_position_fallback_returns_recorded_coords(self):
    """When allowed, grounding may fall back to the recorded position."""
    engine, executor = self._make_engine()
    # Every resolver configured here reports a miss.
    executor._server_resolve_target.return_value = None
    executor._template_match_anchor.return_value = None
    executor._hybrid_vlm_resolve.return_value = None

    recorded_x, recorded_y = 0.387891, 0.974375
    start_button_target = {
        "by_role": "start_button",
        "vlm_description": "icône Windows",
        "screen_scope": "full_screen",
        "allow_position_fallback": True,
    }

    located = engine.locate(
        "http://server",
        start_button_target,
        recorded_x,
        recorded_y,
        1920,
        1080,
    )

    assert located.found is True
    assert located.method == "position_fallback"
    assert located.x_pct == pytest.approx(0.387891)
    assert located.y_pct == pytest.approx(0.974375)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# P2 : Policy — décisions quand grounding échoue
|
||||
@@ -407,6 +711,65 @@ class TestReplayLearner:
|
||||
assert "action_id" in data
|
||||
assert "success" in data
|
||||
|
||||
def test_record_human_correction_persists_to_memory_helper(self, learner, monkeypatch):
    """A human correction must feed persistent memory through replay_memory."""
    forwarded = {}

    def spy_record_success(**kwargs):
        # Keep whatever the learner forwards so it can be inspected below.
        forwarded.update(kwargs)
        return True

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_record_success",
        spy_record_success,
    )

    corrected_action = {
        "action_id": "a_corr",
        "target_spec": {"by_text": "Valider", "window_title": "Bloc-notes"},
    }
    learner.record_human_correction(
        session_id="s_corr",
        action=corrected_action,
        correction={"x_pct": 0.42, "y_pct": 0.84},
    )

    stored = learner.load_session("s_corr")
    assert len(stored) == 1
    entry = stored[0]
    assert entry.resolution_method == "human_supervised"
    assert entry.window_title == "Bloc-notes"

    assert forwarded["window_title"] == "Bloc-notes"
    assert forwarded["target_spec"]["by_text"] == "Valider"
    assert forwarded["x_pct"] == 0.42
    assert forwarded["y_pct"] == 0.84
    assert forwarded["method"] == "human_supervised"
    assert forwarded["confidence"] == 1.0
|
||||
|
||||
def test_record_human_correction_fallback_window_title_from_action(self, learner, monkeypatch):
    """When target_spec.window_title is missing, action.window_title is used instead."""
    forwarded = {}

    def spy_record_success(**kwargs):
        # Keep whatever the learner forwards so it can be inspected below.
        forwarded.update(kwargs)
        return True

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_record_success",
        spy_record_success,
    )

    # The window title lives on the action only, not inside target_spec.
    corrected_action = {
        "action_id": "a_corr2",
        "window_title": "Fenêtre fallback",
        "target_spec": {"by_text": "Enregistrer"},
    }
    learner.record_human_correction(
        session_id="s_corr2",
        action=corrected_action,
        correction={"x_pct": 0.1, "y_pct": 0.2},
    )

    assert forwarded["window_title"] == "Fenêtre fallback"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Boucle d'apprentissage : consolidation cross-workflow
|
||||
|
||||
@@ -145,6 +145,20 @@ class TestVerifyWithCritic:
|
||||
assert result.suggestion == "retry"
|
||||
assert result.semantic_verified is None # VLM non appelé
|
||||
|
||||
def test_verify_screen_identique_ne_declenche_pas_retry(
    self, verifier, screenshot_gray,
):
    """verify_screen is a stabilisation step, not an action that must change the screen again."""
    stabilisation_step = {"type": "verify_screen", "action_id": "verify_setup"}

    # Same screenshot before and after: nothing changed on screen.
    verdict = verifier.verify_action(
        action=stabilisation_step,
        result={"success": True},
        screenshot_before=screenshot_gray,
        screenshot_after=screenshot_gray,
    )

    assert verdict.verified is True
    assert verdict.suggestion == "continue"
    assert verdict.changes_detected is False
|
||||
|
||||
@patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic")
|
||||
def test_pixel_ok_semantic_ok(
|
||||
self, mock_semantic, verifier, screenshot_gray, screenshot_white,
|
||||
|
||||
118
tests/unit/test_replay_memory.py
Normal file
118
tests/unit/test_replay_memory.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent_v0.server_v1 import replay_memory
|
||||
from core.learning.target_memory_store import TargetMemoryStore
|
||||
|
||||
|
||||
class _DummyStore:
|
||||
def __init__(self, fp):
|
||||
self._fp = fp
|
||||
|
||||
def lookup(self, screen_sig, spec_shim):
|
||||
return self._fp
|
||||
|
||||
|
||||
def test_memory_lookup_uses_window_relative_coords_when_available(monkeypatch):
    """A ``position_fallback`` memory hit is expected to return the
    window-relative click ratio rather than the stored bbox coordinates."""
    stored_fp = SimpleNamespace(
        bbox=(0.566016, 0.400625, 0.0, 0.0),
        etype="position_fallback",
        confidence=0.2,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored_fp))

    window_capture = {
        "click_relative": [681, 448],
        "window_size": [1287, 1407],
    }
    hit = replay_memory.memory_lookup(
        window_title="Rechercher",
        target_spec={"by_text": "Bloc-notes", "window_capture": window_capture},
    )

    assert hit is not None
    assert hit["method"] == "memory_position_fallback"
    # click_relative divided by window_size, axis by axis.
    assert hit["x_pct"] == 681 / 1287
    assert hit["y_pct"] == 448 / 1407
|
||||
|
||||
|
||||
def test_memory_lookup_keeps_bbox_coords_without_window_capture(monkeypatch):
    """Without ``window_capture`` in the spec, the stored bbox coordinates are returned."""
    stored_fp = SimpleNamespace(
        bbox=(0.566016, 0.400625, 0.0, 0.0),
        etype="position_fallback",
        confidence=0.2,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored_fp))

    text_only_spec = {"by_text": "Bloc-notes"}
    hit = replay_memory.memory_lookup(
        window_title="Rechercher",
        target_spec=text_only_spec,
    )

    assert hit is not None
    assert hit["x_pct"] == 0.566016
    assert hit["y_pct"] == 0.400625
|
||||
|
||||
|
||||
def test_memory_lookup_keeps_learned_visual_coords_with_window_capture(monkeypatch):
    """An ``anchor_template`` memory hit keeps its stored coordinates even
    when the spec carries a ``window_capture``."""
    stored_fp = SimpleNamespace(
        bbox=(0.402734375, 0.578125, 0.0, 0.0),
        etype="anchor_template",
        confidence=0.99,
    )
    monkeypatch.setattr(replay_memory, "get_memory_store", lambda: _DummyStore(stored_fp))

    save_spec = {
        "by_text": "Enregistrer",
        "by_role": "yolo",
        "window_capture": {
            "click_relative": [860, 634],
            "window_size": [1920, 1116],
        },
    }
    hit = replay_memory.memory_lookup(
        window_title="*test – Bloc-notes",
        target_spec=save_spec,
    )

    assert hit is not None
    assert hit["method"] == "memory_anchor_template"
    assert hit["x_pct"] == 0.402734375
    assert hit["y_pct"] == 0.578125
|
||||
|
||||
|
||||
def test_target_spec_hash_distinguishes_same_text_with_different_spatial_hints(tmp_path):
    """Two specs sharing text, role and description but differing in their
    spatial hints must not hash to the same value."""
    store = TargetMemoryStore(base_path=str(tmp_path / "learning"))

    def save_spec(click_relative, bbox_norm, center_norm):
        # Everything except the three spatial hints is identical between the specs.
        return replay_memory._TargetSpecLike(
            {
                "by_text": "Enregistrer",
                "by_role": "yolo",
                "vlm_description": "Dans la fenêtre '*test – Bloc-notes', l'élément cliqué se trouve au milieu au centre de l'écran",
                "window_capture": {
                    "click_relative": click_relative,
                    "window_size": [1920, 1116],
                },
                "som_element": {
                    "bbox_norm": bbox_norm,
                    "center_norm": center_norm,
                },
            }
        )

    spec_left = save_spec(
        [860, 634],
        [0.40234375, 0.701875, 0.46640625, 0.74125],
        [0.434375, 0.72125],
    )
    spec_right = save_spec(
        [1491, 38],
        [0.697265625, 0.335625, 0.715625, 0.3625],
        [0.70625, 0.34875],
    )

    left_hash = store._hash_target_spec(spec_left)
    right_hash = store._hash_target_spec(spec_right)
    assert left_hash != right_hash
|
||||
152
tests/unit/test_resolve_engine_close_tab_anchor.py
Normal file
152
tests/unit/test_resolve_engine_close_tab_anchor.py
Normal file
@@ -0,0 +1,152 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.server_v1 import resolve_engine # noqa: E402
|
||||
|
||||
|
||||
class _FakeElem:
|
||||
def __init__(self, elem_id, label, source, center, center_norm, confidence=0.9):
|
||||
self.id = elem_id
|
||||
self.label = label
|
||||
self.source = source
|
||||
self.center = center
|
||||
self.center_norm = center_norm
|
||||
self.confidence = confidence
|
||||
|
||||
|
||||
class _FakeSomResult:
|
||||
def __init__(self, elements):
|
||||
self.elements = elements
|
||||
self.som_image = None
|
||||
|
||||
|
||||
class _FakeSomEngine:
|
||||
def __init__(self, elements):
|
||||
self._elements = elements
|
||||
|
||||
def analyze(self, _img):
|
||||
return _FakeSomResult(self._elements)
|
||||
|
||||
|
||||
def _make_close_button_image(tmp_path: Path) -> tuple[str, str]:
    """Draw a synthetic close button and return ``(screenshot_path, anchor_b64)``.

    The screenshot is a 200x100 white image saved as ``screen.png`` under
    *tmp_path*; the anchor is the button area cropped out of it, PNG-encoded
    and base64-encoded.
    """
    button_box = (130, 4, 170, 36)
    # The two diagonals of the "X" glyph, drawn in this order.
    cross_strokes = ((144, 12, 156, 24), (156, 12, 144, 24))

    canvas = Image.new("RGB", (200, 100), "white")
    pen = ImageDraw.Draw(canvas)
    pen.rounded_rectangle(button_box, radius=8, fill=(242, 244, 247))
    for stroke in cross_strokes:
        pen.line(stroke, fill="black", width=2)

    target_file = tmp_path / "screen.png"
    canvas.save(target_file)

    # The anchor crop uses exactly the box the button was drawn in.
    png_bytes = io.BytesIO()
    canvas.crop(button_box).save(png_bytes, format="PNG")
    encoded_anchor = base64.b64encode(png_bytes.getvalue()).decode("utf-8")
    return str(target_file), encoded_anchor
|
||||
|
||||
|
||||
def test_close_tab_uses_exact_anchor_coords(tmp_path, monkeypatch):
    """For a ``close_tab`` interaction the result must sit on the anchor
    itself (0.75, 0.20), not on the SoM element centre (0.60, 0.20)."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    nearby_element = _FakeElem(
        elem_id=47,
        label="test",
        source="yolo",
        center=(120, 20),
        center_norm=(0.60, 0.20),
    )
    fake_engine = _FakeSomEngine([nearby_element])

    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: fake_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())

    close_tab_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "fermer l'onglet actif",
        "context_hints": {"interaction": "close_tab"},
        "window_capture": {
            "rect": [0, 0, 200, 100],
            "click_relative": [150, 20],
            "window_size": [200, 100],
        },
    }
    resolution = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=close_tab_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolution is not None
    assert resolution["method"] == "som_anchor_match"
    assert resolution["matched_element"]["role"] == "som_anchor_exact"
    assert resolution["x_pct"] == 0.75
    assert resolution["y_pct"] == 0.20
|
||||
|
||||
|
||||
def test_close_tab_rejects_exact_anchor_far_from_recorded_click(tmp_path, monkeypatch):
    """A ``close_tab`` anchor whose recorded click (50, 20) lies far from
    where the anchor was drawn must yield no resolution at all."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    fake_engine = _FakeSomEngine([])

    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: fake_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())

    close_tab_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "fermer l'onglet actif",
        "context_hints": {"interaction": "close_tab"},
        "window_capture": {
            "rect": [0, 0, 200, 100],
            "click_relative": [50, 20],
            "window_size": [200, 100],
        },
    }
    resolution = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=close_tab_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolution is None
|
||||
|
||||
|
||||
def test_non_close_tab_keeps_nearest_som_center(tmp_path, monkeypatch):
    """Without the ``close_tab`` hint the result is the SoM element centre
    (0.60, 0.20), tagged ``som_anchor_match``."""
    screenshot_path, anchor_b64 = _make_close_button_image(tmp_path)
    nearby_element = _FakeElem(
        elem_id=47,
        label="test",
        source="yolo",
        center=(120, 20),
        center_norm=(0.60, 0.20),
    )
    fake_engine = _FakeSomEngine([nearby_element])

    monkeypatch.setattr(resolve_engine, "_get_som_engine_api", lambda: fake_engine)
    monkeypatch.setattr(resolve_engine, "_get_vlm_client", lambda: object())

    # No context_hints and no window_capture in this spec.
    plain_anchor_spec = {
        "anchor_image_base64": anchor_b64,
        "by_text": "",
        "vlm_description": "icône en haut",
    }
    resolution = resolve_engine._resolve_by_som(
        screenshot_path=screenshot_path,
        target_spec=plain_anchor_spec,
        screen_width=200,
        screen_height=100,
    )

    assert resolution is not None
    assert resolution["method"] == "som_anchor_match"
    assert resolution["matched_element"]["role"] == "som_anchor_match"
    assert resolution["x_pct"] == 0.60
    assert resolution["y_pct"] == 0.20
|
||||
51
tests/unit/test_resolve_engine_dialog_button_guard.py
Normal file
51
tests/unit/test_resolve_engine_dialog_button_guard.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import pytest
|
||||
|
||||
from agent_v0.server_v1 import resolve_engine
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _disable_memory_lookup(monkeypatch):
    """Autouse fixture: make ``replay_memory.memory_lookup`` always return None."""

    def _always_miss(**kwargs):
        return None

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_lookup",
        _always_miss,
    )
|
||||
|
||||
|
||||
def test_dialog_button_skips_vlm_cascade_when_ocr_misses(tmp_path, monkeypatch):
    """When OCR misses a ``dialog_button``, resolution must fail right away
    without calling the VLM or SoM resolvers."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")

    # OCR finds nothing.
    monkeypatch.setattr(
        resolve_engine,
        "_resolve_by_ocr_text",
        lambda *args, **kwargs: None,
    )

    def _forbidden(message):
        # Build a resolver replacement that fails the test as soon as it is called.
        def _tripwire(*args, **kwargs):
            raise AssertionError(message)

        return _tripwire

    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        _forbidden("VLM ne doit pas être appelé pour dialog_button"),
    )
    monkeypatch.setattr(
        resolve_engine,
        "_resolve_by_som",
        _forbidden("SoM ne doit pas être appelé pour dialog_button"),
    )

    confirm_yes_spec = {
        "by_role": "dialog_button",
        "by_text": "Oui",
        "window_title": "Confirmer l’enregistrement",
        "vlm_description": "Dans la fenêtre 'Confirmer l’enregistrement', le bouton 'Oui'",
    }
    outcome = resolve_engine._resolve_target_sync(
        str(screenshot),
        confirm_yes_spec,
        2560,
        1600,
        0.5,
        0.5,
        True,
        processor=None,
    )

    assert outcome["resolved"] is False
    assert outcome["method"] == "dialog_button_ocr_only"
    assert outcome["reason"] == "ocr_direct_failed_dialog_button_no_vlm"
|
||||
139
tests/unit/test_resolve_engine_start_button_guard.py
Normal file
139
tests/unit/test_resolve_engine_start_button_guard.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import pytest
|
||||
|
||||
from agent_v0.server_v1 import resolve_engine
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _disable_memory_lookup(monkeypatch):
    """Autouse fixture: make ``replay_memory.memory_lookup`` always return None."""

    def _always_miss(**kwargs):
        return None

    monkeypatch.setattr(
        "agent_v0.server_v1.replay_memory.memory_lookup",
        _always_miss,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def _patched_resolvers(monkeypatch):
    """Replace the template-matching and SoM resolvers with stubs that return None."""
    for resolver_name in ("_resolve_by_template_matching", "_resolve_by_som"):
        monkeypatch.setattr(
            resolve_engine,
            resolver_name,
            lambda *args, **kwargs: None,
        )
|
||||
|
||||
|
||||
def _start_button_spec():
|
||||
return {
|
||||
"by_role": "start_button",
|
||||
"by_text": "",
|
||||
"anchor_image_base64": "abc123",
|
||||
"vlm_description": "Le bouton Démarrer (icône Windows) dans la barre des tâches, en bas",
|
||||
"screen_scope": "full_screen",
|
||||
}
|
||||
|
||||
|
||||
def _generic_button_spec():
    """Return a fresh target spec for an ordinary (non start-button) button."""
    return {
        "by_role": "button",
        "by_text": "",
        "anchor_image_base64": "abc123",
        "vlm_description": "Le bouton principal",
    }
|
||||
|
||||
|
||||
def _vlm_result(x_pct: float, y_pct: float, score: float = 0.95):
    """Build a fake successful ``vlm_quick_find`` resolution.

    Args:
        x_pct: Horizontal position reported by the fake VLM.
        y_pct: Vertical position reported by the fake VLM.
        score: Score reported both at the top level and as the matched
            element's ``confidence``.

    Returns:
        A dict shaped like a resolved ``vlm_quick_find`` result.
    """
    return {
        "resolved": True,
        "method": "vlm_quick_find",
        "x_pct": x_pct,
        "y_pct": y_pct,
        "score": score,
        "matched_element": {
            "label": "target",
            "type": "vlm_located",
            "role": "vlm_quick_find",
            "confidence": score,
        },
    }
|
||||
|
||||
|
||||
def test_start_button_rejects_far_vlm_false_positive(tmp_path, monkeypatch, _patched_resolvers):
    """A start-button VLM hit far from the recorded position is rejected."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")

    # The fake VLM answers (0.01, 0.95), far from the (0.387891, 0.974375)
    # pair passed below (presumably the recorded fallback position).
    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: _vlm_result(0.01, 0.95),
    )

    result = resolve_engine._resolve_target_sync(
        str(screenshot),
        _start_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert result["resolved"] is False
    assert result["method"] == "strict_vlm_template_failed"
|
||||
|
||||
|
||||
def test_start_button_accepts_plausible_vlm_result(tmp_path, monkeypatch, _patched_resolvers):
    """A start-button VLM hit close to the recorded position is kept as-is."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")

    # The fake VLM answers (0.395, 0.972), next to the (0.387891, 0.974375)
    # pair passed below (presumably the recorded fallback position).
    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: _vlm_result(0.395, 0.972),
    )

    result = resolve_engine._resolve_target_sync(
        str(screenshot),
        _start_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert result["resolved"] is True
    assert result["method"] == "vlm_quick_find"
    assert result["x_pct"] == pytest.approx(0.395)
    assert result["y_pct"] == pytest.approx(0.972)
|
||||
|
||||
|
||||
def test_non_start_button_keeps_vlm_result_even_if_far(tmp_path, monkeypatch, _patched_resolvers):
    """The distance guard is start-button specific: a generic button keeps
    the VLM coordinates even when they are far from the recorded ones."""
    screenshot = tmp_path / "screen.jpg"
    screenshot.write_bytes(b"fake")

    monkeypatch.setattr(
        resolve_engine,
        "_vlm_quick_find",
        lambda *args, **kwargs: _vlm_result(0.01, 0.95),
    )

    result = resolve_engine._resolve_target_sync(
        str(screenshot),
        _generic_button_spec(),
        1920,
        1080,
        0.387891,
        0.974375,
        True,
        processor=None,
    )

    assert result["resolved"] is True
    assert result["method"] == "vlm_quick_find"
    assert result["x_pct"] == pytest.approx(0.01)
    assert result["y_pct"] == pytest.approx(0.95)
|
||||
103
tests/unit/test_server_client_replay_controls.py
Normal file
103
tests/unit/test_server_client_replay_controls.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Tests pour les contrôles HTTP de replay paused (resume/abort).
|
||||
|
||||
Ces appels sont le fallback du chemin SocketIO `lea:replay_resume`
|
||||
/ `lea:replay_abort` quand le bus feedback est déconnecté au moment
|
||||
où l'utilisateur clique dans la bulle paused (cf.
|
||||
`docs/CR_AUDIT_PAUSED_RESUME_BUS_2026-05-22.md`).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.lea_ui.server_client import LeaServerClient # noqa: E402
|
||||
|
||||
|
||||
# Préfixe partagé pour comparer les URLs sans coller à la valeur de
|
||||
# RPA_STREAMING_URL côté env d'exécution des tests.
|
||||
RESUME_PATH = "/traces/stream/replay/replay_xyz/resume"
|
||||
CANCEL_PATH = "/traces/stream/replay/replay_xyz/cancel"
|
||||
|
||||
|
||||
@pytest.fixture
def client(monkeypatch):
    """Return a ``LeaServerClient`` created with a known ``RPA_API_TOKEN``.

    The token is set before construction so the tests can assert on the
    ``Authorization`` header the client sends.
    """
    monkeypatch.setenv("RPA_API_TOKEN", "tok-test-1234")
    return LeaServerClient()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# resume_replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestResumeReplay:
    """HTTP fallback for resuming a paused replay (``resume_replay``)."""

    def test_returns_true_when_server_accepts(self, client):
        """An ``ok`` response yields ``True`` after exactly one POST."""
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp) as post:
            assert client.resume_replay("replay_xyz") is True
            assert post.call_count == 1

    def test_returns_false_when_server_rejects(self, client):
        """A non-``ok`` response yields ``False``."""
        resp = MagicMock(ok=False)
        with patch("requests.post", return_value=resp):
            assert client.resume_replay("replay_xyz") is False

    def test_returns_false_on_empty_replay_id(self, client):
        """An empty replay id is refused without any HTTP call."""
        with patch("requests.post") as post:
            assert client.resume_replay("") is False
            post.assert_not_called()

    def test_returns_false_on_exception(self, client):
        """A network error is swallowed and reported as ``False``."""
        with patch("requests.post", side_effect=ConnectionError("network down")):
            assert client.resume_replay("replay_xyz") is False

    def test_posts_to_resume_endpoint_with_auth_header(self, client):
        """The POST targets the resume path and carries the bearer token."""
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp) as post:
            client.resume_replay("replay_xyz")
            call = post.call_args
            # The URL may be passed positionally or as the ``url`` keyword.
            url = call.args[0] if call.args else call.kwargs.get("url", "")
            assert url.endswith(RESUME_PATH)
            headers = call.kwargs.get("headers", {})
            assert headers.get("Authorization") == "Bearer tok-test-1234"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# abort_replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestAbortReplay:
    """HTTP fallback for aborting a paused replay (``abort_replay``)."""

    def test_returns_true_when_server_accepts(self, client):
        """An ``ok`` response yields ``True``."""
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp):
            assert client.abort_replay("replay_xyz") is True

    def test_returns_false_when_server_rejects(self, client):
        """A non-``ok`` response yields ``False``."""
        resp = MagicMock(ok=False)
        with patch("requests.post", return_value=resp):
            assert client.abort_replay("replay_xyz") is False

    def test_returns_false_on_empty_replay_id(self, client):
        """An empty replay id is refused without any HTTP call."""
        with patch("requests.post") as post:
            assert client.abort_replay("") is False
            post.assert_not_called()

    def test_returns_false_on_exception(self, client):
        """A timeout is swallowed and reported as ``False``."""
        with patch("requests.post", side_effect=TimeoutError("timeout")):
            assert client.abort_replay("replay_xyz") is False

    def test_posts_to_cancel_endpoint(self, client):
        """The POST targets the cancel path."""
        resp = MagicMock(ok=True)
        with patch("requests.post", return_value=resp) as post:
            client.abort_replay("replay_xyz")
            call = post.call_args
            # Same extraction as the resume test: tolerate a ``url=`` keyword
            # instead of failing with IndexError on ``call.args[0]``.
            url = call.args[0] if call.args else call.kwargs.get("url", "")
            assert url.endswith(CANCEL_PATH)
|
||||
83
tests/unit/test_text_mismatch_empty_observed.py
Normal file
83
tests/unit/test_text_mismatch_empty_observed.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""Tests pour `_should_reject_on_text_mismatch` — patch 2026-05-23 :
|
||||
distinguer `observed=''` (OCR n'a rien lu, ambigu) de `observed='X'`
|
||||
(autre texte lu = mismatch confirmé) dans le pré-check OCR.
|
||||
|
||||
Brief Codex 2026-05-23 08:55 : le crop bbox SoM précis (50 × 48 px)
|
||||
sur un onglet Notepad moderne donne `observed=''` car EasyOCR n'a pas
|
||||
suffisamment de signal (texte peu contrasté, zone trop petite). Le
|
||||
patch précédent rejetait ce cas comme mismatch — alors qu'aucune
|
||||
preuve d'un mauvais clic n'existe. On ne rejette plus que quand l'OCR
|
||||
a effectivement lu autre chose que la cible attendue.
|
||||
|
||||
Le faux succès OBS Studio reste bloqué : (1) son OCR retournait
|
||||
`'ue audio disponible GUI OBS Studio…'` = non-vide → rejet conservé ;
|
||||
(2) la garde drift agent posée sur ANCHOR-TM bloque déjà ce match.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.server_v1.resolve_engine import ( # noqa: E402
|
||||
_should_reject_on_text_mismatch,
|
||||
)
|
||||
|
||||
|
||||
class TestShouldRejectOnTextMismatch:
    """Decision table of ``_should_reject_on_text_mismatch``.

    A rejection is expected only when validation failed AND the OCR actually
    read some (different) text; an empty or blank reading is ambiguous.
    """

    def test_valid_passes(self):
        """Nominal case: the OCR saw the target, nothing is rejected."""
        assert not _should_reject_on_text_mismatch(
            is_valid=True, observed="Enregistrer sous",
        )

    def test_invalid_with_text_rejects(self):
        """Historical 07:45 case: the OCR reads '9 ?', which does not match
        'Enregistrer sous', so the rejection is confirmed."""
        assert _should_reject_on_text_mismatch(
            is_valid=False, observed="9 ?",
        )

    def test_invalid_with_obs_studio_rejects(self):
        """07:56 case: the OCR reads OBS Studio text, rejection confirmed."""
        assert _should_reject_on_text_mismatch(
            is_valid=False, observed="ue audio disponible GUI OBS Studio",
        )

    def test_invalid_with_empty_observed_does_not_reject(self):
        """08:55 case: the OCR read nothing (area too small / low contrast).

        This is ambiguous, not a confirmed mismatch: the server resolution
        is preserved and the agent-side drift guard protects downstream.
        """
        assert not _should_reject_on_text_mismatch(
            is_valid=False, observed="",
        )

    def test_invalid_with_whitespace_only_does_not_reject(self):
        """A lone space is treated like an empty reading."""
        assert not _should_reject_on_text_mismatch(
            is_valid=False, observed=" ",
        )

    def test_invalid_with_newline_only_does_not_reject(self):
        """Newline/tab-only output is treated like an empty reading."""
        assert not _should_reject_on_text_mismatch(
            is_valid=False, observed="\n\t",
        )

    def test_invalid_with_none_observed_does_not_reject(self):
        """Robustness: ``observed=None`` (degenerate case, OCR library
        missing) must not crash."""
        assert not _should_reject_on_text_mismatch(
            is_valid=False, observed=None,
        )

    def test_valid_with_empty_passes(self):
        """``is_valid=True`` with an empty reading.

        This cannot normally happen through ``_text_match_fuzzy`` (which
        returns False on empty text), but the logic stays consistent: when
        ``is_valid`` is True nothing is rejected, whatever ``observed`` is.
        """
        assert not _should_reject_on_text_mismatch(
            is_valid=True, observed="",
        )
|
||||
62
tests/unit/test_validate_resolution_quality_close_tab.py
Normal file
62
tests/unit/test_validate_resolution_quality_close_tab.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.server_v1.resolve_engine import _validate_resolution_quality # noqa: E402
|
||||
|
||||
|
||||
def _result(score: float) -> dict:
    """Build a resolved ``som_anchor_match`` result at (0.75, 0.20).

    Args:
        score: Match score to put in the result.
    """
    return {
        "resolved": True,
        "method": "som_anchor_match",
        "score": score,
        "x_pct": 0.75,
        "y_pct": 0.20,
    }
|
||||
|
||||
|
||||
def _close_tab_spec() -> dict:
    """Return a fresh target spec for a tab close button (``close_tab``)."""
    return {
        "by_text": "",
        "by_role": "tab_close_button",
        "anchor_image_base64": "abc123",
        "context_hints": {"interaction": "close_tab", "active_tab_label": "test"},
    }
|
||||
|
||||
|
||||
def test_close_tab_relaxes_threshold_for_near_match():
    """A close_tab match scored 0.744 near the recorded zone is accepted."""
    out = _validate_resolution_quality(
        _result(0.744),
        0.708594,
        0.35,
        target_spec=_close_tab_spec(),
    )
    assert out["resolved"] is True
    assert out["score"] == 0.744
|
||||
|
||||
|
||||
def test_close_tab_still_rejects_low_score():
    """Even for close_tab, a 0.65 score stays below the accepted threshold."""
    out = _validate_resolution_quality(
        _result(0.65),
        0.708594,
        0.35,
        target_spec=_close_tab_spec(),
    )
    assert out["resolved"] is False
    assert "below_threshold" in out["reason"]
|
||||
|
||||
|
||||
def test_close_tab_rejects_far_zone_even_with_good_score():
    """A good score (0.80) does not save a close_tab match whose position
    (0.75, 0.20) is far from the reference pair (0.30, 0.20) passed in."""
    out = _validate_resolution_quality(
        _result(0.80),
        0.30,
        0.20,
        target_spec=_close_tab_spec(),
    )
    assert out["resolved"] is False
    assert out["reason"] == "close_tab_out_of_recorded_zone"
    assert out["method"] == "rejected_close_tab_zone_som_anchor_match"
|
||||
134
tests/unit/test_validate_resolution_quality_switch_tab.py
Normal file
134
tests/unit/test_validate_resolution_quality_switch_tab.py
Normal file
@@ -0,0 +1,134 @@
|
||||
"""Tests pour `_validate_resolution_quality` — relâchement contextuel
|
||||
du seuil de score pour les cibles `interaction = switch_tab` avec un
|
||||
`som_element` calibré.
|
||||
|
||||
Cas live 2026-05-22 (act_raw_2f7e316c) :
|
||||
- Onglet Notepad moderne `Enregistrer sous`
|
||||
- Score som_text_match = 0.745 (juste sous seuil 0.75)
|
||||
- Cible bien localisée par SoM (bbox_norm) + focus_change pré-clic
|
||||
confirmant déjà la bonne fenêtre
|
||||
- Rejeté à tort → pause supervisée
|
||||
|
||||
Le patch abaisse le seuil à 0.60 UNIQUEMENT pour
|
||||
`context_hints.interaction == "switch_tab"` + `som_element` présent
|
||||
+ méthode `som_*`. Pas de baisse globale.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.server_v1.resolve_engine import ( # noqa: E402
|
||||
_validate_resolution_quality,
|
||||
)
|
||||
|
||||
|
||||
def _result(method: str, score: float, x: float = 0.5, y: float = 0.5) -> dict:
    """Build a resolved result dict for the given method, score and position.

    Args:
        method: Resolution method name stored under ``"method"``.
        score: Match score stored under ``"score"``.
        x: Value stored under ``"x_pct"``.
        y: Value stored under ``"y_pct"``.
    """
    return {
        "resolved": True,
        "method": method,
        "score": score,
        "x_pct": x,
        "y_pct": y,
    }
|
||||
|
||||
|
||||
def _switch_tab_spec(with_som: bool = True) -> dict:
    """Return a fresh ``switch_tab`` target spec for the 'Enregistrer sous' tab.

    Args:
        with_som: When true, attach a ``som_element`` holding a normalized
            bounding box and center; when false, omit the key.
    """
    spec = {
        "by_text": "Enregistrer sous",
        "by_role": "tab",
        "window_title": "*test – Bloc-notes",
        "context_hints": {
            "interaction": "switch_tab",
            "switch_to_window_title": "Enregistrer sous",
        },
    }
    if with_som:
        spec["som_element"] = {
            "bbox_norm": [0.697, 0.335, 0.715, 0.362],
            "center_norm": [0.706, 0.348],
        }
    return spec
|
||||
|
||||
|
||||
class TestSwitchTabThresholdRelaxation:
    """Score-threshold relaxation for ``switch_tab`` targets with a SoM box."""

    def test_baseline_no_target_spec_keeps_strict_threshold(self):
        """Without target_spec, legacy behavior: 0.745 < 0.75 is rejected."""
        res = _result("som_text_match", score=0.745)
        out = _validate_resolution_quality(res, 0.5, 0.5)
        assert out is not None
        assert out["resolved"] is False
        assert "below_threshold" in out["reason"]

    def test_switch_tab_with_som_accepts_score_above_relaxed_threshold(self):
        """switch_tab + som_element + som_* method + score 0.745: accepted."""
        res = _result("som_text_match", score=0.745, x=0.706, y=0.348)
        out = _validate_resolution_quality(
            res, 0.706, 0.348, target_spec=_switch_tab_spec(with_som=True),
        )
        assert out is not None
        assert out["resolved"] is True
        assert out["method"] == "som_text_match"
        assert out["score"] == 0.745

    def test_switch_tab_with_som_still_rejects_very_low_score(self):
        """Safety net: even for switch_tab a 0.50 score stays rejected
        (relaxed threshold 0.60); not everything gets validated."""
        res = _result("som_text_match", score=0.50)
        out = _validate_resolution_quality(
            res, 0.5, 0.5, target_spec=_switch_tab_spec(with_som=True),
        )
        assert out["resolved"] is False
        assert "below_threshold" in out["reason"]

    def test_switch_tab_without_som_keeps_strict_threshold(self):
        """Without a calibrated som_element the strict threshold applies:
        a lower score cannot be trusted without a spatial anchor."""
        res = _result("som_text_match", score=0.745)
        out = _validate_resolution_quality(
            res, 0.5, 0.5, target_spec=_switch_tab_spec(with_som=False),
        )
        assert out["resolved"] is False

    def test_non_switch_tab_keeps_strict_threshold(self):
        """Non-tab target: no relaxation, 0.745 stays rejected."""
        spec = {
            "by_text": "Submit",
            "by_role": "button",
            "som_element": {"bbox_norm": [0.4, 0.4, 0.5, 0.5]},
        }
        res = _result("som_text_match", score=0.745)
        out = _validate_resolution_quality(res, 0.5, 0.5, target_spec=spec)
        assert out["resolved"] is False

    def test_switch_tab_with_non_som_method_keeps_strict_threshold(self):
        """The relaxation only applies to som_* methods (which exploit the
        calibrated bbox). A vlm_quick_find at 0.745 on a switch_tab target
        stays governed by its own legacy threshold (0.60, hence accepted)."""
        # vlm_quick_find already has a 0.60 threshold (see
        # _RESOLUTION_MIN_SCORES), so 0.745 is well above it. This only
        # checks that the case does not regress.
        res = _result("vlm_quick_find", score=0.745)
        out = _validate_resolution_quality(
            res, 0.5, 0.5, target_spec=_switch_tab_spec(with_som=True),
        )
        assert out["resolved"] is True

    def test_unresolved_result_passes_through(self):
        """Non-regression: a resolved=False result is returned untouched."""
        res = {"resolved": False, "method": "no_target_criteria"}
        out = _validate_resolution_quality(
            res, 0.5, 0.5, target_spec=_switch_tab_spec(),
        )
        assert out is res

    def test_target_spec_parameter_is_optional_for_legacy_callers(self):
        """Backward compatibility: calling without target_spec does not
        crash and applies the legacy threshold."""
        res = _result("som_anchor_match", score=0.80)
        out = _validate_resolution_quality(res, 0.5, 0.5)
        assert out["resolved"] is True
|
||||
158
tests/unit/test_validate_text_at_position_som_bbox.py
Normal file
158
tests/unit/test_validate_text_at_position_som_bbox.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Tests pour `_validate_text_at_position` — patch 2026-05-23 :
|
||||
utilisation prioritaire de la bbox SoM enregistrée quand disponible.
|
||||
|
||||
Cas live (brief Codex 2026-05-23 07:45) : pré-check OCR rejette à tort
|
||||
`expected='Enregistrer sous' observed='9 ?'` car le crop fait
|
||||
``radius_px=280`` autour de la coord résolue capture du texte voisin
|
||||
(numéro de ligne « 9 » de la status bar Notepad) au lieu du label
|
||||
étroit de l'onglet. La bbox SoM ``[0.697, 0.335, 0.715, 0.362]``
|
||||
localise précisément l'onglet — l'utiliser comme zone OCR donne
|
||||
l'OCR exact du label.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
ROOT = Path(__file__).parent.parent.parent
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
|
||||
@pytest.fixture
def fake_screenshot(tmp_path):
    """Create an all-black 1920x1200 PNG screenshot and return its path."""
    from PIL import Image

    p = tmp_path / "shot.png"
    img = Image.new("RGB", (1920, 1200), (0, 0, 0))
    img.save(p)
    return str(p)
|
||||
|
||||
|
||||
@pytest.fixture
def patched_reader():
    """Yield a mock OCR reader whose output depends on the crop size.

    Tests fill ``reader._observed_by_size`` with ``(width, height) -> text``
    entries; any other crop size reads ``"fallback text"``. This lets a test
    tell which crop geometry ``_validate_text_at_position`` used (e.g. the
    narrow SoM bbox versus the wide radius crop).
    """
    # ``patch`` and ``MagicMock`` come from the module-level import; the
    # former function-local re-import of ``patch`` was redundant.
    reader = MagicMock()
    # Map approximate crop size -> OCR text to return.
    reader._observed_by_size = {}

    def fake_readtext(arr):
        # ``arr`` is indexed as (height, width, ...).
        h, w = arr.shape[:2]
        key = (w, h)
        text = reader._observed_by_size.get(key, "fallback text")
        return [(None, text, 0.95)]

    reader.readtext.side_effect = fake_readtext
    with patch(
        "agent_v0.server_v1.resolve_engine._get_validation_ocr_reader",
        return_value=reader,
    ):
        yield reader
|
||||
|
||||
|
||||
def _spec_with_som_bbox():
    """Return a fresh target spec carrying the recorded SoM bbox of the
    'Enregistrer sous' tab."""
    return {
        "by_text": "Enregistrer sous",
        "som_element": {
            "bbox_norm": [0.697, 0.335, 0.715, 0.362],
        },
    }
|
||||
|
||||
|
||||
class TestValidateTextWithSomBbox:
    """Crop selection of ``_validate_text_at_position`` (SoM bbox vs radius)."""

    def test_uses_som_bbox_when_present(self, fake_screenshot, patched_reader):
        """When som_bbox_norm is given, the OCR area is derived from that
        bbox (not from the radius around x_pct/y_pct)."""
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        spec = _spec_with_som_bbox()
        bbox = spec["som_element"]["bbox_norm"]
        # Expected crop: width = (0.715-0.697)*1920 = 34 + 2*padding and
        # height = (0.362-0.335)*1200 = 32 + 2*padding (padding=8),
        # i.e. about (50, 48) px.
        patched_reader._observed_by_size[(50, 48)] = "Enregistrer sous"

        is_valid, observed, _ms = _validate_text_at_position(
            fake_screenshot,
            x_pct=0.706, y_pct=0.348,
            expected_text="Enregistrer sous",
            screen_width=1920, screen_height=1200,
            som_bbox_norm=bbox,
        )
        assert observed == "Enregistrer sous"
        assert is_valid is True

    def test_falls_back_to_radius_when_no_bbox(self, fake_screenshot, patched_reader):
        """Without som_bbox_norm, legacy behavior: radius_px=280 crop
        around (x_pct, y_pct)."""
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        # No bbox: crop is about 560x560.
        patched_reader._observed_by_size[(560, 560)] = "Enregistrer sous"

        is_valid, observed, _ms = _validate_text_at_position(
            fake_screenshot,
            x_pct=0.5, y_pct=0.5,
            expected_text="Enregistrer sous",
            screen_width=1920, screen_height=1200,
        )
        assert is_valid is True
        assert observed == "Enregistrer sous"

    def test_invalid_bbox_falls_back_gracefully(self, fake_screenshot, patched_reader):
        """A malformed bbox must not crash: fall back to the radius crop."""
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        patched_reader._observed_by_size[(560, 560)] = "OK"

        is_valid, observed, _ms = _validate_text_at_position(
            fake_screenshot,
            x_pct=0.5, y_pct=0.5,
            expected_text="OK",
            screen_width=1920, screen_height=1200,
            som_bbox_norm=[0.5],  # malformed
        )
        # No crash; the fallback applies the classic radius.
        assert observed == "OK"

    def test_bbox_too_small_falls_back_to_radius(self, fake_screenshot, patched_reader):
        """A degenerate bbox (width/height under a few px) falls back to the
        radius crop instead of attempting a tiny, unusable crop."""
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        patched_reader._observed_by_size[(560, 560)] = "OK"

        is_valid, observed, _ms = _validate_text_at_position(
            fake_screenshot,
            x_pct=0.5, y_pct=0.5,
            expected_text="OK",
            screen_width=1920, screen_height=1200,
            som_bbox_norm=[0.500, 0.500, 0.501, 0.501],
        )
        # ~2x1 px bbox: radius fallback.
        assert observed == "OK"

    def test_bbox_normalized_values_outside_unit_clipped(self, fake_screenshot, patched_reader):
        """A bbox exceeding the [0, 1] bounds is clipped to the screen
        dimensions without crashing."""
        from agent_v0.server_v1.resolve_engine import _validate_text_at_position

        # Overflowing bbox is clipped to the screen:
        # x = (-0.05 -> 0) * 1920 - 8 = -8 -> 0, x2 = 1.05 * 1920 + 8 = 2024 -> 1920
        # width = 1920, height = (1.05-0)*1200 + 16 = 1216 -> 1200
        patched_reader._observed_by_size[(1920, 1200)] = "déborde"

        is_valid, observed, _ms = _validate_text_at_position(
            fake_screenshot,
            x_pct=0.5, y_pct=0.5,
            expected_text="déborde",
            screen_width=1920, screen_height=1200,
            som_bbox_norm=[-0.05, 0.0, 1.05, 1.05],
        )
        assert observed == "déborde"
|
||||
296
tests/unit/test_window_title_memory_path.py
Normal file
296
tests/unit/test_window_title_memory_path.py
Normal file
@@ -0,0 +1,296 @@
|
||||
"""Tests ciblés sur le contrat window_title -> mémoire persistante."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
def _reload_api_stream():
    """Import ``agent_v0.server_v1.api_stream`` afresh and return the module.

    Any cached entry is dropped from ``sys.modules`` first so the module's
    top-level code runs again on import.
    """
    mod_name = "agent_v0.server_v1.api_stream"
    # pop() with a default is a no-op when the module was never imported.
    sys.modules.pop(mod_name, None)
    return importlib.import_module(mod_name)
|
||||
|
||||
|
||||
def test_build_replay_from_raw_events_propagates_window_title_into_target_spec(
    tmp_path, monkeypatch,
):
    """The Lea-first flow must propagate window_title into target_spec
    when it is known."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    # Stub every screenshot/model-dependent step of the builder.
    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        sp,
        "enrich_click_from_screenshot",
        lambda *args, **kwargs: {"anchor_image_base64": "abc123"},
    )
    monkeypatch.setattr(sp, "_attach_expected_screenshots", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_enrich_actions_with_intentions", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_unload_gemma4", lambda *args, **kwargs: None)

    actions = sp.build_replay_from_raw_events(
        [
            {
                "event": {
                    "type": "mouse_click",
                    "timestamp": 1.0,
                    "pos": [100, 200],
                    "button": "left",
                    "screenshot_id": "shot_001",
                    "window": {"title": "Bloc-notes", "app_name": "notepad"},
                }
            }
        ],
        session_id="sess_test",
        session_dir=str(session_dir),
    )

    assert len(actions) == 1
    assert actions[0]["window_title"] == "Bloc-notes"
    assert actions[0]["target_spec"]["window_title"] == "Bloc-notes"
|
||||
|
||||
|
||||
def test_build_replay_from_raw_events_infers_notepad_tab_switch_target(
    tmp_path, monkeypatch,
):
    """A click near the top of a window followed by a same-app focus change
    must become a tab target."""
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    # Stub every screenshot/model-dependent step of the builder. The
    # enrichment returns a "yolo" role that the tab inference must override.
    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        sp,
        "enrich_click_from_screenshot",
        lambda *args, **kwargs: {"anchor_image_base64": "abc123", "by_role": "yolo"},
    )
    monkeypatch.setattr(sp, "_attach_expected_screenshots", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_enrich_actions_with_intentions", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_unload_gemma4", lambda *args, **kwargs: None)

    events = [
        {
            "event": {
                "type": "mouse_click",
                "timestamp": 1.0,
                "pos": [1514, 562],
                "button": "left",
                "screenshot_id": "shot_003",
                "window": {
                    "title": "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes",
                    "app_name": "Notepad.exe",
                },
                "window_capture": {
                    "rect": [323, 522, 2243, 1638],
                    "click_relative": [1191, 40],
                    "window_size": [1920, 1116],
                },
            }
        },
        {
            "event": {
                "type": "window_focus_change",
                "timestamp": 1.2,
                "from": {
                    "title": "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes",
                    "app_name": "Notepad.exe",
                },
                "to": {
                    "title": "Sans titre – Bloc-notes",
                    "app_name": "Notepad.exe",
                },
            }
        },
    ]

    actions = sp.build_replay_from_raw_events(
        events,
        session_id="sess_tab_switch",
        session_dir=str(session_dir),
    )

    assert len(actions) == 1
    assert actions[0]["target_spec"]["by_text"] == "Sans titre"
    assert actions[0]["target_spec"]["by_role"] == "tab"
    assert actions[0]["target_spec"]["window_title"] == (
        "http192.168.1.408765dossier.htmlid=.txt – Bloc-notes"
    )
    assert actions[0]["target_spec"]["context_hints"]["interaction"] == "switch_tab"
|
||||
|
||||
|
||||
def test_build_replay_propagates_focus_change_into_expected_window_before(
    tmp_path, monkeypatch,
):
    """Live case ``act_raw_c70976c8`` (2026-05-22).

    A focus_change towards ``Enregistrer sous`` happens between two
    consecutive clicks, but the following mouse_click still captures the
    pre-transition title (``*test – Bloc-notes``) in its ``window.title``.
    Without a server-side correction, the agent-side pre-check falls back
    on the (stale) target_spec.window_title and wrongly triggers a
    supervised pause.

    The server must explicitly set ``expected_window_before`` to the last
    ``window_focus_change.to.title`` observed before the click.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    # Stub every screenshot/model-dependent step of the builder.
    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        sp,
        "enrich_click_from_screenshot",
        lambda *args, **kwargs: {"anchor_image_base64": "abc123"},
    )
    monkeypatch.setattr(sp, "_attach_expected_screenshots", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_enrich_actions_with_intentions", lambda *args, **kwargs: None)
    monkeypatch.setattr(sp, "_unload_gemma4", lambda *args, **kwargs: None)

    events = [
        # Click 1: inside Notepad, triggers the opening of the dialog.
        {"event": {
            "type": "mouse_click",
            "timestamp": 1.0,
            "pos": [860, 634],
            "button": "left",
            "screenshot_id": "shot_001",
            "window": {"title": "*test – Bloc-notes", "app_name": "Notepad.exe"},
        }},
        # Focus transition towards the "Enregistrer sous" dialog.
        {"event": {
            "type": "window_focus_change",
            "timestamp": 1.2,
            "from": {"title": "*test – Bloc-notes", "app_name": "Notepad.exe"},
            "to": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
        }},
        # Click 2: "Enregistrer" button inside the dialog, but the captured
        # window.title is stale (still the Notepad one).
        {"event": {
            "type": "mouse_click",
            "timestamp": 1.5,
            "pos": [997, 743],
            "button": "left",
            "screenshot_id": "shot_002",
            "window": {"title": "*test – Bloc-notes", "app_name": "Notepad.exe"},
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_save_dialog", session_dir=str(session_dir),
    )

    clicks = [a for a in actions if a.get("type") == "click"]
    assert len(clicks) == 2

    # Click 2 must have expected_window_before = "Enregistrer sous" (from
    # the preceding focus_change), not "*test – Bloc-notes" (the stale
    # title captured in the raw event).
    assert clicks[1].get("expected_window_before") == "Enregistrer sous", (
        f"clic 2 doit pointer sur la dialog ouverte par le focus_change, "
        f"trouvé: {clicks[1].get('expected_window_before')!r} "
        f"(target_spec.window_title={clicks[1].get('target_spec', {}).get('window_title')!r})"
    )

    # Click 1 had no focus_change towards a window before it, hence no
    # expected_window_before (or an empty one).
    assert not clicks[0].get("expected_window_before"), (
        f"clic 1 ne doit pas avoir d'expected_window_before, "
        f"trouvé: {clicks[0].get('expected_window_before')!r}"
    )
|
||||
|
||||
|
||||
def test_build_replay_does_not_overwrite_existing_expected_window_before(
    tmp_path, monkeypatch,
):
    """Focus-change propagation must not clobber a preset expected_window_before.

    A ``window_focus_change`` to "Fenetre A" is followed by a ``mouse_click``
    whose raw event already carries ``expected_window_before`` set to
    "Pre-existant". The resulting click action may then expose one of:

    * nothing / empty string — no value was propagated onto this click;
    * "Fenetre A" — the raw field was not carried over, so the last
      focus change supplied the value;
    * "Pre-existant" — the upstream value was carried over and respected.

    Any other value means the field was overwritten with something
    inconsistent.
    """
    from agent_v0.server_v1 import stream_processor as sp

    session_dir = tmp_path / "sess"
    (session_dir / "shots").mkdir(parents=True)

    # Replace the module-level helpers with inert stubs so the build only
    # exercises the event -> action logic.
    # NOTE(review): presumably these helpers do screenshot / model work —
    # confirm against stream_processor.
    monkeypatch.setattr(sp, "_load_crop_for_event", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        sp, "enrich_click_from_screenshot",
        lambda *args, **kwargs: {
            "anchor_image_base64": "abc",
            # Pre-existing: another component has already set the
            # pre-condition. build_replay_from_raw_events does not create
            # expected_window_before from the enrichment, so the upstream
            # value is simulated through the raw event fixture below
            # (generic case: an upstream action sets this field).
        },
    )
    monkeypatch.setattr(sp, "_attach_expected_screenshots", lambda *a, **k: None)
    monkeypatch.setattr(sp, "_enrich_actions_with_intentions", lambda *a, **k: None)
    monkeypatch.setattr(sp, "_unload_gemma4", lambda *a, **k: None)

    events = [
        {"event": {
            "type": "window_focus_change",
            "timestamp": 0.5,
            "to": {"title": "Fenetre A", "app_name": "test.exe"},
        }},
        {"event": {
            "type": "mouse_click",
            "timestamp": 1.0,
            "pos": [10, 20],
            "screenshot_id": "shot_001",
            "window": {"title": "Fenetre A", "app_name": "test.exe"},
            "expected_window_before": "Pre-existant",
        }},
    ]

    actions = sp.build_replay_from_raw_events(
        events, session_id="sess_x", session_dir=str(session_dir),
    )
    clicks = [a for a in actions if a.get("type") == "click"]
    assert clicks

    # If a value was already set upstream, it must be left in place.
    # "Pre-existant" is accepted explicitly: rejecting the preserved value
    # would make this test fail in exactly the scenario it is named after.
    pre_existing = clicks[0].get("expected_window_before")
    assert pre_existing in (None, "", "Fenetre A", "Pre-existant"), (
        f"valeur inattendue: {pre_existing!r}"
    )
|
||||
|
||||
|
||||
def test_memory_window_title_for_action_reads_top_level_and_target_spec(monkeypatch):
    """The memory reader must see both top-level and target_spec title variants.

    Cases checked against ``_memory_window_title_for_action``:

    * ``expected_window_before`` is returned when it is present together
      with ``target_spec.window_title`` and a top-level ``window_title``;
    * ``target_spec.context_hints.window_title`` is returned when it is the
      only title available;
    * the top-level ``window_title`` is returned when ``target_spec`` is
      empty.
    """
    # Environment is set before the module reload.
    # NOTE(review): presumably api_stream reads these variables at import
    # time — confirm against _reload_api_stream.
    monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
    api_stream = _reload_api_stream()
    resolve_title = api_stream._memory_window_title_for_action

    # (action payload, title the reader is expected to return)
    cases = [
        (
            {
                "expected_window_before": "Fenêtre attendue",
                "target_spec": {"window_title": "Fenêtre cible"},
                "window_title": "Fenêtre action",
            },
            "Fenêtre attendue",
        ),
        (
            {
                "target_spec": {"context_hints": {"window_title": "Depuis context_hints"}},
            },
            "Depuis context_hints",
        ),
        (
            {
                "window_title": "Top-level uniquement",
                "target_spec": {},
            },
            "Top-level uniquement",
        ),
    ]

    for action, expected_title in cases:
        assert resolve_title(action) == expected_title
|
||||
Binary file not shown.
Reference in New Issue
Block a user