feat(vwb): execute wait for state

This commit is contained in:
Dom
2026-05-29 17:22:35 +02:00
parent 7b1f30af1a
commit e66bc6d452
10 changed files with 491 additions and 9 deletions

View File

@@ -0,0 +1,96 @@
from visual_workflow_builder.backend.actions.base_action import VWBActionStatus
from visual_workflow_builder.backend.actions.control.wait_for_state import (
VWBWaitForStateAction,
)
from visual_workflow_builder.backend.services.wait_for_state import (
match_expected_state,
wait_for_expected_state,
)
def test_match_expected_state_accepts_window_title_and_process_alias():
    """A title substring and an ``.exe``-insensitive process name both match."""
    wanted = {
        "window_title_contains": ["Executer"],
        "process_active": "explorer.exe",
    }
    seen = {
        "window_title": "Executer",
        "process_active": "explorer",
    }

    outcome = match_expected_state(expected_state=wanted, observed_state=seen)

    assert outcome.matched is True
    # Signals are reported in evaluation order: title criteria, then process.
    assert outcome.matched_signals == ["window_title_contains", "process_active"]
    assert outcome.failed_signals == []
def test_wait_for_expected_state_polls_until_match():
    """Polling goes past a non-matching snapshot and stops on the first match."""
    snapshots = iter(
        (
            {"window_title": "Visual Workflow Builder", "process_active": "chrome"},
            {"window_title": "Executer", "process_active": "explorer.exe"},
        )
    )

    def next_snapshot():
        # Each poll consumes exactly one scripted snapshot.
        return next(snapshots)

    outcome = wait_for_expected_state(
        expected_state={"window_title_in": ["Executer"]},
        timeout_ms=500,
        poll_interval_ms=50,
        state_provider=next_snapshot,
    )

    assert outcome.matched is True
    assert outcome.timed_out is False
    assert outcome.polls == 2
    assert outcome.match["matched_signals"] == ["window_title_in"]
def test_wait_for_state_action_returns_success_evidence_contract():
    """A matching injected state yields SUCCESS plus the matched-signal evidence."""

    def fake_state():
        return {
            "window_title": "Executer",
            "process_active": "explorer",
        }

    params = {
        "expected_state": {
            "window_title_in": ["Executer"],
            "process_active": "explorer.exe",
        },
        "timeout_ms": 500,
        "poll_interval_ms": 50,
        # Injected provider so the test never touches the real desktop.
        "_state_provider": fake_state,
    }
    action = VWBWaitForStateAction(action_id="wait_state_001", parameters=params)

    outcome = action.execute("step_001")

    assert outcome.status == VWBActionStatus.SUCCESS
    assert outcome.output_data["matched"] is True
    assert outcome.output_data["match"]["matched_signals"] == [
        "window_title_in",
        "process_active",
    ]
def test_wait_for_state_action_returns_timeout_with_observed_state():
    """A never-matching state yields TIMEOUT and reports what was observed."""

    def fake_state():
        return {
            "window_title": "Executer",
            "process_active": "explorer.exe",
        }

    params = {
        "expected_state": {"window_title_contains": ["Enregistrer sous"]},
        "timeout_ms": 100,
        "poll_interval_ms": 50,
        # Injected provider so the test never touches the real desktop.
        "_state_provider": fake_state,
    }
    action = VWBWaitForStateAction(action_id="wait_state_002", parameters=params)

    outcome = action.execute("step_002")

    assert outcome.status == VWBActionStatus.TIMEOUT
    assert outcome.error is not None
    assert outcome.output_data["matched"] is False
    assert outcome.output_data["match"]["failed_signals"] == ["window_title_contains"]
    assert outcome.output_data["match"]["observed_state"]["window_title"] == "Executer"

View File

@@ -28,6 +28,7 @@ from .vision_ui.focus_anchor import VWBFocusAnchorAction
from .vision_ui.type_secret import VWBTypeSecretAction
from .vision_ui.scroll_to_anchor import VWBScrollToAnchorAction
from .vision_ui.extract_text import VWBExtractTextAction
from .control.wait_for_state import VWBWaitForStateAction
from .registry import (
VWBActionRegistry,
get_global_registry,
@@ -50,6 +51,7 @@ __all__ = [
'VWBTypeSecretAction',
'VWBScrollToAnchorAction',
'VWBExtractTextAction',
'VWBWaitForStateAction',
'VWBActionRegistry',
'get_global_registry',
'register_action',
@@ -62,4 +64,4 @@ __all__ = [
__version__ = '1.1.0'
__author__ = 'Dom, Alice, Kiro'
__date__ = '10 janvier 2026'
__date__ = '10 janvier 2026'

View File

@@ -4,10 +4,13 @@ Auteur : Dom, Claude - 14 janvier 2026
Ce module contient les actions de contrôle de flux :
- keyboard_shortcut : Raccourcis clavier
- wait_for_state : Attente d'etat semantique
"""
from .keyboard_shortcut import VWBKeyboardShortcutAction
from .wait_for_state import VWBWaitForStateAction
__all__ = [
'VWBKeyboardShortcutAction',
'VWBWaitForStateAction',
]

View File

@@ -0,0 +1,113 @@
"""VWB action that waits for a semantic screen state."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional
from ..base_action import BaseVWBAction, VWBActionResult, VWBActionStatus
from ...contracts.error import VWBErrorType, create_vwb_error
from ...services.wait_for_state import wait_for_expected_state, StateProvider
class VWBWaitForStateAction(BaseVWBAction):
    """Control action that waits for the observed window/process state to match.

    Recognised ``parameters`` keys:

    * ``expected_state`` -- dict of criteria (``window_title_in``,
      ``window_title_contains``, ``process_active``).
    * ``timeout_ms`` -- overall budget, default 5000.
    * ``poll_interval_ms`` -- delay between polls, default 250.
    * ``evidence_required`` -- default ``"window_or_process"``.
    * ``_state_provider`` -- optional callable returning the observed state,
      used when no ``state_provider`` argument is given.
    """

    # Evidence levels this action accepts during validation.
    SUPPORTED_EVIDENCE = {"window_or_process"}

    def __init__(
        self,
        action_id: str,
        parameters: Dict[str, Any],
        screen_capturer=None,
        state_provider: Optional[StateProvider] = None,
    ):
        """Store the wait configuration read from ``parameters``.

        Args:
            action_id: Identifier forwarded to the base action.
            parameters: Raw action parameters (see class docstring).
            screen_capturer: Forwarded unchanged to the base action.
            state_provider: Optional callable returning the observed state;
                takes precedence over ``parameters["_state_provider"]``.
        """
        super().__init__(
            action_id=action_id,
            name="Attendre un etat",
            description="Attend un etat semantique d'ecran",
            parameters=parameters,
            screen_capturer=screen_capturer,
        )
        read = parameters.get
        self.expected_state = read("expected_state") or {}
        self.timeout_ms = int(read("timeout_ms", 5000))
        self.poll_interval_ms = int(read("poll_interval_ms", 250))
        self.evidence_required = read("evidence_required", "window_or_process")
        if state_provider:
            self.state_provider = state_provider
        else:
            self.state_provider = read("_state_provider")

    def validate_parameters(self) -> List[str]:
        """Return human-readable validation errors; an empty list means valid."""
        problems: List[str] = []
        state = self.expected_state
        state_is_dict = isinstance(state, dict)

        if not (state_is_dict and state):
            problems.append("expected_state requis")

        if state_is_dict:
            # At least one of the supported criteria must carry a truthy value.
            has_criterion = False
            for name in (
                "window_title_in",
                "window_title_contains",
                "process_active",
            ):
                if state.get(name):
                    has_criterion = True
                    break
            if not has_criterion:
                problems.append(
                    "expected_state doit definir window_title_in, "
                    "window_title_contains ou process_active"
                )

        if self.timeout_ms < 100:
            problems.append("timeout_ms doit etre >= 100")
        if self.poll_interval_ms < 50:
            problems.append("poll_interval_ms doit etre >= 50")
        if self.evidence_required not in self.SUPPORTED_EVIDENCE:
            problems.append(
                f"evidence_required non supporte: {self.evidence_required}"
            )
        return problems

    def execute_core(self, step_id: str) -> VWBActionResult:
        """Poll for the expected state and wrap the outcome in an action result.

        Returns a ``SUCCESS`` result when the state matched, otherwise a
        ``TIMEOUT`` result carrying a ``WAIT_TIMEOUT`` error. In both cases
        ``output_data`` is the serialised wait result.
        """
        started_at = datetime.now()
        outcome = wait_for_expected_state(
            expected_state=self.expected_state,
            timeout_ms=self.timeout_ms,
            poll_interval_ms=self.poll_interval_ms,
            evidence_required=self.evidence_required,
            state_provider=self.state_provider,
        )
        finished_at = datetime.now()
        duration_ms = (finished_at - started_at).total_seconds() * 1000
        payload = outcome.to_dict()

        def finish(status, **extra) -> VWBActionResult:
            # Both outcomes share every field except status and the optional error.
            return VWBActionResult(
                action_id=self.action_id,
                step_id=step_id,
                status=status,
                start_time=started_at,
                end_time=finished_at,
                execution_time_ms=duration_ms,
                output_data=payload,
                evidence_list=self.evidence_list.copy(),
                **extra,
            )

        if not outcome.matched:
            failure = create_vwb_error(
                error_type=VWBErrorType.WAIT_TIMEOUT,
                message="Etat attendu non observe avant timeout",
                action_id=self.action_id,
                step_id=step_id,
                technical_details=payload,
                execution_time_ms=duration_ms,
            )
            return finish(VWBActionStatus.TIMEOUT, error=failure)

        return finish(VWBActionStatus.SUCCESS)

View File

@@ -69,7 +69,7 @@ _LLM_ACTION_MAP = {
}
# Actions VWB de type attente
_WAIT_ACTION_TYPES = {"wait_for_anchor"}
_WAIT_ACTION_TYPES = {"wait_for_anchor", "wait_for_state"}
# Actions VWB de type condition
_CONDITION_ACTION_TYPES = {"visual_condition"}
@@ -1179,6 +1179,7 @@ def execute_windows():
'hotkey': 'key_combo',
'scroll_to_anchor': 'scroll',
'wait_for_anchor': 'wait',
'wait_for_state': 'wait_for_state',
'visual_condition': 'wait',
}
@@ -1245,6 +1246,10 @@ def execute_windows():
# ---------------------------------------------------------------
if vwb_type in ('wait', 'wait_for_anchor', 'visual_condition') and 'duration_ms' in params:
action['duration_ms'] = params['duration_ms']
if vwb_type == 'wait_for_state':
for key in ('expected_state', 'timeout_ms', 'poll_interval_ms', 'evidence_required'):
if key in params:
action[key] = params[key]
# ---------------------------------------------------------------
# Actions fichiers → proxy vers /file-action de l'agent (port 5006)

View File

@@ -80,6 +80,36 @@ _check_screen_for_patterns = _shared_check_patterns
_handle_detected_pattern = _shared_handle_pattern
def _execute_wait_for_state(params: Dict[str, Any]) -> dict:
    """Run a ``wait_for_state`` action and report it in executor-result form.

    Args:
        params: Action parameters; reads ``expected_state``, ``timeout_ms``
            (default 5000), ``poll_interval_ms`` (default 250) and
            ``evidence_required`` (default ``"window_or_process"``).

    Returns:
        ``{"success": True, "output": ...}`` when the state matched, otherwise
        ``{"success": False, "error": ..., "output": ...}``. ``output`` is the
        serialised wait result in both cases.
    """
    # Imported lazily, at call time rather than at module import.
    from visual_workflow_builder.backend.services.wait_for_state import (
        wait_for_expected_state,
    )

    expected_state = params.get("expected_state") or {}
    timeout_ms = int(params.get("timeout_ms", 5000))
    interval_ms = int(params.get("poll_interval_ms", 250))
    evidence = params.get("evidence_required", "window_or_process")

    banner = (
        "⏱️ [Action] Attente etat "
        f"{expected_state} timeout={timeout_ms}ms"
    )
    print(banner)

    outcome = wait_for_expected_state(
        expected_state=expected_state,
        timeout_ms=timeout_ms,
        poll_interval_ms=interval_ms,
        evidence_required=evidence,
    )
    payload = outcome.to_dict()

    if not outcome.matched:
        return {
            "success": False,
            "error": "Etat attendu non observe avant timeout",
            "output": payload,
        }
    return {"success": True, "output": payload}
def minimize_active_window():
"""Minimise le navigateur VWB et active la fenêtre suivante (VM, app cible)."""
try:
@@ -1165,6 +1195,9 @@ def execute_action(action_type: str, params: dict) -> dict:
}
}
elif action_type == 'wait_for_state':
return _execute_wait_for_state(params)
elif action_type == 'keyboard_shortcut':
keys = params.get('keys', [])
if not keys:

View File

@@ -28,7 +28,7 @@ try:
BaseVWBAction,
VWBClickAnchorAction, VWBTypeTextAction, VWBWaitForAnchorAction,
VWBFocusAnchorAction, VWBTypeSecretAction, VWBScrollToAnchorAction,
VWBExtractTextAction
VWBExtractTextAction, VWBWaitForStateAction
)
from visual_workflow_builder.backend.contracts.visual_anchor import VWBVisualAnchor
ACTIONS_AVAILABLE = True
@@ -41,7 +41,7 @@ except ImportError as e:
BaseVWBAction,
VWBClickAnchorAction, VWBTypeTextAction, VWBWaitForAnchorAction,
VWBFocusAnchorAction, VWBTypeSecretAction, VWBScrollToAnchorAction,
VWBExtractTextAction
VWBExtractTextAction, VWBWaitForStateAction
)
from .contracts.visual_anchor import VWBVisualAnchor
ACTIONS_AVAILABLE = True
@@ -1278,6 +1278,12 @@ def create_action_from_config(action_config: Dict[str, Any]) -> Optional[BaseVWB
parameters=parameters,
screen_capturer=screen_capturer
)
elif action_type == 'wait_for_state':
return VWBWaitForStateAction(
action_id=action_id,
parameters=parameters,
screen_capturer=screen_capturer
)
elif action_type == 'focus_anchor':
return VWBFocusAnchorAction(
action_id=action_id,
@@ -1556,7 +1562,7 @@ def list_actions():
"type": "string",
"required": False,
"default": "window_or_process",
"options": ["window_or_process", "uia", "ocr", "screenshot_diff"],
"options": ["window_or_process"],
"description": "Niveau de preuve requis"
}
},
@@ -2069,6 +2075,7 @@ def execute_action():
result_message = ''
execution_success = True
direct_output_data = None
start_time = datetime.now()
if action_type in ['click', 'click_anchor']:
@@ -2696,6 +2703,29 @@ def execute_action():
time.sleep(wait_time)
result_message = f'Attente de {wait_time}s terminée'
elif action_type == 'wait_for_state':
from visual_workflow_builder.backend.services.wait_for_state import (
wait_for_expected_state,
)
expected_state = parameters.get('expected_state') or {}
timeout_ms = int(parameters.get('timeout_ms', 5000))
poll_interval_ms = int(parameters.get('poll_interval_ms', 250))
evidence_required = parameters.get('evidence_required', 'window_or_process')
wait_result = wait_for_expected_state(
expected_state=expected_state,
timeout_ms=timeout_ms,
poll_interval_ms=poll_interval_ms,
evidence_required=evidence_required,
)
direct_output_data = wait_result.to_dict()
execution_success = wait_result.matched
result_message = (
'Etat attendu observe'
if wait_result.matched
else 'Etat attendu non observe avant timeout'
)
elif action_type in ['focus', 'focus_anchor', 'focaliser']:
# focus_anchor: chercher l'ancre et cliquer dessus pour donner le focus
visual_anchor = parameters.get('visual_anchor', {})
@@ -2803,7 +2833,7 @@ def execute_action():
"step_id": step_id,
"status": "success" if execution_success else "failed",
"execution_time_ms": execution_time_ms,
"output_data": {"message": result_message},
"output_data": direct_output_data or {"message": result_message},
"evidence_list": [],
"error": None if execution_success else {"message": result_message}
}

View File

@@ -198,6 +198,22 @@ VWB_ACTION_CONTRACTS: Dict[str, ActionContract] = {
optional_params=["wait_mode", "max_wait_time_ms", "check_interval_ms"],
param_validators={"visual_anchor": lambda p: has_visual_anchor({"visual_anchor": p})}
),
"wait_for_state": ActionContract(
action_type="wait_for_state",
description="Attendre qu'un etat semantique d'ecran soit observe",
required_params=["expected_state"],
optional_params=["timeout_ms", "poll_interval_ms", "evidence_required"],
param_validators={
"expected_state": lambda p: isinstance(p, dict) and any(
p.get(key)
for key in (
"window_title_in",
"window_title_contains",
"process_active",
)
)
}
),
# --- ACTIONS DE SCROLL ---
"scroll_to_anchor": ActionContract(

View File

@@ -0,0 +1,187 @@
"""Runtime helpers for VWB wait_for_state actions."""
from __future__ import annotations
from dataclasses import dataclass, asdict
from time import monotonic, sleep
from typing import Any, Callable, Dict, List, Mapping, Optional
StateProvider = Callable[[], Mapping[str, Any]]
@dataclass
class StateMatch:
    """Outcome of comparing one observed state against the expected criteria."""

    # Overall verdict for this comparison.
    matched: bool
    # Names of the expected-state criteria that were satisfied.
    matched_signals: List[str]
    # Names of the expected-state criteria that were not satisfied.
    failed_signals: List[str]
    # State that was compared.
    observed_state: Dict[str, Any]
    # Criteria that were requested.
    expected_state: Dict[str, Any]
    # Evidence level requested by the caller.
    evidence_required: str = "window_or_process"
    # Evidence level that could not be honoured, if any.
    unsupported_evidence: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
@dataclass
class WaitForStateResult:
    """Summary of one polling wait for an expected state."""

    # True when the expected state was observed.
    matched: bool
    # True when the wait ended without a match.
    timed_out: bool
    # Time spent waiting, in milliseconds.
    elapsed_ms: int
    # Number of times the state was sampled.
    polls: int
    # Serialised details of the last comparison.
    match: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return every field as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
def _as_string_list(value: Any) -> List[str]:
if value is None:
return []
if isinstance(value, str):
return [value] if value.strip() else []
if isinstance(value, (list, tuple, set)):
return [str(item).strip() for item in value if str(item).strip()]
return [str(value).strip()] if str(value).strip() else []
def _normalize_text(value: Any) -> str:
return str(value or "").strip().casefold()
def _normalize_process(value: Any) -> str:
    """Normalise a process name and drop a trailing ``.exe`` if present."""
    name = _normalize_text(value)
    if name.endswith(".exe"):
        # Strip the four-character suffix so "explorer.exe" equals "explorer".
        return name[:-4]
    return name
def get_active_window_state() -> Dict[str, Any]:
    """Return the current foreground window title and process/app name.

    Returns:
        A dict with ``window_title``, ``process_active`` and ``raw`` (a copy of
        the probe's answer). If importing or calling the probe raises, the
        title and process are empty and ``error`` holds the exception text.
    """
    try:
        from agent_v0.agent_v1.window_info_crossplatform import get_active_window_info

        info = get_active_window_info() or {}
    except Exception as exc:
        # Best effort: report the failure in-band instead of raising.
        return {
            "window_title": "",
            "process_active": "",
            "error": str(exc),
        }

    def first_value(*keys: str) -> Any:
        # First truthy value among the alias keys, else an empty string.
        for key in keys:
            found = info.get(key)
            if found:
                return found
        return ""

    return {
        "window_title": first_value("title", "window_title"),
        "process_active": first_value(
            "process_active",
            "process_name",
            "app_name",
            "application",
        ),
        "raw": dict(info),
    }
def match_expected_state(
    expected_state: Mapping[str, Any],
    observed_state: Mapping[str, Any],
    evidence_required: str = "window_or_process",
) -> StateMatch:
    """Compare an observed window/process snapshot with the expected criteria.

    Supported criteria in ``expected_state``:

    * ``window_title_in`` -- title equals one of the candidates.
    * ``window_title_contains`` -- title contains one of the candidates.
    * ``process_active`` -- process equals one of the candidates, ignoring a
      trailing ``.exe``.

    Comparisons use stripped, case-folded text. A criterion with no usable
    candidates is ignored.

    Args:
        expected_state: Criteria to check; ``None`` is treated as empty.
        observed_state: Snapshot to check against; ``None`` is treated as
            empty. The title is read from ``window_title`` or ``title``, the
            process from ``process_active``, ``process_name``, ``app_name`` or
            ``application``.
        evidence_required: Requested evidence level; anything other than
            ``"window_or_process"`` is reported as unsupported.

    Returns:
        A ``StateMatch`` whose ``matched`` is true only when at least one
        criterion was checked, none failed, and the evidence level is
        supported.
    """
    expected = dict(expected_state or {})
    # Tolerate a missing snapshot the same way a missing expected_state is.
    snapshot = observed_state or {}

    observed: Dict[str, Any] = {
        "window_title": snapshot.get("window_title")
        or snapshot.get("title")
        or "",
        "process_active": snapshot.get("process_active")
        or snapshot.get("process_name")
        or snapshot.get("app_name")
        or snapshot.get("application")
        or "",
    }
    # Carry diagnostic fields through so they show up in the reported state.
    for passthrough in ("raw", "error"):
        if passthrough in snapshot:
            observed[passthrough] = snapshot[passthrough]

    title = _normalize_text(observed["window_title"])
    process = _normalize_process(observed["process_active"])

    # Evaluation order here fixes the order of the reported signal names.
    checks = (
        ("window_title_in", lambda candidate: title == _normalize_text(candidate)),
        ("window_title_contains", lambda candidate: _normalize_text(candidate) in title),
        ("process_active", lambda candidate: process == _normalize_process(candidate)),
    )

    matched_signals: List[str] = []
    failed_signals: List[str] = []
    for signal, accepts in checks:
        candidates = _as_string_list(expected.get(signal))
        if not candidates:
            continue
        if any(accepts(candidate) for candidate in candidates):
            matched_signals.append(signal)
        else:
            failed_signals.append(signal)

    unsupported = None if evidence_required == "window_or_process" else evidence_required

    return StateMatch(
        matched=bool(matched_signals) and not failed_signals and unsupported is None,
        matched_signals=matched_signals,
        failed_signals=failed_signals,
        observed_state=observed,
        expected_state=expected,
        evidence_required=evidence_required,
        unsupported_evidence=unsupported,
    )
def wait_for_expected_state(
    expected_state: Mapping[str, Any],
    timeout_ms: int = 5000,
    poll_interval_ms: int = 250,
    evidence_required: str = "window_or_process",
    state_provider: Optional[StateProvider] = None,
) -> WaitForStateResult:
    """Poll the observed state until it matches or the timeout elapses.

    The state is always sampled at least once, even with a zero timeout.

    Args:
        expected_state: Criteria passed to ``match_expected_state``.
        timeout_ms: Overall budget in milliseconds; negative values count as 0.
        poll_interval_ms: Delay between polls; values below 50 ms are raised
            to 50 ms.
        evidence_required: Evidence level passed to ``match_expected_state``.
        state_provider: Callable returning the observed state; defaults to
            ``get_active_window_state``. A falsy return counts as empty.

    Returns:
        A ``WaitForStateResult`` holding the verdict, elapsed time, number of
        polls and the serialised details of the last comparison.
    """
    provider = state_provider or get_active_window_state
    timeout_s = max(0, int(timeout_ms)) / 1000.0
    poll_s = max(0.05, int(poll_interval_ms) / 1000.0)

    started = monotonic()
    deadline = started + timeout_s
    polls = 0

    while True:
        polls += 1
        current = match_expected_state(
            expected_state, provider() or {}, evidence_required
        )
        elapsed_ms = int((monotonic() - started) * 1000)
        remaining = deadline - monotonic()

        # Stop on a match, or once the budget is spent after this poll.
        if current.matched or remaining <= 0:
            return WaitForStateResult(
                matched=current.matched,
                timed_out=not current.matched,
                elapsed_ms=elapsed_ms,
                polls=polls,
                match=current.to_dict(),
            )

        # Never sleep past the deadline.
        sleep(min(poll_s, remaining))

View File

@@ -426,9 +426,6 @@ export default function PropertiesPanel({ step, onUpdateParams, onDelete }: Prop
onChange={(e) => updateParam('evidence_required', e.target.value)}
>
<option value="window_or_process">Fenêtre ou process</option>
<option value="uia">UIA</option>
<option value="ocr">OCR</option>
<option value="screenshot_diff">Différence écran</option>
</select>
</div>
</>