"""Unit tests for the VWB ``wait_for_state`` matcher, poller and action."""

from visual_workflow_builder.backend.actions.base_action import VWBActionStatus
from visual_workflow_builder.backend.actions.control.wait_for_state import (
    VWBWaitForStateAction,
)
from visual_workflow_builder.backend.services.wait_for_state import (
    match_expected_state,
    wait_for_expected_state,
)


def _fixed_state(title, process):
    """Build a state provider that always reports the same foreground window."""

    def provider():
        return {"window_title": title, "process_active": process}

    return provider


def _make_action(action_id, expected_state, timeout_ms, provider):
    """Create a wait_for_state action polling every 50 ms through *provider*."""
    parameters = {
        "expected_state": expected_state,
        "timeout_ms": timeout_ms,
        "poll_interval_ms": 50,
        "_state_provider": provider,
    }
    return VWBWaitForStateAction(action_id=action_id, parameters=parameters)


def test_match_expected_state_accepts_window_title_and_process_alias():
    # The observed process has no ".exe" suffix; it must still match.
    wanted = {
        "window_title_contains": ["Executer"],
        "process_active": "explorer.exe",
    }
    seen = {
        "window_title": "Executer",
        "process_active": "explorer",
    }

    outcome = match_expected_state(expected_state=wanted, observed_state=seen)

    assert outcome.matched is True
    assert outcome.matched_signals == ["window_title_contains", "process_active"]
    assert outcome.failed_signals == []


def test_wait_for_expected_state_polls_until_match():
    # First snapshot does not match, second one does: exactly two polls.
    snapshots = iter(
        (
            {"window_title": "Visual Workflow Builder", "process_active": "chrome"},
            {"window_title": "Executer", "process_active": "explorer.exe"},
        )
    )

    outcome = wait_for_expected_state(
        expected_state={"window_title_in": ["Executer"]},
        timeout_ms=500,
        poll_interval_ms=50,
        state_provider=snapshots.__next__,
    )

    assert outcome.matched is True
    assert outcome.timed_out is False
    assert outcome.polls == 2
    assert outcome.match["matched_signals"] == ["window_title_in"]


def test_wait_for_state_action_returns_success_evidence_contract():
    action = _make_action(
        "wait_state_001",
        {
            "window_title_in": ["Executer"],
            "process_active": "explorer.exe",
        },
        500,
        _fixed_state("Executer", "explorer"),
    )

    outcome = action.execute("step_001")

    assert outcome.status == VWBActionStatus.SUCCESS
    assert outcome.output_data["matched"] is True
    assert outcome.output_data["match"]["matched_signals"] == [
        "window_title_in",
        "process_active",
    ]


def test_wait_for_state_action_returns_timeout_with_observed_state():
    action = _make_action(
        "wait_state_002",
        {"window_title_contains": ["Enregistrer sous"]},
        100,
        _fixed_state("Executer", "explorer.exe"),
    )

    outcome = action.execute("step_002")

    assert outcome.status == VWBActionStatus.TIMEOUT
    assert outcome.error is not None
    assert outcome.output_data["matched"] is False

    report = outcome.output_data["match"]
    assert report["failed_signals"] == ["window_title_contains"]
    assert report["observed_state"]["window_title"] == "Executer"
"""VWB action that waits for a semantic screen state."""

from datetime import datetime
from typing import Any, Dict, List, Optional

from ..base_action import BaseVWBAction, VWBActionResult, VWBActionStatus
from ...contracts.error import VWBErrorType, create_vwb_error
from ...services.wait_for_state import wait_for_expected_state, StateProvider


class VWBWaitForStateAction(BaseVWBAction):
    """Wait until the foreground window/process matches an expected state.

    Values read from ``parameters``:

    * ``expected_state``: dict of criteria (see ``STATE_CRITERIA``).
    * ``timeout_ms``: maximum wait, default 5000.
    * ``poll_interval_ms``: delay between polls, default 250.
    * ``evidence_required``: evidence level, default ``"window_or_process"``.
    * ``_state_provider``: optional callable returning the observed state,
      used only when no ``state_provider`` argument is given.
    """

    SUPPORTED_EVIDENCE = {"window_or_process"}

    # Keys of ``expected_state`` that define a usable criterion.
    STATE_CRITERIA = (
        "window_title_in",
        "window_title_contains",
        "process_active",
    )

    def __init__(
        self,
        action_id: str,
        parameters: Dict[str, Any],
        screen_capturer=None,
        state_provider: Optional[StateProvider] = None,
    ):
        """Store the wait configuration extracted from *parameters*.

        Raises:
            TypeError, ValueError: if ``timeout_ms`` or ``poll_interval_ms``
                cannot be converted with ``int()``.
        """
        super().__init__(
            action_id=action_id,
            name="Attendre un etat",
            description="Attend un etat semantique d'ecran",
            parameters=parameters,
            screen_capturer=screen_capturer,
        )
        self.expected_state = parameters.get("expected_state") or {}
        self.timeout_ms = int(parameters.get("timeout_ms", 5000))
        self.poll_interval_ms = int(parameters.get("poll_interval_ms", 250))
        self.evidence_required = parameters.get(
            "evidence_required", "window_or_process"
        )
        # An explicit argument wins over the provider embedded in parameters.
        self.state_provider = state_provider or parameters.get("_state_provider")

    def validate_parameters(self) -> List[str]:
        """Return the list of validation error messages (empty when valid)."""
        errors: List[str] = []

        if not isinstance(self.expected_state, dict) or not self.expected_state:
            errors.append("expected_state requis")
        elif not any(self.expected_state.get(key) for key in self.STATE_CRITERIA):
            # Only reported for a non-empty dict: an empty one is already
            # covered by "expected_state requis" above.
            errors.append(
                "expected_state doit definir window_title_in, "
                "window_title_contains ou process_active"
            )

        if self.timeout_ms < 100:
            errors.append("timeout_ms doit etre >= 100")
        if self.poll_interval_ms < 50:
            errors.append("poll_interval_ms doit etre >= 50")

        # The isinstance guard keeps an unhashable value (e.g. a list) from
        # raising TypeError in the set membership test.
        if (
            not isinstance(self.evidence_required, str)
            or self.evidence_required not in self.SUPPORTED_EVIDENCE
        ):
            errors.append(
                f"evidence_required non supporte: {self.evidence_required}"
            )

        if self.state_provider is not None and not callable(self.state_provider):
            errors.append("_state_provider doit etre appelable")

        return errors

    def execute_core(self, step_id: str) -> VWBActionResult:
        """Poll for the expected state and wrap the outcome in a result.

        Returns a SUCCESS result when the state was matched; otherwise a
        TIMEOUT result carrying a ``WAIT_TIMEOUT`` error. In both cases
        ``output_data`` is the dict form of the wait result.
        """
        start_time = datetime.now()
        wait_result = wait_for_expected_state(
            expected_state=self.expected_state,
            timeout_ms=self.timeout_ms,
            poll_interval_ms=self.poll_interval_ms,
            evidence_required=self.evidence_required,
            state_provider=self.state_provider,
        )
        end_time = datetime.now()
        execution_time = (end_time - start_time).total_seconds() * 1000
        output = wait_result.to_dict()

        # Fields shared by the success and the failure result.
        common: Dict[str, Any] = {
            "action_id": self.action_id,
            "step_id": step_id,
            "start_time": start_time,
            "end_time": end_time,
            "execution_time_ms": execution_time,
            "output_data": output,
            "evidence_list": self.evidence_list.copy(),
        }

        if wait_result.matched:
            return VWBActionResult(status=VWBActionStatus.SUCCESS, **common)

        error = create_vwb_error(
            error_type=VWBErrorType.WAIT_TIMEOUT,
            message="Etat attendu non observe avant timeout",
            action_id=self.action_id,
            step_id=step_id,
            technical_details=output,
            execution_time_ms=execution_time,
        )
        return VWBActionResult(
            status=VWBActionStatus.TIMEOUT,
            error=error,
            **common,
        )
def _execute_wait_for_state(params: Dict[str, Any]) -> dict:
    """Run a ``wait_for_state`` action and return an execute-style dict.

    Args:
        params: action parameters. Reads ``expected_state``, ``timeout_ms``
            (default 5000), ``poll_interval_ms`` (default 250) and
            ``evidence_required`` (default ``"window_or_process"``).

    Returns:
        ``{"success": True, "output": ...}`` when the state was observed,
        otherwise ``{"success": False, "error": ..., ...}``. Invalid numeric
        parameters yield a failure dict instead of raising.
    """

    def _int_param(name: str, default: int) -> int:
        # A missing key or an explicit null both fall back to the default.
        raw = params.get(name)
        return default if raw is None else int(raw)

    expected_state = params.get("expected_state") or {}
    evidence_required = params.get("evidence_required", "window_or_process")
    try:
        timeout_ms = _int_param("timeout_ms", 5000)
        poll_interval_ms = _int_param("poll_interval_ms", 250)
    except (TypeError, ValueError) as exc:
        # Keep the same result shape as the timeout path rather than letting
        # a malformed step parameter escape as an exception.
        return {
            "success": False,
            "error": f"Parametres wait_for_state invalides: {exc}",
        }

    # Imported lazily, as in the original, to keep module import light.
    from visual_workflow_builder.backend.services.wait_for_state import (
        wait_for_expected_state,
    )

    print(
        "⏱️ [Action] Attente etat "
        f"{expected_state} timeout={timeout_ms}ms"
    )
    result = wait_for_expected_state(
        expected_state=expected_state,
        timeout_ms=timeout_ms,
        poll_interval_ms=poll_interval_ms,
        evidence_required=evidence_required,
    )
    output = result.to_dict()
    if result.matched:
        return {"success": True, "output": output}
    return {
        "success": False,
        "error": "Etat attendu non observe avant timeout",
        "output": output,
    }
VWBScrollToAnchorAction, - VWBExtractTextAction + VWBExtractTextAction, VWBWaitForStateAction ) from .contracts.visual_anchor import VWBVisualAnchor ACTIONS_AVAILABLE = True @@ -1278,6 +1278,12 @@ def create_action_from_config(action_config: Dict[str, Any]) -> Optional[BaseVWB parameters=parameters, screen_capturer=screen_capturer ) + elif action_type == 'wait_for_state': + return VWBWaitForStateAction( + action_id=action_id, + parameters=parameters, + screen_capturer=screen_capturer + ) elif action_type == 'focus_anchor': return VWBFocusAnchorAction( action_id=action_id, @@ -1556,7 +1562,7 @@ def list_actions(): "type": "string", "required": False, "default": "window_or_process", - "options": ["window_or_process", "uia", "ocr", "screenshot_diff"], + "options": ["window_or_process"], "description": "Niveau de preuve requis" } }, @@ -2069,6 +2075,7 @@ def execute_action(): result_message = '' execution_success = True + direct_output_data = None start_time = datetime.now() if action_type in ['click', 'click_anchor']: @@ -2696,6 +2703,29 @@ def execute_action(): time.sleep(wait_time) result_message = f'Attente de {wait_time}s terminée' + elif action_type == 'wait_for_state': + from visual_workflow_builder.backend.services.wait_for_state import ( + wait_for_expected_state, + ) + + expected_state = parameters.get('expected_state') or {} + timeout_ms = int(parameters.get('timeout_ms', 5000)) + poll_interval_ms = int(parameters.get('poll_interval_ms', 250)) + evidence_required = parameters.get('evidence_required', 'window_or_process') + wait_result = wait_for_expected_state( + expected_state=expected_state, + timeout_ms=timeout_ms, + poll_interval_ms=poll_interval_ms, + evidence_required=evidence_required, + ) + direct_output_data = wait_result.to_dict() + execution_success = wait_result.matched + result_message = ( + 'Etat attendu observe' + if wait_result.matched + else 'Etat attendu non observe avant timeout' + ) + elif action_type in ['focus', 'focus_anchor', 
"""Runtime helpers for VWB wait_for_state actions."""

from dataclasses import dataclass, asdict
from time import monotonic, sleep
from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional


# A state provider returns the currently observed foreground window state.
StateProvider = Callable[[], Mapping[str, Any]]

# Keys that may carry the title / process name, in lookup order.
_OBSERVED_TITLE_KEYS = ("window_title", "title")
_RAW_TITLE_KEYS = ("title", "window_title")
_PROCESS_KEYS = ("process_active", "process_name", "app_name", "application")


@dataclass
class StateMatch:
    """Outcome of comparing one observed state with the expected state."""

    matched: bool
    matched_signals: List[str]
    failed_signals: List[str]
    observed_state: Dict[str, Any]
    expected_state: Dict[str, Any]
    evidence_required: str = "window_or_process"
    unsupported_evidence: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the match as a plain dict."""
        return asdict(self)


@dataclass
class WaitForStateResult:
    """Outcome of a polling wait; ``match`` is the last ``StateMatch`` dict."""

    matched: bool
    timed_out: bool
    elapsed_ms: int
    polls: int
    match: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict."""
        return asdict(self)


def _as_string_list(value: Any) -> List[str]:
    """Normalize a scalar or collection into a list of non-blank strings.

    Every entry is stripped, including a bare string (previously only
    collection items were stripped).
    """
    if value is None:
        return []
    items = value if isinstance(value, (list, tuple, set)) else [value]
    stripped = (str(item).strip() for item in items)
    return [text for text in stripped if text]


def _normalize_text(value: Any) -> str:
    """Return *value* as stripped, case-folded text ("" for falsy values)."""
    return str(value or "").strip().casefold()


def _normalize_process(value: Any) -> str:
    """Normalize a process name and drop a trailing ``.exe``."""
    text = _normalize_text(value)
    return text[:-4] if text.endswith(".exe") else text


def _first_present(source: Mapping[str, Any], keys: Iterable[str]) -> Any:
    """Return the first truthy ``source[key]`` for *keys*, or ""."""
    for key in keys:
        value = source.get(key)
        if value:
            return value
    return ""


def get_active_window_state() -> Dict[str, Any]:
    """Return the current foreground window title and process/app name.

    Any failure while importing or calling the window-info backend is
    reported through an ``error`` entry with empty title and process.
    """
    try:
        from agent_v0.agent_v1.window_info_crossplatform import get_active_window_info

        info = get_active_window_info() or {}
    except Exception as exc:  # best effort: surface the failure as data
        return {
            "window_title": "",
            "process_active": "",
            "error": str(exc),
        }

    return {
        "window_title": _first_present(info, _RAW_TITLE_KEYS),
        "process_active": _first_present(info, _PROCESS_KEYS),
        "raw": dict(info),
    }


def match_expected_state(
    expected_state: Mapping[str, Any],
    observed_state: Mapping[str, Any],
    evidence_required: str = "window_or_process",
) -> StateMatch:
    """Compare an observed window state against the expected criteria.

    Supported criteria in *expected_state*: ``window_title_in`` (exact
    title), ``window_title_contains`` (substring) and ``process_active``
    (process name, ``.exe`` suffix ignored). Comparisons are case-folded.
    A match requires at least one configured criterion, no failed criterion
    and ``evidence_required == "window_or_process"``.

    ``None`` is accepted for either mapping and treated as empty.
    """
    expected = dict(expected_state or {})
    source = observed_state or {}
    observed: Dict[str, Any] = {
        "window_title": _first_present(source, _OBSERVED_TITLE_KEYS),
        # Same aliases as get_active_window_state, including "application".
        "process_active": _first_present(source, _PROCESS_KEYS),
    }
    for passthrough in ("raw", "error"):
        if passthrough in source:
            observed[passthrough] = source[passthrough]

    title = _normalize_text(observed["window_title"])
    process = _normalize_process(observed["process_active"])

    # (signal name, satisfied?) for each configured criterion, in a fixed
    # order so the reported signal lists are stable.
    checks = []
    exact_titles = _as_string_list(expected.get("window_title_in"))
    if exact_titles:
        checks.append((
            "window_title_in",
            any(title == _normalize_text(item) for item in exact_titles),
        ))
    contains_titles = _as_string_list(expected.get("window_title_contains"))
    if contains_titles:
        checks.append((
            "window_title_contains",
            any(_normalize_text(item) in title for item in contains_titles),
        ))
    process_expected = _as_string_list(expected.get("process_active"))
    if process_expected:
        checks.append((
            "process_active",
            any(process == _normalize_process(item) for item in process_expected),
        ))

    matched_signals = [signal for signal, ok in checks if ok]
    failed_signals = [signal for signal, ok in checks if not ok]

    unsupported = None
    if evidence_required != "window_or_process":
        unsupported = evidence_required

    return StateMatch(
        matched=bool(matched_signals) and not failed_signals and unsupported is None,
        matched_signals=matched_signals,
        failed_signals=failed_signals,
        observed_state=observed,
        expected_state=expected,
        evidence_required=evidence_required,
        unsupported_evidence=unsupported,
    )


def wait_for_expected_state(
    expected_state: Mapping[str, Any],
    timeout_ms: int = 5000,
    poll_interval_ms: int = 250,
    evidence_required: str = "window_or_process",
    state_provider: Optional[StateProvider] = None,
) -> WaitForStateResult:
    """Poll the state provider until the expected state is observed.

    Args:
        expected_state: criteria understood by ``match_expected_state``.
        timeout_ms: maximum wait; negative values are treated as 0.
        poll_interval_ms: delay between polls, never below 50 ms.
        evidence_required: evidence level forwarded to the matcher.
        state_provider: callable returning the observed state; defaults to
            ``get_active_window_state``.

    Returns:
        A ``WaitForStateResult``. The provider is always polled at least
        once. When no poll can ever succeed (unsupported evidence level, or
        no usable criterion) the function returns after the first poll with
        ``matched=False`` and ``timed_out=False`` instead of sleeping until
        the deadline.
    """
    provider = state_provider or get_active_window_state
    timeout_s = max(0, int(timeout_ms)) / 1000.0
    poll_s = max(0.05, int(poll_interval_ms) / 1000.0)
    started = monotonic()
    deadline = started + timeout_s
    polls = 0

    while True:
        polls += 1
        observed = provider() or {}
        current = match_expected_state(expected_state, observed, evidence_required)
        elapsed_ms = int((monotonic() - started) * 1000)
        remaining = deadline - monotonic()

        # Neither condition depends on the observed state, so further polls
        # could not change the outcome.
        unsatisfiable = current.unsupported_evidence is not None or not (
            current.matched_signals or current.failed_signals
        )

        if current.matched or unsatisfiable or remaining <= 0:
            return WaitForStateResult(
                matched=current.matched,
                timed_out=not current.matched and not unsatisfiable,
                elapsed_ms=elapsed_ms,
                polls=polls,
                match=current.to_dict(),
            )

        sleep(min(poll_s, remaining))