# Source file: rpa_vision_v3/tests/unit/test_visual_anchor_semantics.py
# Repository viewer metadata: 154 lines, 5.3 KiB, Python

import os
from types import SimpleNamespace
from agent_v0.server_v1.replay_engine import (
_create_replay_state,
_edge_to_normalized_actions,
)
os.environ.setdefault("RPA_AUTH_DISABLED", "true")
from agent_v0.server_v1.api_stream import _normalize_action_target_semantics
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
_vwb_params_to_target_spec,
)
class _FakeAction:
def __init__(self, type_, target=None, parameters=None):
self.type = type_
self.target = target
self.parameters = parameters or {}
class _FakeEdge:
def __init__(self, action):
self.edge_id = "edge_anchor"
self.from_node = "node_src"
self.to_node = "node_dst"
self.action = action
def test_vwb_target_spec_preserves_visual_anchor_semantics():
    """Check the target spec built from a ``visual_anchor`` parameter block.

    Calls ``_vwb_params_to_target_spec`` for a ``"double_click_anchor"``
    action and expects ``by_text`` to equal the anchor's ``target_text``,
    and ``context_hints`` to carry ``anchor_id``, ``target_text``,
    ``description`` and ``vlm_description``.
    """
    target = _vwb_params_to_target_spec(
        "double_click_anchor",
        {
            "visual_anchor": {
                "anchor_id": "anchor_a518f6d5e727_1778849657",
                "target_text": "- W - ICE rapport urgenc.",
                "description": "Word document icon with text.",
                "ocr_description": "Word document icon with text.",
            },
        },
    )

    assert target["by_text"] == "- W - ICE rapport urgenc."

    hints = target["context_hints"]
    assert hints["anchor_id"] == "anchor_a518f6d5e727_1778849657"
    assert hints["target_text"] == "- W - ICE rapport urgenc."
    assert hints["description"] == "Word document icon with text."
    # The input supplies the same string for both "description" and
    # "ocr_description", so this does not pin which one feeds the field.
    assert hints["vlm_description"] == "Word document icon with text."
def test_replay_normalization_lifts_anchor_semantics_from_context_hints():
    """Check that anchor semantics held only in ``context_hints`` reach the action.

    The target has ``by_text=None`` and carries its anchor data only inside
    ``context_hints``. ``_edge_to_normalized_actions`` is expected to return
    exactly one action with ``visual_mode`` set to ``True``, with
    ``target_description`` and ``target_spec["by_text"]`` equal to the hinted
    ``target_text``, and with ``anchor_id``, ``vlm_description`` and
    ``anchor_image_base64`` present in ``target_spec``.
    """
    target = SimpleNamespace(
        by_role="icon",
        by_text=None,
        by_position=(0.12, 0.18),
        context_hints={
            "anchor_id": "anchor_a518f6d5e727_1778849657",
            "target_text": "- W - ICE rapport urgenc.",
            "description": "Word document icon with text.",
            "ocr_description": "Word document icon with text.",
            "anchor_image_base64": "abc123",
        },
    )
    edge = _FakeEdge(
        _FakeAction("mouse_click", target=target, parameters={"button": "double"})
    )

    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    action = actions[0]
    target_spec = action["target_spec"]
    assert action["visual_mode"] is True
    assert action["target_description"] == "- W - ICE rapport urgenc."
    assert target_spec["by_text"] == "- W - ICE rapport urgenc."
    assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657"
    assert target_spec["vlm_description"] == "Word document icon with text."
    assert target_spec["anchor_image_base64"] == "abc123"
def test_replay_state_strips_anchor_image_but_keeps_semantic_label():
    """Check that the replay state drops the anchor image but keeps its labels.

    Builds a state with ``_create_replay_state`` from one click action and
    expects the stored ``target_spec`` to omit ``anchor_image_base64`` while
    still exposing ``anchor_id``, ``by_text`` and ``description``.
    """
    action = {
        "action_id": "act_anchor",
        "type": "click",
        "target_spec": {
            "anchor_id": "anchor_a518f6d5e727_1778849657",
            "anchor_image_base64": "abc123",
            "by_text": "- W - ICE rapport urgenc.",
            "target_text": "- W - ICE rapport urgenc.",
            "description": "Word document icon with text.",
        },
    }

    state = _create_replay_state("replay", "workflow", "session", 1, actions=[action])

    target_spec = state["actions"][0]["target_spec"]
    assert "anchor_image_base64" not in target_spec
    assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657"
    assert target_spec["by_text"] == "- W - ICE rapport urgenc."
    assert target_spec["description"] == "Word document icon with text."
def test_compound_click_step_keeps_visual_anchor_semantics():
    """Check that a click step inside a ``compound`` action keeps its anchor data.

    The edge wraps a ``"compound"`` action whose single ``mouse_click`` step
    carries ``target_text``, ``description`` and ``anchor_id``.
    ``_edge_to_normalized_actions`` is expected to return exactly one action
    with ``visual_mode`` set to ``True``, with ``target_description`` and
    ``target_spec["by_text"]`` equal to the step's ``target_text``, and with
    the step's ``anchor_id`` in ``target_spec``.
    """
    edge = _FakeEdge(
        _FakeAction(
            "compound",
            parameters={
                "steps": [
                    {
                        "type": "mouse_click",
                        "x_pct": 0.12,
                        "y_pct": 0.18,
                        "target_text": "- W - ICE rapport urgenc.",
                        "description": "Word document icon with text.",
                        "anchor_id": "anchor_a518f6d5e727_1778849657",
                    }
                ]
            },
        )
    )

    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    target_spec = actions[0]["target_spec"]
    assert actions[0]["visual_mode"] is True
    assert actions[0]["target_description"] == "- W - ICE rapport urgenc."
    assert target_spec["by_text"] == "- W - ICE rapport urgenc."
    assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657"
def test_serialized_action_semantics_are_promoted_before_enqueue():
    """Check that a serialized action's anchor semantics are promoted in place.

    The input ``target_spec`` has no ``by_text``. After calling
    ``_normalize_action_target_semantics`` the same dict is inspected (the
    return value is not used), and is expected to have ``by_text`` equal to
    ``target_text``, ``by_text_source`` equal to ``"visual_anchor"``, a
    ``vlm_description``, and a top-level ``target_description``.
    """
    action = {
        "action_id": "step_from_tmp",
        "type": "click",
        "target_spec": {
            "anchor_id": "anchor_tmp",
            "target_text": "- W - ICE rapport urgenc.",
            "description": "Word document icon with text.",
            "ocr_description": "Word document icon with text.",
            "anchor_image_base64": "abc123",
        },
    }

    _normalize_action_target_semantics(action)

    target_spec = action["target_spec"]
    assert target_spec["by_text"] == "- W - ICE rapport urgenc."
    assert target_spec["by_text_source"] == "visual_anchor"
    assert target_spec["vlm_description"] == "Word document icon with text."
    assert action["target_description"] == "- W - ICE rapport urgenc."