diff --git a/computer_use_demo/gradio/agent/vlm_agent.py b/computer_use_demo/gradio/agent/vlm_agent.py
index 85177e9..173c664 100644
--- a/computer_use_demo/gradio/agent/vlm_agent.py
+++ b/computer_use_demo/gradio/agent/vlm_agent.py
@@ -159,7 +159,7 @@ class VLMAgent:
self.output_callback(f'
', sender="bot")
self.output_callback(
f''
- f' Screen Info for OmniParser Agent
'
+ f' Parsed Screen elements by OmniParser
'
f' {screen_info}'
f' ',
sender="bot"
diff --git a/computer_use_demo/gradio/app.py b/computer_use_demo/gradio/app.py
index 35847f3..0128471 100644
--- a/computer_use_demo/gradio/app.py
+++ b/computer_use_demo/gradio/app.py
@@ -28,9 +28,9 @@ API_KEY_FILE = CONFIG_DIR / "api_key"
INTRO_TEXT = '''
🚀🤖✨ It's Play Time!
-Welcome to the OmniParser+X Computer Use Demo! X = [GPT-4o, R1, Qwen2.5VL, Claude]. Let OmniParser turn your general purpose vision-langauge model to an AI agent.
+Welcome to the OmniParser+X Computer Use Demo! X = [GPT family (4o/o1/o3-mini), Claude, DeepSeek R1/V3, Qwen-2.5VL]. Let OmniParser turn your general-purpose vision-language model into an AI agent.
-Type a message and press submit to start OmniParser+X. Press the trash icon in the chat to clear the message history.
+Type a message and press submit to start OmniParser+X. Press stop to pause, and press the trash icon in the chat to clear the message history.
'''
def parse_arguments():
@@ -71,6 +71,8 @@ def setup_state(state):
state["only_n_most_recent_images"] = 2
if 'chatbot_messages' not in state:
state['chatbot_messages'] = []
+ if 'stop' not in state:
+ state['stop'] = False
async def main(state):
"""Render loop for Gradio"""
@@ -207,6 +209,10 @@ def valid_params(user_input, state):
return errors
def process_input(user_input, state):
+ # Reset the stop flag
+ if state["stop"]:
+ state["stop"] = False
+
errors = valid_params(user_input, state)
if errors:
raise gr.Error("Validation errors: " + ", ".join(errors))
@@ -260,8 +266,8 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
}
""")
- state = gr.State({"stop": False})
-
+ state = gr.State({})
+
setup_state(state.value)
gr.Markdown("# OmniParser + ✖️ Demo")
diff --git a/computer_use_demo/readme.md b/computer_use_demo/readme.md
index 08f3e7a..b7bb4b8 100644
--- a/computer_use_demo/readme.md
+++ b/computer_use_demo/readme.md
@@ -1,6 +1,6 @@
# OmniParser+X Computer Use Demo
-Control a Windows 11 VM with OmniParser+X (X = [GPT-4o/4o-mini, Claude, ...]).
+Control a Windows 11 VM with OmniParser+X (X = [GPT family (4o/o1/o3-mini), Claude, DeepSeek R1/V3, Qwen-2.5VL]).