small fixes
This commit is contained in:
@@ -159,7 +159,7 @@ class VLMAgent:
|
|||||||
self.output_callback(f'<img src="data:image/png;base64,{img_to_show_base64}">', sender="bot")
|
self.output_callback(f'<img src="data:image/png;base64,{img_to_show_base64}">', sender="bot")
|
||||||
self.output_callback(
|
self.output_callback(
|
||||||
f'<details>'
|
f'<details>'
|
||||||
f' <summary>Screen Info for OmniParser Agent</summary>'
|
f' <summary>Parsed Screen elements by OmniParser</summary>'
|
||||||
f' <pre>{screen_info}</pre>'
|
f' <pre>{screen_info}</pre>'
|
||||||
f'</details>',
|
f'</details>',
|
||||||
sender="bot"
|
sender="bot"
|
||||||
|
|||||||
@@ -28,9 +28,9 @@ API_KEY_FILE = CONFIG_DIR / "api_key"
|
|||||||
INTRO_TEXT = '''
|
INTRO_TEXT = '''
|
||||||
🚀🤖✨ It's Play Time!
|
🚀🤖✨ It's Play Time!
|
||||||
|
|
||||||
Welcome to the OmniParser+X Computer Use Demo! X = [GPT-4o, R1, Qwen2.5VL, Claude]. Let OmniParser turn your general-purpose vision-language model into an AI agent.
|
Welcome to the OmniParser+X Computer Use Demo! X = [GPT family (4o/o1/o3-mini), Claude, deepseek R1/V3, Qwen-2.5VL]. Let OmniParser turn your general-purpose vision-language model into an AI agent.
|
||||||
|
|
||||||
Type a message and press submit to start OmniParser+X. Press the trash icon in the chat to clear the message history.
|
Type a message and press submit to start OmniParser+X. Press stop to pause, and press the trash icon in the chat to clear the message history.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
@@ -71,6 +71,8 @@ def setup_state(state):
|
|||||||
state["only_n_most_recent_images"] = 2
|
state["only_n_most_recent_images"] = 2
|
||||||
if 'chatbot_messages' not in state:
|
if 'chatbot_messages' not in state:
|
||||||
state['chatbot_messages'] = []
|
state['chatbot_messages'] = []
|
||||||
|
if 'stop' not in state:
|
||||||
|
state['stop'] = False
|
||||||
|
|
||||||
async def main(state):
|
async def main(state):
|
||||||
"""Render loop for Gradio"""
|
"""Render loop for Gradio"""
|
||||||
@@ -207,6 +209,10 @@ def valid_params(user_input, state):
|
|||||||
return errors
|
return errors
|
||||||
|
|
||||||
def process_input(user_input, state):
|
def process_input(user_input, state):
|
||||||
|
# Reset the stop flag
|
||||||
|
if state["stop"]:
|
||||||
|
state["stop"] = False
|
||||||
|
|
||||||
errors = valid_params(user_input, state)
|
errors = valid_params(user_input, state)
|
||||||
if errors:
|
if errors:
|
||||||
raise gr.Error("Validation errors: " + ", ".join(errors))
|
raise gr.Error("Validation errors: " + ", ".join(errors))
|
||||||
@@ -260,7 +266,7 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
""")
|
""")
|
||||||
state = gr.State({"stop": False})
|
state = gr.State({})
|
||||||
|
|
||||||
setup_state(state.value)
|
setup_state(state.value)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# OmniParser+X Computer Use Demo
|
# OmniParser+X Computer Use Demo
|
||||||
|
|
||||||
Control a Windows 11 VM with OmniParser+X (X = [GPT-4o/4o-mini, Claude, ...]).
|
Control a Windows 11 VM with OmniParser+X (X = [GPT family (4o/o1/o3-mini), Claude, deepseek R1/V3, Qwen-2.5VL]).
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="../imgs/som_overlaid_omni.png" alt="OmniParser+X Computer Use Demo screenshot">
|
<img src="../imgs/som_overlaid_omni.png" alt="OmniParser+X Computer Use Demo screenshot">
|
||||||
</p>
|
</p>
|
||||||
|
|||||||
Reference in New Issue
Block a user