Add an omniparser_url text box
This commit is contained in:
@@ -66,6 +66,8 @@ def setup_state(state):
|
|||||||
state["only_n_most_recent_images"] = 2
|
state["only_n_most_recent_images"] = 2
|
||||||
if 'chatbot_messages' not in state:
|
if 'chatbot_messages' not in state:
|
||||||
state['chatbot_messages'] = []
|
state['chatbot_messages'] = []
|
||||||
|
if "omniparser_url" not in state:
|
||||||
|
state["omniparser_url"] = "localhost:8000"
|
||||||
|
|
||||||
async def main(state):
|
async def main(state):
|
||||||
"""Render loop for Gradio"""
|
"""Render loop for Gradio"""
|
||||||
@@ -207,7 +209,8 @@ def process_input(user_input, state):
|
|||||||
api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
|
api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
|
||||||
api_key=state["api_key"],
|
api_key=state["api_key"],
|
||||||
only_n_most_recent_images=state["only_n_most_recent_images"],
|
only_n_most_recent_images=state["only_n_most_recent_images"],
|
||||||
selected_screen=0
|
selected_screen=0,
|
||||||
|
omniparser_url=state["omniparser_url"]
|
||||||
):
|
):
|
||||||
if loop_msg is None:
|
if loop_msg is None:
|
||||||
yield state['chatbot_messages']
|
yield state['chatbot_messages']
|
||||||
@@ -271,6 +274,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|||||||
placeholder="Paste your API key here",
|
placeholder="Paste your API key here",
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
|
with gr.Row():
|
||||||
|
omniparser_url = gr.Textbox(
|
||||||
|
label="OmniParser Base URL",
|
||||||
|
value="localhost:8000",
|
||||||
|
placeholder="Enter OmniParser base URL (e.g. localhost:8000)",
|
||||||
|
interactive=True
|
||||||
|
)
|
||||||
# hide_images = gr.Checkbox(label="Hide screenshots", value=False)
|
# hide_images = gr.Checkbox(label="Hide screenshots", value=False)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
@@ -341,10 +351,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
|
|||||||
state["api_key"] = api_key_value
|
state["api_key"] = api_key_value
|
||||||
state[f'{state["provider"]}_api_key'] = api_key_value
|
state[f'{state["provider"]}_api_key'] = api_key_value
|
||||||
|
|
||||||
|
def update_omniparser_url(url_value, state):
|
||||||
|
state["omniparser_url"] = url_value
|
||||||
|
|
||||||
model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key])
|
model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key])
|
||||||
only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
|
only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
|
||||||
provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
|
provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
|
||||||
api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
|
api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
|
||||||
|
omniparser_url.change(fn=update_omniparser_url, inputs=[omniparser_url, state], outputs=None)
|
||||||
|
|
||||||
submit_button.click(process_input, [chat_input, state], chatbot)
|
submit_button.click(process_input, [chat_input, state], chatbot)
|
||||||
|
|
||||||
|
|||||||
@@ -46,16 +46,16 @@ def sampling_loop_sync(
|
|||||||
api_key: str,
|
api_key: str,
|
||||||
only_n_most_recent_images: int | None = 2,
|
only_n_most_recent_images: int | None = 2,
|
||||||
max_tokens: int = 4096,
|
max_tokens: int = 4096,
|
||||||
selected_screen: int = 0
|
selected_screen: int = 0,
|
||||||
|
omniparser_url: str
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Synchronous agentic sampling loop for the assistant/tool interaction of computer use.
|
Synchronous agentic sampling loop for the assistant/tool interaction of computer use.
|
||||||
"""
|
"""
|
||||||
print('in sampling_loop_sync, model:', model)
|
print('in sampling_loop_sync, model:', model)
|
||||||
|
omniparser = OmniParser(url=f"http://{omniparser_url}/send_text/" if omniparser_url else None,
|
||||||
|
selected_screen=selected_screen,)
|
||||||
if model == "claude-3-5-sonnet-20241022":
|
if model == "claude-3-5-sonnet-20241022":
|
||||||
omniparser = OmniParser(url="http://localhost:8000/send_text/",
|
|
||||||
selected_screen=selected_screen,)
|
|
||||||
|
|
||||||
# Register Actor and Executor
|
# Register Actor and Executor
|
||||||
actor = AnthropicActor(
|
actor = AnthropicActor(
|
||||||
model=model,
|
model=model,
|
||||||
@@ -75,11 +75,6 @@ def sampling_loop_sync(
|
|||||||
)
|
)
|
||||||
|
|
||||||
elif model == "omniparser + gpt-4o" or model == "omniparser + phi35v":
|
elif model == "omniparser + gpt-4o" or model == "omniparser + phi35v":
|
||||||
# omniparser = OmniParser(url="http://localhost:8000/send_text/",
|
|
||||||
# selected_screen=selected_screen,)
|
|
||||||
omniparser = OmniParser(url=None,
|
|
||||||
selected_screen=selected_screen,)
|
|
||||||
|
|
||||||
actor = VLMAgent(
|
actor = VLMAgent(
|
||||||
model=model,
|
model=model,
|
||||||
provider=provider,
|
provider=provider,
|
||||||
|
|||||||
Reference in New Issue
Block a user