Add omniparser_url text box

This commit is contained in:
Thomas Dhome Casanova (from Dev Box)
2025-01-22 21:38:59 -08:00
parent bb018460d7
commit b1cd705f1b
2 changed files with 19 additions and 10 deletions

View File

@@ -66,6 +66,8 @@ def setup_state(state):
state["only_n_most_recent_images"] = 2
if 'chatbot_messages' not in state:
state['chatbot_messages'] = []
if "omniparser_url" not in state:
state["omniparser_url"] = "localhost:8000"
async def main(state):
"""Render loop for Gradio"""
@@ -207,7 +209,8 @@ def process_input(user_input, state):
api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
api_key=state["api_key"],
only_n_most_recent_images=state["only_n_most_recent_images"],
selected_screen=0
selected_screen=0,
omniparser_url=state["omniparser_url"]
):
if loop_msg is None:
yield state['chatbot_messages']
@@ -271,6 +274,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
placeholder="Paste your API key here",
interactive=True,
)
with gr.Row():
omniparser_url = gr.Textbox(
label="OmniParser Base URL",
value="localhost:8000",
placeholder="Enter OmniParser base URL (e.g. localhost:8000)",
interactive=True
)
# hide_images = gr.Checkbox(label="Hide screenshots", value=False)
with gr.Row():
@@ -341,10 +351,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
state["api_key"] = api_key_value
state[f'{state["provider"]}_api_key'] = api_key_value
def update_omniparser_url(url_value, state):
    """Store the OmniParser base URL entered in the UI into the session state.

    Registered as the ``change`` handler of the "OmniParser Base URL" textbox.
    The stored value is later interpolated by the sampling loop as
    ``http://<value>/send_text/``, so it must be a bare ``host:port`` string.
    The input is normalized so that common paste variants still produce a
    well-formed endpoint.

    Args:
        url_value: Text currently in the textbox (may be empty or ``None``).
        state: Mutable session-state mapping; its ``"omniparser_url"`` key is
            overwritten.

    Returns:
        None. The handler is wired with ``outputs=None``.
    """
    scheme = "http://"
    cleaned = (url_value or "").strip()
    # The consumer prepends "http://" itself; drop a user-supplied copy so the
    # final URL does not end up as "http://http://...".
    if cleaned.lower().startswith(scheme):
        cleaned = cleaned[len(scheme):]
    # The consumer also appends "/send_text/"; avoid a doubled slash.
    cleaned = cleaned.rstrip("/")
    state["omniparser_url"] = cleaned
model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key])
only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
omniparser_url.change(fn=update_omniparser_url, inputs=[omniparser_url, state], outputs=None)
submit_button.click(process_input, [chat_input, state], chatbot)

View File

@@ -46,16 +46,16 @@ def sampling_loop_sync(
api_key: str,
only_n_most_recent_images: int | None = 2,
max_tokens: int = 4096,
selected_screen: int = 0
selected_screen: int = 0,
omniparser_url: str
):
"""
Synchronous agentic sampling loop for the assistant/tool interaction of computer use.
"""
print('in sampling_loop_sync, model:', model)
omniparser = OmniParser(url=f"http://{omniparser_url}/send_text/" if omniparser_url else None,
selected_screen=selected_screen,)
if model == "claude-3-5-sonnet-20241022":
omniparser = OmniParser(url="http://localhost:8000/send_text/",
selected_screen=selected_screen,)
# Register Actor and Executor
actor = AnthropicActor(
model=model,
@@ -75,11 +75,6 @@ def sampling_loop_sync(
)
elif model == "omniparser + gpt-4o" or model == "omniparser + phi35v":
# omniparser = OmniParser(url="http://localhost:8000/send_text/",
# selected_screen=selected_screen,)
omniparser = OmniParser(url=None,
selected_screen=selected_screen,)
actor = VLMAgent(
model=model,
provider=provider,