From c29ac5064a41a315c255dc105dedb3e3ca39cf74 Mon Sep 17 00:00:00 2001 From: "Thomas Dhome Casanova (from Dev Box)" Date: Wed, 22 Jan 2025 20:59:44 -0800 Subject: [PATCH] remove custom prompt functionality --- demo/gradio/app.py | 13 ------------- .../computer_use_demo/gui_agent/anthropic_agent.py | 8 ++------ demo/gradio/computer_use_demo/loop.py | 5 +---- .../computer_use_demo/omniparser_agent/vlm_agent.py | 13 ++----------- 4 files changed, 5 insertions(+), 34 deletions(-) diff --git a/demo/gradio/app.py b/demo/gradio/app.py index b54a19a..72d6d66 100644 --- a/demo/gradio/app.py +++ b/demo/gradio/app.py @@ -67,8 +67,6 @@ def setup_state(state): state["tools"] = {} if "only_n_most_recent_images" not in state: state["only_n_most_recent_images"] = 2 - if "custom_system_prompt" not in state: - state["custom_system_prompt"] = "" if 'chatbot_messages' not in state: state['chatbot_messages'] = [] @@ -204,7 +202,6 @@ def process_input(user_input, state): # Run sampling_loop_sync with the chatbot_output_callback for loop_msg in sampling_loop_sync( - system_prompt_suffix=state["custom_system_prompt"], model=state["model"], provider=state["provider"], messages=state["messages"], @@ -252,12 +249,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo: value="omniparser + gpt-4o", # Set to one of the choices interactive=True, ) - with gr.Column(): - custom_prompt = gr.Textbox( - label="System Prompt Suffix", - value="", - interactive=True, - ) with gr.Column(): only_n_images = gr.Slider( label="N most recent screenshots", @@ -333,9 +324,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo: ) return provider_update, api_key_update - - def update_system_prompt_suffix(system_prompt_suffix, state): - state["custom_system_prompt"] = system_prompt_suffix def update_only_n_images(only_n_images_value, state): state["only_n_most_recent_images"] = only_n_images_value @@ -357,7 +345,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo: state[f'{state["provider"]}_api_key'] = api_key_value model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key]) - custom_prompt.change(fn=update_system_prompt_suffix, inputs=[custom_prompt, state], outputs=None) only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None) provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key) api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None) diff --git a/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py b/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py index fbd66a1..e7b2071 100644 --- a/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py +++ b/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py @@ -61,8 +61,7 @@ class AnthropicActor: def __init__( self, model: str, - provider: APIProvider, - system_prompt_suffix: str, + provider: APIProvider, api_key: str, api_response_callback: Callable[[APIResponse[BetaMessage]], None], max_tokens: int = 4096, @@ -72,7 +71,6 @@ class AnthropicActor: ): self.model = model self.provider = provider - self.system_prompt_suffix = system_prompt_suffix self.api_key = api_key self.api_response_callback = api_response_callback self.max_tokens = max_tokens @@ -83,9 +81,7 @@ class AnthropicActor: ComputerTool(selected_screen=selected_screen), ) - self.system = ( - f"{SYSTEM_PROMPT}{' ' + system_prompt_suffix if system_prompt_suffix else ''}" - ) + self.system = SYSTEM_PROMPT self.total_token_usage = 0 self.total_cost = 0 diff --git a/demo/gradio/computer_use_demo/loop.py b/demo/gradio/computer_use_demo/loop.py index 22754f8..2a7c076 100644 --- a/demo/gradio/computer_use_demo/loop.py +++ b/demo/gradio/computer_use_demo/loop.py @@ -75,7 +75,6 @@ def sampling_loop_sync( *, model: str, provider: APIProvider | None, - system_prompt_suffix: str, messages: list[BetaMessageParam], output_callback: Callable[[BetaContentBlock], None], tool_output_callback: Callable[[ToolResult, str], None], @@ -96,8 +95,7 @@ def sampling_loop_sync( # Register Actor and Executor actor = AnthropicActor( model=model, - provider=provider, - system_prompt_suffix=system_prompt_suffix, + provider=provider, api_key=api_key, api_response_callback=api_response_callback, max_tokens=max_tokens, @@ -121,7 +119,6 @@ def sampling_loop_sync( actor = VLMAgent( model=model, provider=provider, - system_prompt_suffix=system_prompt_suffix, api_key=api_key, api_response_callback=api_response_callback, selected_screen=selected_screen, diff --git a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py index 66c740a..26d5190 100644 --- a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py +++ b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py @@ -22,13 +22,7 @@ from computer_use_demo.gui_agent.llm_utils.qwen import run_qwen from computer_use_demo.gui_agent.llm_utils.llm_utils import extract_data from computer_use_demo.colorful_text import colorful_text_vlm import time -# start = time.time() -SYSTEM_PROMPT = f""" -* You are utilizing a Windows system with internet access. -* The current date is {datetime.today().strftime('%A, %B %d, %Y')}. - -""" OUTPUT_DIR = "./tmp/outputs" class OmniParser: @@ -94,13 +88,11 @@ class OmniParser: return response_json - class VLMAgent: def __init__( self, model: str, provider: str, - system_prompt_suffix: str, api_key: str, output_callback: Callable, api_response_callback: Callable, @@ -115,7 +107,6 @@ class VLMAgent: raise ValueError(f"Model {model} not supported") self.provider = provider - self.system_prompt_suffix = system_prompt_suffix self.api_key = api_key self.api_response_callback = api_response_callback self.max_tokens = max_tokens @@ -127,7 +118,7 @@ class VLMAgent: self.total_token_usage = 0 self.total_cost = 0 - self.system = system_prompt_suffix + self.system = '' def __call__(self, messages: list, parsed_screen: list[str, list, dict]): # Show results of Omniparser @@ -144,7 +135,7 @@ class VLMAgent: # example parsed_screen: {"som_image_base64": dino_labled_img, "parsed_content_list": parsed_content_list, "screen_info"} boxids_and_labels = parsed_screen["screen_info"] - system = self._get_system_prompt(boxids_and_labels) + self.system_prompt_suffix + system = self._get_system_prompt(boxids_and_labels) # drop looping actions msg, byte image etc planner_messages = messages