Remove the custom system prompt suffix functionality

2025-01-22 20:59:44 -08:00
parent 6e389fe851
commit c29ac5064a
4 changed files with 5 additions and 34 deletions
--- a/demo/gradio/app.py
+++ b/demo/gradio/app.py
@@ -67,8 +67,6 @@ def setup_state(state):
        state["tools"] = {}
    if "only_n_most_recent_images" not in state:
        state["only_n_most_recent_images"] = 2
    if "custom_system_prompt" not in state:
        state["custom_system_prompt"] = ""
    if 'chatbot_messages' not in state:
        state['chatbot_messages'] = []
@@ -204,7 +202,6 @@ def process_input(user_input, state):
    # Run sampling_loop_sync with the chatbot_output_callback
    for loop_msg in sampling_loop_sync(
        system_prompt_suffix=state["custom_system_prompt"],
        model=state["model"],
        provider=state["provider"],
        messages=state["messages"],
@@ -252,12 +249,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                    value="omniparser + gpt-4o",  # Set to one of the choices
                    interactive=True,
                )
            with gr.Column():
                custom_prompt = gr.Textbox(
                    label="System Prompt Suffix",
                    value="",
                    interactive=True,
                )
            with gr.Column():
                only_n_images = gr.Slider(
                    label="N most recent screenshots",
@@ -333,9 +324,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
        )
        return provider_update, api_key_update
    def update_system_prompt_suffix(system_prompt_suffix, state):
        state["custom_system_prompt"] = system_prompt_suffix
    def update_only_n_images(only_n_images_value, state):
        state["only_n_most_recent_images"] = only_n_images_value
@@ -357,7 +345,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
        state[f'{state["provider"]}_api_key'] = api_key_value
    model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key])
    custom_prompt.change(fn=update_system_prompt_suffix, inputs=[custom_prompt, state], outputs=None)
    only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
    provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
    api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
--- a/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py
+++ b/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py
@@ -61,8 +61,7 @@ class AnthropicActor:
    def __init__(
        self, 
        model: str, 
-        provider: APIProvider, 
+        provider: APIProvider,
        system_prompt_suffix: str, 
        api_key: str,
        api_response_callback: Callable[[APIResponse[BetaMessage]], None],
        max_tokens: int = 4096,
@@ -72,7 +71,6 @@ class AnthropicActor:
    ):
        self.model = model
        self.provider = provider
        self.system_prompt_suffix = system_prompt_suffix
        self.api_key = api_key
        self.api_response_callback = api_response_callback
        self.max_tokens = max_tokens
@@ -83,9 +81,7 @@ class AnthropicActor:
            ComputerTool(selected_screen=selected_screen),
        )
-        self.system = (
+        self.system = SYSTEM_PROMPT
            f"{SYSTEM_PROMPT}{' ' + system_prompt_suffix if system_prompt_suffix else ''}"
        )
        self.total_token_usage = 0
        self.total_cost = 0
--- a/demo/gradio/computer_use_demo/loop.py
+++ b/demo/gradio/computer_use_demo/loop.py
@@ -75,7 +75,6 @@ def sampling_loop_sync(
    *,
    model: str,
    provider: APIProvider | None,
    system_prompt_suffix: str,
    messages: list[BetaMessageParam],
    output_callback: Callable[[BetaContentBlock], None],
    tool_output_callback: Callable[[ToolResult, str], None],
@@ -96,8 +95,7 @@ def sampling_loop_sync(
        # Register Actor and Executor
        actor = AnthropicActor(
            model=model, 
-            provider=provider, 
+            provider=provider,
            system_prompt_suffix=system_prompt_suffix, 
            api_key=api_key, 
            api_response_callback=api_response_callback,
            max_tokens=max_tokens,
@@ -121,7 +119,6 @@ def sampling_loop_sync(
        actor = VLMAgent(
            model=model,
            provider=provider,
            system_prompt_suffix=system_prompt_suffix,
            api_key=api_key,
            api_response_callback=api_response_callback,
            selected_screen=selected_screen,
--- a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
+++ b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
@@ -22,13 +22,7 @@ from computer_use_demo.gui_agent.llm_utils.qwen import run_qwen
 from computer_use_demo.gui_agent.llm_utils.llm_utils import extract_data
 from computer_use_demo.colorful_text import colorful_text_vlm
 import time
 #     start = time.time()
 SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
 * You are utilizing a Windows system with internet access.
 * The current date is {datetime.today().strftime('%A, %B %d, %Y')}.
 </SYSTEM_CAPABILITY>
 """
 OUTPUT_DIR = "./tmp/outputs"
 class OmniParser:
@@ -94,13 +88,11 @@ class OmniParser:
        return response_json
 class VLMAgent:
    def __init__(
        self,
        model: str, 
        provider: str, 
        system_prompt_suffix: str, 
        api_key: str,
        output_callback: Callable, 
        api_response_callback: Callable,
@@ -115,7 +107,6 @@ class VLMAgent:
            raise ValueError(f"Model {model} not supported")
        self.provider = provider
        self.system_prompt_suffix = system_prompt_suffix
        self.api_key = api_key
        self.api_response_callback = api_response_callback
        self.max_tokens = max_tokens
@@ -127,7 +118,7 @@ class VLMAgent:
        self.total_token_usage = 0
        self.total_cost = 0
-        self.system = system_prompt_suffix
+        self.system = ''
    def __call__(self, messages: list, parsed_screen: list[str, list, dict]):
        # Show results of Omniparser
@@ -144,7 +135,7 @@ class VLMAgent:
        # example parsed_screen: {"som_image_base64": dino_labled_img, "parsed_content_list": parsed_content_list, "screen_info"}
        boxids_and_labels = parsed_screen["screen_info"]
-        system = self._get_system_prompt(boxids_and_labels) + self.system_prompt_suffix
+        system = self._get_system_prompt(boxids_and_labels)
        # drop looping actions msg, byte image etc
        planner_messages = messages