From c29ac5064a41a315c255dc105dedb3e3ca39cf74 Mon Sep 17 00:00:00 2001
From: "Thomas Dhome Casanova (from Dev Box)" <thomasdh@microsoft.com>
Date: Wed, 22 Jan 2025 20:59:44 -0800
Subject: [PATCH] Remove custom system prompt suffix functionality

---
 demo/gradio/app.py                                  | 13 -------------
 .../computer_use_demo/gui_agent/anthropic_agent.py  |  8 ++------
 demo/gradio/computer_use_demo/loop.py               |  5 +----
 .../computer_use_demo/omniparser_agent/vlm_agent.py | 13 ++-----------
 4 files changed, 5 insertions(+), 34 deletions(-)

diff --git a/demo/gradio/app.py b/demo/gradio/app.py
index b54a19a..72d6d66 100644
--- a/demo/gradio/app.py
+++ b/demo/gradio/app.py
@@ -67,8 +67,6 @@ def setup_state(state):
         state["tools"] = {}
     if "only_n_most_recent_images" not in state:
         state["only_n_most_recent_images"] = 2
-    if "custom_system_prompt" not in state:
-        state["custom_system_prompt"] = ""
     if 'chatbot_messages' not in state:
         state['chatbot_messages'] = []
 
@@ -204,7 +202,6 @@ def process_input(user_input, state):
 
     # Run sampling_loop_sync with the chatbot_output_callback
     for loop_msg in sampling_loop_sync(
-        system_prompt_suffix=state["custom_system_prompt"],
         model=state["model"],
         provider=state["provider"],
         messages=state["messages"],
@@ -252,12 +249,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                     value="omniparser + gpt-4o",  # Set to one of the choices
                     interactive=True,
                 )
-            with gr.Column():
-                custom_prompt = gr.Textbox(
-                    label="System Prompt Suffix",
-                    value="",
-                    interactive=True,
-                )
             with gr.Column():
                 only_n_images = gr.Slider(
                     label="N most recent screenshots",
@@ -333,9 +324,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         )
 
         return provider_update, api_key_update
- 
-    def update_system_prompt_suffix(system_prompt_suffix, state):
-        state["custom_system_prompt"] = system_prompt_suffix
 
     def update_only_n_images(only_n_images_value, state):
         state["only_n_most_recent_images"] = only_n_images_value
@@ -357,7 +345,6 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         state[f'{state["provider"]}_api_key'] = api_key_value
 
     model.change(fn=update_model, inputs=[model, state], outputs=[provider, api_key])
-    custom_prompt.change(fn=update_system_prompt_suffix, inputs=[custom_prompt, state], outputs=None)
     only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
     provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
     api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
diff --git a/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py b/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py
index fbd66a1..e7b2071 100644
--- a/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py
+++ b/demo/gradio/computer_use_demo/gui_agent/anthropic_agent.py
@@ -61,8 +61,7 @@ class AnthropicActor:
     def __init__(
         self, 
         model: str, 
-        provider: APIProvider, 
-        system_prompt_suffix: str, 
+        provider: APIProvider,
         api_key: str,
         api_response_callback: Callable[[APIResponse[BetaMessage]], None],
         max_tokens: int = 4096,
@@ -72,7 +71,6 @@ class AnthropicActor:
     ):
         self.model = model
         self.provider = provider
-        self.system_prompt_suffix = system_prompt_suffix
         self.api_key = api_key
         self.api_response_callback = api_response_callback
         self.max_tokens = max_tokens
@@ -83,9 +81,7 @@ class AnthropicActor:
             ComputerTool(selected_screen=selected_screen),
         )
 
-        self.system = (
-            f"{SYSTEM_PROMPT}{' ' + system_prompt_suffix if system_prompt_suffix else ''}"
-        )
+        self.system = SYSTEM_PROMPT
         
         self.total_token_usage = 0
         self.total_cost = 0
diff --git a/demo/gradio/computer_use_demo/loop.py b/demo/gradio/computer_use_demo/loop.py
index 22754f8..2a7c076 100644
--- a/demo/gradio/computer_use_demo/loop.py
+++ b/demo/gradio/computer_use_demo/loop.py
@@ -75,7 +75,6 @@ def sampling_loop_sync(
     *,
     model: str,
     provider: APIProvider | None,
-    system_prompt_suffix: str,
     messages: list[BetaMessageParam],
     output_callback: Callable[[BetaContentBlock], None],
     tool_output_callback: Callable[[ToolResult, str], None],
@@ -96,8 +95,7 @@ def sampling_loop_sync(
         # Register Actor and Executor
         actor = AnthropicActor(
             model=model, 
-            provider=provider, 
-            system_prompt_suffix=system_prompt_suffix, 
+            provider=provider,
             api_key=api_key, 
             api_response_callback=api_response_callback,
             max_tokens=max_tokens,
@@ -121,7 +119,6 @@ def sampling_loop_sync(
         actor = VLMAgent(
             model=model,
             provider=provider,
-            system_prompt_suffix=system_prompt_suffix,
             api_key=api_key,
             api_response_callback=api_response_callback,
             selected_screen=selected_screen,
diff --git a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
index 66c740a..26d5190 100644
--- a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
+++ b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
@@ -22,13 +22,7 @@ from computer_use_demo.gui_agent.llm_utils.qwen import run_qwen
 from computer_use_demo.gui_agent.llm_utils.llm_utils import extract_data
 from computer_use_demo.colorful_text import colorful_text_vlm
 import time
-#     start = time.time()
 
-SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
-* You are utilizing a Windows system with internet access.
-* The current date is {datetime.today().strftime('%A, %B %d, %Y')}.
-</SYSTEM_CAPABILITY>
-"""
 OUTPUT_DIR = "./tmp/outputs"
 
 class OmniParser:
@@ -94,13 +88,11 @@ class OmniParser:
         return response_json
 
 
-
 class VLMAgent:
     def __init__(
         self,
         model: str, 
         provider: str, 
-        system_prompt_suffix: str, 
         api_key: str,
         output_callback: Callable, 
         api_response_callback: Callable,
@@ -115,7 +107,6 @@ class VLMAgent:
             raise ValueError(f"Model {model} not supported")
         
         self.provider = provider
-        self.system_prompt_suffix = system_prompt_suffix
         self.api_key = api_key
         self.api_response_callback = api_response_callback
         self.max_tokens = max_tokens
@@ -127,7 +118,7 @@ class VLMAgent:
         self.total_token_usage = 0
         self.total_cost = 0
 
-        self.system = system_prompt_suffix
+        self.system = ''
            
     def __call__(self, messages: list, parsed_screen: list[str, list, dict]):
         # Show results of Omniparser
@@ -144,7 +135,7 @@ class VLMAgent:
 
         # example parsed_screen: {"som_image_base64": dino_labled_img, "parsed_content_list": parsed_content_list, "screen_info"}
         boxids_and_labels = parsed_screen["screen_info"]
-        system = self._get_system_prompt(boxids_and_labels) + self.system_prompt_suffix
+        system = self._get_system_prompt(boxids_and_labels)
 
         # drop looping actions msg, byte image etc
         planner_messages = messages