diff --git a/.gitignore b/.gitignore
index 2ad14a7..40c4d8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ weights/icon_detect_v1_5_2/
 .gradio
 __pycache__/
 debug.ipynb
+util/__pycache__/
diff --git a/demo/gradio/app.py b/demo/gradio/app.py
index 842738f..d005c49 100644
--- a/demo/gradio/app.py
+++ b/demo/gradio/app.py
@@ -1,5 +1,6 @@
 """
 Entrypoint for Gradio, see https://gradio.app/
+python app.py --windows_host_url xxxx:8006/ --omniparser_host_url localhost:8000
 """
 
 import platform
@@ -15,6 +16,7 @@ from pathlib import Path
 from typing import cast, Dict
 from PIL import Image
 import socket
+import argparse
 
 import gradio as gr
 from anthropic import APIResponse
@@ -39,6 +41,18 @@ Welcome to the OmniParser+X Demo! X = [GPT-4o/4o-mini, Claude, Phi, Llama]. Let
 Type a message and press submit to start OmniParser+X. Press the trash icon in the chat to clear the message history.
 '''
 
+def parse_arguments():
+    parser = argparse.ArgumentParser(description="Gradio App")
+    parser.add_argument("--windows_host_url", type=str, default='GCRSANDBOX336.redmond.corp.microsoft.com:8006/') # http://gcrsandbox336.redmond.corp.microsoft.com/
+    parser.add_argument("--omniparser_host_url", type=str, default="localhost:8000")
+    return parser.parse_args()
+args = parse_arguments()
+windows_host_url = args.windows_host_url
+omniparser_host_url = args.omniparser_host_url
+print(f"Windows host URL: {windows_host_url}")
+print(f"OmniParser host URL: {omniparser_host_url}")
+
+
 class Sender(StrEnum):
     USER = "user"
     BOT = "assistant"
@@ -68,8 +82,8 @@ def setup_state(state):
         state["only_n_most_recent_images"] = 2
     if 'chatbot_messages' not in state:
         state['chatbot_messages'] = []
-    if "omniparser_url" not in state:
-        state["omniparser_url"] = "localhost:8000"
+    # if "omniparser_url" not in state:
+    #     state["omniparser_url"] = "localhost:8000"
 
 async def main(state):
     """Render loop for Gradio"""
@@ -211,7 +225,7 @@ def process_input(user_input, state):
         api_response_callback=partial(_api_response_callback, response_state=state["responses"]),
         api_key=state["api_key"],
         only_n_most_recent_images=state["only_n_most_recent_images"],
-        omniparser_url=state["omniparser_url"]
+        omniparser_url=omniparser_host_url #state["omniparser_url"]
     ):  
         if loop_msg is None:
             yield state['chatbot_messages']
@@ -275,13 +289,13 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
                     placeholder="Paste your API key here",
                     interactive=True,
                 )
-        with gr.Row():
-            omniparser_url = gr.Textbox(
-                label="OmniParser Base URL",
-                value="localhost:8000",
-                placeholder="Enter OmniParser base URL (e.g. localhost:8000)",
-                interactive=True
-            )
+        # with gr.Row():
+        #     omniparser_url = gr.Textbox(
+        #         label="OmniParser Base URL",
+        #         value="localhost:8000",
+        #         placeholder="Enter OmniParser base URL (e.g. localhost:8000)",
+        #         interactive=True
+        #     )
         # hide_images = gr.Checkbox(label="Hide screenshots", value=False)
 
     with gr.Row():
@@ -294,11 +308,20 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         with gr.Column(scale=1):
             chatbot = gr.Chatbot(label="Chatbot History", autoscroll=True, height=580)
         with gr.Column(scale=3):
-            iframe = gr.HTML(
-                f'<iframe src="http://localhost:8006/vnc.html?view_only=1&autoconnect=1&resize=scale" width="100%" height="580" allow="fullscreen"></iframe>',
-                container=False,
-                elem_classes="no-padding"
-            )
+            if not windows_host_url:
+                iframe = gr.HTML(
+                    f'<iframe src="http://localhost:8006/vnc.html?view_only=1&autoconnect=1&resize=scale" width="100%" height="580" allow="fullscreen"></iframe>',
+                    container=False,
+                    elem_classes="no-padding"
+                )
+            else:
+                # machine_fqdn = socket.getfqdn()
+                # print('machine_fqdn:', machine_fqdn)
+                iframe = gr.HTML(
+                    f'<iframe src="http://{windows_host_url}/vnc.html?view_only=1&autoconnect=1&resize=scale" width="100%" height="580" allow="fullscreen"></iframe>',
+                    container=False,
+                    elem_classes="no-padding"
+                )
 
     def update_model(model_selection, state):
         state["model"] = model_selection
@@ -350,8 +373,8 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         state["api_key"] = api_key_value
         state[f'{state["provider"]}_api_key'] = api_key_value
 
-    def update_omniparser_url(url_value, state):
-        state["omniparser_url"] = url_value
+    # def update_omniparser_url(url_value, state):
+    #     state["omniparser_url"] = url_value
 
     def clear_chat(state):
         # Reset message-related state
@@ -365,7 +388,7 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
     only_n_images.change(fn=update_only_n_images, inputs=[only_n_images, state], outputs=None)
     provider.change(fn=update_provider, inputs=[provider, state], outputs=api_key)
     api_key.change(fn=update_api_key, inputs=[api_key, state], outputs=None)
-    omniparser_url.change(fn=update_omniparser_url, inputs=[omniparser_url, state], outputs=None)
+    # omniparser_url.change(fn=update_omniparser_url, inputs=[omniparser_url, state], outputs=None)
     chatbot.clear(fn=clear_chat, inputs=[state], outputs=[chatbot])
 
     submit_button.click(process_input, [chat_input, state], chatbot)
diff --git a/demo/gradio/computer_use_demo/loop.py b/demo/gradio/computer_use_demo/loop.py
index c4129e5..c400ec4 100644
--- a/demo/gradio/computer_use_demo/loop.py
+++ b/demo/gradio/computer_use_demo/loop.py
@@ -52,7 +52,7 @@ def sampling_loop_sync(
     Synchronous agentic sampling loop for the assistant/tool interaction of computer use.
     """
     print('in sampling_loop_sync, model:', model)
-    omniparser = OmniParser(url=f"http://{omniparser_url}/send_text/" if omniparser_url and omniparser_url != "localhost:8000" else None)
+    omniparser = OmniParser(url=f"http://{omniparser_url}/send_text/" if omniparser_url else None)
     if model == "claude-3-5-sonnet-20241022":
         # Register Actor and Executor
         actor = AnthropicActor(
diff --git a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
index 92a7901..35f57c9 100644
--- a/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
+++ b/demo/gradio/computer_use_demo/omniparser_agent/vlm_agent.py
@@ -132,7 +132,15 @@ class VLMAgent:
                              sender="bot")
         self.output_callback(f'Set of Marks Screenshot for {colorful_text_vlm}:\n<img src="data:image/png;base64,{parsed_screen["som_image_base64"]}">', sender="bot")
         screen_info = str(parsed_screen['screen_info'])
-        self.output_callback(f'Screen Info for {colorful_text_vlm}:\n{screen_info}', sender="bot")
+        # self.output_callback(f'Screen Info for {colorful_text_vlm}:\n{screen_info}', sender="bot")
+        self.output_callback(
+                    f'<details>'
+                    f'  <summary>Screen Info for {colorful_text_vlm}</summary>'
+                    f'  <pre>{screen_info}</pre>'
+                    f'</details>',
+                    sender="bot"
+                )
+
 
         screenshot_uuid = parsed_screen['screenshot_uuid']
         screen_width, screen_height = parsed_screen['width'], parsed_screen['height']
@@ -155,7 +163,7 @@ class VLMAgent:
             planner_messages[-1]["content"].append(f"{OUTPUT_DIR}/screenshot_{screenshot_uuid}.png")
             planner_messages[-1]["content"].append(f"{OUTPUT_DIR}/screenshot_som_{screenshot_uuid}.png")
 
-        print(f"Sending messages to VLMPlanner : {planner_messages}")
+        # print(f"Sending messages to VLMPlanner : {planner_messages}")
         start = time.time()
         if "gpt" in self.model:
             vlm_response, token_usage = run_oai_interleaved(
diff --git a/demo/remote_request.py b/demo/remote_request.py
index 84dd3a9..7efafc3 100644
--- a/demo/remote_request.py
+++ b/demo/remote_request.py
@@ -1,4 +1,7 @@
 # uvicorn remote_request:app --host 0.0.0.0 --port 8000 --reload
+'''
+python -m remote_request --som_model_path ../weights/icon_detect_v1_5/model_v1_5.pt --caption_model_name florence2 --caption_model_path ../weights/icon_detect_v1_5/model_v1_5.pt --device cuda --BOX_TRESHOLD 0.05
+'''
 
 import sys
 import os
@@ -12,14 +15,31 @@ import base64
 import io
 from fastapi import FastAPI
 from pydantic import BaseModel
+import argparse
 
-config = {
-    'som_model_path': '../weights/icon_detect_v1_5/model_v1_5.pt',
-    'device': 'cpu',
-    'caption_model_name': 'florence2',
-    'caption_model_path': '../weights/icon_caption_florence',
-    'BOX_TRESHOLD': 0.05
-}
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Omniparser API')
+    parser.add_argument('--som_model_path', type=str, default='../weights/icon_detect_v1_5/model_v1_5.pt', help='Path to the som model')
+    parser.add_argument('--caption_model_name', type=str, default='florence2', help='Name of the caption model')
+    parser.add_argument('--caption_model_path', type=str, default='../weights/icon_caption_florence', help='Path to the caption model')
+    parser.add_argument('--device', type=str, default='cpu', help='Device to run the model')
+    parser.add_argument('--BOX_TRESHOLD', type=float, default=0.05, help='Threshold for box detection')
+    parser.add_argument('--host', type=str, default='0.0.0.0', help='Host for the API')
+    parser.add_argument('--port', type=int, default=8000, help='Port for the API')
+    args = parser.parse_args()
+    return args
+
+args = parse_arguments()
+config = vars(args)
+
+
+# config = {
+#     'som_model_path': '../weights/icon_detect_v1_5/model_v1_5.pt',
+#     'device': 'cpu',
+#     'caption_model_name': 'florence2',
+#     'caption_model_path': '../weights/icon_caption_florence',
+#     'BOX_TRESHOLD': 0.05
+# }
 
 
 class Omniparser(object):
@@ -74,4 +94,9 @@ async def send_text(item: Item):
 
 @app.get("/")
 async def root():
-    return {"message": "Omniparser API ready"}
\ No newline at end of file
+    return {"message": "Omniparser API ready"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("remote_request:app", host=args.host, port=args.port, reload=True)
\ No newline at end of file
diff --git a/util/__pycache__/__init__.cpython-312.pyc b/util/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index 9b6f52f..0000000
Binary files a/util/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/util/__pycache__/__init__.cpython-39.pyc b/util/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index 9cb17e3..0000000
Binary files a/util/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/util/__pycache__/action_matching.cpython-39.pyc b/util/__pycache__/action_matching.cpython-39.pyc
deleted file mode 100644
index aac3447..0000000
Binary files a/util/__pycache__/action_matching.cpython-39.pyc and /dev/null differ
diff --git a/util/__pycache__/box_annotator.cpython-312.pyc b/util/__pycache__/box_annotator.cpython-312.pyc
deleted file mode 100644
index 0b3df34..0000000
Binary files a/util/__pycache__/box_annotator.cpython-312.pyc and /dev/null differ
diff --git a/util/__pycache__/box_annotator.cpython-39.pyc b/util/__pycache__/box_annotator.cpython-39.pyc
deleted file mode 100644
index c091943..0000000
Binary files a/util/__pycache__/box_annotator.cpython-39.pyc and /dev/null differ