diff --git a/imgs/header_bar.png b/imgs/header_bar.png new file mode 100644 index 0000000..148fbee Binary files /dev/null and b/imgs/header_bar.png differ diff --git a/computer_use_demo/gradio/.gitignore b/omnitool/gradio/.gitignore similarity index 100% rename from computer_use_demo/gradio/.gitignore rename to omnitool/gradio/.gitignore diff --git a/computer_use_demo/gradio/__init__.py b/omnitool/gradio/__init__.py similarity index 100% rename from computer_use_demo/gradio/__init__.py rename to omnitool/gradio/__init__.py diff --git a/computer_use_demo/gradio/agent/anthropic_agent.py b/omnitool/gradio/agent/anthropic_agent.py similarity index 100% rename from computer_use_demo/gradio/agent/anthropic_agent.py rename to omnitool/gradio/agent/anthropic_agent.py diff --git a/computer_use_demo/gradio/agent/llm_utils/groqclient.py b/omnitool/gradio/agent/llm_utils/groqclient.py similarity index 100% rename from computer_use_demo/gradio/agent/llm_utils/groqclient.py rename to omnitool/gradio/agent/llm_utils/groqclient.py diff --git a/computer_use_demo/gradio/agent/llm_utils/oaiclient.py b/omnitool/gradio/agent/llm_utils/oaiclient.py similarity index 100% rename from computer_use_demo/gradio/agent/llm_utils/oaiclient.py rename to omnitool/gradio/agent/llm_utils/oaiclient.py diff --git a/computer_use_demo/gradio/agent/llm_utils/omniparserclient.py b/omnitool/gradio/agent/llm_utils/omniparserclient.py similarity index 100% rename from computer_use_demo/gradio/agent/llm_utils/omniparserclient.py rename to omnitool/gradio/agent/llm_utils/omniparserclient.py diff --git a/computer_use_demo/gradio/agent/llm_utils/utils.py b/omnitool/gradio/agent/llm_utils/utils.py similarity index 100% rename from computer_use_demo/gradio/agent/llm_utils/utils.py rename to omnitool/gradio/agent/llm_utils/utils.py diff --git a/computer_use_demo/gradio/agent/vlm_agent.py b/omnitool/gradio/agent/vlm_agent.py similarity index 100% rename from computer_use_demo/gradio/agent/vlm_agent.py rename to 
omnitool/gradio/agent/vlm_agent.py diff --git a/computer_use_demo/gradio/app.py b/omnitool/gradio/app.py similarity index 97% rename from computer_use_demo/gradio/app.py rename to omnitool/gradio/app.py index 92ea689..1b2c459 100644 --- a/computer_use_demo/gradio/app.py +++ b/omnitool/gradio/app.py @@ -26,11 +26,11 @@ CONFIG_DIR = Path("~/.anthropic").expanduser() API_KEY_FILE = CONFIG_DIR / "api_key" INTRO_TEXT = ''' -🚀🤖✨ It's Play Time! +Welcome to OmniTool - the OmniParser+X Computer Use Demo! X = [OpenAI (4o/o1/o3-mini), DeepSeek (R1), Qwen (2.5VL) or Anthropic Computer Use (Sonnet)]. -Welcome to the OmniParser+X Computer Use Demo! X = [GPT family (4o/o1/o3-mini), Claude, deepseek R1/V3, Qwen-2.5VL]. Let OmniParser turn your general purpose vision-langauge model to an AI agent. +OmniParser lets you turn any vision-language model into an AI agent. -Type a message and press submit to start OmniParser+X. Press stop to pause, and press the trash icon in the chat to clear the message history. +Type a message and press submit to start OmniTool. Press stop to pause, and press the trash icon in the chat to clear the message history. 
''' def parse_arguments(): @@ -271,7 +271,7 @@ with gr.Blocks(theme=gr.themes.Default()) as demo: setup_state(state.value) - gr.Markdown("# OmniParser + ✖️ Demo") + gr.Markdown("# OmniTool") if not os.getenv("HIDE_WARNING", False): gr.Markdown(INTRO_TEXT) diff --git a/computer_use_demo/gradio/executor/anthropic_executor.py b/omnitool/gradio/executor/anthropic_executor.py similarity index 100% rename from computer_use_demo/gradio/executor/anthropic_executor.py rename to omnitool/gradio/executor/anthropic_executor.py diff --git a/computer_use_demo/gradio/loop.py b/omnitool/gradio/loop.py similarity index 100% rename from computer_use_demo/gradio/loop.py rename to omnitool/gradio/loop.py diff --git a/computer_use_demo/gradio/tools/__init__.py b/omnitool/gradio/tools/__init__.py similarity index 100% rename from computer_use_demo/gradio/tools/__init__.py rename to omnitool/gradio/tools/__init__.py diff --git a/computer_use_demo/gradio/tools/base.py b/omnitool/gradio/tools/base.py similarity index 100% rename from computer_use_demo/gradio/tools/base.py rename to omnitool/gradio/tools/base.py diff --git a/computer_use_demo/gradio/tools/collection.py b/omnitool/gradio/tools/collection.py similarity index 100% rename from computer_use_demo/gradio/tools/collection.py rename to omnitool/gradio/tools/collection.py diff --git a/computer_use_demo/gradio/tools/computer.py b/omnitool/gradio/tools/computer.py similarity index 100% rename from computer_use_demo/gradio/tools/computer.py rename to omnitool/gradio/tools/computer.py diff --git a/computer_use_demo/gradio/tools/screen_capture.py b/omnitool/gradio/tools/screen_capture.py similarity index 100% rename from computer_use_demo/gradio/tools/screen_capture.py rename to omnitool/gradio/tools/screen_capture.py diff --git a/computer_use_demo/windowshost/.gitignore b/omnitool/omnibox/.gitignore similarity index 100% rename from computer_use_demo/windowshost/.gitignore rename to omnitool/omnibox/.gitignore diff --git 
a/computer_use_demo/windowshost/Dockerfile b/omnitool/omnibox/Dockerfile similarity index 100% rename from computer_use_demo/windowshost/Dockerfile rename to omnitool/omnibox/Dockerfile diff --git a/computer_use_demo/windowshost/compose.yml b/omnitool/omnibox/compose.yml similarity index 100% rename from computer_use_demo/windowshost/compose.yml rename to omnitool/omnibox/compose.yml diff --git a/computer_use_demo/windowshost/scripts/manage_vm.ps1 b/omnitool/omnibox/scripts/manage_vm.ps1 similarity index 100% rename from computer_use_demo/windowshost/scripts/manage_vm.ps1 rename to omnitool/omnibox/scripts/manage_vm.ps1 diff --git a/computer_use_demo/windowshost/scripts/manage_vm.sh b/omnitool/omnibox/scripts/manage_vm.sh similarity index 100% rename from computer_use_demo/windowshost/scripts/manage_vm.sh rename to omnitool/omnibox/scripts/manage_vm.sh diff --git a/computer_use_demo/windowshost/vm/buildcontainer/define.sh b/omnitool/omnibox/vm/buildcontainer/define.sh similarity index 100% rename from computer_use_demo/windowshost/vm/buildcontainer/define.sh rename to omnitool/omnibox/vm/buildcontainer/define.sh diff --git a/computer_use_demo/windowshost/vm/buildcontainer/entry.sh b/omnitool/omnibox/vm/buildcontainer/entry.sh similarity index 100% rename from computer_use_demo/windowshost/vm/buildcontainer/entry.sh rename to omnitool/omnibox/vm/buildcontainer/entry.sh diff --git a/computer_use_demo/windowshost/vm/buildcontainer/install.sh b/omnitool/omnibox/vm/buildcontainer/install.sh similarity index 100% rename from computer_use_demo/windowshost/vm/buildcontainer/install.sh rename to omnitool/omnibox/vm/buildcontainer/install.sh diff --git a/computer_use_demo/windowshost/vm/buildcontainer/power.sh b/omnitool/omnibox/vm/buildcontainer/power.sh similarity index 100% rename from computer_use_demo/windowshost/vm/buildcontainer/power.sh rename to omnitool/omnibox/vm/buildcontainer/power.sh diff --git a/computer_use_demo/windowshost/vm/buildcontainer/samba.sh 
b/omnitool/omnibox/vm/buildcontainer/samba.sh similarity index 100% rename from computer_use_demo/windowshost/vm/buildcontainer/samba.sh rename to omnitool/omnibox/vm/buildcontainer/samba.sh diff --git a/computer_use_demo/windowshost/vm/win11def/win11x64-enterprise-eval.xml b/omnitool/omnibox/vm/win11def/win11x64-enterprise-eval.xml similarity index 100% rename from computer_use_demo/windowshost/vm/win11def/win11x64-enterprise-eval.xml rename to omnitool/omnibox/vm/win11def/win11x64-enterprise-eval.xml diff --git a/computer_use_demo/windowshost/vm/win11iso/README.md b/omnitool/omnibox/vm/win11iso/README.md similarity index 100% rename from computer_use_demo/windowshost/vm/win11iso/README.md rename to omnitool/omnibox/vm/win11iso/README.md diff --git a/computer_use_demo/windowshost/vm/win11setup/firstboot/install.bat b/omnitool/omnibox/vm/win11setup/firstboot/install.bat similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/firstboot/install.bat rename to omnitool/omnibox/vm/win11setup/firstboot/install.bat diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/on-logon.ps1 b/omnitool/omnibox/vm/win11setup/setupscripts/on-logon.ps1 similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/on-logon.ps1 rename to omnitool/omnibox/vm/win11setup/setupscripts/on-logon.ps1 diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/server/cursor.png b/omnitool/omnibox/vm/win11setup/setupscripts/server/cursor.png similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/server/cursor.png rename to omnitool/omnibox/vm/win11setup/setupscripts/server/cursor.png diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/server/main.py b/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/server/main.py rename to 
omnitool/omnibox/vm/win11setup/setupscripts/server/main.py diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/server/requirements.txt b/omnitool/omnibox/vm/win11setup/setupscripts/server/requirements.txt similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/server/requirements.txt rename to omnitool/omnibox/vm/win11setup/setupscripts/server/requirements.txt diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/setup-tools.psm1 b/omnitool/omnibox/vm/win11setup/setupscripts/setup-tools.psm1 similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/setup-tools.psm1 rename to omnitool/omnibox/vm/win11setup/setupscripts/setup-tools.psm1 diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/setup.ps1 b/omnitool/omnibox/vm/win11setup/setupscripts/setup.ps1 similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/setup.ps1 rename to omnitool/omnibox/vm/win11setup/setupscripts/setup.ps1 diff --git a/computer_use_demo/windowshost/vm/win11setup/setupscripts/tools_config.json b/omnitool/omnibox/vm/win11setup/setupscripts/tools_config.json similarity index 100% rename from computer_use_demo/windowshost/vm/win11setup/setupscripts/tools_config.json rename to omnitool/omnibox/vm/win11setup/setupscripts/tools_config.json diff --git a/computer_use_demo/omniparserserver/omniparserserver.py b/omnitool/omniparserserver/omniparserserver.py similarity index 100% rename from computer_use_demo/omniparserserver/omniparserserver.py rename to omnitool/omniparserserver/omniparserserver.py diff --git a/computer_use_demo/readme.md b/omnitool/readme.md similarity index 62% rename from computer_use_demo/readme.md rename to omnitool/readme.md index b7bb4b8..c572fc0 100644 --- a/computer_use_demo/readme.md +++ b/omnitool/readme.md @@ -1,66 +1,68 @@ -# OmniParser+X Computer Use Demo - -Control a Windows 11 VM with OmniParser+X (X = [GPT family 
(4o/o1/o3-mini), Claude, deepseek R1/V3, Qwen-2.5VL]).

- OmniParser+X Computer Use Demo screenshot + OmniParser+X Computer Use Demo screenshot

+# OmniTool + +Control a Windows 11 VM with OmniParser+X (OpenAI (4o/o1/o3-mini), DeepSeek (R1), Qwen (2.5VL)) or Anthropic Computer Use. + ## Overview There are three components: -1. **windowshost**: A Windows 11 VM running in a Docker container -2. **omniparserserver**: FastAPI server running OmniParser +1. **omnibox**: A Windows 11 VM running in a Docker container +2. **omniparserserver**: FastAPI server running OmniParser V2 3. **gradio**: UI where you can provide commands and watch OmniParser+X reasoning and executing on the Windows 11 VM Notes: + 1. The Windows 11 VM docker is dependent on KVM so can only run quickly on Windows and Linux. This can run on a CPU machine (doesn't need GPU). 2. Though OmniParser can run on a CPU, we have separated this out if you want to run it fast on a GPU machine -3. The Gradio UI can also run on a CPU machine. +3. The Gradio UI can also run on a CPU machine. We suggest running **omnibox** and **gradio** on the same CPU machine and **omniparserserver** on a GPU server. ## Setup -1. **windowshost**: +1. **omnibox**: a. Install Docker Desktop - - b. Visit [Microsoft Evaluation Center](https://info.microsoft.com/ww-landing-windows-11-enterprise.html), accept the Terms of Service, and download a **Windows 11 Enterprise Evaluation (90-day trial, English, United States)** ISO file [~6GB]. Rename the file to `custom.iso` and copy it to the directory `OmniParser/computer_use_demo/windowshost/vm/win11iso` - - c. Navigate to vm management script directory with`cd OmniParser/computer_use_demo/windowshost/scripts` - + + b. Visit [Microsoft Evaluation Center](https://info.microsoft.com/ww-landing-windows-11-enterprise.html), accept the Terms of Service, and download a **Windows 11 Enterprise Evaluation (90-day trial, English, United States)** ISO file [~6GB]. Rename the file to `custom.iso` and copy it to the directory `OmniParser/omnitool/omnibox/vm/win11iso` + + c. 
Navigate to vm management script directory with `cd OmniParser/omnitool/omnibox/scripts` + d. Build the docker container [400MB] and install the ISO to a storage folder [20GB] with `./manage_vm.sh create` - - e. After creating the first time it will store a save of the VM state in `vm/win11storage`. You can then manage the VM with `./manage_vm.sh start` and `./manage_vm.sh stop`. To delete the VM, use `./manage_vm.sh delete` and delete the `OmniParser/computer_use_demo/windowshost/vm/win11storage` directory. + + e. After creating the first time it will store a save of the VM state in `vm/win11storage`. You can then manage the VM with `./manage_vm.sh start` and `./manage_vm.sh stop`. To delete the VM, use `./manage_vm.sh delete` and delete the `OmniParser/omnitool/omnibox/vm/win11storage` directory. 2. **omniparserserver**: a. If you already have a conda environment for OmniParser, you can use that. Else follow the following steps to create one - + b. Ensure conda is installed with `conda --version` or install from the [Anaconda website](https://www.anaconda.com/download/success) - + c. Navigate to the root of the repo with `cd OmniParser` - + d. Create a conda python environment with `conda create -n "omni" python==3.12` - + e. Set the python environment to be used with `conda activate omni` - + f. Install the dependencies with `pip install -r requirements.txt` - + g. Continue from here if you already had the conda environment. - + h. Ensure you have the weights downloaded in weights folder. If not download them with: `for folder in icon_caption_florence icon_detect icon_detect_v1_5; do huggingface-cli download microsoft/OmniParser --local-dir weights/ --repo-type model --include "$folder/*"; done` - - h. Navigate to the server directory with `cd OmniParser/computer_use_demo/omniparserserver` - + + h. Navigate to the server directory with `cd OmniParser/omnitool/omniparserserver` + i. Start the server with `python -m omniparserserver` 3. **gradio**: - a. 
Navigate to the gradio directory with `cd OmniParser/computer_use_demo/gradio` + a. Navigate to the gradio directory with `cd OmniParser/omnitool/gradio` - b. Ensure you have activated the conda python environment with `conda activate omni` + b. Ensure you have activated the conda python environment with `conda activate omni` - c. Start the server with `python app.py --windows_host_url localhost:8006 --omniparser_server_url localhost:8000` + c. Start the server with `python app.py --windows_host_url localhost:8006 --omniparser_server_url localhost:8000` - d. Open the URL in the terminal output, set your API Key from OpenAI and start playing with the AI agent! + d. Open the URL in the terminal output, set your API Key from OpenAI and start playing with the AI agent!