diff --git a/.gitignore b/.gitignore index 1cbd9d3..02eae0d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ __pycache__/ debug.ipynb util/__pycache__/ index.html?linkid=2289031 -wget-log \ No newline at end of file +wget-log +weights/omniv2/ \ No newline at end of file diff --git a/omnitool/gradio/app.py b/omnitool/gradio/app.py index 1ad28d2..44a1580 100644 --- a/omnitool/gradio/app.py +++ b/omnitool/gradio/app.py @@ -259,7 +259,7 @@ def get_header_image_base64(): try: # Get the absolute path to the image relative to this script script_dir = Path(__file__).parent - image_path = script_dir.parent.parent / "imgs" / "header_bar_small.png" + image_path = script_dir.parent.parent / "imgs" / "header_bar_thin.png" with open(image_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode() diff --git a/omnitool/readme.md b/omnitool/readme.md index 2a8b529..cc101d7 100644 --- a/omnitool/readme.md +++ b/omnitool/readme.md @@ -84,3 +84,8 @@ There are three components: c. Start the server with `python app.py --windows_host_url localhost:8006 --omniparser_server_url localhost:8000` d. Open the URL in the terminal output, set your API Key and start playing with the AI agent! + + +## Acknowledgment +Kudos to the amazing resources that were indispensable to the development of our code: [Claude Computer Use](https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/README.md), [OS World](https://github.com/xlang-ai/OSWorld), [Windows Agent Arena](https://github.com/microsoft/WindowsAgentArena), and [computer_use_ootb](https://github.com/showlab/computer_use_ootb). +We are grateful for the helpful suggestions and feedback provided by Francesco Bonacci, Jianwei Yang, Dillon DuPont, Yue Wu, and Anh Nguyen.