From 2f13aebc6eed190d1681377d583b32eb7bbacec9 Mon Sep 17 00:00:00 2001
From: yadonglu
Date: Tue, 26 Nov 2024 13:17:15 -0800
Subject: [PATCH] update readme

---
 .gitignore                          |  1 +
 README.md                           |  2 +-
 demo.ipynb                          | 16 ++++++++--------
 weights/convert_safetensor_to_pt.py |  2 +-
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index 672c6ae..e455714 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ weights/icon_detect/
 weights/icon_detect_v1_5/
 .gradio
 __pycache__/
+debug.ipynb
diff --git a/README.md b/README.md
index 7d813ff..9e4ef3c 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@
 **OmniParser** is a comprehensive method for parsing user interface screenshots into structured and easy-to-understand elements, which significantly enhances the ability of GPT-4V to generate actions that can be accurately grounded in the corresponding regions of the interface.
 
 ## News
-- [2024/11/26] We release an updated version, OmniParser V1.5 which features more fine grained/small icon detection. Examples in the demo.ipynb.
+- [2024/11/26] We release an updated version, OmniParser V1.5, which features 1) more fine-grained/small icon detection and 2) prediction of whether each screen element is interactable or not. Examples are in demo.ipynb.
 - [2024/10] OmniParser was the #1 trending model on huggingface model hub (starting 10/29/2024).
 - [2024/10] Feel free to checkout our demo on [huggingface space](https://huggingface.co/spaces/microsoft/OmniParser)! (stay tuned for OmniParser + Claude Computer Use)
 - [2024/10] Both Interactive Region Detection Model and Icon functional description model are released! [Hugginface models](https://huggingface.co/microsoft/OmniParser)
diff --git a/demo.ipynb b/demo.ipynb
index 6f34af0..bd280c8 100644
--- a/demo.ipynb
+++ b/demo.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -28,7 +28,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -75,7 +75,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -84,8 +84,8 @@
      "text": [
       "image size: (1919, 1079)\n",
       "\n",
-      "image 1/1 /home/yadonglu/OmniParser/imgs/word.png: 736x1280 138 0s, 7.5ms\n",
-      "Speed: 5.4ms preprocess, 7.5ms inference, 5.1ms postprocess per image at shape (1, 3, 736, 1280)\n"
+      "image 1/1 /home/yadonglu/OmniParser/imgs/word.png: 736x1280 138 0s, 61.1ms\n",
+      "Speed: 9.0ms preprocess, 61.1ms inference, 432.6ms postprocess per image at shape (1, 3, 736, 1280)\n"
      ]
     }
    ],
@@ -129,16 +129,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        ""
       ]
      },
-     "execution_count": 16,
+     "execution_count": 4,
     "metadata": {},
      "output_type": "execute_result"
     },
diff --git a/weights/convert_safetensor_to_pt.py b/weights/convert_safetensor_to_pt.py
index 901aa09..d45869f 100644
--- a/weights/convert_safetensor_to_pt.py
+++ b/weights/convert_safetensor_to_pt.py
@@ -16,7 +16,7 @@ if args.version == 'v1':
     torch.save({'model':model}, 'weights/icon_detect/best.pt')
 elif args.version == 'v1_5':
     print("Converting v1_5")
-    tensor_dict = torch.load("weights/icon_detect_v1_5/model.safetensors")
+    tensor_dict = load_file("weights/icon_detect_v1_5/model.safetensors")
     model = DetectionModel('weights/icon_detect_v1_5/model.yaml')
     model.load_state_dict(tensor_dict)
     save_dict = {'model':model}
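
The convert_safetensor_to_pt.py fix replaces torch.load with safetensors' load_file: a .safetensors file is not a pickle archive, so torch.load cannot deserialize it, whereas safetensors.torch.load_file returns a plain state dict. Below is a minimal sketch of the v1_5 branch after this patch, assuming the safetensors and ultralytics packages are installed and the weights/icon_detect_v1_5/ files exist as in the repo; the final save path is an assumption, since the hunk ends before the v1_5 torch.save call.

    # Sketch of the patched v1_5 conversion path (not the full script).
    # Assumes: safetensors and ultralytics are installed, and the
    # weights/icon_detect_v1_5/ files exist as in the repo.
    import torch
    from safetensors.torch import load_file  # reads .safetensors; torch.load cannot
    from ultralytics.nn.tasks import DetectionModel

    # load_file returns a flat dict of tensor name -> torch.Tensor
    tensor_dict = load_file("weights/icon_detect_v1_5/model.safetensors")

    # Rebuild the detector from its YAML config, then load the weights
    model = DetectionModel('weights/icon_detect_v1_5/model.yaml')
    model.load_state_dict(tensor_dict)

    # Save in the {'model': ...} layout used by the v1 branch; this v1_5
    # output path is assumed here (the hunk ends before the save call).
    torch.save({'model': model}, 'weights/icon_detect_v1_5/best.pt')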
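For reference, the detection log in the demo.ipynb hunk ("image 1/1 ... 138 0s" plus the "Speed: ..." timings) is standard Ultralytics YOLO output. The following sketch shows a call that produces that kind of log, assuming the checkpoint converted above and the repo's imgs/word.png; demo.ipynb itself may load the model through its own helper functions.

    # Hedged sketch: reproduces the style of log seen in the demo.ipynb hunk.
    # Assumes the converted checkpoint from the sketch above and imgs/word.png.
    from ultralytics import YOLO

    model = YOLO("weights/icon_detect_v1_5/best.pt")

    # imgsz=1280 matches the 736x1280 inference shape in the logged output;
    # Ultralytics prints per-image detections plus preprocess/inference/
    # postprocess timings like the "Speed: ..." line above.
    results = model.predict("imgs/word.png", imgsz=1280)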