diff --git a/README.md b/README.md index 9e4ef3c..4c4c7be 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,13 @@ pip install -r requirements.txt Then download the model ckpts files in: https://huggingface.co/microsoft/OmniParser, and put them under weights/, default folder structure is: weights/icon_detect, weights/icon_caption_florence, weights/icon_caption_blip2. -Finally, convert the safetensor to .pt file. +For v1: +convert the safetensor to .pt file. ```python python weights/convert_safetensor_to_pt.py + +For v1.5: +download 'model_v1_5.pt' from https://huggingface.co/microsoft/OmniParser/tree/main/icon_detect_v1_5, make a new dir: weights/icon_detect_v1_5, and put it inside the folder. No weight conversion is needed. ``` ## Examples: @@ -39,7 +43,10 @@ We put together a few simple examples in the demo.ipynb. ## Gradio Demo To run gradio demo, simply run: ```python -python gradio_demo.py +# For v1 +python gradio_demo.py --icon_detect_model weights/icon_detect/best.pt --icon_caption_model florence2 +# For v1.5 +python gradio_demo.py --icon_detect_model weights/icon_detect_v1_5/model_v1_5.pt --icon_caption_model florence2 ``` ## Model Weights License diff --git a/__pycache__/utils.cpython-312.pyc b/__pycache__/utils.cpython-312.pyc deleted file mode 100644 index 5d73f5d..0000000 Binary files a/__pycache__/utils.cpython-312.pyc and /dev/null differ diff --git a/demo.ipynb b/demo.ipynb index 9506e86..81c3285 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -65,7 +65,7 @@ "(device(type='cuda', index=0), ultralytics.models.yolo.model.YOLO)" ] }, - "execution_count": 4, + "execution_count": 3, 
"metadata": {}, "output_type": "execute_result" } @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -85,8 +85,8 @@ "text": [ "image size: (1919, 1079)\n", "\n", - "image 1/1 /home/yadonglu/OmniParser/imgs/word.png: 736x1280 115 icons, 66.6ms\n", - "Speed: 7.2ms preprocess, 66.6ms inference, 459.4ms postprocess per image at shape (1, 3, 736, 1280)\n" + "image 1/1 /home/yadonglu/OmniParser/imgs/word.png: 736x1280 115 icons, 67.0ms\n", + "Speed: 7.0ms preprocess, 67.0ms inference, 423.3ms postprocess per image at shape (1, 3, 736, 1280)\n" ] } ], @@ -130,16 +130,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, @@ -338,6 +338,1103 @@ "df" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'type': 'text',\n", + " 'bbox': [0.023970818147063255,\n", + " 0.012974976561963558,\n", + " 0.05315268412232399,\n", + " 0.033364225178956985],\n", + " 'interactivity': False,\n", + " 'content': 'AutoSave'},\n", + " {'type': 'text',\n", + " 'bbox': [0.1500781625509262,\n", + " 0.011121409013867378,\n", + " 0.3272537887096405,\n", + " 0.03521779552102089],\n", + " 'interactivity': False,\n", + " 'content': 'Document 10.docx General* Last Modified: Just now '},\n", + " {'type': 'text',\n", + " 'bbox': [0.4095883369445801,\n", + " 0.011121409013867378,\n", + " 0.4544033408164978,\n", + " 0.037071362137794495],\n", + " 'interactivity': False,\n", + " 'content': 'O Search'},\n", + " {'type': 'text',\n", + " 'bbox': [0.010943199507892132,\n", + " 0.048192769289016724,\n", + " 0.021365294232964516,\n", + " 0.07043559104204178],\n", + " 'interactivity': False,\n", + " 'content': 'File'},\n", + " {'type': 'text',\n", + " 'bbox': 
[0.03335070237517357,\n", + " 0.045412417501211166,\n", + " 0.05523710325360298,\n", + " 0.07228915393352509],\n", + " 'interactivity': False,\n", + " 'content': 'Home'},\n", + " {'type': 'text',\n", + " 'bbox': [0.22094841301441193,\n", + " 0.048192769289016724,\n", + " 0.2542991042137146,\n", + " 0.07321593910455704],\n", + " 'interactivity': False,\n", + " 'content': 'Mailings'},\n", + " {'type': 'text',\n", + " 'bbox': [0.2595101594924927,\n", + " 0.05004633963108063,\n", + " 0.2845231890678406,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Review'},\n", + " {'type': 'text',\n", + " 'bbox': [0.31474727392196655,\n", + " 0.05004633963108063,\n", + " 0.3335070312023163,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Help'},\n", + " {'type': 'text',\n", + " 'bbox': [0.06357477605342865,\n", + " 0.05189990624785423,\n", + " 0.08546117693185806,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Insert'},\n", + " {'type': 'text',\n", + " 'bbox': [0.08963001519441605,\n", + " 0.05189990624785423,\n", + " 0.11151641607284546,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Draw'},\n", + " {'type': 'text',\n", + " 'bbox': [0.11672746390104294,\n", + " 0.05189990624785423,\n", + " 0.14069828391075134,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Design'},\n", + " {'type': 'text',\n", + " 'bbox': [0.14799374341964722,\n", + " 0.05189990624785423,\n", + " 0.17300677299499512,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'Layout'},\n", + " {'type': 'text',\n", + " 'bbox': [0.1792600303888321,\n", + " 0.05189990624785423,\n", + " 0.21677957475185394,\n", + " 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'References'},\n", + " {'type': 'text',\n", + " 'bbox': [0.29077643156051636,\n", + " 0.05189990624785423,\n", + " 0.31057843565940857,\n", + 
" 0.07136237621307373],\n", + " 'interactivity': False,\n", + " 'content': 'View'},\n", + " {'type': 'text',\n", + " 'bbox': [0.21261073648929596,\n", + " 0.08248378336429596,\n", + " 0.2542991042137146,\n", + " 0.10843373835086823],\n", + " 'interactivity': False,\n", + " 'content': '|Aa~|Ao'},\n", + " {'type': 'text',\n", + " 'bbox': [0.2053152620792389,\n", + " 0.08804448693990707,\n", + " 0.21573735773563385,\n", + " 0.10472659766674042],\n", + " 'interactivity': False,\n", + " 'content': 'A'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7425742745399475,\n", + " 0.1010194644331932,\n", + " 0.7821782231330872,\n", + " 0.12696941196918488],\n", + " 'interactivity': False,\n", + " 'content': '9c Replace'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8936946392059326,\n", + " 0.11028730124235153,\n", + " 0.9405940771102905,\n", + " 0.13067655265331268],\n", + " 'interactivity': False,\n", + " 'content': 'Editor Copilot'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7425742745399475,\n", + " 0.1204819306731224,\n", + " 0.7795726656913757,\n", + " 0.15106579661369324],\n", + " 'interactivity': False,\n", + " 'content': ' Select v'},\n", + " {'type': 'text',\n", + " 'bbox': [0.03230849280953407,\n", + " 0.14735867083072662,\n", + " 0.06253256648778915,\n", + " 0.16682113707065582],\n", + " 'interactivity': False,\n", + " 'content': 'Clipboard'},\n", + " {'type': 'text',\n", + " 'bbox': [0.1771756112575531,\n", + " 0.14550510048866272,\n", + " 0.19593538343906403,\n", + " 0.16682113707065582],\n", + " 'interactivity': False,\n", + " 'content': 'Font'},\n", + " {'type': 'text',\n", + " 'bbox': [0.34184470772743225,\n", + " 0.14365153014659882,\n", + " 0.3751954138278961,\n", + " 0.16867469251155853],\n", + " 'interactivity': False,\n", + " 'content': ' Paragraph'},\n", + " {'type': 'text',\n", + " 'bbox': [0.5747785568237305,\n", + " 0.14550510048866272,\n", + " 0.595622718334198,\n", + " 0.16682113707065582],\n", + " 'interactivity': False,\n", + " 'content': 
'Styles'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7509119510650635,\n", + " 0.14272475242614746,\n", + " 0.7754038572311401,\n", + " 0.16774791479110718],\n", + " 'interactivity': False,\n", + " 'content': 'Editing'},\n", + " {'type': 'text',\n", + " 'bbox': [0.3303804099559784,\n", + " 0.3271547853946686,\n", + " 0.5528921484947205,\n", + " 0.35125115513801575],\n", + " 'interactivity': False,\n", + " 'content': 'Select the icon or press Alt + i to draft with Copilot'},\n", + " {'type': 'text',\n", + " 'bbox': [0.001042209449224174,\n", + " 0.9360519051551819,\n", + " 0.20427305996418,\n", + " 0.9592214822769165],\n", + " 'interactivity': False,\n", + " 'content': 'Page 1 of1 Owords English (United States) Text Predictions: On'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7509119510650635,\n", + " 0.9341983199119568,\n", + " 0.9051589369773865,\n", + " 0.9573679566383362],\n", + " 'interactivity': False,\n", + " 'content': 'Display Settings Focus '},\n", + " {'type': 'text',\n", + " 'bbox': [0.9676914811134338,\n", + " 0.937905490398407,\n", + " 0.9989578127861023,\n", + " 0.9573679566383362],\n", + " 'interactivity': False,\n", + " 'content': '+100%'},\n", + " {'type': 'text',\n", + " 'bbox': [0.4421736001968384,\n", + " 0.08184756338596344,\n", + " 0.5035458207130432,\n", + " 0.14312665164470673],\n", + " 'interactivity': True,\n", + " 'content': 'Normal'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8202124834060669,\n", + " 0.07994024455547333,\n", + " 0.8537722826004028,\n", + " 0.1654169261455536],\n", + " 'interactivity': True,\n", + " 'content': 'Sensitivity'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8202124834060669,\n", + " 0.07994024455547333,\n", + " 0.8537722826004028,\n", + " 0.1654169261455536],\n", + " 'interactivity': True,\n", + " 'content': 'Sensitivity'},\n", + " {'type': 'text',\n", + " 'bbox': [0.2829875349998474,\n", + " 0.962411642074585,\n", + " 0.3987993597984314,\n", + " 0.9934952259063721],\n", + " 'interactivity': True,\n", + 
" 'content': 'Q Search'},\n", + " {'type': 'text',\n", + " 'bbox': [0.09709451347589493,\n", + " 0.07982245087623596,\n", + " 0.16560909152030945,\n", + " 0.10813643783330917],\n", + " 'interactivity': True,\n", + " 'content': 'Aptos (Body)'},\n", + " {'type': 'text',\n", + " 'bbox': [0.03258265182375908,\n", + " 0.12557221949100494,\n", + " 0.09002341330051422,\n", + " 0.14639848470687866],\n", + " 'interactivity': True,\n", + " 'content': ' Format Painter'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7444383502006531,\n", + " 0.07407139241695404,\n", + " 0.7835553288459778,\n", + " 0.09996039420366287],\n", + " 'interactivity': True,\n", + " 'content': 'O Find'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8902580738067627,\n", + " 0.044810064136981964,\n", + " 0.9379465579986572,\n", + " 0.07267601788043976],\n", + " 'interactivity': True,\n", + " 'content': ' Editing '},\n", + " {'type': 'text',\n", + " 'bbox': [0.8594686985015869,\n", + " 0.07990065962076187,\n", + " 0.8862254023551941,\n", + " 0.16565318405628204],\n", + " 'interactivity': True,\n", + " 'content': 'Add-ins'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8594686985015869,\n", + " 0.07990065962076187,\n", + " 0.8862254023551941,\n", + " 0.16565318405628204],\n", + " 'interactivity': True,\n", + " 'content': 'Add-ins'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8398236036300659,\n", + " 0.045797333121299744,\n", + " 0.8904637098312378,\n", + " 0.07201862335205078],\n", + " 'interactivity': True,\n", + " 'content': ' Comments'},\n", + " {'type': 'text',\n", + " 'bbox': [0.6806007623672485,\n", + " 0.09105142951011658,\n", + " 0.7179933786392212,\n", + " 0.133430615067482],\n", + " 'interactivity': True,\n", + " 'content': 'Title'},\n", + " {'type': 'text',\n", + " 'bbox': [0.23385684192180634,\n", + " 0.11361894011497498,\n", + " 0.2530038356781006,\n", + " 0.13530029356479645],\n", + " 'interactivity': True,\n", + " 'content': 'A'},\n", + " {'type': 'text',\n", + " 'bbox': 
[0.052603382617235184,\n", + " 0.009093760512769222,\n", + " 0.07912862300872803,\n", + " 0.035185642540454865],\n", + " 'interactivity': True,\n", + " 'content': 'On O'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7914328575134277,\n", + " 0.08069829642772675,\n", + " 0.8148500323295593,\n", + " 0.16481055319309235],\n", + " 'interactivity': True,\n", + " 'content': '0'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7914328575134277,\n", + " 0.08069829642772675,\n", + " 0.8148500323295593,\n", + " 0.16481055319309235],\n", + " 'interactivity': True,\n", + " 'content': 'Dictate'},\n", + " {'type': 'text',\n", + " 'bbox': [0.7914328575134277,\n", + " 0.08069829642772675,\n", + " 0.8148500323295593,\n", + " 0.16481055319309235],\n", + " 'interactivity': True,\n", + " 'content': 'Voice'},\n", + " {'type': 'text',\n", + " 'bbox': [0.6167113780975342,\n", + " 0.09614628553390503,\n", + " 0.6672280430793762,\n", + " 0.1265750229358673],\n", + " 'interactivity': True,\n", + " 'content': 'Heading 2'},\n", + " {'type': 'text',\n", + " 'bbox': [0.009729904122650623,\n", + " 0.07482191920280457,\n", + " 0.03324205428361893,\n", + " 0.1444273591041565],\n", + " 'interactivity': True,\n", + " 'content': 'Paste'},\n", + " {'type': 'text',\n", + " 'bbox': [0.5577436685562134,\n", + " 0.09436895698308945,\n", + " 0.6113256216049194,\n", + " 0.13046985864639282],\n", + " 'interactivity': True,\n", + " 'content': 'Heading'},\n", + " {'type': 'text',\n", + " 'bbox': [0.5062990784645081,\n", + " 0.094179168343544,\n", + " 0.5525638461112976,\n", + " 0.12651978433132172],\n", + " 'interactivity': True,\n", + " 'content': 'No Spacing'},\n", + " {'type': 'text',\n", + " 'bbox': [0.8793159127235413,\n", + " 0.9656907916069031,\n", + " 0.8976714015007019,\n", + " 0.9917687773704529],\n", + " 'interactivity': True,\n", + " 'content': 'ENG'},\n", + " {'type': 'text',\n", + " 'bbox': [0.9438582062721252,\n", + " 0.9580937027931213,\n", + " 0.9950194358825684,\n", + " 0.9967001676559448],\n", 
+ " 'interactivity': True,\n", + " 'content': '3:31 PM'},\n", + " {'type': 'text',\n", + " 'bbox': [0.9438582062721252,\n", + " 0.9580937027931213,\n", + " 0.9950194358825684,\n", + " 0.9967001676559448],\n", + " 'interactivity': True,\n", + " 'content': '11/11/2024'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.4421736001968384,\n", + " 0.08184756338596344,\n", + " 0.5035458207130432,\n", + " 0.14312665164470673],\n", + " 'interactivity': True,\n", + " 'content': '\"Normal\"'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8202124834060669,\n", + " 0.07994024455547333,\n", + " 0.8537722826004028,\n", + " 0.1654169261455536],\n", + " 'interactivity': True,\n", + " 'content': 'Sensibility'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2829875349998474,\n", + " 0.962411642074585,\n", + " 0.3987993597984314,\n", + " 0.9934952259063721],\n", + " 'interactivity': True,\n", + " 'content': 'a search function.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.09709451347589493,\n", + " 0.07982245087623596,\n", + " 0.16560909152030945,\n", + " 0.10813643783330917],\n", + " 'interactivity': True,\n", + " 'content': 'A dropdown menu to select the Aptos (BODY).'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.03258265182375908,\n", + " 0.12557221949100494,\n", + " 0.09002341330051422,\n", + " 0.14639848470687866],\n", + " 'interactivity': True,\n", + " 'content': 'Format Painter.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7444383502006531,\n", + " 0.07407139241695404,\n", + " 0.7835553288459778,\n", + " 0.09996039420366287],\n", + " 'interactivity': True,\n", + " 'content': 'Find.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8902580738067627,\n", + " 0.044810064136981964,\n", + " 0.9379465579986572,\n", + " 0.07267601788043976],\n", + " 'interactivity': True,\n", + " 'content': 'editing options.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8594686985015869,\n", + " 0.07990065962076187,\n", + " 0.8862254023551941,\n", + " 0.16565318405628204],\n", + " 'interactivity': 
True,\n", + " 'content': 'Add-ins'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.1647460013628006,\n", + " 0.08069700747728348,\n", + " 0.1894654780626297,\n", + " 0.10782838612794876],\n", + " 'interactivity': True,\n", + " 'content': 'A dropdown menu displaying the number 12.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8911053538322449,\n", + " 0.07846804708242416,\n", + " 0.9160677194595337,\n", + " 0.1305442601442337],\n", + " 'interactivity': True,\n", + " 'content': 'Editor'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.032847966998815536,\n", + " 0.1001187413930893,\n", + " 0.09088721126317978,\n", + " 0.12493240088224411],\n", + " 'interactivity': True,\n", + " 'content': 'Copy function.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9372036457061768,\n", + " 0.04397198185324669,\n", + " 0.978323757648468,\n", + " 0.07226070016622543],\n", + " 'interactivity': True,\n", + " 'content': 'sharing content.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8398236036300659,\n", + " 0.045797333121299744,\n", + " 0.8904637098312378,\n", + " 0.07201862335205078],\n", + " 'interactivity': True,\n", + " 'content': 'Comments section.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6806007623672485,\n", + " 0.09105142951011658,\n", + " 0.7179933786392212,\n", + " 0.133430615067482],\n", + " 'interactivity': True,\n", + " 'content': 'a text input field for a title.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2890045642852783,\n", + " 0.08490100502967834,\n", + " 0.30828937888145447,\n", + " 0.10592091828584671],\n", + " 'interactivity': True,\n", + " 'content': 'Bullets'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.03349688649177551,\n", + " 0.07261990755796432,\n", + " 0.09314201027154922,\n", + " 0.10159766674041748],\n", + " 'interactivity': True,\n", + " 'content': 'Cut function.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2877277433872223,\n", + " 0.11145589500665665,\n", + " 0.303026407957077,\n", + " 0.13835953176021576],\n", + " 'interactivity': True,\n", 
+ " 'content': 'Left'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2691686451435089,\n", + " 0.11370231956243515,\n", + " 0.2802695035934448,\n", + " 0.13622881472110748],\n", + " 'interactivity': True,\n", + " 'content': 'Properties'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2549927830696106,\n", + " 0.11380065977573395,\n", + " 0.2666113078594208,\n", + " 0.1355700045824051],\n", + " 'interactivity': True,\n", + " 'content': 'Text Box'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.3584928810596466,\n", + " 0.11388528347015381,\n", + " 0.37715959548950195,\n", + " 0.1368168294429779],\n", + " 'interactivity': True,\n", + " 'content': 'Line Spacing'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.31671008467674255,\n", + " 0.11221727728843689,\n", + " 0.3286404311656952,\n", + " 0.13693131506443024],\n", + " 'interactivity': True,\n", + " 'content': 'Justified'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5834745764732361,\n", + " 0.010299306362867355,\n", + " 0.595229983329773,\n", + " 0.03519251197576523],\n", + " 'interactivity': True,\n", + " 'content': 'Dictate'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9155870079994202,\n", + " 0.0774989053606987,\n", + " 0.9414986968040466,\n", + " 0.1304350197315216],\n", + " 'interactivity': True,\n", + " 'content': 'Copilot'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.23385684192180634,\n", + " 0.11361894011497498,\n", + " 0.2530038356781006,\n", + " 0.13530029356479645],\n", + " 'interactivity': True,\n", + " 'content': 'Font Color'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.052603382617235184,\n", + " 0.009093760512769222,\n", + " 0.07912862300872803,\n", + " 0.035185642540454865],\n", + " 'interactivity': True,\n", + " 'content': 'a toggle switch in the \"on\" position.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7914328575134277,\n", + " 0.08069829642772675,\n", + " 0.8148500323295593,\n", + " 0.16481055319309235],\n", + " 'interactivity': True,\n", + " 'content': 'Dictate voice input.'},\n", + " 
{'type': 'icon',\n", + " 'bbox': [0.31092125177383423,\n", + " 0.08435532450675964,\n", + " 0.329327255487442,\n", + " 0.10585256665945053],\n", + " 'interactivity': True,\n", + " 'content': 'Ordered List'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.19138740003108978,\n", + " 0.1136791929602623,\n", + " 0.2088518887758255,\n", + " 0.13493801653385162],\n", + " 'interactivity': True,\n", + " 'content': 'Superscript'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.38494837284088135,\n", + " 0.08577587455511093,\n", + " 0.40266716480255127,\n", + " 0.10767263174057007],\n", + " 'interactivity': True,\n", + " 'content': 'Text formatting options'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.3328312039375305,\n", + " 0.08451363444328308,\n", + " 0.3506781756877899,\n", + " 0.10656751692295074],\n", + " 'interactivity': True,\n", + " 'content': 'Ordered List'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.34318241477012634,\n", + " 0.1132659986615181,\n", + " 0.35551774501800537,\n", + " 0.1364356428384781],\n", + " 'interactivity': True,\n", + " 'content': 'Decrease'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.42345917224884033,\n", + " 0.08657703548669815,\n", + " 0.43425723910331726,\n", + " 0.1062837764620781],\n", + " 'interactivity': True,\n", + " 'content': 'Formatting Marks'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.4080311954021454,\n", + " 0.08575674891471863,\n", + " 0.41973719000816345,\n", + " 0.10661903768777847],\n", + " 'interactivity': True,\n", + " 'content': 'Alphabetical'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.3302677273750305,\n", + " 0.11280456185340881,\n", + " 0.3417406976222992,\n", + " 0.1366947740316391],\n", + " 'interactivity': True,\n", + " 'content': 'Justified'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6167113780975342,\n", + " 0.09614628553390503,\n", + " 0.6672280430793762,\n", + " 0.1265750229358673],\n", + " 'interactivity': True,\n", + " 'content': 'the heading 2 feature.'},\n", + " {'type': 'icon',\n", + " 'bbox': 
[0.11592193692922592,\n", + " 0.005373150110244751,\n", + " 0.14385110139846802,\n", + " 0.03672346472740173],\n", + " 'interactivity': True,\n", + " 'content': 'Refresh or reload.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.14717423915863037,\n", + " 0.11490623652935028,\n", + " 0.16088661551475525,\n", + " 0.1356186866760254],\n", + " 'interactivity': True,\n", + " 'content': 'Strikethrough'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.3821171224117279,\n", + " 0.11373978108167648,\n", + " 0.4016689956188202,\n", + " 0.1374775469303131],\n", + " 'interactivity': True,\n", + " 'content': 'Character Highlighting Color'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.26916417479515076,\n", + " 0.08428845554590225,\n", + " 0.28033241629600525,\n", + " 0.10548985004425049],\n", + " 'interactivity': True,\n", + " 'content': 'Text Box'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.20385362207889557,\n", + " 0.0841338112950325,\n", + " 0.2137889862060547,\n", + " 0.10445716977119446],\n", + " 'interactivity': True,\n", + " 'content': 'Superscript'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.16311821341514587,\n", + " 0.11534248292446136,\n", + " 0.17403928935527802,\n", + " 0.13565881550312042],\n", + " 'interactivity': True,\n", + " 'content': 'Subscript'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9052765965461731,\n", + " 0.00814617145806551,\n", + " 0.9237172603607178,\n", + " 0.038098178803920746],\n", + " 'interactivity': True,\n", + " 'content': 'Color picker'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.009729904122650623,\n", + " 0.07482191920280457,\n", + " 0.03324205428361893,\n", + " 0.1444273591041565],\n", + " 'interactivity': True,\n", + " 'content': 'Paste'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.4029557406902313,\n", + " 0.114181287586689,\n", + " 0.4229840338230133,\n", + " 0.13744214177131653],\n", + " 'interactivity': True,\n", + " 'content': 'Borders'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.304019957780838,\n", + " 
0.11177197843790054,\n", + " 0.31476888060569763,\n", + " 0.13749711215496063],\n", + " 'interactivity': True,\n", + " 'content': 'Justified'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7243528366088867,\n", + " 0.11843044310808182,\n", + " 0.7368360757827759,\n", + " 0.14324188232421875],\n", + " 'interactivity': True,\n", + " 'content': 'Align Left'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.10052549093961716,\n", + " 0.11491432040929794,\n", + " 0.11139754951000214,\n", + " 0.1337520182132721],\n", + " 'interactivity': True,\n", + " 'content': 'Bold'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.72037672996521,\n", + " 0.9603810906410217,\n", + " 0.7408535480499268,\n", + " 0.9975049495697021],\n", + " 'interactivity': True,\n", + " 'content': 'Word'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.17647607624530792,\n", + " 0.11430654674768448,\n", + " 0.18667954206466675,\n", + " 0.1349179446697235],\n", + " 'interactivity': True,\n", + " 'content': 'Superscript'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8552797436714172,\n", + " 0.008872815407812595,\n", + " 0.8746973276138306,\n", + " 0.03798949345946312],\n", + " 'interactivity': True,\n", + " 'content': 'User profile'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2123347371816635,\n", + " 0.11371146142482758,\n", + " 0.2324133962392807,\n", + " 0.13519784808158875],\n", + " 'interactivity': True,\n", + " 'content': 'Highlight Color Yellow'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5577436685562134,\n", + " 0.09436895698308945,\n", + " 0.6113256216049194,\n", + " 0.13046985864639282],\n", + " 'interactivity': True,\n", + " 'content': 'The heading section or feature in a user interface.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.4012863337993622,\n", + " 0.9643725752830505,\n", + " 0.4189930260181427,\n", + " 0.9973337054252625],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft Edge browser.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.09716729074716568,\n", + " 
0.006186232436448336,\n", + " 0.11493486911058426,\n", + " 0.036148034036159515],\n", + " 'interactivity': True,\n", + " 'content': 'Undo'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5062990784645081,\n", + " 0.094179168343544,\n", + " 0.5525638461112976,\n", + " 0.12651978433132172],\n", + " 'interactivity': True,\n", + " 'content': 'No Spacing.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2558182179927826,\n", + " 0.08482034504413605,\n", + " 0.26677465438842773,\n", + " 0.10527177155017853],\n", + " 'interactivity': True,\n", + " 'content': 'Spelling'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.26080361008644104,\n", + " 0.9643218517303467,\n", + " 0.2798599302768707,\n", + " 0.9920910000801086],\n", + " 'interactivity': True,\n", + " 'content': 'Windows Start menu'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9542831778526306,\n", + " 0.007899148389697075,\n", + " 0.9719265699386597,\n", + " 0.03660045564174652],\n", + " 'interactivity': True,\n", + " 'content': 'Rename'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.929588794708252,\n", + " 0.008353784680366516,\n", + " 0.9459244608879089,\n", + " 0.03581856191158295],\n", + " 'interactivity': True,\n", + " 'content': 'Minimize'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7249853014945984,\n", + " 0.0994315892457962,\n", + " 0.7370050549507141,\n", + " 0.12186422944068909],\n", + " 'interactivity': True,\n", + " 'content': 'a dropdown menu.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6986724138259888,\n", + " 0.9619226455688477,\n", + " 0.7189748883247375,\n", + " 0.996666431427002],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft Outlook.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.468858003616333,\n", + " 0.9603798389434814,\n", + " 0.48965680599212646,\n", + " 0.9965083599090576],\n", + " 'interactivity': True,\n", + " 'content': 'Outlook'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6761321425437927,\n", + " 0.9637576937675476,\n", + " 0.6941074728965759,\n", + " 
0.9978290796279907],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft Edge browser.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.49360886216163635,\n", + " 0.9623897671699524,\n", + " 0.5103277564048767,\n", + " 0.9960827827453613],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft Edge browser.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9793746471405029,\n", + " 0.008349756710231304,\n", + " 0.9978024959564209,\n", + " 0.03722222149372101],\n", + " 'interactivity': True,\n", + " 'content': 'Close'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6313145160675049,\n", + " 0.9642522931098938,\n", + " 0.6476803421974182,\n", + " 0.9950305223464966],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft 365.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8821634650230408,\n", + " 0.007914461195468903,\n", + " 0.8968126773834229,\n", + " 0.03773742914199829],\n", + " 'interactivity': True,\n", + " 'content': 'Ideas'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.2984054386615753,\n", + " 0.31830549240112305,\n", + " 0.3177582323551178,\n", + " 0.3532152771949768],\n", + " 'interactivity': True,\n", + " 'content': 'Paste'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.35590431094169617,\n", + " 0.08475812524557114,\n", + " 0.3688337504863739,\n", + " 0.10659123957157135],\n", + " 'interactivity': True,\n", + " 'content': 'Increase'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6540439128875732,\n", + " 0.9651411771774292,\n", + " 0.6705880761146545,\n", + " 0.9966898560523987],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft PowerPoint'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.11363166570663452,\n", + " 0.11444719135761261,\n", + " 0.1247863918542862,\n", + " 0.13346022367477417],\n", + " 'interactivity': True,\n", + " 'content': 'Italic'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.6090287566184998,\n", + " 0.9621816277503967,\n", + " 0.6264833807945251,\n", + " 0.996106743812561],\n", + " 'interactivity': True,\n", + " 
'content': 'System'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5378493666648865,\n", + " 0.9638866186141968,\n", + " 0.5561378002166748,\n", + " 0.9978582859039307],\n", + " 'interactivity': True,\n", + " 'content': 'Mail'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.44901859760284424,\n", + " 0.9625388979911804,\n", + " 0.46310722827911377,\n", + " 0.9957110285758972],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft OneNote.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.003765839384868741,\n", + " 0.005633189808577299,\n", + " 0.02113271877169609,\n", + " 0.038551587611436844],\n", + " 'interactivity': True,\n", + " 'content': 'Word'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9811697602272034,\n", + " 0.04323381558060646,\n", + " 0.996175229549408,\n", + " 0.07214201241731644],\n", + " 'interactivity': True,\n", + " 'content': 'Feedback'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5169132351875305,\n", + " 0.964064359664917,\n", + " 0.5334573984146118,\n", + " 0.9958033561706543],\n", + " 'interactivity': True,\n", + " 'content': 'Microsoft Office.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.42432236671447754,\n", + " 0.9645394086837769,\n", + " 0.441427618265152,\n", + " 0.9964744448661804],\n", + " 'interactivity': True,\n", + " 'content': 'folder'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.19066540896892548,\n", + " 0.08328725397586823,\n", + " 0.20065611600875854,\n", + " 0.10488075762987137],\n", + " 'interactivity': True,\n", + " 'content': 'Grow Font'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.1277783066034317,\n", + " 0.11477591842412949,\n", + " 0.14602738618850708,\n", + " 0.13506455719470978],\n", + " 'interactivity': True,\n", + " 'content': 'Underline'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.08056716620922089,\n", + " 0.00711445976048708,\n", + " 0.09538117051124573,\n", + " 0.035812146961688995],\n", + " 'interactivity': True,\n", + " 'content': 'Save'},\n", + " {'type': 'icon',\n", + " 'bbox': 
[0.8606503009796143,\n", + " 0.9657216668128967,\n", + " 0.8758888244628906,\n", + " 0.9911571741104126],\n", + " 'interactivity': True,\n", + " 'content': 'Cloud storage'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5619325041770935,\n", + " 0.9625910520553589,\n", + " 0.5788794755935669,\n", + " 0.9987230896949768],\n", + " 'interactivity': True,\n", + " 'content': 'Forward'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.5840384364128113,\n", + " 0.9613261222839355,\n", + " 0.6028916239738464,\n", + " 0.9975537061691284],\n", + " 'interactivity': True,\n", + " 'content': 'Color picker'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8284427523612976,\n", + " 0.9678279757499695,\n", + " 0.8419187068939209,\n", + " 0.9910277724266052],\n", + " 'interactivity': True,\n", + " 'content': 'Move Up'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9818350672721863,\n", + " 0.14312511682510376,\n", + " 0.9959391355514526,\n", + " 0.16661280393600464],\n", + " 'interactivity': True,\n", + " 'content': 'Expand to show more options'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7249933481216431,\n", + " 0.08324253559112549,\n", + " 0.736741840839386,\n", + " 0.10332420468330383],\n", + " 'interactivity': True,\n", + " 'content': 'Scroll up.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7324294447898865,\n", + " 0.14760799705982208,\n", + " 0.7400776743888855,\n", + " 0.16557711362838745],\n", + " 'interactivity': True,\n", + " 'content': 'Redo'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.844497561454773,\n", + " 0.9674849510192871,\n", + " 0.8574301600456238,\n", + " 0.9902239441871643],\n", + " 'interactivity': True,\n", + " 'content': 'Increase'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.3699079155921936,\n", + " 0.08577551692724228,\n", + " 0.3810403347015381,\n", + " 0.1063292920589447],\n", + " 'interactivity': True,\n", + " 'content': 'Increase'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9397410750389099,\n", + " 0.07160084694623947,\n", + " 
0.9963639974594116,\n", + " 0.13945062458515167],\n", + " 'interactivity': True,\n", + " 'content': 'minimizing a window.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.8793159127235413,\n", + " 0.9656907916069031,\n", + " 0.8976714015007019,\n", + " 0.9917687773704529],\n", + " 'interactivity': True,\n", + " 'content': 'English'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9074605107307434,\n", + " 0.9333967566490173,\n", + " 0.917551577091217,\n", + " 0.955557107925415],\n", + " 'interactivity': True,\n", + " 'content': 'Minimize'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9032755494117737,\n", + " 0.9673230648040771,\n", + " 0.9354223608970642,\n", + " 0.990323543548584],\n", + " 'interactivity': True,\n", + " 'content': 'Wireless connectivity.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.43108317255973816,\n", + " 0.14830924570560455,\n", + " 0.43957316875457764,\n", + " 0.16404235363006592],\n", + " 'interactivity': True,\n", + " 'content': 'Redo'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9390127658843994,\n", + " 0.9358015060424805,\n", + " 0.9462282657623291,\n", + " 0.9563449621200562],\n", + " 'interactivity': True,\n", + " 'content': 'Hanging Indent'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7451589703559875,\n", + " 0.14763562381267548,\n", + " 0.7853077054023743,\n", + " 0.1652708351612091],\n", + " 'interactivity': True,\n", + " 'content': 'Editing.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.27768608927726746,\n", + " 0.1485075205564499,\n", + " 0.2840471863746643,\n", + " 0.16409289836883545],\n", + " 'interactivity': True,\n", + " 'content': 'Redo'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.9438582062721252,\n", + " 0.9580937027931213,\n", + " 0.9950194358825684,\n", + " 0.9967001676559448],\n", + " 'interactivity': True,\n", + " 'content': 'Notifications.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.31950756907463074,\n", + " 0.3229200839996338,\n", + " 0.3328617513179779,\n", + " 0.3530133366584778],\n", + " 'interactivity': 
True,\n", + " 'content': 'minimizing a window.'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.08737719058990479,\n", + " 0.148496612906456,\n", + " 0.09548091888427734,\n", + " 0.16415521502494812],\n", + " 'interactivity': True,\n", + " 'content': 'Redo'},\n", + " {'type': 'icon',\n", + " 'bbox': [0.7414734959602356,\n", + " 0.000822930654976517,\n", + " 0.7834907174110413,\n", + " 0.05006092041730881],\n", + " 'interactivity': True,\n", + " 'content': 'M0,0L9,0 4.5,5z'}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parsed_content_list" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/gradio_demo.py b/gradio_demo.py index 14dd6d0..0557680 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -11,10 +11,7 @@ import base64, os from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img import torch from PIL import Image - -yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt') -caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence") -# caption_model_processor = get_caption_model_processor(model_name="blip2", model_name_or_path="weights/icon_caption_blip2") +import argparse @@ -39,9 +36,9 @@ def process( box_threshold, iou_threshold, use_paddleocr, - imgsz + imgsz, + icon_process_batch_size, ) -> Optional[Image.Image]: - image_save_path = 'imgs/saved_image_demo.png' image_input.save(image_save_path) image = Image.open(image_save_path) @@ -57,13 +54,26 @@ def process( ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr) text, ocr_bbox = ocr_bbox_rslt # print('prompt:', prompt) - dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD 
= box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold, imgsz=imgsz) + dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold, imgsz=imgsz, batch_size=icon_process_batch_size) image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img))) print('finish processing') - parsed_content_list = '\n'.join(parsed_content_list) + # parsed_content_list = '\n'.join(parsed_content_list) + parsed_content_list = '\n'.join([f'type: {x["type"]}, content: {x["content"]}, interactivity: {x["interactivity"]}' for x in parsed_content_list]) return image, str(parsed_content_list) +parser = argparse.ArgumentParser(description='Process model paths and names.') +parser.add_argument('--icon_detect_model', type=str, default='weights/icon_detect/best.pt', help='Path to the YOLO model weights') +parser.add_argument('--icon_caption_model', type=str, default='florence2', choices=['florence2', 'blip2'], help='Name of the caption model') + +args = parser.parse_args() +icon_detect_model, icon_caption_model = args.icon_detect_model, args.icon_caption_model + +yolo_model = get_yolo_model(model_path=icon_detect_model) +if icon_caption_model == 'florence2': + caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence") +elif icon_caption_model == 'blip2': + caption_model_processor = get_caption_model_processor(model_name="blip2", model_name_or_path="weights/icon_caption_blip2") with gr.Blocks() as demo: gr.Markdown(MARKDOWN) @@ -78,9 +88,11 @@ with gr.Blocks() as demo: iou_threshold_component = gr.Slider( label='IOU Threshold', minimum=0.01, 
maximum=1.0, step=0.01, value=0.1) use_paddleocr_component = gr.Checkbox( - label='Use PaddleOCR', value=True) + label='Use PaddleOCR', value=False) imgsz_component = gr.Slider( - label='Icon Detect Image Size', minimum=640, maximum=1920, step=32, value=640) + label='Icon Detect Image Size', minimum=640, maximum=3200, step=32, value=1920) + icon_process_batch_size_component = gr.Slider( + label='Icon Process Batch Size', minimum=1, maximum=256, step=1, value=64) submit_button_component = gr.Button( value='Submit', variant='primary') with gr.Column(): @@ -94,10 +106,16 @@ with gr.Blocks() as demo: box_threshold_component, iou_threshold_component, use_paddleocr_component, - imgsz_component + imgsz_component, + icon_process_batch_size_component ], outputs=[image_output_component, text_output_component] ) # demo.launch(debug=False, show_error=True, share=True) -demo.launch(share=True, server_port=7861, server_name='0.0.0.0') \ No newline at end of file +demo.launch(share=True, server_port=7861, server_name='0.0.0.0') + + + +# python gradio_demo.py --icon_detect_model weights/icon_detect/best.pt --icon_caption_model florence2 +# python gradio_demo.py --icon_detect_model weights/icon_detect_v1_5/model_v1_5.pt --icon_caption_model florence2 \ No newline at end of file diff --git a/imgs/saved_image_demo.png b/imgs/saved_image_demo.png index 1719c03..8042584 100644 Binary files a/imgs/saved_image_demo.png and b/imgs/saved_image_demo.png differ diff --git a/weights/convert_safetensor_to_pt.py b/weights/convert_safetensor_to_pt.py index 9ed0a7f..69023cd 100644 --- a/weights/convert_safetensor_to_pt.py +++ b/weights/convert_safetensor_to_pt.py @@ -5,16 +5,13 @@ import argparse import yaml import os -# accept args to specify v1 or v1_5 -parser = argparse.ArgumentParser(description='Specify version v1 or v1_5') -parser.add_argument('--weights_dir', type=str, required=True, help='Specify the path to the safetensor file', default='weights/icon_detect_v1_5') +# accept args to 
specify v1 +parser = argparse.ArgumentParser(description='add weight directory') +parser.add_argument('--weights_dir', type=str, help='Specify the path to the safetensor file', default='weights/icon_detect') args = parser.parse_args() tensor_dict = load_file(os.path.join(args.weights_dir, "model.safetensors")) model = DetectionModel(os.path.join(args.weights_dir, "model.yaml")) -# from ultralytics import YOLO -# som_model = YOLO("yolo11m.pt") -# model = som_model.model model.load_state_dict(tensor_dict) save_dict = {'model':model}