add florence model; update demo.ipynb

2024-10-25 16:09:24 -07:00
parent b510e61ff3
commit cafc5ef109
15 changed files with 186 additions and 242 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-weights/
+weights/icon_caption_blip2
+weights/icon_caption_florence
--- a/pycache/utils.cpython-312.pyc
+++ b/pycache/utils.cpython-312.pyc
--- a/demo.ipynb
+++ b/demo.ipynb
--- a/imgs/google_page.png
+++ b/imgs/google_page.png
--- a/imgs/mobile_4.png
+++ b/imgs/mobile_4.png
--- a/imgs/pc_1.png
+++ b/imgs/pc_1.png
--- a/imgs/saved_image_demo.png
+++ b/imgs/saved_image_demo.png
--- a/imgs/settings.png
+++ b/imgs/settings.png
--- a/imgs/windows_home.png
+++ b/imgs/windows_home.png
--- a/imgs/windows_multitab.png
+++ b/imgs/windows_multitab.png
--- a/requirement.txt
+++ b/requirement.txt
@@ -12,4 +12,4 @@ opencv-python-headless
 gradio
 dill
 accelerate
-
+timm
--- a/util/pycache/init.cpython-312.pyc
+++ b/util/pycache/init.cpython-312.pyc
--- a/util/pycache/box_annotator.cpython-312.pyc
+++ b/util/pycache/box_annotator.cpython-312.pyc
--- a/utils.py
+++ b/utils.py
@@ -33,9 +33,10 @@ import supervision as sv
 import torchvision.transforms as T


-def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
+def get_caption_model_processor(model_name, model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
    if not device:
        device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model_name == "blip2":
        from transformers import Blip2Processor, Blip2ForConditionalGeneration
        processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
        if device == 'cpu':
@@ -45,7 +46,14 @@ def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b",
        else:
            model = Blip2ForConditionalGeneration.from_pretrained(
            model_name_or_path, device_map=None, torch_dtype=torch.float16
-    )
+        ).to(device)
+    elif model_name == "florence2":
+        from transformers import AutoProcessor, AutoModelForCausalLM 
+        processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
+        if device == 'cpu':
+            model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float32, trust_remote_code=True)
+        else:
+            model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True).to(device)
    return {'model': model.to(device), 'processor': processor}


--- a/weights/icon_detect/best.pt
+++ b/weights/icon_detect/best.pt