add florence model; update demo.ipynb

This commit is contained in:
yadonglu
2024-10-25 16:09:24 -07:00
parent b510e61ff3
commit cafc5ef109
15 changed files with 186 additions and 242 deletions

3
.gitignore vendored
View File

@@ -1 +1,2 @@
-weights/
+weights/icon_caption_blip2
+weights/icon_caption_florence

Binary file not shown.

File diff suppressed because one or more lines are too long

BIN
imgs/google_page.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 324 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 197 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 805 B

BIN
imgs/windows_home.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 MiB

BIN
imgs/windows_multitab.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 459 KiB

View File

@@ -12,4 +12,4 @@ opencv-python-headless
gradio gradio
dill dill
accelerate accelerate
timm

View File

@@ -33,9 +33,10 @@ import supervision as sv
 import torchvision.transforms as T
-def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
+def get_caption_model_processor(model_name, model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
     if not device:
         device = "cuda" if torch.cuda.is_available() else "cpu"
+    if model_name == "blip2":
         from transformers import Blip2Processor, Blip2ForConditionalGeneration
         processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
         if device == 'cpu':
@@ -45,7 +46,14 @@ def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b",
         else:
             model = Blip2ForConditionalGeneration.from_pretrained(
                 model_name_or_path, device_map=None, torch_dtype=torch.float16
-            )
+            ).to(device)
+    elif model_name == "florence2":
+        from transformers import AutoProcessor, AutoModelForCausalLM
+        processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
+        if device == 'cpu':
+            model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float32, trust_remote_code=True)
+        else:
+            model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True).to(device)
     return {'model': model.to(device), 'processor': processor}

BIN
weights/icon_detect/best.pt Normal file

Binary file not shown.