add florence model; update demo.ipynb
3
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
weights/
|
weights/icon_caption_blip2
|
||||||
|
weights/icon_caption_florence
|
||||||
385
demo.ipynb
BIN
imgs/google_page.png
Normal file
|
After Width: | Height: | Size: 324 KiB |
|
Before Width: | Height: | Size: 1.8 MiB |
BIN
imgs/pc_1.png
|
Before Width: | Height: | Size: 197 KiB |
|
Before Width: | Height: | Size: 185 KiB |
|
Before Width: | Height: | Size: 805 B |
BIN
imgs/windows_home.png
Normal file
|
After Width: | Height: | Size: 5.8 MiB |
BIN
imgs/windows_multitab.png
Normal file
|
After Width: | Height: | Size: 459 KiB |
@@ -12,4 +12,4 @@ opencv-python-headless
|
|||||||
gradio
|
gradio
|
||||||
dill
|
dill
|
||||||
accelerate
|
accelerate
|
||||||
|
timm
|
||||||
|
|||||||
12
utils.py
@@ -33,9 +33,10 @@ import supervision as sv
|
|||||||
import torchvision.transforms as T
|
import torchvision.transforms as T
|
||||||
|
|
||||||
|
|
||||||
def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
|
def get_caption_model_processor(model_name, model_name_or_path="Salesforce/blip2-opt-2.7b", device=None):
|
||||||
if not device:
|
if not device:
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
if model_name == "blip2":
|
||||||
from transformers import Blip2Processor, Blip2ForConditionalGeneration
|
from transformers import Blip2Processor, Blip2ForConditionalGeneration
|
||||||
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
||||||
if device == 'cpu':
|
if device == 'cpu':
|
||||||
@@ -45,7 +46,14 @@ def get_caption_model_processor(model_name_or_path="Salesforce/blip2-opt-2.7b",
|
|||||||
else:
|
else:
|
||||||
model = Blip2ForConditionalGeneration.from_pretrained(
|
model = Blip2ForConditionalGeneration.from_pretrained(
|
||||||
model_name_or_path, device_map=None, torch_dtype=torch.float16
|
model_name_or_path, device_map=None, torch_dtype=torch.float16
|
||||||
)
|
).to(device)
|
||||||
|
elif model_name == "florence2":
|
||||||
|
from transformers import AutoProcessor, AutoModelForCausalLM
|
||||||
|
processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
|
||||||
|
if device == 'cpu':
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float32, trust_remote_code=True)
|
||||||
|
else:
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, torch_dtype=torch.float16, trust_remote_code=True).to(device)
|
||||||
return {'model': model.to(device), 'processor': processor}
|
return {'model': model.to(device), 'processor': processor}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||