This commit is contained in:
yadonglu
2024-12-05 11:28:18 -08:00
parent 2f13aebc6e
commit 075f349ea1
5 changed files with 100 additions and 161 deletions

1
.gitignore vendored
View File

@@ -2,6 +2,7 @@ weights/icon_caption_blip2
weights/icon_caption_florence
weights/icon_detect/
weights/icon_detect_v1_5/
weights/icon_detect_v1_5_2/
.gradio
__pycache__/
debug.ipynb

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@@ -255,7 +255,7 @@ def remove_overlap_new(boxes, iou_threshold, ocr_bbox=None):
# return box1[0] >= box2[0] and box1[1] >= box2[1] and box1[2] <= box2[2] and box1[3] <= box2[3]
intersection = intersection_area(box1, box2)
ratio1 = intersection / box_area(box1)
return ratio1 > 0.95
return ratio1 > 0.80
# boxes = boxes.tolist()
filtered_boxes = []
@@ -274,27 +274,28 @@ def remove_overlap_new(boxes, iou_threshold, ocr_bbox=None):
if is_valid_box:
# add the following 2 lines to include ocr bbox
if ocr_bbox:
# only add the box if it does not overlap with any ocr bbox
# keep yolo boxes + prioritize ocr label
box_added = False
for box3_elem in ocr_bbox:
if not box_added:
box3 = box3_elem['bbox']
if is_inside(box3, box1): # ocr inside icon
box_added = True
# box_added = True
# delete the box3_elem from ocr_bbox
try:
filtered_boxes.append({'type': 'text', 'bbox': box1_elem['bbox'], 'interactivity': True, 'content': box3_elem['content']})
filtered_boxes.remove(box3_elem)
# print('remove ocr bbox:', box3_elem)
except:
continue
break
# break
elif is_inside(box1, box3): # icon inside ocr
box_added = True
try:
filtered_boxes.append({'type': 'icon', 'bbox': box1_elem['bbox'], 'interactivity': True, 'content': None})
filtered_boxes.remove(box3_elem)
except:
continue
# try:
# filtered_boxes.append({'type': 'icon', 'bbox': box1_elem['bbox'], 'interactivity': True, 'content': None})
# filtered_boxes.remove(box3_elem)
# except:
# continue
break
else:
continue

View File

@@ -3,25 +3,24 @@ from ultralytics.nn.tasks import DetectionModel
from safetensors.torch import load_file
import argparse
import yaml
import os
# accept args to specify v1 or v1_5
parser = argparse.ArgumentParser(description='Specify version v1 or v1_5')
parser.add_argument('--version', choices=['v1', 'v1_5'], required=True, help='Specify the version: v1 or v1_5')
parser.add_argument('--weights_dir', type=str, required=True, help='Specify the path to the safetensor file', default='weights/icon_detect_v1_5')
args = parser.parse_args()
if args.version == 'v1':
tensor_dict = load_file("weights/icon_detect/model.safetensors")
model = DetectionModel('weights/icon_detect/model.yaml')
model.load_state_dict(tensor_dict)
torch.save({'model':model}, 'weights/icon_detect/best.pt')
elif args.version == 'v1_5':
print("Converting v1_5")
tensor_dict = load_file("weights/icon_detect_v1_5/model.safetensors")
model = DetectionModel('weights/icon_detect_v1_5/model.yaml')
model.load_state_dict(tensor_dict)
save_dict = {'model':model}
tensor_dict = load_file(os.path.join(args.weights_dir, "model.safetensors"))
model = DetectionModel(os.path.join(args.weights_dir, "model.yaml"))
# from ultralytics import YOLO
# som_model = YOLO("yolo11m.pt")
# model = som_model.model
model.load_state_dict(tensor_dict)
save_dict = {'model':model}
with open(os.path.join(args.weights_dir, "train_args.yaml"), 'r') as file:
train_args = yaml.safe_load(file)
save_dict.update(train_args)
torch.save(save_dict, os.path.join(args.weights_dir, "best.pt"))
with open("weights/icon_detect_v1_5/train_args.yaml", 'r') as file:
train_args = yaml.safe_load(file)
save_dict.update(train_args)
torch.save(save_dict, 'weights/icon_detect_v1_5/best.pt')