qwen2.5vl

This commit is contained in:
Thomas Dhome-Casanova
2025-02-01 17:29:34 -08:00
parent 3d981833e6
commit ba7ed0ac06
5 changed files with 31 additions and 16 deletions

View File

@@ -2,7 +2,7 @@ from groq import Groq
import os
from .utils import is_image_path
def run_groq_interleaved(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0.6):
def run_groq_interleaved(messages: list, system: str, model_name: str, api_key: str, max_tokens=256, temperature=0.6):
"""
Run a chat completion through Groq's API, ignoring any images in the messages.
"""

View File

@@ -4,11 +4,7 @@ import base64
import requests
from .utils import is_image_path, encode_image
def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
api_key = api_key or os.environ.get("OPENAI_API_KEY")
if not api_key:
raise ValueError("OPENAI_API_KEY is not set")
def run_oai_interleaved(messages: list, system: str, model_name: str, api_key: str, max_tokens=256, temperature=0, provider_base_url: str = "https://api.openai.com/v1"):
headers = {"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"}
@@ -43,20 +39,21 @@ def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max
final_messages = [{"role": "user", "content": messages}]
payload = {
"model": llm,
"model": model_name,
"messages": final_messages,
"max_tokens": max_tokens,
"temperature": temperature
}
response = requests.post(
"https://api.openai.com/v1/chat/completions", headers=headers, json=payload
f"{provider_base_url}/chat/completions", headers=headers, json=payload
)
try:
text = response.json()['choices'][0]['message']['content']
token_usage = int(response.json()['usage']['total_tokens'])
return text, token_usage
except Exception as e:
print(f"Error in interleaved openAI: {e}. This may due to your invalid OPENAI_API_KEY. Please check the response: {response.json()} ")
print(f"Error in interleaved openAI: {e}. This may due to your invalid API key. Please check the response: {response.json()} ")
return response.json()

View File

@@ -42,6 +42,8 @@ class VLMAgent:
self.model = "gpt-4o-2024-11-20"
elif model == "omniparser + R1":
self.model = "deepseek-r1-distill-llama-70b"
elif model == "omniparser + qwen2.5vl":
self.model = "qwen2.5-vl-72b-instruct"
else:
raise ValueError(f"Model {model} not supported")
@@ -93,9 +95,10 @@ class VLMAgent:
vlm_response, token_usage = run_oai_interleaved(
messages=planner_messages,
system=system,
llm=self.model,
model_name=self.model,
api_key=self.api_key,
max_tokens=self.max_tokens,
provider_base_url="https://api.openai.com/v1",
temperature=0,
)
print(f"oai token usage: {token_usage}")
@@ -106,13 +109,26 @@ class VLMAgent:
vlm_response, token_usage = run_groq_interleaved(
messages=planner_messages,
system=system,
llm=self.model,
model_name=self.model,
api_key=self.api_key,
max_tokens=self.max_tokens,
)
print(f"groq token usage: {token_usage}")
self.total_token_usage += token_usage
self.total_cost += (token_usage * 0.99 / 1000000)
elif "qwen" in self.model:
vlm_response, token_usage = run_oai_interleaved(
messages=planner_messages,
system=system,
model_name=self.model,
api_key=self.api_key,
max_tokens=min(2048, self.max_tokens),
provider_base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
temperature=0,
)
print(f"qwen token usage: {token_usage}")
self.total_token_usage += token_usage
self.total_cost += (token_usage * 2.2 / 1000000) # https://help.aliyun.com/zh/model-studio/getting-started/models?spm=a2c4g.11186623.0.0.74b04823CGnPv7#fe96cfb1a422a
else:
raise ValueError(f"Model {self.model} not supported")
latency_vlm = time.time() - start