docker demo, migration, speedup inference using cv2
This commit is contained in:
107
demo/gui_agent/llm_utils/qwen.py
Normal file
107
demo/gui_agent/llm_utils/qwen.py
Normal file
@@ -0,0 +1,107 @@
|
||||
|
||||
import os
|
||||
import logging
|
||||
import base64
|
||||
import requests
|
||||
|
||||
import dashscope
|
||||
# from computer_use_demo.gui_agent.llm_utils import is_image_path, encode_image
|
||||
|
||||
def is_image_path(text):
|
||||
return False
|
||||
|
||||
def encode_image(image_path):
|
||||
return ""
|
||||
|
||||
|
||||
def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
|
||||
|
||||
api_key = api_key or os.environ.get("QWEN_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("QWEN_API_KEY is not set")
|
||||
|
||||
dashscope.api_key = api_key
|
||||
|
||||
# from IPython.core.debugger import Pdb; Pdb().set_trace()
|
||||
|
||||
final_messages = [{"role": "system", "content": [{"text": system}]}]
|
||||
# image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
if type(messages) == list:
|
||||
for item in messages:
|
||||
contents = []
|
||||
if isinstance(item, dict):
|
||||
for cnt in item["content"]:
|
||||
if isinstance(cnt, str):
|
||||
if is_image_path(cnt):
|
||||
# base64_image = encode_image(cnt)
|
||||
content = [{"image": cnt}]
|
||||
# content = {"type": "image_url", "image_url": {"url": image_url}}
|
||||
else:
|
||||
content = {"text": cnt}
|
||||
contents.append(content)
|
||||
|
||||
message = {"role": item["role"], "content": contents}
|
||||
else: # str
|
||||
contents.append({"text": item})
|
||||
message = {"role": "user", "content": contents}
|
||||
|
||||
final_messages.append(message)
|
||||
|
||||
print("[qwen-vl] sending messages:", final_messages)
|
||||
|
||||
response = dashscope.MultiModalConversation.call(
|
||||
model='qwen-vl-max-0809',
|
||||
messages=final_messages
|
||||
)
|
||||
|
||||
# from IPython.core.debugger import Pdb; Pdb().set_trace()
|
||||
|
||||
try:
|
||||
text = response.output.choices[0].message.content[0]['text']
|
||||
usage = response.usage
|
||||
|
||||
if "total_tokens" not in usage:
|
||||
token_usage = int(usage["input_tokens"] + usage["output_tokens"])
|
||||
else:
|
||||
token_usage = int(usage["total_tokens"])
|
||||
|
||||
return text, token_usage
|
||||
# return response.json()['choices'][0]['message']['content']
|
||||
# return error message if the response is not successful
|
||||
except Exception as e:
|
||||
print(f"Error in interleaved openAI: {e}. This may due to your invalid OPENAI_API_KEY. Please check the response: {response.json()} ")
|
||||
return response.json()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
api_key = os.environ.get("QWEN_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError("QWEN_API_KEY is not set")
|
||||
|
||||
dashscope.api_key = api_key
|
||||
|
||||
final_messages = [{"role": "user",
|
||||
"content": [
|
||||
{"text": "What is in the screenshot?"},
|
||||
{"image": "./tmp/outputs/screenshot_0b04acbb783d4706bc93873d17ba8c05.png"}
|
||||
]
|
||||
}
|
||||
]
|
||||
response = dashscope.MultiModalConversation.call(model='qwen-vl-max-0809', messages=final_messages)
|
||||
|
||||
print(response)
|
||||
|
||||
text = response.output.choices[0].message.content[0]['text']
|
||||
usage = response.usage
|
||||
|
||||
if "total_tokens" not in usage:
|
||||
if "image_tokens" in usage:
|
||||
token_usage = usage["input_tokens"] + usage["output_tokens"] + usage["image_tokens"]
|
||||
else:
|
||||
token_usage = usage["input_tokens"] + usage["output_tokens"]
|
||||
else:
|
||||
token_usage = usage["total_tokens"]
|
||||
|
||||
print(text, token_usage)
|
||||
# The screenshot is from a video game... 1387
|
||||
Reference in New Issue
Block a user