init gradio demo

This commit is contained in:
Thomas Dhome Casanova (from Dev Box)
2025-01-20 15:07:05 -08:00
parent 9b2c7dae24
commit 85f5fc0385
17 changed files with 2083 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
import os
import re
import ast
import base64
def is_image_path(text):
    """Return True when ``text`` ends with a common image file extension.

    The comparison is case-insensitive, so ``"shot.PNG"`` is recognised just
    like ``"shot.png"``. Only the suffix is inspected; the file is never
    opened and does not have to exist.

    Args:
        text: Candidate path (or any other string).

    Returns:
        bool: True if the suffix is one of the known image extensions.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif")
    # Lower-case first so upper-case suffixes such as ".PNG" are accepted too.
    return text.lower().endswith(image_extensions)
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        str: The base64 encoding of the file contents, decoded as UTF-8.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def is_url_or_filepath(input_string):
    """Classify ``input_string`` as a URL, an existing file path, or neither.

    The URL test only requires the string to *start* with an http(s) URL
    (``re.match`` anchors at the beginning, not the end). The file test
    resolves the string to an absolute path and checks that it exists.

    Args:
        input_string: Text to classify.

    Returns:
        str: ``"URL"``, ``"File path"`` or ``"Invalid"``.
    """
    # Reject empty input up front: os.path.abspath("") resolves to the current
    # working directory, which exists and would be reported as a file path.
    if not input_string:
        return "Invalid"

    # Check if input_string is a URL
    url_pattern = re.compile(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    )
    if url_pattern.match(input_string):
        return "URL"

    # Check if input_string is a file path
    if os.path.exists(os.path.abspath(input_string)):
        return "File path"

    return "Invalid"
def extract_data(input_string, data_type):
    """Return the body of the first ```` ```<data_type> ```` fenced block.

    The block runs from the opening fence up to the next closing fence, or to
    the end of the string when the fence is never closed. Surrounding
    whitespace is stripped.

    Args:
        input_string: Text that may contain a fenced block.
        data_type: Fence label to look for (e.g. ``"python"``, ``"json"``).
            It is matched literally.

    Returns:
        str: The stripped block content, or ``input_string`` unchanged when no
        opening fence with that label is present.
    """
    # Escape the label so regex metacharacters in it (e.g. "c++") are literal.
    pattern = "```" + re.escape(data_type) + r"(.*?)(```|$)"
    # re.DOTALL allows '.' to match newlines as well
    match = re.search(pattern, input_string, re.DOTALL)
    return match.group(1).strip() if match else input_string
def parse_input(code):
    """Parse ``code`` with ``ast`` and describe the call statement it contains.

    Two statement shapes are recognised:

    * an assignment whose right-hand side is a call (``a, b = f(1, k=2)``);
      positional arguments are returned as ``ast.dump`` strings;
    * a bare call expression (``f(1, k=2)``); positional arguments are
      returned as their constant values.

    If several statements match, the values of the last one visited win.

    Args:
        code: Python source text to inspect.

    Returns:
        tuple: ``(targets, func_name, args, kwargs)``. When the source cannot
        be parsed or contains no matching statement the defaults
        ``([], None, [], {})`` are returned.

    Raises:
        ValueError: If a keyword argument (or a positional argument of a bare
            call) is a variable name instead of a constant.
    """

    def get_target_names(target):
        """Recursively get all variable names from the assignment target."""
        if isinstance(target, ast.Name):
            return [target.id]
        if isinstance(target, (ast.Tuple, ast.List)):
            names = []
            for elt in target.elts:
                names.extend(get_target_names(elt))
            return names
        return []

    def get_func_name(func):
        """Return the (possibly dotted) name of the called object, or None."""
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute):
            owner = get_func_name(func.value)
            return f"{owner}.{func.attr}" if owner else func.attr
        return None

    def extract_value(node):
        """Extract the actual value of an AST node."""
        if isinstance(node, ast.Constant):
            return node.value
        if isinstance(node, ast.Name):
            # TODO: a better way to handle variables
            raise ValueError(
                f"Arguments should be a Constant, got a variable {node.id} instead."
            )
        # Add other AST node types that need handling here.
        return None

    # Defaults so the return statement never sees unbound names when the
    # source is invalid or holds no call statement.
    targets, func_name, args, kwargs = [], None, [], {}

    try:
        tree = ast.parse(code)
    except SyntaxError:
        print(f"Input: {code.strip()}")
        print("No match found")
        return targets, func_name, args, kwargs

    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            targets = []
            for t in node.targets:
                targets.extend(get_target_names(t))
            if isinstance(node.value, ast.Call):
                func_name = get_func_name(node.value.func)
                args = [ast.dump(arg) for arg in node.value.args]
                kwargs = {
                    kw.arg: extract_value(kw.value) for kw in node.value.keywords
                }
                print(f"Input: {code.strip()}")
                print(f"Output Variables: {targets}")
                print(f"Function Name: {func_name}")
                print(f"Arguments: {args}")
                print(f"Keyword Arguments: {kwargs}")
        elif isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
            targets = []
            # The callee is an ast.Name/ast.Attribute, not a constant, so it
            # must not go through extract_value (which rejects names).
            func_name = get_func_name(node.value.func)
            args = [extract_value(arg) for arg in node.value.args]
            kwargs = {kw.arg: extract_value(kw.value) for kw in node.value.keywords}

    return targets, func_name, args, kwargs
if __name__ == "__main__":
    import json

    # Manual smoke test: decode a sample model reply and print the result.
    # JSON spells the null value `null`; a Python-style `None` inside the
    # string makes json.loads raise JSONDecodeError.
    s = '{"Reasoning": "The Docker icon has been successfully clicked, and the Docker application should now be opening. No further actions are required.", "Next Action": null}'
    parsed = json.loads(s)
    print(parsed)

View File

@@ -0,0 +1,117 @@
import os
import logging
import base64
import requests
# from computer_use_demo.gui_agent.llm_utils import is_image_path, encode_image
def is_image_path(text):
    """Return True when ``text`` ends with a common image file extension.

    The comparison is case-insensitive, so ``"shot.PNG"`` is recognised just
    like ``"shot.png"``. Only the suffix is inspected; the file is never
    opened and does not have to exist.

    Args:
        text: Candidate path (or any other string).

    Returns:
        bool: True if the suffix is one of the known image extensions.
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif")
    # Lower-case first so upper-case suffixes such as ".PNG" are accepted too.
    return text.lower().endswith(image_extensions)
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the file to read in binary mode.

    Returns:
        str: Base64 text (UTF-8 decoded) of the raw file bytes.
    """
    with open(image_path, "rb") as source:
        payload = source.read()
    return base64.b64encode(payload).decode("utf-8")
# from openai import OpenAI
# client = OpenAI(
# api_key=os.environ.get("OPENAI_API_KEY")
# )
def run_oai_interleaved(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
    """Send an interleaved text/image conversation to the OpenAI chat API.

    Args:
        messages: Either a list or a single string.
            * list items that are dicts must have a ``"content"`` iterable;
              string parts that look like image paths are read from disk and
              inlined as base64 data URLs, other strings become text parts,
              and non-string parts are stringified into text parts;
            * any other list item is sent as a single text part;
            * a plain string is sent as one user message.
            Every message is sent with the ``"user"`` role; an item's own
            ``"role"`` key is not consulted.
        system: System prompt, sent as the first message.
        llm: Model name placed in the request payload.
        api_key: OpenAI API key; falls back to the ``OPENAI_API_KEY``
            environment variable when falsy.
        max_tokens: Completion token limit placed in the payload.
        temperature: Sampling temperature placed in the payload.

    Returns:
        ``(text, token_usage)`` on success. If the response body lacks the
        expected fields (for example an API error object), the decoded JSON
        body is printed and returned instead.

    Raises:
        ValueError: If no API key is supplied or found in the environment.
    """
    api_key = api_key or os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY is not set")

    headers = {"Content-Type": "application/json",
               "Authorization": f"Bearer {api_key}"}
    final_messages = [{"role": "system", "content": system}]

    if isinstance(messages, list):
        for item in messages:
            contents = []
            if isinstance(item, dict):
                for cnt in item["content"]:
                    if isinstance(cnt, str) and is_image_path(cnt):
                        base64_image = encode_image(cnt)
                        # The data URL is always labelled image/jpeg, whatever
                        # the file's actual extension is.
                        content = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                    elif isinstance(cnt, str):
                        content = {"type": "text", "text": cnt}
                    else:
                        # in this case it is a text block from anthropic
                        content = {"type": "text", "text": str(cnt)}
                    contents.append(content)
            else:  # str
                contents.append({"type": "text", "text": item})
            final_messages.append({"role": "user", "content": contents})
    elif isinstance(messages, str):
        # Append rather than replace, so the system prompt is kept.
        final_messages.append({"role": "user", "content": messages})

    print("[oai] sending messages:", {"role": "user", "content": messages})

    payload = {
        "model": llm,
        "messages": final_messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
    )

    # Decode the body once; a non-JSON body still raises to the caller.
    body = response.json()
    try:
        text = body['choices'][0]['message']['content']
        token_usage = int(body['usage']['total_tokens'])
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The body did not have the success shape; hand it back for inspection.
        print(f"Error in interleaved openAI: {e}. This may due to your invalid OPENAI_API_KEY. Please check the response: {body} ")
        return body
    return text, token_usage
if __name__ == "__main__":
    # Manual smoke test: ask a vision model to describe a local screenshot.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY is not set")

    demo_message = {
        "content": [
            "What is in the screenshot?",
            "./tmp/outputs/screenshot_0b04acbb783d4706bc93873d17ba8c05.png",
        ],
        "role": "user",
    }
    text, token_usage = run_oai_interleaved(
        messages=[demo_message],
        llm="gpt-4o-mini",
        system="You are a helpful assistant",
        api_key=api_key,
        max_tokens=256,
        temperature=0,
    )
    print(text, token_usage)
    # Sample output from one run:
    # There is an introduction describing the Calyx... 36986

View File

@@ -0,0 +1,107 @@
import os
import logging
import base64
import requests
import dashscope
# from computer_use_demo.gui_agent.llm_utils import is_image_path, encode_image
def is_image_path(text):
    """Stub that never recognises ``text`` as an image path.

    Returns:
        bool: Always False, regardless of the argument.
    """
    return False
def encode_image(image_path):
    """Stub encoder: the file is not read.

    Returns:
        str: Always the empty string, regardless of ``image_path``.
    """
    return ""
def run_qwen(messages: list, system: str, llm: str, api_key: str, max_tokens=256, temperature=0):
    """Send an interleaved text/image conversation to Qwen-VL via dashscope.

    Args:
        messages: List of messages. Dict items must have ``"role"`` and a
            ``"content"`` iterable; string parts that ``is_image_path``
            accepts become ``{"image": ...}`` parts, other strings become
            ``{"text": ...}`` parts, and non-string parts are stringified into
            text parts. Any other list item is sent as one user text part.
        system: System prompt, sent as the first message.
        llm: Accepted for signature parity but not forwarded; the model name
            is fixed to ``'qwen-vl-max-0809'`` in the call below.
        api_key: DashScope API key; falls back to the ``QWEN_API_KEY``
            environment variable when falsy.
        max_tokens: Accepted but not forwarded to the API call.
        temperature: Accepted but not forwarded to the API call.

    Returns:
        ``(text, token_usage)`` on success; on failure the value of
        ``response.json()`` is printed and returned instead.

    Raises:
        ValueError: If no API key is supplied or found in the environment.
    """
    api_key = api_key or os.environ.get("QWEN_API_KEY")
    if not api_key:
        raise ValueError("QWEN_API_KEY is not set")
    dashscope.api_key = api_key

    final_messages = [{"role": "system", "content": [{"text": system}]}]

    if isinstance(messages, list):
        for item in messages:
            contents = []
            if isinstance(item, dict):
                for cnt in item["content"]:
                    if isinstance(cnt, str) and is_image_path(cnt):
                        # A plain dict, like the text parts, rather than a
                        # list nested inside the content list.
                        content = {"image": cnt}
                    elif isinstance(cnt, str):
                        content = {"text": cnt}
                    else:
                        # Stringify non-string parts so they are never dropped
                        # and never reuse the previous part's value.
                        content = {"text": str(cnt)}
                    contents.append(content)
                message = {"role": item["role"], "content": contents}
            else:  # str
                contents.append({"text": item})
                message = {"role": "user", "content": contents}
            final_messages.append(message)

    print("[qwen-vl] sending messages:", final_messages)

    response = dashscope.MultiModalConversation.call(
        model='qwen-vl-max-0809',
        messages=final_messages
    )

    try:
        text = response.output.choices[0].message.content[0]['text']
        usage = response.usage

        if "total_tokens" in usage:
            token_usage = int(usage["total_tokens"])
        else:
            # No total reported: sum the individual counters, including the
            # image counter when the API reports one.
            token_usage = int(usage["input_tokens"] + usage["output_tokens"])
            if "image_tokens" in usage:
                token_usage += int(usage["image_tokens"])

        return text, token_usage
    # return error message if the response is not successful
    except Exception as e:
        # NOTE(review): assumes the dashscope response object exposes json() — confirm.
        error_body = response.json()
        print(f"Error in interleaved Qwen: {e}. This may due to your invalid QWEN_API_KEY. Please check the response: {error_body} ")
        return error_body
if __name__ == "__main__":
    # Manual smoke test: call the dashscope multimodal endpoint directly with
    # one text part and one local screenshot, then print the reply and the
    # token count.
    api_key = os.environ.get("QWEN_API_KEY")
    if not api_key:
        raise ValueError("QWEN_API_KEY is not set")
    dashscope.api_key = api_key

    user_turn = {
        "role": "user",
        "content": [
            {"text": "What is in the screenshot?"},
            {"image": "./tmp/outputs/screenshot_0b04acbb783d4706bc93873d17ba8c05.png"},
        ],
    }
    final_messages = [user_turn]
    response = dashscope.MultiModalConversation.call(model='qwen-vl-max-0809', messages=final_messages)
    print(response)

    text = response.output.choices[0].message.content[0]['text']
    usage = response.usage

    if "total_tokens" in usage:
        token_usage = usage["total_tokens"]
    elif "image_tokens" in usage:
        # No total reported: add up every counter that is present.
        token_usage = usage["input_tokens"] + usage["output_tokens"] + usage["image_tokens"]
    else:
        token_usage = usage["input_tokens"] + usage["output_tokens"]

    print(text, token_usage)
    # Sample output from one run:
    # The screenshot is from a video game... 1387

View File

@@ -0,0 +1,44 @@
import base64
import logging
from .oai import run_oai_interleaved
from .gemini import run_gemini_interleaved
def run_llm(prompt, llm="gpt-4o-mini", max_tokens=256, temperature=0, stop=None, system="", api_key=None):
    """Dispatch ``prompt`` to the backend selected by the ``llm`` name prefix.

    Args:
        prompt: A string or a list of prompt parts. A string is wrapped into a
            one-element list before dispatch.
        llm: Model name; ``"gpt*"`` goes to ``run_oai_interleaved`` and
            ``"gemini*"`` to ``run_gemini_interleaved``.
        max_tokens: Completion token limit forwarded to the backend.
        temperature: Sampling temperature forwarded to the backend.
        stop: Stop sequence(s); forwarded to the Gemini backend only.
        system: System prompt for the OpenAI backend (defaults to empty).
        api_key: API key for the OpenAI backend; ``None`` is passed through
            unchanged.

    Returns:
        Whatever the selected backend returns.

    Raises:
        ValueError: If ``prompt`` is neither ``str`` nor ``list``, or ``llm``
            matches no known backend prefix.
    """
    # Validate (and normalise) before logging so a bad prompt type surfaces as
    # the intended ValueError rather than an error from the logger.
    if isinstance(prompt, str):
        prompt = [prompt]
    elif not isinstance(prompt, list):
        raise ValueError(f"Invalid prompt type: {type(prompt)}")

    log_prompt(prompt)

    if llm.startswith("gpt"):  # gpt series
        # Keyword arguments keep each value in its intended slot.
        # NOTE(review): assumes `.oai.run_oai_interleaved` has the signature
        # (messages, system, llm, api_key, max_tokens=..., temperature=...)
        # and takes no `stop` argument — confirm against that module.
        out = run_oai_interleaved(
            messages=prompt,
            system=system,
            llm=llm,
            api_key=api_key,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    elif llm.startswith("gemini"):  # gemini series
        out = run_gemini_interleaved(
            prompt,
            llm,
            max_tokens,
            temperature,
            stop
        )
    else:
        raise ValueError(f"Invalid llm: {llm}")

    logging.info(
        f"========Output for {llm}=======\n{out}\n============================")
    return out
def log_prompt(prompt):
    """Log ``prompt`` at INFO level between banner lines.

    Args:
        prompt: A string, or an iterable of prompt parts. Parts are joined
            with blank lines; non-string parts are converted with ``str()`` so
            they cannot make the join fail.
    """
    parts = [prompt] if isinstance(prompt, str) else prompt
    prompt_display = "\n\n".join(str(part) for part in parts)
    logging.info(
        f"========Prompt=======\n{prompt_display}\n============================")