From f6029344c53d742d2632cbd08e37a8113fda6e03 Mon Sep 17 00:00:00 2001 From: Thomas Dhome-Casanova Date: Wed, 29 Jan 2025 22:45:26 -0800 Subject: [PATCH] further clean colored text --- computer_use_demo/gradio/agent/vlm_agent.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/computer_use_demo/gradio/agent/vlm_agent.py b/computer_use_demo/gradio/agent/vlm_agent.py index 6703ee7..76815a6 100644 --- a/computer_use_demo/gradio/agent/vlm_agent.py +++ b/computer_use_demo/gradio/agent/vlm_agent.py @@ -11,7 +11,6 @@ from anthropic.types import ToolResultBlockParam from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaMessageParam, BetaUsage from agent.llm_utils.oai import run_oai_interleaved -from colorful_text import colorful_text_vlm import time import re @@ -60,14 +59,14 @@ class VLMAgent: # Show results of Omniparser image_base64 = parsed_screen['original_screenshot_base64'] latency_omniparser = parsed_screen['latency'] - self.output_callback(f'Screenshot for {colorful_text_vlm}:\n', + self.output_callback(f'Screenshot for OmniParser Agent:\n', sender="bot") - self.output_callback(f'Set of Marks Screenshot for {colorful_text_vlm}:\n', sender="bot") + self.output_callback(f'Set of Marks Screenshot for OmniParser Agent:\n', sender="bot") screen_info = str(parsed_screen['screen_info']) - # self.output_callback(f'Screen Info for {colorful_text_vlm}:\n{screen_info}', sender="bot") + # self.output_callback(f'Screen Info for OmniParser Agent:\n{screen_info}', sender="bot") self.output_callback( f'
' - f' Screen Info for {colorful_text_vlm}' + f' Screen Info for OmniParser Agent' f'
{screen_info}
' f'
', sender="bot" @@ -140,7 +139,7 @@ class VLMAgent: vlm_plan_str += f'{value}' else: vlm_plan_str += f'\n{key}: {value}' - # self.output_callback(f"{colorful_text_vlm}:\n{vlm_plan_str}", sender="bot") + # self.output_callback(f"OmniParser Agent:\n{vlm_plan_str}", sender="bot") # construct the response so that anthropicExcutor can execute the tool response_content = [BetaTextBlock(text=vlm_plan_str, type='text')] @@ -240,7 +239,7 @@ IMPORTANT NOTES: buffered = BytesIO() image.save(buffered, format="PNG") image_with_circle_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") - self.output_callback(f'Action performed on the red circle with centroid ({x}, {y}), for {colorful_text_vlm}:\n', sender="bot") + self.output_callback(f'Action performed on the red circle with centroid ({x}, {y}), for OmniParser Agent:\n', sender="bot") def _keep_latest_images(messages):