增加任务条

This commit is contained in:
yuruo 2025-03-15 15:44:46 +08:00
parent 8433098673
commit 2752ee22ac
3 changed files with 101 additions and 61 deletions

View File

@ -90,74 +90,87 @@ def save_to_storage(filename: str, data: str) -> None:
except Exception as e:
print(f"Debug: Error saving {filename}: {e}")
def format_json_content(json_content):
    """Split a JSON reply into a headline and a collapsible detail view.

    Args:
        json_content: A string holding a JSON document.

    Returns:
        A ``(reasoning, details)`` tuple. ``reasoning`` is the top-level
        ``"reasoning"`` value wrapped in ``<h3>``, or ``None`` when the
        document is not a JSON object or has no such key. ``details`` is a
        ``<details>`` element containing the pretty-printed document.

    Raises:
        json.JSONDecodeError: If ``json_content`` is not valid JSON.
    """
    import html  # local import: the file-level import block is not visible here

    content_json = json.loads(json_content)

    # Escape markup so that text inside the JSON cannot close the surrounding
    # tags; quotes are left untouched to keep the JSON readable.
    pretty = html.escape(
        json.dumps(content_json, indent=4, ensure_ascii=False), quote=False
    )
    details = f'<br/> <details> <summary>Detail</summary> <pre>{pretty}</pre> </details>'

    # Valid JSON is not necessarily an object with a "reasoning" key (it may
    # be a number, a list, ...); report "no headline" instead of raising.
    reasoning = None
    if isinstance(content_json, dict) and "reasoning" in content_json:
        headline = html.escape(str(content_json["reasoning"]), quote=False)
        reasoning = f'<h3>{headline}</h3>'
    return reasoning, details
def format_message_content(content):
    """Render one message's content for display in the gradio chatbox.

    ``content`` is either a list of typed parts (``"image_url"`` / ``"text"``
    dicts) or a single plain value.

    Returns:
        A ``(formatted_content, json_reasoning)`` pair. ``json_reasoning`` is
        the headline produced by ``format_json_content`` for JSON text, or
        ``None`` when no JSON text was found.
    """
    if not isinstance(content, list):
        # Plain content: pretty-print it when it parses as JSON, otherwise
        # hand it back unchanged.
        if not is_json_format(content):
            return content, None
        headline, _ = format_json_content(content)
        pretty = json.dumps(json.loads(content), indent=4, ensure_ascii=False)
        return pretty, headline

    # Multimodal content: collect one HTML/text fragment per recognised part.
    fragments = []
    headline = None
    for part in content:
        kind = part["type"]
        if kind == "image_url":
            fragments.append(
                f'<br/><img style="width: 100%;" src="{part["image_url"]["url"]}">'
            )
        elif kind == "text":
            text = part["text"]
            if is_json_format(text):
                # The headline of the last JSON part wins; its details are
                # kept inline with the other fragments.
                headline, detail = format_json_content(text)
                fragments.append(detail)
            else:
                fragments.append(text)
    return "".join(fragments), headline
def process_input(user_input, state, vision_agent_state):
    """Run the agent on a user request, streaming chatbox updates.

    This is a generator: it yields ``state["chatbox_messages"]`` once after
    the user message is recorded and again after every step reported by
    ``sampling_loop_sync``.

    Args:
        user_input: The text the user submitted.
        state: Session dict; this function reads ``"stop"``, ``"base_url"``,
            ``"api_key"``, ``"model"``, ``"messages"`` and the optional
            ``"screen_region"``, and rewrites ``"chatbox_messages"``.
        vision_agent_state: Dict whose ``"agent"`` entry is passed to the
            sampling loop as ``vision_agent``.
    """
    # Reset the stop flag left over from a previous run.
    if state["stop"]:
        state["stop"] = False

    # Configure API
    config = Config()
    config.set_openai_config(
        base_url=state["base_url"],
        api_key=state["api_key"],
        model=state["model"],
    )

    # Record the user message exactly once in both histories.
    state["messages"].append({"role": "user", "content": user_input})
    state["chatbox_messages"].append({"role": "user", "content": user_input})
    yield state["chatbox_messages"]

    # Process with agent
    agent = vision_agent_state["agent"]
    for _ in sampling_loop_sync(
        model=state["model"],
        messages=state["messages"],
        vision_agent=agent,
        screen_region=state.get("screen_region", None),
    ):
        if state["stop"]:
            return

        # Rebuild the chatbox view from scratch out of the raw messages.
        state["chatbox_messages"] = []
        for message in state["messages"]:
            formatted_content, json_reasoning = format_message_content(
                message["content"]
            )
            # The JSON reasoning, when present, is shown as its own message
            # ahead of the formatted content.
            if json_reasoning:
                state["chatbox_messages"].append({
                    "role": message["role"],
                    "content": json_reasoning,
                })
            state["chatbox_messages"].append({
                "role": message["role"],
                "content": formatted_content,
            })
        yield state["chatbox_messages"]
def is_json_format(text):
try:
json.loads(text)
@ -182,7 +195,14 @@ def get_header_image_base64():
print(f"Failed to load header image: {e}")
return None
def update_task_list(state):
    """Build the rows shown in the task list table.

    Args:
        state: Session dict; its optional ``"tasks"`` entry is a list of
            dicts with optional ``"description"`` and ``"completed"`` keys.

    Returns:
        A list of ``[status, description]`` rows, one per task, in the same
        order as the table headers ``["Status", "Task"]``.
    """
    # Distinct markers for done / pending: with identical (empty) strings in
    # both branches the status column would never show anything.
    return [
        [
            "✅" if task.get("completed", False) else "⬜",
            task.get("description", ""),
        ]
        for task in state.get("tasks", [])
    ]
def run():
with gr.Blocks(theme=gr.themes.Default()) as demo:
gr.HTML("""
@ -259,7 +279,15 @@ def run():
stop_button = gr.Button(value="Stop", variant="secondary")
with gr.Row():
with gr.Column(scale=1):
with gr.Column(scale=2):
task_list = gr.Dataframe(
headers=["Status", "Task"],
datatype=["str", "str"],
value=[],
label="Task List",
interactive=False )
with gr.Column(scale=8):
chatbot = gr.Chatbot(
label="Chatbot History",
autoscroll=True,
@ -281,6 +309,7 @@ def run():
state["chatbox_messages"] = []
state["responses"] = {}
state["tools"] = {}
state["tasks"] = []
return state["chatbox_messages"]
model.change(fn=update_model, inputs=[model, state], outputs=None)
@ -292,4 +321,6 @@ def run():
submit_button.click(process_input, [chat_input, state, vision_agent_state], chatbot)
stop_button.click(stop_app, [state], None)
base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None)
demo.launch(server_name="0.0.0.0", server_port=7888)

View File

@ -3,9 +3,7 @@ Agentic sampling loop that calls the Anthropic API and local implementation of
"""
import base64
from io import BytesIO
from time import sleep
import cv2
from gradio_ui.agent.verification_agent import VerificationAgent
from gradio_ui.agent.vision_agent import VisionAgent
from gradio_ui.tools.screen_capture import get_screenshot
from anthropic.types.beta import (BetaMessageParam)
@ -31,16 +29,16 @@ def sampling_loop_sync(
task_plan_agent = TaskPlanAgent()
executor = AnthropicExecutor()
task_run_agent = TaskRunAgent()
parsed_screen_result = parsed_screen(vision_agent)
parsed_screen_result = parsed_screen(vision_agent, screen_region)
plan_list = task_plan_agent(messages=messages, parsed_screen_result=parsed_screen_result)
yield
for plan in plan_list:
execute_task_plan(plan, vision_agent, task_run_agent, executor, messages)
execute_task_plan(plan, vision_agent, task_run_agent, executor, messages, screen_region)
yield
def execute_task_plan(plan, vision_agent, task_run_agent, executor, messages, screen_region=None):
    """Execute a single planned task against the current screen.

    Args:
        plan: The task plan entry to carry out.
        vision_agent: Agent handed to ``parsed_screen``.
        task_run_agent: Callable invoked with ``task_plan``,
            ``parsed_screen_result`` and ``messages``; the first element of
            its result is passed on to ``executor``.
        executor: Callable invoked as ``executor(tools_use_needed, messages)``.
        messages: Conversation history shared with both callables.
        screen_region: Forwarded to ``parsed_screen``; defaults to ``None`` so
            callers using the older five-argument form keep working.
    """
    # Parse the screen again right before acting on this plan step.
    parsed_screen_result = parsed_screen(vision_agent, screen_region)
    tools_use_needed, __ = task_run_agent(
        task_plan=plan,
        parsed_screen_result=parsed_screen_result,
        messages=messages,
    )
    executor(tools_use_needed, messages)

View File

@ -6,13 +6,24 @@ from util import tool
OUTPUT_DIR = "./tmp/outputs"
def get_screenshot():
def get_screenshot(screen_region):
output_dir = Path(OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"screenshot_{uuid4().hex}.png"
try:
img_io = tool.capture_screen_with_cursor()
screenshot = Image.open(img_io)
screenshot = Image.open(img_io)
# Create a black mask of the same size
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
# If screen_region is provided and valid, copy only that region
if screen_region and len(screen_region) == 4:
x1, y1, x2, y2 = screen_region
region = screenshot.crop((x1, y1, x2, y2))
# Paste the region onto the black mask
black_mask.paste(region, (x1, y1, x2, y2))
# Use the modified image as screenshot
screenshot = black_mask
screenshot.save(path)
return screenshot, path
except Exception as e: