From 55e2876f8114588a86ee5ef21d43faa01926fdf1 Mon Sep 17 00:00:00 2001 From: yuruo Date: Sat, 15 Mar 2025 11:16:39 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8C=BA=E5=9F=9F=E9=80=89?= =?UTF-8?q?=E6=8B=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gradio_ui/agent/task_plan_agent.py | 1 + gradio_ui/app.py | 64 +++++++------ main.py | 10 ++ util/screen_selector.py | 149 +++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 29 deletions(-) create mode 100644 util/screen_selector.py diff --git a/gradio_ui/agent/task_plan_agent.py b/gradio_ui/agent/task_plan_agent.py index 24a7392..80e1f0d 100644 --- a/gradio_ui/agent/task_plan_agent.py +++ b/gradio_ui/agent/task_plan_agent.py @@ -16,6 +16,7 @@ class TaskPlanAgent(BaseAgent): ] } response = run(messages, user_prompt=system_prompt.format(screen_info=screen_info), response_format=TaskPlanResponse) + print("task_plan_agent response: ", response) return json.loads(response) class Plan(BaseModel): diff --git a/gradio_ui/app.py b/gradio_ui/app.py index e824982..8b48510 100644 --- a/gradio_ui/app.py +++ b/gradio_ui/app.py @@ -214,36 +214,42 @@ def run(): with gr.Accordion("Settings", open=True): with gr.Row(): with gr.Column(): - model = gr.Textbox( - label="Model", - value=state.value["model"], - placeholder="输入模型名称", - interactive=True, - ) + with gr.Row(): + with gr.Column(): + model = gr.Textbox( + label="Model", + value=state.value["model"], + placeholder="输入模型名称", + interactive=True, + ) + with gr.Column(): + base_url = gr.Textbox( + label="Base URL", + value=state.value["base_url"], + placeholder="输入基础 URL", + interactive=True + ) + with gr.Row(): + api_key = gr.Textbox( + label="API Key", + type="password", + value=state.value["api_key"], + placeholder="Paste your API key here", + interactive=True, + ) + with gr.Column(): - base_url = gr.Textbox( - label="Base URL", - value=state.value["base_url"], - placeholder="输入基础 URL", - interactive=True - 
) - with gr.Column(): - gr.Slider( - label="N most recent screenshots", - minimum=0, - maximum=10, - step=1, - value=2, - interactive=True - ) - with gr.Row(): - api_key = gr.Textbox( - label="API Key", - type="password", - value=state.value["api_key"], - placeholder="Paste your API key here", - interactive=True, - ) + select_region_btn = gr.Button(value="Select Region", variant="primary") + + def select_screen_region(state): + from util.screen_selector import ScreenSelector + region = ScreenSelector().get_selection() + if region: + state["screen_region"] = region + return f"Selected region: {region}" + return "Selection cancelled" + + select_region_btn.click(fn=select_screen_region, inputs=[state], outputs=[gr.Textbox(label="Region Info")]) with gr.Row(): with gr.Column(scale=8): chat_input = gr.Textbox(show_label=False, placeholder="Type a message to send to Omniparser + X ...", container=False) diff --git a/main.py b/main.py index d550c2c..5bab297 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,12 @@ from gradio_ui import app from util import download_weights import torch +import signal +import sys + +def signal_handler(*args): + print("Ctrl+C detected, exiting gracefully...") + sys.exit(0) def run(): try: @@ -13,6 +19,10 @@ def run(): # download the weight files download_weights.download() + + # Register signal handler for graceful shutdown + signal.signal(signal.SIGINT, signal_handler) + app.run() diff --git a/util/screen_selector.py b/util/screen_selector.py new file mode 100644 index 0000000..2042d3e --- /dev/null +++ b/util/screen_selector.py @@ -0,0 +1,149 @@ +import tkinter as tk +from tkinter import Button +import sys + +class ScreenSelector: + def __init__(self): + self.root = tk.Tk() + self.root.withdraw() + + # 创建全屏窗口 + self.window = tk.Toplevel(self.root) + self.window.attributes("-fullscreen", True) + self.window.attributes("-alpha", 0.6) + self.window.attributes("-topmost", True) + + # 初始化变量 + self.start_x = self.start_y = self.current_x = 
self.current_y = None + self.selection_rect = self.confirm_button = None + self.result = None + + # 创建画布 + self.canvas = tk.Canvas(self.window, bg="gray20", highlightthickness=0) + self.canvas.pack(fill=tk.BOTH, expand=True) + + # 绑定事件 + self.canvas.bind("<ButtonPress-1>", self.on_press) + self.canvas.bind("<B1-Motion>", self.on_drag) + self.canvas.bind("<ButtonRelease-1>", self.on_release) + self.window.bind("<Escape>", self.cancel) + + def on_press(self, event): + # 清除已有选择 + if self.selection_rect: + self.canvas.delete(self.selection_rect) + if self.confirm_button: + self.confirm_button.destroy() + self.confirm_button = None + + self.start_x = self.canvas.canvasx(event.x) + self.start_y = self.canvas.canvasy(event.y) + self.selection_rect = self.canvas.create_rectangle( + self.start_x, self.start_y, self.start_x, self.start_y, + outline="red", width=5 + ) + + def on_drag(self, event): + self.current_x = self.canvas.canvasx(event.x) + self.current_y = self.canvas.canvasy(event.y) + + # 更新选择框 + self.canvas.coords(self.selection_rect, + self.start_x, self.start_y, + self.current_x, self.current_y) + + # 更新透明区域 + self.update_region() + + def update_region(self): + self.canvas.delete("transparent_region") + + # 计算坐标 + x1 = min(self.start_x, self.current_x) + y1 = min(self.start_y, self.current_y) + x2 = max(self.start_x, self.current_x) + y2 = max(self.start_y, self.current_y) + + # 绘制背景和透明区域 + self.canvas.create_rectangle( + 0, 0, self.window.winfo_width(), self.window.winfo_height(), + fill="gray20", stipple="gray50", tags="transparent_region" + ) + self.canvas.create_rectangle( + x1, y1, x2, y2, fill="", outline="", tags="transparent_region" + ) + + # 确保选择框在最上层 + self.canvas.tag_raise(self.selection_rect) + + def on_release(self, event): + self.current_x = self.canvas.canvasx(event.x) + self.current_y = self.canvas.canvasy(event.y) + + # 有效选择判断 + if abs(self.current_x - self.start_x) > 5 and abs(self.current_y - self.start_y) > 5: + self.show_button() + + def show_button(self): + if self.confirm_button: + 
self.confirm_button.destroy() + + # 计算坐标 + x1 = min(self.start_x, self.current_x) + y1 = min(self.start_y, self.current_y) + x2 = max(self.start_x, self.current_x) + y2 = max(self.start_y, self.current_y) + + # 计算距离四个角的距离 + distances = [ + ((self.current_x - x1)**2 + (self.current_y - y1)**2, (x1 - 90, y1 - 40)), # 左上 + ((self.current_x - x2)**2 + (self.current_y - y1)**2, (x2 + 10, y1 - 40)), # 右上 + ((self.current_x - x1)**2 + (self.current_y - y2)**2, (x1 - 90, y2 + 10)), # 左下 + ((self.current_x - x2)**2 + (self.current_y - y2)**2, (x2 + 10, y2 + 10)) # 右下 + ] + + # 选择最近的角 + btn_x, btn_y = min(distances, key=lambda d: d[0])[1] + + # 边界检查 + width, height = self.window.winfo_width(), self.window.winfo_height() + if btn_x + 80 > width: btn_x = x1 - 90 + if btn_x < 0: btn_x = x2 + 10 + if btn_y < 0: btn_y = y2 + 10 + if btn_y + 30 > height: btn_y = y1 - 40 + + # 创建按钮 + self.confirm_button = Button( + self.window, text="Confirm", command=self.confirm, + bg="white", fg="black", font=("Arial", 12, "bold"), + padx=10, pady=5 + ) + self.confirm_button.place(x=btn_x, y=btn_y) + + def confirm(self): + # 获取选择区域坐标 + x1 = min(self.start_x, self.current_x) + y1 = min(self.start_y, self.current_y) + x2 = max(self.start_x, self.current_x) + y2 = max(self.start_y, self.current_y) + + self.result = (int(x1), int(y1), int(x2), int(y2)) + self.root.quit() + self.window.destroy() + + def cancel(self, event=None): + self.result = None + self.root.quit() + self.window.destroy() + + def get_selection(self): + self.root.mainloop() + if hasattr(self, 'root') and self.root: + self.root.destroy() + return self.result + + +if __name__ == "__main__": + region = ScreenSelector().get_selection() + print(f"Selected region: {region}") + sys.exit(0)