Use PyQt to replace Gradio

This commit is contained in:
yuruo 2025-03-24 17:31:53 +08:00
parent 1733facad8
commit 6cc993e537
34 changed files with 1053 additions and 74 deletions

View File

@ -1,9 +1,9 @@
import json
from pydantic import BaseModel, Field
from gradio_ui.agent.base_agent import BaseAgent
from auto_control.agent.base_agent import BaseAgent
from xbrain.core.chat import run
from gradio_ui.tools.computer import Action
from auto_control.tools.computer import Action
class TaskPlanAgent(BaseAgent):
def __call__(self, messages, parsed_screen_result):

View File

@ -2,10 +2,10 @@ import json
import uuid
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaMessageParam, BetaUsage
from pydantic import Field, create_model
from gradio_ui.agent.base_agent import BaseAgent
from auto_control.agent.base_agent import BaseAgent
from xbrain.core.chat import run
from gradio_ui.tools.computer import Action
from auto_control.tools.computer import Action
class TaskRunAgent(BaseAgent):
def __init__(self):
self.OUTPUT_DIR = "./tmp/outputs"

View File

@ -7,8 +7,8 @@ import os
from pathlib import Path
import argparse
import gradio as gr
from gradio_ui.agent.vision_agent import VisionAgent
from gradio_ui.loop import (
from auto_control.agent.vision_agent import VisionAgent
from auto_control.loop import (
sampling_loop_sync,
)
import base64
@ -349,4 +349,4 @@ def run():
while True:
time.sleep(1)
except KeyboardInterrupt:
print("\n💤 closing server")
print("\n<EFBFBD><EFBFBD> closing server")

View File

@ -3,7 +3,7 @@ from typing import Any, cast
from anthropic.types.beta import (
BetaContentBlock
)
from gradio_ui.tools import ComputerTool, ToolCollection
from auto_control.tools import ComputerTool, ToolCollection
class AnthropicExecutor:

View File

@ -4,12 +4,12 @@ Agentic sampling loop that calls the Anthropic API and local implementation of
import base64
from io import BytesIO
import cv2
from gradio_ui.agent.vision_agent import VisionAgent
from gradio_ui.tools.screen_capture import get_screenshot
from auto_control.agent.vision_agent import VisionAgent
from auto_control.tools.screen_capture import get_screenshot
from anthropic.types.beta import (BetaMessageParam)
from gradio_ui.agent.task_plan_agent import TaskPlanAgent
from gradio_ui.agent.task_run_agent import TaskRunAgent
from gradio_ui.executor.anthropic_executor import AnthropicExecutor
from auto_control.agent.task_plan_agent import TaskPlanAgent
from auto_control.agent.task_run_agent import TaskRunAgent
from auto_control.executor.anthropic_executor import AnthropicExecutor
import numpy as np
from PIL import Image

View File

@ -1 +0,0 @@
tmp/

BIN
imgs/wechat/chat_select.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 770 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1010 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1017 B

BIN
imgs/wechat/search.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 590 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

12
main.py
View File

@ -1,11 +1,9 @@
from gradio_ui import app
import os
from ui.main import main
from util import download_weights
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
def run():
download_weights.download()
app.run()
main()
if __name__ == "__main__":
run()
if __name__ == '__main__':
run()

View File

@ -13,4 +13,6 @@ timm
einops==0.8.0
modelscope
pynput
lap
lap
pyqt6==6.8.1
keyboard==0.13.5

3
ui/__init__.py Normal file
View File

@ -0,0 +1,3 @@
"""
autoMate UI package
"""

174
ui/agent_worker.py Normal file
View File

@ -0,0 +1,174 @@
"""
Worker thread for handling agent operations
"""
import json
from PyQt6.QtCore import QThread, pyqtSignal
from auto_control.loop import sampling_loop_sync
from xbrain.utils.config import Config
class AgentWorker(QThread):
    """Worker thread that runs one agent request and reports progress.

    The worker mutates the shared ``state`` dict it is given (``messages``,
    ``chatbox_messages``, ``tasks``, ``stop``) and publishes results through
    Qt signals so the GUI can refresh itself.
    """

    update_signal = pyqtSignal(list, list)  # (chat messages, [[status, task], ...])
    status_signal = pyqtSignal(str)  # Signal for status updates
    task_signal = pyqtSignal(str)  # Signal for current task
    error_signal = pyqtSignal(str)  # Error signal

    def __init__(self, user_input, state, vision_agent):
        """Store the user request, the shared state dict and the vision agent."""
        super().__init__()
        self.user_input = user_input
        self.state = state
        self.vision_agent = vision_agent

    def run(self):
        """Thread entry point: run the sampling loop and emit progress signals.

        Any exception raised while processing is caught, appended to the chat
        as an error message and reported through ``error_signal``.
        """
        state = self.state
        # Reset stop flag left over from a previous run
        state["stop"] = False

        # Configure API
        config = Config()
        config.set_openai_config(
            base_url=state["base_url"],
            api_key=state["api_key"],
            model=state["model"],
        )

        # Add user message
        state["messages"].append({"role": "user", "content": self.user_input})
        state["chatbox_messages"].append({"role": "user", "content": self.user_input})

        # Send initial update
        self.update_signal.emit(state["chatbox_messages"], [])
        self.status_signal.emit("Starting analysis...")

        try:
            for _ in sampling_loop_sync(
                model=state["model"],
                messages=state["messages"],
                vision_agent=self.vision_agent,
                screen_region=state.get("screen_region", None),
            ):
                if state["stop"]:
                    state["chatbox_messages"].append({"role": "user", "content": "Stop!"})
                    # Publish the appended message; otherwise it is never shown.
                    self.update_signal.emit(state["chatbox_messages"], self._task_rows())
                    self.status_signal.emit("Operation stopped by user")
                    return

                self._sync_tasks_with_latest_message()
                self._rebuild_chatbox_messages()
                self.update_signal.emit(state["chatbox_messages"], self._task_rows())

            # All done
            self.status_signal.emit("Task completed")
        except Exception as e:
            # Top-level boundary of the thread: report instead of crashing.
            import traceback
            error_message = f"Error occurred: {str(e)}\n{traceback.format_exc()}"
            print(error_message)
            # Add error message to chat
            state["chatbox_messages"].append({
                "role": "assistant",
                "content": f"<span style='color:red'>⚠️ Network connection error: {str(e)}</span><br>Please check your network connection and API settings, or try again later."
            })
            self.update_signal.emit(state["chatbox_messages"], self._task_rows())
            self.error_signal.emit(str(e))
            self.status_signal.emit(f"Error: {str(e)}")

    def _task_rows(self):
        """Return the task list as ``[[status, task], ...]`` rows for the table."""
        return [[task["status"], task["task"]] for task in self.state["tasks"]]

    def _sync_tasks_with_latest_message(self):
        """Update ``state["tasks"]`` from the newest message in the conversation."""
        tasks = self.state["tasks"]
        latest = json.loads(self.state["messages"][-1]["content"])

        # With exactly two messages the newest one is the planner's first
        # response, which carries the task list.
        if len(self.state["messages"]) == 2:
            for task in latest["task_list"]:
                tasks.append({"status": "", "task": task})
            return

        # NOTE(review): the "reset" and "reached" status values are both the
        # empty string in this source, so the table cannot distinguish them;
        # possibly marker glyphs were lost - confirm against the repository.
        for entry in tasks:
            entry["status"] = ""

        current_id = latest["current_task_id"]

        # Update status with reasoning
        if "reasoning" in latest:
            self.status_signal.emit(latest["reasoning"])

        # Update current task (ignore ids outside the task list)
        if 0 <= current_id < len(tasks):
            self.task_signal.emit(tasks[current_id]["task"])

        # Mark every task up to and including the current one. Slicing clamps
        # ids past the end of the list instead of raising IndexError.
        for entry in tasks[:max(current_id + 1, 0)]:
            entry["status"] = ""

    def _rebuild_chatbox_messages(self):
        """Regenerate the display messages from the raw conversation."""
        chatbox = []
        for message in self.state["messages"]:
            formatted_content, json_reasoning = self.format_message_content(message["content"])
            # Add json reasoning as a separate message if exists
            if json_reasoning:
                chatbox.append({"role": message["role"], "content": json_reasoning})
            # Add formatted content
            chatbox.append({"role": message["role"], "content": formatted_content})
        self.state["chatbox_messages"] = chatbox

    def format_message_content(self, content):
        """Format message content for display.

        Returns:
            A ``(formatted_content, reasoning)`` tuple; ``reasoning`` is an
            HTML heading string or ``None`` when the content has none.
        """
        # Handle list-type content (multimodal)
        if isinstance(content, list):
            formatted_content = ""
            json_reasoning = None
            for item in content:
                if item["type"] == "image_url":
                    # Images are rendered at reduced size
                    formatted_content += f'<br/><img style="width: 50%; max-width: 400px;" src="{item["image_url"]["url"]}">'
                elif item["type"] == "text":
                    if self.is_json_format(item["text"]):
                        reasoning, details = self.format_json_content(item["text"])
                        json_reasoning = reasoning
                        formatted_content += details
                    else:
                        formatted_content += item["text"]
            return formatted_content, json_reasoning

        # Handle string content
        if self.is_json_format(content):
            reasoning, _ = self.format_json_content(content)
            formatted_content = json.dumps(json.loads(content), indent=4, ensure_ascii=False)
            return formatted_content, reasoning
        return content, None

    def format_json_content(self, json_content):
        """Format JSON content with reasoning and details.

        Returns:
            ``(reasoning, details)`` where ``reasoning`` is ``None`` when the
            JSON has no "reasoning" key and ``details`` is a collapsible HTML
            dump of the whole document.
        """
        content_json = json.loads(json_content)
        reasoning = None
        if isinstance(content_json, dict) and "reasoning" in content_json:
            reasoning = f'<h3>{content_json["reasoning"]}</h3>'
        details = f'<br/> <details> <summary>Detail</summary> <pre>{json.dumps(content_json, indent=4, ensure_ascii=False)}</pre> </details>'
        return reasoning, details

    def is_json_format(self, text):
        """Return True only when ``text`` parses as a JSON object.

        Plain scalars or arrays (e.g. a user typing ``123``) are valid JSON but
        are not agent payloads, so they are treated as ordinary text.
        """
        try:
            parsed = json.loads(text)
        except (TypeError, ValueError):
            return False
        return isinstance(parsed, dict)

90
ui/hotkey_edit.py Normal file
View File

@ -0,0 +1,90 @@
"""
Hotkey editing widget
"""
import keyboard
from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QPushButton
# Default stop hotkey
DEFAULT_STOP_HOTKEY = "ctrl+k"


class HotkeyEdit(QWidget):
    """Widget for recording hotkey combinations.

    Shows the current hotkey in a read-only line edit next to a Record/Stop
    button. While recording, a global ``keyboard`` hook collects the names of
    all keys pressed and joins them with ``+``.
    """

    def __init__(self, hotkey="", parent=None):
        """Build the widget, pre-filled with ``hotkey``."""
        super().__init__(parent)
        layout = QHBoxLayout(self)
        layout.setContentsMargins(0, 0, 0, 0)

        self.hotkey_input = QLineEdit(hotkey)
        self.hotkey_input.setReadOnly(True)
        self.hotkey_input.setPlaceholderText("Click to record hotkey")

        self.record_btn = QPushButton("Record")
        self.record_btn.clicked.connect(self.start_recording)

        layout.addWidget(self.hotkey_input, 1)
        layout.addWidget(self.record_btn)

        self.recording = False
        self.keys_pressed = set()

    def start_recording(self):
        """Start recording a new hotkey, or stop if already recording."""
        if self.recording:
            self.stop_recording()
            return

        self.hotkey_input.setText("Press keys...")
        self.record_btn.setText("Stop")
        self.recording = True
        self.keys_pressed = set()

        # Hook global events
        # NOTE(review): the keyboard library presumably invokes hook callbacks
        # on its own listener thread, so on_key_event would touch Qt widgets
        # off the GUI thread - confirm and marshal via a signal if so.
        keyboard.hook(self.on_key_event)

    def stop_recording(self):
        """Stop recording and set the hotkey from the keys collected so far."""
        if not self.recording:
            # Nothing is hooked; unhooking again would raise in the library
            # and the text below would wipe the current hotkey.
            return

        keyboard.unhook(self.on_key_event)
        self.recording = False
        self.record_btn.setText("Record")

        # Convert keys to hotkey string (empty when nothing was recorded)
        if self.keys_pressed:
            self.hotkey_input.setText('+'.join(sorted(self.keys_pressed)))
        else:
            self.hotkey_input.setText("")

    def on_key_event(self, event):
        """Handle key events during recording."""
        # Only key-down events while recording are of interest
        if not self.recording or event.event_type != keyboard.KEY_DOWN:
            return

        # Keys the library cannot name carry no usable identifier
        if not event.name:
            return

        # Modifier and regular keys are collected the same way
        self.keys_pressed.add(event.name.lower())

        # Show current keys
        self.hotkey_input.setText('+'.join(sorted(self.keys_pressed)))

        # Escape pressed alone cancels recording and clears the hotkey
        if self.keys_pressed == {'esc'}:
            self.keys_pressed.clear()
            self.stop_recording()

    def get_hotkey(self):
        """Get the current hotkey string"""
        return self.hotkey_input.text()

    def set_hotkey(self, hotkey):
        """Set the hotkey string"""
        self.hotkey_input.setText(hotkey)

25
ui/main.py Normal file
View File

@ -0,0 +1,25 @@
"""
Main entry point for autoMate application
"""
import sys
import argparse
from PyQt6.QtWidgets import QApplication
from ui.main_window import MainWindow
def parse_arguments():
    """Read the command line and return the parsed namespace.

    Recognised options are ``--windows_host_url`` and
    ``--omniparser_server_url``; both are strings with localhost defaults.
    """
    cli = argparse.ArgumentParser(description="PyQt6 App")
    option_defaults = (
        ("--windows_host_url", 'localhost:8006'),
        ("--omniparser_server_url", "localhost:8000"),
    )
    for flag, fallback in option_defaults:
        cli.add_argument(flag, type=str, default=fallback)
    return cli.parse_args()
def main():
    """Launch the GUI: parse arguments, show the main window, run the event loop.

    The process exits with the status returned by the Qt event loop.
    """
    cli_args = parse_arguments()
    qt_app = QApplication(sys.argv)
    main_window = MainWindow(cli_args)
    main_window.show()
    exit_code = qt_app.exec()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

388
ui/main_window.py Normal file
View File

@ -0,0 +1,388 @@
"""
Main application window
"""
import os
import keyboard
from pathlib import Path
from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QLabel, QLineEdit, QPushButton, QTableWidget, QTableWidgetItem,
QTextEdit, QSplitter, QMessageBox, QHeaderView, QDialog, QSystemTrayIcon)
from PyQt6.QtCore import Qt, pyqtSlot, QSize
from PyQt6.QtGui import QPixmap, QIcon, QTextCursor, QTextCharFormat, QColor
from xbrain.utils.config import Config
from auto_control.agent.vision_agent import VisionAgent
from util.download_weights import OMNI_PARSER_DIR
from ui.theme import apply_theme
from ui.settings_dialog import SettingsDialog
from ui.agent_worker import AgentWorker
from ui.tray_icon import StatusTrayIcon
from ui.hotkey_edit import DEFAULT_STOP_HOTKEY
# Intro text for application
INTRO_TEXT = '''
Based on Omniparser to control desktop!
'''


class MainWindow(QMainWindow):
    """Main application window.

    Owns the shared ``state`` dict, the vision agent, the tray icon and the
    global stop hotkey, and starts an ``AgentWorker`` for each user request.
    """

    def __init__(self, args):
        """Build the window, tray icon and hotkey from parsed CLI ``args``."""
        super().__init__()
        self.args = args
        # No agent run is in flight until process_input() starts one.
        self.worker = None

        # Initialize state
        self.state = self.setup_initial_state()

        # Initialize Agent
        self.vision_agent = VisionAgent(
            yolo_model_path=os.path.join(OMNI_PARSER_DIR, "icon_detect", "model.pt")
        )

        # Create tray icon
        self.setup_tray_icon()

        self.setWindowTitle("autoMate")
        self.setMinimumSize(1200, 800)
        self.init_ui()
        self.apply_theme()

        # Register hotkey handler
        self.hotkey_handler = None
        self.register_stop_hotkey()

        # Print startup information
        print("\n\n🚀 PyQt6 application launched")

    def setup_tray_icon(self):
        """Setup system tray icon; ``self.tray_icon`` is None on failure."""
        try:
            script_dir = Path(__file__).parent
            # Use logo.png as icon
            image_path = script_dir.parent / "imgs" / "logo.png"
            # Load image and scale it to a suitable icon size
            pixmap = QPixmap(str(image_path))
            icon_pixmap = pixmap.scaled(
                32, 32,
                Qt.AspectRatioMode.KeepAspectRatio,
                Qt.TransformationMode.SmoothTransformation,
            )
            app_icon = QIcon(icon_pixmap)
            # Set application icon
            self.setWindowIcon(app_icon)

            # Create system tray icon
            self.tray_icon = StatusTrayIcon(app_icon, self)
            self.tray_icon.show()
        except Exception as e:
            print(f"Error setting up tray icon: {e}")
            self.tray_icon = None

    def setup_initial_state(self):
        """Return the initial state dict, seeded from the stored config."""
        config = Config()
        return {
            # Stored config values win; otherwise fall back to defaults
            "api_key": config.OPENAI_API_KEY or "",
            "base_url": config.OPENAI_BASE_URL or "https://api.openai.com/v1",
            "model": config.OPENAI_MODEL or "gpt-4o",
            # Default to light theme
            "theme": "Light",
            # Default stop hotkey
            "stop_hotkey": DEFAULT_STOP_HOTKEY,
            "messages": [],
            "chatbox_messages": [],
            "auth_validated": False,
            "responses": {},
            "tools": {},
            "tasks": [],
            "only_n_most_recent_images": 2,
            "stop": False,
        }

    def register_stop_hotkey(self):
        """Register the global stop hotkey, replacing any previous one."""
        # Remove only our own hotkey; unhook_all() would also drop hooks
        # installed elsewhere (e.g. an active hotkey recorder).
        if self.hotkey_handler is not None:
            try:
                keyboard.remove_hotkey(self.hotkey_handler)
            except (KeyError, ValueError) as e:
                print(f"Error removing previous hotkey: {e}")
            self.hotkey_handler = None

        # Get the current hotkey from state
        hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY)

        # An empty hotkey means "no stop hotkey"
        if not hotkey:
            return

        try:
            # Register new hotkey
            self.hotkey_handler = keyboard.add_hotkey(hotkey, self.handle_stop_hotkey)
            print(f"Registered stop hotkey: {hotkey}")
        except Exception as e:
            print(f"Error registering hotkey '{hotkey}': {e}")

    def handle_stop_hotkey(self):
        """Handle stop hotkey press by raising the shared stop flag."""
        print("Stop hotkey pressed!")
        self.state["stop"] = True

        # Show brief notification
        # NOTE(review): this callback is presumably invoked on the keyboard
        # library's thread rather than the GUI thread - confirm that calling
        # the tray icon from here is safe.
        if getattr(self, 'tray_icon', None) is not None:
            self.tray_icon.showMessage(
                "autoMate", "Stopping automation...",
                QSystemTrayIcon.MessageIcon.Information, 1000,
            )

    def apply_theme(self):
        """Apply the current theme to the application"""
        theme_name = self.state.get("theme", "Light")
        apply_theme(self, theme_name)

    def init_ui(self):
        """Initialize UI components"""
        central_widget = QWidget()
        main_layout = QVBoxLayout(central_widget)

        # Load top image
        header_layout = QVBoxLayout()
        try:
            script_dir = Path(__file__).parent
            # Same imgs directory that setup_tray_icon() reads the logo from
            image_path = script_dir.parent / "imgs" / "header_bar_thin.png"
            if image_path.exists():
                pixmap = QPixmap(str(image_path))
                header_label = QLabel()
                header_label.setPixmap(pixmap.scaledToWidth(self.width()))
                header_layout.addWidget(header_label)
        except Exception as e:
            print(f"Failed to load header image: {e}")
            # Fall back to a plain text title
            title_label = QLabel("autoMate")
            title_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
            font = title_label.font()
            font.setPointSize(20)
            title_label.setFont(font)
            header_layout.addWidget(title_label)

        # Introduction text
        intro_label = QLabel(INTRO_TEXT)
        intro_label.setWordWrap(True)
        font = intro_label.font()
        font.setPointSize(12)
        intro_label.setFont(font)

        # Settings button and clear chat button (at top)
        top_buttons_layout = QHBoxLayout()
        self.settings_button = QPushButton("Settings")
        self.settings_button.clicked.connect(self.open_settings_dialog)
        self.clear_button = QPushButton("Clear Chat")
        self.clear_button.clicked.connect(self.clear_chat)
        top_buttons_layout.addWidget(self.settings_button)
        top_buttons_layout.addWidget(self.clear_button)
        top_buttons_layout.addStretch()  # Add elastic space to left-align buttons

        # Input area
        input_layout = QHBoxLayout()
        self.chat_input = QLineEdit()
        self.chat_input.setPlaceholderText("Type a message to send to Omniparser + X ...")
        # Send message on Enter key
        self.chat_input.returnPressed.connect(self.process_input)
        self.submit_button = QPushButton("Send")
        self.submit_button.clicked.connect(self.process_input)
        self.stop_button = QPushButton("Stop")
        self.stop_button.clicked.connect(self.stop_process)
        input_layout.addWidget(self.chat_input, 8)
        input_layout.addWidget(self.submit_button, 1)
        input_layout.addWidget(self.stop_button, 1)

        # Main content area
        content_splitter = QSplitter(Qt.Orientation.Horizontal)

        # Task list
        task_widget = QWidget()
        task_layout = QVBoxLayout(task_widget)
        task_label = QLabel("Task List")
        self.task_table = QTableWidget(0, 2)
        self.task_table.setHorizontalHeaderLabels(["Status", "Task"])
        self.task_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch)
        task_layout.addWidget(task_label)
        task_layout.addWidget(self.task_table)

        # Chat area
        chat_widget = QWidget()
        chat_layout = QVBoxLayout(chat_widget)
        chat_label = QLabel("Chat History")
        self.chat_display = QTextEdit()
        self.chat_display.setReadOnly(True)
        chat_layout.addWidget(chat_label)
        chat_layout.addWidget(self.chat_display)

        # Add to splitter
        content_splitter.addWidget(task_widget)
        content_splitter.addWidget(chat_widget)
        content_splitter.setSizes([int(self.width() * 0.2), int(self.width() * 0.8)])

        # Add all components to main layout
        main_layout.addLayout(header_layout)
        main_layout.addWidget(intro_label)
        main_layout.addLayout(top_buttons_layout)  # Add top button area
        main_layout.addLayout(input_layout)
        main_layout.addWidget(content_splitter, 1)  # 1 is the stretch factor

        self.setCentralWidget(central_widget)

    def open_settings_dialog(self):
        """Open settings dialog and apply the settings if it is accepted."""
        dialog = SettingsDialog(self, self.state)
        result = dialog.exec()
        if result != QDialog.DialogCode.Accepted:
            return

        # Get and apply new settings
        settings = dialog.get_settings()

        # Check if stop hotkey changed
        old_hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY)
        new_hotkey = settings["stop_hotkey"]

        self.state["model"] = settings["model"]
        self.state["base_url"] = settings["base_url"]
        self.state["api_key"] = settings["api_key"]
        self.state["stop_hotkey"] = new_hotkey

        # Update theme if changed
        if settings["theme"] != self.state.get("theme", "Light"):
            self.state["theme"] = settings["theme"]
            self.apply_theme()

        if settings["screen_region"]:
            self.state["screen_region"] = settings["screen_region"]

        # Update hotkey if changed
        if old_hotkey != new_hotkey:
            self.register_stop_hotkey()

    def process_input(self):
        """Process user input by starting an agent run in a worker thread."""
        user_input = self.chat_input.text()
        if not user_input.strip():
            return

        # Replacing a running QThread object would drop the last reference to
        # it while it is still executing, so refuse to start a second run.
        if self.worker is not None and self.worker.isRunning():
            QMessageBox.information(
                self, "Automation Running",
                "An automation task is already running. Stop it before starting a new one.")
            return

        # Clear input box
        self.chat_input.clear()

        # Show hotkey reminder
        hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY)
        QMessageBox.information(self, "Automation Starting",
                                f"Automation will start now. You can press {hotkey} to stop at any time.")

        # Minimize main window
        self.showMinimized()

        # Create and start worker thread
        self.worker = AgentWorker(user_input, self.state, self.vision_agent)
        self.worker.update_signal.connect(self.update_ui)
        self.worker.error_signal.connect(self.handle_error)

        # Connect signals to tray icon if available
        if getattr(self, 'tray_icon', None) is not None:
            self.worker.status_signal.connect(self.tray_icon.update_status)
            self.worker.task_signal.connect(self.tray_icon.update_task)

        self.worker.start()

    def handle_error(self, error_message):
        """Handle error messages"""
        # Restore main window to show the error
        self.showNormal()
        self.activateWindow()

        # Show error message
        QMessageBox.warning(self, "Connection Error",
                            f"Error connecting to AI service:\n{error_message}\n\nPlease check your network connection and API settings.")

    @pyqtSlot(list, list)
    def update_ui(self, chatbox_messages, tasks):
        """Redraw the chat history and the task table.

        Args:
            chatbox_messages: dicts with "role" and "content" keys.
            tasks: ``[status, task]`` rows for the task table.
        """
        # Update chat display
        self.chat_display.clear()

        for msg in chatbox_messages:
            role = msg["role"]
            content = msg["content"]

            # Speaker label
            if role == "user":
                self.chat_display.append("You:")
            else:
                self.chat_display.append("AI:")

            # Add content
            # Special handling for HTML content
            if "<" in content and ">" in content:
                # insertHtml() writes at the widget's cursor; make sure that
                # is the end of the document.
                self.chat_display.moveCursor(QTextCursor.MoveOperation.End)
                self.chat_display.insertHtml(content)
            else:
                self.chat_display.append(content)
            self.chat_display.append("")  # Add empty line

        # Scroll to bottom
        scroll_bar = self.chat_display.verticalScrollBar()
        scroll_bar.setValue(scroll_bar.maximum())

        # Update task table
        self.task_table.setRowCount(len(tasks))
        for i, (status, task) in enumerate(tasks):
            self.task_table.setItem(i, 0, QTableWidgetItem(status))
            self.task_table.setItem(i, 1, QTableWidgetItem(task))

    def stop_process(self):
        """Stop processing"""
        self.state["stop"] = True

    def clear_chat(self):
        """Clear chat history"""
        self.state["messages"] = []
        self.state["chatbox_messages"] = []
        self.state["responses"] = {}
        self.state["tools"] = {}
        self.state["tasks"] = []

        self.chat_display.clear()
        self.task_table.setRowCount(0)

    def closeEvent(self, event):
        """Handle window close event"""
        # This allows the app to continue running in the system tray
        # when the main window is closed
        tray = getattr(self, 'tray_icon', None)
        if tray is not None and tray.isVisible():
            self.hide()
            event.ignore()
        else:
            # Clean up on exit
            keyboard.unhook_all()
            event.accept()

125
ui/settings_dialog.py Normal file
View File

@ -0,0 +1,125 @@
"""
Settings dialog for application configuration
"""
from PyQt6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout,
QLabel, QLineEdit, QPushButton, QComboBox)
from PyQt6.QtCore import QTimer
from ui.hotkey_edit import HotkeyEdit, DEFAULT_STOP_HOTKEY
from ui.theme import THEMES
class SettingsDialog(QDialog):
    """Dialog for application settings.

    Edits model, base URL, API key, theme, stop hotkey and screen region.
    Values are read back with ``get_settings()`` after the dialog closes.
    """

    def __init__(self, parent=None, state=None):
        """Create the dialog.

        Args:
            parent: window to minimise while a screen region is selected.
            state: dict holding the current settings; an empty dict is used
                when omitted so the declared default does not crash.
        """
        super().__init__(parent)
        self.state = state if state is not None else {}
        self.parent_window = parent
        self.setWindowTitle("Settings")
        self.setMinimumWidth(500)
        self.init_ui()

    def init_ui(self):
        """Build all rows of the dialog."""
        layout = QVBoxLayout(self)

        # Model settings
        model_layout = QHBoxLayout()
        model_label = QLabel("Model:")
        self.model_input = QLineEdit(self.state.get("model", ""))
        model_layout.addWidget(model_label)
        model_layout.addWidget(self.model_input)

        # Base URL settings
        url_layout = QHBoxLayout()
        url_label = QLabel("Base URL:")
        self.base_url_input = QLineEdit(self.state.get("base_url", ""))
        url_layout.addWidget(url_label)
        url_layout.addWidget(self.base_url_input)

        # API key settings (masked input)
        api_layout = QHBoxLayout()
        api_label = QLabel("API Key:")
        self.api_key_input = QLineEdit(self.state.get("api_key", ""))
        self.api_key_input.setEchoMode(QLineEdit.EchoMode.Password)
        api_layout.addWidget(api_label)
        api_layout.addWidget(self.api_key_input)

        # Theme selection
        theme_layout = QHBoxLayout()
        theme_label = QLabel("Theme:")
        self.theme_combo = QComboBox()
        self.theme_combo.addItems(list(THEMES.keys()))
        current_theme = self.state.get("theme", "Light")
        self.theme_combo.setCurrentText(current_theme)
        theme_layout.addWidget(theme_label)
        theme_layout.addWidget(self.theme_combo)

        # Stop hotkey setting
        hotkey_layout = QHBoxLayout()
        hotkey_label = QLabel("Stop Hotkey:")
        self.hotkey_edit = HotkeyEdit(self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY))
        hotkey_layout.addWidget(hotkey_label)
        hotkey_layout.addWidget(self.hotkey_edit)

        # Screen region selection
        region_layout = QHBoxLayout()
        self.select_region_btn = QPushButton("Select Screen Region")
        self.region_info = QLabel("No region selected" if "screen_region" not in self.state else f"Selected region: {self.state['screen_region']}")
        self.select_region_btn.clicked.connect(self.select_screen_region)
        region_layout.addWidget(self.select_region_btn)
        region_layout.addWidget(self.region_info)

        # OK and Cancel buttons
        button_layout = QHBoxLayout()
        self.ok_button = QPushButton("OK")
        self.cancel_button = QPushButton("Cancel")
        self.ok_button.clicked.connect(self.accept)
        self.cancel_button.clicked.connect(self.reject)
        button_layout.addWidget(self.ok_button)
        button_layout.addWidget(self.cancel_button)

        # Add all elements to main layout
        for row in (model_layout, url_layout, api_layout, theme_layout,
                    hotkey_layout, region_layout, button_layout):
            layout.addLayout(row)

    def select_screen_region(self):
        """Select screen region"""
        # Minimize the parent window before selecting region
        if self.parent_window:
            self.parent_window.showMinimized()
            # Wait a moment for the window to minimize
            QTimer.singleShot(500, self._do_select_region)
        else:
            self._do_select_region()

    def _do_select_region(self):
        """Actual region selection after minimizing"""
        # Imported here, as in the original, so the selector is only loaded
        # when the user actually picks a region.
        from util.screen_selector import ScreenSelector
        region = ScreenSelector().get_selection()

        # Restore the dialog and parent window
        self.activateWindow()
        if self.parent_window:
            self.parent_window.showNormal()
            self.parent_window.activateWindow()

        if region:
            # Stored directly in the state dict; get_settings() reads it back
            self.state["screen_region"] = region
            self.region_info.setText(f"Selected region: {region}")
        else:
            self.region_info.setText("Selection cancelled")

    def get_settings(self):
        """Get settings content as a dict keyed by setting name."""
        return {
            "model": self.model_input.text(),
            "base_url": self.base_url_input.text(),
            "api_key": self.api_key_input.text(),
            "screen_region": self.state.get("screen_region", None),
            "theme": self.theme_combo.currentText(),
            "stop_hotkey": self.hotkey_edit.get_hotkey()
        }

99
ui/theme.py Normal file
View File

@ -0,0 +1,99 @@
"""
Theme definitions and theme handling functionality
"""
# Colour palettes, keyed by theme name
THEMES = {
    "Light": dict(
        main_bg="#F5F5F5",
        widget_bg="#FFFFFF",
        text="#333333",
        accent="#4A86E8",
        button_bg="#E3E3E3",
        button_text="#333333",
        border="#CCCCCC",
        selection_bg="#D0E2F4",
    ),
    "Dark": dict(
        main_bg="#2D2D2D",
        widget_bg="#3D3D3D",
        text="#FFFFFF",
        accent="#4A86E8",
        button_bg="#555555",
        button_text="#FFFFFF",
        border="#555555",
        selection_bg="#3A5F8A",
    ),
}


def apply_theme(widget, theme_name="Light"):
    """Style ``widget`` with the palette registered under ``theme_name``.

    Builds one Qt stylesheet from the palette and hands it to
    ``widget.setStyleSheet``. Raises ``KeyError`` for an unknown theme name.
    """
    palette = THEMES[theme_name]

    # The stylesheet text is kept flush-left so the generated string matches
    # the original character for character.
    qss = f"""
QMainWindow, QDialog {{
background-color: {palette['main_bg']};
color: {palette['text']};
}}
QWidget {{
background-color: {palette['main_bg']};
color: {palette['text']};
}}
QLabel {{
color: {palette['text']};
}}
QPushButton {{
background-color: {palette['button_bg']};
color: {palette['button_text']};
border: 1px solid {palette['border']};
border-radius: 4px;
padding: 5px 10px;
}}
QPushButton:hover {{
background-color: {palette['accent']};
color: white;
}}
QLineEdit, QTextEdit, QTableWidget, QComboBox {{
background-color: {palette['widget_bg']};
color: {palette['text']};
border: 1px solid {palette['border']};
border-radius: 4px;
padding: 4px;
}}
QTextEdit {{
background-color: {palette['widget_bg']};
}}
QTableWidget::item:selected {{
background-color: {palette['selection_bg']};
}}
QHeaderView::section {{
background-color: {palette['button_bg']};
color: {palette['button_text']};
padding: 4px;
border: 1px solid {palette['border']};
}}
QSplitter::handle {{
background-color: {palette['border']};
}}
QScrollBar {{
background-color: {palette['widget_bg']};
}}
QScrollBar::handle {{
background-color: {palette['button_bg']};
border-radius: 4px;
}}
"""
    widget.setStyleSheet(qss)

60
ui/tray_icon.py Normal file
View File

@ -0,0 +1,60 @@
"""
System tray icon implementation
"""
from PyQt6.QtWidgets import QSystemTrayIcon, QMenu, QApplication
from PyQt6.QtGui import QAction
class StatusTrayIcon(QSystemTrayIcon):
    """Tray icon whose context menu mirrors the agent's status and task."""

    def __init__(self, icon, parent=None):
        """Create the tray icon and its context menu for ``parent``."""
        super().__init__(icon, parent)
        # NOTE(review): this attribute shadows QObject.parent() on the
        # instance; kept because callers may read ``.parent`` as an attribute.
        self.parent = parent
        self.setToolTip("autoMate")

        # Actions shown in the context menu
        self.menu = QMenu()

        self.show_action = QAction("Show Main Window")
        self.show_action.triggered.connect(self.show_main_window)

        # Read-only informational entries
        self.menu_status = QAction("Status: Idle")
        self.menu_status.setEnabled(False)
        self.menu_task = QAction("Task: None")
        self.menu_task.setEnabled(False)

        self.exit_action = QAction("Exit")
        self.exit_action.triggered.connect(QApplication.quit)

        # Menu layout: show | status, task | exit
        menu_groups = (
            (self.show_action,),
            (self.menu_status, self.menu_task),
            (self.exit_action,),
        )
        for position, group in enumerate(menu_groups):
            if position:
                self.menu.addSeparator()
            for action in group:
                self.menu.addAction(action)
        self.setContextMenu(self.menu)

        # React to clicks on the icon itself
        self.activated.connect(self.icon_activated)

    def show_main_window(self):
        """Restore and focus the main window, if there is one."""
        if not self.parent:
            return
        self.parent.showNormal()
        self.parent.activateWindow()

    def icon_activated(self, reason):
        """Bring the main window back when the icon is double-clicked."""
        if reason == QSystemTrayIcon.ActivationReason.DoubleClick:
            self.show_main_window()

    def update_status(self, status_text):
        """Show ``status_text`` in the menu and as a short notification."""
        # Menu entries are capped at 50 characters
        if len(status_text) > 50:
            menu_text = status_text[:50] + "..."
        else:
            menu_text = status_text
        self.menu_status.setText(f"Status: {menu_text}")

        # The notification is requested for only 500 ms so it interferes as
        # little as possible with the visual automation.
        self.showMessage("autoMate Status", status_text, QSystemTrayIcon.MessageIcon.Information, 500)

    def update_task(self, task_text):
        """Show ``task_text`` (capped at 50 characters) in the tray menu."""
        if len(task_text) > 50:
            menu_text = task_text[:50] + "..."
        else:
            menu_text = task_text
        self.menu_task.setText(f"Task: {menu_text}")

View File

@ -4,12 +4,12 @@ import time
# Add the project root directory to Python path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_ui.agent.vision_agent import VisionAgent
from auto_control.agent.vision_agent import VisionAgent
from util.download_weights import MODEL_DIR
from pynput import mouse, keyboard
# Now you can import from gradio_ui
from gradio_ui.tools.screen_capture import get_screenshot
# Now you can import from auto_control
from auto_control.tools.screen_capture import get_screenshot
class AutoControl:
def __init__(self):
@ -81,8 +81,7 @@ class AutoControl:
if key == keyboard.Key.esc:
print("self.auto_list", self.auto_list)
vision_agent = VisionAgent(yolo_model_path=os.path.join(MODEL_DIR, "icon_detect", "model.pt"),
caption_model_path=os.path.join(MODEL_DIR, "icon_caption"))
vision_agent = VisionAgent(yolo_model_path=os.path.join(MODEL_DIR, "icon_detect", "model.pt"))
for item in self.auto_list:
element_list =vision_agent(str(item["path"]))

34
util/auto_util.py Normal file
View File

@ -0,0 +1,34 @@
import os
import platform
import pyautogui
from enum import Enum
import pyperclip
class AppName(Enum):
    """Applications that have a template-image folder under ``imgs/``."""

    WECHAT = "wechat"


class AutoUtil:
    """Helpers that drive an application by locating template images on screen."""

    def __init__(self, app_name: AppName):
        """Point the helper at ``<this dir>/../imgs/<app_name.value>``."""
        self.img_dir = os.path.join(os.path.dirname(__file__), "..", "imgs", app_name.value)

    def _img_path(self, img_name):
        """Return the template path for ``img_name``, with or without ``.png``."""
        if img_name.lower().endswith(".png"):
            file_name = img_name
        else:
            file_name = img_name + ".png"
        return os.path.join(self.img_dir, file_name)

    def click_multi_img(self, img_names, offset_x=0, offset_y=0, minSearchTime=0):
        """Locate and click each image in ``img_names`` in order."""
        for img_name in img_names:
            self.find_click_img(img_name, offset_x, offset_y, minSearchTime)

    def find_click_img(self, img_name, offset_x=0, offset_y=0, minSearchTime=0):
        """Find ``img_name`` on screen and click its centre plus the offset.

        Raises:
            pyautogui.ImageNotFoundException: the image is not on screen.
        """
        img_path = self._img_path(img_name)
        img = pyautogui.locateOnScreen(img_path, minSearchTime=minSearchTime)
        if img is None:
            # Some pyautogui versions return None instead of raising; make
            # both cases surface as the exception callers already catch.
            raise pyautogui.ImageNotFoundException(f"Could not locate {img_path} on screen")
        x, y = pyautogui.center(img)
        # Add offset to click position
        pyautogui.click(x + offset_x, y + offset_y)

    def send_text(self, text):
        """Paste ``text`` into the focused control via the clipboard."""
        clipboard_data = pyperclip.paste()
        pyperclip.copy(text)
        try:
            if platform.system() == 'Darwin':
                pyautogui.hotkey('command', 'v', interval=0.1)
            else:
                pyautogui.hotkey('ctrl', 'v')
        finally:
            # Copy old data back to clipboard, even if the paste failed
            pyperclip.copy(clipboard_data)

View File

@ -1,47 +0,0 @@
import cv2
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_ui.tools.screen_capture import get_screenshot
def detect_and_draw_edges():
    """Take a screenshot, detect Canny edges and show them in a window.

    Returns:
        ``(image_with_contours, contours)``, or ``None`` when the screenshot
        file could not be read. Blocks until a key is pressed in the window.
    """
    # Only the saved file path is needed from the screenshot helper
    _, path = get_screenshot(is_cursor=False)
    img = cv2.imread(path)

    if img is None:
        print("Error: Could not read the image.")
        return None

    # Create a copy for drawing contours later
    original = img.copy()

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian blur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Detect edges using Canny algorithm
    edges = cv2.Canny(blurred, 50, 150)

    # Find contours from the edges
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw all detected contours
    cv2.drawContours(original, contours, -1, (0, 255, 0), 2)

    print(f"Found {len(contours)} contours in the image")

    # Display results (only the edge map is shown)
    cv2.imshow("Edges", edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return original, contours


# Example usage
if __name__ == "__main__":
    outcome = detect_and_draw_edges()
    # The function returns None when the image cannot be read; unpacking
    # that directly would raise TypeError.
    if outcome is not None:
        result_image, detected_contours = outcome

30
util/wechat_auto.py Normal file
View File

@ -0,0 +1,30 @@
import os
import sys
import time
import pyautogui
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from util.auto_util import AppName, AutoUtil
class WechatAuto:
    """Image-driven automation steps for the WeChat desktop client."""

    def __init__(self):
        """Create the helper bound to the WeChat template images."""
        self.auto_util = AutoUtil(AppName.WECHAT)

    def go_to_chat(self):
        """Click the (unselected) chat tab."""
        # Image names are passed without extension; find_click_img adds ".png"
        self.auto_util.find_click_img("chat_unselect")

    def search_friend(self, friend_name):
        """Open the chat tab, search for ``friend_name`` and click the result."""
        # The chat tab has two looks depending on whether it is selected
        try:
            self.auto_util.find_click_img("chat_unselect")
        except pyautogui.ImageNotFoundException:
            self.auto_util.find_click_img("chat_select")
        # Click to the right of the search icon, i.e. into the search field
        self.auto_util.find_click_img("search", offset_x=100)
        self.auto_util.send_text(friend_name)
        self.auto_util.find_click_img("contact_person", offset_x=100, offset_y=100, minSearchTime=10)
        self.auto_util.find_click_img("search", offset_x=-100, offset_y=-100, minSearchTime=10)


if __name__ == "__main__":
    # Pause before the first click so the target window can be brought forward
    time.sleep(3)
    wechat_auto = WechatAuto()
    wechat_auto.search_friend("李杨林")