From 6cc993e537e68cb216bfdd2b4df8c225ac098548 Mon Sep 17 00:00:00 2001 From: yuruo Date: Mon, 24 Mar 2025 17:31:53 +0800 Subject: [PATCH] use pyqt replace gradio --- {gradio_ui => auto_control}/.DS_Store | Bin {gradio_ui => auto_control}/__init__.py | 0 .../agent/base_agent.py | 0 .../agent/task_plan_agent.py | 4 +- .../agent/task_run_agent.py | 4 +- .../agent/vision_agent.py | 0 {gradio_ui => auto_control}/app.py | 6 +- .../executor/anthropic_executor.py | 2 +- {gradio_ui => auto_control}/loop.py | 10 +- {gradio_ui => auto_control}/tools/__init__.py | 0 {gradio_ui => auto_control}/tools/base.py | 0 .../tools/collection.py | 0 {gradio_ui => auto_control}/tools/computer.py | 0 .../tools/screen_capture.py | 0 gradio_ui/.gitignore | 1 - imgs/wechat/chat_select.png | Bin 0 -> 770 bytes imgs/wechat/chat_unselect.png | Bin 0 -> 1010 bytes imgs/wechat/contact_person.png | Bin 0 -> 1017 bytes imgs/wechat/search.png | Bin 0 -> 590 bytes imgs/wechat/send_message.png | Bin 0 -> 1487 bytes main.py | 12 +- requirements.txt | 4 +- ui/__init__.py | 3 + ui/agent_worker.py | 174 ++++++++ ui/hotkey_edit.py | 90 ++++ ui/main.py | 25 ++ ui/main_window.py | 388 ++++++++++++++++++ ui/settings_dialog.py | 125 ++++++ ui/theme.py | 99 +++++ ui/tray_icon.py | 60 +++ util/auto_control.py | 9 +- util/auto_util.py | 34 ++ util/opencv._detect.py | 47 --- util/wechat_auto.py | 30 ++ 34 files changed, 1053 insertions(+), 74 deletions(-) rename {gradio_ui => auto_control}/.DS_Store (100%) rename {gradio_ui => auto_control}/__init__.py (100%) rename {gradio_ui => auto_control}/agent/base_agent.py (100%) rename {gradio_ui => auto_control}/agent/task_plan_agent.py (95%) rename {gradio_ui => auto_control}/agent/task_run_agent.py (98%) rename {gradio_ui => auto_control}/agent/vision_agent.py (100%) rename {gradio_ui => auto_control}/app.py (98%) rename {gradio_ui => auto_control}/executor/anthropic_executor.py (93%) rename {gradio_ui => auto_control}/loop.py (94%) rename {gradio_ui => 
auto_control}/tools/__init__.py (100%) rename {gradio_ui => auto_control}/tools/base.py (100%) rename {gradio_ui => auto_control}/tools/collection.py (100%) rename {gradio_ui => auto_control}/tools/computer.py (100%) rename {gradio_ui => auto_control}/tools/screen_capture.py (100%) delete mode 100644 gradio_ui/.gitignore create mode 100644 imgs/wechat/chat_select.png create mode 100644 imgs/wechat/chat_unselect.png create mode 100644 imgs/wechat/contact_person.png create mode 100644 imgs/wechat/search.png create mode 100644 imgs/wechat/send_message.png create mode 100644 ui/__init__.py create mode 100644 ui/agent_worker.py create mode 100644 ui/hotkey_edit.py create mode 100644 ui/main.py create mode 100644 ui/main_window.py create mode 100644 ui/settings_dialog.py create mode 100644 ui/theme.py create mode 100644 ui/tray_icon.py create mode 100644 util/auto_util.py delete mode 100644 util/opencv._detect.py create mode 100644 util/wechat_auto.py diff --git a/gradio_ui/.DS_Store b/auto_control/.DS_Store similarity index 100% rename from gradio_ui/.DS_Store rename to auto_control/.DS_Store diff --git a/gradio_ui/__init__.py b/auto_control/__init__.py similarity index 100% rename from gradio_ui/__init__.py rename to auto_control/__init__.py diff --git a/gradio_ui/agent/base_agent.py b/auto_control/agent/base_agent.py similarity index 100% rename from gradio_ui/agent/base_agent.py rename to auto_control/agent/base_agent.py diff --git a/gradio_ui/agent/task_plan_agent.py b/auto_control/agent/task_plan_agent.py similarity index 95% rename from gradio_ui/agent/task_plan_agent.py rename to auto_control/agent/task_plan_agent.py index 4a082f9..7470c1e 100644 --- a/gradio_ui/agent/task_plan_agent.py +++ b/auto_control/agent/task_plan_agent.py @@ -1,9 +1,9 @@ import json from pydantic import BaseModel, Field -from gradio_ui.agent.base_agent import BaseAgent +from auto_control.agent.base_agent import BaseAgent from xbrain.core.chat import run -from gradio_ui.tools.computer 
import Action +from auto_control.tools.computer import Action class TaskPlanAgent(BaseAgent): def __call__(self, messages, parsed_screen_result): diff --git a/gradio_ui/agent/task_run_agent.py b/auto_control/agent/task_run_agent.py similarity index 98% rename from gradio_ui/agent/task_run_agent.py rename to auto_control/agent/task_run_agent.py index d83387e..9b3c72f 100644 --- a/gradio_ui/agent/task_run_agent.py +++ b/auto_control/agent/task_run_agent.py @@ -2,10 +2,10 @@ import json import uuid from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaMessageParam, BetaUsage from pydantic import Field, create_model -from gradio_ui.agent.base_agent import BaseAgent +from auto_control.agent.base_agent import BaseAgent from xbrain.core.chat import run -from gradio_ui.tools.computer import Action +from auto_control.tools.computer import Action class TaskRunAgent(BaseAgent): def __init__(self): self.OUTPUT_DIR = "./tmp/outputs" diff --git a/gradio_ui/agent/vision_agent.py b/auto_control/agent/vision_agent.py similarity index 100% rename from gradio_ui/agent/vision_agent.py rename to auto_control/agent/vision_agent.py diff --git a/gradio_ui/app.py b/auto_control/app.py similarity index 98% rename from gradio_ui/app.py rename to auto_control/app.py index 94df407..6fd7171 100644 --- a/gradio_ui/app.py +++ b/auto_control/app.py @@ -7,8 +7,8 @@ import os from pathlib import Path import argparse import gradio as gr -from gradio_ui.agent.vision_agent import VisionAgent -from gradio_ui.loop import ( +from auto_control.agent.vision_agent import VisionAgent +from auto_control.loop import ( sampling_loop_sync, ) import base64 @@ -349,4 +349,4 @@ def run(): while True: time.sleep(1) except KeyboardInterrupt: - print("\n💤 closing server") + print("\n�� closing server") diff --git a/gradio_ui/executor/anthropic_executor.py b/auto_control/executor/anthropic_executor.py similarity index 93% rename from gradio_ui/executor/anthropic_executor.py rename to 
auto_control/executor/anthropic_executor.py index 020bb65..8e99163 100644 --- a/gradio_ui/executor/anthropic_executor.py +++ b/auto_control/executor/anthropic_executor.py @@ -3,7 +3,7 @@ from typing import Any, cast from anthropic.types.beta import ( BetaContentBlock ) -from gradio_ui.tools import ComputerTool, ToolCollection +from auto_control.tools import ComputerTool, ToolCollection class AnthropicExecutor: diff --git a/gradio_ui/loop.py b/auto_control/loop.py similarity index 94% rename from gradio_ui/loop.py rename to auto_control/loop.py index 4ba08c2..5bd84ef 100644 --- a/gradio_ui/loop.py +++ b/auto_control/loop.py @@ -4,12 +4,12 @@ Agentic sampling loop that calls the Anthropic API and local implenmentation of import base64 from io import BytesIO import cv2 -from gradio_ui.agent.vision_agent import VisionAgent -from gradio_ui.tools.screen_capture import get_screenshot +from auto_control.agent.vision_agent import VisionAgent +from auto_control.tools.screen_capture import get_screenshot from anthropic.types.beta import (BetaMessageParam) -from gradio_ui.agent.task_plan_agent import TaskPlanAgent -from gradio_ui.agent.task_run_agent import TaskRunAgent -from gradio_ui.executor.anthropic_executor import AnthropicExecutor +from auto_control.agent.task_plan_agent import TaskPlanAgent +from auto_control.agent.task_run_agent import TaskRunAgent +from auto_control.executor.anthropic_executor import AnthropicExecutor import numpy as np from PIL import Image diff --git a/gradio_ui/tools/__init__.py b/auto_control/tools/__init__.py similarity index 100% rename from gradio_ui/tools/__init__.py rename to auto_control/tools/__init__.py diff --git a/gradio_ui/tools/base.py b/auto_control/tools/base.py similarity index 100% rename from gradio_ui/tools/base.py rename to auto_control/tools/base.py diff --git a/gradio_ui/tools/collection.py b/auto_control/tools/collection.py similarity index 100% rename from gradio_ui/tools/collection.py rename to 
auto_control/tools/collection.py diff --git a/gradio_ui/tools/computer.py b/auto_control/tools/computer.py similarity index 100% rename from gradio_ui/tools/computer.py rename to auto_control/tools/computer.py diff --git a/gradio_ui/tools/screen_capture.py b/auto_control/tools/screen_capture.py similarity index 100% rename from gradio_ui/tools/screen_capture.py rename to auto_control/tools/screen_capture.py diff --git a/gradio_ui/.gitignore b/gradio_ui/.gitignore deleted file mode 100644 index c036379..0000000 --- a/gradio_ui/.gitignore +++ /dev/null @@ -1 +0,0 @@ -tmp/ \ No newline at end of file diff --git a/imgs/wechat/chat_select.png b/imgs/wechat/chat_select.png new file mode 100644 index 0000000000000000000000000000000000000000..2477a86a15d711101a2b44bab9f0420f0a9f1469 GIT binary patch literal 770 zcmV+d1O5DoP)eh4bY)!^&fNe202g#cSael$WpW@x zZ*6U5Zgc=sV{&C>ZgXgFboX@)M*si(;DG(>9Hh*c@8zqjX!Vb_bqQ zY)3!6e&6S{P0Hmmu155VxL1p2=Pd}EMYBe*lnrjAMbVU5CUr_9Mg2%W2(3!n`tI|3 z$d3d88A>4k<~nqA?}5{~ofUJYGPHA}5{XgkrZ#;0qigJkm!n&Ea<$dPXhtV=cq{`_ zQgJUTu2r*Ny@4gIR4Vnp5=(15o~^Ae$ed2dNL&Df5NT=tvJC}&H=wZ^L?=3_Kb=2Z znTLLU1}+ez!BSRQnzhwBruQAvzH57rieRG#J4BQ;A3FZM{K*6Wk!J_b7t|zN0D#h{ z`|P?X@Dp@nWiBr06LNcs^W1$h-$#oXetH*^%fDX;9UTDe-pt4g%sajmBf+f<02(^F z;?rz*MM+pR08nLV(>+KCfyyud6p2#m(*MVh0uTcL)tWI$rplIzc>(}6_qsHaQzVl_ znZE-7)ZFf{hL|Fnd$IYe1%PTnQg~OJB6{ZS9t$LT-DrP0OnvhpC(gK1)@C!m?)F9$ z00D7c?eP6BCi4n+h4Xsnc-XhJS?$cNRf(9Oo{22ItXh@!@kIZ;CWLTTq&AwZ<-9V- z>wA?*<4v&Y&eHGQ$CoPHa`$xi1aohT&pL6pbX!6xX1X){OOf*Q|D|N{3!po3m(*u%j@w z>E}K-jB1Beh4bY)!^&fNe202g#cSael$WpW@x zZ*6U5Zgc=sV{&C>ZgXgFboX@)M*si=WJyFpR7i>CmQ7FUOc;jyMQH^Fr~%H!kX3IIuxxm>PTEES2s_S~WTvk^wX?OePZqL9p3&p-}Mq{q=fX*Y#hxdooS4 zR;&4ZzLS#^Z{-~j3Wd_?w3l$dcK0L*!f-g`c^&{T2P6^+pU)?XqG_6I0xZiCMG?cW z?d|P3004mFc)#CY)7{fxFu*V@5{b+KBuQczwoddv3POmYC`D0ofS0xev7$BoP&kew z2;w--8~`D79LM`C>njurxvpy%#vJha`fA%Y$8moIz_P64IK$y^4v-{iJRZklv1l~9 zraJ&29*-M_;ib$wU^1E1>-A75#Pj@`fNVA!jYey=n%DB?mE3N(pPrub`Ftvs`n7v9 
znLIi=>h*ffX46~w)a+zg<~S~s$>_Q+%kuwq=Xw78{M_NcRxNpK07-jN%H#oy4UN?);|Fd1kp525QI!76OBe2jRt}sD2gue-PqV* zS+-Ov<#M^vXmoXTC5qz50}Cq4%P6m7$K&zc-CZ2Vhr^+wC|BYYRzU-)3h&ci%T~c3{EDK%gf7DDz*Fu5W?Hr+iJD?-RxTcz;T?SD30SS%j$N! zolZy7w5_eJNF?I-`+Yv2>$(s^MNuS4>i7H0$LeChcsv#aq0{L|lH?7hFOmJZzWp!y gQ^Hms`PTve0ks6Kr%Ab#A^-pY07*qoM6N<$f~FYKYXATM literal 0 HcmV?d00001 diff --git a/imgs/wechat/contact_person.png b/imgs/wechat/contact_person.png new file mode 100644 index 0000000000000000000000000000000000000000..779bf024289482ed382b75cd6da749b02448212a GIT binary patch literal 1017 zcmVeh4bY)!^&fNe202g#cSael$WpW@x zZ*6U5Zgc=sV{&C>ZgXgFboX@)M*si=Ye_^wR9J=WSTSqsFc4PXdr$&S2X9V?5Oo^DXzbu>YoXDgY(|T5Au*J0Z#bSYcI-Gw zA@I^~O3u>xKHYaG$w!}`pXea*Yu^Fv4L{5e4?oNg4~NVA&n}atl+)>SJRYYOO6g*; z7!HSP=f-h7nM|@4+qN;rW#cKOVHjrLwIgaVr<4jIiuS=^&}=pdArMd;$3lo~^qh0X zSee_=Xr$|U%|I86Ma_{laa8jp!x)>*X239)SkLo1oesv>^E`x5b;1cDNs?^DIep7o z=Nn-swF4j;&bd;`ah$SYAaBdE`u%>^&N&Z)V6BXmQkG@y9&L*4nHLKcA>_I){Kzc< z$UbLGJy@0%$8m)U;y6a%E%*(XCrOeLGYmu5^^^%?EDXakDT2aLW?izVkV%U9e2x%O zO4W~cEjD1TX$E`>H9uH2xDy9b{!{S7(xJ2O4Bs!$6SGLsPk&%3Yam* zIX4Ug1mbz#&CShPGGQ1(k-xpY?VovZtILXcWj;O6%XPt%lM~tJ1OmHy zac_f!r*1LEK@d<%%LGy%Ti#fdQm*TA&LKF^XjB~BQG);`L@B47P*&4qe9v;>XqLjYAzG95;?(VwXZWKjdUtgD( zmk6QBWP&k1KR-tZwOXwxie6q`;1NXs`1sgvx9{)o(|mq>e7wKEUtL}4y549sy4~(< zHk(eT$H&L5Rx6#z7%S`f?RGngqNk^)PN&moG`3@2_r{W22)MDhu3J2Z6GEEJ=5RQK ztvjt}_zPoJp57T_&=Fw2Tzk%`IjjuZGhZ&3r>CbYI}IVE>`@)(LWr7|IasgaI@ZB9 n%>NG@?$@Xv=7)zL=7)#BJa-WWF)is}00000NkvXXu0mjfgaXx^ literal 0 HcmV?d00001 diff --git a/imgs/wechat/search.png b/imgs/wechat/search.png new file mode 100644 index 0000000000000000000000000000000000000000..7523d59cc74a7f827604214e76241065a0839e02 GIT binary patch literal 590 zcmV-U0P000{Z0ssI221Bzy00009a7bBm000&x z000&x0ZCFM@Bjb+5p+dZbW?9;ba!ELWdKudX>eh4bY)!^&fNe202g#cSael$WpW@x zZ*6U5Zgc=sV{&C>ZgXgFboX@)M*si;z)3_wR7i>K)G^DdP#6dB^PW;FT^iyHA`*v# zOAv%e(9{rwgGVrS>9|mqA0$!%J;<>qm-JaIhjlZ zL4XiK2n9hHkH_ovno{a{UfsFMmg6{t5LH#Htr%lMi0isdtuPD`Ld9sM@H~$YilV5g 
zmFIa~YmqF=np(0f$8nq_NwxLyc*Jr1!*?uOx~>=3Uv2e$pE0IsTAfwiRYHiOD3)cl z+ii?7&-2BJeBa;gcKv>TI2_hlZ|v$|Ft}c?qtR%&Ty{F0Miv0zcDs3==elkbMOl_f zlB8)G#`ttP*|x2!>T0#Ba<8qrq-na}?;XdPPN$2-qQt%ax9ieswG6`$Me%q%rfFIl z_B{n>sX3yu-w(a}<{(aa#dA~T0o6qOP_MFe>-}U$I>wB^+^E?k@%yHb`TGjlk c@BJsOUwC)cMcJ;God5s;07*qoM6N<$f)f`L2LJ#7 literal 0 HcmV?d00001 diff --git a/imgs/wechat/send_message.png b/imgs/wechat/send_message.png new file mode 100644 index 0000000000000000000000000000000000000000..c045c96b68cacf9e9a4935db5915cb23702c1a18 GIT binary patch literal 1487 zcmV;=1u*)FP)eh4bY)!^&fNe202g#cSael$WpW@x zZ*6U5Zgc=sV{&C>ZgXgFboX@)M*si?K}keGRCt{2nNMgMc@)QA*KDUP#x-PURk{WV z1=nR)RtmE1W$~hiwTJW;cOl!Wh2|8=A(Y}F=&1p7TbjM-ZF8u5(95uw281PG7bXJ| zq#~N6Q)(td+S!e;hev)4KmTMj$xMfx4<0fvzxn-s{=W0xy!X7azrPO_pFE!#y#WhN zSUQ;lODA(+>0}Npoy>uylR2<-G6$AU=D^a)99TM;14}1!@JZ-Yl&W4KwY;}UETLL@ zp}Em_aHy8%l{RIJ)z^wrRpVeYVvJ6)`kGfrk;|>)T>!XuV-_!YrTy&I15OsaLW)wW zC{@L-{8QsswUJlaywXax3e+kK%?+_DXWy3k?jcl5dxex*#hl1_g_Qa;WU$T{pi@z* z@=7b=3=>QHu;I@vu~AMIaw11YqFwCDvHBV@1{inAw+H}gV@~8a zSx7j;zuo?&cMmAxx@SJXxY6PjQdzM~4I@%*-&tX8cIx@i69aU5y8n{3*;ie2#Pa$> zleO8&kr5uVk%m9>n2k{Obc>6yO6FRRKVBFMk;Hj;9=E>8o1#;-r zY zcICbqqne+mz6XH6KEAok4faQGf1<$rEn@d;9IuXYXNIZ1;3-EYzfv0$&Tzg{Nwx~V zOWS?(4kW<1S(_anES)h}r}cM#0syAqgT@3HPs#TL|3+;vRF1de-oFx@`Ve8&HP=2l zsMer#(GZN(2`kX#^bcFdyD#3q>ecDR`&R%k zN z@;$-Xs7|iya|fKZmk{jMQ_hE*1b~N*klH?|#dX=}?N2$8%XcaXXSg?Oz*t8vwdcMh zO{b(?b3WLW;Oi!=DK-o>=|4y8k_BLa&g*W?f(Zi#viP-=ex{ixuu4M}Mow;EL3}4S$A8 z_Ap7gdu>MJ_q{;$?!!`$0*KT%YA`=m*SP>tl&Yxc*vU85ht&@5XS_bhIWeq$BEf3N zd7*L(%?&I+OTIuylR2<-G6$AU p=D^a)99TM;14}1!VCiHI{sUR%y)FK2J4XNj002ovPDHLkV1l#D&a?mk literal 0 HcmV?d00001 diff --git a/main.py b/main.py index af3827d..8c019dc 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,9 @@ -from gradio_ui import app -import os +from ui.main import main from util import download_weights -os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" def run(): download_weights.download() - app.run() + main() + +if __name__ == "__main__": + run() - 
-if __name__ == '__main__': - run() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 43c1040..491dee3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,6 @@ timm einops==0.8.0 modelscope pynput -lap \ No newline at end of file +lap +pyqt6==6.8.1 +keyboard==0.13.5 \ No newline at end of file diff --git a/ui/__init__.py b/ui/__init__.py new file mode 100644 index 0000000..6396c95 --- /dev/null +++ b/ui/__init__.py @@ -0,0 +1,3 @@ +""" +autoMate UI package +""" \ No newline at end of file diff --git a/ui/agent_worker.py b/ui/agent_worker.py new file mode 100644 index 0000000..e2da3b6 --- /dev/null +++ b/ui/agent_worker.py @@ -0,0 +1,174 @@ +""" +Worker thread for handling agent operations +""" +import json +from PyQt6.QtCore import QThread, pyqtSignal + +from auto_control.loop import sampling_loop_sync +from xbrain.utils.config import Config + +class AgentWorker(QThread): + """Worker thread for running agent operations asynchronously""" + + update_signal = pyqtSignal(list, list) + status_signal = pyqtSignal(str) # Signal for status updates + task_signal = pyqtSignal(str) # Signal for current task + error_signal = pyqtSignal(str) # Error signal + + def __init__(self, user_input, state, vision_agent): + super().__init__() + self.user_input = user_input + self.state = state + self.vision_agent = vision_agent + + def run(self): + # Reset stop flag + if self.state["stop"]: + self.state["stop"] = False + + # Configure API + config = Config() + config.set_openai_config( + base_url=self.state["base_url"], + api_key=self.state["api_key"], + model=self.state["model"] + ) + + # Add user message + self.state["messages"].append({"role": "user", "content": self.user_input}) + self.state["chatbox_messages"].append({"role": "user", "content": self.user_input}) + + # Send initial update + self.update_signal.emit(self.state["chatbox_messages"], []) + self.status_signal.emit("Starting analysis...") + + try: + # Process with agent + for _ 
in sampling_loop_sync( + model=self.state["model"], + messages=self.state["messages"], + vision_agent=self.vision_agent, + screen_region=self.state.get("screen_region", None) + ): + if self.state["stop"]: + self.state["chatbox_messages"].append({"role": "user", "content": "Stop!"}) + self.status_signal.emit("Operation stopped by user") + return + + # task_plan_agent first response + if len(self.state["messages"]) == 2: + task_list = json.loads(self.state["messages"][-1]["content"])["task_list"] + for task in task_list: + self.state["tasks"].append({ + "status": "⬜", + "task": task + }) + else: + # Reset all task statuses + for i in range(len(self.state["tasks"])): + self.state["tasks"][i]["status"] = "⬜" + + # Update task progress + content_json = json.loads(self.state["messages"][-1]["content"]) + task_completed_number = content_json["current_task_id"] + + # Update status with reasoning + if "reasoning" in content_json: + self.status_signal.emit(content_json["reasoning"]) + + # Update current task + if task_completed_number < len(self.state["tasks"]): + current_task = self.state["tasks"][task_completed_number]["task"] + self.task_signal.emit(current_task) + + if task_completed_number > len(self.state["tasks"]) + 1: + for i in range(len(self.state["tasks"])): + self.state["tasks"][i]["status"] = "✅" + else: + for i in range(task_completed_number + 1): + self.state["tasks"][i]["status"] = "✅" + + # Reconstruct chat messages from original messages + self.state["chatbox_messages"] = [] + + for message in self.state["messages"]: + formatted_content, json_reasoning = self.format_message_content(message["content"]) + + # Add json reasoning as a separate message if exists + if json_reasoning: + self.state["chatbox_messages"].append({ + "role": message["role"], + "content": json_reasoning + }) + + # Add formatted content + self.state["chatbox_messages"].append({ + "role": message["role"], + "content": formatted_content + }) + + # Convert data format before returning 
results + tasks_2d = [[task["status"], task["task"]] for task in self.state["tasks"]] + self.update_signal.emit(self.state["chatbox_messages"], tasks_2d) + + # All done + self.status_signal.emit("Task completed") + + except Exception as e: + # Send error signal + import traceback + error_message = f"Error occurred: {str(e)}\n{traceback.format_exc()}" + print(error_message) + + # Add error message to chat + self.state["chatbox_messages"].append({ + "role": "assistant", + "content": f"⚠️ Network connection error: {str(e)}
Please check your network connection and API settings, or try again later." + }) + self.update_signal.emit(self.state["chatbox_messages"], + [[task["status"], task["task"]] for task in self.state["tasks"]]) + self.error_signal.emit(str(e)) + self.status_signal.emit(f"Error: {str(e)}") + + def format_message_content(self, content): + """Format message content for display""" + # Handle list-type content (multimodal) + if isinstance(content, list): + formatted_content = "" + json_reasoning = None + + for item in content: + if item["type"] == "image_url": + # Changed image style to be smaller + formatted_content += f'
' + elif item["type"] == "text": + if self.is_json_format(item["text"]): + reasoning, details = self.format_json_content(item["text"]) + json_reasoning = reasoning + formatted_content += details + else: + formatted_content += item["text"] + + return formatted_content, json_reasoning + + # Handle string content + if self.is_json_format(content): + reasoning, _ = self.format_json_content(content) + formatted_content = json.dumps(json.loads(content), indent=4, ensure_ascii=False) + return formatted_content, reasoning + + return content, None + + def format_json_content(self, json_content): + """Format JSON content with reasoning and details""" + content_json = json.loads(json_content) + reasoning = f'

{content_json["reasoning"]}

' + details = f'
Detail
{json.dumps(content_json, indent=4, ensure_ascii=False)}
' + return reasoning, details + + def is_json_format(self, text): + try: + json.loads(text) + return True + except: + return False \ No newline at end of file diff --git a/ui/hotkey_edit.py b/ui/hotkey_edit.py new file mode 100644 index 0000000..4591a3c --- /dev/null +++ b/ui/hotkey_edit.py @@ -0,0 +1,90 @@ +""" +Hotkey editing widget +""" +import keyboard +from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLineEdit, QPushButton + +# Default stop hotkey +DEFAULT_STOP_HOTKEY = "ctrl+k" + +class HotkeyEdit(QWidget): + """Widget for recording hotkey combinations""" + + def __init__(self, hotkey="", parent=None): + super().__init__(parent) + layout = QHBoxLayout(self) + layout.setContentsMargins(0, 0, 0, 0) + + self.hotkey_input = QLineEdit(hotkey) + self.hotkey_input.setReadOnly(True) + self.hotkey_input.setPlaceholderText("Click to record hotkey") + + self.record_btn = QPushButton("Record") + self.record_btn.clicked.connect(self.start_recording) + + layout.addWidget(self.hotkey_input, 1) + layout.addWidget(self.record_btn) + + self.recording = False + self.keys_pressed = set() + + def start_recording(self): + """Start recording a new hotkey""" + if self.recording: + self.stop_recording() + return + + self.hotkey_input.setText("Press keys...") + self.record_btn.setText("Stop") + self.recording = True + self.keys_pressed = set() + + # Hook global events + keyboard.hook(self.on_key_event) + + def stop_recording(self): + """Stop recording and set the hotkey""" + keyboard.unhook(self.on_key_event) + self.recording = False + self.record_btn.setText("Record") + + # Convert keys to hotkey string + if self.keys_pressed: + hotkey = '+'.join(sorted(self.keys_pressed)) + self.hotkey_input.setText(hotkey) + else: + self.hotkey_input.setText("") + + def on_key_event(self, event): + """Handle key events during recording""" + if not self.recording: + return + + # Skip key up events + if not event.event_type == keyboard.KEY_DOWN: + return + + # Get key name + key_name = 
event.name.lower() + + # Special handling for modifier keys + if key_name in ['ctrl', 'alt', 'shift', 'windows']: + self.keys_pressed.add(key_name) + else: + self.keys_pressed.add(key_name) + + # Show current keys + self.hotkey_input.setText('+'.join(sorted(self.keys_pressed))) + + # Stop recording if user presses Escape alone + if len(self.keys_pressed) == 1 and 'esc' in self.keys_pressed: + self.keys_pressed.clear() + self.stop_recording() + + def get_hotkey(self): + """Get the current hotkey string""" + return self.hotkey_input.text() + + def set_hotkey(self, hotkey): + """Set the hotkey string""" + self.hotkey_input.setText(hotkey) \ No newline at end of file diff --git a/ui/main.py b/ui/main.py new file mode 100644 index 0000000..5c7379c --- /dev/null +++ b/ui/main.py @@ -0,0 +1,25 @@ +""" +Main entry point for autoMate application +""" +import sys +import argparse +from PyQt6.QtWidgets import QApplication +from ui.main_window import MainWindow + +def parse_arguments(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description="PyQt6 App") + parser.add_argument("--windows_host_url", type=str, default='localhost:8006') + parser.add_argument("--omniparser_server_url", type=str, default="localhost:8000") + return parser.parse_args() + +def main(): + """Main application entry point""" + args = parse_arguments() + app = QApplication(sys.argv) + window = MainWindow(args) + window.show() + sys.exit(app.exec()) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ui/main_window.py b/ui/main_window.py new file mode 100644 index 0000000..020d81e --- /dev/null +++ b/ui/main_window.py @@ -0,0 +1,388 @@ +""" +Main application window +""" +import os +import keyboard +from pathlib import Path +from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, + QLabel, QLineEdit, QPushButton, QTableWidget, QTableWidgetItem, + QTextEdit, QSplitter, QMessageBox, QHeaderView, QDialog, QSystemTrayIcon) +from 
PyQt6.QtCore import Qt, pyqtSlot, QSize +from PyQt6.QtGui import QPixmap, QIcon, QTextCursor, QTextCharFormat, QColor + +from xbrain.utils.config import Config +from auto_control.agent.vision_agent import VisionAgent +from util.download_weights import OMNI_PARSER_DIR + +from ui.theme import apply_theme +from ui.settings_dialog import SettingsDialog +from ui.agent_worker import AgentWorker +from ui.tray_icon import StatusTrayIcon +from ui.hotkey_edit import DEFAULT_STOP_HOTKEY + +# Intro text for application +INTRO_TEXT = ''' +Based on Omniparser to control desktop! +''' + +class MainWindow(QMainWindow): + """Main application window""" + + def __init__(self, args): + super().__init__() + self.args = args + + # Initialize state + self.state = self.setup_initial_state() + + # Initialize Agent + self.vision_agent = VisionAgent( + yolo_model_path=os.path.join(OMNI_PARSER_DIR, "icon_detect", "model.pt") + ) + + # Create tray icon + self.setup_tray_icon() + + self.setWindowTitle("autoMate") + self.setMinimumSize(1200, 800) + + self.init_ui() + self.apply_theme() + + # Register hotkey handler + self.hotkey_handler = None + self.register_stop_hotkey() + + # Print startup information + print(f"\n\n🚀 PyQt6 application launched") + + def setup_tray_icon(self): + """Setup system tray icon""" + # Create or load icon + try: + script_dir = Path(__file__).parent + + # Use logo.png as icon + image_path = script_dir.parent / "imgs" / "logo.png" + # Load image and create suitable icon size + pixmap = QPixmap(str(image_path)) + # Resize to suitable icon size + icon_pixmap = pixmap.scaled(32, 32, Qt.AspectRatioMode.KeepAspectRatio, Qt.TransformationMode.SmoothTransformation) + app_icon = QIcon(icon_pixmap) + # Set application icon + self.setWindowIcon(app_icon) + + # Create system tray icon + self.tray_icon = StatusTrayIcon(app_icon, self) + self.tray_icon.show() + + except Exception as e: + print(f"Error setting up tray icon: {e}") + self.tray_icon = None + + def 
setup_initial_state(self): + """Set up initial state""" + state = {} + + # Load data from config + config = Config() + if config.OPENAI_API_KEY: + state["api_key"] = config.OPENAI_API_KEY + else: + state["api_key"] = "" + + if config.OPENAI_BASE_URL: + state["base_url"] = config.OPENAI_BASE_URL + else: + state["base_url"] = "https://api.openai.com/v1" + + if config.OPENAI_MODEL: + state["model"] = config.OPENAI_MODEL + else: + state["model"] = "gpt-4o" + + # Default to light theme + state["theme"] = "Light" + + # Default stop hotkey + state["stop_hotkey"] = DEFAULT_STOP_HOTKEY + + state["messages"] = [] + state["chatbox_messages"] = [] + state["auth_validated"] = False + state["responses"] = {} + state["tools"] = {} + state["tasks"] = [] + state["only_n_most_recent_images"] = 2 + state["stop"] = False + + return state + + def register_stop_hotkey(self): + """Register the global stop hotkey""" + # First unregister any existing hotkey + if self.hotkey_handler: + try: + keyboard.unhook_all() + self.hotkey_handler = None + except: + pass + + # Get the current hotkey from state + hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY) + + # Check if hotkey is valid + if not hotkey: + return + + try: + # Register new hotkey + self.hotkey_handler = keyboard.add_hotkey(hotkey, self.handle_stop_hotkey) + print(f"Registered stop hotkey: {hotkey}") + except Exception as e: + print(f"Error registering hotkey '{hotkey}': {e}") + + def handle_stop_hotkey(self): + """Handle stop hotkey press""" + print("Stop hotkey pressed!") + self.state["stop"] = True + + # Show brief notification + if hasattr(self, 'tray_icon') and self.tray_icon is not None: + self.tray_icon.showMessage("autoMate", "Stopping automation...", QSystemTrayIcon.MessageIcon.Information, 1000) + + def apply_theme(self): + """Apply the current theme to the application""" + theme_name = self.state.get("theme", "Light") + apply_theme(self, theme_name) + + def init_ui(self): + """Initialize UI components""" + 
central_widget = QWidget() + main_layout = QVBoxLayout(central_widget) + + # Load top image + header_layout = QVBoxLayout() + try: + script_dir = Path(__file__).parent + image_path = script_dir.parent.parent / "imgs" / "header_bar_thin.png" + if image_path.exists(): + pixmap = QPixmap(str(image_path)) + header_label = QLabel() + header_label.setPixmap(pixmap.scaledToWidth(self.width())) + header_layout.addWidget(header_label) + except Exception as e: + print(f"Failed to load header image: {e}") + + title_label = QLabel("autoMate") + title_label.setAlignment(Qt.AlignmentFlag.AlignCenter) + font = title_label.font() + font.setPointSize(20) + title_label.setFont(font) + header_layout.addWidget(title_label) + + # Introduction text + intro_label = QLabel(INTRO_TEXT) + intro_label.setWordWrap(True) + font = intro_label.font() + font.setPointSize(12) + intro_label.setFont(font) + + # Settings button and clear chat button (at top) + top_buttons_layout = QHBoxLayout() + self.settings_button = QPushButton("Settings") + self.settings_button.clicked.connect(self.open_settings_dialog) + self.clear_button = QPushButton("Clear Chat") + self.clear_button.clicked.connect(self.clear_chat) + top_buttons_layout.addWidget(self.settings_button) + top_buttons_layout.addWidget(self.clear_button) + top_buttons_layout.addStretch() # Add elastic space to left-align buttons + + # Input area + input_layout = QHBoxLayout() + self.chat_input = QLineEdit() + self.chat_input.setPlaceholderText("Type a message to send to Omniparser + X ...") + # Send message on Enter key + self.chat_input.returnPressed.connect(self.process_input) + self.submit_button = QPushButton("Send") + self.submit_button.clicked.connect(self.process_input) + self.stop_button = QPushButton("Stop") + self.stop_button.clicked.connect(self.stop_process) + + input_layout.addWidget(self.chat_input, 8) + input_layout.addWidget(self.submit_button, 1) + input_layout.addWidget(self.stop_button, 1) + + # Main content area + 
content_splitter = QSplitter(Qt.Orientation.Horizontal) + + # Task list + task_widget = QWidget() + task_layout = QVBoxLayout(task_widget) + task_label = QLabel("Task List") + self.task_table = QTableWidget(0, 2) + self.task_table.setHorizontalHeaderLabels(["Status", "Task"]) + self.task_table.horizontalHeader().setSectionResizeMode(1, QHeaderView.ResizeMode.Stretch) + task_layout.addWidget(task_label) + task_layout.addWidget(self.task_table) + + # Chat area + chat_widget = QWidget() + chat_layout = QVBoxLayout(chat_widget) + chat_label = QLabel("Chat History") + self.chat_display = QTextEdit() + self.chat_display.setReadOnly(True) + chat_layout.addWidget(chat_label) + chat_layout.addWidget(self.chat_display) + + # Add to splitter + content_splitter.addWidget(task_widget) + content_splitter.addWidget(chat_widget) + content_splitter.setSizes([int(self.width() * 0.2), int(self.width() * 0.8)]) + + # Add all components to main layout + main_layout.addLayout(header_layout) + main_layout.addWidget(intro_label) + main_layout.addLayout(top_buttons_layout) # Add top button area + main_layout.addLayout(input_layout) + main_layout.addWidget(content_splitter, 1) # 1 is the stretch factor + + self.setCentralWidget(central_widget) + + def open_settings_dialog(self): + """Open settings dialog""" + dialog = SettingsDialog(self, self.state) + result = dialog.exec() + + if result == QDialog.DialogCode.Accepted: + # Get and apply new settings + settings = dialog.get_settings() + + # Check if stop hotkey changed + old_hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY) + new_hotkey = settings["stop_hotkey"] + + self.state["model"] = settings["model"] + self.state["base_url"] = settings["base_url"] + self.state["api_key"] = settings["api_key"] + self.state["stop_hotkey"] = new_hotkey + + # Update theme if changed + if settings["theme"] != self.state.get("theme", "Light"): + self.state["theme"] = settings["theme"] + self.apply_theme() + + if settings["screen_region"]: + 
def process_input(self):
    """Start an automation run for the text currently in the chat input.

    Returns without doing anything when the input is empty or only
    whitespace. Otherwise the input box is cleared, a reminder of the stop
    hotkey is shown, the main window is minimized, and an ``AgentWorker`` is
    created, wired to the UI and started.
    """
    user_input = self.chat_input.text()
    if not user_input.strip():
        return

    self.chat_input.clear()

    # Remind the user how to abort before the window is minimized.
    hotkey = self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY)
    QMessageBox.information(self, "Automation Starting",
                            f"Automation will start now. You can press {hotkey} to stop at any time.")

    self.showMinimized()

    # The worker is stored on self so it stays referenced while it runs.
    self.worker = AgentWorker(user_input, self.state, self.vision_agent)
    self.worker.update_signal.connect(self.update_ui)
    self.worker.error_signal.connect(self.handle_error)

    # The tray icon is optional; only forward status/task text when present.
    if hasattr(self, 'tray_icon') and self.tray_icon is not None:
        self.worker.status_signal.connect(self.tray_icon.update_status)
        self.worker.task_signal.connect(self.tray_icon.update_task)

    self.worker.start()


def handle_error(self, error_message):
    """Restore the main window and show ``error_message`` in a warning box."""
    # The window was minimized in process_input; bring it back so the
    # warning is visible.
    self.showNormal()
    self.activateWindow()

    QMessageBox.warning(self, "Connection Error",
                        f"Error connecting to AI service:\n{error_message}\n\nPlease check your network connection and API settings.")


@pyqtSlot(list, list)
def update_ui(self, chatbox_messages, tasks):
    """Redraw the chat transcript and the task table.

    Args:
        chatbox_messages: Messages to display; every item is indexed with
            ``"role"`` and ``"content"``.
        tasks: ``(status, task)`` pairs, one per row of the task table.
    """
    self.chat_display.clear()

    # An empty format resets the widget to its default text appearance.
    default_format = QTextCharFormat()

    for msg in chatbox_messages:
        role = msg["role"]
        content = msg["content"]

        # Blue label for the user, green label for the AI.
        role_format = QTextCharFormat()
        if role == "user":
            role_format.setForeground(QColor(0, 0, 255))
            label = "You:"
        else:
            role_format.setForeground(QColor(0, 128, 0))
            label = "AI:"

        # The format has to be made current on the widget to have any
        # effect; previously it was built but never applied.
        self.chat_display.setCurrentCharFormat(role_format)
        self.chat_display.append(label)
        self.chat_display.setCurrentCharFormat(default_format)

        # Content that looks like markup is inserted as HTML.
        if "<" in content and ">" in content:
            # Moving a copy of the cursor does nothing until it is set back
            # on the widget, so do that before inserting at the end.
            cursor = self.chat_display.textCursor()
            cursor.movePosition(QTextCursor.MoveOperation.End)
            self.chat_display.setTextCursor(cursor)
            self.chat_display.insertHtml(content)
        else:
            self.chat_display.append(content)
        self.chat_display.append("")  # Blank line between messages

    # Keep the newest message visible.
    scroll_bar = self.chat_display.verticalScrollBar()
    scroll_bar.setValue(scroll_bar.maximum())

    # One table row per task: column 0 is the status, column 1 the task.
    self.task_table.setRowCount(len(tasks))
    for row, (status, task) in enumerate(tasks):
        self.task_table.setItem(row, 0, QTableWidgetItem(status))
        self.task_table.setItem(row, 1, QTableWidgetItem(task))


def stop_process(self):
    """Set the ``"stop"`` flag in the shared state."""
    self.state["stop"] = True


def clear_chat(self):
    """Reset the conversation-related state entries and empty both views."""
    self.state["messages"] = []
    self.state["chatbox_messages"] = []
    self.state["responses"] = {}
    self.state["tools"] = {}
    self.state["tasks"] = []

    self.chat_display.clear()
    self.task_table.setRowCount(0)


def closeEvent(self, event):
    """Hide to the system tray when possible, otherwise clean up and close.

    While a visible tray icon exists, the close request is ignored and the
    window is only hidden, so the application keeps running in the tray.
    Without one, all keyboard hooks are removed and the close is accepted.
    """
    if hasattr(self, 'tray_icon') and self.tray_icon is not None and self.tray_icon.isVisible():
        self.hide()
        event.ignore()
    else:
        # Clean up on exit
        keyboard.unhook_all()
        event.accept()
class SettingsDialog(QDialog):
    """Dialog for editing model, API, theme, hotkey and screen-region settings.

    The dialog reads its initial values from ``state`` and never writes to
    it. Callers fetch the edited values with :meth:`get_settings` after the
    dialog has been accepted, so pressing Cancel discards every change,
    including a newly selected screen region.
    """

    def __init__(self, parent=None, state=None):
        """Build the dialog.

        Args:
            parent: Optional parent window; it is minimized while a screen
                region is being selected.
            state: Mapping with the current settings. ``"model"``,
                ``"base_url"`` and ``"api_key"`` are read unconditionally.
        """
        super().__init__(parent)
        self.state = state
        self.parent_window = parent
        # Region chosen in this dialog session. It is kept here rather than
        # written to the shared state so that Cancel can discard it.
        self._selected_region = (state or {}).get("screen_region")
        self.setWindowTitle("Settings")
        self.setMinimumWidth(500)
        self.init_ui()

    @staticmethod
    def _labeled_row(label_text, field):
        """Return a horizontal layout holding a label followed by ``field``."""
        row = QHBoxLayout()
        row.addWidget(QLabel(label_text))
        row.addWidget(field)
        return row

    def init_ui(self):
        """Create all widgets and arrange them in a vertical layout."""
        layout = QVBoxLayout(self)

        # Model settings
        self.model_input = QLineEdit(self.state["model"])
        model_layout = self._labeled_row("Model:", self.model_input)

        # Base URL settings
        self.base_url_input = QLineEdit(self.state["base_url"])
        url_layout = self._labeled_row("Base URL:", self.base_url_input)

        # API key settings (masked while typing)
        self.api_key_input = QLineEdit(self.state["api_key"])
        self.api_key_input.setEchoMode(QLineEdit.EchoMode.Password)
        api_layout = self._labeled_row("API Key:", self.api_key_input)

        # Theme selection
        self.theme_combo = QComboBox()
        self.theme_combo.addItems(list(THEMES.keys()))
        self.theme_combo.setCurrentText(self.state.get("theme", "Light"))
        theme_layout = self._labeled_row("Theme:", self.theme_combo)

        # Stop hotkey setting
        self.hotkey_edit = HotkeyEdit(self.state.get("stop_hotkey", DEFAULT_STOP_HOTKEY))
        hotkey_layout = self._labeled_row("Stop Hotkey:", self.hotkey_edit)

        # Screen region selection. The label tests the stored value rather
        # than the mere presence of the key, so a stored None is not shown
        # as "Selected region: None".
        region_layout = QHBoxLayout()
        self.select_region_btn = QPushButton("Select Screen Region")
        if self._selected_region:
            region_text = f"Selected region: {self._selected_region}"
        else:
            region_text = "No region selected"
        self.region_info = QLabel(region_text)
        self.select_region_btn.clicked.connect(self.select_screen_region)
        region_layout.addWidget(self.select_region_btn)
        region_layout.addWidget(self.region_info)

        # OK and Cancel buttons
        button_layout = QHBoxLayout()
        self.ok_button = QPushButton("OK")
        self.cancel_button = QPushButton("Cancel")
        self.ok_button.clicked.connect(self.accept)
        self.cancel_button.clicked.connect(self.reject)
        button_layout.addWidget(self.ok_button)
        button_layout.addWidget(self.cancel_button)

        # Add all rows to the main layout, top to bottom
        for row in (model_layout, url_layout, api_layout, theme_layout,
                    hotkey_layout, region_layout, button_layout):
            layout.addLayout(row)

    def select_screen_region(self):
        """Minimize the parent window, if any, then start region selection."""
        if self.parent_window:
            self.parent_window.showMinimized()
            # Give the window 500 ms to minimize before the selector runs.
            QTimer.singleShot(500, self._do_select_region)
        else:
            self._do_select_region()

    def _do_select_region(self):
        """Run the screen selector and remember the chosen region."""
        # Imported here, as in the original code, rather than at module level.
        from util.screen_selector import ScreenSelector
        region = ScreenSelector().get_selection()

        # Restore the dialog and parent window
        self.activateWindow()
        if self.parent_window:
            self.parent_window.showNormal()
            self.parent_window.activateWindow()

        if region:
            self._selected_region = region
            self.region_info.setText(f"Selected region: {region}")
        else:
            # A cancelled selection keeps the previously chosen region.
            self.region_info.setText("Selection cancelled")

    def get_settings(self):
        """Return the edited settings as a dict.

        Keys: ``model``, ``base_url``, ``api_key``, ``screen_region``
        (``None`` when no region is set), ``theme`` and ``stop_hotkey``.
        """
        return {
            "model": self.model_input.text(),
            "base_url": self.base_url_input.text(),
            "api_key": self.api_key_input.text(),
            "screen_region": self._selected_region,
            "theme": self.theme_combo.currentText(),
            "stop_hotkey": self.hotkey_edit.get_hotkey()
        }
# Theme definitions: theme name -> colour role -> hex colour string.
THEMES = {
    "Light": {
        "main_bg": "#F5F5F5",
        "widget_bg": "#FFFFFF",
        "text": "#333333",
        "accent": "#4A86E8",
        "button_bg": "#E3E3E3",
        "button_text": "#333333",
        "border": "#CCCCCC",
        "selection_bg": "#D0E2F4"
    },
    "Dark": {
        "main_bg": "#2D2D2D",
        "widget_bg": "#3D3D3D",
        "text": "#FFFFFF",
        "accent": "#4A86E8",
        "button_bg": "#555555",
        "button_text": "#FFFFFF",
        "border": "#555555",
        "selection_bg": "#3A5F8A"
    }
}


def apply_theme(widget, theme_name="Light"):
    """Apply the named theme to ``widget`` through a Qt stylesheet.

    Args:
        widget: Object exposing ``setStyleSheet(str)``.
        theme_name: Key of ``THEMES``. An unknown name falls back to
            ``"Light"`` instead of raising ``KeyError``, so a stale or
            misspelled stored theme cannot break styling.
    """
    theme = THEMES.get(theme_name, THEMES["Light"])

    # Create stylesheet for the application
    stylesheet = f"""
    QMainWindow, QDialog {{
        background-color: {theme['main_bg']};
        color: {theme['text']};
    }}

    QWidget {{
        background-color: {theme['main_bg']};
        color: {theme['text']};
    }}

    QLabel {{
        color: {theme['text']};
    }}

    QPushButton {{
        background-color: {theme['button_bg']};
        color: {theme['button_text']};
        border: 1px solid {theme['border']};
        border-radius: 4px;
        padding: 5px 10px;
    }}

    QPushButton:hover {{
        background-color: {theme['accent']};
        color: white;
    }}

    QLineEdit, QTextEdit, QTableWidget, QComboBox {{
        background-color: {theme['widget_bg']};
        color: {theme['text']};
        border: 1px solid {theme['border']};
        border-radius: 4px;
        padding: 4px;
    }}

    QTextEdit {{
        background-color: {theme['widget_bg']};
    }}

    QTableWidget::item:selected {{
        background-color: {theme['selection_bg']};
    }}

    QHeaderView::section {{
        background-color: {theme['button_bg']};
        color: {theme['button_text']};
        padding: 4px;
        border: 1px solid {theme['border']};
    }}

    QSplitter::handle {{
        background-color: {theme['border']};
    }}

    QScrollBar {{
        background-color: {theme['widget_bg']};
    }}

    QScrollBar::handle {{
        background-color: {theme['button_bg']};
        border-radius: 4px;
    }}
    """

    widget.setStyleSheet(stylesheet)
class StatusTrayIcon(QSystemTrayIcon):
    """System tray icon with a context menu showing status and current task."""

    # Longest status/task text shown in the menu before it is cut off.
    _MENU_TEXT_LIMIT = 50

    def __init__(self, icon, parent=None):
        """Create the tray icon and its context menu.

        Args:
            icon: Icon passed on to ``QSystemTrayIcon``.
            parent: Optional main window; it is restored by the
                "Show Main Window" action and by a double click on the icon.
        """
        super().__init__(icon, parent)
        # NOTE(review): this attribute shadows QObject.parent(). It is kept
        # because code outside this class may read ``tray_icon.parent``.
        self.parent = parent
        self.setToolTip("autoMate")

        # Create context menu
        self.menu = QMenu()
        self.show_action = QAction("Show Main Window")
        self.show_action.triggered.connect(self.show_main_window)
        # Status and task entries are display-only, hence disabled.
        self.menu_status = QAction("Status: Idle")
        self.menu_status.setEnabled(False)
        self.menu_task = QAction("Task: None")
        self.menu_task.setEnabled(False)
        self.exit_action = QAction("Exit")
        self.exit_action.triggered.connect(QApplication.quit)

        self.menu.addAction(self.show_action)
        self.menu.addSeparator()
        self.menu.addAction(self.menu_status)
        self.menu.addAction(self.menu_task)
        self.menu.addSeparator()
        self.menu.addAction(self.exit_action)

        self.setContextMenu(self.menu)

        # Connect signals
        self.activated.connect(self.icon_activated)

    @classmethod
    def _shorten(cls, text):
        """Return ``text`` cut to the menu limit, with "..." when truncated."""
        if len(text) > cls._MENU_TEXT_LIMIT:
            return text[:cls._MENU_TEXT_LIMIT] + "..."
        return text

    def show_main_window(self):
        """Restore and activate the parent window, if there is one."""
        if self.parent:
            self.parent.showNormal()
            self.parent.activateWindow()

    def icon_activated(self, reason):
        """Show the main window when the tray icon is double-clicked."""
        if reason == QSystemTrayIcon.ActivationReason.DoubleClick:
            self.show_main_window()

    def update_status(self, status_text):
        """Update the status menu entry and show a brief notification.

        The menu entry gets the shortened text; the notification carries the
        full ``status_text``.
        """
        self.menu_status.setText(f"Status: {self._shorten(status_text)}")

        # Show brief notification but don't disrupt automation
        # Only show notification for 500ms (very brief) to not interfere with visual automation
        self.showMessage("autoMate Status", status_text, QSystemTrayIcon.MessageIcon.Information, 500)

    def update_task(self, task_text):
        """Update the task menu entry with the shortened ``task_text``."""
        self.menu_task.setText(f"Task: {self._shorten(task_text)}")
class AppName(Enum):
    """Applications that have a template-image directory under ``imgs/``."""

    WECHAT = "wechat"


class AutoUtil:
    """Image-template based click and paste helpers for one application."""

    def __init__(self, app_name: AppName):
        """Resolve the template directory ``<this dir>/../imgs/<app_name.value>``."""
        self.img_dir = os.path.join(os.path.dirname(__file__), "..", "imgs", app_name.value)

    def click_multi_img(self, img_names, offset_x=0, offset_y=0, minSearchTime=0):
        """Find and click each named image in order, with the same offsets."""
        for img_name in img_names:
            self.find_click_img(img_name, offset_x, offset_y, minSearchTime)

    def find_click_img(self, img_name, offset_x=0, offset_y=0, minSearchTime=0):
        """Locate a template image on screen and click relative to its centre.

        Args:
            img_name: File name without the ``.png`` extension, looked up in
                ``self.img_dir``.
            offset_x: Horizontal offset added to the centre before clicking.
            offset_y: Vertical offset added to the centre before clicking.
            minSearchTime: Passed through to ``pyautogui.locateOnScreen``.

        Raises:
            pyautogui.ImageNotFoundException: If the image is not found.
        """
        img_path = os.path.join(self.img_dir, img_name + ".png")
        box = pyautogui.locateOnScreen(img_path, minSearchTime=minSearchTime)
        # Some pyautogui/pyscreeze versions return None instead of raising.
        # Raise the exception callers already catch, so the failure is
        # uniform instead of a TypeError from pyautogui.center(None).
        if box is None:
            raise pyautogui.ImageNotFoundException(f"Could not locate {img_path} on screen")
        x, y = pyautogui.center(box)
        # Add offset to click position
        pyautogui.click(x + offset_x, y + offset_y)

    def send_text(self, text, restore_delay=0.1):
        """Paste ``text`` into the focused control via the clipboard.

        The previous clipboard content is restored afterwards, even if the
        paste hotkey raises.

        Args:
            text: Text to paste.
            restore_delay: Seconds to wait after the hotkey before restoring
                the clipboard. The target application reads the clipboard
                asynchronously, so restoring at once can paste the old
                content instead.
        """
        import time

        clipboard_data = pyperclip.paste()
        pyperclip.copy(text)
        try:
            if platform.system() == 'Darwin':
                pyautogui.hotkey('command', 'v', interval=0.1)
            else:
                pyautogui.hotkey('ctrl', 'v')
            time.sleep(restore_delay)
        finally:
            # Copy old data back to clipboard
            pyperclip.copy(clipboard_data)
class WechatAuto:
    """WeChat desktop automation built on template-image clicks."""

    def __init__(self):
        """Create the image helper bound to the WeChat template directory."""
        self.auto_util = AutoUtil(AppName.WECHAT)

    def go_to_chat(self):
        """Switch to the chat tab, whether or not it is already selected.

        The image helper appends ``.png`` itself, so names are passed without
        the extension; the previous ``"chat_unselect.png"`` argument looked
        for ``chat_unselect.png.png``.
        """
        try:
            self.auto_util.find_click_img("chat_unselect")
        except pyautogui.ImageNotFoundException:
            # The tab is already active, so only the selected icon is visible.
            self.auto_util.find_click_img("chat_select")

    def search_friend(self, friend_name):
        """Open the chat tab, search for ``friend_name`` and open the contact."""
        self.go_to_chat()
        # Click 100 px to the right of the search icon.
        self.auto_util.find_click_img("search", offset_x=100)
        self.auto_util.send_text(friend_name)
        # Allow up to 10 s for the search results to render.
        self.auto_util.find_click_img("contact_person", offset_x=100, offset_y=100, minSearchTime=10)
        self.auto_util.find_click_img("search", offset_x=-100, offset_y=-100, minSearchTime=10)


if __name__ == "__main__":
    # Pause before the automation starts clicking.
    time.sleep(3)
    wechat_auto = WechatAuto()
    wechat_auto.search_friend("李杨林")