mirror of
https://github.com/yuruotong1/autoMate.git
synced 2026-03-22 13:07:17 +08:00
当刷新界面时,更新输入内容为上一次保存的数据。增加录制功能
This commit is contained in:
@@ -63,11 +63,14 @@ def setup_state(state):
|
|||||||
state["only_n_most_recent_images"] = 2
|
state["only_n_most_recent_images"] = 2
|
||||||
if 'stop' not in state:
|
if 'stop' not in state:
|
||||||
state['stop'] = False
|
state['stop'] = False
|
||||||
|
# update state
|
||||||
async def main(state):
|
return (
|
||||||
"""Render loop for Gradio"""
|
state["model"], # model textbox
|
||||||
setup_state(state)
|
state["base_url"], # base_url textbox
|
||||||
return "Setup completed"
|
state["api_key"], # api_key textbox
|
||||||
|
state["chatbox_messages"], # chatbot
|
||||||
|
[[task["status"], task["task"]] for task in state["tasks"]] # task_list
|
||||||
|
)
|
||||||
|
|
||||||
def load_from_storage(filename: str) -> str | None:
|
def load_from_storage(filename: str) -> str | None:
|
||||||
"""Load data from a file in the storage directory."""
|
"""Load data from a file in the storage directory."""
|
||||||
@@ -324,5 +327,9 @@ def run():
|
|||||||
stop_button.click(stop_app, [state], None)
|
stop_button.click(stop_app, [state], None)
|
||||||
base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None)
|
base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None)
|
||||||
|
|
||||||
|
demo.load(
|
||||||
demo.launch(server_name="0.0.0.0", server_port=7888)
|
setup_state,
|
||||||
|
inputs=[state],
|
||||||
|
outputs=[model, base_url, api_key, chatbot, task_list]
|
||||||
|
)
|
||||||
|
demo.launch(server_name="0.0.0.0", server_port=7888)
|
||||||
|
|||||||
@@ -1,29 +1,36 @@
|
|||||||
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
import pyautogui
|
||||||
from .base import ToolError
|
from .base import ToolError
|
||||||
from util import tool
|
from util import tool
|
||||||
|
|
||||||
OUTPUT_DIR = "./tmp/outputs"
|
OUTPUT_DIR = "./tmp/outputs"
|
||||||
|
|
||||||
def get_screenshot(screen_region):
|
def get_screenshot(screen_region=None, is_cursor=True):
|
||||||
output_dir = Path(OUTPUT_DIR)
|
output_dir = Path(OUTPUT_DIR)
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
path = output_dir / f"screenshot_{uuid4().hex}.png"
|
path = output_dir / f"screenshot_{uuid4().hex}.png"
|
||||||
try:
|
try:
|
||||||
img_io = tool.capture_screen_with_cursor()
|
if is_cursor:
|
||||||
|
img_io = tool.capture_screen_with_cursor()
|
||||||
|
else:
|
||||||
|
pyautogui_screenshot = pyautogui.screenshot()
|
||||||
|
img_io = BytesIO()
|
||||||
|
pyautogui_screenshot.save(img_io, 'PNG')
|
||||||
screenshot = Image.open(img_io)
|
screenshot = Image.open(img_io)
|
||||||
|
|
||||||
# Create a black mask of the same size
|
# Create a black mask of the same size
|
||||||
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
|
|
||||||
# If screen_region is provided and valid, copy only that region
|
# If screen_region is provided and valid, copy only that region
|
||||||
if screen_region and len(screen_region) == 4:
|
if screen_region and len(screen_region) == 4:
|
||||||
|
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
|
||||||
x1, y1, x2, y2 = screen_region
|
x1, y1, x2, y2 = screen_region
|
||||||
region = screenshot.crop((x1, y1, x2, y2))
|
region = screenshot.crop((x1, y1, x2, y2))
|
||||||
# Paste the region onto the black mask
|
# Paste the region onto the black mask
|
||||||
black_mask.paste(region, (x1, y1, x2, y2))
|
black_mask.paste(region, (x1, y1, x2, y2))
|
||||||
# Use the modified image as screenshot
|
# Use the modified image as screenshot
|
||||||
screenshot = black_mask
|
screenshot = black_mask
|
||||||
|
|
||||||
screenshot.save(path)
|
screenshot.save(path)
|
||||||
return screenshot, path
|
return screenshot, path
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -11,4 +11,5 @@ anthropic[bedrock,vertex]>=0.37.1
|
|||||||
pyxbrain==1.1.31
|
pyxbrain==1.1.31
|
||||||
timm
|
timm
|
||||||
einops==0.8.0
|
einops==0.8.0
|
||||||
modelscope
|
modelscope
|
||||||
|
pynput
|
||||||
126
util/auto_control.py
Normal file
126
util/auto_control.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Add the project root directory to Python path
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from gradio_ui.agent.vision_agent import VisionAgent
|
||||||
|
from util.download_weights import MODEL_DIR
|
||||||
|
from pynput import mouse, keyboard
|
||||||
|
|
||||||
|
# Now you can import from gradio_ui
|
||||||
|
from gradio_ui.tools.screen_capture import get_screenshot
|
||||||
|
|
||||||
|
class AutoControl:
    """Record mouse clicks together with screenshots, then crop the clicked element.

    While listening, every mouse-button release is stored in ``auto_list`` with
    a screenshot taken at that moment.  Once ESC is released the listeners are
    stopped and each screenshot is passed to a ``VisionAgent``; the first
    detected element whose bounding box contains the click position is cropped
    out and saved next to the screenshot with a ``_cropped.png`` suffix.
    """

    def __init__(self):
        # One dict per recorded mouse-button release; keys are "button",
        # "pressed", "position", "path" and "image" (see on_click).
        self.auto_list = []

    def start_listen(self):
        """Listen for mouse/keyboard events until ESC is released, then crop.

        Blocks the calling thread until the keyboard listener stops (which
        happens when ``on_release`` returns ``False``), stops the mouse
        listener, and finally post-processes the recorded clicks.
        """
        # Create both mouse and keyboard listeners
        mouse_listener = mouse.Listener(
            on_move=self.on_move,
            on_click=self.on_click,
            on_scroll=self.on_scroll)

        keyboard_listener = keyboard.Listener(
            on_press=self.on_press,
            on_release=self.on_release)

        # Start both listeners
        mouse_listener.start()
        keyboard_listener.start()

        # Keep the program running until keyboard listener stops
        keyboard_listener.join()

        # After keyboard stops (ESC released), stop mouse listener too
        mouse_listener.stop()

        # Heavy model work happens here, on the caller's thread, instead of
        # inside a listener callback: long-running callbacks can stall input
        # handling on some platforms, and the mouse listener would otherwise
        # keep recording while the list is being processed.
        self._crop_recorded_clicks()

    def on_move(self, x, y, injected=False):
        """Log a pointer move; ``injected`` flags synthetic (faked) events."""
        print('Pointer moved to {}; it was {}'.format(
            (x, y), 'faked' if injected else 'not faked'))

    def on_click(self, x, y, button, pressed, injected=False):
        """Log a mouse button event and, on release, record it with a screenshot.

        Args:
            x, y: pointer position reported by the listener.
            button: the mouse button involved.
            pressed: True on press, False on release.
            injected: whether the event was synthetic (faked).
        """
        print('Mouse {} {} at {}; it was {}'.format(
            button,
            'Pressed' if pressed else 'Released',
            (x, y),
            'faked' if injected else 'not faked'))
        if pressed:
            # Only releases are recorded.
            return

        if button == mouse.Button.right:
            # Wait for the right-click window (presumably the context menu)
            # to show up before taking the screenshot.
            # NOTE(review): this sleep blocks the listener callback for one
            # second; consider deferring the capture to a worker thread.
            time.sleep(1)

        screenshot, path = get_screenshot(is_cursor=False)
        self.auto_list.append(
            {"button": button,
             "pressed": pressed,
             "position": (x, y),
             "path": path,
             "image": screenshot
             }
        )

    def on_scroll(self, x, y, dx, dy, injected=False):
        """Log a scroll event; direction is derived from the sign of ``dy``."""
        print('Scrolled {} at {}; it was {}'.format(
            'down' if dy < 0 else 'up',
            (x, y), 'faked' if injected else 'not faked'))

    def on_press(self, key, injected=False):
        """Log a key press, distinguishing keys with a ``char`` from special keys."""
        try:
            print('alphanumeric key {} pressed; it was {}'.format(
                key.char, 'faked' if injected else 'not faked'))
        except AttributeError:
            # Keys without a ``char`` attribute are reported as special keys.
            print('special key {} pressed'.format(
                key))

    def on_release(self, key, injected=False):
        """Log a key release; returning ``False`` on ESC stops the keyboard listener.

        The recorded clicks are post-processed by ``start_listen`` once the
        listeners have stopped, so this callback returns promptly.
        """
        print('{} released; it was {}'.format(
            key, 'faked' if injected else 'not faked'))

        if key == keyboard.Key.esc:
            print("self.auto_list", self.auto_list)
            # Stop listener
            return False

    def _crop_recorded_clicks(self):
        """Crop, for each recorded click, the first detected element containing it."""
        if not self.auto_list:
            # Nothing was recorded, so there is no reason to load the models.
            return

        vision_agent = VisionAgent(
            yolo_model_path=os.path.join(MODEL_DIR, "icon_detect", "model.pt"),
            caption_model_path=os.path.join(MODEL_DIR, "icon_caption"))

        # Iterate over a snapshot so a late event cannot change the list
        # underneath the loop.
        for item in list(self.auto_list):
            element_list = vision_agent(str(item["path"]))
            for element in element_list:
                if self.crop_image_if_position_in_coordinates(
                        item["image"], item["path"], item["position"], element.coordinates):
                    # Only the first matching element is cropped.
                    break

    def crop_image_if_position_in_coordinates(self, image, image_path, position, coordinates):
        """
        Check if position is within coordinates and crop image if true

        Args:
            image: image object providing ``crop(box)``; the cropped result
                must provide ``save(path, format)`` (e.g. a PIL Image)
            image_path: path of the full screenshot; the crop is written next
                to it as ``<name>_cropped.png``
            position: tuple of (x, y) - current position
            coordinates: tuple of (x1, y1, x2, y2) - target area

        Returns:
            bool: True if position is in coordinates (and the crop was saved)
        """
        x, y = position
        x1, y1, x2, y2 = coordinates

        # Bounds are inclusive on every side.
        if not (x1 <= x <= x2 and y1 <= y <= y2):
            return False

        # Crop the image to the coordinates
        cropped_image = image.crop(coordinates)

        # Build the output name from the file stem only, so a ".png" that
        # appears elsewhere in the path (e.g. in a directory name) is left
        # untouched and the original screenshot is never overwritten.
        stem, _ext = os.path.splitext(str(image_path))
        save_path = f"{stem}_cropped.png"
        cropped_image.save(save_path, 'PNG')
        return True
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: build a recorder and block until ESC is released.
    AutoControl().start_listen()
|
||||||
|
|
||||||
Reference in New Issue
Block a user