mirror of https://github.com/yuruotong1/autoMate.git
synced 2025-12-26 05:16:21 +08:00

When the interface is refreshed, repopulate the inputs with the last saved data. Add a recording feature.

This commit is contained in:
parent d9044e0a87
commit b778dcc87d
@@ -63,11 +63,14 @@ def setup_state(state):
     state["only_n_most_recent_images"] = 2
     if 'stop' not in state:
         state['stop'] = False

 async def main(state):
     """Render loop for Gradio"""
     setup_state(state)
-    return "Setup completed"
+    # update state
+    return (
+        state["model"],             # model textbox
+        state["base_url"],          # base_url textbox
+        state["api_key"],           # api_key textbox
+        state["chatbox_messages"],  # chatbot
+        [[task["status"], task["task"]] for task in state["tasks"]]  # task_list
+    )

 def load_from_storage(filename: str) -> str | None:
     """Load data from a file in the storage directory."""
@@ -324,5 +327,9 @@ def run():
     stop_button.click(stop_app, [state], None)
     base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None)

-    demo.launch(server_name="0.0.0.0", server_port=7888)
+    demo.load(
+        setup_state,
+        inputs=[state],
+        outputs=[model, base_url, api_key, chatbot, task_list]
+    )
+    demo.launch(server_name="0.0.0.0", server_port=7888)
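For context, Gradio's Blocks.load event runs the given function on every page load, including a browser refresh, and writes its return values into the listed output components in order; that is the mechanism this hunk uses to repopulate the inputs from saved state. Below is a minimal, self-contained sketch of the pattern; the component names and default values are illustrative, not the project's.

# Minimal sketch: restore textbox contents from a gr.State dict on page load/refresh.
# All names and default values here are illustrative, not autoMate's.
import gradio as gr

def restore(state):
    # Return one value per output component, in the same order as `outputs`.
    return state["model"], state["api_key"]

with gr.Blocks() as demo:
    state = gr.State({"model": "some-model-name", "api_key": ""})
    model = gr.Textbox(label="Model")
    api_key = gr.Textbox(label="API Key")
    # Runs on every page load (including refresh) and fills the textboxes.
    demo.load(restore, inputs=[state], outputs=[model, api_key])

if __name__ == "__main__":
    demo.launch()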
@@ -1,29 +1,36 @@
+from io import BytesIO
 from pathlib import Path
 from uuid import uuid4
 from PIL import Image
+import pyautogui
 from .base import ToolError
 from util import tool

 OUTPUT_DIR = "./tmp/outputs"

-def get_screenshot(screen_region):
+def get_screenshot(screen_region=None, is_cursor=True):
     output_dir = Path(OUTPUT_DIR)
     output_dir.mkdir(parents=True, exist_ok=True)
     path = output_dir / f"screenshot_{uuid4().hex}.png"
     try:
-        img_io = tool.capture_screen_with_cursor()
+        if is_cursor:
+            img_io = tool.capture_screen_with_cursor()
+        else:
+            pyautogui_screenshot = pyautogui.screenshot()
+            img_io = BytesIO()
+            pyautogui_screenshot.save(img_io, 'PNG')
         screenshot = Image.open(img_io)

-        # Create a black mask of the same size
-        black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
+        # If screen_region is provided and valid, copy only that region
         if screen_region and len(screen_region) == 4:
+            black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
             x1, y1, x2, y2 = screen_region
             region = screenshot.crop((x1, y1, x2, y2))
             # Paste the region onto the black mask
             black_mask.paste(region, (x1, y1, x2, y2))
             # Use the modified image as screenshot
             screenshot = black_mask

         screenshot.save(path)
         return screenshot, path
     except Exception as e:
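The new screen_region branch keeps only the selected rectangle visible and blacks out the rest of the screenshot. A standalone sketch of that masking step, using PIL alone, follows; the input file name and region values are made up for illustration.

# Standalone sketch of the region-masking step (illustrative file name and region).
from PIL import Image

screenshot = Image.open("screenshot.png").convert("RGBA")
x1, y1, x2, y2 = 100, 100, 400, 300  # region to keep visible

# Black canvas the size of the screenshot; paste only the chosen region back in.
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
black_mask.paste(screenshot.crop((x1, y1, x2, y2)), (x1, y1, x2, y2))

black_mask.save("screenshot_masked.png")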
@@ -11,4 +11,5 @@ anthropic[bedrock,vertex]>=0.37.1
 pyxbrain==1.1.31
 timm
 einops==0.8.0
 modelscope
+pynput
util/auto_control.py  126 lines  Normal file
@@ -0,0 +1,126 @@
import sys
import os
import time

# Add the project root directory to Python path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_ui.agent.vision_agent import VisionAgent
from util.download_weights import MODEL_DIR
from pynput import mouse, keyboard

# Now you can import from gradio_ui
from gradio_ui.tools.screen_capture import get_screenshot


class AutoControl:
    def __init__(self):
        self.auto_list = []

    def start_listen(self):
        # Create both mouse and keyboard listeners
        mouse_listener = mouse.Listener(
            on_move=self.on_move,
            on_click=self.on_click,
            on_scroll=self.on_scroll)

        keyboard_listener = keyboard.Listener(
            on_press=self.on_press,
            on_release=self.on_release)

        # Start both listeners
        mouse_listener.start()
        keyboard_listener.start()

        # Keep the program running until keyboard listener stops
        keyboard_listener.join()

        # After keyboard stops (ESC pressed), stop mouse listener too
        mouse_listener.stop()

    def on_move(self, x, y, injected):
        print('Pointer moved to {}; it was {}'.format(
            (x, y), 'faked' if injected else 'not faked'))

    def on_click(self, x, y, button, pressed, injected):
        print('Mouse {} {} at {}; it was {}'.format(
            button,
            'Pressed' if pressed else 'Released',
            (x, y),
            'faked' if injected else 'not faked'))
        if not pressed:
            # Give a right-click menu time to appear before capturing
            if button == mouse.Button.right:
                time.sleep(1)
            screenshot, path = get_screenshot(is_cursor=False)
            self.auto_list.append(
                {"button": button,
                 "pressed": pressed,
                 "position": (x, y),
                 "path": path,
                 "image": screenshot
                 }
            )

    def on_scroll(self, x, y, dx, dy, injected):
        print('Scrolled {} at {}; it was {}'.format(
            'down' if dy < 0 else 'up',
            (x, y), 'faked' if injected else 'not faked'))

    def on_press(self, key, injected):
        try:
            print('alphanumeric key {} pressed; it was {}'.format(
                key.char, 'faked' if injected else 'not faked'))
        except AttributeError:
            print('special key {} pressed'.format(key))

    def on_release(self, key, injected):
        print('{} released; it was {}'.format(
            key, 'faked' if injected else 'not faked'))

        if key == keyboard.Key.esc:
            print("self.auto_list", self.auto_list)
            vision_agent = VisionAgent(yolo_model_path=os.path.join(MODEL_DIR, "icon_detect", "model.pt"),
                                       caption_model_path=os.path.join(MODEL_DIR, "icon_caption"))

            for item in self.auto_list:
                element_list = vision_agent(str(item["path"]))
                for element in element_list:
                    if self.crop_image_if_position_in_coordinates(item["image"], item["path"], item["position"], element.coordinates):
                        break
            # Stop listener
            return False

    def crop_image_if_position_in_coordinates(self, image, image_path, position, coordinates):
        """
        Check if position is within coordinates and crop the image if it is.

        Args:
            image: PIL Image object
            image_path: path the screenshot was saved to
            position: tuple of (x, y) - current position
            coordinates: tuple of (x1, y1, x2, y2) - target area

        Returns:
            bool: True if position is within coordinates
        """
        x, y = position
        x1, y1, x2, y2 = coordinates

        # Check if position is within coordinates
        if (x1 <= x <= x2) and (y1 <= y <= y2):
            # Crop the image to the coordinates
            cropped_image = image.crop(coordinates)
            # Save the cropped image with proper path and format
            save_path = str(image_path).replace('.png', '_cropped.png')
            cropped_image.save(save_path, 'PNG')
            return True

        return False


if __name__ == "__main__":
    auto_control = AutoControl()
    auto_control.start_listen()
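For reference, a possible driver for the recorder above. Only AutoControl, start_listen and auto_list come from the new file; the sketch assumes it is run from the repository root and that the VisionAgent model weights are available, since the ESC handler loads them.

# Hypothetical driver script; assumes it is launched from the repository root.
from util.auto_control import AutoControl

recorder = AutoControl()
recorder.start_listen()  # blocks until ESC is pressed

# Each recorded click keeps the button, the cursor position and the screenshot path.
for item in recorder.auto_list:
    print(item["button"], item["position"], item["path"])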