当刷新界面时,更新输入内容为上一次保存的数据。增加录制功能

This commit is contained in:
yuruo
2025-03-16 17:31:36 +08:00
parent d9044e0a87
commit b778dcc87d
4 changed files with 153 additions and 12 deletions

View File

@@ -63,11 +63,14 @@ def setup_state(state):
state["only_n_most_recent_images"] = 2 state["only_n_most_recent_images"] = 2
if 'stop' not in state: if 'stop' not in state:
state['stop'] = False state['stop'] = False
# update state
async def main(state): return (
"""Render loop for Gradio""" state["model"], # model textbox
setup_state(state) state["base_url"], # base_url textbox
return "Setup completed" state["api_key"], # api_key textbox
state["chatbox_messages"], # chatbot
[[task["status"], task["task"]] for task in state["tasks"]] # task_list
)
def load_from_storage(filename: str) -> str | None: def load_from_storage(filename: str) -> str | None:
"""Load data from a file in the storage directory.""" """Load data from a file in the storage directory."""
@@ -324,5 +327,9 @@ def run():
stop_button.click(stop_app, [state], None) stop_button.click(stop_app, [state], None)
base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None) base_url.change(fn=update_base_url, inputs=[base_url, state], outputs=None)
demo.load(
demo.launch(server_name="0.0.0.0", server_port=7888) setup_state,
inputs=[state],
outputs=[model, base_url, api_key, chatbot, task_list]
)
demo.launch(server_name="0.0.0.0", server_port=7888)

View File

@@ -1,29 +1,36 @@
from io import BytesIO
from pathlib import Path from pathlib import Path
from uuid import uuid4 from uuid import uuid4
from PIL import Image from PIL import Image
import pyautogui
from .base import ToolError from .base import ToolError
from util import tool from util import tool
OUTPUT_DIR = "./tmp/outputs" OUTPUT_DIR = "./tmp/outputs"
def get_screenshot(screen_region): def get_screenshot(screen_region=None, is_cursor=True):
output_dir = Path(OUTPUT_DIR) output_dir = Path(OUTPUT_DIR)
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
path = output_dir / f"screenshot_{uuid4().hex}.png" path = output_dir / f"screenshot_{uuid4().hex}.png"
try: try:
img_io = tool.capture_screen_with_cursor() if is_cursor:
img_io = tool.capture_screen_with_cursor()
else:
pyautogui_screenshot = pyautogui.screenshot()
img_io = BytesIO()
pyautogui_screenshot.save(img_io, 'PNG')
screenshot = Image.open(img_io) screenshot = Image.open(img_io)
# Create a black mask of the same size # Create a black mask of the same size
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
# If screen_region is provided and valid, copy only that region # If screen_region is provided and valid, copy only that region
if screen_region and len(screen_region) == 4: if screen_region and len(screen_region) == 4:
black_mask = Image.new("RGBA", screenshot.size, (0, 0, 0, 255))
x1, y1, x2, y2 = screen_region x1, y1, x2, y2 = screen_region
region = screenshot.crop((x1, y1, x2, y2)) region = screenshot.crop((x1, y1, x2, y2))
# Paste the region onto the black mask # Paste the region onto the black mask
black_mask.paste(region, (x1, y1, x2, y2)) black_mask.paste(region, (x1, y1, x2, y2))
# Use the modified image as screenshot # Use the modified image as screenshot
screenshot = black_mask screenshot = black_mask
screenshot.save(path) screenshot.save(path)
return screenshot, path return screenshot, path
except Exception as e: except Exception as e:

View File

@@ -11,4 +11,5 @@ anthropic[bedrock,vertex]>=0.37.1
pyxbrain==1.1.31 pyxbrain==1.1.31
timm timm
einops==0.8.0 einops==0.8.0
modelscope modelscope
pynput

126
util/auto_control.py Normal file
View File

@@ -0,0 +1,126 @@
import sys
import os
import time
# Add the project root directory to Python path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_ui.agent.vision_agent import VisionAgent
from util.download_weights import MODEL_DIR
from pynput import mouse, keyboard
# Now you can import from gradio_ui
from gradio_ui.tools.screen_capture import get_screenshot
class AutoControl:
    """Record mouse clicks with screenshots and crop the clicked UI elements.

    Global mouse and keyboard events are observed through pynput listeners.
    Every mouse button release is stored in ``auto_list`` together with a
    screenshot taken at that moment. Releasing ESC ends the recording; each
    recorded screenshot is then passed to ``VisionAgent`` and the first
    detected element containing the click position is cropped and saved.
    """

    def __init__(self):
        # One dict per recorded mouse release with the keys "button",
        # "pressed", "position", "path" and "image".
        self.auto_list = []

    def start_listen(self):
        """Record input until ESC is released, then process the recording.

        Blocks the calling thread until the keyboard listener has stopped and
        the recorded clicks have been processed.
        """
        mouse_listener = mouse.Listener(
            on_move=self.on_move,
            on_click=self.on_click,
            on_scroll=self.on_scroll)
        keyboard_listener = keyboard.Listener(
            on_press=self.on_press,
            on_release=self.on_release)
        mouse_listener.start()
        keyboard_listener.start()
        # on_release returns False when ESC is released, which stops the
        # keyboard listener and lets join() return.
        keyboard_listener.join()
        mouse_listener.stop()
        # Model loading and detection are slow, so they run here instead of
        # inside the keyboard callback: pynput advises against long-running
        # work in listener callbacks, and at this point no further clicks are
        # being recorded.
        self._crop_recorded_elements()

    def on_move(self, x, y, injected=False):
        """Log a pointer move.

        ``injected`` defaults to False so the callback also works when it is
        invoked without that argument.
        """
        print('Pointer moved to {}; it was {}'.format(
            (x, y), 'faked' if injected else 'not faked'))

    def on_click(self, x, y, button, pressed, injected=False):
        """Log a mouse button event and, on release, record a screenshot.

        Args:
            x, y: pointer position reported by the listener.
            button: the mouse button involved.
            pressed: True for a press, False for a release.
            injected: reported by the listener; only used for logging.
        """
        print('Mouse {} {} at {}; it was {}'.format(
            button,
            'Pressed' if pressed else 'Released',
            (x, y),
            'faked' if injected else 'not faked'))
        if pressed:
            return
        if button == mouse.Button.right:
            # Presumably gives the right-click (context) menu time to appear
            # before the screenshot is taken.
            time.sleep(1)
        screenshot, path = get_screenshot(is_cursor=False)
        self.auto_list.append(
            {"button": button,
             "pressed": pressed,
             "position": (x, y),
             "path": path,
             "image": screenshot
             }
        )

    def on_scroll(self, x, y, dx, dy, injected=False):
        """Log a scroll event; a negative ``dy`` is reported as 'down'."""
        print('Scrolled {} at {}; it was {}'.format(
            'down' if dy < 0 else 'up',
            (x, y), 'faked' if injected else 'not faked'))

    def on_press(self, key, injected=False):
        """Log a key press, distinguishing keys that expose ``char``."""
        try:
            print('alphanumeric key {} pressed; it was {}'.format(
                key.char, 'faked' if injected else 'not faked'))
        except AttributeError:
            # Keys without a ``char`` attribute are reported as special keys.
            print('special key {} pressed'.format(
                key))

    def on_release(self, key, injected=False):
        """Log a key release.

        Returns:
            False when ESC was released (this stops the listener), otherwise
            None.
        """
        print('{} released; it was {}'.format(
            key, 'faked' if injected else 'not faked'))
        if key == keyboard.Key.esc:
            # Stop listener
            return False

    def _crop_recorded_elements(self):
        """Crop the element under each recorded click out of its screenshot."""
        print("self.auto_list", self.auto_list)
        if not self.auto_list:
            # Nothing was recorded, so skip loading the detection models.
            return
        vision_agent = VisionAgent(
            yolo_model_path=os.path.join(MODEL_DIR, "icon_detect", "model.pt"),
            caption_model_path=os.path.join(MODEL_DIR, "icon_caption"))
        for item in self.auto_list:
            element_list = vision_agent(str(item["path"]))
            for element in element_list:
                # Only the first element containing the click is cropped.
                if self.crop_image_if_position_in_coordinates(
                        item["image"], item["path"], item["position"],
                        element.coordinates):
                    break

    def crop_image_if_position_in_coordinates(self, image, image_path, position, coordinates):
        """
        Check if position is within coordinates and crop image if true

        Args:
            image: PIL Image object
            image_path: path of the source screenshot; the crop is saved next
                to it as ``<name>_cropped.png``
            position: tuple of (x, y) - current position
            coordinates: tuple of (x1, y1, x2, y2) - target area

        Returns:
            bool: True if position is in coordinates (the crop was saved)
        """
        x, y = position
        x1, y1, x2, y2 = coordinates
        if not ((x1 <= x <= x2) and (y1 <= y <= y2)):
            return False
        cropped_image = image.crop(coordinates)
        # Only the final extension is replaced, so a ".png" elsewhere in the
        # path is left alone and the source screenshot is never overwritten.
        root, _ = os.path.splitext(str(image_path))
        save_path = root + '_cropped.png'
        cropped_image.save(save_path, 'PNG')
        return True
if __name__ == "__main__":
    # Script entry point: start recording; start_listen() blocks until the
    # keyboard listener stops.
    AutoControl().start_listen()