mirror of
https://github.com/yuruotong1/autoMate.git
synced 2026-03-22 13:07:17 +08:00
更新agent和支持的模型
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
| Vendor-en | Vendor-ch | Model | base-url |
| --- | --- | --- | --- |
| openainext | openainext | gpt-4o-2024-11-20 | https://api.openai-next.com/v1 |
| openainext | openainext | gpt-4.5-preview-2025-02-27 | https://api.openai-next.com/v1 |
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaMessageParam, BetaUsage
|
||||
@@ -6,6 +7,8 @@ from gradio_ui.agent.base_agent import BaseAgent
|
||||
from xbrain.core.chat import run
|
||||
import platform
|
||||
import re
|
||||
|
||||
from gradio_ui.tools.computer import Action
|
||||
class TaskRunAgent(BaseAgent):
|
||||
def __init__(self):
|
||||
self.OUTPUT_DIR = "./tmp/outputs"
|
||||
@@ -90,8 +93,7 @@ class TaskRunAgentResponse(BaseModel):
|
||||
next_action: str = Field(
|
||||
description="选择一个操作类型,如果找不到合适的操作,请选择None",
|
||||
json_schema_extra={
|
||||
"enum": ["type", "left_click", "right_click", "double_click",
|
||||
"hover", "scroll_up", "scroll_down", "wait", "None"]
|
||||
"enum": Action
|
||||
}
|
||||
)
|
||||
box_id: int = Field(description="要操作的框ID,当next_action为left_click、right_click、double_click、hover时提供,否则为None", default=None)
|
||||
@@ -133,16 +135,6 @@ system_prompt = """
|
||||
}}
|
||||
```
|
||||
|
||||
【next_action】仅包括下面之一:
|
||||
- type:输入一串文本。
|
||||
- left_click:将鼠标移动到框ID并左键单击。
|
||||
- right_click:将鼠标移动到框ID并右键单击。
|
||||
- double_click:将鼠标移动到框ID并双击。
|
||||
- hover:将鼠标移动到框ID。
|
||||
- scroll_up:向上滚动屏幕以查看之前的内容。
|
||||
- scroll_down:当所需按钮不可见或您需要查看更多内容时,向下滚动屏幕。
|
||||
- wait:等待1秒钟让设备加载或响应。
|
||||
|
||||
##########
|
||||
### 案例 ###
|
||||
一个例子:
|
||||
|
||||
@@ -3,6 +3,8 @@ from pydantic import Field,BaseModel
|
||||
from gradio_ui.agent.base_agent import BaseAgent
|
||||
from xbrain.core.chat import run
|
||||
|
||||
from gradio_ui.tools.computer import Action
|
||||
|
||||
class VerificationAgent(BaseAgent):
|
||||
def __call__(self, messages, parsed_screen_result):
|
||||
messages.append(
|
||||
@@ -17,7 +19,7 @@ class VerificationAgent(BaseAgent):
|
||||
})
|
||||
response = run(
|
||||
messages,
|
||||
user_prompt=prompt.format(screen_info=str(parsed_screen_result['parsed_content_list'])),
|
||||
user_prompt=prompt.format(screen_info=str(parsed_screen_result['parsed_content_list']), action_list=str(Action)),
|
||||
response_format=VerificationResponse
|
||||
)
|
||||
return json.loads(response)
|
||||
@@ -74,6 +76,12 @@ prompt = """
|
||||
- **模糊匹配**:允许近似匹配而非精确匹配
|
||||
- **超时设置**:指定验证的最长等待时间
|
||||
|
||||
### 补救措施 ###
|
||||
补救措施建议如下:
|
||||
- 【推荐】可以再等待一段时间看看效果,因为上一个操作还没执行完成就开始了验证
|
||||
- 再一次操作
|
||||
- 检查是否存在其他验证方法,但是仅限于以下几个动作:
|
||||
{action_list}
|
||||
### 例子 ###
|
||||
操作:点击"登录"按钮
|
||||
预期结果:登录成功并显示首页
|
||||
|
||||
@@ -37,7 +37,7 @@ def sampling_loop_sync(
|
||||
for plan in plan_list:
|
||||
execute_task_plan(plan, vision_agent, task_run_agent, executor, messages)
|
||||
yield
|
||||
sleep(2)
|
||||
sleep(5)
|
||||
yield from verification_loop(vision_agent, verification_agent, executor, task_run_agent, messages)
|
||||
|
||||
|
||||
|
||||
@@ -28,7 +28,8 @@ Action = Literal[
|
||||
"hover",
|
||||
"wait",
|
||||
"scroll_up",
|
||||
"scroll_down"
|
||||
"scroll_down",
|
||||
"None"
|
||||
]
|
||||
|
||||
class Resolution(TypedDict):
|
||||
|
||||
Reference in New Issue
Block a user