更新agent和支持的模型

This commit is contained in:
yuruo
2025-03-14 16:35:48 +08:00
parent 800e2f33cb
commit 95b8ad3407
5 changed files with 17 additions and 15 deletions

View File

@@ -1,3 +1,4 @@
| Vendor-en | Vendor-ch | Model | base-url |
| --- | --- | --- | --- |
| openainext | openainext | gpt-4o-2024-11-20 | https://api.openai-next.com/v1 |
| openainext | openainext | gpt-4.5-preview-2025-02-27 | https://api.openai-next.com/v1 |

View File

@@ -1,3 +1,4 @@
import json
import uuid
from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock, BetaMessageParam, BetaUsage
@@ -6,6 +7,8 @@ from gradio_ui.agent.base_agent import BaseAgent
import platform
import re
from typing import get_args

from xbrain.core.chat import run
from gradio_ui.tools.computer import Action
class TaskRunAgent(BaseAgent):
def __init__(self):
self.OUTPUT_DIR = "./tmp/outputs"
@@ -90,8 +93,7 @@ class TaskRunAgentResponse(BaseModel):
next_action: str = Field(
description="选择一个操作类型如果找不到合适的操作请选择None",
json_schema_extra={
"enum": ["type", "left_click", "right_click", "double_click",
"hover", "scroll_up", "scroll_down", "wait", "None"]
"enum": Action
}
)
box_id: int = Field(description="要操作的框ID当next_action为left_click、right_click、double_click、hover时提供否则为None", default=None)
@@ -133,16 +135,6 @@ system_prompt = """
}}
```
【next_action】仅包括下面之一
- type输入一串文本。
- left_click将鼠标移动到框ID并左键单击。
- right_click将鼠标移动到框ID并右键单击。
- double_click将鼠标移动到框ID并双击。
- hover将鼠标移动到框ID。
- scroll_up向上滚动屏幕以查看之前的内容。
- scroll_down当所需按钮不可见或您需要查看更多内容时向下滚动屏幕。
- wait等待1秒钟让设备加载或响应。
##########
### 案例 ###
一个例子:

View File

@@ -3,6 +3,8 @@ from pydantic import Field,BaseModel
from gradio_ui.agent.base_agent import BaseAgent
from xbrain.core.chat import run
from gradio_ui.tools.computer import Action
class VerificationAgent(BaseAgent):
def __call__(self, messages, parsed_screen_result):
messages.append(
@@ -17,7 +19,7 @@ class VerificationAgent(BaseAgent):
})
response = run(
messages,
user_prompt=prompt.format(screen_info=str(parsed_screen_result['parsed_content_list'])),
# `action_list` must be a keyword of prompt.format(), not of str(): str()
# rejects unknown keywords with a TypeError, and the {action_list}
# placeholder in the prompt template would otherwise never be filled.
user_prompt=prompt.format(screen_info=str(parsed_screen_result['parsed_content_list']), action_list=str(Action)),
response_format=VerificationResponse
)
return json.loads(response)
@@ -74,6 +76,12 @@ prompt = """
- **模糊匹配**:允许近似匹配而非精确匹配
- **超时设置**:指定验证的最长等待时间
### 补救措施 ###
补救措施建议如下:
- 【推荐】可以再等待一段时间看看效果,因为上一个操作还没执行完成就开始了验证
- 再一次操作
- 检查是否存在其他验证方法,但是仅限于以下几个动作:
{action_list}
### 例子 ###
操作:点击"登录"按钮
预期结果:登录成功并显示首页

View File

@@ -37,7 +37,7 @@ def sampling_loop_sync(
for plan in plan_list:
execute_task_plan(plan, vision_agent, task_run_agent, executor, messages)
yield
sleep(2)
sleep(5)
yield from verification_loop(vision_agent, verification_agent, executor, task_run_agent, messages)

View File

@@ -28,7 +28,8 @@ Action = Literal[
"hover",
"wait",
"scroll_up",
"scroll_down"
"scroll_down",
"None"
]
class Resolution(TypedDict):