diff --git a/gradio_ui/agent/task_run_agent.py b/gradio_ui/agent/task_run_agent.py index 7264761..2422d27 100644 --- a/gradio_ui/agent/task_run_agent.py +++ b/gradio_ui/agent/task_run_agent.py @@ -33,9 +33,11 @@ class TaskRunAgent(BaseAgent): ) vlm_response_json = json.loads(vlm_response) response_content = [BetaTextBlock(text=vlm_response_json["reasoning"], type='text')] - if 'box_centroid_coordinate' in vlm_response_json: + if "box_id" in vlm_response_json: + bbox = parsed_screen_result["parsed_content_list"][int(vlm_response_json["box_id"])].coordinates + box_centroid_coordinate = [int((bbox[0] + bbox[2]) / 2 ), int((bbox[1] + bbox[3]) / 2 )] move_cursor_block = BetaToolUseBlock(id=f'toolu_{uuid.uuid4()}', - input={'action': 'mouse_move', 'coordinate': vlm_response_json["box_centroid_coordinate"]}, + input={'action': 'mouse_move', 'coordinate': box_centroid_coordinate}, name='computer', type='tool_use') response_content.append(move_cursor_block) diff --git a/gradio_ui/agent/verification_agent.py b/gradio_ui/agent/verification_agent.py index 26cf91a..8efa21e 100644 --- a/gradio_ui/agent/verification_agent.py +++ b/gradio_ui/agent/verification_agent.py @@ -1,6 +1,5 @@ import json -from anthropic import BaseModel -from pydantic import Field +from pydantic import Field,BaseModel from gradio_ui.agent.base_agent import BaseAgent from xbrain.core.chat import run @@ -43,7 +42,7 @@ prompt = """ ### 输出格式 ### 验证结果应采用以下JSON格式: -{ +{{ "验证状态": "成功/失败", "验证方法": "使用的验证方法", "证据": "支持验证结果的具体证据", @@ -51,7 +50,7 @@ prompt = """ "补救措施": [ "再执行一次操作" ], -} +}} ### 验证方法 ### 1. **视觉验证**:识别特定UI元素是否出现或消失 @@ -79,11 +78,11 @@ prompt = """ 操作:点击"登录"按钮 预期结果:登录成功并显示首页 验证输出: -{ +{{ "verification_status": "success", "verification_method": "视觉验证+内容验证", "reasoning": "1. 检测到欢迎消息'你好,用户名' 2. 导航栏显示用户头像 3. URL已变更为首页地址", "failure_reason": "无", "remedy_measures": [], -} +}} """ diff --git a/gradio_ui/loop.py b/gradio_ui/loop.py index 2691564..a3e5200 100644 --- a/gradio_ui/loop.py +++ b/gradio_ui/loop.py @@ -38,8 +38,8 @@ def sampling_loop_sync( execute_task_plan(plan, vision_agent, task_run_agent, executor, messages) yield sleep(2) - verification_loop(vision_agent, verification_agent, executor, task_run_agent, messages) - yield + yield from verification_loop(vision_agent, verification_agent, executor, task_run_agent, messages) + def verification_loop(vision_agent, verification_agent, executor, task_run_agent, messages): """verification agent will be called in the loop"""