From 260be08f5d91e88a6a2e93efa06d838aa800b3c5 Mon Sep 17 00:00:00 2001 From: warmshao Date: Fri, 3 Jan 2025 08:33:35 +0800 Subject: [PATCH] add max run steps --- src/agent/custom_agent.py | 41 ++++++------------------------------- src/agent/custom_prompts.py | 2 +- src/agent/custom_views.py | 2 +- webui.py | 18 ++++++++-------- 4 files changed, 18 insertions(+), 45 deletions(-) diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index 58d00ff..5b075dd 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -126,8 +126,8 @@ class CustomAgent(Agent): emoji = '🤷' logger.info(f'{emoji} Eval: {response.current_state.prev_action_evaluation}') - logger.info(f'🧠 Memory: {response.current_state.import_contents}') - logger.info(f'⏳ Task Progress: {response.current_state.completed_contents}') + logger.info(f'🧠 New Memory: {response.current_state.important_contents}') + logger.info(f'⏳ Task Progress: {response.current_state.completed_contents}') logger.info(f'🤔 Thought: {response.current_state.thought}') logger.info(f'🎯 Summary: {response.current_state.summary}') for i, action in enumerate(response.action): @@ -143,15 +143,14 @@ class CustomAgent(Agent): return step_info.step_number += 1 - import_contents = model_output.current_state.import_contents - if import_contents and 'None' not in import_contents and import_contents not in step_info.memory: - step_info.memory += import_contents + '\n' + important_contents = model_output.current_state.important_contents + if important_contents and 'None' not in important_contents and important_contents not in step_info.memory: + step_info.memory += important_contents + '\n' completed_contents = model_output.current_state.completed_contents if completed_contents and 'None' not in completed_contents: step_info.task_progress = completed_contents - @time_execution_async('--step') async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: """Execute one step of the task""" @@ -166,6 +165,7 @@ class CustomAgent(Agent): input_messages = self.message_manager.get_messages() model_output = await self.get_next_action(input_messages) self.update_step_info(model_output, step_info) + logger.info(f'🧠 All Memory: {step_info.memory}') self._save_conversation(input_messages, model_output) self.message_manager._remove_last_state_message() # we dont want the whole state in the chat history self.message_manager.add_model_output(model_output) @@ -198,35 +198,6 @@ class CustomAgent(Agent): if state: self._make_history_item(model_output, state, result) - def _make_history_item( - self, - model_output: CustomAgentOutput | None, - state: BrowserState, - result: list[ActionResult], - ) -> None: - """Create and store history item""" - interacted_element = None - len_result = len(result) - - if model_output: - interacted_elements = AgentHistory.get_interacted_element( - model_output, state.selector_map - ) - else: - interacted_elements = [None] - - state_history = BrowserStateHistory( - url=state.url, - title=state.title, - tabs=state.tabs, - interacted_element=interacted_elements, - screenshot=state.screenshot, - ) - - history_item = AgentHistory(model_output=model_output, result=result, state=state_history) - - self.history.history.append(history_item) - async def run(self, max_steps: int = 100) -> AgentHistoryList: """Execute the task with maximum number of steps""" try: diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index a315072..bd8e612 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -26,7 +26,7 @@ class CustomSystemPrompt(SystemPrompt): { "current_state": { "prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.", - "import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".", + "important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output \"None\".", "completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button", "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.", "summary": "Please generate a brief natural language description for the operation in next actions based on your Thought." diff --git a/src/agent/custom_views.py b/src/agent/custom_views.py index d1319af..d3e1647 100644 --- a/src/agent/custom_views.py +++ b/src/agent/custom_views.py @@ -25,7 +25,7 @@ class CustomAgentBrain(BaseModel): """Current state of the agent""" prev_action_evaluation: str - import_contents: str + important_contents: str completed_contents: str thought: str summary: str diff --git a/webui.py b/webui.py index 5ab933c..a991c3e 100644 --- a/webui.py +++ b/webui.py @@ -52,7 +52,7 @@ async def run_browser_agent( save_recording_path, task, add_infos, - progress=gr.Progress() + max_steps ): """ Runs the browser agent based on user configurations. @@ -74,7 +74,7 @@ async def run_browser_agent( window_h=window_h, save_recording_path=save_recording_path, task=task, - progress=progress, + max_steps=max_steps, ) elif agent_type == "custom": return await run_custom_agent( @@ -87,7 +87,7 @@ async def run_browser_agent( save_recording_path=save_recording_path, task=task, add_infos=add_infos, - progress=progress, + max_steps=max_steps, ) else: raise ValueError(f"Invalid agent type: {agent_type}") @@ -101,7 +101,7 @@ async def run_org_agent( window_h, save_recording_path, task, - progress + max_steps ): browser = Browser( config=BrowserConfig( @@ -123,7 +123,7 @@ async def run_org_agent( llm=llm, browser_context=browser_context, ) - history = await agent.run(max_steps=10) + history = await agent.run(max_steps=max_steps) final_result = history.final_result() errors = history.errors() @@ -143,7 +143,7 @@ async def run_custom_agent( save_recording_path, task, add_infos, - progress + max_steps ): controller = CustomController() playwright = None @@ -195,7 +195,7 @@ async def run_custom_agent( controller=controller, system_prompt_class=CustomSystemPrompt ) - history = await agent.run(max_steps=10) + history = await agent.run(max_steps=max_steps) final_result = history.final_result() errors = history.errors() @@ -244,6 +244,7 @@ def main(): gr.Markdown("

Browser Use WebUI

") with gr.Row(): agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom") + max_steps = gr.Number(label="max run steps", value=100) with gr.Row(): llm_provider = gr.Dropdown( ["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini" @@ -266,7 +267,7 @@ def main(): with gr.Accordion("Task Settings", open=True): task = gr.Textbox(label="Task", lines=10, value="go to google.com and type 'OpenAI' click search and give me the first url") - add_infos = gr.Textbox(label="Additional Infos", lines=10) + add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5) run_button = gr.Button("Run Agent", variant="primary") with gr.Column(): @@ -292,6 +293,7 @@ def main(): save_recording_path, task, add_infos, + max_steps ], outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output], )