Add max run steps option to the web UI; rename import_contents to important_contents

2026-03-22 11:17:17 +08:00 · 2025-01-03 08:33:35 +08:00
parent 0b822fa1c2
commit 260be08f5d
4 changed files with 18 additions and 45 deletions
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -126,8 +126,8 @@ class CustomAgent(Agent):
            emoji = '🤷'

        logger.info(f'{emoji} Eval: {response.current_state.prev_action_evaluation}')
-        logger.info(f'🧠 Memory: {response.current_state.import_contents}')
-        logger.info(f'⏳  Task Progress: {response.current_state.completed_contents}')
+        logger.info(f'🧠 New Memory: {response.current_state.important_contents}')
+        logger.info(f'⏳ Task Progress: {response.current_state.completed_contents}')
        logger.info(f'🤔 Thought: {response.current_state.thought}')
        logger.info(f'🎯 Summary: {response.current_state.summary}')
        for i, action in enumerate(response.action):
@@ -143,15 +143,14 @@ class CustomAgent(Agent):
            return

        step_info.step_number += 1
-        import_contents = model_output.current_state.import_contents
-        if import_contents and 'None' not in import_contents and import_contents not in step_info.memory:
-            step_info.memory += import_contents + '\n'
+        important_contents = model_output.current_state.important_contents
+        if important_contents and 'None' not in important_contents and important_contents not in step_info.memory:
+            step_info.memory += important_contents + '\n'

        completed_contents = model_output.current_state.completed_contents
        if completed_contents and 'None' not in completed_contents:
            step_info.task_progress = completed_contents

-
    @time_execution_async('--step')
    async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
        """Execute one step of the task"""
@@ -166,6 +165,7 @@ class CustomAgent(Agent):
            input_messages = self.message_manager.get_messages()
            model_output = await self.get_next_action(input_messages)
            self.update_step_info(model_output, step_info)
+            logger.info(f'🧠 All Memory: {step_info.memory}')
            self._save_conversation(input_messages, model_output)
            self.message_manager._remove_last_state_message()  # we dont want the whole state in the chat history
            self.message_manager.add_model_output(model_output)
@@ -198,35 +198,6 @@ class CustomAgent(Agent):
            if state:
                self._make_history_item(model_output, state, result)

-    def _make_history_item(
-            self,
-            model_output: CustomAgentOutput | None,
-            state: BrowserState,
-            result: list[ActionResult],
-    ) -> None:
-        """Create and store history item"""
-        interacted_element = None
-        len_result = len(result)
-
-        if model_output:
-            interacted_elements = AgentHistory.get_interacted_element(
-                model_output, state.selector_map
-            )
-        else:
-            interacted_elements = [None]
-
-        state_history = BrowserStateHistory(
-            url=state.url,
-            title=state.title,
-            tabs=state.tabs,
-            interacted_element=interacted_elements,
-            screenshot=state.screenshot,
-        )
-
-        history_item = AgentHistory(model_output=model_output, result=result, state=state_history)
-
-        self.history.history.append(history_item)
-
    async def run(self, max_steps: int = 100) -> AgentHistoryList:
        """Execute the task with maximum number of steps"""
        try:
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -26,7 +26,7 @@ class CustomSystemPrompt(SystemPrompt):
       {
         "current_state": {
           "prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
-           "import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".",
+           "important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output \"None\".",
           "completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
           "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
           "summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."
--- a/src/agent/custom_views.py
+++ b/src/agent/custom_views.py
@@ -25,7 +25,7 @@ class CustomAgentBrain(BaseModel):
    """Current state of the agent"""

    prev_action_evaluation: str
-    import_contents: str
+    important_contents: str
    completed_contents: str
    thought: str
    summary: str
--- a/webui.py
+++ b/webui.py
@@ -52,7 +52,7 @@ async def run_browser_agent(
        save_recording_path,
        task,
        add_infos,
-        progress=gr.Progress()
+        max_steps
 ):
    """
    Runs the browser agent based on user configurations.
@@ -74,7 +74,7 @@ async def run_browser_agent(
            window_h=window_h,
            save_recording_path=save_recording_path,
            task=task,
-            progress=progress,
+            max_steps=max_steps,
        )
    elif agent_type == "custom":
        return await run_custom_agent(
@@ -87,7 +87,7 @@ async def run_browser_agent(
            save_recording_path=save_recording_path,
            task=task,
            add_infos=add_infos,
-            progress=progress,
+            max_steps=max_steps,
        )
    else:
        raise ValueError(f"Invalid agent type: {agent_type}")
@@ -101,7 +101,7 @@ async def run_org_agent(
        window_h,
        save_recording_path,
        task,
-        progress
+        max_steps
 ):
    browser = Browser(
        config=BrowserConfig(
@@ -123,7 +123,7 @@ async def run_org_agent(
            llm=llm,
            browser_context=browser_context,
        )
-        history = await agent.run(max_steps=10)
+        history = await agent.run(max_steps=max_steps)

        final_result = history.final_result()
        errors = history.errors()
@@ -143,7 +143,7 @@ async def run_custom_agent(
        save_recording_path,
        task,
        add_infos,
-        progress
+        max_steps
 ):
    controller = CustomController()
    playwright = None
@@ -195,7 +195,7 @@ async def run_custom_agent(
                controller=controller,
                system_prompt_class=CustomSystemPrompt
            )
-            history = await agent.run(max_steps=10)
+            history = await agent.run(max_steps=max_steps)

            final_result = history.final_result()
            errors = history.errors()
@@ -244,6 +244,7 @@ def main():
        gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
        with gr.Row():
            agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
+            max_steps = gr.Number(label="max run steps", value=100)
        with gr.Row():
            llm_provider = gr.Dropdown(
                ["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini"
@@ -266,7 +267,7 @@ def main():
        with gr.Accordion("Task Settings", open=True):
            task = gr.Textbox(label="Task", lines=10,
                              value="go to google.com and type 'OpenAI' click search and give me the first url")
-            add_infos = gr.Textbox(label="Additional Infos", lines=10)
+            add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)

        run_button = gr.Button("Run Agent", variant="primary")
        with gr.Column():
@@ -292,6 +293,7 @@ def main():
                save_recording_path,
                task,
                add_infos,
+                max_steps
            ],
            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
        )