add max run steps

This commit is contained in:
warmshao
2025-01-03 08:33:35 +08:00
parent 0b822fa1c2
commit 260be08f5d
4 changed files with 18 additions and 45 deletions

View File

@@ -126,8 +126,8 @@ class CustomAgent(Agent):
emoji = '🤷'
logger.info(f'{emoji} Eval: {response.current_state.prev_action_evaluation}')
logger.info(f'🧠 Memory: {response.current_state.import_contents}')
logger.info(f' Task Progress: {response.current_state.completed_contents}')
logger.info(f'🧠 New Memory: {response.current_state.important_contents}')
logger.info(f'⏳ Task Progress: {response.current_state.completed_contents}')
logger.info(f'🤔 Thought: {response.current_state.thought}')
logger.info(f'🎯 Summary: {response.current_state.summary}')
for i, action in enumerate(response.action):
@@ -143,15 +143,14 @@ class CustomAgent(Agent):
return
step_info.step_number += 1
import_contents = model_output.current_state.import_contents
if import_contents and 'None' not in import_contents and import_contents not in step_info.memory:
step_info.memory += import_contents + '\n'
important_contents = model_output.current_state.important_contents
if important_contents and 'None' not in important_contents and important_contents not in step_info.memory:
step_info.memory += important_contents + '\n'
completed_contents = model_output.current_state.completed_contents
if completed_contents and 'None' not in completed_contents:
step_info.task_progress = completed_contents
@time_execution_async('--step')
async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
"""Execute one step of the task"""
@@ -166,6 +165,7 @@ class CustomAgent(Agent):
input_messages = self.message_manager.get_messages()
model_output = await self.get_next_action(input_messages)
self.update_step_info(model_output, step_info)
logger.info(f'🧠 All Memory: {step_info.memory}')
self._save_conversation(input_messages, model_output)
self.message_manager._remove_last_state_message() # we dont want the whole state in the chat history
self.message_manager.add_model_output(model_output)
@@ -198,35 +198,6 @@ class CustomAgent(Agent):
if state:
self._make_history_item(model_output, state, result)
def _make_history_item(
    self,
    model_output: CustomAgentOutput | None,
    state: BrowserState,
    result: list[ActionResult],
) -> None:
    """Create a history item for one step and append it to ``self.history.history``.

    Args:
        model_output: The model output for this step, or ``None`` when there
            is none to record.
        state: Browser state to snapshot; its ``url``, ``title``, ``tabs`` and
            ``screenshot`` are copied into the stored state history, and its
            ``selector_map`` is used to look up the interacted elements.
        result: Action results stored alongside the model output.
    """
    # Interacted elements can only be looked up when there is a model output;
    # otherwise a single ``None`` placeholder is stored.
    if model_output:
        interacted_elements = AgentHistory.get_interacted_element(
            model_output, state.selector_map
        )
    else:
        interacted_elements = [None]

    state_history = BrowserStateHistory(
        url=state.url,
        title=state.title,
        tabs=state.tabs,
        interacted_element=interacted_elements,
        screenshot=state.screenshot,
    )
    history_item = AgentHistory(model_output=model_output, result=result, state=state_history)
    self.history.history.append(history_item)
async def run(self, max_steps: int = 100) -> AgentHistoryList:
"""Execute the task with maximum number of steps"""
try:

View File

@@ -26,7 +26,7 @@ class CustomSystemPrompt(SystemPrompt):
{
"current_state": {
"prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
"import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".",
"important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output \"None\".",
"completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
"summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."

View File

@@ -25,7 +25,7 @@ class CustomAgentBrain(BaseModel):
"""Current state of the agent"""
prev_action_evaluation: str
import_contents: str
important_contents: str
completed_contents: str
thought: str
summary: str

View File

@@ -52,7 +52,7 @@ async def run_browser_agent(
save_recording_path,
task,
add_infos,
progress=gr.Progress()
max_steps
):
"""
Runs the browser agent based on user configurations.
@@ -74,7 +74,7 @@ async def run_browser_agent(
window_h=window_h,
save_recording_path=save_recording_path,
task=task,
progress=progress,
max_steps=max_steps,
)
elif agent_type == "custom":
return await run_custom_agent(
@@ -87,7 +87,7 @@ async def run_browser_agent(
save_recording_path=save_recording_path,
task=task,
add_infos=add_infos,
progress=progress,
max_steps=max_steps,
)
else:
raise ValueError(f"Invalid agent type: {agent_type}")
@@ -101,7 +101,7 @@ async def run_org_agent(
window_h,
save_recording_path,
task,
progress
max_steps
):
browser = Browser(
config=BrowserConfig(
@@ -123,7 +123,7 @@ async def run_org_agent(
llm=llm,
browser_context=browser_context,
)
history = await agent.run(max_steps=10)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
@@ -143,7 +143,7 @@ async def run_custom_agent(
save_recording_path,
task,
add_infos,
progress
max_steps
):
controller = CustomController()
playwright = None
@@ -195,7 +195,7 @@ async def run_custom_agent(
controller=controller,
system_prompt_class=CustomSystemPrompt
)
history = await agent.run(max_steps=10)
history = await agent.run(max_steps=max_steps)
final_result = history.final_result()
errors = history.errors()
@@ -244,6 +244,7 @@ def main():
gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
with gr.Row():
agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
max_steps = gr.Number(label="max run steps", value=100)
with gr.Row():
llm_provider = gr.Dropdown(
["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini"
@@ -266,7 +267,7 @@ def main():
with gr.Accordion("Task Settings", open=True):
task = gr.Textbox(label="Task", lines=10,
value="go to google.com and type 'OpenAI' click search and give me the first url")
add_infos = gr.Textbox(label="Additional Infos", lines=10)
add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)
run_button = gr.Button("Run Agent", variant="primary")
with gr.Column():
@@ -292,6 +293,7 @@ def main():
save_recording_path,
task,
add_infos,
max_steps
],
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
)