fix multiple tab

This commit is contained in:
alex
2025-04-12 21:05:02 +08:00
parent 2df50b308c
commit d70db733a4
5 changed files with 45 additions and 25 deletions

View File

@@ -74,7 +74,8 @@ class CustomMessageManager(MessageManager):
min_message_len = 2 if self.context_content is not None else 1
while diff > 0 and len(self.state.history.messages) > min_message_len:
self.state.history.remove_message(min_message_len) # always remove the oldest message
msg = self.state.history.messages.pop(min_message_len)
self.state.history.current_tokens -= msg.metadata.tokens
diff = self.state.history.current_tokens - self.settings.max_input_tokens
def add_state_message(
@@ -104,6 +105,7 @@ class CustomMessageManager(MessageManager):
if isinstance(self.state.history.messages[i].message, HumanMessage):
remove_cnt += 1
if remove_cnt == abs(remove_ind):
self.state.history.messages.pop(i)
msg = self.state.history.messages.pop(i)
self.state.history.current_tokens -= msg.metadata.tokens
break
i -= 1

View File

@@ -21,6 +21,18 @@ class CustomSystemPrompt(SystemPrompt):
except Exception as e:
raise RuntimeError(f'Failed to load system prompt template: {e}')
def get_system_message(self) -> SystemMessage:
    """
    Build the agent's system message from the loaded prompt template.

    The template's ``max_actions`` and ``available_actions`` placeholders
    are filled from ``self.max_actions_per_step`` and
    ``self.default_action_description`` respectively.

    Returns:
        SystemMessage: Message whose content is the formatted prompt.
    """
    # Collect the placeholder values first so the format call stays short.
    template_values = {
        "max_actions": self.max_actions_per_step,
        "available_actions": self.default_action_description,
    }
    rendered_prompt = self.prompt_template.format(**template_values)
    return SystemMessage(content=rendered_prompt)
class CustomAgentMessagePrompt(AgentMessagePrompt):
def __init__(

View File

@@ -30,7 +30,7 @@ Example:
]
}}
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {max_actions} actions per sequence.
Common action sequences:
- Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
- Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
@@ -39,6 +39,7 @@ Common action sequences:
- Only provide the action sequence until an action which changes the page state significantly.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
- only use multiple actions if it makes sense.
- Only choose from the available actions listed below.
3. ELEMENT INTERACTION:
- Only use indexes of the interactive elements
@@ -73,4 +74,7 @@ Common action sequences:
9. Extraction:
- If your task is to find information - call extract_content on the specific pages to get and store the information.
Your responses must be always JSON with the specified format.
Your responses must be always JSON with the specified format.
Available Actions:
{available_actions}

View File

@@ -118,26 +118,26 @@ async def test_browser_use_custom():
# api_key=os.getenv("OPENAI_API_KEY", ""),
# )
llm = utils.get_llm_model(
provider="azure_openai",
model_name="gpt-4o",
temperature=0.5,
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
)
# llm = utils.get_llm_model(
# provider="azure_openai",
# model_name="gpt-4o",
# provider="google",
# model_name="gemini-2.0-flash",
# temperature=0.6,
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
# api_key=os.getenv("GOOGLE_API_KEY", "")
# )
llm = utils.get_llm_model(
provider="google",
model_name="gemini-2.0-flash",
temperature=0.6,
api_key=os.getenv("GOOGLE_API_KEY", "")
)
llm = utils.get_llm_model(
provider="deepseek",
model_name="deepseek-reasoner",
temperature=0.8
)
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-reasoner",
# temperature=0.8
# )
# llm = utils.get_llm_model(
# provider="deepseek",
@@ -156,9 +156,9 @@ async def test_browser_use_custom():
controller = CustomController()
use_own_browser = True
disable_security = True
use_vision = False # Set to False when using DeepSeek
use_vision = True # Set to False when using DeepSeek
max_actions_per_step = 1
max_actions_per_step = 10
playwright = None
browser = None
browser_context = None
@@ -193,7 +193,7 @@ async def test_browser_use_custom():
)
)
agent = CustomAgent(
task="Give me stock price of Nvidia",
task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
add_infos="", # some hints for llm to complete the task
llm=llm,
browser=browser,

View File

@@ -332,7 +332,7 @@ async def run_org_agent(
try:
global _global_browser, _global_browser_context, _global_agent
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
cdp_url = chrome_cdp
if use_own_browser:
@@ -362,6 +362,7 @@ async def run_org_agent(
config=BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
save_downloads_path="./tmp/downloads",
no_viewport=False,
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
@@ -435,7 +436,7 @@ async def run_custom_agent(
try:
global _global_browser, _global_browser_context, _global_agent
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
cdp_url = chrome_cdp
if use_own_browser:
cdp_url = os.getenv("CHROME_CDP", chrome_cdp)
@@ -470,6 +471,7 @@ async def run_custom_agent(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path if save_recording_path else None,
no_viewport=False,
save_downloads_path="./tmp/downloads",
browser_window_size=BrowserContextWindowSize(
width=window_w, height=window_h
),