mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
Merge pull request #526 from vvincent1234/fix/multi_tab
fix multiple tab
This commit is contained in:
@@ -74,7 +74,8 @@ class CustomMessageManager(MessageManager):
|
||||
min_message_len = 2 if self.context_content is not None else 1
|
||||
|
||||
while diff > 0 and len(self.state.history.messages) > min_message_len:
|
||||
self.state.history.remove_message(min_message_len) # always remove the oldest message
|
||||
msg = self.state.history.messages.pop(min_message_len)
|
||||
self.state.history.current_tokens -= msg.metadata.tokens
|
||||
diff = self.state.history.current_tokens - self.settings.max_input_tokens
|
||||
|
||||
def add_state_message(
|
||||
@@ -104,6 +105,7 @@ class CustomMessageManager(MessageManager):
|
||||
if isinstance(self.state.history.messages[i].message, HumanMessage):
|
||||
remove_cnt += 1
|
||||
if remove_cnt == abs(remove_ind):
|
||||
self.state.history.messages.pop(i)
|
||||
msg = self.state.history.messages.pop(i)
|
||||
self.state.history.current_tokens -= msg.metadata.tokens
|
||||
break
|
||||
i -= 1
|
||||
|
||||
@@ -21,6 +21,18 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
except Exception as e:
|
||||
raise RuntimeError(f'Failed to load system prompt template: {e}')
|
||||
|
||||
def get_system_message(self) -> SystemMessage:
    """
    Build the system message for the agent from the prompt template.

    The template's ``max_actions`` and ``available_actions`` placeholders
    are filled from ``self.max_actions_per_step`` and
    ``self.default_action_description`` respectively.

    Returns:
        SystemMessage: Message whose content is the formatted prompt
    """
    # Gather the placeholder values first so the format call stays short.
    template_values = {
        "max_actions": self.max_actions_per_step,
        "available_actions": self.default_action_description,
    }
    rendered_prompt = self.prompt_template.format(**template_values)

    return SystemMessage(content=rendered_prompt)
|
||||
|
||||
|
||||
class CustomAgentMessagePrompt(AgentMessagePrompt):
|
||||
def __init__(
|
||||
|
||||
@@ -30,7 +30,7 @@ Example:
|
||||
]
|
||||
}}
|
||||
|
||||
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
|
||||
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {max_actions} actions per sequence.
|
||||
Common action sequences:
|
||||
- Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
|
||||
- Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
|
||||
@@ -39,6 +39,7 @@ Common action sequences:
|
||||
- Only provide the action sequence until an action which changes the page state significantly.
|
||||
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
|
||||
- only use multiple actions if it makes sense.
|
||||
- Only choose from the available actions below.
|
||||
|
||||
3. ELEMENT INTERACTION:
|
||||
- Only use indexes of the interactive elements
|
||||
@@ -73,4 +74,7 @@ Common action sequences:
|
||||
|
||||
9. Extraction:
|
||||
- If your task is to find information - call extract_content on the specific pages to get and store the information.
|
||||
Your responses must be always JSON with the specified format.
|
||||
Your responses must be always JSON with the specified format.
|
||||
|
||||
Available Actions:
|
||||
{available_actions}
|
||||
@@ -118,26 +118,26 @@ async def test_browser_use_custom():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
# temperature=0.6,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="google",
|
||||
model_name="gemini-2.0-flash",
|
||||
temperature=0.6,
|
||||
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
)
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="deepseek",
|
||||
model_name="deepseek-reasoner",
|
||||
temperature=0.8
|
||||
)
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-reasoner",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
@@ -156,9 +156,9 @@ async def test_browser_use_custom():
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
disable_security = True
|
||||
use_vision = False # Set to False when using DeepSeek
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 1
|
||||
max_actions_per_step = 10
|
||||
playwright = None
|
||||
browser = None
|
||||
browser_context = None
|
||||
@@ -193,7 +193,7 @@ async def test_browser_use_custom():
|
||||
)
|
||||
)
|
||||
agent = CustomAgent(
|
||||
task="Give me stock price of Nvidia",
|
||||
task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
|
||||
add_infos="", # some hints for llm to complete the task
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
|
||||
6
webui.py
6
webui.py
@@ -332,7 +332,7 @@ async def run_org_agent(
|
||||
try:
|
||||
global _global_browser, _global_browser_context, _global_agent
|
||||
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
|
||||
cdp_url = chrome_cdp
|
||||
|
||||
if use_own_browser:
|
||||
@@ -362,6 +362,7 @@ async def run_org_agent(
|
||||
config=BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
@@ -435,7 +436,7 @@ async def run_custom_agent(
|
||||
try:
|
||||
global _global_browser, _global_browser_context, _global_agent
|
||||
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"]
|
||||
cdp_url = chrome_cdp
|
||||
if use_own_browser:
|
||||
cdp_url = os.getenv("CHROME_CDP", chrome_cdp)
|
||||
@@ -470,6 +471,7 @@ async def run_custom_agent(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path if save_recording_path else None,
|
||||
no_viewport=False,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user