mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
add browser-use agent run
This commit is contained in:
@@ -17,98 +17,18 @@ from browser_use.agent.views import AgentHistoryList
|
||||
from src.utils import utils
|
||||
|
||||
|
||||
async def test_browser_use_org():
|
||||
async def test_browser_use_agent():
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-chat",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
)
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
use_vision = False
|
||||
use_own_browser = False
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
tool_calling_method = "json_schema" # setting to json_schema when using ollma
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
) as browser_context:
|
||||
agent = Agent(
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
use_vision=use_vision,
|
||||
tool_calling_method=tool_calling_method
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def test_browser_use_custom():
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
from src.utils import llm_provider
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -118,14 +38,6 @@ async def test_browser_use_custom():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
@@ -153,13 +65,43 @@ async def test_browser_use_custom():
|
||||
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
# )
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
}
|
||||
}
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
playwright = None
|
||||
browser = None
|
||||
browser_context = None
|
||||
|
||||
@@ -178,29 +120,27 @@ async def test_browser_use_custom():
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=extra_chromium_args,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agent = CustomAgent(
|
||||
task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
|
||||
add_infos="", # some hints for llm to complete the task
|
||||
agent = Agent(
|
||||
task="download pdf from https://arxiv.org/abs/2504.10458 and rename this pdf to 'GUI-r1-test.pdf'",
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
agent_prompt_class=CustomAgentMessagePrompt,
|
||||
use_vision=use_vision,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
generate_gif=True
|
||||
@@ -213,28 +153,17 @@ async def test_browser_use_custom():
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
|
||||
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context:
|
||||
await browser_context.close()
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
if browser:
|
||||
await browser.close()
|
||||
if controller:
|
||||
await controller.close_mcp_client()
|
||||
|
||||
|
||||
async def test_browser_use_parallel():
|
||||
@@ -242,13 +171,20 @@ async def test_browser_use_parallel():
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
from browser_use.browser.browser import Browser
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -258,20 +194,13 @@ async def test_browser_use_parallel():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="gemini",
|
||||
model_name="gemini-2.0-flash-exp",
|
||||
temperature=1.0,
|
||||
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
)
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
# temperature=0.6,
|
||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
@@ -293,72 +222,119 @@ async def test_browser_use_parallel():
|
||||
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
# )
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 1
|
||||
playwright = None
|
||||
max_actions_per_step = 10
|
||||
browser = None
|
||||
browser_context = None
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
disable_security=True,
|
||||
headless=False,
|
||||
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agents = [
|
||||
Agent(task=task, llm=llm, browser=browser)
|
||||
Agent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
for task in [
|
||||
'Search Google for weather in Tokyo',
|
||||
'Check Reddit front page title',
|
||||
'Find NASA image of the day',
|
||||
'Check top story on CNN',
|
||||
# 'Check Reddit front page title',
|
||||
# 'Find NASA image of the day',
|
||||
# 'Check top story on CNN',
|
||||
# 'Search latest SpaceX launch date',
|
||||
# 'Look up population of Paris',
|
||||
# 'Find current time in Sydney',
|
||||
# 'Check who won last Super Bowl',
|
||||
'Find current time in Sydney',
|
||||
'Check who won last Super Bowl',
|
||||
# 'Search trending topics on Twitter',
|
||||
]
|
||||
]
|
||||
|
||||
history = await asyncio.gather(*[agent.run() for agent in agents])
|
||||
pdb.set_trace()
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
pdb.set_trace()
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context:
|
||||
await browser_context.close()
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
if browser:
|
||||
await browser.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_browser_use_org())
|
||||
# asyncio.run(test_browser_use_parallel())
|
||||
# asyncio.run(test_browser_use_custom())
|
||||
# asyncio.run(test_browser_use_agent())
|
||||
asyncio.run(test_browser_use_parallel())
|
||||
|
||||
@@ -45,33 +45,37 @@ async def test_controller_with_mcp():
|
||||
from src.controller.custom_controller import CustomController
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
|
||||
test_server_config = {
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@playwright/mcp@latest",
|
||||
],
|
||||
"transport": "stdio",
|
||||
},
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@modelcontextprotocol/server-filesystem",
|
||||
"/Users/xxx/ai_workspace",
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(test_server_config)
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
action_name = "mcp.desktop-commander.execute_command"
|
||||
action_info = controller.registry.registry.actions[action_name]
|
||||
param_model = action_info.param_model
|
||||
@@ -85,7 +89,8 @@ async def test_controller_with_mcp():
|
||||
result = await controller.act(action_model)
|
||||
result = result.extracted_content
|
||||
print(result)
|
||||
if result and "Command is still running. Use read_output to get more output." in result and "PID" in result.split("\n")[0]:
|
||||
if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
|
||||
result.split("\n")[0]:
|
||||
pid = int(result.split("\n")[0].split("PID")[-1].strip())
|
||||
action_name = "mcp.desktop-commander.read_output"
|
||||
action_info = controller.registry.registry.actions[action_name]
|
||||
|
||||
@@ -144,10 +144,10 @@ def test_ibm_model():
|
||||
if __name__ == "__main__":
|
||||
# test_openai_model()
|
||||
# test_google_model()
|
||||
# test_azure_openai_model()
|
||||
test_azure_openai_model()
|
||||
# test_deepseek_model()
|
||||
# test_ollama_model()
|
||||
# test_deepseek_r1_model()
|
||||
# test_deepseek_r1_ollama_model()
|
||||
# test_mistral_model()
|
||||
test_ibm_model()
|
||||
# test_ibm_model()
|
||||
|
||||
Reference in New Issue
Block a user