mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
add webui and readme
This commit is contained in:
@@ -12,4 +12,7 @@ AZURE_OPENAI_API_KEY=
|
||||
ANONYMIZED_TELEMETRY=true
|
||||
|
||||
# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
|
||||
BROWSER_USE_LOGGING_LEVEL=info
|
||||
BROWSER_USE_LOGGING_LEVEL=info
|
||||
|
||||
CHROME_PATH=
|
||||
CHROME_USER_DATA=
|
||||
47
README.md
Normal file
47
README.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# Browser-Use WebUI
|
||||
|
||||
## Background
|
||||
|
||||
This project builds upon the foundation of [browser-use](https://github.com/browser-use/browser-use), which is designed to make websites accessible for AI agents. We have enhanced the original capabilities by providing:
|
||||
|
||||
1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
|
||||
|
||||
2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including Gemini, OpenAI, Azure OpenAI, and Anthropic, and we plan to add support for even more models in the future.
|
||||
|
||||
3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
|
||||
|
||||
4. **Customized Agent:** We've implemented a custom agent that enhances `browser-use` with optimized prompts.
|
||||
|
||||
<video src="https://github.com/user-attachments/assets/cc4ca59f-e4a5-43d8-86db-bb0e6edbedef" controls="controls" width="500" height="300" >Your browser does not support playing this video!</video>
|
||||
|
||||
## Environment Installation
|
||||
|
||||
1. **Python Version:** Ensure you have Python 3.11 or higher installed.
|
||||
2. **Install `browser-use`:**
|
||||
```bash
|
||||
pip install browser-use
|
||||
```
|
||||
3. **Install Playwright:**
|
||||
```bash
|
||||
playwright install
|
||||
```
|
||||
4. **Install Dependencies:**
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
5. **Configure Environment Variables:**
|
||||
- Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM.
|
||||
- **If using your own browser:**
|
||||
- Set `CHROME_PATH` to the executable path of your browser (e.g., `C:\Program Files\Google\Chrome\Application\chrome.exe` on Windows).
|
||||
- Set `CHROME_USER_DATA` to the user data directory of your browser (e.g., `C:\Users\<YourUsername>\AppData\Local\Google\Chrome\User Data`).
|
||||
|
||||
## Usage
|
||||
|
||||
1. **Run the WebUI:**
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788
|
||||
```
|
||||
2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
3. **Using Your Own Browser:**
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
- Check the "Use Own Browser" option within the Browser Settings.
|
||||
@@ -1,3 +1,4 @@
|
||||
browser-use
|
||||
langchain-google-genai
|
||||
pyperclip
|
||||
gradio
|
||||
@@ -6,6 +6,8 @@
|
||||
# @FileName: utils.py
|
||||
|
||||
import base64
|
||||
import os
|
||||
|
||||
from langchain_openai import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
@@ -19,32 +21,64 @@ def get_llm_model(provider: str, **kwargs):
|
||||
:return:
|
||||
"""
|
||||
if provider == 'anthropic':
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = "https://api.anthropic.com"
|
||||
else:
|
||||
base_url = kwargs.get("base_url")
|
||||
|
||||
if not kwargs.get("api_key", ""):
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatAnthropic(
|
||||
model_name=kwargs.get("model_name", 'claude-3-5-sonnet-20240620'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=kwargs.get("base_url", "https://api.anthropic.com"),
|
||||
api_key=kwargs.get("api_key", None)
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
)
|
||||
elif provider == 'openai':
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = "https://api.openai.com/v1"
|
||||
else:
|
||||
base_url = kwargs.get("base_url")
|
||||
|
||||
if not kwargs.get("api_key", ""):
|
||||
api_key = os.getenv("OPENAI_API_KEY", "")
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatOpenAI(
|
||||
model=kwargs.get("model_name", 'gpt-4o'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=kwargs.get("base_url", "https://api.openai.com/v1/"),
|
||||
api_key=kwargs.get("api_key", None)
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
)
|
||||
elif provider == 'gemini':
|
||||
if not kwargs.get("api_key", ""):
|
||||
api_key = os.getenv("GOOGLE_API_KEY", "")
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
return ChatGoogleGenerativeAI(
|
||||
model=kwargs.get("model_name", 'gemini-2.0-flash-exp'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
google_api_key=kwargs.get("api_key", None),
|
||||
google_api_key=api_key,
|
||||
)
|
||||
elif provider == "azure_openai":
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
|
||||
else:
|
||||
base_url = kwargs.get("base_url")
|
||||
if not kwargs.get("api_key", ""):
|
||||
api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
return AzureChatOpenAI(
|
||||
model=kwargs.get("model_name", 'gpt-4o'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
api_version="2024-05-01-preview",
|
||||
azure_endpoint=kwargs.get("base_url", ""),
|
||||
api_key=kwargs.get("api_key", None)
|
||||
azure_endpoint=base_url,
|
||||
api_key=api_key
|
||||
)
|
||||
else:
|
||||
raise ValueError(f'Unsupported provider: {provider}')
|
||||
|
||||
@@ -106,7 +106,7 @@ async def test_browser_use_custom():
|
||||
)
|
||||
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
playwright = None
|
||||
browser_context_ = None
|
||||
|
||||
297
webui.py
297
webui.py
@@ -4,3 +4,300 @@
|
||||
# @Email : wenshaoguo1026@gmail.com
|
||||
# @Project : browser-use-webui
|
||||
# @FileName: webui.py
|
||||
import pdb
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
import argparse
|
||||
|
||||
import asyncio
|
||||
|
||||
import gradio as gr
|
||||
import asyncio
|
||||
import os
|
||||
from pprint import pprint
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContext,
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser, BrowserConfig
|
||||
from src.browser.custom_context import BrowserContext, BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
|
||||
from src.utils import utils
|
||||
|
||||
|
||||
async def run_browser_agent(
        agent_type,
        llm_provider,
        llm_model_name,
        llm_temperature,
        llm_base_url,
        llm_api_key,
        use_own_browser,
        headless,
        disable_security,
        window_w,
        window_h,
        save_recording_path,
        task,
        add_infos,
        progress=gr.Progress()
):
    """
    Build the LLM from the UI settings and dispatch the task to the chosen agent runner.

    ``agent_type`` selects the runner: ``"org"`` calls ``run_org_agent`` and
    ``"custom"`` calls ``run_custom_agent`` (which additionally receives
    ``use_own_browser`` and ``add_infos``). The runner's return value is
    passed back unchanged.

    :raises ValueError: if ``agent_type`` is neither ``"org"`` nor ``"custom"``.
        The LLM is constructed before this check is made.
    """
    llm = utils.get_llm_model(
        provider=llm_provider,
        model_name=llm_model_name,
        temperature=llm_temperature,
        base_url=llm_base_url,
        api_key=llm_api_key,
    )

    # Keyword arguments accepted by both runners.
    shared_kwargs = dict(
        llm=llm,
        headless=headless,
        disable_security=disable_security,
        window_w=window_w,
        window_h=window_h,
        save_recording_path=save_recording_path,
        task=task,
        progress=progress,
    )

    if agent_type == "org":
        return await run_org_agent(**shared_kwargs)

    if agent_type == "custom":
        return await run_custom_agent(
            use_own_browser=use_own_browser,
            add_infos=add_infos,
            **shared_kwargs,
        )

    raise ValueError(f"Invalid agent type: {agent_type}")
|
||||
|
||||
|
||||
async def run_org_agent(
        llm,
        headless,
        disable_security,
        window_w,
        window_h,
        save_recording_path,
        task,
        progress
):
    """
    Run the stock browser-use ``Agent`` on ``task`` in a newly created browser.

    :param llm: chat model handed to the agent.
    :param headless: forwarded to ``BrowserConfig(headless=...)``.
    :param disable_security: forwarded to ``BrowserConfig(disable_security=...)``.
    :param window_w: window width, used for ``--window-size`` and the context window size.
    :param window_h: window height, used the same way as ``window_w``.
    :param save_recording_path: recording directory; a falsy value is passed on as ``None``.
    :param task: task text given to the agent.
    :param progress: accepted so the signature matches the caller; not used in this function.
    :return: tuple ``(final_result, errors, model_actions, model_thoughts)`` read
        from the history object returned by ``agent.run``.
    """
    browser = Browser(
        config=BrowserConfig(
            headless=headless,
            disable_security=disable_security,
            extra_chromium_args=[f'--window-size={window_w},{window_h}'],
        )
    )
    try:
        async with await browser.new_context(
                config=BrowserContextConfig(
                    trace_path='./tmp/traces',
                    save_recording_path=save_recording_path if save_recording_path else None,
                    no_viewport=False,
                    browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
                )
        ) as browser_context:
            agent = Agent(
                task=task,
                llm=llm,
                browser_context=browser_context,
            )
            history = await agent.run(max_steps=10)

            final_result = history.final_result()
            errors = history.errors()
            model_actions = history.model_actions()
            model_thoughts = history.model_thoughts()
    finally:
        # Close the browser on every exit path; previously a failure while
        # creating the context or running the agent skipped this call.
        await browser.close()
    return final_result, errors, model_actions, model_thoughts
|
||||
|
||||
|
||||
async def run_custom_agent(
        llm,
        use_own_browser,
        headless,
        disable_security,
        window_w,
        window_h,
        save_recording_path,
        task,
        add_infos,
        progress
):
    """
    Run ``CustomAgent`` on ``task``, optionally inside the user's own Chrome profile.

    When ``use_own_browser`` is true, a persistent Playwright Chromium context is
    launched from the ``CHROME_PATH`` / ``CHROME_USER_DATA`` environment variables
    and passed to ``CustomBrowser.new_context``; otherwise ``context=None`` is passed.

    :param llm: chat model handed to the agent.
    :param use_own_browser: whether to launch the persistent context described above.
    :param headless: forwarded to both the persistent context and ``BrowserConfig``.
    :param disable_security: forwarded to ``BrowserConfig``; also used for
        ``bypass_csp`` and ``ignore_https_errors`` on the persistent context.
    :param window_w: window width (window size and recording size).
    :param window_h: window height (window size and recording size).
    :param save_recording_path: recording directory; a falsy value is passed on as ``None``.
    :param task: task text given to the agent.
    :param add_infos: additional information passed to ``CustomAgent``.
    :param progress: accepted so the signature matches the caller; not used in this function.
    :return: tuple ``(final_result, errors, model_actions, model_thoughts)``. If any
        step raises, the exception is caught and the tuple is
        ``("", <message and traceback>, "", "")``.
    """
    controller = CustomController()
    playwright = None
    browser_context_ = None
    # Pre-bind so the cleanup below is safe when a failure happens before the
    # CustomBrowser is constructed (e.g. the persistent context fails to launch).
    browser = None
    try:
        if use_own_browser:
            playwright = await async_playwright().start()
            chrome_exe = os.getenv("CHROME_PATH", "")
            chrome_use_data = os.getenv("CHROME_USER_DATA", "")
            browser_context_ = await playwright.chromium.launch_persistent_context(
                user_data_dir=chrome_use_data,
                executable_path=chrome_exe,
                no_viewport=False,
                headless=headless,  # follows the caller's headless setting
                user_agent=(
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                    '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
                ),
                java_script_enabled=True,
                bypass_csp=disable_security,
                ignore_https_errors=disable_security,
                record_video_dir=save_recording_path if save_recording_path else None,
                record_video_size={'width': window_w, 'height': window_h}
            )

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=headless,
                disable_security=disable_security,
                extra_chromium_args=[f'--window-size={window_w},{window_h}'],
            )
        )
        async with await browser.new_context(
                config=BrowserContextConfig(
                    trace_path='./tmp/result_processing',
                    save_recording_path=save_recording_path if save_recording_path else None,
                    no_viewport=False,
                    browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
                ),
                context=browser_context_
        ) as browser_context:
            agent = CustomAgent(
                task=task,
                add_infos=add_infos,
                llm=llm,
                browser_context=browser_context,
                controller=controller,
                system_prompt_class=CustomSystemPrompt
            )
            history = await agent.run(max_steps=10)

            final_result = history.final_result()
            errors = history.errors()
            model_actions = history.model_actions()
            model_thoughts = history.model_thoughts()

    except Exception as e:
        # UI boundary: report the failure in the "Errors" output instead of raising.
        import traceback
        traceback.print_exc()
        final_result = ""
        errors = str(e) + "\n" + traceback.format_exc()
        model_actions = ""
        model_thoughts = ""
    finally:
        # Explicitly close the persistent context.
        if browser_context_:
            await browser_context_.close()

        # Stop the Playwright driver started for the persistent context.
        if playwright:
            await playwright.stop()

        # Only close a browser that was actually created.
        if browser is not None:
            await browser.close()
    return final_result, errors, model_actions, model_thoughts
|
||||
|
||||
|
||||
def main():
    """
    Parse ``--ip`` / ``--port``, build the Gradio UI, and launch the server.

    The "Run Agent" button is wired to ``run_browser_agent``; its four return
    values fill the Final Result, Errors, Model Actions and Model Thoughts boxes.
    """
    # NOTE(review): widget nesting below was reconstructed from a listing that had
    # lost its indentation -- confirm the layout against the rendered page.
    parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
    args = parser.parse_args()

    # Client-side snippet passed to gr.Blocks(js=...): if the URL's `__theme`
    # query parameter is not 'dark', set it and navigate to the updated URL.
    js_func = """
    function refresh() {
        const url = new URL(window.location);

        if (url.searchParams.get('__theme') !== 'dark') {
            url.searchParams.set('__theme', 'dark');
            window.location.href = url.href;
        }
    }
    """

    # Gradio UI setup
    with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]),
                   js=js_func) as demo:
        gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
        # Agent selection: values must match the branches in run_browser_agent.
        with gr.Row():
            agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
        # LLM selection and sampling temperature.
        with gr.Row():
            llm_provider = gr.Dropdown(
                ["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini"
            )
            llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
            llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
        # Optional endpoint / key fields, passed through to utils.get_llm_model.
        with gr.Row():
            llm_base_url = gr.Textbox(label="LLM Base URL")
            llm_api_key = gr.Textbox(label="LLM API Key", type="password")

        with gr.Accordion("Browser Settings", open=False):
            use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
            headless = gr.Checkbox(label="Headless", value=False)
            disable_security = gr.Checkbox(label="Disable Security", value=True)
            with gr.Row():
                window_w = gr.Number(label="Window Width", value=1920)
                window_h = gr.Number(label="Window Height", value=1080)
            save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
                                             value="./tmp/record_videos")
        with gr.Accordion("Task Settings", open=True):
            task = gr.Textbox(label="Task", lines=10,
                              value="go to google.com and type 'OpenAI' click search and give me the first url")
            add_infos = gr.Textbox(label="Additional Infos", lines=10)

        run_button = gr.Button("Run Agent", variant="primary")
        # Output boxes, in the same order as run_browser_agent's return tuple.
        with gr.Column():
            final_result_output = gr.Textbox(label="Final Result", lines=5)
            errors_output = gr.Textbox(label="Errors", lines=5, )
            model_actions_output = gr.Textbox(label="Model Actions", lines=5)
            model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)

        # The inputs list is positional: it must follow run_browser_agent's
        # parameter order (progress is left to its default).
        run_button.click(
            fn=run_browser_agent,
            inputs=[
                agent_type,
                llm_provider,
                llm_model_name,
                llm_temperature,
                llm_base_url,
                llm_api_key,
                use_own_browser,
                headless,
                disable_security,
                window_w,
                window_h,
                save_recording_path,
                task,
                add_infos,
            ],
            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
        )

    demo.launch(server_name=args.ip, server_port=args.port)
|
||||
|
||||
|
||||
# Start the web UI only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
||||
|
||||
Reference in New Issue
Block a user