mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
Merge branch 'main' of https://github.com/katiue/browser-use-webui
This commit is contained in:
54
README.md
54
README.md
@@ -5,4 +5,56 @@ sdk: gradio
|
||||
sdk_version: 5.9.1
|
||||
python_version: 3.12
|
||||
startup_duration_timeout: 2h
|
||||
---
|
||||
---
|
||||
# Browser-Use WebUI
|
||||
|
||||
## Background
|
||||
|
||||
This project builds upon [browser-use](https://github.com/browser-use/browser-use), a library designed to make websites accessible to AI agents. We have enhanced its original capabilities by providing:
|
||||
|
||||
1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
|
||||
|
||||
2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, and Ollama. We plan to add support for even more models in the future.
|
||||
|
||||
3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
|
||||
|
||||
4. **Customized Agent:** We've implemented a custom agent that enhances `browser-use` with optimized prompts.
|
||||
|
||||
<video src="https://github.com/user-attachments/assets/58c0f59e-02b4-4413-aba8-6184616bf181" controls="controls" width="500" height="300" >Your browser does not support playing this video!</video>
|
||||
|
||||
**Changelog**
|
||||
- [x] **2025/01/06:** Thanks to @richard-devbot, a new and well-designed WebUI has been released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
|
||||
|
||||
|
||||
## Environment Installation
|
||||
|
||||
1. **Python Version:** Ensure you have Python 3.11 or higher installed.
|
||||
2. **Install `browser-use`:**
|
||||
```bash
|
||||
pip install browser-use
|
||||
```
|
||||
3. **Install Playwright:**
|
||||
```bash
|
||||
playwright install
|
||||
```
|
||||
4. **Install Dependencies:**
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
5. **Configure Environment Variables:**
|
||||
- Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM.
|
||||
- **If using your own browser:**
|
||||
- Set `CHROME_PATH` to the executable path of your browser (e.g., `C:\Program Files\Google\Chrome\Application\chrome.exe` on Windows).
|
||||
- Set `CHROME_USER_DATA` to the user data directory of your browser (e.g., `C:\Users\<YourUsername>\AppData\Local\Google\Chrome\User Data`).
|
||||
|
||||
## Usage
|
||||
|
||||
1. **Run the WebUI:**
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788
|
||||
```
|
||||
2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
3. **Using Your Own Browser:**
|
||||
- Close all Chrome windows.
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
- Check the "Use Own Browser" option within the Browser Settings.
|
||||
|
||||
@@ -3,4 +3,6 @@ langchain-google-genai
|
||||
pyperclip
|
||||
gradio
|
||||
python-dotenv
|
||||
argparse
|
||||
argparse
|
||||
langchain-ollama
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
- sometimes labels overlap, so use the context to verify the correct element
|
||||
|
||||
7. Form filling:
|
||||
- If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
|
||||
- If you fill an input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
|
||||
|
||||
8. ACTION SEQUENCING:
|
||||
- Actions are executed in the order they appear in the list
|
||||
|
||||
@@ -11,6 +11,7 @@ import os
|
||||
from langchain_openai import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from langchain_ollama import ChatOllama
|
||||
|
||||
|
||||
def get_llm_model(provider: str, **kwargs):
|
||||
@@ -39,7 +40,7 @@ def get_llm_model(provider: str, **kwargs):
|
||||
)
|
||||
elif provider == 'openai':
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = "https://api.openai.com/v1"
|
||||
base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
|
||||
else:
|
||||
base_url = kwargs.get("base_url")
|
||||
|
||||
@@ -66,7 +67,7 @@ def get_llm_model(provider: str, **kwargs):
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatOpenAI(
|
||||
model=kwargs.get("model_name", 'gpt-4o'),
|
||||
model=kwargs.get("model_name", 'deepseek-chat'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
@@ -81,6 +82,11 @@ def get_llm_model(provider: str, **kwargs):
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
google_api_key=api_key,
|
||||
)
|
||||
elif provider == 'ollama':
|
||||
return ChatOllama(
|
||||
model=kwargs.get("model_name", 'qwen2.5:7b'),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
)
|
||||
elif provider == "azure_openai":
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
|
||||
|
||||
@@ -105,9 +105,15 @@ async def test_browser_use_custom():
|
||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-chat",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="deepseek",
|
||||
model_name="deepseek-chat",
|
||||
provider="ollama",
|
||||
model_name="qwen2.5:7b",
|
||||
temperature=0.8
|
||||
)
|
||||
|
||||
|
||||
@@ -106,7 +106,6 @@ def test_deepseek_model():
|
||||
base_url=os.getenv("DEEPSEEK_ENDPOINT", ""),
|
||||
api_key=os.getenv("DEEPSEEK_API_KEY", "")
|
||||
)
|
||||
pdb.set_trace()
|
||||
message = HumanMessage(
|
||||
content=[
|
||||
{"type": "text", "text": "who are you?"}
|
||||
@@ -116,8 +115,17 @@ def test_deepseek_model():
|
||||
print(ai_msg.content)
|
||||
|
||||
|
||||
def test_ollama_model():
|
||||
from langchain_ollama import ChatOllama
|
||||
|
||||
llm = ChatOllama(model="qwen2.5:7b")
|
||||
ai_msg = llm.invoke("Sing a ballad of LangChain.")
|
||||
print(ai_msg.content)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# test_openai_model()
|
||||
# test_gemini_model()
|
||||
# test_azure_openai_model()
|
||||
test_deepseek_model()
|
||||
# test_deepseek_model()
|
||||
test_ollama_model()
|
||||
|
||||
44
webui.py
44
webui.py
@@ -46,10 +46,14 @@ async def run_browser_agent(
|
||||
use_vision,
|
||||
browser_context=None # Added optional argument
|
||||
):
|
||||
"""
|
||||
Runs the browser agent based on user configurations.
|
||||
"""
|
||||
# Ensure the recording directory exists
|
||||
os.makedirs(save_recording_path, exist_ok=True)
|
||||
|
||||
# Get the list of existing videos before the agent runs
|
||||
existing_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
|
||||
glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
|
||||
|
||||
# Run the agent
|
||||
llm = utils.get_llm_model(
|
||||
provider=llm_provider,
|
||||
model_name=llm_model_name,
|
||||
@@ -58,7 +62,7 @@ async def run_browser_agent(
|
||||
api_key=llm_api_key
|
||||
)
|
||||
if agent_type == "org":
|
||||
return await run_org_agent(
|
||||
final_result, errors, model_actions, model_thoughts = await run_org_agent(
|
||||
llm=llm,
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
@@ -71,7 +75,7 @@ async def run_browser_agent(
|
||||
browser_context=browser_context # pass context
|
||||
)
|
||||
elif agent_type == "custom":
|
||||
return await run_custom_agent(
|
||||
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
|
||||
llm=llm,
|
||||
use_own_browser=use_own_browser,
|
||||
headless=headless,
|
||||
@@ -88,6 +92,16 @@ async def run_browser_agent(
|
||||
else:
|
||||
raise ValueError(f"Invalid agent type: {agent_type}")
|
||||
|
||||
# Get the list of videos after the agent runs
|
||||
new_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
|
||||
glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
|
||||
|
||||
# Find the newly created video
|
||||
latest_video = None
|
||||
if new_videos - existing_videos:
|
||||
latest_video = list(new_videos - existing_videos)[0] # Get the first new video
|
||||
|
||||
return final_result, errors, model_actions, model_thoughts, latest_video
|
||||
|
||||
async def run_org_agent(
|
||||
llm,
|
||||
@@ -420,22 +434,10 @@ def main():
|
||||
run_button.click(
|
||||
fn=run_with_stream,
|
||||
inputs=[
|
||||
agent_type,
|
||||
llm_provider,
|
||||
llm_model_name,
|
||||
llm_temperature,
|
||||
llm_base_url,
|
||||
llm_api_key,
|
||||
use_own_browser,
|
||||
headless,
|
||||
disable_security,
|
||||
window_w,
|
||||
window_h,
|
||||
save_recording_path,
|
||||
task,
|
||||
add_infos,
|
||||
max_steps,
|
||||
use_vision
|
||||
agent_type, llm_provider, llm_model_name, llm_temperature,
|
||||
llm_base_url, llm_api_key, use_own_browser, headless,
|
||||
disable_security, window_w, window_h, save_recording_path,
|
||||
task, add_infos, max_steps, use_vision
|
||||
],
|
||||
outputs=[
|
||||
browser_view,
|
||||
|
||||
Reference in New Issue
Block a user