From e54c1fda0b3c44008a021cec46113a09f7466db1 Mon Sep 17 00:00:00 2001 From: warmshao Date: Sat, 4 Jan 2025 10:58:48 +0800 Subject: [PATCH 01/20] add ollama --- requirements.txt | 3 ++- src/utils/utils.py | 10 ++++++++-- tests/test_browser_use.py | 10 ++++++++-- tests/test_llm_api.py | 12 ++++++++++-- webui.py | 3 ++- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index eb339d6..1471909 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ browser-use langchain-google-genai pyperclip -gradio \ No newline at end of file +gradio +langchain-ollama \ No newline at end of file diff --git a/src/utils/utils.py b/src/utils/utils.py index cc3b9e4..6fbbd6c 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -11,6 +11,7 @@ import os from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_anthropic import ChatAnthropic from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_ollama import ChatOllama def get_llm_model(provider: str, **kwargs): @@ -39,7 +40,7 @@ def get_llm_model(provider: str, **kwargs): ) elif provider == 'openai': if not kwargs.get("base_url", ""): - base_url = "https://api.openai.com/v1" + base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1") else: base_url = kwargs.get("base_url") @@ -66,7 +67,7 @@ def get_llm_model(provider: str, **kwargs): api_key = kwargs.get("api_key") return ChatOpenAI( - model=kwargs.get("model_name", 'gpt-4o'), + model=kwargs.get("model_name", 'deepseek-chat'), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key @@ -81,6 +82,11 @@ def get_llm_model(provider: str, **kwargs): temperature=kwargs.get("temperature", 0.0), google_api_key=api_key, ) + elif provider == 'ollama': + return ChatOllama( + model=kwargs.get("model_name", 'qwen2.5:7b'), + temperature=kwargs.get("temperature", 0.0), + ) elif provider == "azure_openai": if not kwargs.get("base_url", ""): base_url = 
os.getenv("AZURE_OPENAI_ENDPOINT", "") diff --git a/tests/test_browser_use.py b/tests/test_browser_use.py index cc6c11e..84ed23a 100644 --- a/tests/test_browser_use.py +++ b/tests/test_browser_use.py @@ -105,9 +105,15 @@ async def test_browser_use_custom(): # api_key=os.getenv("GOOGLE_API_KEY", "") # ) + # llm = utils.get_llm_model( + # provider="deepseek", + # model_name="deepseek-chat", + # temperature=0.8 + # ) + llm = utils.get_llm_model( - provider="deepseek", - model_name="deepseek-chat", + provider="ollama", + model_name="qwen2.5:7b", temperature=0.8 ) diff --git a/tests/test_llm_api.py b/tests/test_llm_api.py index 03d5753..9e2a1d6 100644 --- a/tests/test_llm_api.py +++ b/tests/test_llm_api.py @@ -106,7 +106,6 @@ def test_deepseek_model(): base_url=os.getenv("DEEPSEEK_ENDPOINT", ""), api_key=os.getenv("DEEPSEEK_API_KEY", "") ) - pdb.set_trace() message = HumanMessage( content=[ {"type": "text", "text": "who are you?"} @@ -116,8 +115,17 @@ def test_deepseek_model(): print(ai_msg.content) +def test_ollama_model(): + from langchain_ollama import ChatOllama + + llm = ChatOllama(model="qwen2.5:7b") + ai_msg = llm.invoke("Sing a ballad of LangChain.") + print(ai_msg.content) + + if __name__ == '__main__': # test_openai_model() # test_gemini_model() # test_azure_openai_model() - test_deepseek_model() + # test_deepseek_model() + test_ollama_model() diff --git a/webui.py b/webui.py index f44bc14..b574115 100644 --- a/webui.py +++ b/webui.py @@ -255,7 +255,8 @@ def main(): use_vision = gr.Checkbox(label="use vision", value=True) with gr.Row(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini" + ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], label="LLM Provider", + value="gemini" ) llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp") llm_temperature = gr.Number(label="LLM Temperature", value=1.0) From 
3287b6e00af88b51999d583444b646d590e7b16b Mon Sep 17 00:00:00 2001 From: warmshao Date: Sat, 4 Jan 2025 10:59:22 +0800 Subject: [PATCH 02/20] add ollama to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ecb9bfe..6b40a9f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This project builds upon the foundation of the [browser-use](https://github.com/ 1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent. -2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek etc. And we plan to add support for even more models in the future. +2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama etc. And we plan to add support for even more models in the future. 3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording. From 0ecf9ffdd414ee3e47a0696ffa744712a677c55b Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Sun, 5 Jan 2025 16:58:45 +0900 Subject: [PATCH 03/20] chore: update custom_prompts.py minor fix --- src/agent/custom_prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index bd8e612..0d88e41 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -82,7 +82,7 @@ class CustomSystemPrompt(SystemPrompt): - sometimes labels overlap, so use the context to verify the correct element 7. 
Form filling: - - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list. + - If you fill an input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list. 8. ACTION SEQUENCING: - Actions are executed in the order they appear in the list From 901dcb7e1cf3d879731ec44c741d2986fc14df5e Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:02:11 +0530 Subject: [PATCH 04/20] Update webui.py --- webui.py | 302 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 222 insertions(+), 80 deletions(-) diff --git a/webui.py b/webui.py index f44bc14..5570be5 100644 --- a/webui.py +++ b/webui.py @@ -36,7 +36,6 @@ from src.agent.custom_prompts import CustomSystemPrompt from src.utils import utils - async def run_browser_agent( agent_type, llm_provider, @@ -55,9 +54,7 @@ async def run_browser_agent( max_steps, use_vision ): - """ - Runs the browser agent based on user configurations. 
- """ + llm = utils.get_llm_model( provider=llm_provider, @@ -95,7 +92,6 @@ async def run_browser_agent( else: raise ValueError(f"Invalid agent type: {agent_type}") - async def run_org_agent( llm, headless, @@ -137,7 +133,6 @@ async def run_org_agent( await browser.close() return final_result, errors, model_actions, model_thoughts - async def run_custom_agent( llm, use_own_browser, @@ -227,6 +222,226 @@ async def run_custom_agent( await browser.close() return final_result, errors, model_actions, model_thoughts +import argparse +import gradio as gr +from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean +import os + +# Define the theme map globally +theme_map = { + "Default": Default(), + "Soft": Soft(), + "Monochrome": Monochrome(), + "Glass": Glass(), + "Origin": Origin(), + "Citrus": Citrus(), + "Ocean": Ocean() +} + +def create_ui(theme_name="Ocean"): + """Create the UI with the specified theme""" + # Enhanced styling for better visual appeal + css = """ + .gradio-container { + max-width: 1200px !important; + margin: auto !important; + padding-top: 20px !important; + } + .header-text { + text-align: center; + margin-bottom: 30px; + } + .theme-section { + margin-bottom: 20px; + padding: 15px; + border-radius: 10px; + } + """ + + with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: + with gr.Row(): + gr.Markdown( + """ + # 🌐 Browser Use WebUI + ### Control your browser with AI assistance + """, + elem_classes=["header-text"] + ) + + # Quick access theme switcher at the top + with gr.Row(elem_classes=["theme-section"]): + theme_dropdown = gr.Dropdown( + choices=list(theme_map.keys()), + value=theme_name, + label="🎨 Quick Theme Switch", + container=False + ) + + with gr.Tabs() as tabs: + with gr.TabItem("🤖 Agent Settings", id=1): + with gr.Group(): + agent_type = gr.Radio( + ["org", "custom"], + label="Agent Type", + value="custom", + info="Select the type of agent to use" + ) + max_steps = 
gr.Slider( + minimum=1, + maximum=200, + value=100, + step=1, + label="Max Run Steps", + info="Maximum number of steps the agent will take" + ) + use_vision = gr.Checkbox( + label="Use Vision", + value=True, + info="Enable visual processing capabilities" + ) + + with gr.TabItem("🔧 LLM Configuration", id=2): + with gr.Group(): + llm_provider = gr.Dropdown( + ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], + label="LLM Provider", + value="gemini", + info="Select your preferred language model provider" + ) + llm_model_name = gr.Textbox( + label="Model Name", + value="gemini-2.0-flash-exp", + info="Specify the model to use" + ) + llm_temperature = gr.Slider( + minimum=0.0, + maximum=2.0, + value=1.0, + step=0.1, + label="Temperature", + info="Controls randomness in model outputs" + ) + with gr.Row(): + llm_base_url = gr.Textbox( + label="Base URL", + info="API endpoint URL (if required)" + ) + llm_api_key = gr.Textbox( + label="API Key", + type="password", + info="Your API key" + ) + + with gr.TabItem("🌐 Browser Settings", id=3): + with gr.Group(): + with gr.Row(): + use_own_browser = gr.Checkbox( + label="Use Own Browser", + value=False, + info="Use your existing browser instance" + ) + headless = gr.Checkbox( + label="Headless Mode", + value=False, + info="Run browser without GUI" + ) + disable_security = gr.Checkbox( + label="Disable Security", + value=True, + info="Disable browser security features" + ) + + with gr.Row(): + window_w = gr.Number( + label="Window Width", + value=1920, + info="Browser window width" + ) + window_h = gr.Number( + label="Window Height", + value=1080, + info="Browser window height" + ) + + save_recording_path = gr.Textbox( + label="Recording Path", + placeholder="e.g. 
./tmp/record_videos", + value="./tmp/record_videos", + info="Path to save browser recordings" + ) + + with gr.TabItem("📝 Task Settings", id=4): + task = gr.Textbox( + label="Task Description", + lines=4, + placeholder="Enter your task here...", + value="go to google.com and type 'OpenAI' click search and give me the first url", + info="Describe what you want the agent to do" + ) + add_infos = gr.Textbox( + label="Additional Information", + lines=3, + placeholder="Add any helpful context or instructions...", + info="Optional hints to help the LLM complete the task" + ) + + with gr.Row(): + run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2) + stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1) + + with gr.Group(): + gr.Markdown("### Results") + with gr.Row(): + with gr.Column(): + final_result_output = gr.Textbox( + label="Final Result", + lines=3, + show_label=True + ) + with gr.Column(): + errors_output = gr.Textbox( + label="Errors", + lines=3, + show_label=True + ) + with gr.Row(): + with gr.Column(): + model_actions_output = gr.Textbox( + label="Model Actions", + lines=3, + show_label=True + ) + with gr.Column(): + model_thoughts_output = gr.Textbox( + label="Model Thoughts", + lines=3, + show_label=True + ) + + # Handle theme changes + def reload_ui(new_theme): + """Reload the UI with the new theme""" + return create_ui(new_theme) + + theme_dropdown.change( + fn=reload_ui, + inputs=[theme_dropdown], + outputs=[demo] + ) + + # Run button click handler + run_button.click( + fn=run_browser_agent, + inputs=[ + agent_type, llm_provider, llm_model_name, llm_temperature, + llm_base_url, llm_api_key, use_own_browser, headless, + disable_security, window_w, window_h, save_recording_path, + task, add_infos, max_steps, use_vision + ], + outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output] + ) + + return demo def main(): parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent") @@ 
-234,81 +449,8 @@ def main(): parser.add_argument("--port", type=int, default=7788, help="Port to listen on") args = parser.parse_args() - js_func = """ - function refresh() { - const url = new URL(window.location); - - if (url.searchParams.get('__theme') !== 'dark') { - url.searchParams.set('__theme', 'dark'); - window.location.href = url.href; - } - } - """ - - # Gradio UI setup - with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]), - js=js_func) as demo: - gr.Markdown("

Browser Use WebUI

") - with gr.Row(): - agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom") - max_steps = gr.Number(label="max run steps", value=100) - use_vision = gr.Checkbox(label="use vision", value=True) - with gr.Row(): - llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini" - ) - llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp") - llm_temperature = gr.Number(label="LLM Temperature", value=1.0) - with gr.Row(): - llm_base_url = gr.Textbox(label="LLM Base URL") - llm_api_key = gr.Textbox(label="LLM API Key", type="password") - - with gr.Accordion("Browser Settings", open=False): - use_own_browser = gr.Checkbox(label="Use Own Browser", value=False) - headless = gr.Checkbox(label="Headless", value=False) - disable_security = gr.Checkbox(label="Disable Security", value=True) - with gr.Row(): - window_w = gr.Number(label="Window Width", value=1920) - window_h = gr.Number(label="Window Height", value=1080) - save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. 
./tmp/record_videos", - value="./tmp/record_videos") - with gr.Accordion("Task Settings", open=True): - task = gr.Textbox(label="Task", lines=10, - value="go to google.com and type 'OpenAI' click search and give me the first url") - add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5) - - run_button = gr.Button("Run Agent", variant="primary") - with gr.Column(): - final_result_output = gr.Textbox(label="Final Result", lines=5) - errors_output = gr.Textbox(label="Errors", lines=5, ) - model_actions_output = gr.Textbox(label="Model Actions", lines=5) - model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5) - - run_button.click( - fn=run_browser_agent, - inputs=[ - agent_type, - llm_provider, - llm_model_name, - llm_temperature, - llm_base_url, - llm_api_key, - use_own_browser, - headless, - disable_security, - window_w, - window_h, - save_recording_path, - task, - add_infos, - max_steps, - use_vision - ], - outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output], - ) - + demo = create_ui() demo.launch(server_name=args.ip, server_port=args.port) - if __name__ == '__main__': main() From 624f12f89e1864d2ed4ecb9af64e820158656518 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:02:37 +0530 Subject: [PATCH 05/20] Update requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index eb339d6..5b34217 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ browser-use langchain-google-genai pyperclip -gradio \ No newline at end of file +gradio +langchain-ollama From d4484162b93d058f862a9cc26b185651da88b3d0 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:03:14 +0530 Subject: [PATCH 06/20] Update utils.py --- src/utils/utils.py | 10 
++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/utils/utils.py b/src/utils/utils.py index cc3b9e4..6fbbd6c 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -11,6 +11,7 @@ import os from langchain_openai import ChatOpenAI, AzureChatOpenAI from langchain_anthropic import ChatAnthropic from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_ollama import ChatOllama def get_llm_model(provider: str, **kwargs): @@ -39,7 +40,7 @@ def get_llm_model(provider: str, **kwargs): ) elif provider == 'openai': if not kwargs.get("base_url", ""): - base_url = "https://api.openai.com/v1" + base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1") else: base_url = kwargs.get("base_url") @@ -66,7 +67,7 @@ def get_llm_model(provider: str, **kwargs): api_key = kwargs.get("api_key") return ChatOpenAI( - model=kwargs.get("model_name", 'gpt-4o'), + model=kwargs.get("model_name", 'deepseek-chat'), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key @@ -81,6 +82,11 @@ def get_llm_model(provider: str, **kwargs): temperature=kwargs.get("temperature", 0.0), google_api_key=api_key, ) + elif provider == 'ollama': + return ChatOllama( + model=kwargs.get("model_name", 'qwen2.5:7b'), + temperature=kwargs.get("temperature", 0.0), + ) elif provider == "azure_openai": if not kwargs.get("base_url", ""): base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "") From 6cc2df34bcb4a7fa891d2491ddcabffdf9d5baeb Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:04:01 +0530 Subject: [PATCH 07/20] Update custom_agent.py From 45c522db38311128e5aaeb07a41afaa58400c441 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:04:13 +0530 Subject: [PATCH 08/20] Update custom_massage_manager.py From e192bab825b6c179fe2ffa00bc1226ed4cb700a7 Mon Sep 17 00:00:00 2001 From: Richardson Gunde 
<152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:04:29 +0530 Subject: [PATCH 09/20] Update custom_prompts.py --- src/agent/custom_prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index bd8e612..0d88e41 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -82,7 +82,7 @@ class CustomSystemPrompt(SystemPrompt): - sometimes labels overlap, so use the context to verify the correct element 7. Form filling: - - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list. + - If you fill an input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list. 8. ACTION SEQUENCING: - Actions are executed in the order they appear in the list From 5ddd65adf461e62c583adeb530695e021455b75f Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:04:47 +0530 Subject: [PATCH 10/20] Update custom_views.py From aed448e07f0229b2f867b0c56343179cd6f74da9 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:05:01 +0530 Subject: [PATCH 11/20] Update custom_browser.py From dbf3224d31e47aefaf96f84ad961718fc392de0a Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:05:14 +0530 Subject: [PATCH 12/20] Update custom_context.py From c61c91d0595582588de30e5f6022c0f903cc0c96 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:05:30 +0530 Subject: [PATCH 13/20] Update custom_controller.py From 
d5823a51d57191a07bc08d3ab29b064a961e36d3 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:21:23 +0530 Subject: [PATCH 14/20] Update webui.py --- webui.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/webui.py b/webui.py index 20259ea..cd8d9dc 100644 --- a/webui.py +++ b/webui.py @@ -238,7 +238,7 @@ theme_map = { "Ocean": Ocean() } -def create_ui(theme_name="Ocean"): +def create_ui(theme_name="Citrus"): """Create the UI with the specified theme""" # Enhanced styling for better visual appeal css = """ @@ -260,7 +260,6 @@ def create_ui(theme_name="Ocean"): with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: with gr.Row(): - gr.Markdown( """ # 🌐 Browser Use WebUI @@ -304,7 +303,7 @@ def create_ui(theme_name="Ocean"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini", info="Select your preferred language model provider" @@ -390,6 +389,24 @@ def create_ui(theme_name="Ocean"): run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2) stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1) + with gr.TabItem("🎬 Recordings", id=5): + def list_videos(path): + if not os.path.exists(path): + return ["Recording path not found"] + video_files = [f for f in os.listdir(path) if f.endswith(('.mp4', '.webm'))] + return [os.path.join(path, vf) for vf in video_files] + + def display_videos(recording_path): + return list_videos(recording_path) + + recording_display = gr.Gallery(label="Recorded Videos", type="video") + + demo.load( + display_videos, + inputs=[save_recording_path], + outputs=[recording_display] + ) + with gr.Group(): gr.Markdown("### Results") with gr.Row(): From 
7a099837b8f1e25dc4cca66854d7d3250272dfde Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:39:20 +0530 Subject: [PATCH 15/20] Update webui.py --- webui.py | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/webui.py b/webui.py index cd8d9dc..400fd0e 100644 --- a/webui.py +++ b/webui.py @@ -225,7 +225,7 @@ async def run_custom_agent( import argparse import gradio as gr from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean -import os +import os, glob # Define the theme map globally theme_map = { @@ -238,7 +238,7 @@ theme_map = { "Ocean": Ocean() } -def create_ui(theme_name="Citrus"): +def create_ui(): """Create the UI with the specified theme""" # Enhanced styling for better visual appeal css = """ @@ -258,7 +258,7 @@ def create_ui(theme_name="Citrus"): } """ - with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: + with gr.Blocks(title="Browser Use WebUI", theme=theme_map["Ocean"], css=css) as demo: with gr.Row(): gr.Markdown( """ @@ -268,15 +268,6 @@ def create_ui(theme_name="Citrus"): elem_classes=["header-text"] ) - # Quick access theme switcher at the top - with gr.Row(elem_classes=["theme-section"]): - theme_dropdown = gr.Dropdown( - choices=list(theme_map.keys()), - value=theme_name, - label="🎨 Quick Theme Switch", - container=False - ) - with gr.Tabs() as tabs: with gr.TabItem("🤖 Agent Settings", id=1): with gr.Group(): @@ -303,7 +294,7 @@ def create_ui(theme_name="Citrus"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek", ""], label="LLM Provider", value="gemini", info="Select your preferred language model provider" @@ -391,15 +382,27 @@ def create_ui(theme_name="Citrus"): with 
gr.TabItem("🎬 Recordings", id=5): def list_videos(path): + """Return the latest video file from the specified path.""" if not os.path.exists(path): return ["Recording path not found"] - video_files = [f for f in os.listdir(path) if f.endswith(('.mp4', '.webm'))] - return [os.path.join(path, vf) for vf in video_files] + + # Get all video files in the directory + video_files = glob.glob(os.path.join(path, '*.[mM][pP]4')) + glob.glob(os.path.join(path, '*.[wW][eE][bB][mM]')) + + if not video_files: + return ["No recordings found"] + + # Sort files by modification time (latest first) + video_files.sort(key=os.path.getmtime, reverse=True) + + # Return only the latest video + return [video_files[0]] def display_videos(recording_path): + """Display the latest video in the gallery.""" return list_videos(recording_path) - recording_display = gr.Gallery(label="Recorded Videos", type="video") + recording_display = gr.Gallery(label="Latest Recording", type="video") demo.load( display_videos, @@ -436,17 +439,6 @@ def create_ui(theme_name="Citrus"): show_label=True ) - # Handle theme changes - def reload_ui(new_theme): - """Reload the UI with the new theme""" - return create_ui(new_theme) - - theme_dropdown.change( - fn=reload_ui, - inputs=[theme_dropdown], - outputs=[demo] - ) - # Run button click handler run_button.click( fn=run_browser_agent, From 31f56b56f448b8130af879dd0f808700662b028c Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:46:32 +0530 Subject: [PATCH 16/20] Update webui.py --- webui.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/webui.py b/webui.py index 400fd0e..9c6c25b 100644 --- a/webui.py +++ b/webui.py @@ -238,7 +238,7 @@ theme_map = { "Ocean": Ocean() } -def create_ui(): +def create_ui(theme_name="Ocean"): """Create the UI with the specified theme""" # Enhanced styling for better visual appeal css = """ @@ -258,7 +258,7 @@ def create_ui(): } """ - 
with gr.Blocks(title="Browser Use WebUI", theme=theme_map["Ocean"], css=css) as demo: + with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: with gr.Row(): gr.Markdown( """ @@ -294,7 +294,7 @@ def create_ui(): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek", ""], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini", info="Select your preferred language model provider" @@ -457,9 +457,11 @@ def main(): parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent") parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") + parser.add_argument("--theme", type=str, default="Citrus", choices=theme_map.keys(), help="Theme to use for the UI") args = parser.parse_args() - demo = create_ui() + # Create the UI with the specified theme + demo = create_ui(theme_name=args.theme) demo.launch(server_name=args.ip, server_port=args.port) if __name__ == '__main__': From a7d162516cd94537410e0323a24a4b692da92d6e Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 19:07:30 +0530 Subject: [PATCH 17/20] Update webui.py --- webui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webui.py b/webui.py index 9c6c25b..53733bc 100644 --- a/webui.py +++ b/webui.py @@ -294,7 +294,7 @@ def create_ui(theme_name="Ocean"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], label="LLM Provider", value="gemini", info="Select your preferred language model provider" @@ -457,7 +457,7 @@ def main(): parser = 
argparse.ArgumentParser(description="Gradio UI for Browser Agent") parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") - parser.add_argument("--theme", type=str, default="Citrus", choices=theme_map.keys(), help="Theme to use for the UI") + parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") args = parser.parse_args() # Create the UI with the specified theme From 1db2765935efc3542e92ff4299dfd1a6f980c27f Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 20:43:09 +0530 Subject: [PATCH 18/20] Update webui.py --- webui.py | 71 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/webui.py b/webui.py index 53733bc..1635531 100644 --- a/webui.py +++ b/webui.py @@ -54,8 +54,14 @@ async def run_browser_agent( max_steps, use_vision ): + # Ensure the recording directory exists + os.makedirs(save_recording_path, exist_ok=True) + # Get the list of existing videos before the agent runs + existing_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) + + glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]'))) + # Run the agent llm = utils.get_llm_model( provider=llm_provider, model_name=llm_model_name, @@ -64,7 +70,7 @@ async def run_browser_agent( api_key=llm_api_key ) if agent_type == "org": - return await run_org_agent( + final_result, errors, model_actions, model_thoughts = await run_org_agent( llm=llm, headless=headless, disable_security=disable_security, @@ -76,7 +82,7 @@ async def run_browser_agent( use_vision=use_vision ) elif agent_type == "custom": - return await run_custom_agent( + final_result, errors, model_actions, model_thoughts = await run_custom_agent( llm=llm, use_own_browser=use_own_browser, headless=headless, @@ 
-92,6 +98,17 @@ async def run_browser_agent( else: raise ValueError(f"Invalid agent type: {agent_type}") + # Get the list of videos after the agent runs + new_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) + + glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]'))) + + # Find the newly created video + latest_video = None + if new_videos - existing_videos: + latest_video = list(new_videos - existing_videos)[0] # Get the first new video + + return final_result, errors, model_actions, model_thoughts, latest_video + async def run_org_agent( llm, headless, @@ -222,6 +239,7 @@ async def run_custom_agent( await browser.close() return final_result, errors, model_actions, model_thoughts + import argparse import gradio as gr from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean @@ -239,8 +257,6 @@ theme_map = { } def create_ui(theme_name="Ocean"): - """Create the UI with the specified theme""" - # Enhanced styling for better visual appeal css = """ .gradio-container { max-width: 1200px !important; @@ -257,8 +273,18 @@ def create_ui(theme_name="Ocean"): border-radius: 10px; } """ + + js = """ + function refresh() { + const url = new URL(window.location); + if (url.searchParams.get('__theme') !== 'dark') { + url.searchParams.set('__theme', 'dark'); + window.location.href = url.href; + } + } + """ - with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: + with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js) as demo: with gr.Row(): gr.Markdown( """ @@ -294,7 +320,7 @@ def create_ui(theme_name="Ocean"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini", info="Select your preferred language model provider" @@ -381,34 +407,7 @@ 
def create_ui(theme_name="Ocean"): stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1) with gr.TabItem("🎬 Recordings", id=5): - def list_videos(path): - """Return the latest video file from the specified path.""" - if not os.path.exists(path): - return ["Recording path not found"] - - # Get all video files in the directory - video_files = glob.glob(os.path.join(path, '*.[mM][pP]4')) + glob.glob(os.path.join(path, '*.[wW][eE][bB][mM]')) - - if not video_files: - return ["No recordings found"] - - # Sort files by modification time (latest first) - video_files.sort(key=os.path.getmtime, reverse=True) - - # Return only the latest video - return [video_files[0]] - - def display_videos(recording_path): - """Display the latest video in the gallery.""" - return list_videos(recording_path) - - recording_display = gr.Gallery(label="Latest Recording", type="video") - - demo.load( - display_videos, - inputs=[save_recording_path], - outputs=[recording_display] - ) + recording_display = gr.Video(label="Latest Recording") with gr.Group(): gr.Markdown("### Results") @@ -448,7 +447,7 @@ def create_ui(theme_name="Ocean"): disable_security, window_w, window_h, save_recording_path, task, add_infos, max_steps, use_vision ], - outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output] + outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display] ) return demo @@ -458,9 +457,9 @@ def main(): parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") + parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode") args = parser.parse_args() - # Create the UI with the specified theme demo = create_ui(theme_name=args.theme) demo.launch(server_name=args.ip, 
server_port=args.port) From d0a38042f495c9fc8fde52cd33e8fdc53905c1c3 Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Mon, 6 Jan 2025 20:45:42 +0530 Subject: [PATCH 19/20] Update webui.py --- webui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webui.py b/webui.py index 1635531..eef1e3c 100644 --- a/webui.py +++ b/webui.py @@ -320,7 +320,7 @@ def create_ui(theme_name="Ocean"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], + ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], label="LLM Provider", value="gemini", info="Select your preferred language model provider" From e1be9fcda199b695184ea72f7a706657b57e65b4 Mon Sep 17 00:00:00 2001 From: warmshao Date: Mon, 6 Jan 2025 23:57:57 +0800 Subject: [PATCH 20/20] feat: release new and well-designed WebUI --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 6b40a9f..5d6363e 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,10 @@ This project builds upon the foundation of the [browser-use](https://github.com/ +**Changelog** +- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113). + + ## Environment Installation 1. **Python Version:** Ensure you have Python 3.11 or higher installed.