From e54c1fda0b3c44008a021cec46113a09f7466db1 Mon Sep 17 00:00:00 2001
From: warmshao <wenshaoguo1026@gmail.com>
Date: Sat, 4 Jan 2025 10:58:48 +0800
Subject: [PATCH 01/20] add ollama

---
 requirements.txt          |  3 ++-
 src/utils/utils.py        | 10 ++++++++--
 tests/test_browser_use.py | 10 ++++++++--
 tests/test_llm_api.py     | 12 ++++++++++--
 webui.py                  |  3 ++-
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index eb339d6..1471909 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 browser-use
 langchain-google-genai
 pyperclip
-gradio
\ No newline at end of file
+gradio
+langchain-ollama
\ No newline at end of file
diff --git a/src/utils/utils.py b/src/utils/utils.py
index cc3b9e4..6fbbd6c 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -11,6 +11,7 @@ import os
 from langchain_openai import ChatOpenAI, AzureChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_ollama import ChatOllama
 
 
 def get_llm_model(provider: str, **kwargs):
@@ -39,7 +40,7 @@ def get_llm_model(provider: str, **kwargs):
         )
     elif provider == 'openai':
         if not kwargs.get("base_url", ""):
-            base_url = "https://api.openai.com/v1"
+            base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
         else:
             base_url = kwargs.get("base_url")
 
@@ -66,7 +67,7 @@ def get_llm_model(provider: str, **kwargs):
             api_key = kwargs.get("api_key")
 
         return ChatOpenAI(
-            model=kwargs.get("model_name", 'gpt-4o'),
+            model=kwargs.get("model_name", 'deepseek-chat'),
             temperature=kwargs.get("temperature", 0.0),
             base_url=base_url,
             api_key=api_key
@@ -81,6 +82,11 @@ def get_llm_model(provider: str, **kwargs):
             temperature=kwargs.get("temperature", 0.0),
             google_api_key=api_key,
         )
+    elif provider == 'ollama':
+        return ChatOllama(
+            model=kwargs.get("model_name", 'qwen2.5:7b'),
+            temperature=kwargs.get("temperature", 0.0),
+        )
     elif provider == "azure_openai":
         if not kwargs.get("base_url", ""):
             base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
diff --git a/tests/test_browser_use.py b/tests/test_browser_use.py
index cc6c11e..84ed23a 100644
--- a/tests/test_browser_use.py
+++ b/tests/test_browser_use.py
@@ -105,9 +105,15 @@ async def test_browser_use_custom():
     #     api_key=os.getenv("GOOGLE_API_KEY", "")
     # )
 
+    # llm = utils.get_llm_model(
+    #     provider="deepseek",
+    #     model_name="deepseek-chat",
+    #     temperature=0.8
+    # )
+
     llm = utils.get_llm_model(
-        provider="deepseek",
-        model_name="deepseek-chat",
+        provider="ollama",
+        model_name="qwen2.5:7b",
         temperature=0.8
     )
 
diff --git a/tests/test_llm_api.py b/tests/test_llm_api.py
index 03d5753..9e2a1d6 100644
--- a/tests/test_llm_api.py
+++ b/tests/test_llm_api.py
@@ -106,7 +106,6 @@ def test_deepseek_model():
         base_url=os.getenv("DEEPSEEK_ENDPOINT", ""),
         api_key=os.getenv("DEEPSEEK_API_KEY", "")
     )
-    pdb.set_trace()
     message = HumanMessage(
         content=[
             {"type": "text", "text": "who are you?"}
@@ -116,8 +115,17 @@ def test_deepseek_model():
     print(ai_msg.content)
 
 
+def test_ollama_model():
+    from langchain_ollama import ChatOllama
+
+    llm = ChatOllama(model="qwen2.5:7b")
+    ai_msg = llm.invoke("Sing a ballad of LangChain.")
+    print(ai_msg.content)
+
+
 if __name__ == '__main__':
     # test_openai_model()
     # test_gemini_model()
     # test_azure_openai_model()
-    test_deepseek_model()
+    # test_deepseek_model()
+    test_ollama_model()
diff --git a/webui.py b/webui.py
index f44bc14..b574115 100644
--- a/webui.py
+++ b/webui.py
@@ -255,7 +255,8 @@ def main():
             use_vision = gr.Checkbox(label="use vision", value=True)
         with gr.Row():
             llm_provider = gr.Dropdown(
-                ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
+                ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], label="LLM Provider",
+                value="gemini"
             )
             llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
             llm_temperature = gr.Number(label="LLM Temperature", value=1.0)

From 3287b6e00af88b51999d583444b646d590e7b16b Mon Sep 17 00:00:00 2001
From: warmshao <wenshaoguo1026@gmail.com>
Date: Sat, 4 Jan 2025 10:59:22 +0800
Subject: [PATCH 02/20] add ollama to readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ecb9bfe..6b40a9f 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ This project builds upon the foundation of the [browser-use](https://github.com/
 
 1.  **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
 
-2.  **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek etc. And we plan to add support for even more models in the future.
+2.  **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek, Ollama etc. And we plan to add support for even more models in the future.
 
 3.  **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
 

From 0ecf9ffdd414ee3e47a0696ffa744712a677c55b Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine <eltociear@gmail.com>
Date: Sun, 5 Jan 2025 16:58:45 +0900
Subject: [PATCH 03/20] chore: update custom_prompts.py

Fix the article in the form-filling rule of the system prompt: "a input field" -> "an input field".
---
 src/agent/custom_prompts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
index bd8e612..0d88e41 100644
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -82,7 +82,7 @@ class CustomSystemPrompt(SystemPrompt):
        - sometimes labels overlap, so use the context to verify the correct element
 
     7. Form filling:
-       - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
+       - If you fill an input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
 
     8. ACTION SEQUENCING:
        - Actions are executed in the order they appear in the list 

From 901dcb7e1cf3d879731ec44c741d2986fc14df5e Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:02:11 +0530
Subject: [PATCH 04/20] Update webui.py

---
 webui.py | 302 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 222 insertions(+), 80 deletions(-)

diff --git a/webui.py b/webui.py
index f44bc14..5570be5 100644
--- a/webui.py
+++ b/webui.py
@@ -36,7 +36,6 @@ from src.agent.custom_prompts import CustomSystemPrompt
 
 from src.utils import utils
 
-
 async def run_browser_agent(
         agent_type,
         llm_provider,
@@ -55,9 +54,7 @@ async def run_browser_agent(
         max_steps,
         use_vision
 ):
-    """
-    Runs the browser agent based on user configurations.
-    """
+
 
     llm = utils.get_llm_model(
         provider=llm_provider,
@@ -95,7 +92,6 @@ async def run_browser_agent(
     else:
         raise ValueError(f"Invalid agent type: {agent_type}")
 
-
 async def run_org_agent(
         llm,
         headless,
@@ -137,7 +133,6 @@ async def run_org_agent(
     await browser.close()
     return final_result, errors, model_actions, model_thoughts
 
-
 async def run_custom_agent(
         llm,
         use_own_browser,
@@ -227,6 +222,226 @@ async def run_custom_agent(
         await browser.close()
     return final_result, errors, model_actions, model_thoughts
 
+import argparse
+import gradio as gr
+from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
+import os
+
+# Define the theme map globally
+theme_map = {
+    "Default": Default(),
+    "Soft": Soft(),
+    "Monochrome": Monochrome(),
+    "Glass": Glass(),
+    "Origin": Origin(),
+    "Citrus": Citrus(),
+    "Ocean": Ocean()
+}
+
+def create_ui(theme_name="Ocean"):
+    """Create the UI with the specified theme"""
+    # Enhanced styling for better visual appeal
+    css = """
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+        padding-top: 20px !important;
+    }
+    .header-text {
+        text-align: center;
+        margin-bottom: 30px;
+    }
+    .theme-section {
+        margin-bottom: 20px;
+        padding: 15px;
+        border-radius: 10px;
+    }
+    """
+    
+    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
+        with gr.Row():
+            gr.Markdown(
+                """
+                # 🌐 Browser Use WebUI
+                ### Control your browser with AI assistance
+                """,
+                elem_classes=["header-text"]
+            )
+        
+        # Quick access theme switcher at the top
+        with gr.Row(elem_classes=["theme-section"]):
+            theme_dropdown = gr.Dropdown(
+                choices=list(theme_map.keys()),
+                value=theme_name,
+                label="🎨 Quick Theme Switch",
+                container=False
+            )
+        
+        with gr.Tabs() as tabs:
+            with gr.TabItem("🤖 Agent Settings", id=1):
+                with gr.Group():
+                    agent_type = gr.Radio(
+                        ["org", "custom"],
+                        label="Agent Type",
+                        value="custom",
+                        info="Select the type of agent to use"
+                    )
+                    max_steps = gr.Slider(
+                        minimum=1,
+                        maximum=200,
+                        value=100,
+                        step=1,
+                        label="Max Run Steps",
+                        info="Maximum number of steps the agent will take"
+                    )
+                    use_vision = gr.Checkbox(
+                        label="Use Vision",
+                        value=True,
+                        info="Enable visual processing capabilities"
+                    )
+
+            with gr.TabItem("🔧 LLM Configuration", id=2):
+                with gr.Group():
+                    llm_provider = gr.Dropdown(
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
+                        label="LLM Provider",
+                        value="gemini",
+                        info="Select your preferred language model provider"
+                    )
+                    llm_model_name = gr.Textbox(
+                        label="Model Name",
+                        value="gemini-2.0-flash-exp",
+                        info="Specify the model to use"
+                    )
+                    llm_temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=1.0,
+                        step=0.1,
+                        label="Temperature",
+                        info="Controls randomness in model outputs"
+                    )
+                    with gr.Row():
+                        llm_base_url = gr.Textbox(
+                            label="Base URL",
+                            info="API endpoint URL (if required)"
+                        )
+                        llm_api_key = gr.Textbox(
+                            label="API Key",
+                            type="password",
+                            info="Your API key"
+                        )
+
+            with gr.TabItem("🌐 Browser Settings", id=3):
+                with gr.Group():
+                    with gr.Row():
+                        use_own_browser = gr.Checkbox(
+                            label="Use Own Browser",
+                            value=False,
+                            info="Use your existing browser instance"
+                        )
+                        headless = gr.Checkbox(
+                            label="Headless Mode",
+                            value=False,
+                            info="Run browser without GUI"
+                        )
+                        disable_security = gr.Checkbox(
+                            label="Disable Security",
+                            value=True,
+                            info="Disable browser security features"
+                        )
+                    
+                    with gr.Row():
+                        window_w = gr.Number(
+                            label="Window Width",
+                            value=1920,
+                            info="Browser window width"
+                        )
+                        window_h = gr.Number(
+                            label="Window Height",
+                            value=1080,
+                            info="Browser window height"
+                        )
+                    
+                    save_recording_path = gr.Textbox(
+                        label="Recording Path",
+                        placeholder="e.g. ./tmp/record_videos",
+                        value="./tmp/record_videos",
+                        info="Path to save browser recordings"
+                    )
+
+            with gr.TabItem("📝 Task Settings", id=4):
+                task = gr.Textbox(
+                    label="Task Description",
+                    lines=4,
+                    placeholder="Enter your task here...",
+                    value="go to google.com and type 'OpenAI' click search and give me the first url",
+                    info="Describe what you want the agent to do"
+                )
+                add_infos = gr.Textbox(
+                    label="Additional Information",
+                    lines=3,
+                    placeholder="Add any helpful context or instructions...",
+                    info="Optional hints to help the LLM complete the task"
+                )
+
+                with gr.Row():
+                    run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
+                    stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
+
+                with gr.Group():
+                    gr.Markdown("### Results")
+                    with gr.Row():
+                        with gr.Column():
+                            final_result_output = gr.Textbox(
+                                label="Final Result",
+                                lines=3,
+                                show_label=True
+                            )
+                        with gr.Column():
+                            errors_output = gr.Textbox(
+                                label="Errors",
+                                lines=3,
+                                show_label=True
+                            )
+                    with gr.Row():
+                        with gr.Column():
+                            model_actions_output = gr.Textbox(
+                                label="Model Actions",
+                                lines=3,
+                                show_label=True
+                            )
+                        with gr.Column():
+                            model_thoughts_output = gr.Textbox(
+                                label="Model Thoughts",
+                                lines=3,
+                                show_label=True
+                            )
+
+        # Handle theme changes
+        def reload_ui(new_theme):
+            """Reload the UI with the new theme"""
+            return create_ui(new_theme)
+            
+        theme_dropdown.change(
+            fn=reload_ui,
+            inputs=[theme_dropdown],
+            outputs=[demo]
+        )
+
+        # Run button click handler
+        run_button.click(
+            fn=run_browser_agent,
+            inputs=[
+                agent_type, llm_provider, llm_model_name, llm_temperature,
+                llm_base_url, llm_api_key, use_own_browser, headless,
+                disable_security, window_w, window_h, save_recording_path,
+                task, add_infos, max_steps, use_vision
+            ],
+            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output]
+        )
+
+    return demo
 
 def main():
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
@@ -234,81 +449,8 @@ def main():
     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
     args = parser.parse_args()
 
-    js_func = """
-        function refresh() {
-            const url = new URL(window.location);
-
-            if (url.searchParams.get('__theme') !== 'dark') {
-                url.searchParams.set('__theme', 'dark');
-                window.location.href = url.href;
-            }
-        }
-        """
-
-    # Gradio UI setup
-    with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]),
-                   js=js_func) as demo:
-        gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
-        with gr.Row():
-            agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
-            max_steps = gr.Number(label="max run steps", value=100)
-            use_vision = gr.Checkbox(label="use vision", value=True)
-        with gr.Row():
-            llm_provider = gr.Dropdown(
-                ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
-            )
-            llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
-            llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
-        with gr.Row():
-            llm_base_url = gr.Textbox(label="LLM Base URL")
-            llm_api_key = gr.Textbox(label="LLM API Key", type="password")
-
-        with gr.Accordion("Browser Settings", open=False):
-            use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
-            headless = gr.Checkbox(label="Headless", value=False)
-            disable_security = gr.Checkbox(label="Disable Security", value=True)
-            with gr.Row():
-                window_w = gr.Number(label="Window Width", value=1920)
-                window_h = gr.Number(label="Window Height", value=1080)
-            save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
-                                             value="./tmp/record_videos")
-        with gr.Accordion("Task Settings", open=True):
-            task = gr.Textbox(label="Task", lines=10,
-                              value="go to google.com and type 'OpenAI' click search and give me the first url")
-            add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)
-
-        run_button = gr.Button("Run Agent", variant="primary")
-        with gr.Column():
-            final_result_output = gr.Textbox(label="Final Result", lines=5)
-            errors_output = gr.Textbox(label="Errors", lines=5, )
-            model_actions_output = gr.Textbox(label="Model Actions", lines=5)
-            model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)
-
-        run_button.click(
-            fn=run_browser_agent,
-            inputs=[
-                agent_type,
-                llm_provider,
-                llm_model_name,
-                llm_temperature,
-                llm_base_url,
-                llm_api_key,
-                use_own_browser,
-                headless,
-                disable_security,
-                window_w,
-                window_h,
-                save_recording_path,
-                task,
-                add_infos,
-                max_steps,
-                use_vision
-            ],
-            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
-        )
-
+    demo = create_ui()
     demo.launch(server_name=args.ip, server_port=args.port)
 
-
 if __name__ == '__main__':
     main()

From 624f12f89e1864d2ed4ecb9af64e820158656518 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:02:37 +0530
Subject: [PATCH 05/20] Update requirements.txt

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index eb339d6..5b34217 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 browser-use
 langchain-google-genai
 pyperclip
-gradio
\ No newline at end of file
+gradio
+langchain-ollama

From d4484162b93d058f862a9cc26b185651da88b3d0 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:03:14 +0530
Subject: [PATCH 06/20] Update utils.py

---
 src/utils/utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/utils/utils.py b/src/utils/utils.py
index cc3b9e4..6fbbd6c 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -11,6 +11,7 @@ import os
 from langchain_openai import ChatOpenAI, AzureChatOpenAI
 from langchain_anthropic import ChatAnthropic
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_ollama import ChatOllama
 
 
 def get_llm_model(provider: str, **kwargs):
@@ -39,7 +40,7 @@ def get_llm_model(provider: str, **kwargs):
         )
     elif provider == 'openai':
         if not kwargs.get("base_url", ""):
-            base_url = "https://api.openai.com/v1"
+            base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
         else:
             base_url = kwargs.get("base_url")
 
@@ -66,7 +67,7 @@ def get_llm_model(provider: str, **kwargs):
             api_key = kwargs.get("api_key")
 
         return ChatOpenAI(
-            model=kwargs.get("model_name", 'gpt-4o'),
+            model=kwargs.get("model_name", 'deepseek-chat'),
             temperature=kwargs.get("temperature", 0.0),
             base_url=base_url,
             api_key=api_key
@@ -81,6 +82,11 @@ def get_llm_model(provider: str, **kwargs):
             temperature=kwargs.get("temperature", 0.0),
             google_api_key=api_key,
         )
+    elif provider == 'ollama':
+        return ChatOllama(
+            model=kwargs.get("model_name", 'qwen2.5:7b'),
+            temperature=kwargs.get("temperature", 0.0),
+        )
     elif provider == "azure_openai":
         if not kwargs.get("base_url", ""):
             base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")

From 6cc2df34bcb4a7fa891d2491ddcabffdf9d5baeb Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:04:01 +0530
Subject: [PATCH 07/20] Update custom_agent.py


From 45c522db38311128e5aaeb07a41afaa58400c441 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:04:13 +0530
Subject: [PATCH 08/20] Update custom_massage_manager.py


From e192bab825b6c179fe2ffa00bc1226ed4cb700a7 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:04:29 +0530
Subject: [PATCH 09/20] Update custom_prompts.py

---
 src/agent/custom_prompts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
index bd8e612..0d88e41 100644
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -82,7 +82,7 @@ class CustomSystemPrompt(SystemPrompt):
        - sometimes labels overlap, so use the context to verify the correct element
 
     7. Form filling:
-       - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
+       - If you fill an input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
 
     8. ACTION SEQUENCING:
        - Actions are executed in the order they appear in the list 

From 5ddd65adf461e62c583adeb530695e021455b75f Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:04:47 +0530
Subject: [PATCH 10/20] Update custom_views.py


From aed448e07f0229b2f867b0c56343179cd6f74da9 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:05:01 +0530
Subject: [PATCH 11/20] Update custom_browser.py


From dbf3224d31e47aefaf96f84ad961718fc392de0a Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:05:14 +0530
Subject: [PATCH 12/20] Update custom_context.py


From c61c91d0595582588de30e5f6022c0f903cc0c96 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Sun, 5 Jan 2025 18:05:30 +0530
Subject: [PATCH 13/20] Update custom_controller.py


From d5823a51d57191a07bc08d3ab29b064a961e36d3 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 09:21:23 +0530
Subject: [PATCH 14/20] Update webui.py

---
 webui.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/webui.py b/webui.py
index 20259ea..cd8d9dc 100644
--- a/webui.py
+++ b/webui.py
@@ -238,7 +238,7 @@ theme_map = {
     "Ocean": Ocean()
 }
 
-def create_ui(theme_name="Ocean"):
+def create_ui(theme_name="Citrus"):
     """Create the UI with the specified theme"""
     # Enhanced styling for better visual appeal
     css = """
@@ -260,7 +260,6 @@ def create_ui(theme_name="Ocean"):
     
     with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
         with gr.Row():
-
             gr.Markdown(
                 """
                 # 🌐 Browser Use WebUI
@@ -304,7 +303,7 @@ def create_ui(theme_name="Ocean"):
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"
@@ -390,6 +389,24 @@ def create_ui(theme_name="Ocean"):
                     run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
                     stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
 
+            with gr.TabItem("🎬 Recordings", id=5):
+                def list_videos(path):
+                    if not os.path.exists(path):
+                        return ["Recording path not found"]
+                    video_files = [f for f in os.listdir(path) if f.endswith(('.mp4', '.webm'))]
+                    return [os.path.join(path, vf) for vf in video_files]
+
+                def display_videos(recording_path):
+                    return list_videos(recording_path)
+
+                recording_display = gr.Gallery(label="Recorded Videos", type="video")
+
+                demo.load(
+                    display_videos,
+                    inputs=[save_recording_path],
+                    outputs=[recording_display]
+                )
+
                 with gr.Group():
                     gr.Markdown("### Results")
                     with gr.Row():

From 7a099837b8f1e25dc4cca66854d7d3250272dfde Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 18:39:20 +0530
Subject: [PATCH 15/20] Update webui.py

---
 webui.py | 46 +++++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/webui.py b/webui.py
index cd8d9dc..400fd0e 100644
--- a/webui.py
+++ b/webui.py
@@ -225,7 +225,7 @@ async def run_custom_agent(
 import argparse
 import gradio as gr
 from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
-import os
+import os, glob
 
 # Define the theme map globally
 theme_map = {
@@ -238,7 +238,7 @@ theme_map = {
     "Ocean": Ocean()
 }
 
-def create_ui(theme_name="Citrus"):
+def create_ui():
     """Create the UI with the specified theme"""
     # Enhanced styling for better visual appeal
     css = """
@@ -258,7 +258,7 @@ def create_ui(theme_name="Citrus"):
     }
     """
     
-    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
+    with gr.Blocks(title="Browser Use WebUI", theme=theme_map["Ocean"], css=css) as demo:
         with gr.Row():
             gr.Markdown(
                 """
@@ -268,15 +268,6 @@ def create_ui(theme_name="Citrus"):
                 elem_classes=["header-text"]
             )
         
-        # Quick access theme switcher at the top
-        with gr.Row(elem_classes=["theme-section"]):
-            theme_dropdown = gr.Dropdown(
-                choices=list(theme_map.keys()),
-                value=theme_name,
-                label="🎨 Quick Theme Switch",
-                container=False
-            )
-        
         with gr.Tabs() as tabs:
             with gr.TabItem("🤖 Agent Settings", id=1):
                 with gr.Group():
@@ -303,7 +294,7 @@ def create_ui(theme_name="Citrus"):
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", ""],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"
@@ -391,15 +382,27 @@ def create_ui(theme_name="Citrus"):
 
             with gr.TabItem("🎬 Recordings", id=5):
                 def list_videos(path):
+                    """Return the latest video file from the specified path."""
                     if not os.path.exists(path):
                         return ["Recording path not found"]
-                    video_files = [f for f in os.listdir(path) if f.endswith(('.mp4', '.webm'))]
-                    return [os.path.join(path, vf) for vf in video_files]
+                    
+                    # Get all video files in the directory
+                    video_files = glob.glob(os.path.join(path, '*.[mM][pP]4')) + glob.glob(os.path.join(path, '*.[wW][eE][bB][mM]'))
+                    
+                    if not video_files:
+                        return ["No recordings found"]
+                    
+                    # Sort files by modification time (latest first)
+                    video_files.sort(key=os.path.getmtime, reverse=True)
+                    
+                    # Return only the latest video
+                    return [video_files[0]]
 
                 def display_videos(recording_path):
+                    """Display the latest video in the gallery."""
                     return list_videos(recording_path)
 
-                recording_display = gr.Gallery(label="Recorded Videos", type="video")
+                recording_display = gr.Gallery(label="Latest Recording", type="video")
 
                 demo.load(
                     display_videos,
@@ -436,17 +439,6 @@ def create_ui(theme_name="Citrus"):
                                 show_label=True
                             )
 
-        # Handle theme changes
-        def reload_ui(new_theme):
-            """Reload the UI with the new theme"""
-            return create_ui(new_theme)
-            
-        theme_dropdown.change(
-            fn=reload_ui,
-            inputs=[theme_dropdown],
-            outputs=[demo]
-        )
-
         # Run button click handler
         run_button.click(
             fn=run_browser_agent,

From 31f56b56f448b8130af879dd0f808700662b028c Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 18:46:32 +0530
Subject: [PATCH 16/20] Update webui.py

---
 webui.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/webui.py b/webui.py
index 400fd0e..9c6c25b 100644
--- a/webui.py
+++ b/webui.py
@@ -238,7 +238,7 @@ theme_map = {
     "Ocean": Ocean()
 }
 
-def create_ui():
+def create_ui(theme_name="Ocean"):
     """Create the UI with the specified theme"""
     # Enhanced styling for better visual appeal
     css = """
@@ -258,7 +258,7 @@ def create_ui():
     }
     """
     
-    with gr.Blocks(title="Browser Use WebUI", theme=theme_map["Ocean"], css=css) as demo:
+    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
         with gr.Row():
             gr.Markdown(
                 """
@@ -294,7 +294,7 @@ def create_ui():
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", ""],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"
@@ -457,9 +457,11 @@ def main():
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
     parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
+    parser.add_argument("--theme", type=str, default="Citrus", choices=theme_map.keys(), help="Theme to use for the UI")
     args = parser.parse_args()
 
-    demo = create_ui()
+    # Create the UI with the specified theme
+    demo = create_ui(theme_name=args.theme)
     demo.launch(server_name=args.ip, server_port=args.port)
 
 if __name__ == '__main__':

From a7d162516cd94537410e0323a24a4b692da92d6e Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 19:07:30 +0530
Subject: [PATCH 17/20] Update webui.py

---
 webui.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/webui.py b/webui.py
index 9c6c25b..53733bc 100644
--- a/webui.py
+++ b/webui.py
@@ -294,7 +294,7 @@ def create_ui(theme_name="Ocean"):
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"
@@ -457,7 +457,7 @@ def main():
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
     parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
-    parser.add_argument("--theme", type=str, default="Citrus", choices=theme_map.keys(), help="Theme to use for the UI")
+    parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
     args = parser.parse_args()
 
     # Create the UI with the specified theme

From 1db2765935efc3542e92ff4299dfd1a6f980c27f Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 20:43:09 +0530
Subject: [PATCH 18/20] Update webui.py

---
 webui.py | 71 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/webui.py b/webui.py
index 53733bc..1635531 100644
--- a/webui.py
+++ b/webui.py
@@ -54,8 +54,14 @@ async def run_browser_agent(
         max_steps,
         use_vision
 ):
+    # Ensure the recording directory exists
+    os.makedirs(save_recording_path, exist_ok=True)
 
+    # Get the list of existing videos before the agent runs
+    existing_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) + 
+                          glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
 
+    # Run the agent
     llm = utils.get_llm_model(
         provider=llm_provider,
         model_name=llm_model_name,
@@ -64,7 +70,7 @@ async def run_browser_agent(
         api_key=llm_api_key
     )
     if agent_type == "org":
-        return await run_org_agent(
+        final_result, errors, model_actions, model_thoughts = await run_org_agent(
             llm=llm,
             headless=headless,
             disable_security=disable_security,
@@ -76,7 +82,7 @@ async def run_browser_agent(
             use_vision=use_vision
         )
     elif agent_type == "custom":
-        return await run_custom_agent(
+        final_result, errors, model_actions, model_thoughts = await run_custom_agent(
             llm=llm,
             use_own_browser=use_own_browser,
             headless=headless,
@@ -92,6 +98,17 @@ async def run_browser_agent(
     else:
         raise ValueError(f"Invalid agent type: {agent_type}")
 
+    # Get the list of videos after the agent runs
+    new_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) + 
+                     glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
+
+    # Find the newly created video
+    latest_video = None
+    if new_videos - existing_videos:
+        latest_video = list(new_videos - existing_videos)[0]  # Get the first new video
+
+    return final_result, errors, model_actions, model_thoughts, latest_video
+
 async def run_org_agent(
         llm,
         headless,
@@ -222,6 +239,7 @@ async def run_custom_agent(
         await browser.close()
     return final_result, errors, model_actions, model_thoughts
 
+
 import argparse
 import gradio as gr
 from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
@@ -239,8 +257,6 @@ theme_map = {
 }
 
 def create_ui(theme_name="Ocean"):
-    """Create the UI with the specified theme"""
-    # Enhanced styling for better visual appeal
     css = """
     .gradio-container {
         max-width: 1200px !important;
@@ -257,8 +273,18 @@ def create_ui(theme_name="Ocean"):
         border-radius: 10px;
     }
     """
+
+    js = """
+    function refresh() {
+        const url = new URL(window.location);
+        if (url.searchParams.get('__theme') !== 'dark') {
+            url.searchParams.set('__theme', 'dark');
+            window.location.href = url.href;
+        }
+    }
+    """
     
-    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo:
+    with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js) as demo:
         with gr.Row():
             gr.Markdown(
                 """
@@ -294,7 +320,7 @@ def create_ui(theme_name="Ocean"):
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"
@@ -381,34 +407,7 @@ def create_ui(theme_name="Ocean"):
                     stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
 
             with gr.TabItem("🎬 Recordings", id=5):
-                def list_videos(path):
-                    """Return the latest video file from the specified path."""
-                    if not os.path.exists(path):
-                        return ["Recording path not found"]
-                    
-                    # Get all video files in the directory
-                    video_files = glob.glob(os.path.join(path, '*.[mM][pP]4')) + glob.glob(os.path.join(path, '*.[wW][eE][bB][mM]'))
-                    
-                    if not video_files:
-                        return ["No recordings found"]
-                    
-                    # Sort files by modification time (latest first)
-                    video_files.sort(key=os.path.getmtime, reverse=True)
-                    
-                    # Return only the latest video
-                    return [video_files[0]]
-
-                def display_videos(recording_path):
-                    """Display the latest video in the gallery."""
-                    return list_videos(recording_path)
-
-                recording_display = gr.Gallery(label="Latest Recording", type="video")
-
-                demo.load(
-                    display_videos,
-                    inputs=[save_recording_path],
-                    outputs=[recording_display]
-                )
+                recording_display = gr.Video(label="Latest Recording")
 
                 with gr.Group():
                     gr.Markdown("### Results")
@@ -448,7 +447,7 @@ def create_ui(theme_name="Ocean"):
                 disable_security, window_w, window_h, save_recording_path,
                 task, add_infos, max_steps, use_vision
             ],
-            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output]
+            outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display]
         )
 
     return demo
@@ -458,9 +457,9 @@ def main():
     parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
     parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
     parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
+    parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
     args = parser.parse_args()
 
-    # Create the UI with the specified theme
     demo = create_ui(theme_name=args.theme)
     demo.launch(server_name=args.ip, server_port=args.port)
 

From d0a38042f495c9fc8fde52cd33e8fdc53905c1c3 Mon Sep 17 00:00:00 2001
From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com>
Date: Mon, 6 Jan 2025 20:45:42 +0530
Subject: [PATCH 19/20] Update webui.py

---
 webui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/webui.py b/webui.py
index 1635531..eef1e3c 100644
--- a/webui.py
+++ b/webui.py
@@ -320,7 +320,7 @@ def create_ui(theme_name="Ocean"):
             with gr.TabItem("🔧 LLM Configuration", id=2):
                 with gr.Group():
                     llm_provider = gr.Dropdown(
-                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
                         label="LLM Provider",
                         value="gemini",
                         info="Select your preferred language model provider"

From e1be9fcda199b695184ea72f7a706657b57e65b4 Mon Sep 17 00:00:00 2001
From: warmshao <wenshaoguo1026@gmail.com>
Date: Mon, 6 Jan 2025 23:57:57 +0800
Subject: [PATCH 20/20] feat: release new and well-designed WebUI

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 6b40a9f..5d6363e 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,10 @@ This project builds upon the foundation of the [browser-use](https://github.com/
 
 <video src="https://github.com/user-attachments/assets/58c0f59e-02b4-4413-aba8-6184616bf181" controls="controls" width="500" height="300" >Your browser does not support playing this video!</video>
 
+**Changelog**
+- [x] **2025/01/06:** Thanks to @richard-devbot, a new and well-designed WebUI has been released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
+
+
 ## Environment Installation
 
 1.  **Python Version:** Ensure you have Python 3.11 or higher installed.