Merge branch 'browser-use:main' into main

This commit is contained in:
apoorvshah10
2025-04-01 11:39:51 +05:30
committed by GitHub
10 changed files with 254 additions and 236 deletions

View File

@@ -1,6 +1,6 @@
browser-use==0.1.40
pyperclip==1.9.0
gradio==5.10.0
gradio==5.23.1
json-repair
langchain-mistralai==0.2.4
langchain-google-genai==2.0.8

View File

@@ -208,8 +208,8 @@ class CustomAgent(Agent):
@time_execution_async("--get_next_action")
async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get next action from LLM based on current state"""
ai_message = self.llm.invoke(input_messages)
fixed_input_messages = self._convert_input_messages(input_messages)
ai_message = self.llm.invoke(fixed_input_messages)
self.message_manager._add_message_with_tokens(ai_message)
if hasattr(ai_message, "reasoning_content"):
@@ -222,10 +222,16 @@ class CustomAgent(Agent):
else:
ai_content = ai_message.content
ai_content = ai_content.replace("```json", "").replace("```", "")
ai_content = repair_json(ai_content)
parsed_json = json.loads(ai_content)
parsed: AgentOutput = self.AgentOutput(**parsed_json)
try:
ai_content = ai_content.replace("```json", "").replace("```", "")
ai_content = repair_json(ai_content)
parsed_json = json.loads(ai_content)
parsed: AgentOutput = self.AgentOutput(**parsed_json)
except Exception as e:
import traceback
traceback.print_exc()
logger.debug(ai_message.content)
raise ValueError('Could not parse response.')
if parsed is None:
logger.debug(ai_message.content)

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import logging
import pdb
from typing import List, Optional, Type, Dict
from browser_use.agent.message_manager.service import MessageManager
@@ -96,7 +97,7 @@ class CustomMessageManager(MessageManager):
self._add_message_with_tokens(state_message)
def _remove_state_message_by_index(self, remove_ind=-1) -> None:
"""Remove last state message from history"""
"""Remove state message by index from history"""
i = len(self.state.history.messages) - 1
remove_cnt = 0
while i >= 0:

View File

@@ -18,11 +18,17 @@ Example:
# Response Rules
1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
{{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
"important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output ''.",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
"next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."}},
"action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}}
{{
"current_state": {{
"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
"important_contents": "Output important contents closely related to user\'s instruction on the current page. If there is, please output the contents. If not, please output empty string ''.",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
"next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."
}},
"action": [
{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence
]
}}
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
Common action sequences:

View File

@@ -1,5 +1,6 @@
import asyncio
class AgentState:
_instance = None
@@ -27,4 +28,4 @@ class AgentState:
self.last_valid_state = state
def get_last_valid_state(self):
return self.last_valid_state
return self.last_valid_state

View File

@@ -19,7 +19,13 @@ from browser_use.agent.views import ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller, DoneAction
from main_content_extractor import MainContentExtractor
from langchain.schema import SystemMessage, HumanMessage
from langchain_core.messages import (
AIMessage,
BaseMessage,
HumanMessage,
ToolMessage,
SystemMessage
)
from json_repair import repair_json
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
from src.controller.custom_controller import CustomController

View File

@@ -1,125 +0,0 @@
import os
import pickle
import uuid
import gradio as gr
def default_config():
    """Return the default Web-UI settings as a plain dict.

    `use_own_browser` is seeded from the CHROME_PERSISTENT_SESSION
    environment variable ("true"/"false", case-insensitive); everything
    else is a fixed default.
    """
    persistent_session = os.getenv("CHROME_PERSISTENT_SESSION", "false").lower() == "true"
    return dict(
        agent_type="custom",
        max_steps=100,
        max_actions_per_step=10,
        use_vision=True,
        tool_calling_method="auto",
        llm_provider="openai",
        llm_model_name="gpt-4o",
        llm_num_ctx=32000,
        llm_temperature=0.6,
        llm_base_url="",
        llm_api_key="",
        use_own_browser=persistent_session,
        keep_browser_open=False,
        headless=False,
        disable_security=True,
        enable_recording=True,
        window_w=1280,
        window_h=1100,
        save_recording_path="./tmp/record_videos",
        save_trace_path="./tmp/traces",
        save_agent_history_path="./tmp/agent_history",
        task="go to google.com and type 'OpenAI' click search and give me the first url",
    )
def load_config_from_file(config_file):
    """Unpickle a saved settings dict from *config_file*.

    On any failure (missing file, corrupt pickle, ...) an error string is
    returned instead of raising, so callers can show it as a status message.

    NOTE(security): pickle.load can execute arbitrary code from the file —
    only load configuration files you created yourself.
    """
    try:
        with open(config_file, 'rb') as fh:
            return pickle.load(fh)
    except Exception as exc:
        return f"Error loading configuration: {str(exc)}"
def save_config_to_file(settings, save_dir="./tmp/webui_settings"):
    """Pickle *settings* to a freshly named <uuid4>.pkl inside *save_dir*.

    The directory is created if needed; returns a human-readable status
    string containing the path written.
    """
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, f"{uuid.uuid4()}.pkl")
    with open(target, 'wb') as fh:
        pickle.dump(settings, fh)
    return f"Configuration saved to {target}"
def save_current_config(*args):
    """Pack the 22 positional UI values into a named settings dict and persist it.

    The argument order must match the `inputs=` list wired to the save
    button in the UI; the result of save_config_to_file (a status string)
    is returned unchanged.
    """
    field_names = (
        "agent_type", "max_steps", "max_actions_per_step", "use_vision",
        "tool_calling_method", "llm_provider", "llm_model_name", "llm_num_ctx",
        "llm_temperature", "llm_base_url", "llm_api_key", "use_own_browser",
        "keep_browser_open", "headless", "disable_security", "enable_recording",
        "window_w", "window_h", "save_recording_path", "save_trace_path",
        "save_agent_history_path", "task",
    )
    # Index explicitly so too-few arguments still raise IndexError, as before.
    current_config = {key: args[i] for i, key in enumerate(field_names)}
    return save_config_to_file(current_config)
def update_ui_from_config(config_file):
    """Translate an uploaded config file into gr.update() values for the UI.

    Returns a 23-tuple: one update per saved setting (22 of them, in the
    same order save_current_config packs them) followed by a status string.
    When no file is selected, or the file cannot be parsed into a dict,
    every component gets a no-op gr.update() so the UI is left untouched.

    Fixes vs. previous version:
    - the two fallback branches returned only 21 updates + status (22
      elements) while the success branch returned 22 + status (23), which
      mismatched the gradio `outputs=` arity; now all branches return 23.
    - tool_calling_method fell back to True, but the dropdown holds
      strings; it now falls back to "auto" (matching default_config()).
    - llm_temperature fell back to 1.0; it now falls back to 0.6
      (matching default_config()).
    """
    if config_file is not None:
        loaded_config = load_config_from_file(config_file.name)
        if isinstance(loaded_config, dict):
            return (
                gr.update(value=loaded_config.get("agent_type", "custom")),
                gr.update(value=loaded_config.get("max_steps", 100)),
                gr.update(value=loaded_config.get("max_actions_per_step", 10)),
                gr.update(value=loaded_config.get("use_vision", True)),
                gr.update(value=loaded_config.get("tool_calling_method", "auto")),
                gr.update(value=loaded_config.get("llm_provider", "openai")),
                gr.update(value=loaded_config.get("llm_model_name", "gpt-4o")),
                gr.update(value=loaded_config.get("llm_num_ctx", 32000)),
                gr.update(value=loaded_config.get("llm_temperature", 0.6)),
                gr.update(value=loaded_config.get("llm_base_url", "")),
                gr.update(value=loaded_config.get("llm_api_key", "")),
                gr.update(value=loaded_config.get("use_own_browser", False)),
                gr.update(value=loaded_config.get("keep_browser_open", False)),
                gr.update(value=loaded_config.get("headless", False)),
                gr.update(value=loaded_config.get("disable_security", True)),
                gr.update(value=loaded_config.get("enable_recording", True)),
                gr.update(value=loaded_config.get("window_w", 1280)),
                gr.update(value=loaded_config.get("window_h", 1100)),
                gr.update(value=loaded_config.get("save_recording_path", "./tmp/record_videos")),
                gr.update(value=loaded_config.get("save_trace_path", "./tmp/traces")),
                gr.update(value=loaded_config.get("save_agent_history_path", "./tmp/agent_history")),
                gr.update(value=loaded_config.get("task", "")),
                "Configuration loaded successfully.",
            )
        # Parsed, but not a dict (load_config_from_file returns an error
        # string on failure): leave the UI untouched and report the error.
        return tuple(gr.update() for _ in range(22)) + ("Error: Invalid configuration file.",)
    # No file selected: no-op updates for every component.
    return tuple(gr.update() for _ in range(22)) + ("No file selected.",)

View File

@@ -4,13 +4,15 @@ import time
from pathlib import Path
from typing import Dict, Optional
import requests
import json
import gradio as gr
import uuid
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
import gradio as gr
from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama, UnboundChatOpenAI
@@ -37,7 +39,7 @@ def get_llm_model(provider: str, **kwargs):
env_var = f"{provider.upper()}_API_KEY"
api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
if not api_key:
handle_api_key_error(provider, env_var)
raise MissingAPIKeyError(provider, env_var)
kwargs["api_key"] = api_key
if provider == "anthropic":
@@ -185,7 +187,7 @@ model_names = {
"ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
"deepseek-r1:14b", "deepseek-r1:32b"],
"azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
"mistral": ["mixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
"mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
"alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
"moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
"unbound": ["gemini-2.0-flash","gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"]
@@ -197,6 +199,7 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None):
"""
Update the model name dropdown with predefined models for the selected provider.
"""
import gradio as gr
# Use API keys from .env if not provided
if not api_key:
api_key = os.getenv(f"{llm_provider.upper()}_API_KEY", "")
@@ -210,15 +213,13 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None):
return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
def handle_api_key_error(provider: str, env_var: str):
"""
Handles the missing API key error by raising a gr.Error with a clear message.
"""
provider_display = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
raise gr.Error(
f"💥 {provider_display} API key not found! 🔑 Please set the "
f"`{env_var}` environment variable or provide it in the UI."
)
class MissingAPIKeyError(Exception):
    """Raised when no API key is configured for the selected LLM provider."""

    def __init__(self, provider: str, env_var: str):
        # Prefer the human-friendly provider name when one is registered.
        display_name = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
        message = (
            f"💥 {display_name} API key not found! 🔑 Please set the "
            f"`{env_var}` environment variable or provide it in the UI."
        )
        super().__init__(message)
def encode_image(img_path):
@@ -287,3 +288,70 @@ async def capture_screenshot(browser_context):
return encoded
except Exception as e:
return None
class ConfigManager:
    """Registry of gradio components whose values can be saved to and
    restored from a config file, preserving registration order."""

    def __init__(self):
        # name -> component mapping, plus a list that fixes iteration order.
        self.components = {}
        self.component_order = []

    def register_component(self, name: str, component):
        """Register *component* under *name* and return it.

        Re-registering an existing name replaces the component but keeps
        its original position in the order.
        """
        self.components[name] = component
        if name not in self.component_order:
            self.component_order.append(name)
        return component

    def save_current_config(self):
        """Snapshot every registered component's current `.value` and persist it."""
        snapshot = {
            name: getattr(self.components[name], "value", None)
            for name in self.component_order
        }
        return save_config_to_file(snapshot)

    def update_ui_from_config(self, config_file):
        """Build one gr.update() per registered component from *config_file*.

        Returns the updates (in registration order) followed by a status
        string; missing file / invalid content yields no-op updates.
        """
        if config_file is None:
            return [gr.update() for _ in self.component_order] + ["No file selected."]
        loaded = load_config_from_file(config_file.name)
        if not isinstance(loaded, dict):
            return [gr.update() for _ in self.component_order] + ["Error: Invalid configuration file."]
        updates = [
            gr.update(value=loaded[name]) if name in loaded else gr.update()
            for name in self.component_order
        ]
        return updates + ["Configuration loaded successfully."]

    def get_all_components(self):
        """All registered components, in registration order."""
        return [self.components[name] for name in self.component_order]
def load_config_from_file(config_file):
    """Read a JSON config file and return the parsed settings.

    On any failure (missing file, invalid JSON, ...) an error string is
    returned instead of raising, so callers can show it as a status message.
    """
    try:
        with open(config_file, 'r') as fh:
            return json.load(fh)
    except Exception as exc:
        return f"Error loading configuration: {str(exc)}"
def save_config_to_file(settings, save_dir="./tmp/webui_settings"):
    """Write *settings* as pretty-printed JSON to <uuid4>.json in *save_dir*.

    The directory is created if needed; returns a human-readable status
    string containing the path written.
    """
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, f"{uuid.uuid4()}.json")
    with open(target, 'w') as fh:
        json.dump(settings, fh, indent=2)
    return f"Configuration saved to {target}"

View File

@@ -133,11 +133,11 @@ async def test_browser_use_custom():
api_key=os.getenv("GOOGLE_API_KEY", "")
)
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-reasoner",
# temperature=0.8
# )
llm = utils.get_llm_model(
provider="deepseek",
model_name="deepseek-reasoner",
temperature=0.8
)
# llm = utils.get_llm_model(
# provider="deepseek",

213
webui.py
View File

@@ -13,6 +13,8 @@ import os
logger = logging.getLogger(__name__)
import gradio as gr
import inspect
from functools import wraps
from browser_use.agent.service import Agent
from playwright.async_api import async_playwright
@@ -32,9 +34,8 @@ from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePromp
from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
from src.controller.custom_controller import CustomController
from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, \
save_current_config, update_ui_from_config
from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot, MissingAPIKeyError
from src.utils import utils
# Global variables for persistence
_global_browser = None
@@ -44,6 +45,49 @@ _global_agent = None
# Create the global agent state instance
_global_agent_state = AgentState()
# webui config
webui_config_manager = utils.ConfigManager()
def scan_and_register_components(blocks):
    """Scan a gradio Blocks object and register all of its interactive components, excluding buttons."""
    global webui_config_manager

    def traverse_blocks(block, prefix=""):
        # Returns the number of components registered under this subtree.
        registered = 0
        # Handle the components directly owned by this Blocks node.
        if hasattr(block, "children"):
            for i, child in enumerate(block.children):
                if isinstance(child, gr.components.Component):
                    # Exclude Button components; only interactive inputs carry state worth saving.
                    if getattr(child, "interactive", False) and not isinstance(child, gr.Button):
                        name = f"{prefix}component_{i}"
                        if hasattr(child, "label") and child.label:
                            # Use the component's label as part of the name.
                            label = child.label
                            name = f"{prefix}{label}"
                        logger.debug(f"Registering component: {name}")
                        webui_config_manager.register_component(name, child)
                        registered += 1
                elif hasattr(child, "children"):
                    # Recurse into nested Blocks containers.
                    new_prefix = f"{prefix}block_{i}_"
                    registered += traverse_blocks(child, new_prefix)
        return registered

    total = traverse_blocks(blocks)
    logger.info(f"Total registered components: {total}")
def save_current_config():
    # Delegate to the global ConfigManager, which snapshots the current value
    # of every registered component and persists it to a config file.
    return webui_config_manager.save_current_config()
def update_ui_from_config(config_file):
    # Delegate to the global ConfigManager; returns one update per registered
    # component followed by a status message string.
    return webui_config_manager.update_ui_from_config(config_file)
def resolve_sensitive_env_variables(text):
"""
@@ -245,8 +289,9 @@ async def run_browser_agent(
gr.update(interactive=True) # Re-enable run button
)
except gr.Error:
raise
except MissingAPIKeyError as e:
logger.error(str(e))
raise gr.Error(str(e), print_exception=False)
except Exception as e:
import traceback
@@ -539,8 +584,7 @@ async def run_with_stream(
max_input_tokens=max_input_tokens
)
# Add HTML content at the start of the result array
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
yield [html_content] + list(result)
yield [gr.update(visible=False)] + list(result)
else:
try:
# Run the browser agent in the background
@@ -592,7 +636,7 @@ async def run_with_stream(
if _global_agent and _global_agent.state.stopped:
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -606,7 +650,7 @@ async def run_with_stream(
break
else:
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -633,7 +677,7 @@ async def run_with_stream(
errors = f"Agent error: {str(e)}"
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -648,7 +692,9 @@ async def run_with_stream(
except Exception as e:
import traceback
yield [
f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
gr.HTML(
value=f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
visible=True),
"",
f"Error: {str(e)}\n{traceback.format_exc()}",
"",
@@ -715,11 +761,13 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p
return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True)
def create_ui(config, theme_name="Ocean"):
def create_ui(theme_name="Ocean"):
css = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
width: 60vw !important;
max-width: 60% !important;
margin-left: auto !important;
margin-right: auto !important;
padding-top: 20px !important;
}
.header-text {
@@ -751,41 +799,45 @@ def create_ui(config, theme_name="Ocean"):
agent_type = gr.Radio(
["org", "custom"],
label="Agent Type",
value=config['agent_type'],
value="custom",
info="Select the type of agent to use",
interactive=True
)
with gr.Column():
max_steps = gr.Slider(
minimum=1,
maximum=200,
value=config['max_steps'],
value=100,
step=1,
label="Max Run Steps",
info="Maximum number of steps the agent will take",
interactive=True
)
max_actions_per_step = gr.Slider(
minimum=1,
maximum=20,
value=config['max_actions_per_step'],
maximum=100,
value=10,
step=1,
label="Max Actions per Step",
info="Maximum number of actions the agent will take per step",
interactive=True
)
with gr.Column():
use_vision = gr.Checkbox(
label="Use Vision",
value=config['use_vision'],
value=True,
info="Enable visual processing capabilities",
interactive=True
)
max_input_tokens = gr.Number(
label="Max Input Tokens",
value=128000,
precision=0
precision=0,
interactive=True
)
tool_calling_method = gr.Dropdown(
label="Tool Calling Method",
value=config['tool_calling_method'],
value="auto",
interactive=True,
allow_custom_value=True, # Allow users to input custom model names
choices=["auto", "json_schema", "function_calling"],
@@ -798,44 +850,47 @@ def create_ui(config, theme_name="Ocean"):
llm_provider = gr.Dropdown(
choices=[provider for provider, model in utils.model_names.items()],
label="LLM Provider",
value=config['llm_provider'],
info="Select your preferred language model provider"
value="openai",
info="Select your preferred language model provider",
interactive=True
)
llm_model_name = gr.Dropdown(
label="Model Name",
choices=utils.model_names['openai'],
value=config['llm_model_name'],
value="gpt-4o",
interactive=True,
allow_custom_value=True, # Allow users to input custom model names
info="Select a model in the dropdown options or directly type a custom model name"
)
llm_num_ctx = gr.Slider(
ollama_num_ctx = gr.Slider(
minimum=2 ** 8,
maximum=2 ** 16,
value=config['llm_num_ctx'],
value=16000,
step=1,
label="Max Context Length",
label="Ollama Context Length",
info="Controls max context length model needs to handle (less = faster)",
visible=config['llm_provider'] == "ollama"
visible=False,
interactive=True
)
llm_temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=config['llm_temperature'],
value=0.6,
step=0.1,
label="Temperature",
info="Controls randomness in model outputs"
info="Controls randomness in model outputs",
interactive=True
)
with gr.Row():
llm_base_url = gr.Textbox(
label="Base URL",
value=config['llm_base_url'],
value="",
info="API endpoint URL (if required)"
)
llm_api_key = gr.Textbox(
label="API Key",
type="password",
value=config['llm_api_key'],
value="",
info="Your API key (leave blank to use .env)"
)
@@ -847,7 +902,7 @@ def create_ui(config, theme_name="Ocean"):
llm_provider.change(
fn=update_llm_num_ctx_visibility,
inputs=llm_provider,
outputs=llm_num_ctx
outputs=ollama_num_ctx
)
with gr.TabItem("🌐 Browser Settings", id=3):
@@ -855,40 +910,47 @@ def create_ui(config, theme_name="Ocean"):
with gr.Row():
use_own_browser = gr.Checkbox(
label="Use Own Browser",
value=config['use_own_browser'],
value=False,
info="Use your existing browser instance",
interactive=True
)
keep_browser_open = gr.Checkbox(
label="Keep Browser Open",
value=config['keep_browser_open'],
value=False,
info="Keep Browser Open between Tasks",
interactive=True
)
headless = gr.Checkbox(
label="Headless Mode",
value=config['headless'],
value=False,
info="Run browser without GUI",
interactive=True
)
disable_security = gr.Checkbox(
label="Disable Security",
value=config['disable_security'],
value=True,
info="Disable browser security features",
interactive=True
)
enable_recording = gr.Checkbox(
label="Enable Recording",
value=config['enable_recording'],
value=True,
info="Enable saving browser recordings",
interactive=True
)
with gr.Row():
window_w = gr.Number(
label="Window Width",
value=config['window_w'],
value=1280,
info="Browser window width",
interactive=True
)
window_h = gr.Number(
label="Window Height",
value=config['window_h'],
value=1100,
info="Browser window height",
interactive=True
)
chrome_cdp = gr.Textbox(
@@ -902,7 +964,7 @@ def create_ui(config, theme_name="Ocean"):
save_recording_path = gr.Textbox(
label="Recording Path",
placeholder="e.g. ./tmp/record_videos",
value=config['save_recording_path'],
value="./tmp/record_videos",
info="Path to save browser recordings",
interactive=True, # Allow editing only if recording is enabled
)
@@ -910,7 +972,7 @@ def create_ui(config, theme_name="Ocean"):
save_trace_path = gr.Textbox(
label="Trace Path",
placeholder="e.g. ./tmp/traces",
value=config['save_trace_path'],
value="./tmp/traces",
info="Path to save Agent traces",
interactive=True,
)
@@ -918,7 +980,7 @@ def create_ui(config, theme_name="Ocean"):
save_agent_history_path = gr.Textbox(
label="Agent History Save Path",
placeholder="e.g., ./tmp/agent_history",
value=config['save_agent_history_path'],
value="./tmp/agent_history",
info="Specify the directory where agent history should be saved.",
interactive=True,
)
@@ -928,14 +990,17 @@ def create_ui(config, theme_name="Ocean"):
label="Task Description",
lines=4,
placeholder="Enter your task here...",
value=config['task'],
value="go to google.com and type 'OpenAI' click search and give me the first url",
info="Describe what you want the agent to do",
interactive=True
)
add_infos = gr.Textbox(
label="Additional Information",
lines=3,
placeholder="Add any helpful context or instructions...",
info="Optional hints to help the LLM complete the task",
value="",
interactive=True
)
with gr.Row():
@@ -946,6 +1011,7 @@ def create_ui(config, theme_name="Ocean"):
browser_view = gr.HTML(
value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
label="Live Browser View",
visible=False
)
gr.Markdown("### Results")
@@ -973,12 +1039,15 @@ def create_ui(config, theme_name="Ocean"):
with gr.TabItem("🧐 Deep Research", id=5):
research_task_input = gr.Textbox(label="Research Task", lines=5,
value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.",
interactive=True)
with gr.Row():
max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3,
precision=0) # precision=0 确保是整数
precision=0,
interactive=True) # precision=0 确保是整数
max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1,
precision=0) # precision=0 确保是整数
precision=0,
interactive=True) # precision=0 确保是整数
with gr.Row():
research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2)
stop_research_button = gr.Button("⏹ Stop", variant="stop", scale=1)
@@ -996,7 +1065,7 @@ def create_ui(config, theme_name="Ocean"):
run_button.click(
fn=run_with_stream,
inputs=[
agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url,
agent_type, llm_provider, llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url,
llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
save_recording_path, save_agent_history_path, save_trace_path, # Include the new path
@@ -1021,7 +1090,7 @@ def create_ui(config, theme_name="Ocean"):
research_button.click(
fn=run_deep_search,
inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider,
llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision,
llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision,
use_own_browser, headless, chrome_cdp],
outputs=[markdown_output_display, markdown_download, stop_research_button, research_button]
)
@@ -1054,7 +1123,6 @@ def create_ui(config, theme_name="Ocean"):
recordings_gallery = gr.Gallery(
label="Recordings",
value=list_recordings(config['save_recording_path']),
columns=3,
height="auto",
object_fit="contain"
@@ -1069,41 +1137,22 @@ def create_ui(config, theme_name="Ocean"):
with gr.TabItem("📁 UI Configuration", id=8):
config_file_input = gr.File(
label="Load Config File",
file_types=[".pkl"],
label="Load UI Settings from Config File",
file_types=[".json"],
interactive=True
)
with gr.Row():
load_config_button = gr.Button("Load Existing Config From File", variant="primary")
save_config_button = gr.Button("Save Current Config", variant="primary")
load_config_button = gr.Button("Load Config", variant="primary")
save_config_button = gr.Button("Save UI Settings", variant="primary")
config_status = gr.Textbox(
label="Status",
lines=2,
interactive=False
)
load_config_button.click(
fn=update_ui_from_config,
inputs=[config_file_input],
outputs=[
agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
task, config_status
]
)
save_config_button.click(
fn=save_current_config,
inputs=[
agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security,
enable_recording, window_w, window_h, save_recording_path, save_trace_path,
save_agent_history_path, task,
],
inputs=[], # 不需要输入参数
outputs=[config_status]
)
@@ -1124,6 +1173,15 @@ def create_ui(config, theme_name="Ocean"):
use_own_browser.change(fn=close_global_browser)
keep_browser_open.change(fn=close_global_browser)
scan_and_register_components(demo)
global webui_config_manager
all_components = webui_config_manager.get_all_components()
load_config_button.click(
fn=update_ui_from_config,
inputs=[config_file_input],
outputs=all_components + [config_status]
)
return demo
@@ -1132,12 +1190,9 @@ def main():
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
args = parser.parse_args()
config_dict = default_config()
demo = create_ui(config_dict, theme_name=args.theme)
demo = create_ui(theme_name=args.theme)
demo.launch(server_name=args.ip, server_port=args.port)