Update browser-use to 0.1.43 and fix the deep research agent

This commit is contained in:
vincent
2025-05-09 09:27:12 +08:00
parent fb65ca7ba2
commit eb91cb64ec
10 changed files with 218 additions and 267 deletions

View File

@@ -1,4 +1,4 @@
browser-use==0.1.45
browser-use==0.1.43
pyperclip==1.9.0
gradio==5.27.0
json-repair

View File

@@ -8,9 +8,13 @@ import os
from browser_use.agent.gif import create_history_gif
from browser_use.agent.service import Agent, AgentHookFunc
from browser_use.agent.views import (
ActionResult,
AgentHistory,
AgentHistoryList,
AgentStepInfo,
ToolCallingMethod,
)
from browser_use.browser.views import BrowserStateHistory
from browser_use.telemetry.views import (
AgentEndTelemetryEvent,
)
@@ -21,17 +25,15 @@ load_dotenv()
logger = logging.getLogger(__name__)
SKIP_LLM_API_KEY_VERIFICATION = (
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
)
class BrowserUseAgent(Agent):
@time_execution_async("--run (agent)")
async def run(
self,
max_steps: int = 100,
on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None,
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None
) -> AgentHistoryList:
"""Execute the task with maximum number of steps"""
@@ -49,41 +51,28 @@ class BrowserUseAgent(Agent):
)
signal_handler.register()
# Wait for verification task to complete if it exists
if hasattr(self, "_verification_task") and not self._verification_task.done():
try:
await self._verification_task
except Exception:
# Error already logged in the task
pass
try:
self._log_agent_run()
# Execute initial actions if provided
if self.initial_actions:
result = await self.multi_act(
self.initial_actions, check_for_new_elements=False
)
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
self.state.last_result = result
for step in range(max_steps):
# Check if waiting for user input after Ctrl+C
while self.state.paused:
await asyncio.sleep(0.5)
if self.state.stopped:
break
if self.state.paused:
signal_handler.wait_for_resume()
signal_handler.reset()
# Check if we should stop due to too many failures
if self.state.consecutive_failures >= self.settings.max_failures:
logger.error(
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
)
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
break
# Check control flags before each step
if self.state.stopped:
logger.info("Agent stopped")
logger.info('Agent stopped')
break
while self.state.paused:
@@ -108,15 +97,30 @@ class BrowserUseAgent(Agent):
await self.log_completion()
break
else:
logger.info("Failed to complete task in maximum steps")
error_message = 'Failed to complete task in maximum steps'
self.state.history.history.append(
AgentHistory(
model_output=None,
result=[ActionResult(error=error_message, include_in_memory=True)],
state=BrowserStateHistory(
url='',
title='',
tabs=[],
interacted_element=[],
screenshot=None,
),
metadata=None,
)
)
logger.info(f'{error_message}')
return self.state.history
except KeyboardInterrupt:
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
logger.info(
"Got KeyboardInterrupt during execution, returning current history"
)
logger.info('Got KeyboardInterrupt during execution, returning current history')
return self.state.history
finally:
@@ -136,13 +140,29 @@ class BrowserUseAgent(Agent):
)
)
if self.settings.save_playwright_script_path:
logger.info(
f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
)
try:
# Extract sensitive data keys if sensitive_data is provided
keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
# Pass browser and context config to the saving method
self.state.history.save_as_playwright_script(
self.settings.save_playwright_script_path,
sensitive_data_keys=keys,
browser_config=self.browser.config,
context_config=self.browser_context.config,
)
except Exception as script_gen_err:
# Log any error during script generation/saving
logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
await self.close()
if self.settings.generate_gif:
output_path: str = "agent_history.gif"
output_path: str = 'agent_history.gif'
if isinstance(self.settings.generate_gif, str):
output_path = self.settings.generate_gif
create_history_gif(
task=self.task, history=self.state.history, output_path=output_path
)
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)

View File

@@ -29,9 +29,10 @@ from langchain_core.tools import StructuredTool, Tool
from langgraph.graph import StateGraph
from pydantic import BaseModel, Field
from browser_use.browser.context import BrowserContextWindowSize, BrowserContextConfig
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import CustomBrowserContextConfig
from src.controller.custom_controller import CustomController
from src.utils.mcp_client import setup_mcp_client_and_tools
@@ -47,12 +48,12 @@ _BROWSER_AGENT_INSTANCES = {}
async def run_single_browser_task(
task_query: str,
task_id: str,
llm: Any, # Pass the main LLM
browser_config: Dict[str, Any],
stop_event: threading.Event,
use_vision: bool = False,
task_query: str,
task_id: str,
llm: Any, # Pass the main LLM
browser_config: Dict[str, Any],
stop_event: threading.Event,
use_vision: bool = False,
) -> Dict[str, Any]:
"""
Runs a single BrowserUseAgent task.
@@ -104,10 +105,9 @@ async def run_single_browser_task(
)
)
context_config = CustomBrowserContextConfig(
context_config = BrowserContextConfig(
save_downloads_path="./tmp/downloads",
window_width=window_w,
window_height=window_h,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
force_new_context=True,
)
bu_browser_context = await bu_browser.new_context(config=context_config)
@@ -198,12 +198,12 @@ class BrowserSearchInput(BaseModel):
async def _run_browser_search_tool(
queries: List[str],
task_id: str, # Injected dependency
llm: Any, # Injected dependency
browser_config: Dict[str, Any],
stop_event: threading.Event,
max_parallel_browsers: int = 1,
queries: List[str],
task_id: str, # Injected dependency
llm: Any, # Injected dependency
browser_config: Dict[str, Any],
stop_event: threading.Event,
max_parallel_browsers: int = 1,
) -> List[Dict[str, Any]]:
"""
Internal function to execute parallel browser searches based on LLM-provided queries.
@@ -267,11 +267,11 @@ async def _run_browser_search_tool(
def create_browser_search_tool(
llm: Any,
browser_config: Dict[str, Any],
task_id: str,
stop_event: threading.Event,
max_parallel_browsers: int = 1,
llm: Any,
browser_config: Dict[str, Any],
task_id: str,
stop_event: threading.Event,
max_parallel_browsers: int = 1,
) -> StructuredTool:
"""Factory function to create the browser search tool with necessary dependencies."""
# Use partial to bind the dependencies that aren't part of the LLM call arguments
@@ -553,7 +553,7 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
else:
current_task_message = [
SystemMessage(
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool."
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool. Please output at least one tool."
),
HumanMessage(
content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
@@ -582,8 +582,11 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
_save_plan_to_md(plan, output_dir)
return {
"research_plan": plan,
"current_step_index": current_index + 1,
"error_message": f"LLM failed to call a tool for step {current_step['step']}.",
"status": "pending",
"current_step_index": current_index,
"messages": [
f"LLM failed to call a tool for step {current_step['step']}. Response: {ai_response.content}"
f". Please use tool to do research unless you are thinking or summary"],
}
# Process tool calls
@@ -665,8 +668,8 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
browser_tool_called = "parallel_browser_search" in executed_tool_names
# We might need a more nuanced status based on the *content* of tool_results
step_failed = (
any("Error:" in str(tr.content) for tr in tool_results)
or not browser_tool_called
any("Error:" in str(tr.content) for tr in tool_results)
or not browser_tool_called
)
if step_failed:
@@ -695,9 +698,9 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
"search_results": current_search_results, # Update with new results
"current_step_index": current_index + 1,
"messages": state["messages"]
+ current_task_message
+ [ai_response]
+ tool_results,
+ current_task_message
+ [ai_response]
+ tool_results,
# Optionally return the tool_results messages if needed by downstream nodes
}
@@ -879,10 +882,10 @@ def should_continue(state: DeepResearchState) -> str:
class DeepResearchAgent:
def __init__(
self,
llm: Any,
browser_config: Dict[str, Any],
mcp_server_config: Optional[Dict[str, Any]] = None,
self,
llm: Any,
browser_config: Dict[str, Any],
mcp_server_config: Optional[Dict[str, Any]] = None,
):
"""
Initializes the DeepSearchAgent.
@@ -904,7 +907,7 @@ class DeepResearchAgent:
self.runner: Optional[asyncio.Task] = None # To hold the asyncio task for run
async def _setup_tools(
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
) -> List[Tool]:
"""Sets up the basic tools (File I/O) and optional MCP tools."""
tools = [
@@ -981,11 +984,11 @@ class DeepResearchAgent:
return app
async def run(
self,
topic: str,
task_id: Optional[str] = None,
save_dir: str = "./tmp/deep_research",
max_parallel_browsers: int = 1,
self,
topic: str,
task_id: Optional[str] = None,
save_dir: str = "./tmp/deep_research",
max_parallel_browsers: int = 1,
) -> Dict[str, Any]:
"""
Starts the deep research process (Async Generator Version).

View File

@@ -26,25 +26,33 @@ from browser_use.browser.utils.screen_resolution import get_screen_resolution, g
from browser_use.utils import time_execution_async
import socket
from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
from .custom_context import CustomBrowserContext
logger = logging.getLogger(__name__)
class CustomBrowser(Browser):
async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
"""Create a browser context"""
browser_config = self.config.model_dump() if self.config else {}
context_config = config.model_dump() if config else {}
merged_config = {**browser_config, **context_config}
return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
if self.config.headless:
# Use the configured window size from new_context_config if available
if (
not self.config.headless
and hasattr(self.config, 'new_context_config')
and hasattr(self.config.new_context_config, 'browser_window_size')
):
screen_size = self.config.new_context_config.browser_window_size.model_dump()
offset_x, offset_y = get_window_adjustments()
elif self.config.headless:
screen_size = {'width': 1920, 'height': 1080}
offset_x, offset_y = 0, 0
else:
@@ -52,6 +60,7 @@ class CustomBrowser(Browser):
offset_x, offset_y = get_window_adjustments()
chrome_args = {
f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
*CHROME_ARGS,
*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
*(CHROME_HEADLESS_ARGS if self.config.headless else []),
@@ -70,8 +79,8 @@ class CustomBrowser(Browser):
# check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
if s.connect_ex(('localhost', 9222)) == 0:
chrome_args.remove('--remote-debugging-port=9222')
if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
browser_class = getattr(playwright, self.config.browser_class)
args = {

View File

@@ -12,10 +12,6 @@ from browser_use.browser.context import BrowserContextState
logger = logging.getLogger(__name__)
class CustomBrowserContextConfig(BrowserContextConfig):
    """Context configuration extended with a webui-specific flag.

    Inherits all standard fields from browser-use's ``BrowserContextConfig``
    and adds ``force_new_context``, which is consulted when deciding whether
    to reuse an existing browser context or always create a fresh one.
    """

    # When True, skip reusing an already-open context on the connected
    # browser and always create a brand-new Playwright context.
    force_new_context: bool = False  # force to create new context
class CustomBrowserContext(BrowserContext):
def __init__(
self,
@@ -24,96 +20,3 @@ class CustomBrowserContext(BrowserContext):
state: Optional[BrowserContextState] = None,
):
super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state)
async def _create_context(self, browser: PlaywrightBrowser):
    """Creates a new browser context with anti-detection measures and loads cookies if available.

    Reuses an existing context when connected to an external browser (via CDP
    or a user-supplied binary) unless ``force_new_context`` is set; otherwise
    creates a fresh context configured from ``self.config``. Also starts
    tracing, loads cookies from disk, and injects anti-detection JS.

    Args:
        browser: The underlying Playwright browser to create/reuse a context on.

    Returns:
        The Playwright browser context to use for this session.
    """
    # Reuse the first existing context when attached over CDP, unless the
    # config explicitly demands a brand-new context.
    if not self.config.force_new_context and self.browser.config.cdp_url and len(browser.contexts) > 0:
        context = browser.contexts[0]
    elif not self.config.force_new_context and self.browser.config.browser_binary_path and len(
            browser.contexts) > 0:
        # Connect to existing Chrome instance instead of creating new one
        context = browser.contexts[0]
    else:
        # Original code for creating new context
        # NOTE(review): disable_security drives both bypass_csp and
        # ignore_https_errors — a single flag loosens two protections.
        context = await browser.new_context(
            no_viewport=True,
            user_agent=self.config.user_agent,
            java_script_enabled=True,
            bypass_csp=self.config.disable_security,
            ignore_https_errors=self.config.disable_security,
            record_video_dir=self.config.save_recording_path,
            record_video_size={
                "width": self.config.window_width,
                "height": self.config.window_height
            },
            record_har_path=self.config.save_har_path,
            locale=self.config.locale,
            http_credentials=self.config.http_credentials,
            is_mobile=self.config.is_mobile,
            has_touch=self.config.has_touch,
            geolocation=self.config.geolocation,
            permissions=self.config.permissions,
            timezone_id=self.config.timezone_id,
        )
    # Start Playwright tracing when a trace output path is configured.
    if self.config.trace_path:
        await context.tracing.start(screenshots=True, snapshots=True, sources=True)
    # Load cookies if they exist
    if self.config.cookies_file and os.path.exists(self.config.cookies_file):
        with open(self.config.cookies_file, 'r') as f:
            try:
                cookies = json.load(f)
                # Playwright rejects cookies whose sameSite is not one of
                # these exact values; coerce anything else to 'None'.
                valid_same_site_values = ['Strict', 'Lax', 'None']
                for cookie in cookies:
                    if 'sameSite' in cookie:
                        if cookie['sameSite'] not in valid_same_site_values:
                            logger.warning(
                                f"Fixed invalid sameSite value '{cookie['sameSite']}' to 'None' for cookie {cookie.get('name')}"
                            )
                            cookie['sameSite'] = 'None'
                logger.info(f'🍪 Loaded {len(cookies)} cookies from {self.config.cookies_file}')
                await context.add_cookies(cookies)
            except json.JSONDecodeError as e:
                # Malformed cookie file is non-fatal: log and continue without cookies.
                logger.error(f'Failed to parse cookies file: {str(e)}')
    # Expose anti-detection scripts
    # Injected into every page before its own scripts run: masks webdriver,
    # fakes languages/plugins/chrome.runtime, patches the notification
    # permission query, and forces shadow roots open for element inspection.
    await context.add_init_script(
        """
        // Webdriver property
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
        });

        // Languages
        Object.defineProperty(navigator, 'languages', {
            get: () => ['en-US']
        });

        // Plugins
        Object.defineProperty(navigator, 'plugins', {
            get: () => [1, 2, 3, 4, 5]
        });

        // Chrome runtime
        window.chrome = { runtime: {} };

        // Permissions
        const originalQuery = window.navigator.permissions.query;
        window.navigator.permissions.query = (parameters) => (
            parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
        );

        (function () {
            const originalAttachShadow = Element.prototype.attachShadow;
            Element.prototype.attachShadow = function attachShadow(options) {
                return originalAttachShadow.call(this, { ...options, mode: "open" });
            };
        })();
        """
    )
    return context

View File

@@ -172,6 +172,10 @@ class CustomController(Controller):
param_model=create_tool_param_model(tool),
)
logger.info(f"Add mcp tool: {tool_name}")
logger.debug(
f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
else:
logger.warning(f"MCP client not started.")
async def close_mcp_client(self):
if self.mcp_client:

View File

@@ -13,14 +13,13 @@ from browser_use.agent.views import (
AgentOutput,
)
from browser_use.browser.browser import BrowserConfig
from browser_use.browser.context import BrowserContext
from browser_use.browser.context import BrowserContext, BrowserContextWindowSize, BrowserContextConfig
from browser_use.browser.views import BrowserState
from gradio.components import Component
from langchain_core.language_models.chat_models import BaseChatModel
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import CustomBrowserContextConfig
from src.controller.custom_controller import CustomController
from src.utils import llm_provider
from src.webui.webui_manager import WebuiManager
@@ -32,12 +31,12 @@ logger = logging.getLogger(__name__)
async def _initialize_llm(
provider: Optional[str],
model_name: Optional[str],
temperature: float,
base_url: Optional[str],
api_key: Optional[str],
num_ctx: Optional[int] = None,
provider: Optional[str],
model_name: Optional[str],
temperature: float,
base_url: Optional[str],
api_key: Optional[str],
num_ctx: Optional[int] = None,
) -> Optional[BaseChatModel]:
"""Initializes the LLM based on settings. Returns None if provider/model is missing."""
if not provider or not model_name:
@@ -68,10 +67,10 @@ async def _initialize_llm(
def _get_config_value(
webui_manager: WebuiManager,
comp_dict: Dict[gr.components.Component, Any],
comp_id_suffix: str,
default: Any = None,
webui_manager: WebuiManager,
comp_dict: Dict[gr.components.Component, Any],
comp_id_suffix: str,
default: Any = None,
) -> Any:
"""Safely get value from component dictionary using its ID suffix relative to the tab."""
# Assumes component ID format is "tab_name.comp_name"
@@ -133,7 +132,7 @@ def _format_agent_output(model_output: AgentOutput) -> str:
async def _handle_new_step(
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
):
"""Callback for each step taken by the agent, including screenshot display."""
@@ -157,12 +156,12 @@ async def _handle_new_step(
try:
# Basic validation: check if it looks like base64
if (
isinstance(screenshot_data, str) and len(screenshot_data) > 100
isinstance(screenshot_data, str) and len(screenshot_data) > 100
): # Arbitrary length check
# *** UPDATED STYLE: Removed centering, adjusted width ***
img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
screenshot_html = (
img_tag + "<br/>"
img_tag + "<br/>"
) # Use <br/> for line break after inline-block image
else:
logger.warning(
@@ -223,7 +222,7 @@ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
async def _ask_assistant_callback(
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
) -> Dict[str, Any]:
"""Callback triggered by the agent's ask_for_assistant action."""
logger.info("Agent requires assistance. Waiting for user input.")
@@ -274,7 +273,7 @@ async def _ask_assistant_callback(
async def run_agent_task(
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
"""Handles the entire lifecycle of initializing and running the agent."""
@@ -358,6 +357,7 @@ async def run_agent_task(
# Planner LLM Settings (Optional)
planner_llm_provider_name = get_setting("planner_llm_provider") or None
planner_llm = None
planner_use_vision = False
if planner_llm_provider_name:
planner_llm_model_name = get_setting("planner_llm_model_name")
planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
@@ -387,7 +387,7 @@ async def run_agent_task(
) # Logic handled by CDP/WSS presence
keep_browser_open = get_browser_setting("keep_browser_open", False)
headless = get_browser_setting("headless", False)
disable_security = get_browser_setting("disable_security", True)
disable_security = get_browser_setting("disable_security", False)
window_w = int(get_browser_setting("window_w", 1280))
window_h = int(get_browser_setting("window_h", 1100))
cdp_url = get_browser_setting("cdp_url") or None
@@ -422,7 +422,7 @@ async def run_agent_task(
# Pass the webui_manager instance to the callback when wrapping it
async def ask_callback_wrapper(
query: str, browser_context: BrowserContext
query: str, browser_context: BrowserContext
) -> Dict[str, Any]:
return await _ask_assistant_callback(webui_manager, query, browser_context)
@@ -456,7 +456,7 @@ async def run_agent_task(
if use_own_browser:
browser_binary_path = (
os.getenv("CHROME_PATH", None) or browser_binary_path
os.getenv("CHROME_PATH", None) or browser_binary_path
)
if browser_binary_path == "":
browser_binary_path = None
@@ -479,14 +479,13 @@ async def run_agent_task(
# Create Context if needed
if not webui_manager.bu_browser_context:
logger.info("Creating new browser context.")
context_config = CustomBrowserContextConfig(
context_config = BrowserContextConfig(
trace_path=save_trace_path if save_trace_path else None,
save_recording_path=save_recording_path
if save_recording_path
else None,
save_downloads_path=save_download_path if save_download_path else None,
window_width=window_w,
window_height=window_h,
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
)
if not webui_manager.bu_browser:
raise ValueError("Browser not initialized, cannot create context.")
@@ -513,7 +512,7 @@ async def run_agent_task(
# Pass the webui_manager to callbacks when wrapping them
async def step_callback_wrapper(
state: BrowserState, output: AgentOutput, step_num: int
state: BrowserState, output: AgentOutput, step_num: int
):
await _handle_new_step(webui_manager, state, output, step_num)
@@ -582,7 +581,7 @@ async def run_agent_task(
await asyncio.sleep(0.2)
if (
agent_task.done() or is_stopped
agent_task.done() or is_stopped
): # If stopped or task finished while paused
break
@@ -633,8 +632,8 @@ async def run_agent_task(
yield update_dict
# Wait until response is submitted or task finishes
while (
webui_manager.bu_response_event is not None
and not agent_task.done()
webui_manager.bu_response_event is not None
and not agent_task.done()
):
await asyncio.sleep(0.2)
# Restore UI after response submitted or if task ended unexpectedly
@@ -716,9 +715,9 @@ async def run_agent_task(
except asyncio.CancelledError:
logger.info("Agent task was cancelled.")
if not any(
"Cancelled" in msg.get("content", "")
for msg in webui_manager.bu_chat_history
if msg.get("role") == "assistant"
"Cancelled" in msg.get("content", "")
for msg in webui_manager.bu_chat_history
if msg.get("role") == "assistant"
):
webui_manager.bu_chat_history.append(
{"role": "assistant", "content": "**Task Cancelled**."}
@@ -730,9 +729,9 @@ async def run_agent_task(
f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
)
if not any(
error_message in msg.get("content", "")
for msg in webui_manager.bu_chat_history
if msg.get("role") == "assistant"
error_message in msg.get("content", "")
for msg in webui_manager.bu_chat_history
if msg.get("role") == "assistant"
):
webui_manager.bu_chat_history.append(
{"role": "assistant", "content": error_message}
@@ -788,7 +787,7 @@ async def run_agent_task(
clear_button_comp: gr.update(interactive=True),
chatbot_comp: gr.update(
value=webui_manager.bu_chat_history
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
),
}
@@ -797,7 +796,7 @@ async def run_agent_task(
async def handle_submit(
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
):
"""Handles clicks on the main 'Submit' button."""
user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
@@ -1048,7 +1047,7 @@ def create_browser_use_agent_tab(webui_manager: WebuiManager):
run_tab_outputs = list(tab_components.values())
async def submit_wrapper(
components_dict: Dict[Component, Any],
components_dict: Dict[Component, Any],
) -> AsyncGenerator[Dict[Component, Any], None]:
"""Wrapper for handle_submit that yields its results."""
async for update in handle_submit(webui_manager, components_dict):

View File

@@ -116,7 +116,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
# LLM Config (from agent_settings tab)
llm_provider_name = get_setting("agent_settings", "llm_provider")
llm_model_name = get_setting("agent_settings", "llm_model_name")
llm_temperature = get_setting("agent_settings", "llm_temperature", 0.5) # Default if not found
llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
llm_base_url = get_setting("agent_settings", "llm_base_url")
llm_api_key = get_setting("agent_settings", "llm_api_key")
ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")
@@ -132,7 +132,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
# Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
browser_config_dict = {
"headless": get_setting("browser_settings", "headless", False),
"disable_security": get_setting("browser_settings", "disable_security", True),
"disable_security": get_setting("browser_settings", "disable_security", False),
"browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
"user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
"window_width": int(get_setting("browser_settings", "window_w", 1280)),

View File

@@ -26,9 +26,9 @@ async def test_browser_use_agent():
from browser_use.agent.service import Agent
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import CustomBrowserContextConfig
from src.controller.custom_controller import CustomController
from src.utils import llm_provider
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
# llm = utils.get_llm_model(
# provider="openai",
@@ -77,15 +77,15 @@ async def test_browser_use_agent():
mcp_server_config = {
"mcpServers": {
"markitdown": {
"command": "docker",
"args": [
"run",
"--rm",
"-i",
"markitdown-mcp:latest"
]
},
# "markitdown": {
# "command": "docker",
# "args": [
# "run",
# "--rm",
# "-i",
# "markitdown-mcp:latest"
# ]
# },
"desktop-commander": {
"command": "npx",
"args": [
@@ -97,8 +97,8 @@ async def test_browser_use_agent():
}
controller = CustomController()
await controller.setup_mcp_client(mcp_server_config)
use_own_browser = False
disable_security = True
use_own_browser = True
disable_security = False
use_vision = True # Set to False when using DeepSeek
max_actions_per_step = 10
@@ -125,7 +125,7 @@ async def test_browser_use_agent():
)
)
browser_context = await browser.new_context(
config=CustomBrowserContextConfig(
config=BrowserContextConfig(
trace_path="./tmp/traces",
save_recording_path="./tmp/record_videos",
save_downloads_path="./tmp/downloads",
@@ -135,8 +135,9 @@ async def test_browser_use_agent():
force_new_context=True
)
)
agent = Agent(
task="download pdf from https://arxiv.org/abs/2504.10458 and rename this pdf to 'GUI-r1-test.pdf'",
agent = BrowserUseAgent(
# task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
task="give me nvidia stock price",
llm=llm,
browser=browser,
browser_context=browser_context,
@@ -153,7 +154,6 @@ async def test_browser_use_agent():
print("\nErrors:")
pprint(history.errors(), indent=4)
except Exception:
import traceback
traceback.print_exc()
@@ -182,9 +182,9 @@ async def test_browser_use_parallel():
from browser_use.agent.service import Agent
from src.browser.custom_browser import CustomBrowser
from src.browser.custom_context import CustomBrowserContextConfig
from src.controller.custom_controller import CustomController
from src.utils import llm_provider
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
# llm = utils.get_llm_model(
# provider="openai",
@@ -233,15 +233,15 @@ async def test_browser_use_parallel():
mcp_server_config = {
"mcpServers": {
"markitdown": {
"command": "docker",
"args": [
"run",
"--rm",
"-i",
"markitdown-mcp:latest"
]
},
# "markitdown": {
# "command": "docker",
# "args": [
# "run",
# "--rm",
# "-i",
# "markitdown-mcp:latest"
# ]
# },
"desktop-commander": {
"command": "npx",
"args": [
@@ -262,7 +262,7 @@ async def test_browser_use_parallel():
controller = CustomController()
await controller.setup_mcp_client(mcp_server_config)
use_own_browser = False
disable_security = True
disable_security = False
use_vision = True # Set to False when using DeepSeek
max_actions_per_step = 10
@@ -289,7 +289,7 @@ async def test_browser_use_parallel():
)
)
browser_context = await browser.new_context(
config=CustomBrowserContextConfig(
config=BrowserContextConfig(
trace_path="./tmp/traces",
save_recording_path="./tmp/record_videos",
save_downloads_path="./tmp/downloads",
@@ -300,7 +300,7 @@ async def test_browser_use_parallel():
)
)
agents = [
Agent(task=task, llm=llm, browser=browser, controller=controller)
BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
for task in [
'Search Google for weather in Tokyo',
# 'Check Reddit front page title',
@@ -332,6 +332,8 @@ async def test_browser_use_parallel():
await browser_context.close()
if browser:
await browser.close()
if controller:
await controller.close_mcp_client()
async def test_deep_research_agent():
@@ -362,8 +364,8 @@ async def test_deep_research_agent():
browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
research_topic = "Impact of Microplastics on Marine Ecosystems"
task_id_to_resume = "815460fb-337a-4850-8fa4-a5f2db301a89" # Set this to resume a previous task ID
research_topic = "Give me a detailed travel plan to Switzerland from June 1st to 10th."
task_id_to_resume = "" # Set this to resume a previous task ID
print(f"Starting research on: {research_topic}")

View File

@@ -14,20 +14,31 @@ async def test_mcp_client():
from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model
test_server_config = {
"playwright": {
"command": "npx",
"args": [
"@playwright/mcp@latest",
],
"transport": "stdio",
},
"filesystem": {
"command": "npx",
"args": [
"-y",
"@modelcontextprotocol/server-filesystem",
"/Users/warmshao/ai_workspace",
]
"mcpServers": {
# "markitdown": {
# "command": "docker",
# "args": [
# "run",
# "--rm",
# "-i",
# "markitdown-mcp:latest"
# ]
# },
"desktop-commander": {
"command": "npx",
"args": [
"-y",
"@wonderwhy-er/desktop-commander"
]
},
# "filesystem": {
# "command": "npx",
# "args": [
# "-y",
# "@modelcontextprotocol/server-filesystem",
# "/Users/xxx/ai_workspace",
# ]
# },
}
}
@@ -48,15 +59,15 @@ async def test_controller_with_mcp():
mcp_server_config = {
"mcpServers": {
"markitdown": {
"command": "docker",
"args": [
"run",
"--rm",
"-i",
"markitdown-mcp:latest"
]
},
# "markitdown": {
# "command": "docker",
# "args": [
# "run",
# "--rm",
# "-i",
# "markitdown-mcp:latest"
# ]
# },
"desktop-commander": {
"command": "npx",
"args": [