mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
update to bu==0.1.43 and fix deep research
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
browser-use==0.1.45
|
||||
browser-use==0.1.43
|
||||
pyperclip==1.9.0
|
||||
gradio==5.27.0
|
||||
json-repair
|
||||
|
||||
@@ -8,9 +8,13 @@ import os
|
||||
from browser_use.agent.gif import create_history_gif
|
||||
from browser_use.agent.service import Agent, AgentHookFunc
|
||||
from browser_use.agent.views import (
|
||||
ActionResult,
|
||||
AgentHistory,
|
||||
AgentHistoryList,
|
||||
AgentStepInfo,
|
||||
ToolCallingMethod,
|
||||
)
|
||||
from browser_use.browser.views import BrowserStateHistory
|
||||
from browser_use.telemetry.views import (
|
||||
AgentEndTelemetryEvent,
|
||||
)
|
||||
@@ -21,17 +25,15 @@ load_dotenv()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SKIP_LLM_API_KEY_VERIFICATION = (
|
||||
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
|
||||
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
|
||||
)
|
||||
|
||||
|
||||
class BrowserUseAgent(Agent):
|
||||
@time_execution_async("--run (agent)")
|
||||
async def run(
|
||||
self,
|
||||
max_steps: int = 100,
|
||||
on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None,
|
||||
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None
|
||||
) -> AgentHistoryList:
|
||||
"""Execute the task with maximum number of steps"""
|
||||
|
||||
@@ -49,41 +51,28 @@ class BrowserUseAgent(Agent):
|
||||
)
|
||||
signal_handler.register()
|
||||
|
||||
# Wait for verification task to complete if it exists
|
||||
if hasattr(self, "_verification_task") and not self._verification_task.done():
|
||||
try:
|
||||
await self._verification_task
|
||||
except Exception:
|
||||
# Error already logged in the task
|
||||
pass
|
||||
|
||||
try:
|
||||
self._log_agent_run()
|
||||
|
||||
# Execute initial actions if provided
|
||||
if self.initial_actions:
|
||||
result = await self.multi_act(
|
||||
self.initial_actions, check_for_new_elements=False
|
||||
)
|
||||
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
|
||||
self.state.last_result = result
|
||||
|
||||
for step in range(max_steps):
|
||||
# Check if waiting for user input after Ctrl+C
|
||||
while self.state.paused:
|
||||
await asyncio.sleep(0.5)
|
||||
if self.state.stopped:
|
||||
break
|
||||
if self.state.paused:
|
||||
signal_handler.wait_for_resume()
|
||||
signal_handler.reset()
|
||||
|
||||
# Check if we should stop due to too many failures
|
||||
if self.state.consecutive_failures >= self.settings.max_failures:
|
||||
logger.error(
|
||||
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
|
||||
)
|
||||
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
|
||||
break
|
||||
|
||||
# Check control flags before each step
|
||||
if self.state.stopped:
|
||||
logger.info("Agent stopped")
|
||||
logger.info('Agent stopped')
|
||||
break
|
||||
|
||||
while self.state.paused:
|
||||
@@ -108,15 +97,30 @@ class BrowserUseAgent(Agent):
|
||||
await self.log_completion()
|
||||
break
|
||||
else:
|
||||
logger.info("❌ Failed to complete task in maximum steps")
|
||||
error_message = 'Failed to complete task in maximum steps'
|
||||
|
||||
self.state.history.history.append(
|
||||
AgentHistory(
|
||||
model_output=None,
|
||||
result=[ActionResult(error=error_message, include_in_memory=True)],
|
||||
state=BrowserStateHistory(
|
||||
url='',
|
||||
title='',
|
||||
tabs=[],
|
||||
interacted_element=[],
|
||||
screenshot=None,
|
||||
),
|
||||
metadata=None,
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f'❌ {error_message}')
|
||||
|
||||
return self.state.history
|
||||
|
||||
except KeyboardInterrupt:
|
||||
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
|
||||
logger.info(
|
||||
"Got KeyboardInterrupt during execution, returning current history"
|
||||
)
|
||||
logger.info('Got KeyboardInterrupt during execution, returning current history')
|
||||
return self.state.history
|
||||
|
||||
finally:
|
||||
@@ -136,13 +140,29 @@ class BrowserUseAgent(Agent):
|
||||
)
|
||||
)
|
||||
|
||||
if self.settings.save_playwright_script_path:
|
||||
logger.info(
|
||||
f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
|
||||
)
|
||||
try:
|
||||
# Extract sensitive data keys if sensitive_data is provided
|
||||
keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
|
||||
# Pass browser and context config to the saving method
|
||||
self.state.history.save_as_playwright_script(
|
||||
self.settings.save_playwright_script_path,
|
||||
sensitive_data_keys=keys,
|
||||
browser_config=self.browser.config,
|
||||
context_config=self.browser_context.config,
|
||||
)
|
||||
except Exception as script_gen_err:
|
||||
# Log any error during script generation/saving
|
||||
logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
|
||||
|
||||
await self.close()
|
||||
|
||||
if self.settings.generate_gif:
|
||||
output_path: str = "agent_history.gif"
|
||||
output_path: str = 'agent_history.gif'
|
||||
if isinstance(self.settings.generate_gif, str):
|
||||
output_path = self.settings.generate_gif
|
||||
|
||||
create_history_gif(
|
||||
task=self.task, history=self.state.history, output_path=output_path
|
||||
)
|
||||
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
|
||||
|
||||
@@ -29,9 +29,10 @@ from langchain_core.tools import StructuredTool, Tool
|
||||
from langgraph.graph import StateGraph
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from browser_use.browser.context import BrowserContextWindowSize, BrowserContextConfig
|
||||
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils.mcp_client import setup_mcp_client_and_tools
|
||||
|
||||
@@ -47,12 +48,12 @@ _BROWSER_AGENT_INSTANCES = {}
|
||||
|
||||
|
||||
async def run_single_browser_task(
|
||||
task_query: str,
|
||||
task_id: str,
|
||||
llm: Any, # Pass the main LLM
|
||||
browser_config: Dict[str, Any],
|
||||
stop_event: threading.Event,
|
||||
use_vision: bool = False,
|
||||
task_query: str,
|
||||
task_id: str,
|
||||
llm: Any, # Pass the main LLM
|
||||
browser_config: Dict[str, Any],
|
||||
stop_event: threading.Event,
|
||||
use_vision: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Runs a single BrowserUseAgent task.
|
||||
@@ -104,10 +105,9 @@ async def run_single_browser_task(
|
||||
)
|
||||
)
|
||||
|
||||
context_config = CustomBrowserContextConfig(
|
||||
context_config = BrowserContextConfig(
|
||||
save_downloads_path="./tmp/downloads",
|
||||
window_width=window_w,
|
||||
window_height=window_h,
|
||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
||||
force_new_context=True,
|
||||
)
|
||||
bu_browser_context = await bu_browser.new_context(config=context_config)
|
||||
@@ -198,12 +198,12 @@ class BrowserSearchInput(BaseModel):
|
||||
|
||||
|
||||
async def _run_browser_search_tool(
|
||||
queries: List[str],
|
||||
task_id: str, # Injected dependency
|
||||
llm: Any, # Injected dependency
|
||||
browser_config: Dict[str, Any],
|
||||
stop_event: threading.Event,
|
||||
max_parallel_browsers: int = 1,
|
||||
queries: List[str],
|
||||
task_id: str, # Injected dependency
|
||||
llm: Any, # Injected dependency
|
||||
browser_config: Dict[str, Any],
|
||||
stop_event: threading.Event,
|
||||
max_parallel_browsers: int = 1,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Internal function to execute parallel browser searches based on LLM-provided queries.
|
||||
@@ -267,11 +267,11 @@ async def _run_browser_search_tool(
|
||||
|
||||
|
||||
def create_browser_search_tool(
|
||||
llm: Any,
|
||||
browser_config: Dict[str, Any],
|
||||
task_id: str,
|
||||
stop_event: threading.Event,
|
||||
max_parallel_browsers: int = 1,
|
||||
llm: Any,
|
||||
browser_config: Dict[str, Any],
|
||||
task_id: str,
|
||||
stop_event: threading.Event,
|
||||
max_parallel_browsers: int = 1,
|
||||
) -> StructuredTool:
|
||||
"""Factory function to create the browser search tool with necessary dependencies."""
|
||||
# Use partial to bind the dependencies that aren't part of the LLM call arguments
|
||||
@@ -553,7 +553,7 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||
else:
|
||||
current_task_message = [
|
||||
SystemMessage(
|
||||
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool."
|
||||
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool. Please output at least one tool."
|
||||
),
|
||||
HumanMessage(
|
||||
content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
|
||||
@@ -582,8 +582,11 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||
_save_plan_to_md(plan, output_dir)
|
||||
return {
|
||||
"research_plan": plan,
|
||||
"current_step_index": current_index + 1,
|
||||
"error_message": f"LLM failed to call a tool for step {current_step['step']}.",
|
||||
"status": "pending",
|
||||
"current_step_index": current_index,
|
||||
"messages": [
|
||||
f"LLM failed to call a tool for step {current_step['step']}. Response: {ai_response.content}"
|
||||
f". Please use tool to do research unless you are thinking or summary"],
|
||||
}
|
||||
|
||||
# Process tool calls
|
||||
@@ -665,8 +668,8 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||
browser_tool_called = "parallel_browser_search" in executed_tool_names
|
||||
# We might need a more nuanced status based on the *content* of tool_results
|
||||
step_failed = (
|
||||
any("Error:" in str(tr.content) for tr in tool_results)
|
||||
or not browser_tool_called
|
||||
any("Error:" in str(tr.content) for tr in tool_results)
|
||||
or not browser_tool_called
|
||||
)
|
||||
|
||||
if step_failed:
|
||||
@@ -695,9 +698,9 @@ async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||
"search_results": current_search_results, # Update with new results
|
||||
"current_step_index": current_index + 1,
|
||||
"messages": state["messages"]
|
||||
+ current_task_message
|
||||
+ [ai_response]
|
||||
+ tool_results,
|
||||
+ current_task_message
|
||||
+ [ai_response]
|
||||
+ tool_results,
|
||||
# Optionally return the tool_results messages if needed by downstream nodes
|
||||
}
|
||||
|
||||
@@ -879,10 +882,10 @@ def should_continue(state: DeepResearchState) -> str:
|
||||
|
||||
class DeepResearchAgent:
|
||||
def __init__(
|
||||
self,
|
||||
llm: Any,
|
||||
browser_config: Dict[str, Any],
|
||||
mcp_server_config: Optional[Dict[str, Any]] = None,
|
||||
self,
|
||||
llm: Any,
|
||||
browser_config: Dict[str, Any],
|
||||
mcp_server_config: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
"""
|
||||
Initializes the DeepSearchAgent.
|
||||
@@ -904,7 +907,7 @@ class DeepResearchAgent:
|
||||
self.runner: Optional[asyncio.Task] = None # To hold the asyncio task for run
|
||||
|
||||
async def _setup_tools(
|
||||
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
|
||||
self, task_id: str, stop_event: threading.Event, max_parallel_browsers: int = 1
|
||||
) -> List[Tool]:
|
||||
"""Sets up the basic tools (File I/O) and optional MCP tools."""
|
||||
tools = [
|
||||
@@ -981,11 +984,11 @@ class DeepResearchAgent:
|
||||
return app
|
||||
|
||||
async def run(
|
||||
self,
|
||||
topic: str,
|
||||
task_id: Optional[str] = None,
|
||||
save_dir: str = "./tmp/deep_research",
|
||||
max_parallel_browsers: int = 1,
|
||||
self,
|
||||
topic: str,
|
||||
task_id: Optional[str] = None,
|
||||
save_dir: str = "./tmp/deep_research",
|
||||
max_parallel_browsers: int = 1,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Starts the deep research process (Async Generator Version).
|
||||
|
||||
@@ -26,25 +26,33 @@ from browser_use.browser.utils.screen_resolution import get_screen_resolution, g
|
||||
from browser_use.utils import time_execution_async
|
||||
import socket
|
||||
|
||||
from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
|
||||
from .custom_context import CustomBrowserContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowser(Browser):
|
||||
|
||||
async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
|
||||
async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
|
||||
"""Create a browser context"""
|
||||
browser_config = self.config.model_dump() if self.config else {}
|
||||
context_config = config.model_dump() if config else {}
|
||||
merged_config = {**browser_config, **context_config}
|
||||
return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
|
||||
return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
|
||||
|
||||
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
|
||||
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
|
||||
assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
|
||||
|
||||
if self.config.headless:
|
||||
# Use the configured window size from new_context_config if available
|
||||
if (
|
||||
not self.config.headless
|
||||
and hasattr(self.config, 'new_context_config')
|
||||
and hasattr(self.config.new_context_config, 'browser_window_size')
|
||||
):
|
||||
screen_size = self.config.new_context_config.browser_window_size.model_dump()
|
||||
offset_x, offset_y = get_window_adjustments()
|
||||
elif self.config.headless:
|
||||
screen_size = {'width': 1920, 'height': 1080}
|
||||
offset_x, offset_y = 0, 0
|
||||
else:
|
||||
@@ -52,6 +60,7 @@ class CustomBrowser(Browser):
|
||||
offset_x, offset_y = get_window_adjustments()
|
||||
|
||||
chrome_args = {
|
||||
f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
|
||||
*CHROME_ARGS,
|
||||
*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
|
||||
*(CHROME_HEADLESS_ARGS if self.config.headless else []),
|
||||
@@ -70,8 +79,8 @@ class CustomBrowser(Browser):
|
||||
|
||||
# check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
if s.connect_ex(('localhost', 9222)) == 0:
|
||||
chrome_args.remove('--remote-debugging-port=9222')
|
||||
if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
|
||||
chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
|
||||
|
||||
browser_class = getattr(playwright, self.config.browser_class)
|
||||
args = {
|
||||
|
||||
@@ -12,10 +12,6 @@ from browser_use.browser.context import BrowserContextState
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowserContextConfig(BrowserContextConfig):
|
||||
force_new_context: bool = False # force to create new context
|
||||
|
||||
|
||||
class CustomBrowserContext(BrowserContext):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -24,96 +20,3 @@ class CustomBrowserContext(BrowserContext):
|
||||
state: Optional[BrowserContextState] = None,
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state)
|
||||
|
||||
async def _create_context(self, browser: PlaywrightBrowser):
|
||||
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
|
||||
if not self.config.force_new_context and self.browser.config.cdp_url and len(browser.contexts) > 0:
|
||||
context = browser.contexts[0]
|
||||
elif not self.config.force_new_context and self.browser.config.browser_binary_path and len(
|
||||
browser.contexts) > 0:
|
||||
# Connect to existing Chrome instance instead of creating new one
|
||||
context = browser.contexts[0]
|
||||
else:
|
||||
# Original code for creating new context
|
||||
context = await browser.new_context(
|
||||
no_viewport=True,
|
||||
user_agent=self.config.user_agent,
|
||||
java_script_enabled=True,
|
||||
bypass_csp=self.config.disable_security,
|
||||
ignore_https_errors=self.config.disable_security,
|
||||
record_video_dir=self.config.save_recording_path,
|
||||
record_video_size={
|
||||
"width": self.config.window_width,
|
||||
"height": self.config.window_height
|
||||
},
|
||||
record_har_path=self.config.save_har_path,
|
||||
locale=self.config.locale,
|
||||
http_credentials=self.config.http_credentials,
|
||||
is_mobile=self.config.is_mobile,
|
||||
has_touch=self.config.has_touch,
|
||||
geolocation=self.config.geolocation,
|
||||
permissions=self.config.permissions,
|
||||
timezone_id=self.config.timezone_id,
|
||||
)
|
||||
|
||||
if self.config.trace_path:
|
||||
await context.tracing.start(screenshots=True, snapshots=True, sources=True)
|
||||
|
||||
# Load cookies if they exist
|
||||
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
|
||||
with open(self.config.cookies_file, 'r') as f:
|
||||
try:
|
||||
cookies = json.load(f)
|
||||
|
||||
valid_same_site_values = ['Strict', 'Lax', 'None']
|
||||
for cookie in cookies:
|
||||
if 'sameSite' in cookie:
|
||||
if cookie['sameSite'] not in valid_same_site_values:
|
||||
logger.warning(
|
||||
f"Fixed invalid sameSite value '{cookie['sameSite']}' to 'None' for cookie {cookie.get('name')}"
|
||||
)
|
||||
cookie['sameSite'] = 'None'
|
||||
logger.info(f'🍪 Loaded {len(cookies)} cookies from {self.config.cookies_file}')
|
||||
await context.add_cookies(cookies)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f'Failed to parse cookies file: {str(e)}')
|
||||
|
||||
# Expose anti-detection scripts
|
||||
await context.add_init_script(
|
||||
"""
|
||||
// Webdriver property
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// Languages
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US']
|
||||
});
|
||||
|
||||
// Plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
});
|
||||
|
||||
// Chrome runtime
|
||||
window.chrome = { runtime: {} };
|
||||
|
||||
// Permissions
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
Promise.resolve({ state: Notification.permission }) :
|
||||
originalQuery(parameters)
|
||||
);
|
||||
(function () {
|
||||
const originalAttachShadow = Element.prototype.attachShadow;
|
||||
Element.prototype.attachShadow = function attachShadow(options) {
|
||||
return originalAttachShadow.call(this, { ...options, mode: "open" });
|
||||
};
|
||||
})();
|
||||
"""
|
||||
)
|
||||
|
||||
return context
|
||||
|
||||
@@ -172,6 +172,10 @@ class CustomController(Controller):
|
||||
param_model=create_tool_param_model(tool),
|
||||
)
|
||||
logger.info(f"Add mcp tool: {tool_name}")
|
||||
logger.debug(
|
||||
f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
|
||||
else:
|
||||
logger.warning(f"MCP client not started.")
|
||||
|
||||
async def close_mcp_client(self):
|
||||
if self.mcp_client:
|
||||
|
||||
@@ -13,14 +13,13 @@ from browser_use.agent.views import (
|
||||
AgentOutput,
|
||||
)
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextWindowSize, BrowserContextConfig
|
||||
from browser_use.browser.views import BrowserState
|
||||
from gradio.components import Component
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.webui.webui_manager import WebuiManager
|
||||
@@ -32,12 +31,12 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _initialize_llm(
|
||||
provider: Optional[str],
|
||||
model_name: Optional[str],
|
||||
temperature: float,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
num_ctx: Optional[int] = None,
|
||||
provider: Optional[str],
|
||||
model_name: Optional[str],
|
||||
temperature: float,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
num_ctx: Optional[int] = None,
|
||||
) -> Optional[BaseChatModel]:
|
||||
"""Initializes the LLM based on settings. Returns None if provider/model is missing."""
|
||||
if not provider or not model_name:
|
||||
@@ -68,10 +67,10 @@ async def _initialize_llm(
|
||||
|
||||
|
||||
def _get_config_value(
|
||||
webui_manager: WebuiManager,
|
||||
comp_dict: Dict[gr.components.Component, Any],
|
||||
comp_id_suffix: str,
|
||||
default: Any = None,
|
||||
webui_manager: WebuiManager,
|
||||
comp_dict: Dict[gr.components.Component, Any],
|
||||
comp_id_suffix: str,
|
||||
default: Any = None,
|
||||
) -> Any:
|
||||
"""Safely get value from component dictionary using its ID suffix relative to the tab."""
|
||||
# Assumes component ID format is "tab_name.comp_name"
|
||||
@@ -133,7 +132,7 @@ def _format_agent_output(model_output: AgentOutput) -> str:
|
||||
|
||||
|
||||
async def _handle_new_step(
|
||||
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
|
||||
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
|
||||
):
|
||||
"""Callback for each step taken by the agent, including screenshot display."""
|
||||
|
||||
@@ -157,12 +156,12 @@ async def _handle_new_step(
|
||||
try:
|
||||
# Basic validation: check if it looks like base64
|
||||
if (
|
||||
isinstance(screenshot_data, str) and len(screenshot_data) > 100
|
||||
isinstance(screenshot_data, str) and len(screenshot_data) > 100
|
||||
): # Arbitrary length check
|
||||
# *** UPDATED STYLE: Removed centering, adjusted width ***
|
||||
img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
|
||||
screenshot_html = (
|
||||
img_tag + "<br/>"
|
||||
img_tag + "<br/>"
|
||||
) # Use <br/> for line break after inline-block image
|
||||
else:
|
||||
logger.warning(
|
||||
@@ -223,7 +222,7 @@ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
|
||||
|
||||
|
||||
async def _ask_assistant_callback(
|
||||
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
|
||||
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
|
||||
) -> Dict[str, Any]:
|
||||
"""Callback triggered by the agent's ask_for_assistant action."""
|
||||
logger.info("Agent requires assistance. Waiting for user input.")
|
||||
@@ -274,7 +273,7 @@ async def _ask_assistant_callback(
|
||||
|
||||
|
||||
async def run_agent_task(
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
|
||||
"""Handles the entire lifecycle of initializing and running the agent."""
|
||||
|
||||
@@ -358,6 +357,7 @@ async def run_agent_task(
|
||||
# Planner LLM Settings (Optional)
|
||||
planner_llm_provider_name = get_setting("planner_llm_provider") or None
|
||||
planner_llm = None
|
||||
planner_use_vision = False
|
||||
if planner_llm_provider_name:
|
||||
planner_llm_model_name = get_setting("planner_llm_model_name")
|
||||
planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
|
||||
@@ -387,7 +387,7 @@ async def run_agent_task(
|
||||
) # Logic handled by CDP/WSS presence
|
||||
keep_browser_open = get_browser_setting("keep_browser_open", False)
|
||||
headless = get_browser_setting("headless", False)
|
||||
disable_security = get_browser_setting("disable_security", True)
|
||||
disable_security = get_browser_setting("disable_security", False)
|
||||
window_w = int(get_browser_setting("window_w", 1280))
|
||||
window_h = int(get_browser_setting("window_h", 1100))
|
||||
cdp_url = get_browser_setting("cdp_url") or None
|
||||
@@ -422,7 +422,7 @@ async def run_agent_task(
|
||||
|
||||
# Pass the webui_manager instance to the callback when wrapping it
|
||||
async def ask_callback_wrapper(
|
||||
query: str, browser_context: BrowserContext
|
||||
query: str, browser_context: BrowserContext
|
||||
) -> Dict[str, Any]:
|
||||
return await _ask_assistant_callback(webui_manager, query, browser_context)
|
||||
|
||||
@@ -456,7 +456,7 @@ async def run_agent_task(
|
||||
|
||||
if use_own_browser:
|
||||
browser_binary_path = (
|
||||
os.getenv("CHROME_PATH", None) or browser_binary_path
|
||||
os.getenv("CHROME_PATH", None) or browser_binary_path
|
||||
)
|
||||
if browser_binary_path == "":
|
||||
browser_binary_path = None
|
||||
@@ -479,14 +479,13 @@ async def run_agent_task(
|
||||
# Create Context if needed
|
||||
if not webui_manager.bu_browser_context:
|
||||
logger.info("Creating new browser context.")
|
||||
context_config = CustomBrowserContextConfig(
|
||||
context_config = BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path
|
||||
if save_recording_path
|
||||
else None,
|
||||
save_downloads_path=save_download_path if save_download_path else None,
|
||||
window_width=window_w,
|
||||
window_height=window_h,
|
||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
||||
)
|
||||
if not webui_manager.bu_browser:
|
||||
raise ValueError("Browser not initialized, cannot create context.")
|
||||
@@ -513,7 +512,7 @@ async def run_agent_task(
|
||||
|
||||
# Pass the webui_manager to callbacks when wrapping them
|
||||
async def step_callback_wrapper(
|
||||
state: BrowserState, output: AgentOutput, step_num: int
|
||||
state: BrowserState, output: AgentOutput, step_num: int
|
||||
):
|
||||
await _handle_new_step(webui_manager, state, output, step_num)
|
||||
|
||||
@@ -582,7 +581,7 @@ async def run_agent_task(
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
if (
|
||||
agent_task.done() or is_stopped
|
||||
agent_task.done() or is_stopped
|
||||
): # If stopped or task finished while paused
|
||||
break
|
||||
|
||||
@@ -633,8 +632,8 @@ async def run_agent_task(
|
||||
yield update_dict
|
||||
# Wait until response is submitted or task finishes
|
||||
while (
|
||||
webui_manager.bu_response_event is not None
|
||||
and not agent_task.done()
|
||||
webui_manager.bu_response_event is not None
|
||||
and not agent_task.done()
|
||||
):
|
||||
await asyncio.sleep(0.2)
|
||||
# Restore UI after response submitted or if task ended unexpectedly
|
||||
@@ -716,9 +715,9 @@ async def run_agent_task(
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Agent task was cancelled.")
|
||||
if not any(
|
||||
"Cancelled" in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
"Cancelled" in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
):
|
||||
webui_manager.bu_chat_history.append(
|
||||
{"role": "assistant", "content": "**Task Cancelled**."}
|
||||
@@ -730,9 +729,9 @@ async def run_agent_task(
|
||||
f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
|
||||
)
|
||||
if not any(
|
||||
error_message in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
error_message in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
):
|
||||
webui_manager.bu_chat_history.append(
|
||||
{"role": "assistant", "content": error_message}
|
||||
@@ -788,7 +787,7 @@ async def run_agent_task(
|
||||
clear_button_comp: gr.update(interactive=True),
|
||||
chatbot_comp: gr.update(
|
||||
value=webui_manager.bu_chat_history
|
||||
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
|
||||
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
|
||||
),
|
||||
}
|
||||
|
||||
@@ -797,7 +796,7 @@ async def run_agent_task(
|
||||
|
||||
|
||||
async def handle_submit(
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
):
|
||||
"""Handles clicks on the main 'Submit' button."""
|
||||
user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
|
||||
@@ -1048,7 +1047,7 @@ def create_browser_use_agent_tab(webui_manager: WebuiManager):
|
||||
run_tab_outputs = list(tab_components.values())
|
||||
|
||||
async def submit_wrapper(
|
||||
components_dict: Dict[Component, Any],
|
||||
components_dict: Dict[Component, Any],
|
||||
) -> AsyncGenerator[Dict[Component, Any], None]:
|
||||
"""Wrapper for handle_submit that yields its results."""
|
||||
async for update in handle_submit(webui_manager, components_dict):
|
||||
|
||||
@@ -116,7 +116,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
|
||||
# LLM Config (from agent_settings tab)
|
||||
llm_provider_name = get_setting("agent_settings", "llm_provider")
|
||||
llm_model_name = get_setting("agent_settings", "llm_model_name")
|
||||
llm_temperature = get_setting("agent_settings", "llm_temperature", 0.5) # Default if not found
|
||||
llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
|
||||
llm_base_url = get_setting("agent_settings", "llm_base_url")
|
||||
llm_api_key = get_setting("agent_settings", "llm_api_key")
|
||||
ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")
|
||||
@@ -132,7 +132,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
|
||||
# Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
|
||||
browser_config_dict = {
|
||||
"headless": get_setting("browser_settings", "headless", False),
|
||||
"disable_security": get_setting("browser_settings", "disable_security", True),
|
||||
"disable_security": get_setting("browser_settings", "disable_security", False),
|
||||
"browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
|
||||
"user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
|
||||
"window_width": int(get_setting("browser_settings", "window_w", 1280)),
|
||||
|
||||
@@ -26,9 +26,9 @@ async def test_browser_use_agent():
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -77,15 +77,15 @@ async def test_browser_use_agent():
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
@@ -97,8 +97,8 @@ async def test_browser_use_agent():
|
||||
}
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_own_browser = True
|
||||
disable_security = False
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
@@ -125,7 +125,7 @@ async def test_browser_use_agent():
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
save_downloads_path="./tmp/downloads",
|
||||
@@ -135,8 +135,9 @@ async def test_browser_use_agent():
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agent = Agent(
|
||||
task="download pdf from https://arxiv.org/abs/2504.10458 and rename this pdf to 'GUI-r1-test.pdf'",
|
||||
agent = BrowserUseAgent(
|
||||
# task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
|
||||
task="give me nvidia stock price",
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
browser_context=browser_context,
|
||||
@@ -153,7 +154,6 @@ async def test_browser_use_agent():
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
@@ -182,9 +182,9 @@ async def test_browser_use_parallel():
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -233,15 +233,15 @@ async def test_browser_use_parallel():
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
@@ -262,7 +262,7 @@ async def test_browser_use_parallel():
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
disable_security = False
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
@@ -289,7 +289,7 @@ async def test_browser_use_parallel():
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
save_downloads_path="./tmp/downloads",
|
||||
@@ -300,7 +300,7 @@ async def test_browser_use_parallel():
|
||||
)
|
||||
)
|
||||
agents = [
|
||||
Agent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
for task in [
|
||||
'Search Google for weather in Tokyo',
|
||||
# 'Check Reddit front page title',
|
||||
@@ -332,6 +332,8 @@ async def test_browser_use_parallel():
|
||||
await browser_context.close()
|
||||
if browser:
|
||||
await browser.close()
|
||||
if controller:
|
||||
await controller.close_mcp_client()
|
||||
|
||||
|
||||
async def test_deep_research_agent():
|
||||
@@ -362,8 +364,8 @@ async def test_deep_research_agent():
|
||||
|
||||
browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
|
||||
agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
|
||||
research_topic = "Impact of Microplastics on Marine Ecosystems"
|
||||
task_id_to_resume = "815460fb-337a-4850-8fa4-a5f2db301a89" # Set this to resume a previous task ID
|
||||
research_topic = "Give me a detailed travel plan to Switzerland from June 1st to 10th."
|
||||
task_id_to_resume = "" # Set this to resume a previous task ID
|
||||
|
||||
print(f"Starting research on: {research_topic}")
|
||||
|
||||
|
||||
@@ -14,20 +14,31 @@ async def test_mcp_client():
|
||||
from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model
|
||||
|
||||
test_server_config = {
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@playwright/mcp@latest",
|
||||
],
|
||||
"transport": "stdio",
|
||||
},
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@modelcontextprotocol/server-filesystem",
|
||||
"/Users/warmshao/ai_workspace",
|
||||
]
|
||||
"mcpServers": {
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,15 +59,15 @@ async def test_controller_with_mcp():
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
|
||||
Reference in New Issue
Block a user