mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
add browser-use agent run
This commit is contained in:
@@ -9,11 +9,23 @@ from playwright.async_api import (
|
||||
Playwright,
|
||||
async_playwright,
|
||||
)
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.browser import Browser, IN_DOCKER
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
import logging
|
||||
|
||||
from browser_use.browser.chrome import (
|
||||
CHROME_ARGS,
|
||||
CHROME_DETERMINISTIC_RENDERING_ARGS,
|
||||
CHROME_DISABLE_SECURITY_ARGS,
|
||||
CHROME_DOCKER_ARGS,
|
||||
CHROME_HEADLESS_ARGS,
|
||||
)
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
|
||||
from browser_use.utils import time_execution_async
|
||||
import socket
|
||||
|
||||
from .custom_context import CustomBrowserContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -26,3 +38,62 @@ class CustomBrowser(Browser):
|
||||
config: BrowserContextConfig = BrowserContextConfig()
|
||||
) -> CustomBrowserContext:
|
||||
return CustomBrowserContext(config=config, browser=self)
|
||||
|
||||
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
|
||||
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
|
||||
assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
|
||||
|
||||
if self.config.headless:
|
||||
screen_size = {'width': 1920, 'height': 1080}
|
||||
offset_x, offset_y = 0, 0
|
||||
else:
|
||||
screen_size = get_screen_resolution()
|
||||
offset_x, offset_y = get_window_adjustments()
|
||||
|
||||
chrome_args = {
|
||||
*CHROME_ARGS,
|
||||
*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
|
||||
*(CHROME_HEADLESS_ARGS if self.config.headless else []),
|
||||
*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
|
||||
*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
|
||||
f'--window-position={offset_x},{offset_y}',
|
||||
*self.config.extra_browser_args,
|
||||
}
|
||||
contain_window_size = False
|
||||
for arg in self.config.extra_browser_args:
|
||||
if "--window-size" in arg:
|
||||
contain_window_size = True
|
||||
break
|
||||
if not contain_window_size:
|
||||
chrome_args.add(f'--window-size={screen_size["width"]},{screen_size["height"]}')
|
||||
|
||||
# check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
if s.connect_ex(('localhost', 9222)) == 0:
|
||||
chrome_args.remove('--remote-debugging-port=9222')
|
||||
|
||||
browser_class = getattr(playwright, self.config.browser_class)
|
||||
args = {
|
||||
'chromium': list(chrome_args),
|
||||
'firefox': [
|
||||
*{
|
||||
'-no-remote',
|
||||
*self.config.extra_browser_args,
|
||||
}
|
||||
],
|
||||
'webkit': [
|
||||
*{
|
||||
'--no-startup-window',
|
||||
*self.config.extra_browser_args,
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
browser = await browser_class.launch(
|
||||
headless=self.config.headless,
|
||||
args=args[self.config.browser_class],
|
||||
proxy=self.config.proxy.model_dump() if self.config.proxy else None,
|
||||
handle_sigterm=False,
|
||||
handle_sigint=False,
|
||||
)
|
||||
return browser
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.browser import Browser, IN_DOCKER
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
@@ -10,10 +10,104 @@ from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowserContextConfig(BrowserContextConfig):
|
||||
force_new_context: bool = False # force to create new context
|
||||
|
||||
|
||||
class CustomBrowserContext(BrowserContext):
|
||||
def __init__(
|
||||
self,
|
||||
browser: "Browser",
|
||||
config: BrowserContextConfig = BrowserContextConfig()
|
||||
config: CustomBrowserContextConfig = CustomBrowserContextConfig(),
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
|
||||
|
||||
async def _create_context(self, browser: PlaywrightBrowser):
|
||||
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
|
||||
if not self.config.force_new_context and self.browser.config.cdp_url and len(browser.contexts) > 0:
|
||||
context = browser.contexts[0]
|
||||
elif not self.config.force_new_context and self.browser.config.browser_binary_path and len(
|
||||
browser.contexts) > 0:
|
||||
# Connect to existing Chrome instance instead of creating new one
|
||||
context = browser.contexts[0]
|
||||
else:
|
||||
# Original code for creating new context
|
||||
context = await browser.new_context(
|
||||
no_viewport=True,
|
||||
user_agent=self.config.user_agent,
|
||||
java_script_enabled=True,
|
||||
bypass_csp=self.config.disable_security,
|
||||
ignore_https_errors=self.config.disable_security,
|
||||
record_video_dir=self.config.save_recording_path,
|
||||
record_video_size=self.config.browser_window_size.model_dump(),
|
||||
record_har_path=self.config.save_har_path,
|
||||
locale=self.config.locale,
|
||||
http_credentials=self.config.http_credentials,
|
||||
is_mobile=self.config.is_mobile,
|
||||
has_touch=self.config.has_touch,
|
||||
geolocation=self.config.geolocation,
|
||||
permissions=self.config.permissions,
|
||||
timezone_id=self.config.timezone_id,
|
||||
)
|
||||
|
||||
if self.config.trace_path:
|
||||
await context.tracing.start(screenshots=True, snapshots=True, sources=True)
|
||||
|
||||
# Load cookies if they exist
|
||||
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
|
||||
with open(self.config.cookies_file, 'r') as f:
|
||||
try:
|
||||
cookies = json.load(f)
|
||||
|
||||
valid_same_site_values = ['Strict', 'Lax', 'None']
|
||||
for cookie in cookies:
|
||||
if 'sameSite' in cookie:
|
||||
if cookie['sameSite'] not in valid_same_site_values:
|
||||
logger.warning(
|
||||
f"Fixed invalid sameSite value '{cookie['sameSite']}' to 'None' for cookie {cookie.get('name')}"
|
||||
)
|
||||
cookie['sameSite'] = 'None'
|
||||
logger.info(f'🍪 Loaded {len(cookies)} cookies from {self.config.cookies_file}')
|
||||
await context.add_cookies(cookies)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f'Failed to parse cookies file: {str(e)}')
|
||||
|
||||
# Expose anti-detection scripts
|
||||
await context.add_init_script(
|
||||
"""
|
||||
// Webdriver property
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// Languages
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US']
|
||||
});
|
||||
|
||||
// Plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
});
|
||||
|
||||
// Chrome runtime
|
||||
window.chrome = { runtime: {} };
|
||||
|
||||
// Permissions
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
Promise.resolve({ state: Notification.permission }) :
|
||||
originalQuery(parameters)
|
||||
);
|
||||
(function () {
|
||||
const originalAttachShadow = Element.prototype.attachShadow;
|
||||
Element.prototype.attachShadow = function attachShadow(options) {
|
||||
return originalAttachShadow.call(this, { ...options, mode: "open" });
|
||||
};
|
||||
})();
|
||||
"""
|
||||
)
|
||||
|
||||
return context
|
||||
|
||||
@@ -48,28 +48,6 @@ class CustomController(Controller):
|
||||
self.mcp_client = None
|
||||
self.mcp_server_config = None
|
||||
|
||||
async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
|
||||
self.mcp_server_config = mcp_server_config
|
||||
if self.mcp_server_config:
|
||||
self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
|
||||
self.register_mcp_tools()
|
||||
|
||||
def register_mcp_tools(self):
|
||||
"""
|
||||
Register the MCP tools used by this controller.
|
||||
"""
|
||||
if self.mcp_client:
|
||||
for server_name in self.mcp_client.server_name_to_tools:
|
||||
for tool in self.mcp_client.server_name_to_tools[server_name]:
|
||||
tool_name = f"mcp.{server_name}.{tool.name}"
|
||||
self.registry.registry.actions[tool_name] = RegisteredAction(
|
||||
name=tool_name,
|
||||
description=tool.description,
|
||||
function=tool,
|
||||
param_model=create_tool_param_model(tool),
|
||||
)
|
||||
logger.info(f"Add mcp tool: {tool_name}")
|
||||
|
||||
def _register_custom_actions(self):
|
||||
"""Register all custom browser actions"""
|
||||
|
||||
@@ -173,6 +151,28 @@ class CustomController(Controller):
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
async def setup_mcp_client(self, mcp_server_config: Optional[Dict[str, Any]] = None):
|
||||
self.mcp_server_config = mcp_server_config
|
||||
if self.mcp_server_config:
|
||||
self.mcp_client = await setup_mcp_client_and_tools(self.mcp_server_config)
|
||||
self.register_mcp_tools()
|
||||
|
||||
def register_mcp_tools(self):
|
||||
"""
|
||||
Register the MCP tools used by this controller.
|
||||
"""
|
||||
if self.mcp_client:
|
||||
for server_name in self.mcp_client.server_name_to_tools:
|
||||
for tool in self.mcp_client.server_name_to_tools[server_name]:
|
||||
tool_name = f"mcp.{server_name}.{tool.name}"
|
||||
self.registry.registry.actions[tool_name] = RegisteredAction(
|
||||
name=tool_name,
|
||||
description=tool.description,
|
||||
function=tool,
|
||||
param_model=create_tool_param_model(tool),
|
||||
)
|
||||
logger.info(f"Add mcp tool: {tool_name}")
|
||||
|
||||
async def close_mcp_client(self):
|
||||
if self.mcp_client:
|
||||
await self.mcp_client.__aexit__(None, None, None)
|
||||
|
||||
@@ -40,7 +40,13 @@ async def setup_mcp_client_and_tools(mcp_server_config: Dict[str, Any]) -> Optio
|
||||
|
||||
logger.info("Initializing MultiServerMCPClient...")
|
||||
|
||||
if not mcp_server_config:
|
||||
logger.error("No MCP server configuration provided.")
|
||||
return None
|
||||
|
||||
try:
|
||||
if "mcpServers" in mcp_server_config:
|
||||
mcp_server_config = mcp_server_config["mcpServers"]
|
||||
client = MultiServerMCPClient(mcp_server_config)
|
||||
await client.__aenter__()
|
||||
return client
|
||||
|
||||
@@ -9,25 +9,6 @@ import gradio as gr
|
||||
import uuid
|
||||
|
||||
|
||||
# Callback to update the model name dropdown based on the selected provider
|
||||
def update_model_dropdown(llm_provider, api_key=None, base_url=None):
|
||||
"""
|
||||
Update the model name dropdown with predefined models for the selected provider.
|
||||
"""
|
||||
import gradio as gr
|
||||
# Use API keys from .env if not provided
|
||||
if not api_key:
|
||||
api_key = os.getenv(f"{llm_provider.upper()}_API_KEY", "")
|
||||
if not base_url:
|
||||
base_url = os.getenv(f"{llm_provider.upper()}_BASE_URL", "")
|
||||
|
||||
# Use predefined models for the selected provider
|
||||
if llm_provider in model_names:
|
||||
return gr.Dropdown(choices=model_names[llm_provider], value=model_names[llm_provider][0], interactive=True)
|
||||
else:
|
||||
return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
|
||||
|
||||
|
||||
def encode_image(img_path):
|
||||
if not img_path:
|
||||
return None
|
||||
@@ -56,108 +37,3 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di
|
||||
print(f"Error getting latest {file_type} file: {e}")
|
||||
|
||||
return latest_files
|
||||
|
||||
|
||||
async def capture_screenshot(browser_context):
|
||||
"""Capture and encode a screenshot"""
|
||||
# Extract the Playwright browser instance
|
||||
playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct.
|
||||
|
||||
# Check if the browser instance is valid and if an existing context can be reused
|
||||
if playwright_browser and playwright_browser.contexts:
|
||||
playwright_context = playwright_browser.contexts[0]
|
||||
else:
|
||||
return None
|
||||
|
||||
# Access pages in the context
|
||||
pages = None
|
||||
if playwright_context:
|
||||
pages = playwright_context.pages
|
||||
|
||||
# Use an existing page or create a new one if none exist
|
||||
if pages:
|
||||
active_page = pages[0]
|
||||
for page in pages:
|
||||
if page.url != "about:blank":
|
||||
active_page = page
|
||||
else:
|
||||
return None
|
||||
|
||||
# Take screenshot
|
||||
try:
|
||||
screenshot = await active_page.screenshot(
|
||||
type='jpeg',
|
||||
quality=75,
|
||||
scale="css"
|
||||
)
|
||||
encoded = base64.b64encode(screenshot).decode('utf-8')
|
||||
return encoded
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
|
||||
class ConfigManager:
|
||||
def __init__(self):
|
||||
self.components = {}
|
||||
self.component_order = []
|
||||
|
||||
def register_component(self, name: str, component):
|
||||
"""Register a gradio component for config management."""
|
||||
self.components[name] = component
|
||||
if name not in self.component_order:
|
||||
self.component_order.append(name)
|
||||
return component
|
||||
|
||||
def save_current_config(self):
|
||||
"""Save the current configuration of all registered components."""
|
||||
current_config = {}
|
||||
for name in self.component_order:
|
||||
component = self.components[name]
|
||||
# Get the current value from the component
|
||||
current_config[name] = getattr(component, "value", None)
|
||||
|
||||
return save_config_to_file(current_config)
|
||||
|
||||
def update_ui_from_config(self, config_file):
|
||||
"""Update UI components from a loaded configuration file."""
|
||||
if config_file is None:
|
||||
return [gr.update() for _ in self.component_order] + ["No file selected."]
|
||||
|
||||
loaded_config = load_config_from_file(config_file.name)
|
||||
|
||||
if not isinstance(loaded_config, dict):
|
||||
return [gr.update() for _ in self.component_order] + ["Error: Invalid configuration file."]
|
||||
|
||||
# Prepare updates for all components
|
||||
updates = []
|
||||
for name in self.component_order:
|
||||
if name in loaded_config:
|
||||
updates.append(gr.update(value=loaded_config[name]))
|
||||
else:
|
||||
updates.append(gr.update())
|
||||
|
||||
updates.append("Configuration loaded successfully.")
|
||||
return updates
|
||||
|
||||
def get_all_components(self):
|
||||
"""Return all registered components in the order they were registered."""
|
||||
return [self.components[name] for name in self.component_order]
|
||||
|
||||
|
||||
def load_config_from_file(config_file):
|
||||
"""Load settings from a config file (JSON format)."""
|
||||
try:
|
||||
with open(config_file, 'r') as f:
|
||||
settings = json.load(f)
|
||||
return settings
|
||||
except Exception as e:
|
||||
return f"Error loading configuration: {str(e)}"
|
||||
|
||||
|
||||
def save_config_to_file(settings, save_dir="./tmp/webui_settings"):
|
||||
"""Save the current settings to a UUID.json file with a UUID name."""
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
config_file = os.path.join(save_dir, f"{uuid.uuid4()}.json")
|
||||
with open(config_file, 'w') as f:
|
||||
json.dump(settings, f, indent=2)
|
||||
return f"Configuration saved to {config_file}"
|
||||
|
||||
@@ -50,7 +50,7 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
|
||||
extend_system_prompt = gr.Textbox(label="Extend system prompt", lines=4, interactive=True)
|
||||
|
||||
with gr.Group():
|
||||
mcp_json_file = gr.File(label="MCP server file", interactive=True, file_types=[".json"])
|
||||
mcp_json_file = gr.File(label="MCP server json", interactive=True, file_types=[".json"])
|
||||
mcp_server_config = gr.Textbox(label="MCP server", lines=6, interactive=True, visible=False)
|
||||
|
||||
with gr.Group():
|
||||
@@ -118,6 +118,7 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
|
||||
choices=[provider for provider, model in config.model_names.items()],
|
||||
label="Planner LLM Provider",
|
||||
info="Select LLM provider for LLM",
|
||||
value=None,
|
||||
interactive=True
|
||||
)
|
||||
planner_llm_model_name = gr.Dropdown(
|
||||
@@ -201,7 +202,6 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
|
||||
interactive=True,
|
||||
allow_custom_value=True,
|
||||
choices=["auto", "json_schema", "function_calling", "None"],
|
||||
info="Tool Calls Function Name",
|
||||
visible=True
|
||||
)
|
||||
tab_components.update(dict(
|
||||
@@ -228,6 +228,8 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
|
||||
mcp_json_file=mcp_json_file,
|
||||
mcp_server_config=mcp_server_config,
|
||||
))
|
||||
webui_manager.add_components("agent_settings", tab_components)
|
||||
|
||||
llm_provider.change(
|
||||
fn=lambda x: gr.update(visible=x == "ollama"),
|
||||
inputs=llm_provider,
|
||||
@@ -236,23 +238,21 @@ def create_agent_settings_tab(webui_manager: WebuiManager) -> dict[str, Componen
|
||||
llm_provider.change(
|
||||
lambda provider: update_model_dropdown(provider),
|
||||
inputs=[llm_provider],
|
||||
outputs=llm_model_name
|
||||
outputs=[llm_model_name]
|
||||
)
|
||||
planner_llm_provider.change(
|
||||
fn=lambda x: gr.update(visible=x == "ollama"),
|
||||
inputs=planner_llm_provider,
|
||||
outputs=planner_ollama_num_ctx
|
||||
inputs=[planner_llm_provider],
|
||||
outputs=[planner_ollama_num_ctx]
|
||||
)
|
||||
planner_llm_provider.change(
|
||||
lambda provider: update_model_dropdown(provider),
|
||||
inputs=[planner_llm_provider],
|
||||
outputs=planner_llm_model_name
|
||||
outputs=[planner_llm_model_name]
|
||||
)
|
||||
|
||||
mcp_json_file.change(
|
||||
update_mcp_server,
|
||||
inputs=mcp_json_file,
|
||||
inputs=[mcp_json_file],
|
||||
outputs=[mcp_server_config, mcp_server_config]
|
||||
)
|
||||
|
||||
return tab_components
|
||||
|
||||
@@ -35,7 +35,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager) -> dict[str, Compon
|
||||
)
|
||||
keep_browser_open = gr.Checkbox(
|
||||
label="Keep Browser Open",
|
||||
value=False,
|
||||
value=True,
|
||||
info="Keep Browser Open between Tasks",
|
||||
interactive=True
|
||||
)
|
||||
@@ -119,7 +119,9 @@ def create_browser_settings_tab(webui_manager: WebuiManager) -> dict[str, Compon
|
||||
save_agent_history_path=save_agent_history_path,
|
||||
save_download_path=save_download_path,
|
||||
cdp_url=cdp_url,
|
||||
wss_url=wss_url
|
||||
wss_url=wss_url,
|
||||
window_h=window_h,
|
||||
window_w=window_w,
|
||||
)
|
||||
)
|
||||
return tab_components
|
||||
webui_manager.add_components("browser_settings", tab_components)
|
||||
|
||||
@@ -1,62 +1,921 @@
|
||||
import gradio as gr
|
||||
from gradio.components import Component
|
||||
import asyncio
|
||||
import os
|
||||
import json
|
||||
import uuid
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional, Any, Set, Generator, AsyncGenerator, Union
|
||||
from collections.abc import Awaitable
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
import base64
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserContextWindowSize
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.agent.views import ToolCallingMethod # Adjust import
|
||||
from browser_use.agent.views import (
|
||||
REQUIRED_LLM_API_ENV_VARS,
|
||||
ActionResult,
|
||||
AgentError,
|
||||
AgentHistory,
|
||||
AgentHistoryList,
|
||||
AgentOutput,
|
||||
AgentSettings,
|
||||
AgentState,
|
||||
AgentStepInfo,
|
||||
StepMetadata,
|
||||
ToolCallingMethod,
|
||||
)
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.views import BrowserState, BrowserStateHistory
|
||||
|
||||
from src.webui.webui_manager import WebuiManager
|
||||
from src.utils import config
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContext, CustomBrowserContextConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_browser_use_agent_tab(webui_manager: WebuiManager) -> dict[str, Component]:
|
||||
"""
|
||||
Create the run agent tab
|
||||
"""
|
||||
input_components = set(webui_manager.get_components())
|
||||
tab_components = {}
|
||||
# --- Helper Functions --- (Defined at module level)
|
||||
|
||||
chatbot = gr.Chatbot(type='messages', label="Chat History", height=600)
|
||||
user_input = gr.Textbox(
|
||||
label="User Input",
|
||||
lines=3,
|
||||
value="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
interactive=True
|
||||
async def _initialize_llm(provider: Optional[str], model_name: Optional[str], temperature: float,
|
||||
base_url: Optional[str], api_key: Optional[str], num_ctx: Optional[int] = None) -> Optional[
|
||||
BaseChatModel]:
|
||||
"""Initializes the LLM based on settings. Returns None if provider/model is missing."""
|
||||
if not provider or not model_name:
|
||||
logger.info("LLM Provider or Model Name not specified, LLM will be None.")
|
||||
return None
|
||||
try:
|
||||
# Use your actual LLM provider logic here
|
||||
logger.info(f"Initializing LLM: Provider={provider}, Model={model_name}, Temp={temperature}")
|
||||
# Example using a placeholder function
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider=provider,
|
||||
model_name=model_name,
|
||||
temperature=temperature,
|
||||
base_url=base_url or None,
|
||||
api_key=api_key or None,
|
||||
# Add other relevant params like num_ctx for ollama
|
||||
num_ctx=num_ctx if provider == "ollama" else None
|
||||
)
|
||||
return llm
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize LLM: {e}", exc_info=True)
|
||||
gr.Warning(
|
||||
f"Failed to initialize LLM '{model_name}' for provider '{provider}'. Please check settings. Error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _get_config_value(webui_manager: WebuiManager, comp_dict: Dict[gr.components.Component, Any], comp_id_suffix: str,
|
||||
default: Any = None) -> Any:
|
||||
"""Safely get value from component dictionary using its ID suffix relative to the tab."""
|
||||
# Assumes component ID format is "tab_name.comp_name"
|
||||
tab_name = "browser_use_agent" # Hardcode or derive if needed
|
||||
comp_id = f"{tab_name}.{comp_id_suffix}"
|
||||
# Need to find the component object first using the ID from the manager
|
||||
try:
|
||||
comp = webui_manager.get_component_by_id(comp_id)
|
||||
return comp_dict.get(comp, default)
|
||||
except KeyError:
|
||||
# Try accessing settings tabs as well
|
||||
for prefix in ["agent_settings", "browser_settings"]:
|
||||
try:
|
||||
comp_id = f"{prefix}.{comp_id_suffix}"
|
||||
comp = webui_manager.get_component_by_id(comp_id)
|
||||
return comp_dict.get(comp, default)
|
||||
except KeyError:
|
||||
continue
|
||||
logger.warning(f"Component with suffix '{comp_id_suffix}' not found in manager for value lookup.")
|
||||
return default
|
||||
|
||||
|
||||
def _format_agent_output(model_output: AgentOutput) -> str:
|
||||
"""Formats AgentOutput for display in the chatbot using JSON."""
|
||||
content = ""
|
||||
if model_output:
|
||||
try:
|
||||
# Directly use model_dump if actions and current_state are Pydantic models
|
||||
action_dump = [action.model_dump(exclude_none=True) for action in model_output.action]
|
||||
|
||||
state_dump = model_output.current_state.model_dump(exclude_none=True)
|
||||
model_output_dump = {
|
||||
'current_state': state_dump,
|
||||
'action': action_dump,
|
||||
}
|
||||
# Dump to JSON string with indentation
|
||||
json_string = json.dumps(model_output_dump, indent=4, ensure_ascii=False)
|
||||
# Wrap in <pre><code> for proper display in HTML
|
||||
content = f"<pre><code class='language-json'>{json_string}</code></pre>"
|
||||
|
||||
except AttributeError as ae:
|
||||
logger.error(
|
||||
f"AttributeError during model dump: {ae}. Check if 'action' or 'current_state' or their items support 'model_dump'.")
|
||||
content = f"<pre><code>Error: Could not format agent output (AttributeError: {ae}).\nRaw output: {str(model_output)}</code></pre>"
|
||||
except Exception as e:
|
||||
logger.error(f"Error formatting agent output: {e}", exc_info=True)
|
||||
# Fallback to simple string representation on error
|
||||
content = f"<pre><code>Error formatting agent output.\nRaw output:\n{str(model_output)}</code></pre>"
|
||||
|
||||
return content.strip()
|
||||
|
||||
|
||||
# --- Updated Callback Implementation ---
|
||||
|
||||
async def _handle_new_step(webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int):
|
||||
"""Callback for each step taken by the agent, including screenshot display."""
|
||||
|
||||
# Use the correct chat history attribute name from the user's code
|
||||
if not hasattr(webui_manager, 'bu_chat_history'):
|
||||
logger.error("Attribute 'bu_chat_history' not found in webui_manager! Cannot add chat message.")
|
||||
# Initialize it maybe? Or raise an error? For now, log and potentially skip chat update.
|
||||
webui_manager.bu_chat_history = [] # Initialize if missing (consider if this is the right place)
|
||||
# return # Or stop if this is critical
|
||||
step_num -= 1
|
||||
logger.info(f"Step {step_num} completed.")
|
||||
|
||||
# --- Screenshot Handling ---
|
||||
screenshot_html = ""
|
||||
# Ensure state.screenshot exists and is not empty before proceeding
|
||||
# Use getattr for safer access
|
||||
screenshot_data = getattr(state, 'screenshot', None)
|
||||
if screenshot_data:
|
||||
try:
|
||||
# Basic validation: check if it looks like base64
|
||||
if isinstance(screenshot_data, str) and len(screenshot_data) > 100: # Arbitrary length check
|
||||
# *** UPDATED STYLE: Removed centering, adjusted width ***
|
||||
img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 600px; max-height: 300px; object-fit:contain; margin-bottom: 10px;" />'
|
||||
screenshot_html = img_tag + "<br/>" # Use <br/> for line break after inline-block image
|
||||
else:
|
||||
logger.warning(
|
||||
f"Screenshot for step {step_num} seems invalid (type: {type(screenshot_data)}, len: {len(screenshot_data) if isinstance(screenshot_data, str) else 'N/A'}).")
|
||||
screenshot_html = "**[Invalid screenshot data]**<br/>"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing or formatting screenshot for step {step_num}: {e}", exc_info=True)
|
||||
screenshot_html = "**[Error displaying screenshot]**<br/>"
|
||||
else:
|
||||
logger.debug(f"No screenshot available for step {step_num}.")
|
||||
|
||||
# --- Format Agent Output ---
|
||||
formatted_output = _format_agent_output(output) # Use the updated function
|
||||
|
||||
# --- Combine and Append to Chat ---
|
||||
step_header = f"--- **Step {step_num}** ---"
|
||||
# Combine header, image (with line break), and JSON block
|
||||
final_content = step_header + "<br/>" + screenshot_html + formatted_output
|
||||
|
||||
chat_message = {
|
||||
"role": "assistant",
|
||||
"content": final_content.strip() # Remove leading/trailing whitespace
|
||||
}
|
||||
|
||||
# Append to the correct chat history list
|
||||
webui_manager.bu_chat_history.append(chat_message)
|
||||
|
||||
await asyncio.sleep(0.05)
|
||||
|
||||
|
||||
def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
|
||||
"""Callback when the agent finishes the task (success or failure)."""
|
||||
logger.info(
|
||||
f"Agent task finished. Duration: {history.total_duration_seconds():.2f}s, Tokens: {history.total_input_tokens()}")
|
||||
final_summary = f"**Task Completed**\n"
|
||||
final_summary += f"- Duration: {history.total_duration_seconds():.2f} seconds\n"
|
||||
final_summary += f"- Total Input Tokens: {history.total_input_tokens()}\n" # Or total tokens if available
|
||||
|
||||
final_result = history.final_result()
|
||||
if final_result:
|
||||
final_summary += f"- Final Result: {final_result}\n"
|
||||
|
||||
errors = history.errors()
|
||||
if errors and any(errors):
|
||||
final_summary += f"- **Errors:**\n```\n{errors}\n```\n"
|
||||
else:
|
||||
final_summary += "- Status: Success\n"
|
||||
|
||||
webui_manager.bu_chat_history.append({"role": "assistant", "content": final_summary})
|
||||
|
||||
|
||||
async def _ask_assistant_callback(webui_manager: WebuiManager, query: str, browser_context: BrowserContext) -> Dict[
|
||||
str, Any]:
|
||||
"""Callback triggered by the agent's ask_for_assistant action."""
|
||||
logger.info("Agent requires assistance. Waiting for user input.")
|
||||
|
||||
if not hasattr(webui_manager, '_chat_history'):
|
||||
logger.error("Chat history not found in webui_manager during ask_assistant!")
|
||||
return {"response": "Internal Error: Cannot display help request."}
|
||||
|
||||
webui_manager.bu_chat_history.append({"role": "assistant",
|
||||
"content": f"**Need Help:** {query}\nPlease provide information or perform the required action in the browser, then type your response/confirmation below and click 'Submit Response'."})
|
||||
|
||||
# Use state stored in webui_manager
|
||||
webui_manager.bu_response_event = asyncio.Event()
|
||||
webui_manager.bu_user_help_response = None # Reset previous response
|
||||
|
||||
try:
|
||||
logger.info("Waiting for user response event...")
|
||||
await asyncio.wait_for(webui_manager.bu_response_event.wait(), timeout=3600.0) # Long timeout
|
||||
logger.info("User response event received.")
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("Timeout waiting for user assistance.")
|
||||
webui_manager.bu_chat_history.append(
|
||||
{"role": "assistant", "content": "**Timeout:** No response received. Trying to proceed."})
|
||||
webui_manager.bu_response_event = None # Clear the event
|
||||
return {"response": "Timeout: User did not respond."} # Inform the agent
|
||||
|
||||
response = webui_manager.bu_user_help_response
|
||||
webui_manager.bu_chat_history.append({"role": "user", "content": response}) # Show user response in chat
|
||||
webui_manager.bu_response_event = None # Clear the event for the next potential request
|
||||
return {"response": response}
|
||||
|
||||
|
||||
async def capture_screenshot(browser_context):
    """Grab a JPEG screenshot of the most relevant page, base64-encoded.

    Returns ``None`` when no Playwright browser/context/page is available,
    or when the screenshot call itself fails (best-effort by design).
    """
    # The raw Playwright browser hangs off the browser-use wrapper object.
    pw_browser = browser_context.browser.playwright_browser

    # Reuse the first existing Playwright context; nothing to show otherwise.
    if not (pw_browser and pw_browser.contexts):
        return None
    pw_context = pw_browser.contexts[0]

    open_pages = pw_context.pages if pw_context else None
    if not open_pages:
        return None

    # Prefer the last page showing real content; fall back to the first page.
    target = open_pages[0]
    for candidate in open_pages:
        if candidate.url != "about:blank":
            target = candidate

    try:
        raw = await target.screenshot(
            type='jpeg',
            quality=75,
            scale="css"
        )
    except Exception:
        # Screenshot failures (page navigating/closed) are non-fatal.
        return None
    return base64.b64encode(raw).decode('utf-8')
|
||||
|
||||
|
||||
# --- Core Agent Execution Logic --- (Needs access to webui_manager)
|
||||
|
||||
async def run_agent_task(webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]) -> AsyncGenerator[
    Dict[gr.components.Component, Any], None]:
    """Handles the entire lifecycle of initializing and running the agent.

    Async generator: yields dicts of Gradio component updates while the task
    runs. Reads all agent/browser settings from *components*, (re)creates the
    browser, context, controller and Agent as needed on *webui_manager*, then
    polls the running task, streaming chat / live-view updates until it
    finishes, is stopped, or errors.

    Args:
        webui_manager: Shared state holder (browser, agent, chat history,
            response event) plus the component-id registry.
        components: Mapping of Gradio components to their current values.

    Yields:
        Partial ``{component: gr.update(...)}`` dicts for the UI.
    """

    # --- Get Components ---
    # Need handles to specific UI components to update them
    user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    run_button_comp = webui_manager.get_component_by_id("browser_use_agent.run_button")
    stop_button_comp = webui_manager.get_component_by_id("browser_use_agent.stop_button")
    pause_resume_button_comp = webui_manager.get_component_by_id("browser_use_agent.pause_resume_button")
    clear_button_comp = webui_manager.get_component_by_id("browser_use_agent.clear_button")
    chatbot_comp = webui_manager.get_component_by_id("browser_use_agent.chatbot")
    history_file_comp = webui_manager.get_component_by_id("browser_use_agent.agent_history_file")
    gif_comp = webui_manager.get_component_by_id("browser_use_agent.recording_gif")
    browser_view_comp = webui_manager.get_component_by_id("browser_use_agent.browser_view")

    # --- 1. Get Task and Initial UI Update ---
    task = components.get(user_input_comp, "").strip()
    if not task:
        gr.Warning("Please enter a task.")
        yield {run_button_comp: gr.update(interactive=True)}
        return

    # Set running state indirectly via _current_task
    webui_manager.bu_chat_history.append({"role": "user", "content": task})

    # Lock the input UI while the agent runs.
    yield {
        user_input_comp: gr.Textbox(value="", interactive=False, placeholder="Agent is running..."),
        run_button_comp: gr.Button(value="⏳ Running...", interactive=False),
        stop_button_comp: gr.Button(interactive=True),
        pause_resume_button_comp: gr.Button(value="⏸️ Pause", interactive=True),
        clear_button_comp: gr.Button(interactive=False),
        chatbot_comp: gr.update(value=webui_manager.bu_chat_history),
        history_file_comp: gr.update(value=None),
        gif_comp: gr.update(value=None),
    }

    # --- Agent Settings ---
    # Access settings values via components dict, getting IDs from webui_manager
    def get_setting(key, default=None):
        # Looks up the component registered under "agent_settings.<key>";
        # falls back to default when the tab/component doesn't exist.
        comp = webui_manager.id_to_component.get(f"agent_settings.{key}")
        return components.get(comp, default) if comp else default

    override_system_prompt = get_setting("override_system_prompt") or None
    extend_system_prompt = get_setting("extend_system_prompt") or None
    llm_provider_name = get_setting("llm_provider", None)  # Default to None if not found
    llm_model_name = get_setting("llm_model_name", None)
    llm_temperature = get_setting("llm_temperature", 0.6)
    use_vision = get_setting("use_vision", True)
    ollama_num_ctx = get_setting("ollama_num_ctx", 16000)
    llm_base_url = get_setting("llm_base_url") or None
    llm_api_key = get_setting("llm_api_key") or None
    max_steps = get_setting("max_steps", 100)
    max_actions = get_setting("max_actions", 10)
    max_input_tokens = get_setting("max_input_tokens", 128000)
    tool_calling_str = get_setting("tool_calling_method", "auto")
    # The dropdown stores the literal string "None" for "no method".
    tool_calling_method = tool_calling_str if tool_calling_str != "None" else None
    mcp_server_config_comp = webui_manager.id_to_component.get("agent_settings.mcp_server_config")
    mcp_server_config_str = components.get(mcp_server_config_comp) if mcp_server_config_comp else None
    mcp_server_config = json.loads(mcp_server_config_str) if mcp_server_config_str else None

    # Planner LLM Settings (Optional)
    planner_llm_provider_name = get_setting("planner_llm_provider") or None
    planner_llm = None
    if planner_llm_provider_name:
        planner_llm_model_name = get_setting("planner_llm_model_name")
        planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
        planner_ollama_num_ctx = get_setting("planner_ollama_num_ctx", 16000)
        planner_llm_base_url = get_setting("planner_llm_base_url") or None
        planner_llm_api_key = get_setting("planner_llm_api_key") or None
        planner_use_vision = get_setting("planner_use_vision", False)

        planner_llm = await _initialize_llm(
            planner_llm_provider_name, planner_llm_model_name, planner_llm_temperature,
            planner_llm_base_url, planner_llm_api_key,
            planner_ollama_num_ctx if planner_llm_provider_name == "ollama" else None
        )

    # --- Browser Settings ---
    def get_browser_setting(key, default=None):
        # Same lookup pattern as get_setting, but for the browser-settings tab.
        comp = webui_manager.id_to_component.get(f"browser_settings.{key}")
        return components.get(comp, default) if comp else default

    browser_binary_path = get_browser_setting("browser_binary_path") or None
    browser_user_data_dir = get_browser_setting("browser_user_data_dir") or None
    use_own_browser = get_browser_setting("use_own_browser", False)  # Logic handled by CDP/WSS presence
    keep_browser_open = get_browser_setting("keep_browser_open", False)
    headless = get_browser_setting("headless", False)
    disable_security = get_browser_setting("disable_security", True)
    window_w = int(get_browser_setting("window_w", 1280))
    window_h = int(get_browser_setting("window_h", 1100))
    cdp_url = get_browser_setting("cdp_url") or None
    wss_url = get_browser_setting("wss_url") or None
    save_recording_path = get_browser_setting("save_recording_path") or None
    save_trace_path = get_browser_setting("save_trace_path") or None
    save_agent_history_path = get_browser_setting("save_agent_history_path", "./tmp/agent_history")
    save_download_path = get_browser_setting("save_download_path", "./tmp/downloads")

    # Live-view <img> dimensions in viewport units (80vw wide, proportional height).
    stream_vw = 80
    stream_vh = int(80 * window_h // window_w)

    os.makedirs(save_agent_history_path, exist_ok=True)
    if save_recording_path: os.makedirs(save_recording_path, exist_ok=True)
    if save_trace_path: os.makedirs(save_trace_path, exist_ok=True)
    if save_download_path: os.makedirs(save_download_path, exist_ok=True)

    # --- 2. Initialize LLM ---
    main_llm = await _initialize_llm(
        llm_provider_name, llm_model_name, llm_temperature, llm_base_url, llm_api_key,
        ollama_num_ctx if llm_provider_name == "ollama" else None
    )

    # Pass the webui_manager instance to the callback when wrapping it
    async def ask_callback_wrapper(query: str, browser_context: BrowserContext) -> Dict[str, Any]:
        return await _ask_assistant_callback(webui_manager, query, browser_context)

    if not webui_manager.bu_controller:
        webui_manager.bu_controller = CustomController(ask_assistant_callback=ask_callback_wrapper)
        await webui_manager.bu_controller.setup_mcp_client(mcp_server_config)

    # --- 4. Initialize Browser and Context ---
    should_close_browser_on_finish = not keep_browser_open

    try:
        # Close existing resources if not keeping open
        if not keep_browser_open:
            if webui_manager.bu_browser_context:
                logger.info("Closing previous browser context.")
                await webui_manager.bu_browser_context.close()
                webui_manager.bu_browser_context = None
            if webui_manager.bu_browser:
                logger.info("Closing previous browser.")
                await webui_manager.bu_browser.close()
                webui_manager.bu_browser = None

        # Create Browser if needed
        if not webui_manager.bu_browser:
            logger.info("Launching new browser instance.")
            extra_args = [f"--window-size={window_w},{window_h}"]
            if browser_user_data_dir:
                extra_args.append(f"--user-data-dir={browser_user_data_dir}")

            if use_own_browser:
                # Env vars take precedence over UI values for a user-managed Chrome.
                browser_binary_path = os.getenv("CHROME_PATH", None) or browser_binary_path
                if browser_binary_path == "":
                    browser_binary_path = None
                chrome_user_data = os.getenv("CHROME_USER_DATA", None)
                if chrome_user_data:
                    extra_args += [f"--user-data-dir={chrome_user_data}"]
            else:
                browser_binary_path = None

            webui_manager.bu_browser = CustomBrowser(
                config=BrowserConfig(
                    headless=headless,
                    disable_security=disable_security,
                    browser_binary_path=browser_binary_path,
                    extra_browser_args=extra_args,
                    wss_url=wss_url,
                    cdp_url=cdp_url,
                )
            )

        # Create Context if needed
        if not webui_manager.bu_browser_context:
            logger.info("Creating new browser context.")
            context_config = CustomBrowserContextConfig(
                trace_path=save_trace_path if save_trace_path else None,
                save_recording_path=save_recording_path if save_recording_path else None,
                save_downloads_path=save_download_path if save_download_path else None,
                browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h)
            )
            if not webui_manager.bu_browser:
                raise ValueError("Browser not initialized, cannot create context.")
            webui_manager.bu_browser_context = await webui_manager.bu_browser.new_context(config=context_config)

        # --- 5. Initialize or Update Agent ---
        webui_manager.bu_agent_task_id = str(uuid.uuid4())  # New ID for this task run
        os.makedirs(os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id), exist_ok=True)
        history_file = os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id,
                                    f"{webui_manager.bu_agent_task_id}.json")
        gif_path = os.path.join(save_agent_history_path, webui_manager.bu_agent_task_id,
                                f"{webui_manager.bu_agent_task_id}.gif")

        # Pass the webui_manager to callbacks when wrapping them
        async def step_callback_wrapper(state: BrowserState, output: AgentOutput, step_num: int):
            await _handle_new_step(webui_manager, state, output, step_num)

        def done_callback_wrapper(history: AgentHistoryList):
            _handle_done(webui_manager, history)

        if not webui_manager.bu_agent:
            logger.info(f"Initializing new agent for task: {task}")
            if not webui_manager.bu_browser or not webui_manager.bu_browser_context:
                raise ValueError("Browser or Context not initialized, cannot create agent.")

            webui_manager.bu_agent = Agent(
                task=task,
                llm=main_llm,
                browser=webui_manager.bu_browser,
                browser_context=webui_manager.bu_browser_context,
                controller=webui_manager.bu_controller,
                register_new_step_callback=step_callback_wrapper,
                register_done_callback=done_callback_wrapper,
                # Agent settings
                use_vision=use_vision,
                override_system_message=override_system_prompt,
                extend_system_message=extend_system_prompt,
                max_input_tokens=max_input_tokens,
                max_actions_per_step=max_actions,
                tool_calling_method=tool_calling_method,
                planner_llm=planner_llm,
                use_vision_for_planner=planner_use_vision if planner_llm else False,
                save_conversation_path=history_file,
            )
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.settings.generate_gif = gif_path
        else:
            # Reuse the live agent (keep_browser_open flow): new ID, new task.
            webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id
            webui_manager.bu_agent.add_new_task(task)
            webui_manager.bu_agent.settings.generate_gif = gif_path

        # --- 6. Run Agent Task and Stream Updates ---
        agent_run_coro = webui_manager.bu_agent.run(max_steps=max_steps)
        agent_task = asyncio.create_task(agent_run_coro)
        webui_manager.bu_current_task = agent_task  # Store the task

        last_chat_len = len(webui_manager.bu_chat_history)
        # Poll the agent task, reflecting pause/stop/help-request state in the UI.
        while not agent_task.done():
            is_paused = webui_manager.bu_agent.state.paused
            is_stopped = webui_manager.bu_agent.state.stopped

            # Check for pause state
            if is_paused:
                yield {
                    pause_resume_button_comp: gr.update(value="▶️ Resume", interactive=True),
                    run_button_comp: gr.update(value="⏸️ Paused", interactive=False),
                    stop_button_comp: gr.update(interactive=True),  # Allow stop while paused
                }
                # Wait until pause is released or task is stopped/done
                while is_paused and not agent_task.done():
                    # Re-check agent state in loop
                    is_paused = webui_manager.bu_agent.state.paused
                    is_stopped = webui_manager.bu_agent.state.stopped
                    if is_stopped:  # Stop signal received while paused
                        break
                    await asyncio.sleep(0.2)

                if agent_task.done() or is_stopped:  # If stopped or task finished while paused
                    break

                # If resumed, yield UI update
                yield {
                    pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=True),
                    run_button_comp: gr.update(value="⏳ Running...", interactive=False),
                }

            # Check if agent stopped itself or stop button was pressed (which sets agent.state.stopped)
            if is_stopped:
                logger.info("Agent has stopped (internally or via stop button).")
                if not agent_task.done():
                    # Ensure the task coroutine finishes if agent just set flag
                    try:
                        await asyncio.wait_for(agent_task, timeout=1.0)  # Give it a moment to exit run()
                    except asyncio.TimeoutError:
                        logger.warning("Agent task did not finish quickly after stop signal, cancelling.")
                        agent_task.cancel()
                    except Exception:  # Catch task exceptions if it errors on stop
                        pass
                break  # Exit the streaming loop

            # Check if agent is asking for help (via response_event)
            update_dict = {}
            if webui_manager.bu_response_event is not None:
                update_dict = {
                    user_input_comp: gr.update(placeholder="Agent needs help. Enter response and submit.",
                                               interactive=True),
                    run_button_comp: gr.update(value="✔️ Submit Response", interactive=True),
                    pause_resume_button_comp: gr.update(interactive=False),
                    stop_button_comp: gr.update(interactive=False),
                    chatbot_comp: gr.update(value=webui_manager.bu_chat_history)
                }
                last_chat_len = len(webui_manager.bu_chat_history)
                yield update_dict
                # Wait until response is submitted or task finishes
                while webui_manager.bu_response_event is not None and not agent_task.done():
                    await asyncio.sleep(0.2)
                # Restore UI after response submitted or if task ended unexpectedly
                if not agent_task.done():
                    yield {
                        user_input_comp: gr.update(placeholder="Agent is running...", interactive=False),
                        run_button_comp: gr.update(value="⏳ Running...", interactive=False),
                        pause_resume_button_comp: gr.update(interactive=True),
                        stop_button_comp: gr.update(interactive=True),
                    }
                else:
                    break  # Task finished while waiting for response

            # Update Chatbot if new messages arrived via callbacks
            if len(webui_manager.bu_chat_history) > last_chat_len:
                update_dict[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
                last_chat_len = len(webui_manager.bu_chat_history)

            # Update Browser View (only streamed when running headless)
            if headless and webui_manager.bu_browser_context:
                try:
                    screenshot_b64 = await capture_screenshot(webui_manager.bu_browser_context)
                    if screenshot_b64:
                        html_content = f'<img src="data:image/jpeg;base64,{screenshot_b64}" style="width:{stream_vw}vw; height:{stream_vh}vh ; border:1px solid #ccc;">'
                        update_dict[browser_view_comp] = gr.update(value=html_content, visible=True)
                    else:
                        html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>"
                        update_dict[browser_view_comp] = gr.update(value=html_content,
                                                                   visible=True)
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")
                    update_dict[browser_view_comp] = gr.update(value="<div style='...'>Error loading view...</div>",
                                                               visible=True)
            else:
                update_dict[browser_view_comp] = gr.update(visible=False)

            # Yield accumulated updates
            if update_dict:
                yield update_dict

            await asyncio.sleep(0.1)  # Polling interval

        # --- 7. Task Finalization ---
        webui_manager.bu_agent.state.paused = False
        webui_manager.bu_agent.state.stopped = False
        final_update = {}
        try:
            logger.info("Agent task completing...")
            # Await the task ensure completion and catch exceptions if not already caught
            if not agent_task.done():
                await agent_task  # Retrieve result/exception
            elif agent_task.exception():  # Check if task finished with exception
                agent_task.result()  # Raise the exception to be caught below
            logger.info("Agent task completed processing.")

            logger.info(f"Explicitly saving agent history to: {history_file}")
            webui_manager.bu_agent.save_history(history_file)

            if os.path.exists(history_file):
                final_update[history_file_comp] = gr.File(value=history_file)

            if gif_path and os.path.exists(gif_path):
                logger.info(f"GIF found at: {gif_path}")
                final_update[gif_comp] = gr.Image(value=gif_path)

        except asyncio.CancelledError:
            logger.info("Agent task was cancelled.")
            if not any("Cancelled" in msg.get("content", "") for msg in webui_manager.bu_chat_history if
                       msg.get("role") == "assistant"):
                webui_manager.bu_chat_history.append({"role": "assistant", "content": "**Task Cancelled**."})
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
        except Exception as e:
            logger.error(f"Error during agent execution: {e}", exc_info=True)
            error_message = f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
            if not any(error_message in msg.get("content", "") for msg in webui_manager.bu_chat_history if
                       msg.get("role") == "assistant"):
                webui_manager.bu_chat_history.append({"role": "assistant", "content": error_message})
            final_update[chatbot_comp] = gr.update(value=webui_manager.bu_chat_history)
            gr.Error(f"Agent execution failed: {e}")

        finally:
            webui_manager.bu_current_task = None  # Clear the task reference

            # Close browser/context if requested
            if should_close_browser_on_finish:
                if webui_manager.bu_browser_context:
                    logger.info("Closing browser context after task.")
                    await webui_manager.bu_browser_context.close()
                    webui_manager.bu_browser_context = None
                if webui_manager.bu_browser:
                    logger.info("Closing browser after task.")
                    await webui_manager.bu_browser.close()
                    webui_manager.bu_browser = None

            # --- 8. Final UI Update ---
            final_update.update({
                user_input_comp: gr.update(value="", interactive=True, placeholder="Enter your next task..."),
                run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
                stop_button_comp: gr.update(interactive=False),
                pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
                clear_button_comp: gr.update(interactive=True),
                # Ensure final chat history is shown
                chatbot_comp: gr.update(value=webui_manager.bu_chat_history)
            })
            yield final_update

    except Exception as e:
        # Catch errors during setup (before agent run starts)
        logger.error(f"Error setting up agent task: {e}", exc_info=True)
        webui_manager.bu_current_task = None  # Ensure state is reset
        yield {
            user_input_comp: gr.update(interactive=True, placeholder="Error during setup. Enter task..."),
            run_button_comp: gr.update(value="▶️ Submit Task", interactive=True),
            stop_button_comp: gr.update(interactive=False),
            pause_resume_button_comp: gr.update(value="⏸️ Pause", interactive=False),
            clear_button_comp: gr.update(interactive=True),
            chatbot_comp: gr.update(
                value=webui_manager.bu_chat_history + [{"role": "assistant", "content": f"**Setup Error:** {e}"}]),
        }
|
||||
|
||||
|
||||
# --- Button Click Handlers --- (Need access to webui_manager)
|
||||
|
||||
async def handle_submit(webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]):
    """Route a click of the main 'Submit' button.

    The same button doubles as a "Submit Response" control while the agent is
    blocked on an assistance request, so dispatch depends on manager state:
    (1) assistance pending -> deliver the text as the user's answer;
    (2) a task is already running -> refuse; (3) otherwise -> start a new task.
    """
    input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
    typed_text = components.get(input_comp, "").strip()

    help_pending = webui_manager.bu_response_event and not webui_manager.bu_response_event.is_set()
    if help_pending:
        # Hand the typed text to the blocked _ask_assistant_callback.
        logger.info(f"User submitted assistance: {typed_text}")
        webui_manager.bu_user_help_response = typed_text if typed_text else "User provided no text response."
        webui_manager.bu_response_event.set()
        # UI updates handled by the main loop reacting to the event being set
        yield {
            input_comp: gr.update(value="", interactive=False, placeholder="Waiting for agent to continue..."),
            webui_manager.get_component_by_id("browser_use_agent.run_button"): gr.update(value="⏳ Running...",
                                                                                         interactive=False)
        }
    elif webui_manager.bu_current_task and not webui_manager.bu_current_task.done():
        # A task is in flight and it is not asking for help: reject the click.
        logger.warning("Submit button clicked while agent is already running and not asking for help.")
        gr.Info("Agent is currently running. Please wait or use Stop/Pause.")
        yield {}  # No change
    else:
        # Fresh task: stream every UI update from the full agent lifecycle.
        logger.info("Submit button clicked for new task.")
        async for ui_update in run_agent_task(webui_manager, components):
            yield ui_update
|
||||
|
||||
|
||||
async def handle_stop(webui_manager: WebuiManager):
    """Handle the 'Stop' button: signal the running agent to halt.

    Returns a dict of component updates; the polling loop in run_agent_task
    performs the actual shutdown once it observes ``state.stopped``.
    """
    logger.info("Stop button clicked.")
    running_agent = webui_manager.bu_agent
    running_task = webui_manager.bu_current_task
    by_id = webui_manager.get_component_by_id

    # Guard: nothing to stop -> just normalize the buttons.
    if not (running_agent and running_task and not running_task.done()):
        logger.warning("Stop clicked but agent is not running or task is already done.")
        # Reset UI just in case it's stuck
        return {
            by_id("browser_use_agent.run_button"): gr.update(interactive=True),
            by_id("browser_use_agent.stop_button"): gr.update(interactive=False),
            by_id("browser_use_agent.pause_resume_button"): gr.update(interactive=False),
            by_id("browser_use_agent.clear_button"): gr.update(interactive=True),
        }

    # Signal the agent to stop by setting its internal flag.
    running_agent.state.stopped = True
    running_agent.state.paused = False  # Ensure not paused if stopped
    return {
        by_id("browser_use_agent.stop_button"): gr.update(interactive=False,
                                                          value="⏹️ Stopping..."),
        by_id("browser_use_agent.pause_resume_button"): gr.update(interactive=False),
        by_id("browser_use_agent.run_button"): gr.update(interactive=False),
    }
|
||||
|
||||
|
||||
async def handle_pause_resume(webui_manager: WebuiManager):
    """Handle the 'Pause/Resume' button by toggling the agent's paused state."""
    active_agent = webui_manager.bu_agent
    active_task = webui_manager.bu_current_task

    # Guard: without a live agent/task there is no state to toggle.
    if not (active_agent and active_task and not active_task.done()):
        logger.warning("Pause/Resume clicked but agent is not running or doesn't support state.")
        return {}  # No change

    if active_agent.state.paused:
        logger.info("Resume button clicked.")
        active_agent.resume()
        # Optimistic update; the polling loop refreshes the rest of the UI.
        return {
            webui_manager.get_component_by_id("browser_use_agent.pause_resume_button"): gr.update(
                value="⏸️ Pause", interactive=True)
        }

    logger.info("Pause button clicked.")
    active_agent.pause()
    # Optimistic update; the polling loop refreshes the rest of the UI.
    return {
        webui_manager.get_component_by_id("browser_use_agent.pause_resume_button"): gr.update(
            value="▶️ Resume", interactive=True)
    }
|
||||
|
||||
|
||||
async def handle_clear(webui_manager: WebuiManager):
    """Handle the 'Clear' button: stop any running task and reset agent state/UI."""
    logger.info("Clear button clicked.")

    # Stop any running task first
    pending = webui_manager.bu_current_task
    if pending and not pending.done():
        logger.info("Clearing requires stopping the current task.")
        webui_manager.bu_agent.stop()
        try:
            await asyncio.wait_for(pending, timeout=2.0)  # Wait briefly
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass
        except Exception as e:
            logger.warning(f"Error stopping task on clear: {e}")
            webui_manager.bu_current_task.cancel()
    webui_manager.bu_current_task = None

    # Tear down the MCP client before dropping the controller/agent references.
    if webui_manager.bu_controller:
        await webui_manager.bu_controller.close_mcp_client()
        webui_manager.bu_controller = None
    webui_manager.bu_agent = None

    # Reset state stored in manager
    webui_manager.bu_chat_history = []
    webui_manager.bu_response_event = None
    webui_manager.bu_user_help_response = None
    webui_manager.bu_agent_task_id = None

    logger.info("Agent state and browser resources cleared.")

    # Reset UI components
    by_id = webui_manager.get_component_by_id
    return {
        by_id("browser_use_agent.chatbot"): gr.update(value=[]),
        by_id("browser_use_agent.user_input"): gr.update(value="",
                                                         placeholder="Enter your task here..."),
        by_id("browser_use_agent.agent_history_file"): gr.update(value=None),
        by_id("browser_use_agent.recording_gif"): gr.update(value=None),
        by_id("browser_use_agent.browser_view"): gr.update(
            value="<div style='...'>Browser Cleared</div>"),
        by_id("browser_use_agent.run_button"): gr.update(value="▶️ Submit Task",
                                                         interactive=True),
        by_id("browser_use_agent.stop_button"): gr.update(interactive=False),
        by_id("browser_use_agent.pause_resume_button"): gr.update(value="⏸️ Pause",
                                                                  interactive=False),
        by_id("browser_use_agent.clear_button"): gr.update(interactive=True),
    }
|
||||
|
||||
|
||||
# --- Tab Creation Function ---
|
||||
|
||||
def create_browser_use_agent_tab(webui_manager: WebuiManager):
    """
    Create the run agent tab, defining UI, state, and handlers.

    Fix: this block contained unresolved merge-conflict residue — two
    conflicting button rows (including a "Summit" typo), a duplicated
    ``value=`` kwarg plus a missing comma on the ``gr.HTML`` call (both
    SyntaxErrors), repeated keyword arguments in the ``dict(...)`` building
    ``tab_components`` (SyntaxError), and an early ``return tab_components``
    that made component registration and all event wiring unreachable.
    Resolved here in favor of the newer layout, with wiring restored.
    """
    webui_manager.init_browser_use_agent()

    # --- Define UI Components ---
    tab_components = {}
    with gr.Column():
        chatbot = gr.Chatbot(
            lambda: webui_manager.bu_chat_history,  # Load history dynamically
            elem_id="browser_use_chatbot",
            label="Agent Interaction",
            type="messages",
            height=600,
            show_copy_button=True,
            bubble_full_width=False,
        )
        user_input = gr.Textbox(
            label="Your Task or Response",
            placeholder="Enter your task here or provide assistance when asked.",
            lines=3,
            interactive=True,
            elem_id="user_input"
        )
        with gr.Row():
            stop_button = gr.Button("⏹️ Stop", interactive=False, variant="stop", scale=1)
            pause_resume_button = gr.Button("⏸️ Pause", interactive=False, variant="secondary", scale=1)
            clear_button = gr.Button("🗑️ Clear", interactive=True, variant="secondary", scale=1)
            run_button = gr.Button("▶️ Submit Task", variant="primary", scale=2)

        # Live screenshot stream; only shown while running headless.
        browser_view = gr.HTML(
            value="<div style='width:100%; height:50vh; display:flex; justify-content:center; align-items:center; border:1px solid #ccc; background-color:#f0f0f0;'><p>Browser View (Requires Headless=True)</p></div>",
            label="Browser Live View",
            elem_id="browser_view",
            visible=False,
        )
    with gr.Column():
        gr.Markdown("### Task Outputs")
        agent_history_file = gr.File(label="Agent History JSON", interactive=False)
        recording_gif = gr.Image(label="Task Recording GIF", format="gif", interactive=False,
                                 type="filepath")

    # --- Store Components in Manager ---
    tab_components.update(
        dict(
            chatbot=chatbot,
            user_input=user_input,
            clear_button=clear_button,
            run_button=run_button,
            stop_button=stop_button,
            pause_resume_button=pause_resume_button,
            agent_history_file=agent_history_file,
            recording_gif=recording_gif,
            browser_view=browser_view,
        )
    )
    webui_manager.add_components("browser_use_agent", tab_components)  # Use "browser_use_agent" as tab_name prefix

    all_managed_components = set(webui_manager.get_components())  # Get all components known to manager
    run_tab_outputs = list(tab_components.values())

    async def submit_wrapper(components_dict: Dict[Component, Any]) -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_submit that yields its results."""
        # handle_submit is an async generator, iterate and yield
        async for update in handle_submit(webui_manager, components_dict):
            yield update

    async def stop_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_stop."""
        # handle_stop is async def but returns a single dict. We yield it once.
        update_dict = await handle_stop(webui_manager)
        yield update_dict

    async def pause_resume_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_pause_resume."""
        update_dict = await handle_pause_resume(webui_manager)
        yield update_dict

    async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]:
        """Wrapper for handle_clear."""
        update_dict = await handle_clear(webui_manager)
        yield update_dict

    # --- Connect Event Handlers using the Wrappers --
    run_button.click(
        fn=submit_wrapper,
        inputs=all_managed_components,
        outputs=run_tab_outputs
    )
    user_input.submit(
        fn=submit_wrapper,
        inputs=all_managed_components,
        outputs=run_tab_outputs
    )
    stop_button.click(
        fn=stop_wrapper,
        inputs=None,
        outputs=run_tab_outputs
    )
    pause_resume_button.click(
        fn=pause_resume_wrapper,
        inputs=None,
        outputs=run_tab_outputs
    )
    clear_button.click(
        fn=clear_wrapper,
        inputs=None,
        outputs=run_tab_outputs
    )
|
||||
|
||||
|
||||
@@ -38,4 +38,4 @@ def create_deep_research_agent_tab(webui_manager: WebuiManager) -> dict[str, Com
|
||||
markdown_download=markdown_download,
|
||||
)
|
||||
)
|
||||
return tab_components
|
||||
webui_manager.add_components("deep_research_agent", tab_components)
|
||||
|
||||
@@ -34,16 +34,17 @@ def create_load_save_config_tab(webui_manager: WebuiManager) -> dict[str, Compon
|
||||
config_file=config_file,
|
||||
))
|
||||
|
||||
webui_manager.add_components("load_save_config", tab_components)
|
||||
|
||||
save_config_button.click(
|
||||
fn=webui_manager.save_current_config,
|
||||
inputs=[],
|
||||
fn=webui_manager.save_config,
|
||||
inputs=set(webui_manager.get_components()),
|
||||
outputs=[config_status]
|
||||
)
|
||||
|
||||
load_config_button.click(
|
||||
fn=webui_manager.load_config,
|
||||
inputs=[config_file],
|
||||
outputs=[config_status]
|
||||
outputs=webui_manager.get_components(),
|
||||
)
|
||||
|
||||
return tab_components
|
||||
|
||||
@@ -32,6 +32,9 @@ def create_ui(theme_name="Ocean"):
|
||||
text-align: center;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.tab-header-text {
|
||||
text-align: center;
|
||||
}
|
||||
.theme-section {
|
||||
margin-bottom: 10px;
|
||||
padding: 15px;
|
||||
@@ -67,18 +70,26 @@ def create_ui(theme_name="Ocean"):
|
||||
|
||||
with gr.Tabs() as tabs:
|
||||
with gr.TabItem("⚙️ Agent Settings"):
|
||||
ui_manager.add_components("agent_settings", create_agent_settings_tab(ui_manager))
|
||||
create_agent_settings_tab(ui_manager)
|
||||
|
||||
with gr.TabItem("🌐 Browser Settings"):
|
||||
ui_manager.add_components("browser_settings", create_browser_settings_tab(ui_manager))
|
||||
create_browser_settings_tab(ui_manager)
|
||||
|
||||
with gr.TabItem("🤖 Run Agent"):
|
||||
ui_manager.add_components("browser_use_agent", create_browser_use_agent_tab(ui_manager))
|
||||
create_browser_use_agent_tab(ui_manager)
|
||||
|
||||
with gr.TabItem("🧐 Deep Research"):
|
||||
ui_manager.add_components("deep_research_agent", create_deep_research_agent_tab(ui_manager))
|
||||
with gr.TabItem("🎁 Agent Collections"):
|
||||
gr.Markdown(
|
||||
"""
|
||||
### Agents built on Browser-Use
|
||||
""",
|
||||
elem_classes=["tab-header-text"],
|
||||
)
|
||||
with gr.Tabs():
|
||||
with gr.TabItem("Deep Research"):
|
||||
create_deep_research_agent_tab(ui_manager)
|
||||
|
||||
with gr.TabItem("📁 Load & Save Config"):
|
||||
ui_manager.add_components("load_save_config", create_load_save_config_tab(ui_manager))
|
||||
create_load_save_config_tab(ui_manager)
|
||||
|
||||
return demo
|
||||
|
||||
@@ -4,11 +4,17 @@ from typing import TYPE_CHECKING
|
||||
import os
|
||||
import gradio as gr
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, List
|
||||
import uuid
|
||||
import asyncio
|
||||
|
||||
from gradio.components import Component
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.agent.service import Agent
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContext
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
|
||||
class WebuiManager:
|
||||
@@ -19,9 +25,19 @@ class WebuiManager:
|
||||
self.settings_save_dir = settings_save_dir
|
||||
os.makedirs(self.settings_save_dir, exist_ok=True)
|
||||
|
||||
self.browser: Browser = None
|
||||
self.browser_context: BrowserContext = None
|
||||
self.bu_agent: Agent = None
|
||||
def init_browser_use_agent(self) -> None:
|
||||
"""
|
||||
init browser use agent
|
||||
"""
|
||||
self.bu_agent: Optional[Agent] = None
|
||||
self.bu_browser: Optional[CustomBrowser] = None
|
||||
self.bu_browser_context: Optional[CustomBrowserContext] = None
|
||||
self.bu_controller: Optional[CustomController] = None
|
||||
self.bu_chat_history: List[Dict[str, Optional[str]]] = []
|
||||
self.bu_response_event: Optional[asyncio.Event] = None
|
||||
self.bu_user_help_response: Optional[str] = None
|
||||
self.bu_current_task: Optional[asyncio.Task] = None
|
||||
self.bu_agent_task_id: Optional[str] = None
|
||||
|
||||
def add_components(self, tab_name: str, components_dict: dict[str, "Component"]) -> None:
|
||||
"""
|
||||
@@ -50,15 +66,16 @@ class WebuiManager:
|
||||
"""
|
||||
return self.component_to_id[comp]
|
||||
|
||||
def save_current_config(self):
|
||||
def save_config(self, components: Dict["Component", str]) -> None:
|
||||
"""
|
||||
Save current config
|
||||
Save config
|
||||
"""
|
||||
cur_settings = {}
|
||||
for comp_id, comp in self.id_to_component.items():
|
||||
for comp in components:
|
||||
if not isinstance(comp, gr.Button) and not isinstance(comp, gr.File) and str(
|
||||
getattr(comp, "interactive", True)).lower() != "false":
|
||||
cur_settings[comp_id] = getattr(comp, "value", None)
|
||||
comp_id = self.get_id_by_component(comp)
|
||||
cur_settings[comp_id] = components[comp]
|
||||
|
||||
config_name = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
with open(os.path.join(self.settings_save_dir, f"{config_name}.json"), "w") as fw:
|
||||
@@ -76,6 +93,13 @@ class WebuiManager:
|
||||
update_components = {}
|
||||
for comp_id, comp_val in ui_settings.items():
|
||||
if comp_id in self.id_to_component:
|
||||
update_components[self.id_to_component[comp_id]].value = comp_val
|
||||
comp = self.id_to_component[comp_id]
|
||||
update_components[comp] = comp.__class__(value=comp_val)
|
||||
|
||||
return f"Successfully loaded config from {config_path}"
|
||||
config_status = self.id_to_component["load_save_config.config_status"]
|
||||
update_components.update(
|
||||
{
|
||||
config_status: config_status.__class__(value=f"Successfully loaded config: {config_path}")
|
||||
}
|
||||
)
|
||||
yield update_components
|
||||
|
||||
@@ -17,98 +17,18 @@ from browser_use.agent.views import AgentHistoryList
|
||||
from src.utils import utils
|
||||
|
||||
|
||||
async def test_browser_use_org():
|
||||
async def test_browser_use_agent():
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-chat",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
)
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
use_vision = False
|
||||
use_own_browser = False
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
tool_calling_method = "json_schema" # setting to json_schema when using ollma
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
) as browser_context:
|
||||
agent = Agent(
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
use_vision=use_vision,
|
||||
tool_calling_method=tool_calling_method
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def test_browser_use_custom():
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
from src.utils import llm_provider
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -118,14 +38,6 @@ async def test_browser_use_custom():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
@@ -153,13 +65,43 @@ async def test_browser_use_custom():
|
||||
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
# )
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
}
|
||||
}
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
playwright = None
|
||||
browser = None
|
||||
browser_context = None
|
||||
|
||||
@@ -178,29 +120,27 @@ async def test_browser_use_custom():
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=extra_chromium_args,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agent = CustomAgent(
|
||||
task="open youtube in tab 1 , open google email in tab 2, open facebook in tab 3",
|
||||
add_infos="", # some hints for llm to complete the task
|
||||
agent = Agent(
|
||||
task="download pdf from https://arxiv.org/abs/2504.10458 and rename this pdf to 'GUI-r1-test.pdf'",
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
agent_prompt_class=CustomAgentMessagePrompt,
|
||||
use_vision=use_vision,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
generate_gif=True
|
||||
@@ -213,28 +153,17 @@ async def test_browser_use_custom():
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
|
||||
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context:
|
||||
await browser_context.close()
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
if browser:
|
||||
await browser.close()
|
||||
if controller:
|
||||
await controller.close_mcp_client()
|
||||
|
||||
|
||||
async def test_browser_use_parallel():
|
||||
@@ -242,13 +171,20 @@ async def test_browser_use_parallel():
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
from browser_use.browser.browser import Browser
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -258,20 +194,13 @@ async def test_browser_use_parallel():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="gemini",
|
||||
model_name="gemini-2.0-flash-exp",
|
||||
temperature=1.0,
|
||||
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
)
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
# temperature=0.6,
|
||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
@@ -293,72 +222,119 @@ async def test_browser_use_parallel():
|
||||
# provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
# )
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
controller = CustomController()
|
||||
use_own_browser = True
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 1
|
||||
playwright = None
|
||||
max_actions_per_step = 10
|
||||
browser = None
|
||||
browser_context = None
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
disable_security=True,
|
||||
headless=False,
|
||||
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agents = [
|
||||
Agent(task=task, llm=llm, browser=browser)
|
||||
Agent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
for task in [
|
||||
'Search Google for weather in Tokyo',
|
||||
'Check Reddit front page title',
|
||||
'Find NASA image of the day',
|
||||
'Check top story on CNN',
|
||||
# 'Check Reddit front page title',
|
||||
# 'Find NASA image of the day',
|
||||
# 'Check top story on CNN',
|
||||
# 'Search latest SpaceX launch date',
|
||||
# 'Look up population of Paris',
|
||||
# 'Find current time in Sydney',
|
||||
# 'Check who won last Super Bowl',
|
||||
'Find current time in Sydney',
|
||||
'Check who won last Super Bowl',
|
||||
# 'Search trending topics on Twitter',
|
||||
]
|
||||
]
|
||||
|
||||
history = await asyncio.gather(*[agent.run() for agent in agents])
|
||||
pdb.set_trace()
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
pdb.set_trace()
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context:
|
||||
await browser_context.close()
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
if browser:
|
||||
await browser.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(test_browser_use_org())
|
||||
# asyncio.run(test_browser_use_parallel())
|
||||
# asyncio.run(test_browser_use_custom())
|
||||
# asyncio.run(test_browser_use_agent())
|
||||
asyncio.run(test_browser_use_parallel())
|
||||
|
||||
@@ -45,20 +45,15 @@ async def test_controller_with_mcp():
|
||||
from src.controller.custom_controller import CustomController
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
|
||||
test_server_config = {
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"@playwright/mcp@latest",
|
||||
],
|
||||
"transport": "stdio",
|
||||
},
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@modelcontextprotocol/server-filesystem",
|
||||
"/Users/xxx/ai_workspace",
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
"desktop-commander": {
|
||||
@@ -67,11 +62,20 @@ async def test_controller_with_mcp():
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(test_server_config)
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
action_name = "mcp.desktop-commander.execute_command"
|
||||
action_info = controller.registry.registry.actions[action_name]
|
||||
param_model = action_info.param_model
|
||||
@@ -85,7 +89,8 @@ async def test_controller_with_mcp():
|
||||
result = await controller.act(action_model)
|
||||
result = result.extracted_content
|
||||
print(result)
|
||||
if result and "Command is still running. Use read_output to get more output." in result and "PID" in result.split("\n")[0]:
|
||||
if result and "Command is still running. Use read_output to get more output." in result and "PID" in \
|
||||
result.split("\n")[0]:
|
||||
pid = int(result.split("\n")[0].split("PID")[-1].strip())
|
||||
action_name = "mcp.desktop-commander.read_output"
|
||||
action_info = controller.registry.registry.actions[action_name]
|
||||
|
||||
@@ -144,10 +144,10 @@ def test_ibm_model():
|
||||
if __name__ == "__main__":
|
||||
# test_openai_model()
|
||||
# test_google_model()
|
||||
# test_azure_openai_model()
|
||||
test_azure_openai_model()
|
||||
# test_deepseek_model()
|
||||
# test_ollama_model()
|
||||
# test_deepseek_r1_model()
|
||||
# test_deepseek_r1_ollama_model()
|
||||
# test_mistral_model()
|
||||
test_ibm_model()
|
||||
# test_ibm_model()
|
||||
|
||||
2
webui.py
2
webui.py
@@ -1,3 +1,5 @@
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
import argparse
|
||||
from src.webui.interface import theme_map, create_ui
|
||||
|
||||
|
||||
107
webui2.py
107
webui2.py
@@ -42,77 +42,6 @@ _global_browser = None
|
||||
_global_browser_context = None
|
||||
_global_agent = None
|
||||
|
||||
# Create the global agent state instance
|
||||
_global_agent_state = AgentState()
|
||||
|
||||
# webui config
|
||||
webui_config_manager = utils.ConfigManager()
|
||||
|
||||
|
||||
def scan_and_register_components(blocks):
|
||||
"""扫描一个 Blocks 对象并注册其中的所有交互式组件,但不包括按钮"""
|
||||
global webui_config_manager
|
||||
|
||||
def traverse_blocks(block, prefix=""):
|
||||
registered = 0
|
||||
|
||||
# 处理 Blocks 自身的组件
|
||||
if hasattr(block, "children"):
|
||||
for i, child in enumerate(block.children):
|
||||
if isinstance(child, gr.components.Component):
|
||||
# 排除按钮 (Button) 组件
|
||||
if getattr(child, "interactive", False) and not isinstance(child, gr.Button):
|
||||
name = f"{prefix}component_{i}"
|
||||
if hasattr(child, "label") and child.label:
|
||||
# 使用标签作为名称的一部分
|
||||
label = child.label
|
||||
name = f"{prefix}{label}"
|
||||
logger.debug(f"Registering component: {name}")
|
||||
webui_config_manager.register_component(name, child)
|
||||
registered += 1
|
||||
elif hasattr(child, "children"):
|
||||
# 递归处理嵌套的 Blocks
|
||||
new_prefix = f"{prefix}block_{i}_"
|
||||
registered += traverse_blocks(child, new_prefix)
|
||||
|
||||
return registered
|
||||
|
||||
total = traverse_blocks(blocks)
|
||||
logger.info(f"Total registered components: {total}")
|
||||
|
||||
|
||||
def save_current_config():
|
||||
return webui_config_manager.save_current_config()
|
||||
|
||||
|
||||
def update_ui_from_config(config_file):
|
||||
return webui_config_manager.update_ui_from_config(config_file)
|
||||
|
||||
|
||||
def resolve_sensitive_env_variables(text):
|
||||
"""
|
||||
Replace environment variable placeholders ($SENSITIVE_*) with their values.
|
||||
Only replaces variables that start with SENSITIVE_.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
import re
|
||||
|
||||
# Find all $SENSITIVE_* patterns
|
||||
env_vars = re.findall(r'\$SENSITIVE_[A-Za-z0-9_]*', text)
|
||||
|
||||
result = text
|
||||
for var in env_vars:
|
||||
# Remove the $ prefix to get the actual environment variable name
|
||||
env_name = var[1:] # removes the $
|
||||
env_value = os.getenv(env_name)
|
||||
if env_value is not None:
|
||||
# Replace $SENSITIVE_VAR_NAME with its value
|
||||
result = result.replace(var, env_value)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
async def stop_agent():
|
||||
"""Request the agent to stop and update UI with enhanced feedback"""
|
||||
@@ -140,32 +69,6 @@ async def stop_agent():
|
||||
)
|
||||
|
||||
|
||||
async def stop_research_agent():
|
||||
"""Request the agent to stop and update UI with enhanced feedback"""
|
||||
global _global_agent_state
|
||||
|
||||
try:
|
||||
# Request stop
|
||||
_global_agent_state.request_stop()
|
||||
|
||||
# Update UI immediately
|
||||
message = "Stop requested - the agent will halt at the next safe point"
|
||||
logger.info(f"🛑 {message}")
|
||||
|
||||
# Return UI updates
|
||||
return ( # errors_output
|
||||
gr.update(value="Stopping...", interactive=False), # stop_button
|
||||
gr.update(interactive=False), # run_button
|
||||
)
|
||||
except Exception as e:
|
||||
error_msg = f"Error during stop: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
return (
|
||||
gr.update(value="Stop", interactive=True),
|
||||
gr.update(interactive=True)
|
||||
)
|
||||
|
||||
|
||||
async def run_browser_agent(
|
||||
agent_type,
|
||||
llm_provider,
|
||||
@@ -202,16 +105,6 @@ async def run_browser_agent(
|
||||
if save_recording_path:
|
||||
os.makedirs(save_recording_path, exist_ok=True)
|
||||
|
||||
# Get the list of existing videos before the agent runs
|
||||
existing_videos = set()
|
||||
if save_recording_path:
|
||||
existing_videos = set(
|
||||
glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4"))
|
||||
+ glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]"))
|
||||
)
|
||||
|
||||
task = resolve_sensitive_env_variables(task)
|
||||
|
||||
# Run the agent
|
||||
llm = utils.get_llm_model(
|
||||
provider=llm_provider,
|
||||
|
||||
Reference in New Issue
Block a user