Merge branch 'browser-use:main' into main

This commit is contained in:
apoorvshah10
2025-04-01 11:39:51 +05:30
committed by GitHub
10 changed files with 254 additions and 236 deletions

View File

@@ -1,6 +1,6 @@
browser-use==0.1.40
pyperclip==1.9.0
gradio==5.10.0
gradio==5.23.1
json-repair
langchain-mistralai==0.2.4
langchain-google-genai==2.0.8

View File

@@ -208,8 +208,8 @@ class CustomAgent(Agent):
@time_execution_async("--get_next_action")
async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
"""Get next action from LLM based on current state"""
ai_message = self.llm.invoke(input_messages)
fixed_input_messages = self._convert_input_messages(input_messages)
ai_message = self.llm.invoke(fixed_input_messages)
self.message_manager._add_message_with_tokens(ai_message)
if hasattr(ai_message, "reasoning_content"):
@@ -222,10 +222,16 @@ class CustomAgent(Agent):
else:
ai_content = ai_message.content
ai_content = ai_content.replace("```json", "").replace("```", "")
ai_content = repair_json(ai_content)
parsed_json = json.loads(ai_content)
parsed: AgentOutput = self.AgentOutput(**parsed_json)
try:
ai_content = ai_content.replace("```json", "").replace("```", "")
ai_content = repair_json(ai_content)
parsed_json = json.loads(ai_content)
parsed: AgentOutput = self.AgentOutput(**parsed_json)
except Exception as e:
import traceback
traceback.print_exc()
logger.debug(ai_message.content)
raise ValueError('Could not parse response.')
if parsed is None:
logger.debug(ai_message.content)

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import logging
import pdb
from typing import List, Optional, Type, Dict
from browser_use.agent.message_manager.service import MessageManager
@@ -96,7 +97,7 @@ class CustomMessageManager(MessageManager):
self._add_message_with_tokens(state_message)
def _remove_state_message_by_index(self, remove_ind=-1) -> None:
"""Remove last state message from history"""
"""Remove state message by index from history"""
i = len(self.state.history.messages) - 1
remove_cnt = 0
while i >= 0:

View File

@@ -18,11 +18,17 @@ Example:
# Response Rules
1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
{{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
"important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output ''.",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
"next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."}},
"action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}}
{{
"current_state": {{
"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.",
"important_contents": "Output important contents closely related to user\'s instruction on the current page. If there is, please output the contents. If not, please output empty string ''.",
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.",
"next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."
}},
"action": [
{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence
]
}}
2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
Common action sequences:

View File

@@ -1,5 +1,6 @@
import asyncio
class AgentState:
_instance = None
@@ -27,4 +28,4 @@ class AgentState:
self.last_valid_state = state
def get_last_valid_state(self):
return self.last_valid_state
return self.last_valid_state

View File

@@ -19,7 +19,13 @@ from browser_use.agent.views import ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller, DoneAction
from main_content_extractor import MainContentExtractor
from langchain.schema import SystemMessage, HumanMessage
from langchain_core.messages import (
AIMessage,
BaseMessage,
HumanMessage,
ToolMessage,
SystemMessage
)
from json_repair import repair_json
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
from src.controller.custom_controller import CustomController

View File

@@ -1,125 +0,0 @@
import os
import pickle
import uuid
import gradio as gr
def default_config():
    """Return the default Web-UI settings as a plain dict.

    `use_own_browser` is seeded from the CHROME_PERSISTENT_SESSION
    environment variable ("true"/"false", case-insensitive); everything
    else is a fixed default.
    """
    persistent_session = os.getenv("CHROME_PERSISTENT_SESSION", "false").lower() == "true"
    return dict(
        agent_type="custom",
        max_steps=100,
        max_actions_per_step=10,
        use_vision=True,
        tool_calling_method="auto",
        llm_provider="openai",
        llm_model_name="gpt-4o",
        llm_num_ctx=32000,
        llm_temperature=0.6,
        llm_base_url="",
        llm_api_key="",
        use_own_browser=persistent_session,
        keep_browser_open=False,
        headless=False,
        disable_security=True,
        enable_recording=True,
        window_w=1280,
        window_h=1100,
        save_recording_path="./tmp/record_videos",
        save_trace_path="./tmp/traces",
        save_agent_history_path="./tmp/agent_history",
        task="go to google.com and type 'OpenAI' click search and give me the first url",
    )
def load_config_from_file(config_file):
    """Unpickle a saved settings dict from *config_file*.

    On any failure (missing file, corrupt pickle, ...) an error string is
    returned instead of raising, so callers can show it as a status message.

    NOTE(security): pickle.load can execute arbitrary code from the file —
    only load configuration files you created yourself.
    """
    try:
        with open(config_file, 'rb') as fh:
            return pickle.load(fh)
    except Exception as exc:
        return f"Error loading configuration: {str(exc)}"
def save_config_to_file(settings, save_dir="./tmp/webui_settings"):
    """Pickle *settings* to a freshly named <uuid4>.pkl inside *save_dir*.

    The directory is created if needed; returns a human-readable status
    string containing the path written.
    """
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, f"{uuid.uuid4()}.pkl")
    with open(target, 'wb') as fh:
        pickle.dump(settings, fh)
    return f"Configuration saved to {target}"
def save_current_config(*args):
    """Pack the 22 positional UI values into a named settings dict and persist it.

    The argument order must match the `inputs=` list wired to the save
    button in the UI; the result of save_config_to_file (a status string)
    is returned unchanged.
    """
    field_names = (
        "agent_type", "max_steps", "max_actions_per_step", "use_vision",
        "tool_calling_method", "llm_provider", "llm_model_name", "llm_num_ctx",
        "llm_temperature", "llm_base_url", "llm_api_key", "use_own_browser",
        "keep_browser_open", "headless", "disable_security", "enable_recording",
        "window_w", "window_h", "save_recording_path", "save_trace_path",
        "save_agent_history_path", "task",
    )
    # Index explicitly so too-few arguments still raise IndexError, as before.
    current_config = {key: args[i] for i, key in enumerate(field_names)}
    return save_config_to_file(current_config)
def update_ui_from_config(config_file):
    """Translate an uploaded config file into gr.update() values for the UI.

    Returns a 23-tuple: one update per saved setting (22 of them, in the
    same order save_current_config packs them) followed by a status string.
    When no file is selected, or the file cannot be parsed into a dict,
    every component gets a no-op gr.update() so the UI is left untouched.

    Fixes vs. previous version:
    - the two fallback branches returned only 21 updates + status (22
      elements) while the success branch returned 22 + status (23), which
      mismatched the gradio `outputs=` arity; now all branches return 23.
    - tool_calling_method fell back to True, but the dropdown holds
      strings; it now falls back to "auto" (matching default_config()).
    - llm_temperature fell back to 1.0; it now falls back to 0.6
      (matching default_config()).
    """
    if config_file is not None:
        loaded_config = load_config_from_file(config_file.name)
        if isinstance(loaded_config, dict):
            return (
                gr.update(value=loaded_config.get("agent_type", "custom")),
                gr.update(value=loaded_config.get("max_steps", 100)),
                gr.update(value=loaded_config.get("max_actions_per_step", 10)),
                gr.update(value=loaded_config.get("use_vision", True)),
                gr.update(value=loaded_config.get("tool_calling_method", "auto")),
                gr.update(value=loaded_config.get("llm_provider", "openai")),
                gr.update(value=loaded_config.get("llm_model_name", "gpt-4o")),
                gr.update(value=loaded_config.get("llm_num_ctx", 32000)),
                gr.update(value=loaded_config.get("llm_temperature", 0.6)),
                gr.update(value=loaded_config.get("llm_base_url", "")),
                gr.update(value=loaded_config.get("llm_api_key", "")),
                gr.update(value=loaded_config.get("use_own_browser", False)),
                gr.update(value=loaded_config.get("keep_browser_open", False)),
                gr.update(value=loaded_config.get("headless", False)),
                gr.update(value=loaded_config.get("disable_security", True)),
                gr.update(value=loaded_config.get("enable_recording", True)),
                gr.update(value=loaded_config.get("window_w", 1280)),
                gr.update(value=loaded_config.get("window_h", 1100)),
                gr.update(value=loaded_config.get("save_recording_path", "./tmp/record_videos")),
                gr.update(value=loaded_config.get("save_trace_path", "./tmp/traces")),
                gr.update(value=loaded_config.get("save_agent_history_path", "./tmp/agent_history")),
                gr.update(value=loaded_config.get("task", "")),
                "Configuration loaded successfully.",
            )
        # Parsed, but not a dict (load_config_from_file returns an error
        # string on failure): leave the UI untouched and report the error.
        return tuple(gr.update() for _ in range(22)) + ("Error: Invalid configuration file.",)
    # No file selected: no-op updates for every component.
    return tuple(gr.update() for _ in range(22)) + ("No file selected.",)

View File

@@ -4,13 +4,15 @@ import time
from pathlib import Path
from typing import Dict, Optional
import requests
import json
import gradio as gr
import uuid
from langchain_anthropic import ChatAnthropic
from langchain_mistralai import ChatMistralAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_ollama import ChatOllama
from langchain_openai import AzureChatOpenAI, ChatOpenAI
import gradio as gr
from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama, UnboundChatOpenAI
@@ -37,7 +39,7 @@ def get_llm_model(provider: str, **kwargs):
env_var = f"{provider.upper()}_API_KEY"
api_key = kwargs.get("api_key", "") or os.getenv(env_var, "")
if not api_key:
handle_api_key_error(provider, env_var)
raise MissingAPIKeyError(provider, env_var)
kwargs["api_key"] = api_key
if provider == "anthropic":
@@ -185,7 +187,7 @@ model_names = {
"ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", "qwen2.5-coder:32b", "llama2:7b",
"deepseek-r1:14b", "deepseek-r1:32b"],
"azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
"mistral": ["mixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
"mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"],
"alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"],
"moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"],
"unbound": ["gemini-2.0-flash","gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"]
@@ -197,6 +199,7 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None):
"""
Update the model name dropdown with predefined models for the selected provider.
"""
import gradio as gr
# Use API keys from .env if not provided
if not api_key:
api_key = os.getenv(f"{llm_provider.upper()}_API_KEY", "")
@@ -210,15 +213,13 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None):
return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True)
def handle_api_key_error(provider: str, env_var: str):
"""
Handles the missing API key error by raising a gr.Error with a clear message.
"""
provider_display = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
raise gr.Error(
f"💥 {provider_display} API key not found! 🔑 Please set the "
f"`{env_var}` environment variable or provide it in the UI."
)
class MissingAPIKeyError(Exception):
    """Raised when no API key is configured for the selected LLM provider."""

    def __init__(self, provider: str, env_var: str):
        # Prefer the human-friendly provider name when one is registered.
        display_name = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper())
        message = (
            f"💥 {display_name} API key not found! 🔑 Please set the "
            f"`{env_var}` environment variable or provide it in the UI."
        )
        super().__init__(message)
def encode_image(img_path):
@@ -287,3 +288,70 @@ async def capture_screenshot(browser_context):
return encoded
except Exception as e:
return None
class ConfigManager:
    """Registry of gradio components whose values can be saved to and
    restored from a config file, preserving registration order."""

    def __init__(self):
        # name -> component mapping, plus a list that fixes iteration order.
        self.components = {}
        self.component_order = []

    def register_component(self, name: str, component):
        """Register *component* under *name* and return it.

        Re-registering an existing name replaces the component but keeps
        its original position in the order.
        """
        self.components[name] = component
        if name not in self.component_order:
            self.component_order.append(name)
        return component

    def save_current_config(self):
        """Snapshot every registered component's current `.value` and persist it."""
        snapshot = {
            name: getattr(self.components[name], "value", None)
            for name in self.component_order
        }
        return save_config_to_file(snapshot)

    def update_ui_from_config(self, config_file):
        """Build one gr.update() per registered component from *config_file*.

        Returns the updates (in registration order) followed by a status
        string; missing file / invalid content yields no-op updates.
        """
        if config_file is None:
            return [gr.update() for _ in self.component_order] + ["No file selected."]
        loaded = load_config_from_file(config_file.name)
        if not isinstance(loaded, dict):
            return [gr.update() for _ in self.component_order] + ["Error: Invalid configuration file."]
        updates = [
            gr.update(value=loaded[name]) if name in loaded else gr.update()
            for name in self.component_order
        ]
        return updates + ["Configuration loaded successfully."]

    def get_all_components(self):
        """All registered components, in registration order."""
        return [self.components[name] for name in self.component_order]
def load_config_from_file(config_file):
    """Read a JSON config file and return the parsed settings.

    On any failure (missing file, invalid JSON, ...) an error string is
    returned instead of raising, so callers can show it as a status message.
    """
    try:
        with open(config_file, 'r') as fh:
            return json.load(fh)
    except Exception as exc:
        return f"Error loading configuration: {str(exc)}"
def save_config_to_file(settings, save_dir="./tmp/webui_settings"):
    """Write *settings* as pretty-printed JSON to <uuid4>.json in *save_dir*.

    The directory is created if needed; returns a human-readable status
    string containing the path written.
    """
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, f"{uuid.uuid4()}.json")
    with open(target, 'w') as fh:
        json.dump(settings, fh, indent=2)
    return f"Configuration saved to {target}"

View File

@@ -133,11 +133,11 @@ async def test_browser_use_custom():
api_key=os.getenv("GOOGLE_API_KEY", "")
)
# llm = utils.get_llm_model(
# provider="deepseek",
# model_name="deepseek-reasoner",
# temperature=0.8
# )
llm = utils.get_llm_model(
provider="deepseek",
model_name="deepseek-reasoner",
temperature=0.8
)
# llm = utils.get_llm_model(
# provider="deepseek",

213
webui.py
View File

@@ -13,6 +13,8 @@ import os
logger = logging.getLogger(__name__)
import gradio as gr
import inspect
from functools import wraps
from browser_use.agent.service import Agent
from playwright.async_api import async_playwright
@@ -32,9 +34,8 @@ from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePromp
from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
from src.controller.custom_controller import CustomController
from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, \
save_current_config, update_ui_from_config
from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot
from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot, MissingAPIKeyError
from src.utils import utils
# Global variables for persistence
_global_browser = None
@@ -44,6 +45,49 @@ _global_agent = None
# Create the global agent state instance
_global_agent_state = AgentState()
# webui config
webui_config_manager = utils.ConfigManager()
def scan_and_register_components(blocks):
    """Scan a gradio Blocks object and register all of its interactive components, excluding buttons."""
    global webui_config_manager

    def traverse_blocks(block, prefix=""):
        # Returns the number of components registered under this subtree.
        registered = 0
        # Handle the components directly owned by this Blocks node.
        if hasattr(block, "children"):
            for i, child in enumerate(block.children):
                if isinstance(child, gr.components.Component):
                    # Exclude Button components; only interactive inputs carry state worth saving.
                    if getattr(child, "interactive", False) and not isinstance(child, gr.Button):
                        name = f"{prefix}component_{i}"
                        if hasattr(child, "label") and child.label:
                            # Use the component's label as part of the name.
                            label = child.label
                            name = f"{prefix}{label}"
                        logger.debug(f"Registering component: {name}")
                        webui_config_manager.register_component(name, child)
                        registered += 1
                elif hasattr(child, "children"):
                    # Recurse into nested Blocks containers.
                    new_prefix = f"{prefix}block_{i}_"
                    registered += traverse_blocks(child, new_prefix)
        return registered

    total = traverse_blocks(blocks)
    logger.info(f"Total registered components: {total}")
def save_current_config():
    # Delegate to the global ConfigManager, which snapshots the current value
    # of every registered component and persists it to a config file.
    return webui_config_manager.save_current_config()
def update_ui_from_config(config_file):
    # Delegate to the global ConfigManager; returns one update per registered
    # component followed by a status message string.
    return webui_config_manager.update_ui_from_config(config_file)
def resolve_sensitive_env_variables(text):
"""
@@ -245,8 +289,9 @@ async def run_browser_agent(
gr.update(interactive=True) # Re-enable run button
)
except gr.Error:
raise
except MissingAPIKeyError as e:
logger.error(str(e))
raise gr.Error(str(e), print_exception=False)
except Exception as e:
import traceback
@@ -539,8 +584,7 @@ async def run_with_stream(
max_input_tokens=max_input_tokens
)
# Add HTML content at the start of the result array
html_content = f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Using browser...</h1>"
yield [html_content] + list(result)
yield [gr.update(visible=False)] + list(result)
else:
try:
# Run the browser agent in the background
@@ -592,7 +636,7 @@ async def run_with_stream(
if _global_agent and _global_agent.state.stopped:
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -606,7 +650,7 @@ async def run_with_stream(
break
else:
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -633,7 +677,7 @@ async def run_with_stream(
errors = f"Agent error: {str(e)}"
yield [
html_content,
gr.HTML(value=html_content, visible=True),
final_result,
errors,
model_actions,
@@ -648,7 +692,9 @@ async def run_with_stream(
except Exception as e:
import traceback
yield [
f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
gr.HTML(
value=f"<h1 style='width:{stream_vw}vw; height:{stream_vh}vh'>Waiting for browser session...</h1>",
visible=True),
"",
f"Error: {str(e)}\n{traceback.format_exc()}",
"",
@@ -715,11 +761,13 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p
return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True)
def create_ui(config, theme_name="Ocean"):
def create_ui(theme_name="Ocean"):
css = """
.gradio-container {
max-width: 1200px !important;
margin: auto !important;
width: 60vw !important;
max-width: 60% !important;
margin-left: auto !important;
margin-right: auto !important;
padding-top: 20px !important;
}
.header-text {
@@ -751,41 +799,45 @@ def create_ui(config, theme_name="Ocean"):
agent_type = gr.Radio(
["org", "custom"],
label="Agent Type",
value=config['agent_type'],
value="custom",
info="Select the type of agent to use",
interactive=True
)
with gr.Column():
max_steps = gr.Slider(
minimum=1,
maximum=200,
value=config['max_steps'],
value=100,
step=1,
label="Max Run Steps",
info="Maximum number of steps the agent will take",
interactive=True
)
max_actions_per_step = gr.Slider(
minimum=1,
maximum=20,
value=config['max_actions_per_step'],
maximum=100,
value=10,
step=1,
label="Max Actions per Step",
info="Maximum number of actions the agent will take per step",
interactive=True
)
with gr.Column():
use_vision = gr.Checkbox(
label="Use Vision",
value=config['use_vision'],
value=True,
info="Enable visual processing capabilities",
interactive=True
)
max_input_tokens = gr.Number(
label="Max Input Tokens",
value=128000,
precision=0
precision=0,
interactive=True
)
tool_calling_method = gr.Dropdown(
label="Tool Calling Method",
value=config['tool_calling_method'],
value="auto",
interactive=True,
allow_custom_value=True, # Allow users to input custom model names
choices=["auto", "json_schema", "function_calling"],
@@ -798,44 +850,47 @@ def create_ui(config, theme_name="Ocean"):
llm_provider = gr.Dropdown(
choices=[provider for provider, model in utils.model_names.items()],
label="LLM Provider",
value=config['llm_provider'],
info="Select your preferred language model provider"
value="openai",
info="Select your preferred language model provider",
interactive=True
)
llm_model_name = gr.Dropdown(
label="Model Name",
choices=utils.model_names['openai'],
value=config['llm_model_name'],
value="gpt-4o",
interactive=True,
allow_custom_value=True, # Allow users to input custom model names
info="Select a model in the dropdown options or directly type a custom model name"
)
llm_num_ctx = gr.Slider(
ollama_num_ctx = gr.Slider(
minimum=2 ** 8,
maximum=2 ** 16,
value=config['llm_num_ctx'],
value=16000,
step=1,
label="Max Context Length",
label="Ollama Context Length",
info="Controls max context length model needs to handle (less = faster)",
visible=config['llm_provider'] == "ollama"
visible=False,
interactive=True
)
llm_temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
value=config['llm_temperature'],
value=0.6,
step=0.1,
label="Temperature",
info="Controls randomness in model outputs"
info="Controls randomness in model outputs",
interactive=True
)
with gr.Row():
llm_base_url = gr.Textbox(
label="Base URL",
value=config['llm_base_url'],
value="",
info="API endpoint URL (if required)"
)
llm_api_key = gr.Textbox(
label="API Key",
type="password",
value=config['llm_api_key'],
value="",
info="Your API key (leave blank to use .env)"
)
@@ -847,7 +902,7 @@ def create_ui(config, theme_name="Ocean"):
llm_provider.change(
fn=update_llm_num_ctx_visibility,
inputs=llm_provider,
outputs=llm_num_ctx
outputs=ollama_num_ctx
)
with gr.TabItem("🌐 Browser Settings", id=3):
@@ -855,40 +910,47 @@ def create_ui(config, theme_name="Ocean"):
with gr.Row():
use_own_browser = gr.Checkbox(
label="Use Own Browser",
value=config['use_own_browser'],
value=False,
info="Use your existing browser instance",
interactive=True
)
keep_browser_open = gr.Checkbox(
label="Keep Browser Open",
value=config['keep_browser_open'],
value=False,
info="Keep Browser Open between Tasks",
interactive=True
)
headless = gr.Checkbox(
label="Headless Mode",
value=config['headless'],
value=False,
info="Run browser without GUI",
interactive=True
)
disable_security = gr.Checkbox(
label="Disable Security",
value=config['disable_security'],
value=True,
info="Disable browser security features",
interactive=True
)
enable_recording = gr.Checkbox(
label="Enable Recording",
value=config['enable_recording'],
value=True,
info="Enable saving browser recordings",
interactive=True
)
with gr.Row():
window_w = gr.Number(
label="Window Width",
value=config['window_w'],
value=1280,
info="Browser window width",
interactive=True
)
window_h = gr.Number(
label="Window Height",
value=config['window_h'],
value=1100,
info="Browser window height",
interactive=True
)
chrome_cdp = gr.Textbox(
@@ -902,7 +964,7 @@ def create_ui(config, theme_name="Ocean"):
save_recording_path = gr.Textbox(
label="Recording Path",
placeholder="e.g. ./tmp/record_videos",
value=config['save_recording_path'],
value="./tmp/record_videos",
info="Path to save browser recordings",
interactive=True, # Allow editing only if recording is enabled
)
@@ -910,7 +972,7 @@ def create_ui(config, theme_name="Ocean"):
save_trace_path = gr.Textbox(
label="Trace Path",
placeholder="e.g. ./tmp/traces",
value=config['save_trace_path'],
value="./tmp/traces",
info="Path to save Agent traces",
interactive=True,
)
@@ -918,7 +980,7 @@ def create_ui(config, theme_name="Ocean"):
save_agent_history_path = gr.Textbox(
label="Agent History Save Path",
placeholder="e.g., ./tmp/agent_history",
value=config['save_agent_history_path'],
value="./tmp/agent_history",
info="Specify the directory where agent history should be saved.",
interactive=True,
)
@@ -928,14 +990,17 @@ def create_ui(config, theme_name="Ocean"):
label="Task Description",
lines=4,
placeholder="Enter your task here...",
value=config['task'],
value="go to google.com and type 'OpenAI' click search and give me the first url",
info="Describe what you want the agent to do",
interactive=True
)
add_infos = gr.Textbox(
label="Additional Information",
lines=3,
placeholder="Add any helpful context or instructions...",
info="Optional hints to help the LLM complete the task",
value="",
interactive=True
)
with gr.Row():
@@ -946,6 +1011,7 @@ def create_ui(config, theme_name="Ocean"):
browser_view = gr.HTML(
value="<h1 style='width:80vw; height:50vh'>Waiting for browser session...</h1>",
label="Live Browser View",
visible=False
)
gr.Markdown("### Results")
@@ -973,12 +1039,15 @@ def create_ui(config, theme_name="Ocean"):
with gr.TabItem("🧐 Deep Research", id=5):
research_task_input = gr.Textbox(label="Research Task", lines=5,
value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.")
value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.",
interactive=True)
with gr.Row():
max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3,
precision=0) # precision=0 确保是整数
precision=0,
interactive=True) # precision=0 确保是整数
max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1,
precision=0) # precision=0 确保是整数
precision=0,
interactive=True) # precision=0 确保是整数
with gr.Row():
research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2)
stop_research_button = gr.Button("⏹ Stop", variant="stop", scale=1)
@@ -996,7 +1065,7 @@ def create_ui(config, theme_name="Ocean"):
run_button.click(
fn=run_with_stream,
inputs=[
agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url,
agent_type, llm_provider, llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url,
llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h,
save_recording_path, save_agent_history_path, save_trace_path, # Include the new path
@@ -1021,7 +1090,7 @@ def create_ui(config, theme_name="Ocean"):
research_button.click(
fn=run_deep_search,
inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider,
llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision,
llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision,
use_own_browser, headless, chrome_cdp],
outputs=[markdown_output_display, markdown_download, stop_research_button, research_button]
)
@@ -1054,7 +1123,6 @@ def create_ui(config, theme_name="Ocean"):
recordings_gallery = gr.Gallery(
label="Recordings",
value=list_recordings(config['save_recording_path']),
columns=3,
height="auto",
object_fit="contain"
@@ -1069,41 +1137,22 @@ def create_ui(config, theme_name="Ocean"):
with gr.TabItem("📁 UI Configuration", id=8):
config_file_input = gr.File(
label="Load Config File",
file_types=[".pkl"],
label="Load UI Settings from Config File",
file_types=[".json"],
interactive=True
)
with gr.Row():
load_config_button = gr.Button("Load Existing Config From File", variant="primary")
save_config_button = gr.Button("Save Current Config", variant="primary")
load_config_button = gr.Button("Load Config", variant="primary")
save_config_button = gr.Button("Save UI Settings", variant="primary")
config_status = gr.Textbox(
label="Status",
lines=2,
interactive=False
)
load_config_button.click(
fn=update_ui_from_config,
inputs=[config_file_input],
outputs=[
agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security, enable_recording,
window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path,
task, config_status
]
)
save_config_button.click(
fn=save_current_config,
inputs=[
agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method,
llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key,
use_own_browser, keep_browser_open, headless, disable_security,
enable_recording, window_w, window_h, save_recording_path, save_trace_path,
save_agent_history_path, task,
],
inputs=[], # 不需要输入参数
outputs=[config_status]
)
@@ -1124,6 +1173,15 @@ def create_ui(config, theme_name="Ocean"):
use_own_browser.change(fn=close_global_browser)
keep_browser_open.change(fn=close_global_browser)
scan_and_register_components(demo)
global webui_config_manager
all_components = webui_config_manager.get_all_components()
load_config_button.click(
fn=update_ui_from_config,
inputs=[config_file_input],
outputs=all_components + [config_status]
)
return demo
@@ -1132,12 +1190,9 @@ def main():
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
args = parser.parse_args()
config_dict = default_config()
demo = create_ui(config_dict, theme_name=args.theme)
demo = create_ui(theme_name=args.theme)
demo.launch(server_name=args.ip, server_port=args.port)