From 7fdf95edaeaf2505b36c10966b7b8d65359f1de6 Mon Sep 17 00:00:00 2001 From: vvincent1234 Date: Sat, 29 Mar 2025 11:18:13 +0800 Subject: [PATCH] optimize webui settings and fix vulnerability --- src/utils/agent_state.py | 3 +- src/utils/deep_research.py | 8 +- src/utils/default_config_settings.py | 125 ------------------ src/utils/utils.py | 74 ++++++++++- webui.py | 186 +++++++++++++++++---------- 5 files changed, 201 insertions(+), 195 deletions(-) delete mode 100644 src/utils/default_config_settings.py diff --git a/src/utils/agent_state.py b/src/utils/agent_state.py index 487a810..2456a55 100644 --- a/src/utils/agent_state.py +++ b/src/utils/agent_state.py @@ -1,5 +1,6 @@ import asyncio + class AgentState: _instance = None @@ -27,4 +28,4 @@ class AgentState: self.last_valid_state = state def get_last_valid_state(self): - return self.last_valid_state \ No newline at end of file + return self.last_valid_state diff --git a/src/utils/deep_research.py b/src/utils/deep_research.py index ab538e0..0409385 100644 --- a/src/utils/deep_research.py +++ b/src/utils/deep_research.py @@ -19,7 +19,13 @@ from browser_use.agent.views import ActionResult from browser_use.browser.context import BrowserContext from browser_use.controller.service import Controller, DoneAction from main_content_extractor import MainContentExtractor -from langchain.schema import SystemMessage, HumanMessage +from langchain_core.messages import ( + AIMessage, + BaseMessage, + HumanMessage, + ToolMessage, + SystemMessage +) from json_repair import repair_json from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt from src.controller.custom_controller import CustomController diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py deleted file mode 100644 index 22c6185..0000000 --- a/src/utils/default_config_settings.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -import pickle -import uuid -import gradio as gr - - -def default_config(): - """Prepare 
the default configuration""" - return { - "agent_type": "custom", - "max_steps": 100, - "max_actions_per_step": 10, - "use_vision": True, - "tool_calling_method": "auto", - "llm_provider": "openai", - "llm_model_name": "gpt-4o", - "llm_num_ctx": 32000, - "llm_temperature": 0.6, - "llm_base_url": "", - "llm_api_key": "", - "use_own_browser": os.getenv("CHROME_PERSISTENT_SESSION", "false").lower() == "true", - "keep_browser_open": False, - "headless": False, - "disable_security": True, - "enable_recording": True, - "window_w": 1280, - "window_h": 1100, - "save_recording_path": "./tmp/record_videos", - "save_trace_path": "./tmp/traces", - "save_agent_history_path": "./tmp/agent_history", - "task": "go to google.com and type 'OpenAI' click search and give me the first url", - } - - -def load_config_from_file(config_file): - """Load settings from a UUID.pkl file.""" - try: - with open(config_file, 'rb') as f: - settings = pickle.load(f) - return settings - except Exception as e: - return f"Error loading configuration: {str(e)}" - - -def save_config_to_file(settings, save_dir="./tmp/webui_settings"): - """Save the current settings to a UUID.pkl file with a UUID name.""" - os.makedirs(save_dir, exist_ok=True) - config_file = os.path.join(save_dir, f"{uuid.uuid4()}.pkl") - with open(config_file, 'wb') as f: - pickle.dump(settings, f) - return f"Configuration saved to {config_file}" - - -def save_current_config(*args): - current_config = { - "agent_type": args[0], - "max_steps": args[1], - "max_actions_per_step": args[2], - "use_vision": args[3], - "tool_calling_method": args[4], - "llm_provider": args[5], - "llm_model_name": args[6], - "llm_num_ctx": args[7], - "llm_temperature": args[8], - "llm_base_url": args[9], - "llm_api_key": args[10], - "use_own_browser": args[11], - "keep_browser_open": args[12], - "headless": args[13], - "disable_security": args[14], - "enable_recording": args[15], - "window_w": args[16], - "window_h": args[17], - "save_recording_path": args[18], 
- "save_trace_path": args[19], - "save_agent_history_path": args[20], - "task": args[21], - } - return save_config_to_file(current_config) - - -def update_ui_from_config(config_file): - if config_file is not None: - loaded_config = load_config_from_file(config_file.name) - if isinstance(loaded_config, dict): - return ( - gr.update(value=loaded_config.get("agent_type", "custom")), - gr.update(value=loaded_config.get("max_steps", 100)), - gr.update(value=loaded_config.get("max_actions_per_step", 10)), - gr.update(value=loaded_config.get("use_vision", True)), - gr.update(value=loaded_config.get("tool_calling_method", True)), - gr.update(value=loaded_config.get("llm_provider", "openai")), - gr.update(value=loaded_config.get("llm_model_name", "gpt-4o")), - gr.update(value=loaded_config.get("llm_num_ctx", 32000)), - gr.update(value=loaded_config.get("llm_temperature", 1.0)), - gr.update(value=loaded_config.get("llm_base_url", "")), - gr.update(value=loaded_config.get("llm_api_key", "")), - gr.update(value=loaded_config.get("use_own_browser", False)), - gr.update(value=loaded_config.get("keep_browser_open", False)), - gr.update(value=loaded_config.get("headless", False)), - gr.update(value=loaded_config.get("disable_security", True)), - gr.update(value=loaded_config.get("enable_recording", True)), - gr.update(value=loaded_config.get("window_w", 1280)), - gr.update(value=loaded_config.get("window_h", 1100)), - gr.update(value=loaded_config.get("save_recording_path", "./tmp/record_videos")), - gr.update(value=loaded_config.get("save_trace_path", "./tmp/traces")), - gr.update(value=loaded_config.get("save_agent_history_path", "./tmp/agent_history")), - gr.update(value=loaded_config.get("task", "")), - "Configuration loaded successfully." 
- ) - else: - return ( - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), "Error: Invalid configuration file." - ) - return ( - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), "No file selected." - ) diff --git a/src/utils/utils.py b/src/utils/utils.py index 0f1cee2..2590a0b 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -4,6 +4,9 @@ import time from pathlib import Path from typing import Dict, Optional import requests +import json +import gradio as gr +import uuid from langchain_anthropic import ChatAnthropic from langchain_mistralai import ChatMistralAI @@ -196,12 +199,14 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None): else: return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True) + class MissingAPIKeyError(Exception): """Custom exception for missing API key.""" + def __init__(self, provider: str, env_var: str): provider_display = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper()) super().__init__(f"💥 {provider_display} API key not found! 
🔑 Please set the " - f"`{env_var}` environment variable or provide it in the UI.") + f"`{env_var}` environment variable or provide it in the UI.") def encode_image(img_path): @@ -270,3 +275,70 @@ async def capture_screenshot(browser_context): return encoded except Exception as e: return None + + +class ConfigManager: + def __init__(self): + self.components = {} + self.component_order = [] + + def register_component(self, name: str, component): + """Register a gradio component for config management.""" + self.components[name] = component + if name not in self.component_order: + self.component_order.append(name) + return component + + def save_current_config(self): + """Save the current configuration of all registered components.""" + current_config = {} + for name in self.component_order: + component = self.components[name] + # Get the current value from the component + current_config[name] = getattr(component, "value", None) + + return save_config_to_file(current_config) + + def update_ui_from_config(self, config_file): + """Update UI components from a loaded configuration file.""" + if config_file is None: + return [gr.update() for _ in self.component_order] + ["No file selected."] + + loaded_config = load_config_from_file(config_file.name) + + if not isinstance(loaded_config, dict): + return [gr.update() for _ in self.component_order] + ["Error: Invalid configuration file."] + + # Prepare updates for all components + updates = [] + for name in self.component_order: + if name in loaded_config: + updates.append(gr.update(value=loaded_config[name])) + else: + updates.append(gr.update()) + + updates.append("Configuration loaded successfully.") + return updates + + def get_all_components(self): + """Return all registered components in the order they were registered.""" + return [self.components[name] for name in self.component_order] + + +def load_config_from_file(config_file): + """Load settings from a config file (JSON format).""" + try: + with open(config_file, 
'r') as f: + settings = json.load(f) + return settings + except Exception as e: + return f"Error loading configuration: {str(e)}" + + +def save_config_to_file(settings, save_dir="./tmp/webui_settings"): + """Save the current settings to a UUID.json file with a UUID name.""" + os.makedirs(save_dir, exist_ok=True) + config_file = os.path.join(save_dir, f"{uuid.uuid4()}.json") + with open(config_file, 'w') as f: + json.dump(settings, f, indent=2) + return f"Configuration saved to {config_file}" diff --git a/webui.py b/webui.py index f566287..bc68605 100644 --- a/webui.py +++ b/webui.py @@ -13,6 +13,8 @@ import os logger = logging.getLogger(__name__) import gradio as gr +import inspect +from functools import wraps from browser_use.agent.service import Agent from playwright.async_api import async_playwright @@ -32,9 +34,8 @@ from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePromp from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext from src.controller.custom_controller import CustomController from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base -from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, \ - save_current_config, update_ui_from_config from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot, MissingAPIKeyError +from src.utils import utils # Global variables for persistence _global_browser = None @@ -44,6 +45,49 @@ _global_agent = None # Create the global agent state instance _global_agent_state = AgentState() +# webui config +webui_config_manager = utils.ConfigManager() + + +def scan_and_register_components(blocks): + """扫描一个 Blocks 对象并注册其中的所有交互式组件,但不包括按钮""" + global webui_config_manager + + def traverse_blocks(block, prefix=""): + registered = 0 + + # 处理 Blocks 自身的组件 + if hasattr(block, "children"): + for i, child in enumerate(block.children): + if isinstance(child, 
gr.components.Component): + # 排除按钮 (Button) 组件 + if getattr(child, "interactive", False) and not isinstance(child, gr.Button): + name = f"{prefix}component_{i}" + if hasattr(child, "label") and child.label: + # 使用标签作为名称的一部分 + label = child.label + name = f"{prefix}{label}" + logger.debug(f"Registering component: {name}") + webui_config_manager.register_component(name, child) + registered += 1 + elif hasattr(child, "children"): + # 递归处理嵌套的 Blocks + new_prefix = f"{prefix}block_{i}_" + registered += traverse_blocks(child, new_prefix) + + return registered + + total = traverse_blocks(blocks) + logger.info(f"Total registered components: {total}") + + +def save_current_config(): + return webui_config_manager.save_current_config() + + +def update_ui_from_config(config_file): + return webui_config_manager.update_ui_from_config(config_file) + def resolve_sensitive_env_variables(text): """ @@ -717,11 +761,13 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True) -def create_ui(config, theme_name="Ocean"): +def create_ui(theme_name="Ocean"): css = """ .gradio-container { - max-width: 1200px !important; - margin: auto !important; + width: 60vw !important; + max-width: 60% !important; + margin-left: auto !important; + margin-right: auto !important; padding-top: 20px !important; } .header-text { @@ -753,41 +799,45 @@ def create_ui(config, theme_name="Ocean"): agent_type = gr.Radio( ["org", "custom"], label="Agent Type", - value=config['agent_type'], + value="custom", info="Select the type of agent to use", + interactive=True ) with gr.Column(): max_steps = gr.Slider( minimum=1, maximum=200, - value=config['max_steps'], + value=100, step=1, label="Max Run Steps", info="Maximum number of steps the agent will take", + interactive=True ) max_actions_per_step = gr.Slider( minimum=1, - maximum=20, - value=config['max_actions_per_step'], + 
maximum=100, + value=10, step=1, label="Max Actions per Step", info="Maximum number of actions the agent will take per step", + interactive=True ) with gr.Column(): use_vision = gr.Checkbox( label="Use Vision", - value=config['use_vision'], + value=True, info="Enable visual processing capabilities", + interactive=True ) max_input_tokens = gr.Number( label="Max Input Tokens", value=128000, - precision=0 - + precision=0, + interactive=True ) tool_calling_method = gr.Dropdown( label="Tool Calling Method", - value=config['tool_calling_method'], + value="auto", interactive=True, allow_custom_value=True, # Allow users to input custom model names choices=["auto", "json_schema", "function_calling"], @@ -800,44 +850,47 @@ def create_ui(config, theme_name="Ocean"): llm_provider = gr.Dropdown( choices=[provider for provider, model in utils.model_names.items()], label="LLM Provider", - value=config['llm_provider'], - info="Select your preferred language model provider" + value="openai", + info="Select your preferred language model provider", + interactive=True ) llm_model_name = gr.Dropdown( label="Model Name", choices=utils.model_names['openai'], - value=config['llm_model_name'], + value="gpt-4o", interactive=True, allow_custom_value=True, # Allow users to input custom model names info="Select a model in the dropdown options or directly type a custom model name" ) - llm_num_ctx = gr.Slider( + ollama_num_ctx = gr.Slider( minimum=2 ** 8, maximum=2 ** 16, - value=config['llm_num_ctx'], + value=16000, step=1, - label="Max Context Length", + label="Ollama Context Length", info="Controls max context length model needs to handle (less = faster)", - visible=config['llm_provider'] == "ollama" + visible=False, + interactive=True ) llm_temperature = gr.Slider( minimum=0.0, maximum=2.0, - value=config['llm_temperature'], + value=0.6, step=0.1, label="Temperature", - info="Controls randomness in model outputs" + info="Controls randomness in model outputs", + interactive=True ) with 
gr.Row(): llm_base_url = gr.Textbox( label="Base URL", - value=config['llm_base_url'], + value="", info="API endpoint URL (if required)" ) llm_api_key = gr.Textbox( label="API Key", type="password", - value=config['llm_api_key'], + value="", info="Your API key (leave blank to use .env)" ) @@ -849,7 +902,7 @@ def create_ui(config, theme_name="Ocean"): llm_provider.change( fn=update_llm_num_ctx_visibility, inputs=llm_provider, - outputs=llm_num_ctx + outputs=ollama_num_ctx ) with gr.TabItem("🌐 Browser Settings", id=3): @@ -857,40 +910,47 @@ def create_ui(config, theme_name="Ocean"): with gr.Row(): use_own_browser = gr.Checkbox( label="Use Own Browser", - value=config['use_own_browser'], + value=False, info="Use your existing browser instance", + interactive=True ) keep_browser_open = gr.Checkbox( label="Keep Browser Open", - value=config['keep_browser_open'], + value=False, info="Keep Browser Open between Tasks", + interactive=True ) headless = gr.Checkbox( label="Headless Mode", - value=config['headless'], + value=False, info="Run browser without GUI", + interactive=True ) disable_security = gr.Checkbox( label="Disable Security", - value=config['disable_security'], + value=True, info="Disable browser security features", + interactive=True ) enable_recording = gr.Checkbox( label="Enable Recording", - value=config['enable_recording'], + value=True, info="Enable saving browser recordings", + interactive=True ) with gr.Row(): window_w = gr.Number( label="Window Width", - value=config['window_w'], + value=1280, info="Browser window width", + interactive=True ) window_h = gr.Number( label="Window Height", - value=config['window_h'], + value=1100, info="Browser window height", + interactive=True ) chrome_cdp = gr.Textbox( @@ -904,7 +964,7 @@ def create_ui(config, theme_name="Ocean"): save_recording_path = gr.Textbox( label="Recording Path", placeholder="e.g. 
./tmp/record_videos", - value=config['save_recording_path'], + value="./tmp/record_videos", info="Path to save browser recordings", interactive=True, # Allow editing only if recording is enabled ) @@ -912,7 +972,7 @@ def create_ui(config, theme_name="Ocean"): save_trace_path = gr.Textbox( label="Trace Path", placeholder="e.g. ./tmp/traces", - value=config['save_trace_path'], + value="./tmp/traces", info="Path to save Agent traces", interactive=True, ) @@ -920,7 +980,7 @@ def create_ui(config, theme_name="Ocean"): save_agent_history_path = gr.Textbox( label="Agent History Save Path", placeholder="e.g., ./tmp/agent_history", - value=config['save_agent_history_path'], + value="./tmp/agent_history", info="Specify the directory where agent history should be saved.", interactive=True, ) @@ -930,14 +990,17 @@ def create_ui(config, theme_name="Ocean"): label="Task Description", lines=4, placeholder="Enter your task here...", - value=config['task'], + value="go to google.com and type 'OpenAI' click search and give me the first url", info="Describe what you want the agent to do", + interactive=True ) add_infos = gr.Textbox( label="Additional Information", lines=3, placeholder="Add any helpful context or instructions...", info="Optional hints to help the LLM complete the task", + value="", + interactive=True ) with gr.Row(): @@ -976,12 +1039,15 @@ def create_ui(config, theme_name="Ocean"): with gr.TabItem("🧐 Deep Research", id=5): research_task_input = gr.Textbox(label="Research Task", lines=5, - value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. 
The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") + value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.", + interactive=True) with gr.Row(): max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, - precision=0) # precision=0 确保是整数 + precision=0, + interactive=True) # precision=0 确保是整数 max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, - precision=0) # precision=0 确保是整数 + precision=0, + interactive=True) # precision=0 确保是整数 with gr.Row(): research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2) stop_research_button = gr.Button("⏹ Stop", variant="stop", scale=1) @@ -999,7 +1065,7 @@ def create_ui(config, theme_name="Ocean"): run_button.click( fn=run_with_stream, inputs=[ - agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, + agent_type, llm_provider, llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, # Include the new path @@ -1024,7 +1090,7 @@ def create_ui(config, theme_name="Ocean"): research_button.click( fn=run_deep_search, inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, - llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, + llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless, chrome_cdp], outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] ) @@ 
-1057,7 +1123,6 @@ def create_ui(config, theme_name="Ocean"): recordings_gallery = gr.Gallery( label="Recordings", - value=list_recordings(config['save_recording_path']), columns=3, height="auto", object_fit="contain" @@ -1073,7 +1138,7 @@ def create_ui(config, theme_name="Ocean"): with gr.TabItem("📁 UI Configuration", id=8): config_file_input = gr.File( label="Load UI Settings from Config File", - file_types=[".pkl"], + file_types=[".json"], interactive=True ) with gr.Row(): @@ -1085,28 +1150,9 @@ def create_ui(config, theme_name="Ocean"): lines=2, interactive=False ) - - load_config_button.click( - fn=update_ui_from_config, - inputs=[config_file_input], - outputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, enable_recording, - window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path, - task, config_status - ] - ) - save_config_button.click( fn=save_current_config, - inputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, - enable_recording, window_w, window_h, save_recording_path, save_trace_path, - save_agent_history_path, task, - ], + inputs=[], # 不需要输入参数 outputs=[config_status] ) @@ -1127,6 +1173,15 @@ def create_ui(config, theme_name="Ocean"): use_own_browser.change(fn=close_global_browser) keep_browser_open.change(fn=close_global_browser) + scan_and_register_components(demo) + global webui_config_manager + all_components = webui_config_manager.get_all_components() + + load_config_button.click( + fn=update_ui_from_config, + inputs=[config_file_input], + outputs=all_components + [config_status] + ) return demo @@ -1135,12 +1190,9 @@ def main(): 
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") - parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode") args = parser.parse_args() - config_dict = default_config() - - demo = create_ui(config_dict, theme_name=args.theme) + demo = create_ui(theme_name=args.theme) demo.launch(server_name=args.ip, server_port=args.port)