diff --git a/requirements.txt b/requirements.txt index 9777ebc..7f2d12c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ browser-use==0.1.40 pyperclip==1.9.0 -gradio==5.10.0 +gradio==5.23.1 json-repair langchain-mistralai==0.2.4 langchain-google-genai==2.0.8 diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index a41245b..4b0eff3 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -208,8 +208,8 @@ class CustomAgent(Agent): @time_execution_async("--get_next_action") async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput: """Get next action from LLM based on current state""" - - ai_message = self.llm.invoke(input_messages) + fixed_input_messages = self._convert_input_messages(input_messages) + ai_message = self.llm.invoke(fixed_input_messages) self.message_manager._add_message_with_tokens(ai_message) if hasattr(ai_message, "reasoning_content"): @@ -222,10 +222,16 @@ class CustomAgent(Agent): else: ai_content = ai_message.content - ai_content = ai_content.replace("```json", "").replace("```", "") - ai_content = repair_json(ai_content) - parsed_json = json.loads(ai_content) - parsed: AgentOutput = self.AgentOutput(**parsed_json) + try: + ai_content = ai_content.replace("```json", "").replace("```", "") + ai_content = repair_json(ai_content) + parsed_json = json.loads(ai_content) + parsed: AgentOutput = self.AgentOutput(**parsed_json) + except Exception as e: + import traceback + traceback.print_exc() + logger.debug(ai_message.content) + raise ValueError('Could not parse response.') if parsed is None: logger.debug(ai_message.content) diff --git a/src/agent/custom_message_manager.py b/src/agent/custom_message_manager.py index 8f2276b..212c3fb 100644 --- a/src/agent/custom_message_manager.py +++ b/src/agent/custom_message_manager.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import pdb from typing import List, Optional, Type, Dict from 
browser_use.agent.message_manager.service import MessageManager @@ -96,7 +97,7 @@ class CustomMessageManager(MessageManager): self._add_message_with_tokens(state_message) def _remove_state_message_by_index(self, remove_ind=-1) -> None: - """Remove last state message from history""" + """Remove state message by index from history""" i = len(self.state.history.messages) - 1 remove_cnt = 0 while i >= 0: diff --git a/src/agent/custom_system_prompt.md b/src/agent/custom_system_prompt.md index 13efbdb..9cefaa2 100644 --- a/src/agent/custom_system_prompt.md +++ b/src/agent/custom_system_prompt.md @@ -18,11 +18,17 @@ Example: # Response Rules 1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format: -{{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not.", -"important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output ''.", -"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.", -"next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought."}}, -"action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}} +{{ + "current_state": {{ + "evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. 
Shortly state why/why not.", +    "important_contents": "Output important contents closely related to user's instruction on the current page. If there is, please output the contents. If not, please output empty string ''.", +    "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of evaluation_previous_goal is 'Failed', please reflect and output your reflection here.", +    "next_goal": "Please generate a brief natural language description for the goal of your next actions based on your thought." +  }}, +  "action": [ +    {{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence +  ] +}} 2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence. Common action sequences: diff --git a/src/utils/agent_state.py b/src/utils/agent_state.py index 487a810..2456a55 100644 --- a/src/utils/agent_state.py +++ b/src/utils/agent_state.py @@ -1,5 +1,6 @@ import asyncio + class AgentState: _instance = None @@ -27,4 +28,4 @@ class AgentState: self.last_valid_state = state def get_last_valid_state(self): - return self.last_valid_state \ No newline at end of file + return self.last_valid_state diff --git a/src/utils/deep_research.py b/src/utils/deep_research.py index ab538e0..0409385 100644 --- a/src/utils/deep_research.py +++ b/src/utils/deep_research.py @@ -19,7 +19,13 @@ from browser_use.agent.views import ActionResult from browser_use.browser.context import BrowserContext from browser_use.controller.service import Controller, DoneAction from main_content_extractor import MainContentExtractor -from langchain.schema import SystemMessage, HumanMessage +from langchain_core.messages import ( +    AIMessage, +    BaseMessage, +    HumanMessage, +    ToolMessage, +    SystemMessage +) from json_repair import repair_json from
src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt from src.controller.custom_controller import CustomController diff --git a/src/utils/default_config_settings.py b/src/utils/default_config_settings.py deleted file mode 100644 index 22c6185..0000000 --- a/src/utils/default_config_settings.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -import pickle -import uuid -import gradio as gr - - -def default_config(): - """Prepare the default configuration""" - return { - "agent_type": "custom", - "max_steps": 100, - "max_actions_per_step": 10, - "use_vision": True, - "tool_calling_method": "auto", - "llm_provider": "openai", - "llm_model_name": "gpt-4o", - "llm_num_ctx": 32000, - "llm_temperature": 0.6, - "llm_base_url": "", - "llm_api_key": "", - "use_own_browser": os.getenv("CHROME_PERSISTENT_SESSION", "false").lower() == "true", - "keep_browser_open": False, - "headless": False, - "disable_security": True, - "enable_recording": True, - "window_w": 1280, - "window_h": 1100, - "save_recording_path": "./tmp/record_videos", - "save_trace_path": "./tmp/traces", - "save_agent_history_path": "./tmp/agent_history", - "task": "go to google.com and type 'OpenAI' click search and give me the first url", - } - - -def load_config_from_file(config_file): - """Load settings from a UUID.pkl file.""" - try: - with open(config_file, 'rb') as f: - settings = pickle.load(f) - return settings - except Exception as e: - return f"Error loading configuration: {str(e)}" - - -def save_config_to_file(settings, save_dir="./tmp/webui_settings"): - """Save the current settings to a UUID.pkl file with a UUID name.""" - os.makedirs(save_dir, exist_ok=True) - config_file = os.path.join(save_dir, f"{uuid.uuid4()}.pkl") - with open(config_file, 'wb') as f: - pickle.dump(settings, f) - return f"Configuration saved to {config_file}" - - -def save_current_config(*args): - current_config = { - "agent_type": args[0], - "max_steps": args[1], - "max_actions_per_step": args[2], - 
"use_vision": args[3], - "tool_calling_method": args[4], - "llm_provider": args[5], - "llm_model_name": args[6], - "llm_num_ctx": args[7], - "llm_temperature": args[8], - "llm_base_url": args[9], - "llm_api_key": args[10], - "use_own_browser": args[11], - "keep_browser_open": args[12], - "headless": args[13], - "disable_security": args[14], - "enable_recording": args[15], - "window_w": args[16], - "window_h": args[17], - "save_recording_path": args[18], - "save_trace_path": args[19], - "save_agent_history_path": args[20], - "task": args[21], - } - return save_config_to_file(current_config) - - -def update_ui_from_config(config_file): - if config_file is not None: - loaded_config = load_config_from_file(config_file.name) - if isinstance(loaded_config, dict): - return ( - gr.update(value=loaded_config.get("agent_type", "custom")), - gr.update(value=loaded_config.get("max_steps", 100)), - gr.update(value=loaded_config.get("max_actions_per_step", 10)), - gr.update(value=loaded_config.get("use_vision", True)), - gr.update(value=loaded_config.get("tool_calling_method", True)), - gr.update(value=loaded_config.get("llm_provider", "openai")), - gr.update(value=loaded_config.get("llm_model_name", "gpt-4o")), - gr.update(value=loaded_config.get("llm_num_ctx", 32000)), - gr.update(value=loaded_config.get("llm_temperature", 1.0)), - gr.update(value=loaded_config.get("llm_base_url", "")), - gr.update(value=loaded_config.get("llm_api_key", "")), - gr.update(value=loaded_config.get("use_own_browser", False)), - gr.update(value=loaded_config.get("keep_browser_open", False)), - gr.update(value=loaded_config.get("headless", False)), - gr.update(value=loaded_config.get("disable_security", True)), - gr.update(value=loaded_config.get("enable_recording", True)), - gr.update(value=loaded_config.get("window_w", 1280)), - gr.update(value=loaded_config.get("window_h", 1100)), - gr.update(value=loaded_config.get("save_recording_path", "./tmp/record_videos")), - 
gr.update(value=loaded_config.get("save_trace_path", "./tmp/traces")), - gr.update(value=loaded_config.get("save_agent_history_path", "./tmp/agent_history")), - gr.update(value=loaded_config.get("task", "")), - "Configuration loaded successfully." - ) - else: - return ( - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), "Error: Invalid configuration file." - ) - return ( - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), - gr.update(), "No file selected." - ) diff --git a/src/utils/utils.py b/src/utils/utils.py index c113843..7289002 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -4,13 +4,15 @@ import time from pathlib import Path from typing import Dict, Optional import requests +import json +import gradio as gr +import uuid from langchain_anthropic import ChatAnthropic from langchain_mistralai import ChatMistralAI from langchain_google_genai import ChatGoogleGenerativeAI from langchain_ollama import ChatOllama from langchain_openai import AzureChatOpenAI, ChatOpenAI -import gradio as gr from .llm import DeepSeekR1ChatOpenAI, DeepSeekR1ChatOllama, UnboundChatOpenAI @@ -37,7 +39,7 @@ def get_llm_model(provider: str, **kwargs): env_var = f"{provider.upper()}_API_KEY" api_key = kwargs.get("api_key", "") or os.getenv(env_var, "") if not api_key: - handle_api_key_error(provider, env_var) + raise MissingAPIKeyError(provider, env_var) kwargs["api_key"] = api_key if provider == "anthropic": @@ -185,7 +187,7 @@ model_names = { "ollama": ["qwen2.5:7b", "qwen2.5:14b", "qwen2.5:32b", "qwen2.5-coder:14b", 
"qwen2.5-coder:32b", "llama2:7b", "deepseek-r1:14b", "deepseek-r1:32b"], "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"], - "mistral": ["mixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"], + "mistral": ["pixtral-large-latest", "mistral-large-latest", "mistral-small-latest", "ministral-8b-latest"], "alibaba": ["qwen-plus", "qwen-max", "qwen-turbo", "qwen-long"], "moonshot": ["moonshot-v1-32k-vision-preview", "moonshot-v1-8k-vision-preview"], "unbound": ["gemini-2.0-flash","gpt-4o-mini", "gpt-4o", "gpt-4.5-preview"] @@ -197,6 +199,7 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None): """ Update the model name dropdown with predefined models for the selected provider. """ + import gradio as gr # Use API keys from .env if not provided if not api_key: api_key = os.getenv(f"{llm_provider.upper()}_API_KEY", "") @@ -210,15 +213,13 @@ def update_model_dropdown(llm_provider, api_key=None, base_url=None): return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True) -def handle_api_key_error(provider: str, env_var: str): - """ - Handles the missing API key error by raising a gr.Error with a clear message. - """ - provider_display = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper()) - raise gr.Error( - f"💥 {provider_display} API key not found! 🔑 Please set the " - f"`{env_var}` environment variable or provide it in the UI." - ) +class MissingAPIKeyError(Exception): + """Custom exception for missing API key.""" + + def __init__(self, provider: str, env_var: str): + provider_display = PROVIDER_DISPLAY_NAMES.get(provider, provider.upper()) + super().__init__(f"💥 {provider_display} API key not found! 
🔑 Please set the " + f"`{env_var}` environment variable or provide it in the UI.") def encode_image(img_path): @@ -287,3 +288,70 @@ async def capture_screenshot(browser_context): return encoded except Exception as e: return None + + +class ConfigManager: + def __init__(self): + self.components = {} + self.component_order = [] + + def register_component(self, name: str, component): + """Register a gradio component for config management.""" + self.components[name] = component + if name not in self.component_order: + self.component_order.append(name) + return component + + def save_current_config(self): + """Save the current configuration of all registered components.""" + current_config = {} + for name in self.component_order: + component = self.components[name] + # Get the current value from the component + current_config[name] = getattr(component, "value", None) + + return save_config_to_file(current_config) + + def update_ui_from_config(self, config_file): + """Update UI components from a loaded configuration file.""" + if config_file is None: + return [gr.update() for _ in self.component_order] + ["No file selected."] + + loaded_config = load_config_from_file(config_file.name) + + if not isinstance(loaded_config, dict): + return [gr.update() for _ in self.component_order] + ["Error: Invalid configuration file."] + + # Prepare updates for all components + updates = [] + for name in self.component_order: + if name in loaded_config: + updates.append(gr.update(value=loaded_config[name])) + else: + updates.append(gr.update()) + + updates.append("Configuration loaded successfully.") + return updates + + def get_all_components(self): + """Return all registered components in the order they were registered.""" + return [self.components[name] for name in self.component_order] + + +def load_config_from_file(config_file): + """Load settings from a config file (JSON format).""" + try: + with open(config_file, 'r') as f: + settings = json.load(f) + return settings + except 
Exception as e: + return f"Error loading configuration: {str(e)}" + + +def save_config_to_file(settings, save_dir="./tmp/webui_settings"): + """Save the current settings to a UUID.json file with a UUID name.""" + os.makedirs(save_dir, exist_ok=True) + config_file = os.path.join(save_dir, f"{uuid.uuid4()}.json") + with open(config_file, 'w') as f: + json.dump(settings, f, indent=2) + return f"Configuration saved to {config_file}" diff --git a/tests/test_browser_use.py b/tests/test_browser_use.py index db35c5f..6ef4210 100644 --- a/tests/test_browser_use.py +++ b/tests/test_browser_use.py @@ -133,11 +133,11 @@ async def test_browser_use_custom(): api_key=os.getenv("GOOGLE_API_KEY", "") ) - # llm = utils.get_llm_model( - # provider="deepseek", - # model_name="deepseek-reasoner", - # temperature=0.8 - # ) + llm = utils.get_llm_model( + provider="deepseek", + model_name="deepseek-reasoner", + temperature=0.8 + ) # llm = utils.get_llm_model( # provider="deepseek", diff --git a/webui.py b/webui.py index ec51779..bc68605 100644 --- a/webui.py +++ b/webui.py @@ -13,6 +13,8 @@ import os logger = logging.getLogger(__name__) import gradio as gr +import inspect +from functools import wraps from browser_use.agent.service import Agent from playwright.async_api import async_playwright @@ -32,9 +34,8 @@ from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePromp from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext from src.controller.custom_controller import CustomController from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base -from src.utils.default_config_settings import default_config, load_config_from_file, save_config_to_file, \ - save_current_config, update_ui_from_config -from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot +from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot, MissingAPIKeyError +from src.utils import 
utils # Global variables for persistence _global_browser = None @@ -44,6 +45,49 @@ _global_agent = None # Create the global agent state instance _global_agent_state = AgentState() +# webui config +webui_config_manager = utils.ConfigManager() + + +def scan_and_register_components(blocks): +    """Scan a Blocks object and register every interactive component in it, excluding buttons""" +    global webui_config_manager + +    def traverse_blocks(block, prefix=""): +        registered = 0 + +        # Handle the components that belong to this Blocks object itself +        if hasattr(block, "children"): +            for i, child in enumerate(block.children): +                if isinstance(child, gr.components.Component): +                    # Exclude Button components +                    if getattr(child, "interactive", False) and not isinstance(child, gr.Button): +                        name = f"{prefix}component_{i}" +                        if hasattr(child, "label") and child.label: +                            # Use the label as part of the name +                            label = child.label +                            name = f"{prefix}{label}" +                        logger.debug(f"Registering component: {name}") +                        webui_config_manager.register_component(name, child) +                        registered += 1 +                elif hasattr(child, "children"): +                    # Recursively handle nested Blocks +                    new_prefix = f"{prefix}block_{i}_" +                    registered += traverse_blocks(child, new_prefix) + +        return registered + +    total = traverse_blocks(blocks) +    logger.info(f"Total registered components: {total}") + + +def save_current_config(): +    return webui_config_manager.save_current_config() + + +def update_ui_from_config(config_file): +    return webui_config_manager.update_ui_from_config(config_file) + def resolve_sensitive_env_variables(text): """ @@ -245,8 +289,9 @@ async def run_browser_agent( gr.update(interactive=True) # Re-enable run button ) - except gr.Error: - raise + except MissingAPIKeyError as e: + logger.error(str(e)) + raise gr.Error(str(e), print_exception=False) except Exception as e: import traceback @@ -539,8 +584,7 @@ async def run_with_stream( max_input_tokens=max_input_tokens ) # Add HTML content at the start of the result array - html_content = f"

Using browser...

" - yield [html_content] + list(result) + yield [gr.update(visible=False)] + list(result) else: try: # Run the browser agent in the background @@ -592,7 +636,7 @@ async def run_with_stream( if _global_agent and _global_agent.state.stopped: yield [ - html_content, + gr.HTML(value=html_content, visible=True), final_result, errors, model_actions, @@ -606,7 +650,7 @@ async def run_with_stream( break else: yield [ - html_content, + gr.HTML(value=html_content, visible=True), final_result, errors, model_actions, @@ -633,7 +677,7 @@ async def run_with_stream( errors = f"Agent error: {str(e)}" yield [ - html_content, + gr.HTML(value=html_content, visible=True), final_result, errors, model_actions, @@ -648,7 +692,9 @@ async def run_with_stream( except Exception as e: import traceback yield [ - f"

Waiting for browser session...

", + gr.HTML( + value=f"

Waiting for browser session...

", + visible=True), "", f"Error: {str(e)}\n{traceback.format_exc()}", "", @@ -715,11 +761,13 @@ async def run_deep_search(research_task, max_search_iteration_input, max_query_p return markdown_content, file_path, gr.update(value="Stop", interactive=True), gr.update(interactive=True) -def create_ui(config, theme_name="Ocean"): +def create_ui(theme_name="Ocean"): css = """ .gradio-container { - max-width: 1200px !important; - margin: auto !important; + width: 60vw !important; + max-width: 60% !important; + margin-left: auto !important; + margin-right: auto !important; padding-top: 20px !important; } .header-text { @@ -751,41 +799,45 @@ def create_ui(config, theme_name="Ocean"): agent_type = gr.Radio( ["org", "custom"], label="Agent Type", - value=config['agent_type'], + value="custom", info="Select the type of agent to use", + interactive=True ) with gr.Column(): max_steps = gr.Slider( minimum=1, maximum=200, - value=config['max_steps'], + value=100, step=1, label="Max Run Steps", info="Maximum number of steps the agent will take", + interactive=True ) max_actions_per_step = gr.Slider( minimum=1, - maximum=20, - value=config['max_actions_per_step'], + maximum=100, + value=10, step=1, label="Max Actions per Step", info="Maximum number of actions the agent will take per step", + interactive=True ) with gr.Column(): use_vision = gr.Checkbox( label="Use Vision", - value=config['use_vision'], + value=True, info="Enable visual processing capabilities", + interactive=True ) max_input_tokens = gr.Number( label="Max Input Tokens", value=128000, - precision=0 - + precision=0, + interactive=True ) tool_calling_method = gr.Dropdown( label="Tool Calling Method", - value=config['tool_calling_method'], + value="auto", interactive=True, allow_custom_value=True, # Allow users to input custom model names choices=["auto", "json_schema", "function_calling"], @@ -798,44 +850,47 @@ def create_ui(config, theme_name="Ocean"): llm_provider = gr.Dropdown( choices=[provider for provider, model 
in utils.model_names.items()], label="LLM Provider", - value=config['llm_provider'], - info="Select your preferred language model provider" + value="openai", + info="Select your preferred language model provider", + interactive=True ) llm_model_name = gr.Dropdown( label="Model Name", choices=utils.model_names['openai'], - value=config['llm_model_name'], + value="gpt-4o", interactive=True, allow_custom_value=True, # Allow users to input custom model names info="Select a model in the dropdown options or directly type a custom model name" ) - llm_num_ctx = gr.Slider( + ollama_num_ctx = gr.Slider( minimum=2 ** 8, maximum=2 ** 16, - value=config['llm_num_ctx'], + value=16000, step=1, - label="Max Context Length", + label="Ollama Context Length", info="Controls max context length model needs to handle (less = faster)", - visible=config['llm_provider'] == "ollama" + visible=False, + interactive=True ) llm_temperature = gr.Slider( minimum=0.0, maximum=2.0, - value=config['llm_temperature'], + value=0.6, step=0.1, label="Temperature", - info="Controls randomness in model outputs" + info="Controls randomness in model outputs", + interactive=True ) with gr.Row(): llm_base_url = gr.Textbox( label="Base URL", - value=config['llm_base_url'], + value="", info="API endpoint URL (if required)" ) llm_api_key = gr.Textbox( label="API Key", type="password", - value=config['llm_api_key'], + value="", info="Your API key (leave blank to use .env)" ) @@ -847,7 +902,7 @@ def create_ui(config, theme_name="Ocean"): llm_provider.change( fn=update_llm_num_ctx_visibility, inputs=llm_provider, - outputs=llm_num_ctx + outputs=ollama_num_ctx ) with gr.TabItem("🌐 Browser Settings", id=3): @@ -855,40 +910,47 @@ def create_ui(config, theme_name="Ocean"): with gr.Row(): use_own_browser = gr.Checkbox( label="Use Own Browser", - value=config['use_own_browser'], + value=False, info="Use your existing browser instance", + interactive=True ) keep_browser_open = gr.Checkbox( label="Keep Browser Open", - 
value=config['keep_browser_open'], + value=False, info="Keep Browser Open between Tasks", + interactive=True ) headless = gr.Checkbox( label="Headless Mode", - value=config['headless'], + value=False, info="Run browser without GUI", + interactive=True ) disable_security = gr.Checkbox( label="Disable Security", - value=config['disable_security'], + value=True, info="Disable browser security features", + interactive=True ) enable_recording = gr.Checkbox( label="Enable Recording", - value=config['enable_recording'], + value=True, info="Enable saving browser recordings", + interactive=True ) with gr.Row(): window_w = gr.Number( label="Window Width", - value=config['window_w'], + value=1280, info="Browser window width", + interactive=True ) window_h = gr.Number( label="Window Height", - value=config['window_h'], + value=1100, info="Browser window height", + interactive=True ) chrome_cdp = gr.Textbox( @@ -902,7 +964,7 @@ def create_ui(config, theme_name="Ocean"): save_recording_path = gr.Textbox( label="Recording Path", placeholder="e.g. ./tmp/record_videos", - value=config['save_recording_path'], + value="./tmp/record_videos", info="Path to save browser recordings", interactive=True, # Allow editing only if recording is enabled ) @@ -910,7 +972,7 @@ def create_ui(config, theme_name="Ocean"): save_trace_path = gr.Textbox( label="Trace Path", placeholder="e.g. 
./tmp/traces", - value=config['save_trace_path'], + value="./tmp/traces", info="Path to save Agent traces", interactive=True, ) @@ -918,7 +980,7 @@ def create_ui(config, theme_name="Ocean"): save_agent_history_path = gr.Textbox( label="Agent History Save Path", placeholder="e.g., ./tmp/agent_history", - value=config['save_agent_history_path'], + value="./tmp/agent_history", info="Specify the directory where agent history should be saved.", interactive=True, ) @@ -928,14 +990,17 @@ def create_ui(config, theme_name="Ocean"): label="Task Description", lines=4, placeholder="Enter your task here...", - value=config['task'], + value="go to google.com and type 'OpenAI' click search and give me the first url", info="Describe what you want the agent to do", + interactive=True ) add_infos = gr.Textbox( label="Additional Information", lines=3, placeholder="Add any helpful context or instructions...", info="Optional hints to help the LLM complete the task", + value="", + interactive=True ) with gr.Row(): @@ -946,6 +1011,7 @@ def create_ui(config, theme_name="Ocean"): browser_view = gr.HTML( value="

Waiting for browser session...

", label="Live Browser View", + visible=False ) gr.Markdown("### Results") @@ -973,12 +1039,15 @@ def create_ui(config, theme_name="Ocean"): with gr.TabItem("🧐 Deep Research", id=5): research_task_input = gr.Textbox(label="Research Task", lines=5, - value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.") + value="Compose a report on the use of Reinforcement Learning for training Large Language Models, encompassing its origins, current advancements, and future prospects, substantiated with examples of relevant models and techniques. The report should reflect original insights and analysis, moving beyond mere summarization of existing literature.", + interactive=True) with gr.Row(): max_search_iteration_input = gr.Number(label="Max Search Iteration", value=3, - precision=0) # precision=0 确保是整数 + precision=0, + interactive=True) # precision=0 确保是整数 max_query_per_iter_input = gr.Number(label="Max Query per Iteration", value=1, - precision=0) # precision=0 确保是整数 + precision=0, + interactive=True) # precision=0 确保是整数 with gr.Row(): research_button = gr.Button("▶️ Run Deep Research", variant="primary", scale=2) stop_research_button = gr.Button("⏹ Stop", variant="stop", scale=1) @@ -996,7 +1065,7 @@ def create_ui(config, theme_name="Ocean"): run_button.click( fn=run_with_stream, inputs=[ - agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, + agent_type, llm_provider, llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, # Include the new path @@ -1021,7 +1090,7 @@ def create_ui(config, 
theme_name="Ocean"): research_button.click( fn=run_deep_search, inputs=[research_task_input, max_search_iteration_input, max_query_per_iter_input, llm_provider, - llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, + llm_model_name, ollama_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_vision, use_own_browser, headless, chrome_cdp], outputs=[markdown_output_display, markdown_download, stop_research_button, research_button] ) @@ -1054,7 +1123,6 @@ def create_ui(config, theme_name="Ocean"): recordings_gallery = gr.Gallery( label="Recordings", - value=list_recordings(config['save_recording_path']), columns=3, height="auto", object_fit="contain" @@ -1069,41 +1137,22 @@ def create_ui(config, theme_name="Ocean"): with gr.TabItem("📁 UI Configuration", id=8): config_file_input = gr.File( - label="Load Config File", - file_types=[".pkl"], + label="Load UI Settings from Config File", + file_types=[".json"], interactive=True ) with gr.Row(): - load_config_button = gr.Button("Load Existing Config From File", variant="primary") - save_config_button = gr.Button("Save Current Config", variant="primary") + load_config_button = gr.Button("Load Config", variant="primary") + save_config_button = gr.Button("Save UI Settings", variant="primary") config_status = gr.Textbox( label="Status", lines=2, interactive=False ) - - load_config_button.click( - fn=update_ui_from_config, - inputs=[config_file_input], - outputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, enable_recording, - window_w, window_h, save_recording_path, save_trace_path, save_agent_history_path, - task, config_status - ] - ) - save_config_button.click( fn=save_current_config, - inputs=[ - agent_type, max_steps, max_actions_per_step, use_vision, tool_calling_method, - llm_provider, 
llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, - use_own_browser, keep_browser_open, headless, disable_security, - enable_recording, window_w, window_h, save_recording_path, save_trace_path, - save_agent_history_path, task, - ], + inputs=[], # 不需要输入参数 outputs=[config_status] ) @@ -1124,6 +1173,15 @@ def create_ui(config, theme_name="Ocean"): use_own_browser.change(fn=close_global_browser) keep_browser_open.change(fn=close_global_browser) + scan_and_register_components(demo) + global webui_config_manager + all_components = webui_config_manager.get_all_components() + + load_config_button.click( + fn=update_ui_from_config, + inputs=[config_file_input], + outputs=all_components + [config_status] + ) return demo @@ -1132,12 +1190,9 @@ def main(): parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") - parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode") args = parser.parse_args() - config_dict = default_config() - - demo = create_ui(config_dict, theme_name=args.theme) + demo = create_ui(theme_name=args.theme) demo.launch(server_name=args.ip, server_port=args.port)