From 901dcb7e1cf3d879731ec44c741d2986fc14df5e Mon Sep 17 00:00:00 2001 From: Richardson Gunde <152559661+richard-devbot@users.noreply.github.com> Date: Sun, 5 Jan 2025 18:02:11 +0530 Subject: [PATCH] Update webui.py --- webui.py | 302 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 222 insertions(+), 80 deletions(-) diff --git a/webui.py b/webui.py index f44bc14..5570be5 100644 --- a/webui.py +++ b/webui.py @@ -36,7 +36,6 @@ from src.agent.custom_prompts import CustomSystemPrompt from src.utils import utils - async def run_browser_agent( agent_type, llm_provider, @@ -55,9 +54,7 @@ async def run_browser_agent( max_steps, use_vision ): - """ - Runs the browser agent based on user configurations. - """ + llm = utils.get_llm_model( provider=llm_provider, @@ -95,7 +92,6 @@ async def run_browser_agent( else: raise ValueError(f"Invalid agent type: {agent_type}") - async def run_org_agent( llm, headless, @@ -137,7 +133,6 @@ async def run_org_agent( await browser.close() return final_result, errors, model_actions, model_thoughts - async def run_custom_agent( llm, use_own_browser, @@ -227,6 +222,226 @@ async def run_custom_agent( await browser.close() return final_result, errors, model_actions, model_thoughts +import argparse +import gradio as gr +from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean +import os + +# Define the theme map globally +theme_map = { + "Default": Default(), + "Soft": Soft(), + "Monochrome": Monochrome(), + "Glass": Glass(), + "Origin": Origin(), + "Citrus": Citrus(), + "Ocean": Ocean() +} + +def create_ui(theme_name="Ocean"): + """Create the UI with the specified theme""" + # Enhanced styling for better visual appeal + css = """ + .gradio-container { + max-width: 1200px !important; + margin: auto !important; + padding-top: 20px !important; + } + .header-text { + text-align: center; + margin-bottom: 30px; + } + .theme-section { + margin-bottom: 20px; + padding: 15px; + border-radius: 10px; + 
} + """ + + with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css) as demo: + with gr.Row(): + gr.Markdown( + """ + # 🌐 Browser Use WebUI + ### Control your browser with AI assistance + """, + elem_classes=["header-text"] + ) + + # Quick access theme switcher at the top + with gr.Row(elem_classes=["theme-section"]): + theme_dropdown = gr.Dropdown( + choices=list(theme_map.keys()), + value=theme_name, + label="🎨 Quick Theme Switch", + container=False + ) + + with gr.Tabs() as tabs: + with gr.TabItem("🤖 Agent Settings", id=1): + with gr.Group(): + agent_type = gr.Radio( + ["org", "custom"], + label="Agent Type", + value="custom", + info="Select the type of agent to use" + ) + max_steps = gr.Slider( + minimum=1, + maximum=200, + value=100, + step=1, + label="Max Run Steps", + info="Maximum number of steps the agent will take" + ) + use_vision = gr.Checkbox( + label="Use Vision", + value=True, + info="Enable visual processing capabilities" + ) + + with gr.TabItem("🔧 LLM Configuration", id=2): + with gr.Group(): + llm_provider = gr.Dropdown( + ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], + label="LLM Provider", + value="gemini", + info="Select your preferred language model provider" + ) + llm_model_name = gr.Textbox( + label="Model Name", + value="gemini-2.0-flash-exp", + info="Specify the model to use" + ) + llm_temperature = gr.Slider( + minimum=0.0, + maximum=2.0, + value=1.0, + step=0.1, + label="Temperature", + info="Controls randomness in model outputs" + ) + with gr.Row(): + llm_base_url = gr.Textbox( + label="Base URL", + info="API endpoint URL (if required)" + ) + llm_api_key = gr.Textbox( + label="API Key", + type="password", + info="Your API key" + ) + + with gr.TabItem("🌐 Browser Settings", id=3): + with gr.Group(): + with gr.Row(): + use_own_browser = gr.Checkbox( + label="Use Own Browser", + value=False, + info="Use your existing browser instance" + ) + headless = gr.Checkbox( + label="Headless Mode", + 
value=False, + info="Run browser without GUI" + ) + disable_security = gr.Checkbox( + label="Disable Security", + value=True, + info="Disable browser security features" + ) + + with gr.Row(): + window_w = gr.Number( + label="Window Width", + value=1920, + info="Browser window width" + ) + window_h = gr.Number( + label="Window Height", + value=1080, + info="Browser window height" + ) + + save_recording_path = gr.Textbox( + label="Recording Path", + placeholder="e.g. ./tmp/record_videos", + value="./tmp/record_videos", + info="Path to save browser recordings" + ) + + with gr.TabItem("📝 Task Settings", id=4): + task = gr.Textbox( + label="Task Description", + lines=4, + placeholder="Enter your task here...", + value="go to google.com and type 'OpenAI' click search and give me the first url", + info="Describe what you want the agent to do" + ) + add_infos = gr.Textbox( + label="Additional Information", + lines=3, + placeholder="Add any helpful context or instructions...", + info="Optional hints to help the LLM complete the task" + ) + + with gr.Row(): + run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2) + stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1) + + with gr.Group(): + gr.Markdown("### Results") + with gr.Row(): + with gr.Column(): + final_result_output = gr.Textbox( + label="Final Result", + lines=3, + show_label=True + ) + with gr.Column(): + errors_output = gr.Textbox( + label="Errors", + lines=3, + show_label=True + ) + with gr.Row(): + with gr.Column(): + model_actions_output = gr.Textbox( + label="Model Actions", + lines=3, + show_label=True + ) + with gr.Column(): + model_thoughts_output = gr.Textbox( + label="Model Thoughts", + lines=3, + show_label=True + ) + + # Handle theme changes + def reload_ui(new_theme): + """Reload the UI with the new theme""" + return create_ui(new_theme) + + theme_dropdown.change( + fn=reload_ui, + inputs=[theme_dropdown], + outputs=[demo] + ) + + # Run button click handler + 
run_button.click( + fn=run_browser_agent, + inputs=[ + agent_type, llm_provider, llm_model_name, llm_temperature, + llm_base_url, llm_api_key, use_own_browser, headless, + disable_security, window_w, window_h, save_recording_path, + task, add_infos, max_steps, use_vision + ], + outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output] + ) + + return demo def main(): parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent") @@ -234,81 +449,8 @@ def main(): parser.add_argument("--port", type=int, default=7788, help="Port to listen on") args = parser.parse_args() - js_func = """ - function refresh() { - const url = new URL(window.location); - - if (url.searchParams.get('__theme') !== 'dark') { - url.searchParams.set('__theme', 'dark'); - window.location.href = url.href; - } - } - """ - - # Gradio UI setup - with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]), - js=js_func) as demo: - gr.Markdown("

Browser Use WebUI

") - with gr.Row(): - agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom") - max_steps = gr.Number(label="max run steps", value=100) - use_vision = gr.Checkbox(label="use vision", value=True) - with gr.Row(): - llm_provider = gr.Dropdown( - ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini" - ) - llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp") - llm_temperature = gr.Number(label="LLM Temperature", value=1.0) - with gr.Row(): - llm_base_url = gr.Textbox(label="LLM Base URL") - llm_api_key = gr.Textbox(label="LLM API Key", type="password") - - with gr.Accordion("Browser Settings", open=False): - use_own_browser = gr.Checkbox(label="Use Own Browser", value=False) - headless = gr.Checkbox(label="Headless", value=False) - disable_security = gr.Checkbox(label="Disable Security", value=True) - with gr.Row(): - window_w = gr.Number(label="Window Width", value=1920) - window_h = gr.Number(label="Window Height", value=1080) - save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. 
./tmp/record_videos", - value="./tmp/record_videos") - with gr.Accordion("Task Settings", open=True): - task = gr.Textbox(label="Task", lines=10, - value="go to google.com and type 'OpenAI' click search and give me the first url") - add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5) - - run_button = gr.Button("Run Agent", variant="primary") - with gr.Column(): - final_result_output = gr.Textbox(label="Final Result", lines=5) - errors_output = gr.Textbox(label="Errors", lines=5, ) - model_actions_output = gr.Textbox(label="Model Actions", lines=5) - model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5) - - run_button.click( - fn=run_browser_agent, - inputs=[ - agent_type, - llm_provider, - llm_model_name, - llm_temperature, - llm_base_url, - llm_api_key, - use_own_browser, - headless, - disable_security, - window_w, - window_h, - save_recording_path, - task, - add_infos, - max_steps, - use_vision - ], - outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output], - ) - + demo = create_ui() demo.launch(server_name=args.ip, server_port=args.port) - if __name__ == '__main__': main()