Update webui.py

This commit is contained in:
Richardson Gunde
2025-01-05 18:02:11 +05:30
committed by GitHub
parent a65ca85336
commit 901dcb7e1c

302
webui.py
View File

@@ -36,7 +36,6 @@ from src.agent.custom_prompts import CustomSystemPrompt
from src.utils import utils
async def run_browser_agent(
agent_type,
llm_provider,
@@ -55,9 +54,7 @@ async def run_browser_agent(
max_steps,
use_vision
):
"""
Runs the browser agent based on user configurations.
"""
llm = utils.get_llm_model(
provider=llm_provider,
@@ -95,7 +92,6 @@ async def run_browser_agent(
else:
raise ValueError(f"Invalid agent type: {agent_type}")
async def run_org_agent(
llm,
headless,
@@ -137,7 +133,6 @@ async def run_org_agent(
await browser.close()
return final_result, errors, model_actions, model_thoughts
async def run_custom_agent(
llm,
use_own_browser,
@@ -227,6 +222,226 @@ async def run_custom_agent(
await browser.close()
return final_result, errors, model_actions, model_thoughts
import argparse
import gradio as gr
from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
import os
# Module-level registry of the selectable Gradio themes: display label -> a
# ready-to-use theme instance. Each theme class is instantiated once, in the
# order listed, when this module is imported.
theme_map = {
    label: theme_cls()
    for label, theme_cls in (
        ("Default", Default),
        ("Soft", Soft),
        ("Monochrome", Monochrome),
        ("Glass", Glass),
        ("Origin", Origin),
        ("Citrus", Citrus),
        ("Ocean", Ocean),
    )
}
def create_ui(theme_name="Ocean"):
    """Build the Browser Use WebUI and return it as a ``gr.Blocks`` app.

    The layout consists of a header, a read-only indicator of the active
    theme, four settings tabs (agent, LLM, browser, task), Run/Stop buttons
    and four result text boxes. Clicking "Run Agent" calls
    ``run_browser_agent`` with the sixteen setting values and routes its
    return values to the four result boxes.

    Args:
        theme_name: Key into the module-level ``theme_map``. The theme is
            handed to the ``gr.Blocks`` constructor, so it is chosen once per
            call; to display another theme, build a new UI with a different
            ``theme_name``.

    Returns:
        The constructed (not yet launched) ``gr.Blocks`` instance.

    Raises:
        KeyError: If ``theme_name`` is not a key of ``theme_map``.
    """
    try:
        theme = theme_map[theme_name]
    except KeyError:
        # Same exception type as a plain lookup, but with an actionable message.
        raise KeyError(
            f"Unknown theme {theme_name!r}; expected one of {list(theme_map)}"
        ) from None

    # Page-level styling: centred container, header spacing, theme row box.
    css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
        padding-top: 20px !important;
    }
    .header-text {
        text-align: center;
        margin-bottom: 30px;
    }
    .theme-section {
        margin-bottom: 20px;
        padding: 15px;
        border-radius: 10px;
    }
    """

    # Written as explicit lines so the Markdown headings start at column 0.
    header_markdown = (
        "\n"
        "# 🌐 Browser Use WebUI\n"
        "### Control your browser with AI assistance\n"
    )

    with gr.Blocks(title="Browser Use WebUI", theme=theme, css=css) as demo:
        with gr.Row():
            gr.Markdown(header_markdown, elem_classes=["header-text"])

        # A theme is applied when gr.Blocks is constructed; an event handler
        # cannot swap it afterwards, and event outputs must be components
        # rather than the Blocks app itself. The dropdown therefore only
        # shows which theme is active instead of trying to rebuild the UI.
        with gr.Row(elem_classes=["theme-section"]):
            gr.Dropdown(
                choices=list(theme_map),
                value=theme_name,
                label="🎨 Theme",
                container=False,
                interactive=False,
            )

        with gr.Tabs():
            with gr.TabItem("🤖 Agent Settings", id=1):
                with gr.Group():
                    agent_type = gr.Radio(
                        ["org", "custom"],
                        label="Agent Type",
                        value="custom",
                        info="Select the type of agent to use",
                    )
                    max_steps = gr.Slider(
                        minimum=1,
                        maximum=200,
                        value=100,
                        step=1,
                        label="Max Run Steps",
                        info="Maximum number of steps the agent will take",
                    )
                    use_vision = gr.Checkbox(
                        label="Use Vision",
                        value=True,
                        info="Enable visual processing capabilities",
                    )

            with gr.TabItem("🔧 LLM Configuration", id=2):
                with gr.Group():
                    llm_provider = gr.Dropdown(
                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"],
                        label="LLM Provider",
                        value="gemini",
                        info="Select your preferred language model provider",
                    )
                    llm_model_name = gr.Textbox(
                        label="Model Name",
                        value="gemini-2.0-flash-exp",
                        info="Specify the model to use",
                    )
                    llm_temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="Temperature",
                        info="Controls randomness in model outputs",
                    )
                    with gr.Row():
                        llm_base_url = gr.Textbox(
                            label="Base URL",
                            info="API endpoint URL (if required)",
                        )
                        llm_api_key = gr.Textbox(
                            label="API Key",
                            type="password",
                            info="Your API key",
                        )

            with gr.TabItem("🌐 Browser Settings", id=3):
                with gr.Group():
                    with gr.Row():
                        use_own_browser = gr.Checkbox(
                            label="Use Own Browser",
                            value=False,
                            info="Use your existing browser instance",
                        )
                        headless = gr.Checkbox(
                            label="Headless Mode",
                            value=False,
                            info="Run browser without GUI",
                        )
                        disable_security = gr.Checkbox(
                            label="Disable Security",
                            value=True,
                            info="Disable browser security features",
                        )
                    with gr.Row():
                        window_w = gr.Number(
                            label="Window Width",
                            value=1920,
                            info="Browser window width",
                        )
                        window_h = gr.Number(
                            label="Window Height",
                            value=1080,
                            info="Browser window height",
                        )
                    save_recording_path = gr.Textbox(
                        label="Recording Path",
                        placeholder="e.g. ./tmp/record_videos",
                        value="./tmp/record_videos",
                        info="Path to save browser recordings",
                    )

            with gr.TabItem("📝 Task Settings", id=4):
                task = gr.Textbox(
                    label="Task Description",
                    lines=4,
                    placeholder="Enter your task here...",
                    value="go to google.com and type 'OpenAI' click search and give me the first url",
                    info="Describe what you want the agent to do",
                )
                add_infos = gr.Textbox(
                    label="Additional Information",
                    lines=3,
                    placeholder="Add any helpful context or instructions...",
                    info="Optional hints to help the LLM complete the task",
                )

        with gr.Row():
            run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
            # NOTE(review): no click handler is attached to the Stop button in
            # this function, so pressing it currently has no effect.
            gr.Button("⏹️ Stop", variant="stop", scale=1)

        with gr.Group():
            gr.Markdown("### Results")
            with gr.Row():
                with gr.Column():
                    final_result_output = gr.Textbox(
                        label="Final Result",
                        lines=3,
                        show_label=True,
                    )
                with gr.Column():
                    errors_output = gr.Textbox(
                        label="Errors",
                        lines=3,
                        show_label=True,
                    )
            with gr.Row():
                with gr.Column():
                    model_actions_output = gr.Textbox(
                        label="Model Actions",
                        lines=3,
                        show_label=True,
                    )
                with gr.Column():
                    model_thoughts_output = gr.Textbox(
                        label="Model Thoughts",
                        lines=3,
                        show_label=True,
                    )

        # The input order must match run_browser_agent's positional parameters.
        run_button.click(
            fn=run_browser_agent,
            inputs=[
                agent_type,
                llm_provider,
                llm_model_name,
                llm_temperature,
                llm_base_url,
                llm_api_key,
                use_own_browser,
                headless,
                disable_security,
                window_w,
                window_h,
                save_recording_path,
                task,
                add_infos,
                max_steps,
                use_vision,
            ],
            outputs=[
                final_result_output,
                errors_output,
                model_actions_output,
                model_thoughts_output,
            ],
        )

    return demo
def main():
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
@@ -234,81 +449,8 @@ def main():
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
args = parser.parse_args()
js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
# Gradio UI setup
with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]),
js=js_func) as demo:
gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
with gr.Row():
agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
max_steps = gr.Number(label="max run steps", value=100)
use_vision = gr.Checkbox(label="use vision", value=True)
with gr.Row():
llm_provider = gr.Dropdown(
["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
)
llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
with gr.Row():
llm_base_url = gr.Textbox(label="LLM Base URL")
llm_api_key = gr.Textbox(label="LLM API Key", type="password")
with gr.Accordion("Browser Settings", open=False):
use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
headless = gr.Checkbox(label="Headless", value=False)
disable_security = gr.Checkbox(label="Disable Security", value=True)
with gr.Row():
window_w = gr.Number(label="Window Width", value=1920)
window_h = gr.Number(label="Window Height", value=1080)
save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
value="./tmp/record_videos")
with gr.Accordion("Task Settings", open=True):
task = gr.Textbox(label="Task", lines=10,
value="go to google.com and type 'OpenAI' click search and give me the first url")
add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)
run_button = gr.Button("Run Agent", variant="primary")
with gr.Column():
final_result_output = gr.Textbox(label="Final Result", lines=5)
errors_output = gr.Textbox(label="Errors", lines=5, )
model_actions_output = gr.Textbox(label="Model Actions", lines=5)
model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)
run_button.click(
fn=run_browser_agent,
inputs=[
agent_type,
llm_provider,
llm_model_name,
llm_temperature,
llm_base_url,
llm_api_key,
use_own_browser,
headless,
disable_security,
window_w,
window_h,
save_recording_path,
task,
add_infos,
max_steps,
use_vision
],
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
)
demo = create_ui()
demo.launch(server_name=args.ip, server_port=args.port)
if __name__ == '__main__':
main()