diff --git a/requirements.txt b/requirements.txt index eabdb7d..852269e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ browser-use>=0.1.18 langchain-google-genai>=2.0.8 pyperclip gradio -langchain-ollama \ No newline at end of file +langchain-ollama + diff --git a/src/utils/utils.py b/src/utils/utils.py index f0c5fcb..dfc7451 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -12,7 +12,7 @@ from langchain_anthropic import ChatAnthropic from langchain_google_genai import ChatGoogleGenerativeAI from langchain_ollama import ChatOllama from langchain_openai import AzureChatOpenAI, ChatOpenAI - +import gradio as gr def get_llm_model(provider: str, **kwargs): """ @@ -106,8 +106,34 @@ def get_llm_model(provider: str, **kwargs): ) else: raise ValueError(f"Unsupported provider: {provider}") + +# Predefined model names for common providers +model_names = { + "anthropic": ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"], + "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"], + "deepseek": ["deepseek-chat"], + "gemini": ["gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-1219" ], + "ollama": ["qwen2.5:7b", "llama2:7b"], + "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"] +} +# Callback to update the model name dropdown based on the selected provider +def update_model_dropdown(llm_provider, api_key=None, base_url=None): + """ + Update the model name dropdown with predefined models for the selected provider. + """ + # Use API keys from .env if not provided + if not api_key: + api_key = os.getenv(f"{llm_provider.upper()}_API_KEY", "") + if not base_url: + base_url = os.getenv(f"{llm_provider.upper()}_BASE_URL", "") + # Use predefined models for the selected provider + if llm_provider in model_names: + return gr.Dropdown(choices=model_names[llm_provider], value=model_names[llm_provider][0], interactive=True) + else: + return gr.Dropdown(choices=[], value="", interactive=True, allow_custom_value=True) + def encode_image(img_path): if not img_path: return None diff --git a/webui.py b/webui.py index d1968e9..85a5299 100644 --- a/webui.py +++ b/webui.py @@ -5,6 +5,8 @@ # @Project : browser-use-webui # @FileName: webui.py +import pdb + from dotenv import load_dotenv load_dotenv() @@ -12,6 +14,12 @@ import argparse import os import gradio as gr +import argparse + + +from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean +import asyncio +import os, glob from browser_use.agent.service import Agent from browser_use.browser.browser import Browser, BrowserConfig from browser_use.browser.context import ( @@ -26,7 +34,7 @@ from src.browser.custom_browser import CustomBrowser from src.browser.custom_context import BrowserContextConfig from src.controller.custom_controller import CustomController from src.utils import utils - +from src.utils.utils import update_model_dropdown async def run_browser_agent( agent_type, @@ -268,11 +276,6 @@ async def run_custom_agent( await browser.close() return final_result, errors, model_actions, model_thoughts - -import glob - -from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft - # Define the theme map globally theme_map = { "Default": Default(), @@ -282,6 +285,7 @@ theme_map = { "Origin": Origin(), "Citrus": Citrus(), "Ocean": Ocean(), + "Base": Base() } @@ -364,22 +368,17 @@ def create_ui(theme_name="Ocean"): with gr.TabItem("🔧 LLM Configuration", id=2): with gr.Group(): llm_provider = gr.Dropdown( - [ - "anthropic", - "openai", - "gemini", - "azure_openai", - "deepseek", - "ollama", - ], + ["anthropic", "openai", "deepseek", "gemini", "ollama", "azure_openai"], label="LLM Provider", - value="openai", - info="Select your preferred language model provider", + value="", + info="Select your preferred language model provider" ) - llm_model_name = gr.Textbox( + llm_model_name = gr.Dropdown( label="Model Name", - value="gpt-4o", - info="Specify the model to use", + value="", + interactive=True, + allow_custom_value=True, # Allow users to input custom model names + info="Select a model from the dropdown or type a custom model name" ) llm_temperature = gr.Slider( minimum=0.0, @@ -387,16 +386,21 @@ def create_ui(theme_name="Ocean"): value=1.0, step=0.1, label="Temperature", - info="Controls randomness in model outputs", + info="Controls randomness in model outputs" ) with gr.Row(): llm_base_url = gr.Textbox( - label="Base URL", info="API endpoint URL (if required)" + label="Base URL", + value=os.getenv(f"{llm_provider.value.upper()}_BASE_URL ", ""), # Default to .env value + info="API endpoint URL (if required)" ) llm_api_key = gr.Textbox( - label="API Key", type="password", info="Your API key" + label="API Key", + type="password", + value=os.getenv(f"{llm_provider.value.upper()}_API_KEY", ""), # Default to .env value + info="Your API key (leave blank to use .env)" ) - + with gr.TabItem("🌐 Browser Settings", id=3): with gr.Group(): with gr.Row(): @@ -454,7 +458,7 @@ def create_ui(theme_name="Ocean"): run_button = gr.Button("â–ļī¸ Run Agent", variant="primary", scale=2) stop_button = gr.Button("âšī¸ Stop", variant="stop", scale=1) - with gr.TabItem("đŸŽŦ Recordings", id=5): + with gr.TabItem("📊 Results", id=5): recording_display = gr.Video(label="Latest Recording") with gr.Group(): @@ -477,61 +481,67 @@ def create_ui(theme_name="Ocean"): model_thoughts_output = gr.Textbox( label="Model Thoughts", lines=3, show_label=True ) + + with gr.TabItem("đŸŽĨ Recordings", id=6): + def list_recordings(save_recording_path): + if not os.path.exists(save_recording_path): + return [] + + # Get all video files + recordings = glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) + + # Sort recordings by creation time (oldest first) + recordings.sort(key=os.path.getctime) + + # Add numbering to the recordings + numbered_recordings = [] + for idx, recording in enumerate(recordings, start=1): + filename = os.path.basename(recording) + numbered_recordings.append((recording, f"{idx}. {filename}")) + + return numbered_recordings + + recordings_gallery = gr.Gallery( + label="Recordings", + value=list_recordings("./tmp/record_videos"), + columns=3, + height="auto", + object_fit="contain" + ) + + refresh_button = gr.Button("🔄 Refresh Recordings", variant="secondary") + refresh_button.click( + fn=list_recordings, + inputs=save_recording_path, + outputs=recordings_gallery + ) + + # Attach the callback to the LLM provider dropdown + llm_provider.change( + lambda provider, api_key, base_url: update_model_dropdown(provider, api_key, base_url), + inputs=[llm_provider, llm_api_key, llm_base_url], + outputs=llm_model_name + ) # Run button click handler run_button.click( fn=run_browser_agent, - inputs=[ - agent_type, - llm_provider, - llm_model_name, - llm_temperature, - llm_base_url, - llm_api_key, - use_own_browser, - headless, - disable_security, - window_w, - window_h, - save_recording_path, - task, - add_infos, - max_steps, - use_vision, - max_actions_per_step, - tool_call_in_content - ], - outputs=[ - final_result_output, - errors_output, - model_actions_output, - model_thoughts_output, - recording_display, - ], + inputs=[agent_type, llm_provider, llm_model_name, llm_temperature, llm_base_url, llm_api_key, use_own_browser, headless, disable_security, window_w, window_h, save_recording_path, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_call_in_content], + outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display,], ) return demo - def main(): parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent") - parser.add_argument( - "--ip", type=str, default="127.0.0.1", help="IP address to bind to" - ) + parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to") parser.add_argument("--port", type=int, default=7788, help="Port to listen on") - parser.add_argument( - "--theme", - type=str, - default="Ocean", - choices=theme_map.keys(), - help="Theme to use for the UI", - ) + parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI") parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode") args = parser.parse_args() demo = create_ui(theme_name=args.theme) demo.launch(server_name=args.ip, server_port=args.port) - -if __name__ == "__main__": +if __name__ == '__main__': main()