diff --git a/.env.example b/.env.example index e13240b..2ebe67b 100644 --- a/.env.example +++ b/.env.example @@ -17,5 +17,17 @@ ANONYMIZED_TELEMETRY=true # LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info BROWSER_USE_LOGGING_LEVEL=info +# Chrome settings CHROME_PATH= -CHROME_USER_DATA= \ No newline at end of file +CHROME_USER_DATA= +CHROME_DEBUGGING_PORT=9222 +CHROME_DEBUGGING_HOST=localhost +CHROME_PERSISTENT_SESSION=false # Set to true to keep browser open between AI tasks + +# Display settings +RESOLUTION=1920x1080x24 # Format: WIDTHxHEIGHTxDEPTH +RESOLUTION_WIDTH=1920 # Width in pixels +RESOLUTION_HEIGHT=1080 # Height in pixels + +# VNC settings +VNC_PASSWORD=youvncpassword \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..af1d438 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,82 @@ +FROM python:3.11-slim + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + wget \ + gnupg \ + curl \ + unzip \ + xvfb \ + libgconf-2-4 \ + libxss1 \ + libnss3 \ + libnspr4 \ + libasound2 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libdbus-1-3 \ + libdrm2 \ + libgbm1 \ + libgtk-3-0 \ + libxcomposite1 \ + libxdamage1 \ + libxfixes3 \ + libxrandr2 \ + xdg-utils \ + fonts-liberation \ + dbus \ + xauth \ + xvfb \ + x11vnc \ + tigervnc-tools \ + supervisor \ + net-tools \ + procps \ + git \ + python3-numpy \ + && rm -rf /var/lib/apt/lists/* + +# Install noVNC +RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \ + && git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \ + && ln -s /opt/novnc/vnc.html /opt/novnc/index.html + +# Install Chrome +RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ + && echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list \ + && apt-get update \ + && apt-get install -y google-chrome-stable \ + && rm -rf /var/lib/apt/lists/* + +# Set up working directory +WORKDIR /app + +# Copy requirements and install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Install Playwright and browsers with system dependencies +ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright +RUN playwright install --with-deps chromium +RUN playwright install-deps + +# Copy the application code +COPY . . + +# Set environment variables +ENV PYTHONUNBUFFERED=1 +ENV BROWSER_USE_LOGGING_LEVEL=info +ENV CHROME_PATH=/usr/bin/google-chrome +ENV ANONYMIZED_TELEMETRY=false +ENV DISPLAY=:99 +ENV RESOLUTION=1920x1080x24 +ENV VNC_PASSWORD=vncpassword + +# Set up supervisor configuration +RUN mkdir -p /var/log/supervisor +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +EXPOSE 7788 6080 5900 + +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/README.md b/README.md index 1ebee46..184eeb9 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,13 @@ We would like to officially thank [WarmShao](https://github.com/warmshao) for hi **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording. - +**Persistent Browser Sessions:** You can choose to keep the browser window open between AI tasks, allowing you to see the complete history and state of AI interactions. -## Installation Guide + + +## Installation Options + +### Option 1: Local Installation Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started. @@ -49,84 +53,132 @@ Then install playwright: playwright install ``` +### Option 2: Docker Installation + +1. **Prerequisites:** + - Docker and Docker Compose installed on your system + - Git to clone the repository + +2. **Setup:** + ```bash + # Clone the repository + git clone https://github.com/browser-use/web-ui.git + cd web-ui + + # Copy and configure environment variables + cp .env.example .env + # Edit .env with your preferred text editor and add your API keys + ``` + +3. **Run with Docker:** + ```bash + # Build and start the container with default settings (browser closes after AI tasks) + docker compose up --build + + # Or run with persistent browser (browser stays open between AI tasks) + CHROME_PERSISTENT_SESSION=true docker compose up --build + ``` + +4. **Access the Application:** + - WebUI: `http://localhost:7788` + - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html` + + Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file. + + ## Usage -1. **Run the WebUI:** +### Local Setup +1. Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env` +2. **Run the WebUI:** ```bash python webui.py --ip 127.0.0.1 --port 7788 ``` -2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`. -3. **Using Your Own Browser:** - - Close all chrome windows +4. WebUI options: + - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`. + - `--port`: The port to bind the WebUI to. Default is `7788`. + - `--theme`: The theme for the user interface. Default is `Ocean`. + - **Default**: The standard theme with a balanced design. + - **Soft**: A gentle, muted color scheme for a relaxed viewing experience. + - **Monochrome**: A grayscale theme with minimal color for simplicity and focus. + - **Glass**: A sleek, semi-transparent design for a modern appearance. + - **Origin**: A classic, retro-inspired theme for a nostalgic feel. + - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors. + - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect. + - `--dark-mode`: Enables dark mode for the user interface. +3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`. +4. **Using Your Own Browser(Optional):** + - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. + - Windows + ```env + CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe" + CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data" + ``` + > Note: Replace `YourUsername` with your actual Windows username for Windows systems. + - Mac + ```env + CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" + CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1" + ``` + - Close all Chrome windows - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent. - Check the "Use Own Browser" option within the Browser Settings. +5. **Keep Browser Open(Optional):** + - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file. -### Options: +### Docker Setup +1. **Environment Variables:** + - All configuration is done through the `.env` file + - Available environment variables: + ``` + # LLM API Keys + OPENAI_API_KEY=your_key_here + ANTHROPIC_API_KEY=your_key_here + GOOGLE_API_KEY=your_key_here -### `--theme` + # Browser Settings + CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks + RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH + RESOLUTION_WIDTH=1920 # Custom width in pixels + RESOLUTION_HEIGHT=1080 # Custom height in pixels -- **Type**: `str` -- **Default**: `Ocean` -- **Description**: Specifies the theme for the user interface. -- **Options**: - The available themes are defined in the `theme_map` dictionary. Below are the options you can choose from: - - **Default**: The standard theme with a balanced design. - - **Soft**: A gentle, muted color scheme for a relaxed viewing experience. - - **Monochrome**: A grayscale theme with minimal color for simplicity and focus. - - **Glass**: A sleek, semi-transparent design for a modern appearance. - - **Origin**: A classic, retro-inspired theme for a nostalgic feel. - - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors. - - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect. + # VNC Settings + VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword" + ``` -**Example**: +2. **Browser Persistence Modes:** + - **Default Mode (CHROME_PERSISTENT_SESSION=false):** + - Browser opens and closes with each AI task + - Clean state for each interaction + - Lower resource usage -```bash -python webui.py --ip 127.0.0.1 --port 7788 --theme Glass -``` + - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):** + - Browser stays open between AI tasks + - Maintains history and state + - Allows viewing previous AI interactions + - Set in `.env` file or via environment variable when starting container -### `--dark-mode` +3. **Viewing Browser Interactions:** + - Access the noVNC viewer at `http://localhost:6080/vnc.html` + - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD) + - You can now see all browser interactions in real-time -- **Type**: `boolean` -- **Default**: Disabled -- **Description**: Enables dark mode for the user interface. This is a simple toggle; including the flag activates dark mode, while omitting it keeps the interface in light mode. -- **Options**: - - **Enabled (`--dark-mode`)**: Activates dark mode, switching the interface to a dark color scheme for better visibility in low-light environments. - - **Disabled (default)**: Keeps the interface in the default light mode. +4. **Container Management:** + ```bash + # Start with persistent browser + CHROME_PERSISTENT_SESSION=true docker compose up -d -**Example**: + # Start with default mode (browser closes after tasks) + docker compose up -d -```bash -python webui.py --ip 127.0.0.1 --port 7788 --dark-mode -``` + # View logs + docker compose logs -f -## (Optional) Configure Environment Variables - -Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. With - -```bash -cp .env.example .env -``` - -**If using your own browser:** - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. - -You can just copy examples down below to your `.env` file. - -### Windows - -```env -CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe" -CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data" -``` - -> Note: Replace `YourUsername` with your actual Windows username for Windows systems. - -### Mac - -```env -CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" -CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1" -``` + # Stop the container + docker compose down + ``` ## Changelog -- [x] **2025/01/06:** Thanks to @richard-devbot, a New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113). +- [x] **2025/01/10:** Thanks to @casistack. Now we have Docker Setup option and also Support keep browser open between tasks.[Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750). +- [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113). \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6253a4a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,51 @@ +services: + browser-use-webui: + build: + context: . + dockerfile: Dockerfile + ports: + - "7788:7788" # Gradio default port + - "6080:6080" # noVNC web interface + - "5900:5900" # VNC port + - "9222:9222" # Chrome remote debugging port + environment: + - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-} + - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-} + - DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com} + - DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-} + - BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info} + - ANONYMIZED_TELEMETRY=false + - CHROME_PATH=/usr/bin/google-chrome + - CHROME_USER_DATA=/app/data/chrome_data + - CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false} + - DISPLAY=:99 + - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright + - RESOLUTION=${RESOLUTION:-1920x1080x24} + - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920} + - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080} + - VNC_PASSWORD=${VNC_PASSWORD:-vncpassword} + - PERSISTENT_BROWSER_PORT=9222 + - PERSISTENT_BROWSER_HOST=localhost + - CHROME_DEBUGGING_PORT=9222 + - CHROME_DEBUGGING_HOST=localhost + volumes: + - ./data:/app/data + - ./data/chrome_data:/app/data/chrome_data + - /tmp/.X11-unix:/tmp/.X11-unix + restart: unless-stopped + shm_size: '2gb' + cap_add: + - SYS_ADMIN + security_opt: + - seccomp=unconfined + tmpfs: + - /tmp + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "5900"] + interval: 10s + timeout: 5s + retries: 3 \ No newline at end of file diff --git a/src/browser/config.py b/src/browser/config.py new file mode 100644 index 0000000..32329c4 --- /dev/null +++ b/src/browser/config.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# @Time : 2025/1/6 +# @Author : wenshao +# @ProjectName: browser-use-webui +# @FileName: config.py + +import os +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class BrowserPersistenceConfig: + """Configuration for browser persistence""" + + persistent_session: bool = False + user_data_dir: Optional[str] = None + debugging_port: Optional[int] = None + debugging_host: Optional[str] = None + + @classmethod + def from_env(cls) -> "BrowserPersistenceConfig": + """Create config from environment variables""" + return cls( + persistent_session=os.getenv("CHROME_PERSISTENT_SESSION", "").lower() + == "true", + user_data_dir=os.getenv("CHROME_USER_DATA"), + debugging_port=int(os.getenv("CHROME_DEBUGGING_PORT", "9222")), + debugging_host=os.getenv("CHROME_DEBUGGING_HOST", "localhost"), + ) \ No newline at end of file diff --git a/src/browser/custom_browser.py b/src/browser/custom_browser.py index 790eb95..287cd06 100644 --- a/src/browser/custom_browser.py +++ b/src/browser/custom_browser.py @@ -6,15 +6,45 @@ from browser_use.browser.browser import Browser from browser_use.browser.context import BrowserContext, BrowserContextConfig +from playwright.async_api import BrowserContext as PlaywrightBrowserContext +import logging +from .config import BrowserPersistenceConfig from .custom_context import CustomBrowserContext +logger = logging.getLogger(__name__) class CustomBrowser(Browser): + _global_context = None + async def new_context( self, config: BrowserContextConfig = BrowserContextConfig(), - context: CustomBrowserContext = None, - ) -> BrowserContext: - """Create a browser context""" + context: PlaywrightBrowserContext = None, + ) -> CustomBrowserContext: + """Create a browser context with persistence support""" + persistence_config = BrowserPersistenceConfig.from_env() + + if persistence_config.persistent_session: + if CustomBrowser._global_context is not None: + logger.info("Reusing existing persistent browser context") + return CustomBrowser._global_context + + context_instance = CustomBrowserContext(config=config, browser=self, context=context) + CustomBrowser._global_context = context_instance + logger.info("Created new persistent browser context") + return context_instance + + logger.info("Creating non-persistent browser context") return CustomBrowserContext(config=config, browser=self, context=context) + + async def close(self): + """Override close to respect persistence setting""" + persistence_config = BrowserPersistenceConfig.from_env() + if not persistence_config.persistent_session: + if CustomBrowser._global_context is not None: + await CustomBrowser._global_context.close() + CustomBrowser._global_context = None + await super().close() + else: + logger.info("Skipping browser close due to persistent session") \ No newline at end of file diff --git a/src/browser/custom_context.py b/src/browser/custom_context.py index 2fe7e7c..b46dddb 100644 --- a/src/browser/custom_context.py +++ b/src/browser/custom_context.py @@ -9,84 +9,77 @@ import json import logging import os -from playwright.async_api import Browser as PlaywrightBrowser, Page, BrowserContext as PlaywrightContext from browser_use.browser.browser import Browser from browser_use.browser.context import BrowserContext, BrowserContextConfig +from playwright.async_api import Browser as PlaywrightBrowser +from playwright.async_api import BrowserContext as PlaywrightBrowserContext + +from .config import BrowserPersistenceConfig logger = logging.getLogger(__name__) + + class CustomBrowserContext(BrowserContext): def __init__( self, - browser: "CustomBrowser", # Forward declaration for CustomBrowser + browser: "Browser", config: BrowserContextConfig = BrowserContextConfig(), - context: PlaywrightContext = None + context: PlaywrightBrowserContext = None, ): super(CustomBrowserContext, self).__init__(browser=browser, config=config) - self.context = context # Rename to avoid confusion + self.context = context self._page = None + self._persistence_config = BrowserPersistenceConfig.from_env() @property - def impl_context(self) -> PlaywrightContext: + def impl_context(self) -> PlaywrightBrowserContext: """Returns the underlying Playwright context implementation""" return self.context - async def _create_context(self, browser: PlaywrightBrowser = None): + async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext: """Creates a new browser context with anti-detection measures and loads cookies if available.""" if self.context: + logger.info("Browser context already exists, returning existing context.") return self.context - # If a Playwright browser is not provided, get it from our custom browser - pw_browser = browser or await self.browser.get_playwright_browser() - - context_args = { - 'viewport': self.config.browser_window_size, - 'no_viewport': False, - 'bypass_csp': self.config.disable_security, - 'ignore_https_errors': self.config.disable_security - } - - if self.config.save_recording_path: - context_args.update({ - 'record_video_dir': self.config.save_recording_path, - 'record_video_size': self.config.browser_window_size - }) - - self.context = await pw_browser.new_context(**context_args) + # Check for persistent context + if self._persistence_config.persistent_session and len(browser.contexts) > 0: + logger.info("Using existing persistent context.") + self.context = browser.contexts[0] + else: + logger.info("Creating a new browser context.") + self.context = await browser.new_context( + viewport=self.config.browser_window_size, + no_viewport=False, + user_agent=( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36" + ), + java_script_enabled=True, + bypass_csp=self.config.disable_security, + ignore_https_errors=self.config.disable_security, + record_video_dir=self.config.save_recording_path, + record_video_size=self.config.browser_window_size, + ) + # Handle tracing if self.config.trace_path: await self.context.tracing.start(screenshots=True, snapshots=True, sources=True) - # Load cookies if they exist + # Load cookies if self.config.cookies_file and os.path.exists(self.config.cookies_file): with open(self.config.cookies_file, "r") as f: cookies = json.load(f) - logger.info( - f"Loaded {len(cookies)} cookies from {self.config.cookies_file}" - ) + logger.info(f"Loaded {len(cookies)} cookies from {self.config.cookies_file}.") await self.context.add_cookies(cookies) - # Expose anti-detection scripts + # Inject anti-detection scripts await self.context.add_init_script( """ - // Webdriver property - Object.defineProperty(navigator, 'webdriver', { - get: () => undefined - }); - - // Languages - Object.defineProperty(navigator, 'languages', { - get: () => ['en-US', 'en'] - }); - - // Plugins - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5] - }); - - // Chrome runtime + Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); + Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); + Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] }); window.chrome = { runtime: {} }; - - // Permissions const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) => ( parameters.name === 'notifications' ? @@ -96,42 +89,39 @@ class CustomBrowserContext(BrowserContext): """ ) - # Create an initial page - self._page = await self.context.new_page() - await self._page.goto('about:blank') # Ensure page is ready - + # Create initial page if none exists + if not self.context.pages: + self._page = await self.context.new_page() + await self._page.goto('about:blank') + return self.context - async def new_page(self) -> Page: - """Creates and returns a new page in this context""" + async def new_page(self): + """Creates and returns a new page in this context.""" if not self.context: - await self._create_context() + await self._create_context(await self.browser.get_playwright_browser()) return await self.context.new_page() - async def __aenter__(self): + async def get_current_page(self): + """Returns the current page or creates one if none exists.""" if not self.context: - await self._create_context() - return self + await self._create_context(await self.browser.get_playwright_browser()) + pages = self.context.pages + if not pages: + logger.warning("No existing pages in the context. Creating a new page.") + return await self.context.new_page() + return pages[0] - async def __aexit__(self, *args): - if self.context: + async def close(self): + """Override close to respect persistence setting.""" + if not self._persistence_config.persistent_session and self.context: await self.context.close() self.context = None @property def pages(self): - """Returns list of pages in context""" - return self.context.pages if self.context else [] - - async def get_state(self, **kwargs): - if self.context: - pages = self.context.pages - if pages: - return await super().get_state(**kwargs) - return None - - async def get_pages(self): - """Get pages in a way that works""" + """Returns list of pages in the context.""" if not self.context: + logger.warning("Attempting to access pages but context is not initialized.") return [] return self.context.pages diff --git a/src/utils/stream_utils.py b/src/utils/stream_utils.py index e3bdc22..f4dde56 100644 --- a/src/utils/stream_utils.py +++ b/src/utils/stream_utils.py @@ -3,24 +3,38 @@ import asyncio from typing import AsyncGenerator from playwright.async_api import BrowserContext, Error as PlaywrightError -async def capture_screenshot(browser_context: BrowserContext) -> str: +async def capture_screenshot(browser_context) -> str: """Capture and encode a screenshot""" try: - # Get the implementation context - context = getattr(browser_context, 'impl_context', None) + # Get the implementation context - handle both direct Playwright context and wrapped context + context = browser_context + if hasattr(browser_context, 'context'): + context = browser_context.context + if not context: - return "