mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
@@ -1,2 +1,5 @@
|
||||
data
|
||||
tmp
|
||||
tmp
|
||||
results
|
||||
|
||||
.env
|
||||
14
.env.example
14
.env.example
@@ -40,14 +40,14 @@ ANONYMIZED_TELEMETRY=false
|
||||
# Log level: set to `debug` for verbose logging, or to `result` to log results only. Available: result | debug | info
|
||||
BROWSER_USE_LOGGING_LEVEL=info
|
||||
|
||||
# Chrome settings
|
||||
CHROME_PATH=
|
||||
CHROME_USER_DATA=
|
||||
CHROME_DEBUGGING_PORT=9222
|
||||
CHROME_DEBUGGING_HOST=localhost
|
||||
# Browser settings
|
||||
BROWSER_PATH=
|
||||
BROWSER_USER_DATA=
|
||||
BROWSER_DEBUGGING_PORT=9222
|
||||
BROWSER_DEBUGGING_HOST=localhost
|
||||
# Set to true to keep browser open between AI tasks
|
||||
CHROME_PERSISTENT_SESSION=false
|
||||
CHROME_CDP=
|
||||
KEEP_BROWSER_OPEN=true
|
||||
BROWSER_CDP=
|
||||
# Display settings
|
||||
# Format: WIDTHxHEIGHTxDEPTH
|
||||
RESOLUTION=1920x1080x24
|
||||
|
||||
54
Dockerfile
54
Dockerfile
@@ -1,5 +1,9 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Set platform for multi-arch builds (Docker Buildx will set this)
|
||||
ARG TARGETPLATFORM
|
||||
ARG NODE_MAJOR=20
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
wget \
|
||||
@@ -28,7 +32,6 @@ RUN apt-get update && apt-get install -y \
|
||||
fonts-liberation \
|
||||
dbus \
|
||||
xauth \
|
||||
xvfb \
|
||||
x11vnc \
|
||||
tigervnc-tools \
|
||||
supervisor \
|
||||
@@ -40,6 +43,7 @@ RUN apt-get update && apt-get install -y \
|
||||
fonts-dejavu \
|
||||
fonts-dejavu-core \
|
||||
fonts-dejavu-extra \
|
||||
vim \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install noVNC
|
||||
@@ -47,40 +51,50 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
|
||||
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
||||
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
||||
|
||||
# Set platform for ARM64 compatibility
|
||||
ARG TARGETPLATFORM=linux/amd64
|
||||
# Install Node.js using NodeSource PPA
|
||||
RUN mkdir -p /etc/apt/keyrings \
|
||||
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install nodejs -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Verify Node.js and npm installation (optional, but good for debugging)
|
||||
RUN node -v && npm -v && npx -v
|
||||
|
||||
# Set up working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
# Ensure 'patchright' is in your requirements.txt or install it directly
|
||||
# RUN pip install --no-cache-dir -r requirements.txt patchright # If not in requirements
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install patchright and browsers with system dependencies
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-patchright
|
||||
RUN patchright install --with-deps chromium
|
||||
RUN patchright install-deps
|
||||
# Install Patchright browsers and dependencies
|
||||
# Patchright documentation suggests PLAYWRIGHT_BROWSERS_PATH is still relevant
|
||||
# or that Patchright installs to a similar default location that Playwright would.
|
||||
# Let's assume Patchright respects PLAYWRIGHT_BROWSERS_PATH or its default install location is findable.
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
|
||||
RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
|
||||
|
||||
# Install recommended: Google Chrome (instead of just Chromium for better undetectability)
|
||||
# The 'patchright install chrome' command might download and place it.
|
||||
# The '--with-deps' equivalent for patchright install is to run 'patchright install-deps chrome' after.
|
||||
# RUN patchright install chrome --with-deps
|
||||
|
||||
# Alternative: Install Chromium if Google Chrome is problematic in certain environments
|
||||
RUN patchright install chromium --with-deps
|
||||
|
||||
|
||||
# Copy the application code
|
||||
COPY . .
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV BROWSER_USE_LOGGING_LEVEL=info
|
||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
||||
ENV ANONYMIZED_TELEMETRY=false
|
||||
ENV DISPLAY=:99
|
||||
ENV RESOLUTION=1920x1080x24
|
||||
ENV VNC_PASSWORD=vncpassword
|
||||
ENV CHROME_PERSISTENT_SESSION=true
|
||||
ENV RESOLUTION_WIDTH=1920
|
||||
ENV RESOLUTION_HEIGHT=1080
|
||||
|
||||
# Set up supervisor configuration
|
||||
RUN mkdir -p /var/log/supervisor
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
EXPOSE 7788 6080 5901
|
||||
EXPOSE 7788 6080 5901 9222
|
||||
|
||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
#CMD ["/bin/bash"]
|
||||
@@ -1,85 +0,0 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
wget \
|
||||
gnupg \
|
||||
curl \
|
||||
unzip \
|
||||
xvfb \
|
||||
libgconf-2-4 \
|
||||
libxss1 \
|
||||
libnss3 \
|
||||
libnspr4 \
|
||||
libasound2 \
|
||||
libatk1.0-0 \
|
||||
libatk-bridge2.0-0 \
|
||||
libcups2 \
|
||||
libdbus-1-3 \
|
||||
libdrm2 \
|
||||
libgbm1 \
|
||||
libgtk-3-0 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
xdg-utils \
|
||||
fonts-liberation \
|
||||
dbus \
|
||||
xauth \
|
||||
xvfb \
|
||||
x11vnc \
|
||||
tigervnc-tools \
|
||||
supervisor \
|
||||
net-tools \
|
||||
procps \
|
||||
git \
|
||||
python3-numpy \
|
||||
fontconfig \
|
||||
fonts-dejavu \
|
||||
fonts-dejavu-core \
|
||||
fonts-dejavu-extra \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install noVNC
|
||||
RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
|
||||
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
||||
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
||||
|
||||
# Set platform explicitly for ARM64
|
||||
ARG TARGETPLATFORM=linux/arm64
|
||||
|
||||
# Set up working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy requirements and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Install Playwright and browsers with system dependencies optimized for ARM64
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
RUN PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 pip install playwright && \
|
||||
playwright install --with-deps chromium
|
||||
|
||||
# Copy the application code
|
||||
COPY . .
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV BROWSER_USE_LOGGING_LEVEL=info
|
||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
||||
ENV ANONYMIZED_TELEMETRY=false
|
||||
ENV DISPLAY=:99
|
||||
ENV RESOLUTION=1920x1080x24
|
||||
ENV VNC_PASSWORD=vncpassword
|
||||
ENV CHROME_PERSISTENT_SESSION=true
|
||||
ENV RESOLUTION_WIDTH=1920
|
||||
ENV RESOLUTION_HEIGHT=1080
|
||||
|
||||
# Set up supervisor configuration
|
||||
RUN mkdir -p /var/log/supervisor
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
EXPOSE 7788 6080 5901
|
||||
|
||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||
153
README.md
153
README.md
@@ -23,10 +23,6 @@ We would like to officially thank [WarmShao](https://github.com/warmshao) for hi
|
||||
|
||||
## Installation Guide
|
||||
|
||||
### Prerequisites
|
||||
- Python 3.11 or higher
|
||||
- Git (for cloning the repository)
|
||||
|
||||
### Option 1: Local Installation
|
||||
|
||||
Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
|
||||
@@ -65,10 +61,13 @@ Install Python packages:
|
||||
uv pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Install Browsers in Playwright:
|
||||
You can install specific browsers by running:
|
||||
Install Browsers in Patchright.
|
||||
```bash
|
||||
patchright install chromium
|
||||
patchright install --with-deps
|
||||
```
|
||||
Or you can install specific browsers by running:
|
||||
```bash
|
||||
patchright install chromium --with-deps
|
||||
```
|
||||
|
||||
#### Step 4: Configure Environment
|
||||
@@ -83,6 +82,29 @@ cp .env.example .env
|
||||
```
|
||||
2. Open `.env` in your preferred text editor and add your API keys and other settings
|
||||
|
||||
#### Step 5: Enjoy the web-ui
|
||||
1. **Run the WebUI:**
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788
|
||||
```
|
||||
2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
3. **Using Your Own Browser (Optional):**
|
||||
- Set `BROWSER_PATH` to the executable path of your browser and `BROWSER_USER_DATA` to the user data directory of your browser. Leave `BROWSER_USER_DATA` empty if you want to use local user data.
|
||||
- Windows
|
||||
```env
|
||||
BROWSER_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
BROWSER_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
||||
```
|
||||
> Note: On Windows, replace `YourUsername` with your actual Windows username.
|
||||
- Mac
|
||||
```env
|
||||
BROWSER_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
BROWSER_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
|
||||
```
|
||||
- Close all Chrome windows
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
- Check the "Use Own Browser" option within the Browser Settings.
|
||||
|
||||
### Option 2: Docker Installation
|
||||
|
||||
#### Prerequisites
|
||||
@@ -90,14 +112,14 @@ cp .env.example .env
|
||||
- [Docker Desktop](https://www.docker.com/products/docker-desktop/) (For Windows/macOS)
|
||||
- [Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) (For Linux)
|
||||
|
||||
#### Installation Steps
|
||||
1. Clone the repository:
|
||||
#### Step 1: Clone the Repository
|
||||
```bash
|
||||
git clone https://github.com/browser-use/web-ui.git
|
||||
cd web-ui
|
||||
```
|
||||
|
||||
2. Create and configure environment file:
|
||||
#### Step 2: Configure Environment
|
||||
1. Create a copy of the example environment file:
|
||||
- Windows (Command Prompt):
|
||||
```bash
|
||||
copy .env.example .env
|
||||
@@ -106,122 +128,23 @@ copy .env.example .env
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
Edit `.env` with your preferred text editor and add your API keys
|
||||
2. Open `.env` in your preferred text editor and add your API keys and other settings
|
||||
|
||||
3. Run with Docker:
|
||||
#### Step 3: Docker Build and Run
|
||||
```bash
|
||||
# Build and start the container with default settings (browser closes after AI tasks)
|
||||
docker compose up --build
|
||||
```
|
||||
For ARM64 systems (e.g., Apple Silicon Macs), run the following command instead:
|
||||
```bash
|
||||
# Or run with persistent browser (browser stays open between AI tasks)
|
||||
CHROME_PERSISTENT_SESSION=true docker compose up --build
|
||||
TARGETPLATFORM=linux/arm64 docker compose up --build
|
||||
```
|
||||
|
||||
|
||||
4. Access the Application:
|
||||
- Web Interface: Open `http://localhost:7788` in your browser
|
||||
#### Step 4: Enjoy the web-ui and vnc
|
||||
- Web-UI: Open `http://localhost:7788` in your browser
|
||||
- VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
|
||||
- Default VNC password: "youvncpassword"
|
||||
- Can be changed by setting `VNC_PASSWORD` in your `.env` file
|
||||
|
||||
## Usage
|
||||
|
||||
### Local Setup
|
||||
1. **Run the WebUI:**
|
||||
After completing the installation steps above, start the application:
|
||||
```bash
|
||||
python webui.py --ip 127.0.0.1 --port 7788
|
||||
```
|
||||
2. WebUI options:
|
||||
- `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
|
||||
- `--port`: The port to bind the WebUI to. Default is `7788`.
|
||||
- `--theme`: The theme for the user interface. Default is `Ocean`.
|
||||
- **Default**: The standard theme with a balanced design.
|
||||
- **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
|
||||
- **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
|
||||
- **Glass**: A sleek, semi-transparent design for a modern appearance.
|
||||
- **Origin**: A classic, retro-inspired theme for a nostalgic feel.
|
||||
- **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
|
||||
- **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
|
||||
- `--dark-mode`: Enables dark mode for the user interface.
|
||||
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
4. **Using Your Own Browser(Optional):**
|
||||
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data.
|
||||
- Windows
|
||||
```env
|
||||
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
||||
```
|
||||
> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
|
||||
- Mac
|
||||
```env
|
||||
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
|
||||
```
|
||||
- Close all Chrome windows
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
- Check the "Use Own Browser" option within the Browser Settings.
|
||||
5. **Keep Browser Open(Optional):**
|
||||
- Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
|
||||
|
||||
### Docker Setup
|
||||
1. **Environment Variables:**
|
||||
- All configuration is done through the `.env` file
|
||||
- Available environment variables:
|
||||
```
|
||||
# LLM API Keys
|
||||
OPENAI_API_KEY=your_key_here
|
||||
ANTHROPIC_API_KEY=your_key_here
|
||||
GOOGLE_API_KEY=your_key_here
|
||||
|
||||
# Browser Settings
|
||||
CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks
|
||||
RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH
|
||||
RESOLUTION_WIDTH=1920 # Custom width in pixels
|
||||
RESOLUTION_HEIGHT=1080 # Custom height in pixels
|
||||
|
||||
# VNC Settings
|
||||
VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
|
||||
```
|
||||
|
||||
2. **Platform Support:**
|
||||
- Supports both AMD64 and ARM64 architectures
|
||||
- For ARM64 systems (e.g., Apple Silicon Macs), the container will automatically use the appropriate image
|
||||
|
||||
3. **Browser Persistence Modes:**
|
||||
- **Default Mode (CHROME_PERSISTENT_SESSION=false):**
|
||||
- Browser opens and closes with each AI task
|
||||
- Clean state for each interaction
|
||||
- Lower resource usage
|
||||
|
||||
- **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
|
||||
- Browser stays open between AI tasks
|
||||
- Maintains history and state
|
||||
- Allows viewing previous AI interactions
|
||||
- Set in `.env` file or via environment variable when starting container
|
||||
|
||||
4. **Viewing Browser Interactions:**
|
||||
- Access the noVNC viewer at `http://localhost:6080/vnc.html`
|
||||
- Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
|
||||
- Direct VNC access available on port 5900 (mapped to container port 5901)
|
||||
- You can now see all browser interactions in real-time
|
||||
|
||||
5. **Container Management:**
|
||||
```bash
|
||||
# Start with persistent browser
|
||||
CHROME_PERSISTENT_SESSION=true docker compose up -d
|
||||
|
||||
# Start with default mode (browser closes after tasks)
|
||||
docker compose up -d
|
||||
|
||||
# View logs
|
||||
docker compose logs -f
|
||||
|
||||
# Stop the container
|
||||
docker compose down
|
||||
```
|
||||
|
||||
## Changelog
|
||||
- [x] **2025/01/26:** Thanks to @vvincent1234. Now browser-use-webui can combine with DeepSeek-r1 to engage in deep thinking!
|
||||
- [x] **2025/01/10:** Thanks to @casistack. We now have a Docker setup option and support for keeping the browser open between tasks. [Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
|
||||
|
||||
@@ -1,62 +1,79 @@
|
||||
services:
|
||||
# debug: docker compose run --rm -it browser-use-webui bash
|
||||
browser-use-webui:
|
||||
platform: linux/amd64
|
||||
build:
|
||||
context: .
|
||||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||
ports:
|
||||
- "7788:7788" # Gradio default port
|
||||
- "6080:6080" # noVNC web interface
|
||||
- "5901:5901" # VNC port
|
||||
- "9222:9222" # Chrome remote debugging port
|
||||
- "7788:7788"
|
||||
- "6080:6080"
|
||||
- "5901:5901"
|
||||
- "9222:9222"
|
||||
environment:
|
||||
# LLM API Keys & Endpoints
|
||||
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
|
||||
- AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
|
||||
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
|
||||
- AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
|
||||
- DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
|
||||
- DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
|
||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
|
||||
- MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
|
||||
- ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
|
||||
- ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
|
||||
- MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
|
||||
- MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
|
||||
- IBM_API_KEY=${IBM_API_KEY:-}
|
||||
- UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
|
||||
- UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
|
||||
- SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
|
||||
- SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
|
||||
- IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
|
||||
- IBM_API_KEY=${IBM_API_KEY:-}
|
||||
- IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
|
||||
- BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
|
||||
|
||||
# Application Settings
|
||||
- ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
|
||||
- CHROME_PATH=/usr/bin/google-chrome
|
||||
- CHROME_USER_DATA=/app/data/chrome_data
|
||||
- CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
|
||||
- CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
|
||||
- BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
|
||||
|
||||
# Browser Settings
|
||||
- BROWSER_PATH=
|
||||
- BROWSER_USER_DATA=
|
||||
- BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
|
||||
- BROWSER_DEBUGGING_HOST=localhost
|
||||
- USE_OWN_BROWSER=false
|
||||
- KEEP_BROWSER_OPEN=true
|
||||
- BROWSER_CDP=${BROWSER_CDP:-} # e.g., http://localhost:9222
|
||||
|
||||
# Display Settings
|
||||
- DISPLAY=:99
|
||||
- PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||
# This ENV is used by the Dockerfile during build time if Patchright respects it.
|
||||
# It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
|
||||
- PLAYWRIGHT_BROWSERS_PATH=/ms-browsers # Matches Dockerfile ENV
|
||||
- RESOLUTION=${RESOLUTION:-1920x1080x24}
|
||||
- RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
|
||||
- RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
|
||||
- VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
|
||||
- CHROME_DEBUGGING_PORT=9222
|
||||
- CHROME_DEBUGGING_HOST=localhost
|
||||
|
||||
# VNC Settings
|
||||
- VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
|
||||
|
||||
volumes:
|
||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||
# - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
|
||||
restart: unless-stopped
|
||||
shm_size: '2gb'
|
||||
cap_add:
|
||||
- SYS_ADMIN
|
||||
security_opt:
|
||||
- seccomp=unconfined
|
||||
tmpfs:
|
||||
- /tmp
|
||||
healthcheck:
|
||||
test: ["CMD", "nc", "-z", "localhost", "5901"]
|
||||
test: ["CMD", "nc", "-z", "localhost", "5901"] # VNC port
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
retries: 3
|
||||
@@ -1,4 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Start supervisord in the foreground to properly manage child processes
|
||||
exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf
|
||||
@@ -8,30 +8,49 @@ import os
|
||||
from browser_use.agent.gif import create_history_gif
|
||||
from browser_use.agent.service import Agent, AgentHookFunc
|
||||
from browser_use.agent.views import (
|
||||
ActionResult,
|
||||
AgentHistory,
|
||||
AgentHistoryList,
|
||||
AgentStepInfo,
|
||||
ToolCallingMethod,
|
||||
)
|
||||
from browser_use.browser.views import BrowserStateHistory
|
||||
from browser_use.telemetry.views import (
|
||||
AgentEndTelemetryEvent,
|
||||
)
|
||||
from browser_use.utils import time_execution_async
|
||||
from dotenv import load_dotenv
|
||||
from browser_use.agent.message_manager.utils import is_model_without_tool_support
|
||||
|
||||
load_dotenv()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SKIP_LLM_API_KEY_VERIFICATION = (
|
||||
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
|
||||
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
|
||||
)
|
||||
|
||||
|
||||
class BrowserUseAgent(Agent):
|
||||
def _set_tool_calling_method(self) -> ToolCallingMethod | None:
|
||||
tool_calling_method = self.settings.tool_calling_method
|
||||
if tool_calling_method == 'auto':
|
||||
if is_model_without_tool_support(self.model_name):
|
||||
return 'raw'
|
||||
elif self.chat_model_library == 'ChatGoogleGenerativeAI':
|
||||
return None
|
||||
elif self.chat_model_library == 'ChatOpenAI':
|
||||
return 'function_calling'
|
||||
elif self.chat_model_library == 'AzureChatOpenAI':
|
||||
return 'function_calling'
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
return tool_calling_method
|
||||
|
||||
@time_execution_async("--run (agent)")
|
||||
async def run(
|
||||
self,
|
||||
max_steps: int = 100,
|
||||
on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None,
|
||||
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None
|
||||
) -> AgentHistoryList:
|
||||
"""Execute the task with maximum number of steps"""
|
||||
|
||||
@@ -49,41 +68,28 @@ class BrowserUseAgent(Agent):
|
||||
)
|
||||
signal_handler.register()
|
||||
|
||||
# Wait for verification task to complete if it exists
|
||||
if hasattr(self, "_verification_task") and not self._verification_task.done():
|
||||
try:
|
||||
await self._verification_task
|
||||
except Exception:
|
||||
# Error already logged in the task
|
||||
pass
|
||||
|
||||
try:
|
||||
self._log_agent_run()
|
||||
|
||||
# Execute initial actions if provided
|
||||
if self.initial_actions:
|
||||
result = await self.multi_act(
|
||||
self.initial_actions, check_for_new_elements=False
|
||||
)
|
||||
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
|
||||
self.state.last_result = result
|
||||
|
||||
for step in range(max_steps):
|
||||
# Check if waiting for user input after Ctrl+C
|
||||
while self.state.paused:
|
||||
await asyncio.sleep(0.5)
|
||||
if self.state.stopped:
|
||||
break
|
||||
if self.state.paused:
|
||||
signal_handler.wait_for_resume()
|
||||
signal_handler.reset()
|
||||
|
||||
# Check if we should stop due to too many failures
|
||||
if self.state.consecutive_failures >= self.settings.max_failures:
|
||||
logger.error(
|
||||
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
|
||||
)
|
||||
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
|
||||
break
|
||||
|
||||
# Check control flags before each step
|
||||
if self.state.stopped:
|
||||
logger.info("Agent stopped")
|
||||
logger.info('Agent stopped')
|
||||
break
|
||||
|
||||
while self.state.paused:
|
||||
@@ -108,15 +114,30 @@ class BrowserUseAgent(Agent):
|
||||
await self.log_completion()
|
||||
break
|
||||
else:
|
||||
logger.info("❌ Failed to complete task in maximum steps")
|
||||
error_message = 'Failed to complete task in maximum steps'
|
||||
|
||||
self.state.history.history.append(
|
||||
AgentHistory(
|
||||
model_output=None,
|
||||
result=[ActionResult(error=error_message, include_in_memory=True)],
|
||||
state=BrowserStateHistory(
|
||||
url='',
|
||||
title='',
|
||||
tabs=[],
|
||||
interacted_element=[],
|
||||
screenshot=None,
|
||||
),
|
||||
metadata=None,
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f'❌ {error_message}')
|
||||
|
||||
return self.state.history
|
||||
|
||||
except KeyboardInterrupt:
|
||||
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
|
||||
logger.info(
|
||||
"Got KeyboardInterrupt during execution, returning current history"
|
||||
)
|
||||
logger.info('Got KeyboardInterrupt during execution, returning current history')
|
||||
return self.state.history
|
||||
|
||||
finally:
|
||||
@@ -136,13 +157,29 @@ class BrowserUseAgent(Agent):
|
||||
)
|
||||
)
|
||||
|
||||
if self.settings.save_playwright_script_path:
|
||||
logger.info(
|
||||
f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}'
|
||||
)
|
||||
try:
|
||||
# Extract sensitive data keys if sensitive_data is provided
|
||||
keys = list(self.sensitive_data.keys()) if self.sensitive_data else None
|
||||
# Pass browser and context config to the saving method
|
||||
self.state.history.save_as_playwright_script(
|
||||
self.settings.save_playwright_script_path,
|
||||
sensitive_data_keys=keys,
|
||||
browser_config=self.browser.config,
|
||||
context_config=self.browser_context.config,
|
||||
)
|
||||
except Exception as script_gen_err:
|
||||
# Log any error during script generation/saving
|
||||
logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True)
|
||||
|
||||
await self.close()
|
||||
|
||||
if self.settings.generate_gif:
|
||||
output_path: str = "agent_history.gif"
|
||||
output_path: str = 'agent_history.gif'
|
||||
if isinstance(self.settings.generate_gif, str):
|
||||
output_path = self.settings.generate_gif
|
||||
|
||||
create_history_gif(
|
||||
task=self.task, history=self.state.history, output_path=output_path
|
||||
)
|
||||
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,25 +26,33 @@ from browser_use.browser.utils.screen_resolution import get_screen_resolution, g
|
||||
from browser_use.utils import time_execution_async
|
||||
import socket
|
||||
|
||||
from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
|
||||
from .custom_context import CustomBrowserContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowser(Browser):
|
||||
|
||||
async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
|
||||
async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext:
|
||||
"""Create a browser context"""
|
||||
browser_config = self.config.model_dump() if self.config else {}
|
||||
context_config = config.model_dump() if config else {}
|
||||
merged_config = {**browser_config, **context_config}
|
||||
return CustomBrowserContext(config=CustomBrowserContextConfig(**merged_config), browser=self)
|
||||
return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
|
||||
|
||||
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
|
||||
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""
|
||||
assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
|
||||
|
||||
if self.config.headless:
|
||||
# Use the configured window size from new_context_config if available
|
||||
if (
|
||||
not self.config.headless
|
||||
and hasattr(self.config, 'new_context_config')
|
||||
and hasattr(self.config.new_context_config, 'browser_window_size')
|
||||
):
|
||||
screen_size = self.config.new_context_config.browser_window_size.model_dump()
|
||||
offset_x, offset_y = get_window_adjustments()
|
||||
elif self.config.headless:
|
||||
screen_size = {'width': 1920, 'height': 1080}
|
||||
offset_x, offset_y = 0, 0
|
||||
else:
|
||||
@@ -52,6 +60,7 @@ class CustomBrowser(Browser):
|
||||
offset_x, offset_y = get_window_adjustments()
|
||||
|
||||
chrome_args = {
|
||||
f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
|
||||
*CHROME_ARGS,
|
||||
*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
|
||||
*(CHROME_HEADLESS_ARGS if self.config.headless else []),
|
||||
@@ -70,8 +79,8 @@ class CustomBrowser(Browser):
|
||||
|
||||
# check if port 9222 is already taken, if so remove the remote-debugging-port arg to prevent conflicts
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
if s.connect_ex(('localhost', 9222)) == 0:
|
||||
chrome_args.remove('--remote-debugging-port=9222')
|
||||
if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
|
||||
chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
|
||||
|
||||
browser_class = getattr(playwright, self.config.browser_class)
|
||||
args = {
|
||||
|
||||
@@ -12,10 +12,6 @@ from browser_use.browser.context import BrowserContextState
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowserContextConfig(BrowserContextConfig):
|
||||
force_new_context: bool = False # force to create new context
|
||||
|
||||
|
||||
class CustomBrowserContext(BrowserContext):
|
||||
def __init__(
|
||||
self,
|
||||
@@ -24,96 +20,3 @@ class CustomBrowserContext(BrowserContext):
|
||||
state: Optional[BrowserContextState] = None,
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state)
|
||||
|
||||
async def _create_context(self, browser: PlaywrightBrowser):
|
||||
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
|
||||
if not self.config.force_new_context and self.browser.config.cdp_url and len(browser.contexts) > 0:
|
||||
context = browser.contexts[0]
|
||||
elif not self.config.force_new_context and self.browser.config.browser_binary_path and len(
|
||||
browser.contexts) > 0:
|
||||
# Connect to existing Chrome instance instead of creating new one
|
||||
context = browser.contexts[0]
|
||||
else:
|
||||
# Original code for creating new context
|
||||
context = await browser.new_context(
|
||||
no_viewport=True,
|
||||
user_agent=self.config.user_agent,
|
||||
java_script_enabled=True,
|
||||
bypass_csp=self.config.disable_security,
|
||||
ignore_https_errors=self.config.disable_security,
|
||||
record_video_dir=self.config.save_recording_path,
|
||||
record_video_size={
|
||||
"width": self.config.window_width,
|
||||
"height": self.config.window_height
|
||||
},
|
||||
record_har_path=self.config.save_har_path,
|
||||
locale=self.config.locale,
|
||||
http_credentials=self.config.http_credentials,
|
||||
is_mobile=self.config.is_mobile,
|
||||
has_touch=self.config.has_touch,
|
||||
geolocation=self.config.geolocation,
|
||||
permissions=self.config.permissions,
|
||||
timezone_id=self.config.timezone_id,
|
||||
)
|
||||
|
||||
if self.config.trace_path:
|
||||
await context.tracing.start(screenshots=True, snapshots=True, sources=True)
|
||||
|
||||
# Load cookies if they exist
|
||||
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
|
||||
with open(self.config.cookies_file, 'r') as f:
|
||||
try:
|
||||
cookies = json.load(f)
|
||||
|
||||
valid_same_site_values = ['Strict', 'Lax', 'None']
|
||||
for cookie in cookies:
|
||||
if 'sameSite' in cookie:
|
||||
if cookie['sameSite'] not in valid_same_site_values:
|
||||
logger.warning(
|
||||
f"Fixed invalid sameSite value '{cookie['sameSite']}' to 'None' for cookie {cookie.get('name')}"
|
||||
)
|
||||
cookie['sameSite'] = 'None'
|
||||
logger.info(f'🍪 Loaded {len(cookies)} cookies from {self.config.cookies_file}')
|
||||
await context.add_cookies(cookies)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f'Failed to parse cookies file: {str(e)}')
|
||||
|
||||
# Expose anti-detection scripts
|
||||
await context.add_init_script(
|
||||
"""
|
||||
// Webdriver property
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
});
|
||||
|
||||
// Languages
|
||||
Object.defineProperty(navigator, 'languages', {
|
||||
get: () => ['en-US']
|
||||
});
|
||||
|
||||
// Plugins
|
||||
Object.defineProperty(navigator, 'plugins', {
|
||||
get: () => [1, 2, 3, 4, 5]
|
||||
});
|
||||
|
||||
// Chrome runtime
|
||||
window.chrome = { runtime: {} };
|
||||
|
||||
// Permissions
|
||||
const originalQuery = window.navigator.permissions.query;
|
||||
window.navigator.permissions.query = (parameters) => (
|
||||
parameters.name === 'notifications' ?
|
||||
Promise.resolve({ state: Notification.permission }) :
|
||||
originalQuery(parameters)
|
||||
);
|
||||
(function () {
|
||||
const originalAttachShadow = Element.prototype.attachShadow;
|
||||
Element.prototype.attachShadow = function attachShadow(options) {
|
||||
return originalAttachShadow.call(this, { ...options, mode: "open" });
|
||||
};
|
||||
})();
|
||||
"""
|
||||
)
|
||||
|
||||
return context
|
||||
|
||||
@@ -172,6 +172,10 @@ class CustomController(Controller):
|
||||
param_model=create_tool_param_model(tool),
|
||||
)
|
||||
logger.info(f"Add mcp tool: {tool_name}")
|
||||
logger.debug(
|
||||
f"Registered {len(self.mcp_client.server_name_to_tools[server_name])} mcp tools for {server_name}")
|
||||
else:
|
||||
logger.warning(f"MCP client not started.")
|
||||
|
||||
async def close_mcp_client(self):
|
||||
if self.mcp_client:
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import os
|
||||
|
||||
import gradio as gr
|
||||
import logging
|
||||
from gradio.components import Component
|
||||
@@ -56,7 +58,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
|
||||
)
|
||||
keep_browser_open = gr.Checkbox(
|
||||
label="Keep Browser Open",
|
||||
value=True,
|
||||
value=os.getenv("KEEP_BROWSER_OPEN", True),
|
||||
info="Keep Browser Open between Tasks",
|
||||
interactive=True
|
||||
)
|
||||
@@ -91,6 +93,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
|
||||
with gr.Row():
|
||||
cdp_url = gr.Textbox(
|
||||
label="CDP URL",
|
||||
value=os.getenv("BROWSER_CDP", None),
|
||||
info="CDP URL for browser remote debugging",
|
||||
interactive=True,
|
||||
)
|
||||
|
||||
@@ -13,14 +13,13 @@ from browser_use.agent.views import (
|
||||
AgentOutput,
|
||||
)
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.browser.views import BrowserState
|
||||
from gradio.components import Component
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.webui.webui_manager import WebuiManager
|
||||
@@ -32,12 +31,12 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _initialize_llm(
|
||||
provider: Optional[str],
|
||||
model_name: Optional[str],
|
||||
temperature: float,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
num_ctx: Optional[int] = None,
|
||||
provider: Optional[str],
|
||||
model_name: Optional[str],
|
||||
temperature: float,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
num_ctx: Optional[int] = None,
|
||||
) -> Optional[BaseChatModel]:
|
||||
"""Initializes the LLM based on settings. Returns None if provider/model is missing."""
|
||||
if not provider or not model_name:
|
||||
@@ -68,10 +67,10 @@ async def _initialize_llm(
|
||||
|
||||
|
||||
def _get_config_value(
|
||||
webui_manager: WebuiManager,
|
||||
comp_dict: Dict[gr.components.Component, Any],
|
||||
comp_id_suffix: str,
|
||||
default: Any = None,
|
||||
webui_manager: WebuiManager,
|
||||
comp_dict: Dict[gr.components.Component, Any],
|
||||
comp_id_suffix: str,
|
||||
default: Any = None,
|
||||
) -> Any:
|
||||
"""Safely get value from component dictionary using its ID suffix relative to the tab."""
|
||||
# Assumes component ID format is "tab_name.comp_name"
|
||||
@@ -133,7 +132,7 @@ def _format_agent_output(model_output: AgentOutput) -> str:
|
||||
|
||||
|
||||
async def _handle_new_step(
|
||||
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
|
||||
webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int
|
||||
):
|
||||
"""Callback for each step taken by the agent, including screenshot display."""
|
||||
|
||||
@@ -157,12 +156,12 @@ async def _handle_new_step(
|
||||
try:
|
||||
# Basic validation: check if it looks like base64
|
||||
if (
|
||||
isinstance(screenshot_data, str) and len(screenshot_data) > 100
|
||||
isinstance(screenshot_data, str) and len(screenshot_data) > 100
|
||||
): # Arbitrary length check
|
||||
# *** UPDATED STYLE: Removed centering, adjusted width ***
|
||||
img_tag = f'<img src="data:image/jpeg;base64,{screenshot_data}" alt="Step {step_num} Screenshot" style="max-width: 800px; max-height: 600px; object-fit:contain;" />'
|
||||
screenshot_html = (
|
||||
img_tag + "<br/>"
|
||||
img_tag + "<br/>"
|
||||
) # Use <br/> for line break after inline-block image
|
||||
else:
|
||||
logger.warning(
|
||||
@@ -223,7 +222,7 @@ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList):
|
||||
|
||||
|
||||
async def _ask_assistant_callback(
|
||||
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
|
||||
webui_manager: WebuiManager, query: str, browser_context: BrowserContext
|
||||
) -> Dict[str, Any]:
|
||||
"""Callback triggered by the agent's ask_for_assistant action."""
|
||||
logger.info("Agent requires assistance. Waiting for user input.")
|
||||
@@ -274,7 +273,7 @@ async def _ask_assistant_callback(
|
||||
|
||||
|
||||
async def run_agent_task(
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
) -> AsyncGenerator[Dict[gr.components.Component, Any], None]:
|
||||
"""Handles the entire lifecycle of initializing and running the agent."""
|
||||
|
||||
@@ -358,6 +357,7 @@ async def run_agent_task(
|
||||
# Planner LLM Settings (Optional)
|
||||
planner_llm_provider_name = get_setting("planner_llm_provider") or None
|
||||
planner_llm = None
|
||||
planner_use_vision = False
|
||||
if planner_llm_provider_name:
|
||||
planner_llm_model_name = get_setting("planner_llm_model_name")
|
||||
planner_llm_temperature = get_setting("planner_llm_temperature", 0.6)
|
||||
@@ -387,7 +387,7 @@ async def run_agent_task(
|
||||
) # Logic handled by CDP/WSS presence
|
||||
keep_browser_open = get_browser_setting("keep_browser_open", False)
|
||||
headless = get_browser_setting("headless", False)
|
||||
disable_security = get_browser_setting("disable_security", True)
|
||||
disable_security = get_browser_setting("disable_security", False)
|
||||
window_w = int(get_browser_setting("window_w", 1280))
|
||||
window_h = int(get_browser_setting("window_h", 1100))
|
||||
cdp_url = get_browser_setting("cdp_url") or None
|
||||
@@ -422,7 +422,7 @@ async def run_agent_task(
|
||||
|
||||
# Pass the webui_manager instance to the callback when wrapping it
|
||||
async def ask_callback_wrapper(
|
||||
query: str, browser_context: BrowserContext
|
||||
query: str, browser_context: BrowserContext
|
||||
) -> Dict[str, Any]:
|
||||
return await _ask_assistant_callback(webui_manager, query, browser_context)
|
||||
|
||||
@@ -451,20 +451,16 @@ async def run_agent_task(
|
||||
if not webui_manager.bu_browser:
|
||||
logger.info("Launching new browser instance.")
|
||||
extra_args = [f"--window-size={window_w},{window_h}"]
|
||||
if browser_user_data_dir:
|
||||
extra_args.append(f"--user-data-dir={browser_user_data_dir}")
|
||||
|
||||
if use_own_browser:
|
||||
browser_binary_path = (
|
||||
os.getenv("CHROME_PATH", None) or browser_binary_path
|
||||
)
|
||||
browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
|
||||
if browser_binary_path == "":
|
||||
browser_binary_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
|
||||
if browser_user_data:
|
||||
extra_args += [f"--user-data-dir={browser_user_data}"]
|
||||
else:
|
||||
browser_binary_path = None
|
||||
|
||||
webui_manager.bu_browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
@@ -479,14 +475,14 @@ async def run_agent_task(
|
||||
# Create Context if needed
|
||||
if not webui_manager.bu_browser_context:
|
||||
logger.info("Creating new browser context.")
|
||||
context_config = CustomBrowserContextConfig(
|
||||
context_config = BrowserContextConfig(
|
||||
trace_path=save_trace_path if save_trace_path else None,
|
||||
save_recording_path=save_recording_path
|
||||
if save_recording_path
|
||||
else None,
|
||||
save_downloads_path=save_download_path if save_download_path else None,
|
||||
window_width=window_w,
|
||||
window_height=window_h,
|
||||
window_height=window_h,
|
||||
window_width=window_w,
|
||||
)
|
||||
if not webui_manager.bu_browser:
|
||||
raise ValueError("Browser not initialized, cannot create context.")
|
||||
@@ -513,7 +509,7 @@ async def run_agent_task(
|
||||
|
||||
# Pass the webui_manager to callbacks when wrapping them
|
||||
async def step_callback_wrapper(
|
||||
state: BrowserState, output: AgentOutput, step_num: int
|
||||
state: BrowserState, output: AgentOutput, step_num: int
|
||||
):
|
||||
await _handle_new_step(webui_manager, state, output, step_num)
|
||||
|
||||
@@ -582,7 +578,7 @@ async def run_agent_task(
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
if (
|
||||
agent_task.done() or is_stopped
|
||||
agent_task.done() or is_stopped
|
||||
): # If stopped or task finished while paused
|
||||
break
|
||||
|
||||
@@ -633,8 +629,8 @@ async def run_agent_task(
|
||||
yield update_dict
|
||||
# Wait until response is submitted or task finishes
|
||||
while (
|
||||
webui_manager.bu_response_event is not None
|
||||
and not agent_task.done()
|
||||
webui_manager.bu_response_event is not None
|
||||
and not agent_task.done()
|
||||
):
|
||||
await asyncio.sleep(0.2)
|
||||
# Restore UI after response submitted or if task ended unexpectedly
|
||||
@@ -716,9 +712,9 @@ async def run_agent_task(
|
||||
except asyncio.CancelledError:
|
||||
logger.info("Agent task was cancelled.")
|
||||
if not any(
|
||||
"Cancelled" in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
"Cancelled" in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
):
|
||||
webui_manager.bu_chat_history.append(
|
||||
{"role": "assistant", "content": "**Task Cancelled**."}
|
||||
@@ -730,9 +726,9 @@ async def run_agent_task(
|
||||
f"**Agent Execution Error:**\n```\n{type(e).__name__}: {e}\n```"
|
||||
)
|
||||
if not any(
|
||||
error_message in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
error_message in msg.get("content", "")
|
||||
for msg in webui_manager.bu_chat_history
|
||||
if msg.get("role") == "assistant"
|
||||
):
|
||||
webui_manager.bu_chat_history.append(
|
||||
{"role": "assistant", "content": error_message}
|
||||
@@ -788,7 +784,7 @@ async def run_agent_task(
|
||||
clear_button_comp: gr.update(interactive=True),
|
||||
chatbot_comp: gr.update(
|
||||
value=webui_manager.bu_chat_history
|
||||
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
|
||||
+ [{"role": "assistant", "content": f"**Setup Error:** {e}"}]
|
||||
),
|
||||
}
|
||||
|
||||
@@ -797,7 +793,7 @@ async def run_agent_task(
|
||||
|
||||
|
||||
async def handle_submit(
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
webui_manager: WebuiManager, components: Dict[gr.components.Component, Any]
|
||||
):
|
||||
"""Handles clicks on the main 'Submit' button."""
|
||||
user_input_comp = webui_manager.get_component_by_id("browser_use_agent.user_input")
|
||||
@@ -1048,7 +1044,7 @@ def create_browser_use_agent_tab(webui_manager: WebuiManager):
|
||||
run_tab_outputs = list(tab_components.values())
|
||||
|
||||
async def submit_wrapper(
|
||||
components_dict: Dict[Component, Any],
|
||||
components_dict: Dict[Component, Any],
|
||||
) -> AsyncGenerator[Dict[Component, Any], None]:
|
||||
"""Wrapper for handle_submit that yields its results."""
|
||||
async for update in handle_submit(webui_manager, components_dict):
|
||||
|
||||
@@ -116,7 +116,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
|
||||
# LLM Config (from agent_settings tab)
|
||||
llm_provider_name = get_setting("agent_settings", "llm_provider")
|
||||
llm_model_name = get_setting("agent_settings", "llm_model_name")
|
||||
llm_temperature = get_setting("agent_settings", "llm_temperature", 0.5) # Default if not found
|
||||
llm_temperature = max(get_setting("agent_settings", "llm_temperature", 0.5), 0.5)
|
||||
llm_base_url = get_setting("agent_settings", "llm_base_url")
|
||||
llm_api_key = get_setting("agent_settings", "llm_api_key")
|
||||
ollama_num_ctx = get_setting("agent_settings", "ollama_num_ctx")
|
||||
@@ -132,7 +132,7 @@ async def run_deep_research(webui_manager: WebuiManager, components: Dict[Compon
|
||||
# Note: DeepResearchAgent constructor takes a dict, not full Browser/Context objects
|
||||
browser_config_dict = {
|
||||
"headless": get_setting("browser_settings", "headless", False),
|
||||
"disable_security": get_setting("browser_settings", "disable_security", True),
|
||||
"disable_security": get_setting("browser_settings", "disable_security", False),
|
||||
"browser_binary_path": get_setting("browser_settings", "browser_binary_path"),
|
||||
"user_data_dir": get_setting("browser_settings", "browser_user_data_dir"),
|
||||
"window_width": int(get_setting("browser_settings", "window_w", 1280)),
|
||||
|
||||
@@ -3,7 +3,7 @@ user=root
|
||||
nodaemon=true
|
||||
logfile=/dev/stdout
|
||||
logfile_maxbytes=0
|
||||
loglevel=debug
|
||||
loglevel=error
|
||||
|
||||
[program:xvfb]
|
||||
command=Xvfb :99 -screen 0 %(ENV_RESOLUTION)s -ac +extension GLX +render -noreset
|
||||
@@ -65,21 +65,6 @@ startretries=5
|
||||
startsecs=3
|
||||
depends_on=x11vnc
|
||||
|
||||
[program:persistent_browser]
|
||||
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
|
||||
command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
priority=350
|
||||
startretries=5
|
||||
startsecs=10
|
||||
stopsignal=TERM
|
||||
stopwaitsecs=15
|
||||
depends_on=novnc
|
||||
|
||||
[program:webui]
|
||||
command=python webui.py --ip 0.0.0.0 --port 7788
|
||||
directory=/app
|
||||
@@ -92,5 +77,4 @@ priority=400
|
||||
startretries=3
|
||||
startsecs=3
|
||||
stopsignal=TERM
|
||||
stopwaitsecs=10
|
||||
depends_on=persistent_browser
|
||||
stopwaitsecs=10
|
||||
@@ -20,15 +20,14 @@ from src.utils import utils
|
||||
async def test_browser_use_agent():
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
BrowserContextConfig
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -38,12 +37,12 @@ async def test_browser_use_agent():
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="google",
|
||||
# model_name="gemini-2.0-flash",
|
||||
# temperature=0.6,
|
||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
# )
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="google",
|
||||
model_name="gemini-2.0-flash",
|
||||
temperature=0.6,
|
||||
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
@@ -67,25 +66,25 @@ async def test_browser_use_agent():
|
||||
|
||||
window_w, window_h = 1280, 1100
|
||||
|
||||
llm = llm_provider.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.5,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
# llm = llm_provider.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.5,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
@@ -97,8 +96,7 @@ async def test_browser_use_agent():
|
||||
}
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_own_browser = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
@@ -106,37 +104,35 @@ async def test_browser_use_agent():
|
||||
browser_context = None
|
||||
|
||||
try:
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
extra_browser_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
browser_binary_path = os.getenv("BROWSER_PATH", None)
|
||||
if browser_binary_path == "":
|
||||
browser_binary_path = None
|
||||
browser_user_data = os.getenv("BROWSER_USER_DATA", None)
|
||||
if browser_user_data:
|
||||
extra_browser_args += [f"--user-data-dir={browser_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
browser_binary_path = None
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
browser_binary_path=browser_binary_path,
|
||||
extra_browser_args=extra_browser_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
config=BrowserContextConfig(
|
||||
trace_path=None,
|
||||
save_recording_path=None,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
force_new_context=True
|
||||
window_height=window_h,
|
||||
window_width=window_w,
|
||||
)
|
||||
)
|
||||
agent = Agent(
|
||||
task="download pdf from https://arxiv.org/abs/2504.10458 and rename this pdf to 'GUI-r1-test.pdf'",
|
||||
agent = BrowserUseAgent(
|
||||
# task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'",
|
||||
task="give me nvidia stock price",
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
browser_context=browser_context,
|
||||
@@ -153,7 +149,6 @@ async def test_browser_use_agent():
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
@@ -167,24 +162,16 @@ async def test_browser_use_agent():
|
||||
|
||||
|
||||
async def test_browser_use_parallel():
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from patchright.async_api import async_playwright
|
||||
from browser_use.browser.browser import Browser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
from browser_use.agent.service import Agent
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import CustomBrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import llm_provider
|
||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="openai",
|
||||
@@ -233,15 +220,15 @@ async def test_browser_use_parallel():
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
@@ -261,8 +248,7 @@ async def test_browser_use_parallel():
|
||||
}
|
||||
controller = CustomController()
|
||||
await controller.setup_mcp_client(mcp_server_config)
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_own_browser = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
@@ -270,37 +256,35 @@ async def test_browser_use_parallel():
|
||||
browser_context = None
|
||||
|
||||
try:
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
extra_browser_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
browser_binary_path = os.getenv("BROWSER_PATH", None)
|
||||
if browser_binary_path == "":
|
||||
browser_binary_path = None
|
||||
browser_user_data = os.getenv("BROWSER_USER_DATA", None)
|
||||
if browser_user_data:
|
||||
extra_browser_args += [f"--user-data-dir={browser_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
browser_binary_path = None
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
browser_binary_path=chrome_path,
|
||||
extra_browser_args=extra_chromium_args,
|
||||
browser_binary_path=browser_binary_path,
|
||||
extra_browser_args=extra_browser_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
config=CustomBrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
config=BrowserContextConfig(
|
||||
trace_path=None,
|
||||
save_recording_path=None,
|
||||
save_downloads_path="./tmp/downloads",
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
window_height=window_h,
|
||||
window_width=window_w,
|
||||
force_new_context=True
|
||||
)
|
||||
)
|
||||
agents = [
|
||||
Agent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller)
|
||||
for task in [
|
||||
'Search Google for weather in Tokyo',
|
||||
# 'Check Reddit front page title',
|
||||
@@ -332,6 +316,8 @@ async def test_browser_use_parallel():
|
||||
await browser_context.close()
|
||||
if browser:
|
||||
await browser.close()
|
||||
if controller:
|
||||
await controller.close_mcp_client()
|
||||
|
||||
|
||||
async def test_deep_research_agent():
|
||||
@@ -362,8 +348,8 @@ async def test_deep_research_agent():
|
||||
|
||||
browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
|
||||
agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
|
||||
research_topic = "Impact of Microplastics on Marine Ecosystems"
|
||||
task_id_to_resume = "815460fb-337a-4850-8fa4-a5f2db301a89" # Set this to resume a previous task ID
|
||||
research_topic = "Give me investment advices of nvidia and tesla."
|
||||
task_id_to_resume = "" # Set this to resume a previous task ID
|
||||
|
||||
print(f"Starting research on: {research_topic}")
|
||||
|
||||
@@ -403,6 +389,6 @@ async def test_deep_research_agent():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# asyncio.run(test_browser_use_agent())
|
||||
asyncio.run(test_browser_use_agent())
|
||||
# asyncio.run(test_browser_use_parallel())
|
||||
asyncio.run(test_deep_research_agent())
|
||||
# asyncio.run(test_deep_research_agent())
|
||||
|
||||
@@ -14,20 +14,31 @@ async def test_mcp_client():
|
||||
from src.utils.mcp_client import setup_mcp_client_and_tools, create_tool_param_model
|
||||
|
||||
test_server_config = {
|
||||
"playwright": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@playwright/mcp@latest",
|
||||
],
|
||||
"transport": "stdio",
|
||||
},
|
||||
"filesystem": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@modelcontextprotocol/server-filesystem",
|
||||
"/Users/warmshao/ai_workspace",
|
||||
]
|
||||
"mcpServers": {
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"-y",
|
||||
"@wonderwhy-er/desktop-commander"
|
||||
]
|
||||
},
|
||||
# "filesystem": {
|
||||
# "command": "npx",
|
||||
# "args": [
|
||||
# "-y",
|
||||
# "@modelcontextprotocol/server-filesystem",
|
||||
# "/Users/xxx/ai_workspace",
|
||||
# ]
|
||||
# },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,15 +59,15 @@ async def test_controller_with_mcp():
|
||||
|
||||
mcp_server_config = {
|
||||
"mcpServers": {
|
||||
"markitdown": {
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"--rm",
|
||||
"-i",
|
||||
"markitdown-mcp:latest"
|
||||
]
|
||||
},
|
||||
# "markitdown": {
|
||||
# "command": "docker",
|
||||
# "args": [
|
||||
# "run",
|
||||
# "--rm",
|
||||
# "-i",
|
||||
# "markitdown-mcp:latest"
|
||||
# ]
|
||||
# },
|
||||
"desktop-commander": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
|
||||
@@ -142,17 +142,17 @@ def test_ibm_model():
|
||||
|
||||
|
||||
def test_qwen_model():
|
||||
config = LLMConfig(provider="alibaba", model_name="qwen3-30b-a3b")
|
||||
config = LLMConfig(provider="alibaba", model_name="qwen-vl-max")
|
||||
test_llm(config, "How many 'r's are in the word 'strawberry'?")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test_openai_model()
|
||||
# test_google_model()
|
||||
# test_azure_openai_model()
|
||||
test_azure_openai_model()
|
||||
# test_deepseek_model()
|
||||
# test_ollama_model()
|
||||
test_deepseek_r1_model()
|
||||
# test_deepseek_r1_model()
|
||||
# test_deepseek_r1_ollama_model()
|
||||
# test_mistral_model()
|
||||
# test_ibm_model()
|
||||
|
||||
Reference in New Issue
Block a user