mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
merge dockerfile
This commit is contained in:
14
.env.example
14
.env.example
@@ -40,14 +40,14 @@ ANONYMIZED_TELEMETRY=false
|
|||||||
# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
|
# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
|
||||||
BROWSER_USE_LOGGING_LEVEL=info
|
BROWSER_USE_LOGGING_LEVEL=info
|
||||||
|
|
||||||
# Chrome settings
|
# Browser settings
|
||||||
CHROME_PATH=
|
BROWSER_PATH=
|
||||||
CHROME_USER_DATA=
|
BROWSER_USER_DATA=
|
||||||
CHROME_DEBUGGING_PORT=9222
|
BROWSER_DEBUGGING_PORT=9222
|
||||||
CHROME_DEBUGGING_HOST=localhost
|
BROWSER_DEBUGGING_HOST=localhost
|
||||||
# Set to true to keep browser open between AI tasks
|
# Set to true to keep browser open between AI tasks
|
||||||
CHROME_PERSISTENT_SESSION=false
|
KEEP_BROWSER_OPEN=true
|
||||||
CHROME_CDP=
|
BROWSER_CDP=
|
||||||
# Display settings
|
# Display settings
|
||||||
# Format: WIDTHxHEIGHTxDEPTH
|
# Format: WIDTHxHEIGHTxDEPTH
|
||||||
RESOLUTION=1920x1080x24
|
RESOLUTION=1920x1080x24
|
||||||
|
|||||||
44
Dockerfile
44
Dockerfile
@@ -1,5 +1,8 @@
|
|||||||
FROM python:3.11-slim
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
# Set platform for multi-arch builds (Docker Buildx will set this)
|
||||||
|
ARG TARGETPLATFORM
|
||||||
|
|
||||||
# Install system dependencies
|
# Install system dependencies
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
wget \
|
wget \
|
||||||
@@ -28,7 +31,6 @@ RUN apt-get update && apt-get install -y \
|
|||||||
fonts-liberation \
|
fonts-liberation \
|
||||||
dbus \
|
dbus \
|
||||||
xauth \
|
xauth \
|
||||||
xvfb \
|
|
||||||
x11vnc \
|
x11vnc \
|
||||||
tigervnc-tools \
|
tigervnc-tools \
|
||||||
supervisor \
|
supervisor \
|
||||||
@@ -47,33 +49,45 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
|
|||||||
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
||||||
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
||||||
|
|
||||||
# Set platform for ARM64 compatibility
|
|
||||||
ARG TARGETPLATFORM=linux/amd64
|
|
||||||
|
|
||||||
# Set up working directory
|
# Set up working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Copy requirements and install Python dependencies
|
# Copy requirements and install Python dependencies
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
# Ensure 'patchright' is in your requirements.txt or install it directly
|
||||||
|
# RUN pip install --no-cache-dir -r requirements.txt patchright # If not in requirements
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt # Assuming patchright is in requirements.txt
|
||||||
|
RUN pip install --no-cache-dir patchright # Or install it explicitly
|
||||||
|
|
||||||
# Install Playwright and browsers with system dependencies
|
# Install Patchright browsers and dependencies
|
||||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
# Patchright documentation suggests PLAYWRIGHT_BROWSERS_PATH is still relevant
|
||||||
RUN playwright install --with-deps chromium
|
# or that Patchright installs to a similar default location that Playwright would.
|
||||||
RUN playwright install-deps
|
# Let's assume Patchright respects PLAYWRIGHT_BROWSERS_PATH or its default install location is findable.
|
||||||
|
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-browsers
|
||||||
|
RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
|
||||||
|
|
||||||
|
# Install recommended: Google Chrome (instead of just Chromium for better undetectability)
|
||||||
|
# The 'patchright install chrome' command might download and place it.
|
||||||
|
# The '--with-deps' equivalent for patchright install is to run 'patchright install-deps chrome' after.
|
||||||
|
RUN patchright install chrome
|
||||||
|
RUN patchright install-deps chrome
|
||||||
|
|
||||||
|
# Alternative: Install Chromium if Google Chrome is problematic in certain environments
|
||||||
|
RUN patchright install chromium
|
||||||
|
RUN patchright install-deps chromium
|
||||||
|
|
||||||
# Copy the application code
|
# Copy the application code
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Set environment variables
|
# Set environment variables (Updated Names)
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
ENV BROWSER_USE_LOGGING_LEVEL=info
|
ENV BROWSER_USE_LOGGING_LEVEL=info
|
||||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
# BROWSER_PATH will be determined by Patchright installation, supervisord will find it.
|
||||||
ENV ANONYMIZED_TELEMETRY=false
|
ENV ANONYMIZED_TELEMETRY=false
|
||||||
ENV DISPLAY=:99
|
ENV DISPLAY=:99
|
||||||
ENV RESOLUTION=1920x1080x24
|
ENV RESOLUTION=1920x1080x24
|
||||||
ENV VNC_PASSWORD=vncpassword
|
ENV VNC_PASSWORD=youvncpassword
|
||||||
ENV CHROME_PERSISTENT_SESSION=true
|
ENV KEEP_BROWSER_OPEN=true
|
||||||
ENV RESOLUTION_WIDTH=1920
|
ENV RESOLUTION_WIDTH=1920
|
||||||
ENV RESOLUTION_HEIGHT=1080
|
ENV RESOLUTION_HEIGHT=1080
|
||||||
|
|
||||||
@@ -81,6 +95,6 @@ ENV RESOLUTION_HEIGHT=1080
|
|||||||
RUN mkdir -p /var/log/supervisor
|
RUN mkdir -p /var/log/supervisor
|
||||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
|
||||||
EXPOSE 7788 6080 5901
|
EXPOSE 7788 6080 5901 9222
|
||||||
|
|
||||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
FROM python:3.11-slim
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
RUN apt-get update && apt-get install -y \
|
|
||||||
wget \
|
|
||||||
gnupg \
|
|
||||||
curl \
|
|
||||||
unzip \
|
|
||||||
xvfb \
|
|
||||||
libgconf-2-4 \
|
|
||||||
libxss1 \
|
|
||||||
libnss3 \
|
|
||||||
libnspr4 \
|
|
||||||
libasound2 \
|
|
||||||
libatk1.0-0 \
|
|
||||||
libatk-bridge2.0-0 \
|
|
||||||
libcups2 \
|
|
||||||
libdbus-1-3 \
|
|
||||||
libdrm2 \
|
|
||||||
libgbm1 \
|
|
||||||
libgtk-3-0 \
|
|
||||||
libxcomposite1 \
|
|
||||||
libxdamage1 \
|
|
||||||
libxfixes3 \
|
|
||||||
libxrandr2 \
|
|
||||||
xdg-utils \
|
|
||||||
fonts-liberation \
|
|
||||||
dbus \
|
|
||||||
xauth \
|
|
||||||
xvfb \
|
|
||||||
x11vnc \
|
|
||||||
tigervnc-tools \
|
|
||||||
supervisor \
|
|
||||||
net-tools \
|
|
||||||
procps \
|
|
||||||
git \
|
|
||||||
python3-numpy \
|
|
||||||
fontconfig \
|
|
||||||
fonts-dejavu \
|
|
||||||
fonts-dejavu-core \
|
|
||||||
fonts-dejavu-extra \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install noVNC
|
|
||||||
RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
|
|
||||||
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
|
|
||||||
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
|
|
||||||
|
|
||||||
# Set platform explicitly for ARM64
|
|
||||||
ARG TARGETPLATFORM=linux/arm64
|
|
||||||
|
|
||||||
# Set up working directory
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Copy requirements and install Python dependencies
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
|
||||||
|
|
||||||
# Install Playwright and browsers with system dependencies optimized for ARM64
|
|
||||||
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
|
||||||
RUN PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 pip install playwright && \
|
|
||||||
playwright install --with-deps chromium
|
|
||||||
|
|
||||||
# Copy the application code
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
# Set environment variables
|
|
||||||
ENV PYTHONUNBUFFERED=1
|
|
||||||
ENV BROWSER_USE_LOGGING_LEVEL=info
|
|
||||||
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
|
|
||||||
ENV ANONYMIZED_TELEMETRY=false
|
|
||||||
ENV DISPLAY=:99
|
|
||||||
ENV RESOLUTION=1920x1080x24
|
|
||||||
ENV VNC_PASSWORD=vncpassword
|
|
||||||
ENV CHROME_PERSISTENT_SESSION=true
|
|
||||||
ENV RESOLUTION_WIDTH=1920
|
|
||||||
ENV RESOLUTION_HEIGHT=1080
|
|
||||||
|
|
||||||
# Set up supervisor configuration
|
|
||||||
RUN mkdir -p /var/log/supervisor
|
|
||||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
|
||||||
|
|
||||||
EXPOSE 7788 6080 5901
|
|
||||||
|
|
||||||
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
|
||||||
132
README.md
132
README.md
@@ -23,10 +23,6 @@ We would like to officially thank [WarmShao](https://github.com/warmshao) for hi
|
|||||||
|
|
||||||
## Installation Guide
|
## Installation Guide
|
||||||
|
|
||||||
### Prerequisites
|
|
||||||
- Python 3.11 or higher
|
|
||||||
- Git (for cloning the repository)
|
|
||||||
|
|
||||||
### Option 1: Local Installation
|
### Option 1: Local Installation
|
||||||
|
|
||||||
Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
|
Read the [quickstart guide](https://docs.browser-use.com/quickstart#prepare-the-environment) or follow the steps below to get started.
|
||||||
@@ -65,10 +61,13 @@ Install Python packages:
|
|||||||
uv pip install -r requirements.txt
|
uv pip install -r requirements.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
Install Browsers in Playwright:
|
Install Browsers in Patchright.
|
||||||
You can install specific browsers by running:
|
|
||||||
```bash
|
```bash
|
||||||
patchright install chromium
|
patchright install
|
||||||
|
```
|
||||||
|
Or you can install specific browsers by running:
|
||||||
|
```bash
|
||||||
|
patchright install chromium --with-deps --no-shell
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Step 4: Configure Environment
|
#### Step 4: Configure Environment
|
||||||
@@ -83,6 +82,42 @@ cp .env.example .env
|
|||||||
```
|
```
|
||||||
2. Open `.env` in your preferred text editor and add your API keys and other settings
|
2. Open `.env` in your preferred text editor and add your API keys and other settings
|
||||||
|
|
||||||
|
#### Local Setup
|
||||||
|
1. **Run the WebUI:**
|
||||||
|
After completing the installation steps above, start the application:
|
||||||
|
```bash
|
||||||
|
python webui.py --ip 127.0.0.1 --port 7788
|
||||||
|
```
|
||||||
|
2. WebUI options:
|
||||||
|
- `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
|
||||||
|
- `--port`: The port to bind the WebUI to. Default is `7788`.
|
||||||
|
- `--theme`: The theme for the user interface. Default is `Ocean`.
|
||||||
|
- **Default**: The standard theme with a balanced design.
|
||||||
|
- **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
|
||||||
|
- **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
|
||||||
|
- **Glass**: A sleek, semi-transparent design for a modern appearance.
|
||||||
|
- **Origin**: A classic, retro-inspired theme for a nostalgic feel.
|
||||||
|
- **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
|
||||||
|
- **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
|
||||||
|
- `--dark-mode`: Enables dark mode for the user interface.
|
||||||
|
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||||
|
4. **Using Your Own Browser(Optional):**
|
||||||
|
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data.
|
||||||
|
- Windows
|
||||||
|
```env
|
||||||
|
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||||
|
CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
||||||
|
```
|
||||||
|
> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
|
||||||
|
- Mac
|
||||||
|
```env
|
||||||
|
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||||
|
CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
|
||||||
|
```
|
||||||
|
- Close all Chrome windows
|
||||||
|
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||||
|
- Check the "Use Own Browser" option within the Browser Settings.
|
||||||
|
|
||||||
### Option 2: Docker Installation
|
### Option 2: Docker Installation
|
||||||
|
|
||||||
#### Prerequisites
|
#### Prerequisites
|
||||||
@@ -118,95 +153,12 @@ docker compose up --build
|
|||||||
CHROME_PERSISTENT_SESSION=true docker compose up --build
|
CHROME_PERSISTENT_SESSION=true docker compose up --build
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
4. Access the Application:
|
4. Access the Application:
|
||||||
- Web Interface: Open `http://localhost:7788` in your browser
|
- Web Interface: Open `http://localhost:7788` in your browser
|
||||||
- VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
|
- VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
|
||||||
- Default VNC password: "youvncpassword"
|
- Default VNC password: "youvncpassword"
|
||||||
- Can be changed by setting `VNC_PASSWORD` in your `.env` file
|
- Can be changed by setting `VNC_PASSWORD` in your `.env` file
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Local Setup
|
|
||||||
1. **Run the WebUI:**
|
|
||||||
After completing the installation steps above, start the application:
|
|
||||||
```bash
|
|
||||||
python webui.py --ip 127.0.0.1 --port 7788
|
|
||||||
```
|
|
||||||
2. WebUI options:
|
|
||||||
- `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
|
|
||||||
- `--port`: The port to bind the WebUI to. Default is `7788`.
|
|
||||||
- `--theme`: The theme for the user interface. Default is `Ocean`.
|
|
||||||
- **Default**: The standard theme with a balanced design.
|
|
||||||
- **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
|
|
||||||
- **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
|
|
||||||
- **Glass**: A sleek, semi-transparent design for a modern appearance.
|
|
||||||
- **Origin**: A classic, retro-inspired theme for a nostalgic feel.
|
|
||||||
- **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
|
|
||||||
- **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
|
|
||||||
- `--dark-mode`: Enables dark mode for the user interface.
|
|
||||||
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
|
||||||
4. **Using Your Own Browser(Optional):**
|
|
||||||
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data.
|
|
||||||
- Windows
|
|
||||||
```env
|
|
||||||
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
|
||||||
CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
|
|
||||||
```
|
|
||||||
> Note: Replace `YourUsername` with your actual Windows username for Windows systems.
|
|
||||||
- Mac
|
|
||||||
```env
|
|
||||||
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
|
||||||
CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
|
|
||||||
```
|
|
||||||
- Close all Chrome windows
|
|
||||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
|
||||||
- Check the "Use Own Browser" option within the Browser Settings.
|
|
||||||
5. **Keep Browser Open(Optional):**
|
|
||||||
- Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
|
|
||||||
|
|
||||||
### Docker Setup
|
|
||||||
1. **Environment Variables:**
|
|
||||||
- All configuration is done through the `.env` file
|
|
||||||
- Available environment variables:
|
|
||||||
```
|
|
||||||
# LLM API Keys
|
|
||||||
OPENAI_API_KEY=your_key_here
|
|
||||||
ANTHROPIC_API_KEY=your_key_here
|
|
||||||
GOOGLE_API_KEY=your_key_here
|
|
||||||
|
|
||||||
# Browser Settings
|
|
||||||
CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks
|
|
||||||
RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH
|
|
||||||
RESOLUTION_WIDTH=1920 # Custom width in pixels
|
|
||||||
RESOLUTION_HEIGHT=1080 # Custom height in pixels
|
|
||||||
|
|
||||||
# VNC Settings
|
|
||||||
VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Platform Support:**
|
|
||||||
- Supports both AMD64 and ARM64 architectures
|
|
||||||
- For ARM64 systems (e.g., Apple Silicon Macs), the container will automatically use the appropriate image
|
|
||||||
|
|
||||||
3. **Browser Persistence Modes:**
|
|
||||||
- **Default Mode (CHROME_PERSISTENT_SESSION=false):**
|
|
||||||
- Browser opens and closes with each AI task
|
|
||||||
- Clean state for each interaction
|
|
||||||
- Lower resource usage
|
|
||||||
|
|
||||||
- **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
|
|
||||||
- Browser stays open between AI tasks
|
|
||||||
- Maintains history and state
|
|
||||||
- Allows viewing previous AI interactions
|
|
||||||
- Set in `.env` file or via environment variable when starting container
|
|
||||||
|
|
||||||
4. **Viewing Browser Interactions:**
|
|
||||||
- Access the noVNC viewer at `http://localhost:6080/vnc.html`
|
|
||||||
- Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
|
|
||||||
- Direct VNC access available on port 5900 (mapped to container port 5901)
|
|
||||||
- You can now see all browser interactions in real-time
|
|
||||||
|
|
||||||
5. **Container Management:**
|
5. **Container Management:**
|
||||||
```bash
|
```bash
|
||||||
# Start with persistent browser
|
# Start with persistent browser
|
||||||
|
|||||||
@@ -1,62 +1,76 @@
|
|||||||
services:
|
services:
|
||||||
browser-use-webui:
|
browser-use-webui:
|
||||||
platform: linux/amd64
|
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: ${DOCKERFILE:-Dockerfile}
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
|
||||||
ports:
|
ports:
|
||||||
- "7788:7788" # Gradio default port
|
- "7788:7788"
|
||||||
- "6080:6080" # noVNC web interface
|
- "6080:6080"
|
||||||
- "5901:5901" # VNC port
|
- "5901:5901"
|
||||||
- "9222:9222" # Chrome remote debugging port
|
- "9222:9222"
|
||||||
environment:
|
environment:
|
||||||
|
# LLM API Keys & Endpoints (Your existing list)
|
||||||
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
|
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
|
||||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
|
||||||
- ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
|
- ANTHROPIC_ENDPOINT=${ANTHROPIC_ENDPOINT:-https://api.anthropic.com}
|
||||||
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||||
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
|
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
|
||||||
- AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
|
- AZURE_OPENAI_ENDPOINT=${AZURE_OPENAI_ENDPOINT:-}
|
||||||
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
|
- AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY:-}
|
||||||
|
- AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION:-2025-01-01-preview}
|
||||||
- DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
|
- DEEPSEEK_ENDPOINT=${DEEPSEEK_ENDPOINT:-https://api.deepseek.com}
|
||||||
- DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
|
- DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY:-}
|
||||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
|
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://localhost:11434}
|
||||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
|
|
||||||
- MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
|
- MISTRAL_ENDPOINT=${MISTRAL_ENDPOINT:-https://api.mistral.ai/v1}
|
||||||
|
- MISTRAL_API_KEY=${MISTRAL_API_KEY:-}
|
||||||
- ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
|
- ALIBABA_ENDPOINT=${ALIBABA_ENDPOINT:-https://dashscope.aliyuncs.com/compatible-mode/v1}
|
||||||
- ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
|
- ALIBABA_API_KEY=${ALIBABA_API_KEY:-}
|
||||||
- MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
|
- MOONSHOT_ENDPOINT=${MOONSHOT_ENDPOINT:-https://api.moonshot.cn/v1}
|
||||||
- MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
|
- MOONSHOT_API_KEY=${MOONSHOT_API_KEY:-}
|
||||||
- IBM_API_KEY=${IBM_API_KEY:-}
|
- UNBOUND_ENDPOINT=${UNBOUND_ENDPOINT:-https://api.getunbound.ai}
|
||||||
|
- UNBOUND_API_KEY=${UNBOUND_API_KEY:-}
|
||||||
|
- SiliconFLOW_ENDPOINT=${SiliconFLOW_ENDPOINT:-https://api.siliconflow.cn/v1/}
|
||||||
|
- SiliconFLOW_API_KEY=${SiliconFLOW_API_KEY:-}
|
||||||
- IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
|
- IBM_ENDPOINT=${IBM_ENDPOINT:-https://us-south.ml.cloud.ibm.com}
|
||||||
|
- IBM_API_KEY=${IBM_API_KEY:-}
|
||||||
- IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
|
- IBM_PROJECT_ID=${IBM_PROJECT_ID:-}
|
||||||
- BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
|
|
||||||
|
# Application Settings
|
||||||
- ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
|
- ANONYMIZED_TELEMETRY=${ANONYMIZED_TELEMETRY:-false}
|
||||||
- CHROME_PATH=/usr/bin/google-chrome
|
- BROWSER_USE_LOGGING_LEVEL=${BROWSER_USE_LOGGING_LEVEL:-info}
|
||||||
- CHROME_USER_DATA=/app/data/chrome_data
|
|
||||||
- CHROME_PERSISTENT_SESSION=${CHROME_PERSISTENT_SESSION:-false}
|
# Browser Settings
|
||||||
- CHROME_CDP=${CHROME_CDP:-http://localhost:9222}
|
- BROWSER_USER_DATA=${BROWSER_USER_DATA:-/app/data/chrome_data}
|
||||||
|
- BROWSER_DEBUGGING_PORT=${BROWSER_DEBUGGING_PORT:-9222}
|
||||||
|
- BROWSER_DEBUGGING_HOST=${BROWSER_DEBUGGING_HOST:-0.0.0.0}
|
||||||
|
- KEEP_BROWSER_OPEN=${KEEP_BROWSER_OPEN:-true}
|
||||||
|
- BROWSER_CDP=${BROWSER_CDP:-} # e.g., http://localhost:9222
|
||||||
|
|
||||||
|
# Display Settings
|
||||||
- DISPLAY=:99
|
- DISPLAY=:99
|
||||||
- PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
# This ENV is used by the Dockerfile during build time if Patchright respects it.
|
||||||
|
# It's not strictly needed at runtime by docker-compose unless your app or scripts also read it.
|
||||||
|
- PLAYWRIGHT_BROWSERS_PATH=/ms-browsers # Matches Dockerfile ENV
|
||||||
- RESOLUTION=${RESOLUTION:-1920x1080x24}
|
- RESOLUTION=${RESOLUTION:-1920x1080x24}
|
||||||
- RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
|
- RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
|
||||||
- RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
|
- RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
|
||||||
- VNC_PASSWORD=${VNC_PASSWORD:-vncpassword}
|
|
||||||
- CHROME_DEBUGGING_PORT=9222
|
# VNC Settings
|
||||||
- CHROME_DEBUGGING_HOST=localhost
|
- VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
- /tmp/.X11-unix:/tmp/.X11-unix
|
- /tmp/.X11-unix:/tmp/.X11-unix
|
||||||
|
# - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
shm_size: '2gb'
|
shm_size: '2gb'
|
||||||
cap_add:
|
cap_add:
|
||||||
- SYS_ADMIN
|
- SYS_ADMIN
|
||||||
security_opt:
|
|
||||||
- seccomp=unconfined
|
|
||||||
tmpfs:
|
tmpfs:
|
||||||
- /tmp
|
- /tmp
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "nc", "-z", "localhost", "5901"]
|
test: ["CMD", "nc", "-z", "localhost", "5901"] # VNC port
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Start supervisord in the foreground to properly manage child processes
|
|
||||||
exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
browser-use==0.1.43
|
browser-use==0.1.45
|
||||||
pyperclip==1.9.0
|
pyperclip==1.9.0
|
||||||
gradio==5.27.0
|
gradio==5.27.0
|
||||||
json-repair
|
json-repair
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from browser_use.telemetry.views import (
|
|||||||
)
|
)
|
||||||
from browser_use.utils import time_execution_async
|
from browser_use.utils import time_execution_async
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from browser_use.agent.message_manager.utils import is_model_without_tool_support
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -30,6 +31,22 @@ SKIP_LLM_API_KEY_VERIFICATION = (
|
|||||||
|
|
||||||
|
|
||||||
class BrowserUseAgent(Agent):
|
class BrowserUseAgent(Agent):
|
||||||
|
def _set_tool_calling_method(self) -> ToolCallingMethod | None:
|
||||||
|
tool_calling_method = self.settings.tool_calling_method
|
||||||
|
if tool_calling_method == 'auto':
|
||||||
|
if is_model_without_tool_support(self.model_name):
|
||||||
|
return 'raw'
|
||||||
|
elif self.chat_model_library == 'ChatGoogleGenerativeAI':
|
||||||
|
return None
|
||||||
|
elif self.chat_model_library == 'ChatOpenAI':
|
||||||
|
return 'function_calling'
|
||||||
|
elif self.chat_model_library == 'AzureChatOpenAI':
|
||||||
|
return 'function_calling'
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return tool_calling_method
|
||||||
|
|
||||||
@time_execution_async("--run (agent)")
|
@time_execution_async("--run (agent)")
|
||||||
async def run(
|
async def run(
|
||||||
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
|
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ from langchain_core.tools import StructuredTool, Tool
|
|||||||
from langgraph.graph import StateGraph
|
from langgraph.graph import StateGraph
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from browser_use.browser.context import BrowserContextWindowSize, BrowserContextConfig
|
from browser_use.browser.context import BrowserContextConfig
|
||||||
|
|
||||||
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
from src.agent.browser_use.browser_use_agent import BrowserUseAgent
|
||||||
from src.browser.custom_browser import CustomBrowser
|
from src.browser.custom_browser import CustomBrowser
|
||||||
@@ -82,22 +82,19 @@ async def run_single_browser_task(
|
|||||||
try:
|
try:
|
||||||
logger.info(f"Starting browser task for query: {task_query}")
|
logger.info(f"Starting browser task for query: {task_query}")
|
||||||
extra_args = [f"--window-size={window_w},{window_h}"]
|
extra_args = [f"--window-size={window_w},{window_h}"]
|
||||||
if browser_user_data_dir:
|
|
||||||
extra_args.append(f"--user-data-dir={browser_user_data_dir}")
|
|
||||||
if use_own_browser:
|
if use_own_browser:
|
||||||
browser_binary_path = os.getenv("CHROME_PATH", None) or browser_binary_path
|
browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
|
||||||
if browser_binary_path == "":
|
if browser_binary_path == "":
|
||||||
browser_binary_path = None
|
browser_binary_path = None
|
||||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
|
||||||
if chrome_user_data:
|
if browser_user_data:
|
||||||
extra_args += [f"--user-data-dir={chrome_user_data}"]
|
extra_args += [f"--user-data-dir={browser_user_data}"]
|
||||||
else:
|
else:
|
||||||
browser_binary_path = None
|
browser_binary_path = None
|
||||||
|
|
||||||
bu_browser = CustomBrowser(
|
bu_browser = CustomBrowser(
|
||||||
config=BrowserConfig(
|
config=BrowserConfig(
|
||||||
headless=headless,
|
headless=headless,
|
||||||
disable_security=False,
|
|
||||||
browser_binary_path=browser_binary_path,
|
browser_binary_path=browser_binary_path,
|
||||||
extra_browser_args=extra_args,
|
extra_browser_args=extra_args,
|
||||||
wss_url=wss_url,
|
wss_url=wss_url,
|
||||||
@@ -107,7 +104,8 @@ async def run_single_browser_task(
|
|||||||
|
|
||||||
context_config = BrowserContextConfig(
|
context_config = BrowserContextConfig(
|
||||||
save_downloads_path="./tmp/downloads",
|
save_downloads_path="./tmp/downloads",
|
||||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
window_height=window_h,
|
||||||
|
window_width=window_w,
|
||||||
force_new_context=True,
|
force_new_context=True,
|
||||||
)
|
)
|
||||||
bu_browser_context = await bu_browser.new_context(config=context_config)
|
bu_browser_context = await bu_browser.new_context(config=context_config)
|
||||||
@@ -299,30 +297,34 @@ Provide a list of distinct search queries(up to {max_parallel_browsers}) that ar
|
|||||||
# --- Langgraph State Definition ---
|
# --- Langgraph State Definition ---
|
||||||
|
|
||||||
|
|
||||||
class ResearchPlanItem(TypedDict):
|
class ResearchTaskItem(TypedDict):
|
||||||
step: int
|
# step: int # Maybe step within category, or just implicit by order
|
||||||
task: str
|
task_description: str
|
||||||
status: str # "pending", "completed", "failed"
|
status: str # "pending", "completed", "failed"
|
||||||
queries: Optional[List[str]] # Queries generated for this task
|
queries: Optional[List[str]]
|
||||||
result_summary: Optional[str] # Optional brief summary after execution
|
result_summary: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
class ResearchCategoryItem(TypedDict):
|
||||||
|
category_name: str
|
||||||
|
tasks: List[ResearchTaskItem]
|
||||||
|
# Optional: category_status: str # Could be "pending", "in_progress", "completed"
|
||||||
|
|
||||||
|
|
||||||
class DeepResearchState(TypedDict):
|
class DeepResearchState(TypedDict):
|
||||||
task_id: str
|
task_id: str
|
||||||
topic: str
|
topic: str
|
||||||
research_plan: List[ResearchPlanItem]
|
research_plan: List[ResearchCategoryItem] # CHANGED
|
||||||
search_results: List[Dict[str, Any]] # Stores results from browser_search_tool_func
|
search_results: List[Dict[str, Any]]
|
||||||
# messages: Sequence[BaseMessage] # History for ReAct-like steps within nodes
|
llm: Any
|
||||||
llm: Any # The LLM instance
|
|
||||||
tools: List[Tool]
|
tools: List[Tool]
|
||||||
output_dir: Path
|
output_dir: Path
|
||||||
browser_config: Dict[str, Any]
|
browser_config: Dict[str, Any]
|
||||||
final_report: Optional[str]
|
final_report: Optional[str]
|
||||||
current_step_index: int # To track progress through the plan
|
current_category_index: int
|
||||||
stop_requested: bool # Flag to signal termination
|
current_task_index_in_category: int
|
||||||
# Add other state variables as needed
|
stop_requested: bool
|
||||||
error_message: Optional[str] # To store errors
|
error_message: Optional[str]
|
||||||
|
|
||||||
messages: List[BaseMessage]
|
messages: List[BaseMessage]
|
||||||
|
|
||||||
|
|
||||||
@@ -330,44 +332,75 @@ class DeepResearchState(TypedDict):
|
|||||||
|
|
||||||
|
|
||||||
def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
|
def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
|
||||||
"""Loads state from files if they exist."""
|
|
||||||
state_updates = {}
|
state_updates = {}
|
||||||
plan_file = os.path.join(output_dir, PLAN_FILENAME)
|
plan_file = os.path.join(output_dir, PLAN_FILENAME)
|
||||||
search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
|
search_file = os.path.join(output_dir, SEARCH_INFO_FILENAME)
|
||||||
|
|
||||||
|
loaded_plan: List[ResearchCategoryItem] = []
|
||||||
|
next_cat_idx, next_task_idx = 0, 0
|
||||||
|
found_pending = False
|
||||||
|
|
||||||
if os.path.exists(plan_file):
|
if os.path.exists(plan_file):
|
||||||
try:
|
try:
|
||||||
with open(plan_file, "r", encoding="utf-8") as f:
|
with open(plan_file, "r", encoding="utf-8") as f:
|
||||||
# Basic parsing, assumes markdown checklist format
|
current_category: Optional[ResearchCategoryItem] = None
|
||||||
plan = []
|
lines = f.readlines()
|
||||||
step = 1
|
cat_counter = 0
|
||||||
for line in f:
|
task_counter_in_cat = 0
|
||||||
line = line.strip()
|
|
||||||
if line.startswith(("- [x]", "- [ ]")):
|
for line_num, line_content in enumerate(lines):
|
||||||
status = "completed" if line.startswith("- [x]") else "pending"
|
line = line_content.strip()
|
||||||
task = line[5:].strip()
|
if line.startswith("## "): # Category
|
||||||
plan.append(
|
if current_category: # Save previous category
|
||||||
ResearchPlanItem(
|
loaded_plan.append(current_category)
|
||||||
step=step,
|
if not found_pending: # If previous category was all done, advance cat counter
|
||||||
task=task,
|
cat_counter += 1
|
||||||
status=status,
|
task_counter_in_cat = 0
|
||||||
queries=None,
|
category_name = line[line.find(" "):].strip() # Get text after "## X. "
|
||||||
result_summary=None,
|
current_category = ResearchCategoryItem(category_name=category_name, tasks=[])
|
||||||
)
|
elif (line.startswith("- [ ]") or line.startswith("- [x]") or line.startswith(
|
||||||
|
"- [-]")) and current_category: # Task
|
||||||
|
status = "pending"
|
||||||
|
if line.startswith("- [x]"):
|
||||||
|
status = "completed"
|
||||||
|
elif line.startswith("- [-]"):
|
||||||
|
status = "failed"
|
||||||
|
|
||||||
|
task_desc = line[5:].strip()
|
||||||
|
current_category["tasks"].append(
|
||||||
|
ResearchTaskItem(task_description=task_desc, status=status, queries=None,
|
||||||
|
result_summary=None)
|
||||||
)
|
)
|
||||||
step += 1
|
if status == "pending" and not found_pending:
|
||||||
state_updates["research_plan"] = plan
|
next_cat_idx = cat_counter
|
||||||
# Determine next step index based on loaded plan
|
next_task_idx = task_counter_in_cat
|
||||||
next_step = next(
|
found_pending = True
|
||||||
(i for i, item in enumerate(plan) if item["status"] == "pending"),
|
if not found_pending: # only increment if previous tasks were completed/failed
|
||||||
len(plan),
|
task_counter_in_cat += 1
|
||||||
)
|
|
||||||
state_updates["current_step_index"] = next_step
|
if current_category: # Append last category
|
||||||
|
loaded_plan.append(current_category)
|
||||||
|
|
||||||
|
if loaded_plan:
|
||||||
|
state_updates["research_plan"] = loaded_plan
|
||||||
|
if not found_pending and loaded_plan: # All tasks were completed or failed
|
||||||
|
next_cat_idx = len(loaded_plan) # Points beyond the last category
|
||||||
|
next_task_idx = 0
|
||||||
|
state_updates["current_category_index"] = next_cat_idx
|
||||||
|
state_updates["current_task_index_in_category"] = next_task_idx
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Loaded research plan from {plan_file}, next step index: {next_step}"
|
f"Loaded hierarchical research plan from {plan_file}. "
|
||||||
|
f"Next task: Category {next_cat_idx}, Task {next_task_idx} in category."
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(f"Plan file {plan_file} was empty or malformed.")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to load or parse research plan {plan_file}: {e}")
|
logger.error(f"Failed to load or parse research plan {plan_file}: {e}", exc_info=True)
|
||||||
state_updates["error_message"] = f"Failed to load research plan: {e}"
|
state_updates["error_message"] = f"Failed to load research plan: {e}"
|
||||||
|
else:
|
||||||
|
logger.info(f"Plan file {plan_file} not found. Will start fresh.")
|
||||||
|
|
||||||
if os.path.exists(search_file):
|
if os.path.exists(search_file):
|
||||||
try:
|
try:
|
||||||
with open(search_file, "r", encoding="utf-8") as f:
|
with open(search_file, "r", encoding="utf-8") as f:
|
||||||
@@ -375,22 +408,25 @@ def _load_previous_state(task_id: str, output_dir: str) -> Dict[str, Any]:
|
|||||||
logger.info(f"Loaded search results from {search_file}")
|
logger.info(f"Loaded search results from {search_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to load search results {search_file}: {e}")
|
logger.error(f"Failed to load search results {search_file}: {e}")
|
||||||
state_updates["error_message"] = f"Failed to load search results: {e}"
|
state_updates["error_message"] = (
|
||||||
# Decide if this is fatal or if we can continue without old results
|
state_updates.get("error_message", "") + f" Failed to load search results: {e}").strip()
|
||||||
|
|
||||||
return state_updates
|
return state_updates
|
||||||
|
|
||||||
|
|
||||||
def _save_plan_to_md(plan: List[ResearchPlanItem], output_dir: str):
|
def _save_plan_to_md(plan: List[ResearchCategoryItem], output_dir: str):
|
||||||
"""Saves the research plan to a markdown checklist file."""
|
|
||||||
plan_file = os.path.join(output_dir, PLAN_FILENAME)
|
plan_file = os.path.join(output_dir, PLAN_FILENAME)
|
||||||
try:
|
try:
|
||||||
with open(plan_file, "w", encoding="utf-8") as f:
|
with open(plan_file, "w", encoding="utf-8") as f:
|
||||||
f.write("# Research Plan\n\n")
|
f.write(f"# Research Plan\n\n")
|
||||||
for item in plan:
|
for cat_idx, category in enumerate(plan):
|
||||||
marker = "- [x]" if item["status"] == "completed" else "- [ ]"
|
f.write(f"## {cat_idx + 1}. {category['category_name']}\n\n")
|
||||||
f.write(f"{marker} {item['task']}\n")
|
for task_idx, task in enumerate(category['tasks']):
|
||||||
logger.info(f"Research plan saved to {plan_file}")
|
marker = "- [x]" if task["status"] == "completed" else "- [ ]" if task[
|
||||||
|
"status"] == "pending" else "- [-]" # [-] for failed
|
||||||
|
f.write(f" {marker} {task['task_description']}\n")
|
||||||
|
f.write("\n")
|
||||||
|
logger.info(f"Hierarchical research plan saved to {plan_file}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to save research plan to {plan_file}: {e}")
|
logger.error(f"Failed to save research plan to {plan_file}: {e}")
|
||||||
|
|
||||||
@@ -419,7 +455,6 @@ def _save_report_to_md(report: str, output_dir: Path):
|
|||||||
|
|
||||||
|
|
||||||
async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
|
async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||||
"""Generates the initial research plan or refines it if resuming."""
|
|
||||||
logger.info("--- Entering Planning Node ---")
|
logger.info("--- Entering Planning Node ---")
|
||||||
if state.get("stop_requested"):
|
if state.get("stop_requested"):
|
||||||
logger.info("Stop requested, skipping planning.")
|
logger.info("Stop requested, skipping planning.")
|
||||||
@@ -428,293 +463,344 @@ async def planning_node(state: DeepResearchState) -> Dict[str, Any]:
|
|||||||
llm = state["llm"]
|
llm = state["llm"]
|
||||||
topic = state["topic"]
|
topic = state["topic"]
|
||||||
existing_plan = state.get("research_plan")
|
existing_plan = state.get("research_plan")
|
||||||
existing_results = state.get("search_results")
|
|
||||||
output_dir = state["output_dir"]
|
output_dir = state["output_dir"]
|
||||||
|
|
||||||
if existing_plan and state.get("current_step_index", 0) > 0:
|
if existing_plan and (
|
||||||
|
state.get("current_category_index", 0) > 0 or state.get("current_task_index_in_category", 0) > 0):
|
||||||
logger.info("Resuming with existing plan.")
|
logger.info("Resuming with existing plan.")
|
||||||
# Maybe add logic here to let LLM review and potentially adjust the plan
|
|
||||||
# based on existing_results, but for now, we just use the loaded plan.
|
|
||||||
_save_plan_to_md(existing_plan, output_dir) # Ensure it's saved initially
|
_save_plan_to_md(existing_plan, output_dir) # Ensure it's saved initially
|
||||||
return {"research_plan": existing_plan} # Return the loaded plan
|
# current_category_index and current_task_index_in_category should be set by _load_previous_state
|
||||||
|
return {"research_plan": existing_plan}
|
||||||
|
|
||||||
logger.info(f"Generating new research plan for topic: {topic}")
|
logger.info(f"Generating new research plan for topic: {topic}")
|
||||||
|
|
||||||
prompt = ChatPromptTemplate.from_messages(
|
prompt_text = f"""You are a meticulous research assistant. Your goal is to create a hierarchical research plan to thoroughly investigate the topic: "{topic}".
|
||||||
[
|
The plan should be structured into several main research categories. Each category should contain a list of specific, actionable research tasks or questions.
|
||||||
(
|
Format the output as a JSON list of objects. Each object represents a research category and should have:
|
||||||
"system",
|
1. "category_name": A string for the name of the research category.
|
||||||
"""You are a meticulous research assistant. Your goal is to create a step-by-step research plan to thoroughly investigate a given topic.
|
2. "tasks": A list of strings, where each string is a specific research task for that category.
|
||||||
The plan should consist of clear, actionable research tasks or questions. Each step should logically build towards a comprehensive understanding.
|
|
||||||
Format the output as a numbered list. Each item should represent a distinct research step or question.
|
|
||||||
Example:
|
|
||||||
1. Define the core concepts and terminology related to [Topic].
|
|
||||||
2. Identify the key historical developments of [Topic].
|
|
||||||
3. Analyze the current state-of-the-art and recent advancements in [Topic].
|
|
||||||
4. Investigate the major challenges and limitations associated with [Topic].
|
|
||||||
5. Explore the future trends and potential applications of [Topic].
|
|
||||||
6. Summarize the findings and draw conclusions.
|
|
||||||
|
|
||||||
Keep the plan focused and manageable. Aim for 5-10 detailed steps.
|
Example JSON Output:
|
||||||
""",
|
[
|
||||||
),
|
{{
|
||||||
("human", f"Generate a research plan for the topic: {topic}"),
|
"category_name": "Understanding Core Concepts and Definitions",
|
||||||
]
|
"tasks": [
|
||||||
)
|
"Define the primary terminology associated with '{topic}'.",
|
||||||
|
"Identify the fundamental principles and theories underpinning '{topic}'."
|
||||||
|
]
|
||||||
|
}},
|
||||||
|
{{
|
||||||
|
"category_name": "Historical Development and Key Milestones",
|
||||||
|
"tasks": [
|
||||||
|
"Trace the historical evolution of '{topic}'.",
|
||||||
|
"Identify key figures, events, or breakthroughs in the development of '{topic}'."
|
||||||
|
]
|
||||||
|
}},
|
||||||
|
{{
|
||||||
|
"category_name": "Current State-of-the-Art and Applications",
|
||||||
|
"tasks": [
|
||||||
|
"Analyze the current advancements and prominent applications of '{topic}'.",
|
||||||
|
"Investigate ongoing research and active areas of development related to '{topic}'."
|
||||||
|
]
|
||||||
|
}},
|
||||||
|
{{
|
||||||
|
"category_name": "Challenges, Limitations, and Future Outlook",
|
||||||
|
"tasks": [
|
||||||
|
"Identify the major challenges and limitations currently facing '{topic}'.",
|
||||||
|
"Explore potential future trends, ethical considerations, and societal impacts of '{topic}'."
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
|
||||||
|
Generate a plan with 3-10 categories, and 2-6 tasks per category for the topic: "{topic}" according to the complexity of the topic.
|
||||||
|
Ensure the output is a valid JSON array.
|
||||||
|
"""
|
||||||
|
messages = [
|
||||||
|
SystemMessage(content="You are a research planning assistant outputting JSON."),
|
||||||
|
HumanMessage(content=prompt_text)
|
||||||
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await llm.ainvoke(prompt.format_prompt(topic=topic).to_messages())
|
response = await llm.ainvoke(messages)
|
||||||
plan_text = response.content
|
raw_content = response.content
|
||||||
|
# The LLM might wrap the JSON in backticks
|
||||||
|
if raw_content.strip().startswith("```json"):
|
||||||
|
raw_content = raw_content.strip()[7:-3].strip()
|
||||||
|
elif raw_content.strip().startswith("```"):
|
||||||
|
raw_content = raw_content.strip()[3:-3].strip()
|
||||||
|
|
||||||
# Parse the numbered list into the plan structure
|
logger.debug(f"LLM response for plan: {raw_content}")
|
||||||
new_plan: List[ResearchPlanItem] = []
|
parsed_plan_from_llm = json.loads(raw_content)
|
||||||
for i, line in enumerate(plan_text.strip().split("\n")):
|
|
||||||
line = line.strip()
|
new_plan: List[ResearchCategoryItem] = []
|
||||||
if line and (line[0].isdigit() or line.startswith(("*", "-"))):
|
for cat_idx, category_data in enumerate(parsed_plan_from_llm):
|
||||||
# Simple parsing: remove number/bullet and space
|
if not isinstance(category_data,
|
||||||
task_text = (
|
dict) or "category_name" not in category_data or "tasks" not in category_data:
|
||||||
line.split(".", 1)[-1].strip()
|
logger.warning(f"Skipping invalid category data: {category_data}")
|
||||||
if line[0].isdigit()
|
continue
|
||||||
else line[1:].strip()
|
|
||||||
)
|
tasks: List[ResearchTaskItem] = []
|
||||||
if task_text:
|
for task_idx, task_desc in enumerate(category_data["tasks"]):
|
||||||
new_plan.append(
|
if isinstance(task_desc, str):
|
||||||
ResearchPlanItem(
|
tasks.append(
|
||||||
step=i + 1,
|
ResearchTaskItem(
|
||||||
task=task_text,
|
task_description=task_desc,
|
||||||
status="pending",
|
status="pending",
|
||||||
queries=None,
|
queries=None,
|
||||||
result_summary=None,
|
result_summary=None,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
else: # Sometimes LLM puts tasks as {"task": "description"}
|
||||||
|
if isinstance(task_desc, dict) and "task_description" in task_desc:
|
||||||
|
tasks.append(
|
||||||
|
ResearchTaskItem(
|
||||||
|
task_description=task_desc["task_description"],
|
||||||
|
status="pending",
|
||||||
|
queries=None,
|
||||||
|
result_summary=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
elif isinstance(task_desc, dict) and "task" in task_desc: # common LLM mistake
|
||||||
|
tasks.append(
|
||||||
|
ResearchTaskItem(
|
||||||
|
task_description=task_desc["task"],
|
||||||
|
status="pending",
|
||||||
|
queries=None,
|
||||||
|
result_summary=None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Skipping invalid task data: {task_desc} in category {category_data['category_name']}")
|
||||||
|
|
||||||
|
new_plan.append(
|
||||||
|
ResearchCategoryItem(
|
||||||
|
category_name=category_data["category_name"],
|
||||||
|
tasks=tasks,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if not new_plan:
|
if not new_plan:
|
||||||
logger.error("LLM failed to generate a valid plan structure.")
|
logger.error("LLM failed to generate a valid plan structure from JSON.")
|
||||||
return {"error_message": "Failed to generate research plan structure."}
|
return {"error_message": "Failed to generate research plan structure."}
|
||||||
|
|
||||||
logger.info(f"Generated research plan with {len(new_plan)} steps.")
|
logger.info(f"Generated research plan with {len(new_plan)} categories.")
|
||||||
_save_plan_to_md(new_plan, output_dir)
|
_save_plan_to_md(new_plan, output_dir) # Save the hierarchical plan
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"research_plan": new_plan,
|
"research_plan": new_plan,
|
||||||
"current_step_index": 0, # Start from the beginning
|
"current_category_index": 0,
|
||||||
"search_results": [], # Initialize search results
|
"current_task_index_in_category": 0,
|
||||||
|
"search_results": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"Failed to parse JSON from LLM for plan: {e}. Response was: {raw_content}", exc_info=True)
|
||||||
|
return {"error_message": f"LLM generated invalid JSON for research plan: {e}"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error during planning: {e}", exc_info=True)
|
logger.error(f"Error during planning: {e}", exc_info=True)
|
||||||
return {"error_message": f"LLM Error during planning: {e}"}
|
return {"error_message": f"LLM Error during planning: {e}"}
|
||||||
|
|
||||||
|
|
||||||
async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
async def research_execution_node(state: DeepResearchState) -> Dict[str, Any]:
|
||||||
"""
|
|
||||||
Executes the next step in the research plan by invoking the LLM with tools.
|
|
||||||
The LLM decides which tool (e.g., browser search) to use and provides arguments.
|
|
||||||
"""
|
|
||||||
logger.info("--- Entering Research Execution Node ---")
|
logger.info("--- Entering Research Execution Node ---")
|
||||||
if state.get("stop_requested"):
|
if state.get("stop_requested"):
|
||||||
logger.info("Stop requested, skipping research execution.")
|
logger.info("Stop requested, skipping research execution.")
|
||||||
return {
|
return {
|
||||||
"stop_requested": True,
|
"stop_requested": True,
|
||||||
"current_step_index": state["current_step_index"],
|
"current_category_index": state["current_category_index"],
|
||||||
} # Keep index same
|
"current_task_index_in_category": state["current_task_index_in_category"],
|
||||||
|
}
|
||||||
|
|
||||||
plan = state["research_plan"]
|
plan = state["research_plan"]
|
||||||
current_index = state["current_step_index"]
|
cat_idx = state["current_category_index"]
|
||||||
|
task_idx = state["current_task_index_in_category"]
|
||||||
llm = state["llm"]
|
llm = state["llm"]
|
||||||
tools = state["tools"] # Tools are now passed in state
|
tools = state["tools"]
|
||||||
output_dir = str(state["output_dir"])
|
output_dir = str(state["output_dir"])
|
||||||
task_id = state["task_id"]
|
task_id = state["task_id"] # For _AGENT_STOP_FLAGS
|
||||||
# Stop event is bound inside the tool function, no need to pass directly here
|
|
||||||
|
|
||||||
if not plan or current_index >= len(plan):
|
# This check should ideally be handled by `should_continue`
|
||||||
logger.info("Research plan complete or empty.")
|
if not plan or cat_idx >= len(plan):
|
||||||
# This condition should ideally be caught by `should_continue` before reaching here
|
logger.info("Research plan complete or categories exhausted.")
|
||||||
return {}
|
return {} # should route to synthesis
|
||||||
|
|
||||||
current_step = plan[current_index]
|
current_category = plan[cat_idx]
|
||||||
if current_step["status"] == "completed":
|
if task_idx >= len(current_category["tasks"]):
|
||||||
logger.info(f"Step {current_step['step']} already completed, skipping.")
|
logger.info(f"All tasks in category '{current_category['category_name']}' completed. Moving to next category.")
|
||||||
return {"current_step_index": current_index + 1} # Move to next step
|
# This logic is now effectively handled by should_continue and the index updates below
|
||||||
|
# The next iteration will be caught by should_continue or this node with updated indices
|
||||||
|
return {
|
||||||
|
"current_category_index": cat_idx + 1,
|
||||||
|
"current_task_index_in_category": 0,
|
||||||
|
"messages": state["messages"] # Pass messages along
|
||||||
|
}
|
||||||
|
|
||||||
|
current_task = current_category["tasks"][task_idx]
|
||||||
|
|
||||||
|
if current_task["status"] == "completed":
|
||||||
|
logger.info(
|
||||||
|
f"Task '{current_task['task_description']}' in category '{current_category['category_name']}' already completed. Skipping.")
|
||||||
|
# Logic to find next task
|
||||||
|
next_task_idx = task_idx + 1
|
||||||
|
next_cat_idx = cat_idx
|
||||||
|
if next_task_idx >= len(current_category["tasks"]):
|
||||||
|
next_cat_idx += 1
|
||||||
|
next_task_idx = 0
|
||||||
|
return {
|
||||||
|
"current_category_index": next_cat_idx,
|
||||||
|
"current_task_index_in_category": next_task_idx,
|
||||||
|
"messages": state["messages"] # Pass messages along
|
||||||
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Executing research step {current_step['step']}: {current_step['task']}"
|
f"Executing research task: '{current_task['task_description']}' (Category: '{current_category['category_name']}')"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bind tools to the LLM for this call
|
|
||||||
llm_with_tools = llm.bind_tools(tools)
|
llm_with_tools = llm.bind_tools(tools)
|
||||||
if state["messages"]:
|
|
||||||
current_task_message = [
|
# Construct messages for LLM invocation
|
||||||
HumanMessage(
|
task_prompt_content = (
|
||||||
content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
|
f"Current Research Category: {current_category['category_name']}\n"
|
||||||
)
|
f"Specific Task: {current_task['task_description']}\n\n"
|
||||||
]
|
"Please use the available tools, especially 'parallel_browser_search', to gather information for this specific task. "
|
||||||
invocation_messages = state["messages"] + current_task_message
|
"Provide focused search queries relevant ONLY to this task. "
|
||||||
|
"If you believe you have sufficient information from previous steps for this specific task, you can indicate that you are ready to summarize or that no further search is needed."
|
||||||
|
)
|
||||||
|
current_task_message_history = [
|
||||||
|
HumanMessage(content=task_prompt_content)
|
||||||
|
]
|
||||||
|
if not state["messages"]: # First actual execution message
|
||||||
|
invocation_messages = [
|
||||||
|
SystemMessage(
|
||||||
|
content="You are a research assistant executing one task of a research plan. Focus on the current task only."),
|
||||||
|
] + current_task_message_history
|
||||||
else:
|
else:
|
||||||
current_task_message = [
|
invocation_messages = state["messages"] + current_task_message_history
|
||||||
SystemMessage(
|
|
||||||
content="You are a research assistant executing one step of a research plan. Use the available tools, especially the 'parallel_browser_search' tool, to gather information needed for the current task. Be precise with your search queries if using the browser tool."
|
|
||||||
),
|
|
||||||
HumanMessage(
|
|
||||||
content=f"Research Task (Step {current_step['step']}): {current_step['task']}"
|
|
||||||
),
|
|
||||||
]
|
|
||||||
invocation_messages = current_task_message
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Invoke the LLM, expecting it to make a tool call
|
logger.info(f"Invoking LLM with tools for task: {current_task['task_description']}")
|
||||||
logger.info(f"Invoking LLM with tools for task: {current_step['task']}")
|
|
||||||
ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
|
ai_response: BaseMessage = await llm_with_tools.ainvoke(invocation_messages)
|
||||||
logger.info("LLM invocation complete.")
|
logger.info("LLM invocation complete.")
|
||||||
|
|
||||||
tool_results = []
|
tool_results = []
|
||||||
executed_tool_names = []
|
executed_tool_names = []
|
||||||
|
current_search_results = state.get("search_results", []) # Get existing search results
|
||||||
|
|
||||||
if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
|
if not isinstance(ai_response, AIMessage) or not ai_response.tool_calls:
|
||||||
# LLM didn't call a tool. Maybe it answered directly? Or failed?
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"LLM did not call any tool for step {current_step['step']}. Response: {ai_response.content[:100]}..."
|
f"LLM did not call any tool for task '{current_task['task_description']}'. Response: {ai_response.content[:100]}..."
|
||||||
)
|
|
||||||
# How to handle this? Mark step as failed? Or store the content?
|
|
||||||
# Let's mark as failed for now, assuming a tool was expected.
|
|
||||||
current_step["status"] = "failed"
|
|
||||||
current_step["result_summary"] = "LLM did not use a tool as expected."
|
|
||||||
_save_plan_to_md(plan, output_dir)
|
|
||||||
return {
|
|
||||||
"research_plan": plan,
|
|
||||||
"status": "pending",
|
|
||||||
"current_step_index": current_index,
|
|
||||||
"messages": [
|
|
||||||
f"LLM failed to call a tool for step {current_step['step']}. Response: {ai_response.content}"
|
|
||||||
f". Please use tool to do research unless you are thinking or summary"],
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process tool calls
|
|
||||||
for tool_call in ai_response.tool_calls:
|
|
||||||
tool_name = tool_call.get("name")
|
|
||||||
tool_args = tool_call.get("args", {})
|
|
||||||
tool_call_id = tool_call.get("id") # Important for ToolMessage
|
|
||||||
|
|
||||||
logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
|
|
||||||
executed_tool_names.append(tool_name)
|
|
||||||
|
|
||||||
# Find the corresponding tool instance
|
|
||||||
selected_tool = next((t for t in tools if t.name == tool_name), None)
|
|
||||||
|
|
||||||
if not selected_tool:
|
|
||||||
logger.error(f"LLM called tool '{tool_name}' which is not available.")
|
|
||||||
# Create a ToolMessage indicating the error
|
|
||||||
tool_results.append(
|
|
||||||
ToolMessage(
|
|
||||||
content=f"Error: Tool '{tool_name}' not found.",
|
|
||||||
tool_call_id=tool_call_id,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
continue # Skip to next tool call if any
|
|
||||||
|
|
||||||
# Execute the tool
|
|
||||||
try:
|
|
||||||
# Stop check before executing the tool (tool itself also checks)
|
|
||||||
stop_event = _AGENT_STOP_FLAGS.get(task_id)
|
|
||||||
if stop_event and stop_event.is_set():
|
|
||||||
logger.info(f"Stop requested before executing tool: {tool_name}")
|
|
||||||
current_step["status"] = "pending" # Not completed due to stop
|
|
||||||
_save_plan_to_md(plan, output_dir)
|
|
||||||
return {"stop_requested": True, "research_plan": plan}
|
|
||||||
|
|
||||||
logger.info(f"Executing tool: {tool_name}")
|
|
||||||
# Assuming tool functions handle async correctly
|
|
||||||
tool_output = await selected_tool.ainvoke(tool_args)
|
|
||||||
logger.info(f"Tool '{tool_name}' executed successfully.")
|
|
||||||
browser_tool_called = "parallel_browser_search" in executed_tool_names
|
|
||||||
# Append result to overall search results
|
|
||||||
current_search_results = state.get("search_results", [])
|
|
||||||
if browser_tool_called: # Specific handling for browser tool output
|
|
||||||
current_search_results.extend(tool_output)
|
|
||||||
else: # Handle other tool outputs (e.g., file tools return strings)
|
|
||||||
# Store it associated with the step? Or a generic log?
|
|
||||||
# Let's just log it for now. Need better handling for diverse tool outputs.
|
|
||||||
logger.info(
|
|
||||||
f"Result from tool '{tool_name}': {str(tool_output)[:200]}..."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Store result for potential next LLM call (if we were doing multi-turn)
|
|
||||||
tool_results.append(
|
|
||||||
ToolMessage(
|
|
||||||
content=json.dumps(tool_output), tool_call_id=tool_call_id
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
|
|
||||||
tool_results.append(
|
|
||||||
ToolMessage(
|
|
||||||
content=f"Error executing tool {tool_name}: {e}",
|
|
||||||
tool_call_id=tool_call_id,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
# Also update overall state search_results with error?
|
|
||||||
current_search_results = state.get("search_results", [])
|
|
||||||
current_search_results.append(
|
|
||||||
{
|
|
||||||
"tool_name": tool_name,
|
|
||||||
"args": tool_args,
|
|
||||||
"status": "failed",
|
|
||||||
"error": str(e),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Basic check: Did the browser tool run at all? (More specific checks needed)
|
|
||||||
browser_tool_called = "parallel_browser_search" in executed_tool_names
|
|
||||||
# We might need a more nuanced status based on the *content* of tool_results
|
|
||||||
step_failed = (
|
|
||||||
any("Error:" in str(tr.content) for tr in tool_results)
|
|
||||||
or not browser_tool_called
|
|
||||||
)
|
|
||||||
|
|
||||||
if step_failed:
|
|
||||||
logger.warning(
|
|
||||||
f"Step {current_step['step']} failed or did not yield results via browser search."
|
|
||||||
)
|
|
||||||
current_step["status"] = "failed"
|
|
||||||
current_step["result_summary"] = (
|
|
||||||
f"Tool execution failed or browser tool not used. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
|
|
||||||
)
|
)
|
||||||
|
current_task["status"] = "pending" # Or "completed_no_tool" if LLM explains it's done
|
||||||
|
current_task["result_summary"] = f"LLM did not use a tool. Response: {ai_response.content}"
|
||||||
|
current_task["current_category_index"] = cat_idx
|
||||||
|
current_task["current_task_index_in_category"] = task_idx
|
||||||
|
return current_task
|
||||||
|
# We still save the plan and advance.
|
||||||
else:
|
else:
|
||||||
logger.info(
|
# Process tool calls
|
||||||
f"Step {current_step['step']} completed using tool(s): {executed_tool_names}."
|
for tool_call in ai_response.tool_calls:
|
||||||
)
|
tool_name = tool_call.get("name")
|
||||||
current_step["status"] = "completed"
|
tool_args = tool_call.get("args", {})
|
||||||
|
tool_call_id = tool_call.get("id")
|
||||||
|
|
||||||
current_step["result_summary"] = (
|
logger.info(f"LLM requested tool call: {tool_name} with args: {tool_args}")
|
||||||
f"Executed tool(s): {', '.join(executed_tool_names)}."
|
executed_tool_names.append(tool_name)
|
||||||
)
|
selected_tool = next((t for t in tools if t.name == tool_name), None)
|
||||||
|
|
||||||
|
if not selected_tool:
|
||||||
|
logger.error(f"LLM called tool '{tool_name}' which is not available.")
|
||||||
|
tool_results.append(
|
||||||
|
ToolMessage(content=f"Error: Tool '{tool_name}' not found.", tool_call_id=tool_call_id))
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
stop_event = _AGENT_STOP_FLAGS.get(task_id)
|
||||||
|
if stop_event and stop_event.is_set():
|
||||||
|
logger.info(f"Stop requested before executing tool: {tool_name}")
|
||||||
|
current_task["status"] = "pending" # Or a new "stopped" status
|
||||||
|
_save_plan_to_md(plan, output_dir)
|
||||||
|
return {"stop_requested": True, "research_plan": plan, "current_category_index": cat_idx,
|
||||||
|
"current_task_index_in_category": task_idx}
|
||||||
|
|
||||||
|
logger.info(f"Executing tool: {tool_name}")
|
||||||
|
tool_output = await selected_tool.ainvoke(tool_args)
|
||||||
|
logger.info(f"Tool '{tool_name}' executed successfully.")
|
||||||
|
|
||||||
|
if tool_name == "parallel_browser_search":
|
||||||
|
current_search_results.extend(tool_output) # tool_output is List[Dict]
|
||||||
|
else: # For other tools, we might need specific handling or just log
|
||||||
|
logger.info(f"Result from tool '{tool_name}': {str(tool_output)[:200]}...")
|
||||||
|
# Storing non-browser results might need a different structure or key in search_results
|
||||||
|
current_search_results.append(
|
||||||
|
{"tool_name": tool_name, "args": tool_args, "output": str(tool_output),
|
||||||
|
"status": "completed"})
|
||||||
|
|
||||||
|
tool_results.append(ToolMessage(content=json.dumps(tool_output), tool_call_id=tool_call_id))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error executing tool '{tool_name}': {e}", exc_info=True)
|
||||||
|
tool_results.append(
|
||||||
|
ToolMessage(content=f"Error executing tool {tool_name}: {e}", tool_call_id=tool_call_id))
|
||||||
|
current_search_results.append(
|
||||||
|
{"tool_name": tool_name, "args": tool_args, "status": "failed", "error": str(e)})
|
||||||
|
|
||||||
|
# After processing all tool calls for this task
|
||||||
|
step_failed_tool_execution = any("Error:" in str(tr.content) for tr in tool_results)
|
||||||
|
# Consider a task successful if a browser search was attempted and didn't immediately error out during call
|
||||||
|
# The browser search itself returns status for each query.
|
||||||
|
browser_tool_attempted_successfully = "parallel_browser_search" in executed_tool_names and not step_failed_tool_execution
|
||||||
|
|
||||||
|
if step_failed_tool_execution:
|
||||||
|
current_task["status"] = "failed"
|
||||||
|
current_task[
|
||||||
|
"result_summary"] = f"Tool execution failed. Errors: {[tr.content for tr in tool_results if 'Error' in str(tr.content)]}"
|
||||||
|
elif executed_tool_names: # If any tool was called
|
||||||
|
current_task["status"] = "completed"
|
||||||
|
current_task["result_summary"] = f"Executed tool(s): {', '.join(executed_tool_names)}."
|
||||||
|
# TODO: Could ask LLM to summarize the tool_results for this task if needed, rather than just listing tools.
|
||||||
|
else: # No tool calls but AI response had .tool_calls structure (empty)
|
||||||
|
current_task["status"] = "failed" # Or a more specific status
|
||||||
|
current_task["result_summary"] = "LLM prepared for tool call but provided no tools."
|
||||||
|
|
||||||
|
# Save progress
|
||||||
_save_plan_to_md(plan, output_dir)
|
_save_plan_to_md(plan, output_dir)
|
||||||
_save_search_results_to_json(current_search_results, output_dir)
|
_save_search_results_to_json(current_search_results, output_dir)
|
||||||
|
|
||||||
|
# Determine next indices
|
||||||
|
next_task_idx = task_idx + 1
|
||||||
|
next_cat_idx = cat_idx
|
||||||
|
if next_task_idx >= len(current_category["tasks"]):
|
||||||
|
next_cat_idx += 1
|
||||||
|
next_task_idx = 0
|
||||||
|
|
||||||
|
updated_messages = state["messages"] + current_task_message_history + [ai_response] + tool_results
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"research_plan": plan,
|
"research_plan": plan,
|
||||||
"search_results": current_search_results, # Update with new results
|
"search_results": current_search_results,
|
||||||
"current_step_index": current_index + 1,
|
"current_category_index": next_cat_idx,
|
||||||
"messages": state["messages"]
|
"current_task_index_in_category": next_task_idx,
|
||||||
+ current_task_message
|
"messages": updated_messages,
|
||||||
+ [ai_response]
|
|
||||||
+ tool_results,
|
|
||||||
# Optionally return the tool_results messages if needed by downstream nodes
|
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(f"Unhandled error during research execution for task '{current_task['task_description']}': {e}",
|
||||||
f"Unhandled error during research execution node for step {current_step['step']}: {e}",
|
exc_info=True)
|
||||||
exc_info=True,
|
current_task["status"] = "failed"
|
||||||
)
|
|
||||||
current_step["status"] = "failed"
|
|
||||||
_save_plan_to_md(plan, output_dir)
|
_save_plan_to_md(plan, output_dir)
|
||||||
|
# Determine next indices even on error to attempt to move on
|
||||||
|
next_task_idx = task_idx + 1
|
||||||
|
next_cat_idx = cat_idx
|
||||||
|
if next_task_idx >= len(current_category["tasks"]):
|
||||||
|
next_cat_idx += 1
|
||||||
|
next_task_idx = 0
|
||||||
return {
|
return {
|
||||||
"research_plan": plan,
|
"research_plan": plan,
|
||||||
"current_step_index": current_index + 1, # Move on even if error?
|
"current_category_index": next_cat_idx,
|
||||||
"error_message": f"Core Execution Error on step {current_step['step']}: {e}",
|
"current_task_index_in_category": next_task_idx,
|
||||||
|
"error_message": f"Core Execution Error on task '{current_task['task_description']}': {e}",
|
||||||
|
"messages": state["messages"] + current_task_message_history # Preserve messages up to error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -747,36 +833,37 @@ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
|
|||||||
references = {}
|
references = {}
|
||||||
ref_count = 1
|
ref_count = 1
|
||||||
for i, result_entry in enumerate(search_results):
|
for i, result_entry in enumerate(search_results):
|
||||||
query = result_entry.get("query", "Unknown Query")
|
query = result_entry.get("query", "Unknown Query") # From parallel_browser_search
|
||||||
|
tool_name = result_entry.get("tool_name") # From other tools
|
||||||
status = result_entry.get("status", "unknown")
|
status = result_entry.get("status", "unknown")
|
||||||
result_data = result_entry.get(
|
result_data = result_entry.get("result") # From BrowserUseAgent's final_result
|
||||||
"result"
|
tool_output_str = result_entry.get("output") # From other tools
|
||||||
) # This should be the dict with summary, title, url
|
|
||||||
error = result_entry.get("error")
|
|
||||||
|
|
||||||
if status == "completed" and result_data:
|
if tool_name == "parallel_browser_search" and status == "completed" and result_data:
|
||||||
summary = result_data
|
# result_data is the summary from BrowserUseAgent
|
||||||
formatted_results += f'### Finding from Query: "{query}"\n'
|
formatted_results += f'### Finding from Web Search Query: "{query}"\n'
|
||||||
formatted_results += f"- **Summary:**\n{summary}\n"
|
formatted_results += f"- **Summary:**\n{result_data}\n" # result_data is already a summary string here
|
||||||
|
# If result_data contained title/URL, you'd format them here.
|
||||||
|
# The current BrowserUseAgent returns a string summary directly as 'final_data' in run_single_browser_task
|
||||||
|
formatted_results += "---\n"
|
||||||
|
elif tool_name != "parallel_browser_search" and status == "completed" and tool_output_str:
|
||||||
|
formatted_results += f'### Finding from Tool: "{tool_name}" (Args: {result_entry.get("args")})\n'
|
||||||
|
formatted_results += f"- **Output:**\n{tool_output_str}\n"
|
||||||
formatted_results += "---\n"
|
formatted_results += "---\n"
|
||||||
|
|
||||||
elif status == "failed":
|
elif status == "failed":
|
||||||
formatted_results += f'### Failed Query: "{query}"\n'
|
error = result_entry.get("error")
|
||||||
|
q_or_t = f"Query: \"{query}\"" if query != "Unknown Query" else f"Tool: \"{tool_name}\""
|
||||||
|
formatted_results += f'### Failed {q_or_t}\n'
|
||||||
formatted_results += f"- **Error:** {error}\n"
|
formatted_results += f"- **Error:** {error}\n"
|
||||||
formatted_results += "---\n"
|
formatted_results += "---\n"
|
||||||
# Ignore cancelled/other statuses for the report content
|
|
||||||
|
|
||||||
# Prepare the research plan context
|
# Prepare the research plan context
|
||||||
plan_summary = "\nResearch Plan Followed:\n"
|
plan_summary = "\nResearch Plan Followed:\n"
|
||||||
for item in plan:
|
for cat_idx, category in enumerate(plan):
|
||||||
marker = (
|
plan_summary += f"\n#### Category {cat_idx + 1}: {category['category_name']}\n"
|
||||||
"- [x]"
|
for task_idx, task in enumerate(category['tasks']):
|
||||||
if item["status"] == "completed"
|
marker = "[x]" if task["status"] == "completed" else "[ ]" if task["status"] == "pending" else "[-]"
|
||||||
else "- [ ] (Failed)"
|
plan_summary += f" - {marker} {task['task_description']}\n"
|
||||||
if item["status"] == "failed"
|
|
||||||
else "- [ ]"
|
|
||||||
)
|
|
||||||
plan_summary += f"{marker} {item['task']}\n"
|
|
||||||
|
|
||||||
synthesis_prompt = ChatPromptTemplate.from_messages(
|
synthesis_prompt = ChatPromptTemplate.from_messages(
|
||||||
[
|
[
|
||||||
@@ -785,29 +872,28 @@ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
|
|||||||
"""You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
|
"""You are a professional researcher tasked with writing a comprehensive and well-structured report based on collected findings.
|
||||||
The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
|
The report should address the research topic thoroughly, synthesizing the information gathered from various sources.
|
||||||
Structure the report logically:
|
Structure the report logically:
|
||||||
1. **Introduction:** Briefly introduce the topic and the report's scope (mentioning the research plan followed is good).
|
1. Briefly introduce the topic and the report's scope (mentioning the research plan followed, including categories and tasks, is good).
|
||||||
2. **Main Body:** Discuss the key findings, organizing them thematically or according to the research plan steps. Analyze, compare, and contrast information from different sources where applicable. **Crucially, cite your sources using bracketed numbers [X] corresponding to the reference list.**
|
2. Discuss the key findings, organizing them thematically, possibly aligning with the research categories. Analyze, compare, and contrast information.
|
||||||
3. **Conclusion:** Summarize the main points and offer concluding thoughts or potential areas for further research.
|
3. Summarize the main points and offer concluding thoughts.
|
||||||
|
|
||||||
Ensure the tone is objective, professional, and analytical. Base the report **strictly** on the provided findings. Do not add external knowledge. If findings are contradictory or incomplete, acknowledge this.
|
Ensure the tone is objective and professional.
|
||||||
""",
|
If findings are contradictory or incomplete, acknowledge this.
|
||||||
|
""", # Removed citation part for simplicity for now, as browser agent returns summaries.
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"human",
|
"human",
|
||||||
f"""
|
f"""
|
||||||
**Research Topic:** {topic}
|
**Research Topic:** {topic}
|
||||||
|
|
||||||
{plan_summary}
|
{plan_summary}
|
||||||
|
|
||||||
**Collected Findings:**
|
**Collected Findings:**
|
||||||
```
|
```
|
||||||
{formatted_results}
|
{formatted_results}
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
Please generate the final research report in Markdown format based **only** on the information above.
|
||||||
|
""",
|
||||||
Please generate the final research report in Markdown format based **only** on the information above. Ensure all claims derived from the findings are properly cited using the format [Reference_ID].
|
|
||||||
""",
|
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
@@ -818,7 +904,6 @@ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
|
|||||||
topic=topic,
|
topic=topic,
|
||||||
plan_summary=plan_summary,
|
plan_summary=plan_summary,
|
||||||
formatted_results=formatted_results,
|
formatted_results=formatted_results,
|
||||||
references=references,
|
|
||||||
).to_messages()
|
).to_messages()
|
||||||
)
|
)
|
||||||
final_report_md = response.content
|
final_report_md = response.content
|
||||||
@@ -847,34 +932,44 @@ async def synthesis_node(state: DeepResearchState) -> Dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def should_continue(state: DeepResearchState) -> str:
|
def should_continue(state: DeepResearchState) -> str:
|
||||||
"""Determines the next step based on the current state."""
|
|
||||||
logger.info("--- Evaluating Condition: Should Continue? ---")
|
logger.info("--- Evaluating Condition: Should Continue? ---")
|
||||||
if state.get("stop_requested"):
|
if state.get("stop_requested"):
|
||||||
logger.info("Stop requested, routing to END.")
|
logger.info("Stop requested, routing to END.")
|
||||||
return "end_run" # Go to a dedicated end node for cleanup if needed
|
return "end_run"
|
||||||
if state.get("error_message"):
|
if state.get("error_message") and "Core Execution Error" in state["error_message"]: # Critical error in node
|
||||||
logger.warning(f"Error detected: {state['error_message']}. Routing to END.")
|
logger.warning(f"Critical error detected: {state['error_message']}. Routing to END.")
|
||||||
# Decide if errors should halt execution or if it should try to synthesize anyway
|
return "end_run"
|
||||||
return "end_run" # Stop on error for now
|
|
||||||
|
|
||||||
plan = state.get("research_plan")
|
plan = state.get("research_plan")
|
||||||
current_index = state.get("current_step_index", 0)
|
cat_idx = state.get("current_category_index", 0)
|
||||||
|
task_idx = state.get("current_task_index_in_category", 0) # This is the *next* task to check
|
||||||
|
|
||||||
if not plan:
|
if not plan:
|
||||||
logger.warning(
|
logger.warning("No research plan found. Routing to END.")
|
||||||
"No research plan found, cannot continue execution. Routing to END."
|
return "end_run"
|
||||||
)
|
|
||||||
return "end_run" # Should not happen if planning node ran correctly
|
|
||||||
|
|
||||||
# Check if there are pending steps in the plan
|
# Check if the current indices point to a valid pending task
|
||||||
if current_index < len(plan):
|
if cat_idx < len(plan):
|
||||||
logger.info(
|
current_category = plan[cat_idx]
|
||||||
f"Plan has pending steps (current index {current_index}/{len(plan)}). Routing to Research Execution."
|
if task_idx < len(current_category["tasks"]):
|
||||||
)
|
# We are trying to execute the task at plan[cat_idx]["tasks"][task_idx]
|
||||||
return "execute_research"
|
# The research_execution_node will handle if it's already completed.
|
||||||
else:
|
logger.info(
|
||||||
logger.info("All plan steps processed. Routing to Synthesis.")
|
f"Plan has potential pending tasks (next up: Category {cat_idx}, Task {task_idx}). Routing to Research Execution."
|
||||||
return "synthesize_report"
|
)
|
||||||
|
return "execute_research"
|
||||||
|
else: # task_idx is out of bounds for current category, means we need to check next category
|
||||||
|
if cat_idx + 1 < len(plan): # If there is a next category
|
||||||
|
logger.info(
|
||||||
|
f"Finished tasks in category {cat_idx}. Moving to category {cat_idx + 1}. Routing to Research Execution."
|
||||||
|
)
|
||||||
|
# research_execution_node will update state to {current_category_index: cat_idx + 1, current_task_index_in_category: 0}
|
||||||
|
# Or rather, the previous execution node already set these indices to the start of the next category.
|
||||||
|
return "execute_research"
|
||||||
|
|
||||||
|
# If we've gone through all categories and tasks (cat_idx >= len(plan))
|
||||||
|
logger.info("All plan categories and tasks processed or current indices are out of bounds. Routing to Synthesis.")
|
||||||
|
return "synthesize_report"
|
||||||
|
|
||||||
|
|
||||||
# --- DeepSearchAgent Class ---
|
# --- DeepSearchAgent Class ---
|
||||||
@@ -1033,22 +1128,24 @@ class DeepResearchAgent:
|
|||||||
"messages": [],
|
"messages": [],
|
||||||
"llm": self.llm,
|
"llm": self.llm,
|
||||||
"tools": agent_tools,
|
"tools": agent_tools,
|
||||||
"output_dir": output_dir,
|
"output_dir": Path(output_dir),
|
||||||
"browser_config": self.browser_config,
|
"browser_config": self.browser_config,
|
||||||
"final_report": None,
|
"final_report": None,
|
||||||
"current_step_index": 0,
|
"current_category_index": 0,
|
||||||
|
"current_task_index_in_category": 0,
|
||||||
"stop_requested": False,
|
"stop_requested": False,
|
||||||
"error_message": None,
|
"error_message": None,
|
||||||
}
|
}
|
||||||
|
|
||||||
loaded_state = {}
|
|
||||||
if task_id:
|
if task_id:
|
||||||
logger.info(f"Attempting to resume task {task_id}...")
|
logger.info(f"Attempting to resume task {task_id}...")
|
||||||
loaded_state = _load_previous_state(task_id, output_dir)
|
loaded_state = _load_previous_state(task_id, output_dir)
|
||||||
initial_state.update(loaded_state)
|
initial_state.update(loaded_state)
|
||||||
if loaded_state.get("research_plan"):
|
if loaded_state.get("research_plan"):
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Resuming with {len(loaded_state['research_plan'])} plan steps and {len(loaded_state.get('search_results', []))} existing results."
|
f"Resuming with {len(loaded_state['research_plan'])} plan categories "
|
||||||
|
f"and {len(loaded_state.get('search_results', []))} existing results. "
|
||||||
|
f"Next task: Cat {initial_state['current_category_index']}, Task {initial_state['current_task_index_in_category']}"
|
||||||
)
|
)
|
||||||
initial_state["topic"] = (
|
initial_state["topic"] = (
|
||||||
topic # Allow overriding topic even when resuming? Or use stored topic? Let's use new one.
|
topic # Allow overriding topic even when resuming? Or use stored topic? Let's use new one.
|
||||||
@@ -1057,7 +1154,6 @@ class DeepResearchAgent:
|
|||||||
logger.warning(
|
logger.warning(
|
||||||
f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
|
f"Resume requested for {task_id}, but no previous plan found. Starting fresh."
|
||||||
)
|
)
|
||||||
initial_state["current_step_index"] = 0
|
|
||||||
|
|
||||||
# --- Execute Graph using ainvoke ---
|
# --- Execute Graph using ainvoke ---
|
||||||
final_state = None
|
final_state = None
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import logging
|
import logging
|
||||||
from gradio.components import Component
|
from gradio.components import Component
|
||||||
@@ -56,7 +58,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
|
|||||||
)
|
)
|
||||||
keep_browser_open = gr.Checkbox(
|
keep_browser_open = gr.Checkbox(
|
||||||
label="Keep Browser Open",
|
label="Keep Browser Open",
|
||||||
value=True,
|
value=os.getenv("KEEP_BROWSER_OPEN", True),
|
||||||
info="Keep Browser Open between Tasks",
|
info="Keep Browser Open between Tasks",
|
||||||
interactive=True
|
interactive=True
|
||||||
)
|
)
|
||||||
@@ -91,6 +93,7 @@ def create_browser_settings_tab(webui_manager: WebuiManager):
|
|||||||
with gr.Row():
|
with gr.Row():
|
||||||
cdp_url = gr.Textbox(
|
cdp_url = gr.Textbox(
|
||||||
label="CDP URL",
|
label="CDP URL",
|
||||||
|
value=os.getenv("BROWSER_CDP", None),
|
||||||
info="CDP URL for browser remote debugging",
|
info="CDP URL for browser remote debugging",
|
||||||
interactive=True,
|
interactive=True,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from browser_use.agent.views import (
|
|||||||
AgentOutput,
|
AgentOutput,
|
||||||
)
|
)
|
||||||
from browser_use.browser.browser import BrowserConfig
|
from browser_use.browser.browser import BrowserConfig
|
||||||
from browser_use.browser.context import BrowserContext, BrowserContextWindowSize, BrowserContextConfig
|
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||||
from browser_use.browser.views import BrowserState
|
from browser_use.browser.views import BrowserState
|
||||||
from gradio.components import Component
|
from gradio.components import Component
|
||||||
from langchain_core.language_models.chat_models import BaseChatModel
|
from langchain_core.language_models.chat_models import BaseChatModel
|
||||||
@@ -451,20 +451,16 @@ async def run_agent_task(
|
|||||||
if not webui_manager.bu_browser:
|
if not webui_manager.bu_browser:
|
||||||
logger.info("Launching new browser instance.")
|
logger.info("Launching new browser instance.")
|
||||||
extra_args = [f"--window-size={window_w},{window_h}"]
|
extra_args = [f"--window-size={window_w},{window_h}"]
|
||||||
if browser_user_data_dir:
|
|
||||||
extra_args.append(f"--user-data-dir={browser_user_data_dir}")
|
|
||||||
|
|
||||||
if use_own_browser:
|
if use_own_browser:
|
||||||
browser_binary_path = (
|
browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path
|
||||||
os.getenv("CHROME_PATH", None) or browser_binary_path
|
|
||||||
)
|
|
||||||
if browser_binary_path == "":
|
if browser_binary_path == "":
|
||||||
browser_binary_path = None
|
browser_binary_path = None
|
||||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None)
|
||||||
if chrome_user_data:
|
if browser_user_data:
|
||||||
extra_args += [f"--user-data-dir={chrome_user_data}"]
|
extra_args += [f"--user-data-dir={browser_user_data}"]
|
||||||
else:
|
else:
|
||||||
browser_binary_path = None
|
browser_binary_path = None
|
||||||
|
|
||||||
webui_manager.bu_browser = CustomBrowser(
|
webui_manager.bu_browser = CustomBrowser(
|
||||||
config=BrowserConfig(
|
config=BrowserConfig(
|
||||||
headless=headless,
|
headless=headless,
|
||||||
@@ -485,7 +481,8 @@ async def run_agent_task(
|
|||||||
if save_recording_path
|
if save_recording_path
|
||||||
else None,
|
else None,
|
||||||
save_downloads_path=save_download_path if save_download_path else None,
|
save_downloads_path=save_download_path if save_download_path else None,
|
||||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
window_height=window_h,
|
||||||
|
window_width=window_w,
|
||||||
)
|
)
|
||||||
if not webui_manager.bu_browser:
|
if not webui_manager.bu_browser:
|
||||||
raise ValueError("Browser not initialized, cannot create context.")
|
raise ValueError("Browser not initialized, cannot create context.")
|
||||||
|
|||||||
@@ -66,8 +66,8 @@ startsecs=3
|
|||||||
depends_on=x11vnc
|
depends_on=x11vnc
|
||||||
|
|
||||||
[program:persistent_browser]
|
[program:persistent_browser]
|
||||||
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
|
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>",BROWSER_USER_DATA="/app/data/chrome_data",BROWSER_DEBUGGING_PORT="%(ENV_BROWSER_DEBUGGING_PORT)s",BROWSER_DEBUGGING_HOST="%(ENV_BROWSER_DEBUGGING_HOST)s"
|
||||||
command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
|
command=bash -c "mkdir -p %(ENV_BROWSER_USER_DATA)s && sleep 8 && $(find $PLAYWRIGHT_BROWSERS_PATH/chrome-*/chrome-linux -name chrome || find /root/.cache/ms-playwright/chrome-*/chrome-linux -name chrome || find /opt/google/chrome -name chrome || echo \"/usr/bin/google-chrome-stable\") --user-data-dir=%(ENV_BROWSER_USER_DATA)s --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=%(ENV_BROWSER_DEBUGGING_PORT)s --remote-debugging-address=%(ENV_BROWSER_DEBUGGING_HOST)s --enable-features=NetworkService,NetworkServiceInProcess --disable-features=ImprovedCookieControls \"$START_URL\""
|
||||||
autorestart=true
|
autorestart=true
|
||||||
stdout_logfile=/dev/stdout
|
stdout_logfile=/dev/stdout
|
||||||
stdout_logfile_maxbytes=0
|
stdout_logfile_maxbytes=0
|
||||||
@@ -93,4 +93,4 @@ startretries=3
|
|||||||
startsecs=3
|
startsecs=3
|
||||||
stopsignal=TERM
|
stopsignal=TERM
|
||||||
stopwaitsecs=10
|
stopwaitsecs=10
|
||||||
depends_on=persistent_browser
|
depends_on=persistent_browser
|
||||||
@@ -20,8 +20,7 @@ from src.utils import utils
|
|||||||
async def test_browser_use_agent():
|
async def test_browser_use_agent():
|
||||||
from browser_use.browser.browser import Browser, BrowserConfig
|
from browser_use.browser.browser import Browser, BrowserConfig
|
||||||
from browser_use.browser.context import (
|
from browser_use.browser.context import (
|
||||||
BrowserContextConfig,
|
BrowserContextConfig
|
||||||
BrowserContextWindowSize,
|
|
||||||
)
|
)
|
||||||
from browser_use.agent.service import Agent
|
from browser_use.agent.service import Agent
|
||||||
|
|
||||||
@@ -38,12 +37,12 @@ async def test_browser_use_agent():
|
|||||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||||
# )
|
# )
|
||||||
|
|
||||||
# llm = utils.get_llm_model(
|
llm = llm_provider.get_llm_model(
|
||||||
# provider="google",
|
provider="google",
|
||||||
# model_name="gemini-2.0-flash",
|
model_name="gemini-2.0-flash",
|
||||||
# temperature=0.6,
|
temperature=0.6,
|
||||||
# api_key=os.getenv("GOOGLE_API_KEY", "")
|
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||||
# )
|
)
|
||||||
|
|
||||||
# llm = utils.get_llm_model(
|
# llm = utils.get_llm_model(
|
||||||
# provider="deepseek",
|
# provider="deepseek",
|
||||||
@@ -67,13 +66,13 @@ async def test_browser_use_agent():
|
|||||||
|
|
||||||
window_w, window_h = 1280, 1100
|
window_w, window_h = 1280, 1100
|
||||||
|
|
||||||
llm = llm_provider.get_llm_model(
|
# llm = llm_provider.get_llm_model(
|
||||||
provider="azure_openai",
|
# provider="azure_openai",
|
||||||
model_name="gpt-4o",
|
# model_name="gpt-4o",
|
||||||
temperature=0.5,
|
# temperature=0.5,
|
||||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||||
)
|
# )
|
||||||
|
|
||||||
mcp_server_config = {
|
mcp_server_config = {
|
||||||
"mcpServers": {
|
"mcpServers": {
|
||||||
@@ -98,7 +97,6 @@ async def test_browser_use_agent():
|
|||||||
controller = CustomController()
|
controller = CustomController()
|
||||||
await controller.setup_mcp_client(mcp_server_config)
|
await controller.setup_mcp_client(mcp_server_config)
|
||||||
use_own_browser = True
|
use_own_browser = True
|
||||||
disable_security = False
|
|
||||||
use_vision = True # Set to False when using DeepSeek
|
use_vision = True # Set to False when using DeepSeek
|
||||||
|
|
||||||
max_actions_per_step = 10
|
max_actions_per_step = 10
|
||||||
@@ -106,33 +104,30 @@ async def test_browser_use_agent():
|
|||||||
browser_context = None
|
browser_context = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
extra_browser_args = [f"--window-size={window_w},{window_h}"]
|
||||||
if use_own_browser:
|
if use_own_browser:
|
||||||
chrome_path = os.getenv("CHROME_PATH", None)
|
browser_binary_path = os.getenv("BROWSER_PATH", None)
|
||||||
if chrome_path == "":
|
if browser_binary_path == "":
|
||||||
chrome_path = None
|
browser_binary_path = None
|
||||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
browser_user_data = os.getenv("BROWSER_USER_DATA", None)
|
||||||
if chrome_user_data:
|
if browser_user_data:
|
||||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
extra_browser_args += [f"--user-data-dir={browser_user_data}"]
|
||||||
else:
|
else:
|
||||||
chrome_path = None
|
browser_binary_path = None
|
||||||
browser = CustomBrowser(
|
browser = CustomBrowser(
|
||||||
config=BrowserConfig(
|
config=BrowserConfig(
|
||||||
headless=False,
|
headless=False,
|
||||||
disable_security=disable_security,
|
browser_binary_path=browser_binary_path,
|
||||||
browser_binary_path=chrome_path,
|
extra_browser_args=extra_browser_args,
|
||||||
extra_browser_args=extra_chromium_args,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
browser_context = await browser.new_context(
|
browser_context = await browser.new_context(
|
||||||
config=BrowserContextConfig(
|
config=BrowserContextConfig(
|
||||||
trace_path="./tmp/traces",
|
trace_path=None,
|
||||||
save_recording_path="./tmp/record_videos",
|
save_recording_path=None,
|
||||||
save_downloads_path="./tmp/downloads",
|
save_downloads_path="./tmp/downloads",
|
||||||
browser_window_size=BrowserContextWindowSize(
|
window_height=window_h,
|
||||||
width=window_w, height=window_h
|
window_width=window_w,
|
||||||
),
|
|
||||||
force_new_context=True
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
agent = BrowserUseAgent(
|
agent = BrowserUseAgent(
|
||||||
@@ -167,17 +162,9 @@ async def test_browser_use_agent():
|
|||||||
|
|
||||||
|
|
||||||
async def test_browser_use_parallel():
|
async def test_browser_use_parallel():
|
||||||
from browser_use.browser.context import BrowserContextWindowSize
|
|
||||||
from browser_use.browser.browser import BrowserConfig
|
|
||||||
from patchright.async_api import async_playwright
|
|
||||||
from browser_use.browser.browser import Browser
|
|
||||||
from src.browser.custom_context import BrowserContextConfig
|
|
||||||
from src.controller.custom_controller import CustomController
|
|
||||||
|
|
||||||
from browser_use.browser.browser import Browser, BrowserConfig
|
from browser_use.browser.browser import Browser, BrowserConfig
|
||||||
from browser_use.browser.context import (
|
from browser_use.browser.context import (
|
||||||
BrowserContextConfig,
|
BrowserContextConfig,
|
||||||
BrowserContextWindowSize,
|
|
||||||
)
|
)
|
||||||
from browser_use.agent.service import Agent
|
from browser_use.agent.service import Agent
|
||||||
|
|
||||||
@@ -261,8 +248,7 @@ async def test_browser_use_parallel():
|
|||||||
}
|
}
|
||||||
controller = CustomController()
|
controller = CustomController()
|
||||||
await controller.setup_mcp_client(mcp_server_config)
|
await controller.setup_mcp_client(mcp_server_config)
|
||||||
use_own_browser = False
|
use_own_browser = True
|
||||||
disable_security = False
|
|
||||||
use_vision = True # Set to False when using DeepSeek
|
use_vision = True # Set to False when using DeepSeek
|
||||||
|
|
||||||
max_actions_per_step = 10
|
max_actions_per_step = 10
|
||||||
@@ -270,32 +256,30 @@ async def test_browser_use_parallel():
|
|||||||
browser_context = None
|
browser_context = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
extra_browser_args = [f"--window-size={window_w},{window_h}"]
|
||||||
if use_own_browser:
|
if use_own_browser:
|
||||||
chrome_path = os.getenv("CHROME_PATH", None)
|
browser_binary_path = os.getenv("BROWSER_PATH", None)
|
||||||
if chrome_path == "":
|
if browser_binary_path == "":
|
||||||
chrome_path = None
|
browser_binary_path = None
|
||||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
browser_user_data = os.getenv("BROWSER_USER_DATA", None)
|
||||||
if chrome_user_data:
|
if browser_user_data:
|
||||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
extra_browser_args += [f"--user-data-dir={browser_user_data}"]
|
||||||
else:
|
else:
|
||||||
chrome_path = None
|
browser_binary_path = None
|
||||||
browser = CustomBrowser(
|
browser = CustomBrowser(
|
||||||
config=BrowserConfig(
|
config=BrowserConfig(
|
||||||
headless=False,
|
headless=False,
|
||||||
disable_security=disable_security,
|
browser_binary_path=browser_binary_path,
|
||||||
browser_binary_path=chrome_path,
|
extra_browser_args=extra_browser_args,
|
||||||
extra_browser_args=extra_chromium_args,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
browser_context = await browser.new_context(
|
browser_context = await browser.new_context(
|
||||||
config=BrowserContextConfig(
|
config=BrowserContextConfig(
|
||||||
trace_path="./tmp/traces",
|
trace_path=None,
|
||||||
save_recording_path="./tmp/record_videos",
|
save_recording_path=None,
|
||||||
save_downloads_path="./tmp/downloads",
|
save_downloads_path="./tmp/downloads",
|
||||||
browser_window_size=BrowserContextWindowSize(
|
window_height=window_h,
|
||||||
width=window_w, height=window_h
|
window_width=window_w,
|
||||||
),
|
|
||||||
force_new_context=True
|
force_new_context=True
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -364,7 +348,7 @@ async def test_deep_research_agent():
|
|||||||
|
|
||||||
browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
|
browser_config = {"headless": False, "window_width": 1280, "window_height": 1100, "use_own_browser": False}
|
||||||
agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
|
agent = DeepResearchAgent(llm=llm, browser_config=browser_config, mcp_server_config=mcp_server_config)
|
||||||
research_topic = "Give me a detailed travel plan to Switzerland from June 1st to 10th."
|
research_topic = "Give me investment advices of nvidia and tesla."
|
||||||
task_id_to_resume = "" # Set this to resume a previous task ID
|
task_id_to_resume = "" # Set this to resume a previous task ID
|
||||||
|
|
||||||
print(f"Starting research on: {research_topic}")
|
print(f"Starting research on: {research_topic}")
|
||||||
@@ -405,6 +389,6 @@ async def test_deep_research_agent():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# asyncio.run(test_browser_use_agent())
|
asyncio.run(test_browser_use_agent())
|
||||||
# asyncio.run(test_browser_use_parallel())
|
# asyncio.run(test_browser_use_parallel())
|
||||||
asyncio.run(test_deep_research_agent())
|
# asyncio.run(test_deep_research_agent())
|
||||||
|
|||||||
Reference in New Issue
Block a user