Refactor browser agent and update dependencies

- Updated import statements to use 'patchright' instead of 'playwright'.
- Cleaned up the BrowserUseAgent class for better readability.
- Modified the README browser-installation instructions to use `patchright install chromium`.
- Added new entries to .gitignore for PDF files (`*.pdf`) and the `workflow` path.
This commit is contained in:
Magnus Müller
2025-05-02 13:21:47 +08:00
parent a1ec7ad012
commit 74bea17eb1
7 changed files with 45 additions and 78 deletions

2
.gitignore vendored
View File

@@ -188,3 +188,5 @@ data/
# For Config Files (Current Settings)
.config.pkl
*.pdf
workflow

View File

@@ -68,12 +68,7 @@ uv pip install -r requirements.txt
Install Playwright browsers (via Patchright):
You can install specific browsers by running:
```bash
playwright install --with-deps chromium
```
To install all browsers:
```bash
playwright install
patchright install chromium
```
#### Step 4: Configure Environment

View File

@@ -1,75 +1,37 @@
from __future__ import annotations
import asyncio
import gc
import inspect
import json
import logging
import os
import re
import time
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union
from dotenv import load_dotenv
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
BaseMessage,
HumanMessage,
SystemMessage,
)
# from lmnr.sdk.decorators import observe
from pydantic import BaseModel, ValidationError
from browser_use.agent.gif import create_history_gif
from browser_use.agent.memory.service import Memory, MemorySettings
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
from browser_use.agent.service import Agent, AgentHookFunc
from browser_use.agent.views import (
REQUIRED_LLM_API_ENV_VARS,
ActionResult,
AgentError,
AgentHistory,
AgentHistoryList,
AgentOutput,
AgentSettings,
AgentState,
AgentStepInfo,
StepMetadata,
ToolCallingMethod,
)
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext
from browser_use.browser.views import BrowserState, BrowserStateHistory
from browser_use.controller.registry.views import ActionModel
from browser_use.controller.service import Controller
from browser_use.dom.history_tree_processor.service import (
DOMHistoryElement,
HistoryTreeProcessor,
)
from browser_use.exceptions import LLMException
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
AgentEndTelemetryEvent,
AgentRunTelemetryEvent,
AgentStepTelemetryEvent,
)
from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
from browser_use.agent.service import Agent, AgentHookFunc
from browser_use.utils import time_execution_async
from dotenv import load_dotenv
load_dotenv()
logger = logging.getLogger(__name__)
SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1'
SKIP_LLM_API_KEY_VERIFICATION = (
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
)
class BrowserUseAgent(Agent):
@time_execution_async('--run (agent)')
@time_execution_async("--run (agent)")
async def run(
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None
self,
max_steps: int = 100,
on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None,
) -> AgentHistoryList:
"""Execute the task with maximum number of steps"""
@@ -88,7 +50,7 @@ class BrowserUseAgent(Agent):
signal_handler.register()
# Wait for verification task to complete if it exists
if hasattr(self, '_verification_task') and not self._verification_task.done():
if hasattr(self, "_verification_task") and not self._verification_task.done():
try:
await self._verification_task
except Exception:
@@ -100,7 +62,9 @@ class BrowserUseAgent(Agent):
# Execute initial actions if provided
if self.initial_actions:
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
result = await self.multi_act(
self.initial_actions, check_for_new_elements=False
)
self.state.last_result = result
for step in range(max_steps):
@@ -112,12 +76,14 @@ class BrowserUseAgent(Agent):
# Check if we should stop due to too many failures
if self.state.consecutive_failures >= self.settings.max_failures:
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
logger.error(
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
)
break
# Check control flags before each step
if self.state.stopped:
logger.info('Agent stopped')
logger.info("Agent stopped")
break
while self.state.paused:
@@ -142,13 +108,15 @@ class BrowserUseAgent(Agent):
await self.log_completion()
break
else:
logger.info('❌ Failed to complete task in maximum steps')
logger.info("❌ Failed to complete task in maximum steps")
return self.state.history
except KeyboardInterrupt:
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
logger.info('Got KeyboardInterrupt during execution, returning current history')
logger.info(
"Got KeyboardInterrupt during execution, returning current history"
)
return self.state.history
finally:
@@ -171,8 +139,10 @@ class BrowserUseAgent(Agent):
await self.close()
if self.settings.generate_gif:
output_path: str = 'agent_history.gif'
output_path: str = "agent_history.gif"
if isinstance(self.settings.generate_gif, str):
output_path = self.settings.generate_gif
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
create_history_gif(
task=self.task, history=self.state.history, output_path=output_path
)

View File

@@ -1,17 +1,17 @@
import asyncio
import pdb
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
from patchright.async_api import Browser as PlaywrightBrowser
from patchright.async_api import (
BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
from patchright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
from patchright.async_api import BrowserContext as PlaywrightBrowserContext
import logging
from browser_use.browser.chrome import (

View File

@@ -4,8 +4,8 @@ import os
from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
from patchright.async_api import Browser as PlaywrightBrowser
from patchright.async_api import BrowserContext as PlaywrightBrowserContext
from typing import Optional
from browser_use.browser.context import BrowserContextState

View File

@@ -169,7 +169,7 @@ async def test_browser_use_agent():
async def test_browser_use_parallel():
from browser_use.browser.context import BrowserContextWindowSize
from browser_use.browser.browser import BrowserConfig
from playwright.async_api import async_playwright
from patchright.async_api import async_playwright
from browser_use.browser.browser import Browser
from src.browser.custom_context import BrowserContextConfig
from src.controller.custom_controller import CustomController

View File

@@ -6,7 +6,7 @@ load_dotenv()
def test_connect_browser():
import os
from playwright.sync_api import sync_playwright
from patchright.sync_api import sync_playwright
chrome_exe = os.getenv("CHROME_PATH", "")
chrome_use_data = os.getenv("CHROME_USER_DATA", "")