Refactor browser agent and update dependencies

- Updated import statements to use 'patchright' instead of 'playwright'.
- Cleaned up the BrowserUseAgent class for better readability.
- Modified README instructions for browser installation.
- Added new entries to .gitignore for PDF files and workflow.
This commit is contained in:
Magnus Müller
2025-05-02 13:21:47 +08:00
parent a1ec7ad012
commit 74bea17eb1
7 changed files with 45 additions and 78 deletions

4
.gitignore vendored
View File

@@ -187,4 +187,6 @@ data/
# For Config Files (Current Settings) # For Config Files (Current Settings)
.config.pkl .config.pkl
*.pdf *.pdf
workflow

View File

@@ -68,12 +68,7 @@ uv pip install -r requirements.txt
Install Browsers in Playwright: Install Browsers in Playwright:
You can install specific browsers by running: You can install specific browsers by running:
```bash ```bash
playwright install --with-deps chromium patchright install chromium
```
To install all browsers:
```bash
playwright install
``` ```
#### Step 4: Configure Environment #### Step 4: Configure Environment

View File

@@ -1,75 +1,37 @@
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import gc
import inspect
import json
import logging import logging
import os import os
import re
import time
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union
from dotenv import load_dotenv
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
BaseMessage,
HumanMessage,
SystemMessage,
)
# from lmnr.sdk.decorators import observe # from lmnr.sdk.decorators import observe
from pydantic import BaseModel, ValidationError
from browser_use.agent.gif import create_history_gif from browser_use.agent.gif import create_history_gif
from browser_use.agent.memory.service import Memory, MemorySettings
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
from browser_use.agent.views import (
REQUIRED_LLM_API_ENV_VARS,
ActionResult,
AgentError,
AgentHistory,
AgentHistoryList,
AgentOutput,
AgentSettings,
AgentState,
AgentStepInfo,
StepMetadata,
ToolCallingMethod,
)
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext
from browser_use.browser.views import BrowserState, BrowserStateHistory
from browser_use.controller.registry.views import ActionModel
from browser_use.controller.service import Controller
from browser_use.dom.history_tree_processor.service import (
DOMHistoryElement,
HistoryTreeProcessor,
)
from browser_use.exceptions import LLMException
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
AgentEndTelemetryEvent,
AgentRunTelemetryEvent,
AgentStepTelemetryEvent,
)
from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
from browser_use.agent.service import Agent, AgentHookFunc from browser_use.agent.service import Agent, AgentHookFunc
from browser_use.agent.views import (
AgentHistoryList,
AgentStepInfo,
)
from browser_use.telemetry.views import (
AgentEndTelemetryEvent,
)
from browser_use.utils import time_execution_async
from dotenv import load_dotenv
load_dotenv() load_dotenv()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1' SKIP_LLM_API_KEY_VERIFICATION = (
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
)
class BrowserUseAgent(Agent): class BrowserUseAgent(Agent):
@time_execution_async('--run (agent)') @time_execution_async("--run (agent)")
async def run( async def run(
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None, self,
on_step_end: AgentHookFunc | None = None max_steps: int = 100,
on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None,
) -> AgentHistoryList: ) -> AgentHistoryList:
"""Execute the task with maximum number of steps""" """Execute the task with maximum number of steps"""
@@ -88,7 +50,7 @@ class BrowserUseAgent(Agent):
signal_handler.register() signal_handler.register()
# Wait for verification task to complete if it exists # Wait for verification task to complete if it exists
if hasattr(self, '_verification_task') and not self._verification_task.done(): if hasattr(self, "_verification_task") and not self._verification_task.done():
try: try:
await self._verification_task await self._verification_task
except Exception: except Exception:
@@ -100,7 +62,9 @@ class BrowserUseAgent(Agent):
# Execute initial actions if provided # Execute initial actions if provided
if self.initial_actions: if self.initial_actions:
result = await self.multi_act(self.initial_actions, check_for_new_elements=False) result = await self.multi_act(
self.initial_actions, check_for_new_elements=False
)
self.state.last_result = result self.state.last_result = result
for step in range(max_steps): for step in range(max_steps):
@@ -112,12 +76,14 @@ class BrowserUseAgent(Agent):
# Check if we should stop due to too many failures # Check if we should stop due to too many failures
if self.state.consecutive_failures >= self.settings.max_failures: if self.state.consecutive_failures >= self.settings.max_failures:
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures') logger.error(
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
)
break break
# Check control flags before each step # Check control flags before each step
if self.state.stopped: if self.state.stopped:
logger.info('Agent stopped') logger.info("Agent stopped")
break break
while self.state.paused: while self.state.paused:
@@ -142,13 +108,15 @@ class BrowserUseAgent(Agent):
await self.log_completion() await self.log_completion()
break break
else: else:
logger.info('❌ Failed to complete task in maximum steps') logger.info("❌ Failed to complete task in maximum steps")
return self.state.history return self.state.history
except KeyboardInterrupt: except KeyboardInterrupt:
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
logger.info('Got KeyboardInterrupt during execution, returning current history') logger.info(
"Got KeyboardInterrupt during execution, returning current history"
)
return self.state.history return self.state.history
finally: finally:
@@ -171,8 +139,10 @@ class BrowserUseAgent(Agent):
await self.close() await self.close()
if self.settings.generate_gif: if self.settings.generate_gif:
output_path: str = 'agent_history.gif' output_path: str = "agent_history.gif"
if isinstance(self.settings.generate_gif, str): if isinstance(self.settings.generate_gif, str):
output_path = self.settings.generate_gif output_path = self.settings.generate_gif
create_history_gif(task=self.task, history=self.state.history, output_path=output_path) create_history_gif(
task=self.task, history=self.state.history, output_path=output_path
)

View File

@@ -1,17 +1,17 @@
import asyncio import asyncio
import pdb import pdb
from playwright.async_api import Browser as PlaywrightBrowser from patchright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import ( from patchright.async_api import (
BrowserContext as PlaywrightBrowserContext, BrowserContext as PlaywrightBrowserContext,
) )
from playwright.async_api import ( from patchright.async_api import (
Playwright, Playwright,
async_playwright, async_playwright,
) )
from browser_use.browser.browser import Browser, IN_DOCKER from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext from patchright.async_api import BrowserContext as PlaywrightBrowserContext
import logging import logging
from browser_use.browser.chrome import ( from browser_use.browser.chrome import (

View File

@@ -4,8 +4,8 @@ import os
from browser_use.browser.browser import Browser, IN_DOCKER from browser_use.browser.browser import Browser, IN_DOCKER
from browser_use.browser.context import BrowserContext, BrowserContextConfig from browser_use.browser.context import BrowserContext, BrowserContextConfig
from playwright.async_api import Browser as PlaywrightBrowser from patchright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import BrowserContext as PlaywrightBrowserContext from patchright.async_api import BrowserContext as PlaywrightBrowserContext
from typing import Optional from typing import Optional
from browser_use.browser.context import BrowserContextState from browser_use.browser.context import BrowserContextState

View File

@@ -169,7 +169,7 @@ async def test_browser_use_agent():
async def test_browser_use_parallel(): async def test_browser_use_parallel():
from browser_use.browser.context import BrowserContextWindowSize from browser_use.browser.context import BrowserContextWindowSize
from browser_use.browser.browser import BrowserConfig from browser_use.browser.browser import BrowserConfig
from playwright.async_api import async_playwright from patchright.async_api import async_playwright
from browser_use.browser.browser import Browser from browser_use.browser.browser import Browser
from src.browser.custom_context import BrowserContextConfig from src.browser.custom_context import BrowserContextConfig
from src.controller.custom_controller import CustomController from src.controller.custom_controller import CustomController

View File

@@ -6,7 +6,7 @@ load_dotenv()
def test_connect_browser(): def test_connect_browser():
import os import os
from playwright.sync_api import sync_playwright from patchright.sync_api import sync_playwright
chrome_exe = os.getenv("CHROME_PATH", "") chrome_exe = os.getenv("CHROME_PATH", "")
chrome_use_data = os.getenv("CHROME_USER_DATA", "") chrome_use_data = os.getenv("CHROME_USER_DATA", "")