mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 03:07:54 +08:00
Refactor browser agent and update dependencies
- Updated import statements to use 'patchright' instead of 'playwright'.
- Cleaned up the BrowserUseAgent class for better readability.
- Modified README instructions for browser installation.
- Added new entries to .gitignore for PDF files (`*.pdf`) and `workflow`.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -188,3 +188,5 @@ data/
|
||||
# For Config Files (Current Settings)
|
||||
.config.pkl
|
||||
*.pdf
|
||||
|
||||
workflow
|
||||
@@ -68,12 +68,7 @@ uv pip install -r requirements.txt
|
||||
Install Browsers in Playwright:
|
||||
You can install specific browsers by running:
|
||||
```bash
|
||||
playwright install --with-deps chromium
|
||||
```
|
||||
|
||||
To install all browsers:
|
||||
```bash
|
||||
playwright install
|
||||
patchright install chromium
|
||||
```
|
||||
|
||||
#### Step 4: Configure Environment
|
||||
|
||||
@@ -1,75 +1,37 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import gc
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
HumanMessage,
|
||||
SystemMessage,
|
||||
)
|
||||
|
||||
# from lmnr.sdk.decorators import observe
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
from browser_use.agent.gif import create_history_gif
|
||||
from browser_use.agent.memory.service import Memory, MemorySettings
|
||||
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
|
||||
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
|
||||
from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
|
||||
from browser_use.agent.views import (
|
||||
REQUIRED_LLM_API_ENV_VARS,
|
||||
ActionResult,
|
||||
AgentError,
|
||||
AgentHistory,
|
||||
AgentHistoryList,
|
||||
AgentOutput,
|
||||
AgentSettings,
|
||||
AgentState,
|
||||
AgentStepInfo,
|
||||
StepMetadata,
|
||||
ToolCallingMethod,
|
||||
)
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.views import BrowserState, BrowserStateHistory
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.dom.history_tree_processor.service import (
|
||||
DOMHistoryElement,
|
||||
HistoryTreeProcessor,
|
||||
)
|
||||
from browser_use.exceptions import LLMException
|
||||
from browser_use.telemetry.service import ProductTelemetry
|
||||
from browser_use.telemetry.views import (
|
||||
AgentEndTelemetryEvent,
|
||||
AgentRunTelemetryEvent,
|
||||
AgentStepTelemetryEvent,
|
||||
)
|
||||
from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
|
||||
from browser_use.agent.service import Agent, AgentHookFunc
|
||||
from browser_use.agent.views import (
|
||||
AgentHistoryList,
|
||||
AgentStepInfo,
|
||||
)
|
||||
from browser_use.telemetry.views import (
|
||||
AgentEndTelemetryEvent,
|
||||
)
|
||||
from browser_use.utils import time_execution_async
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1'
|
||||
SKIP_LLM_API_KEY_VERIFICATION = (
|
||||
os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
|
||||
)
|
||||
|
||||
|
||||
class BrowserUseAgent(Agent):
|
||||
@time_execution_async('--run (agent)')
|
||||
@time_execution_async("--run (agent)")
|
||||
async def run(
|
||||
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None
|
||||
self,
|
||||
max_steps: int = 100,
|
||||
on_step_start: AgentHookFunc | None = None,
|
||||
on_step_end: AgentHookFunc | None = None,
|
||||
) -> AgentHistoryList:
|
||||
"""Execute the task with maximum number of steps"""
|
||||
|
||||
@@ -88,7 +50,7 @@ class BrowserUseAgent(Agent):
|
||||
signal_handler.register()
|
||||
|
||||
# Wait for verification task to complete if it exists
|
||||
if hasattr(self, '_verification_task') and not self._verification_task.done():
|
||||
if hasattr(self, "_verification_task") and not self._verification_task.done():
|
||||
try:
|
||||
await self._verification_task
|
||||
except Exception:
|
||||
@@ -100,7 +62,9 @@ class BrowserUseAgent(Agent):
|
||||
|
||||
# Execute initial actions if provided
|
||||
if self.initial_actions:
|
||||
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
|
||||
result = await self.multi_act(
|
||||
self.initial_actions, check_for_new_elements=False
|
||||
)
|
||||
self.state.last_result = result
|
||||
|
||||
for step in range(max_steps):
|
||||
@@ -112,12 +76,14 @@ class BrowserUseAgent(Agent):
|
||||
|
||||
# Check if we should stop due to too many failures
|
||||
if self.state.consecutive_failures >= self.settings.max_failures:
|
||||
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
|
||||
logger.error(
|
||||
f"❌ Stopping due to {self.settings.max_failures} consecutive failures"
|
||||
)
|
||||
break
|
||||
|
||||
# Check control flags before each step
|
||||
if self.state.stopped:
|
||||
logger.info('Agent stopped')
|
||||
logger.info("Agent stopped")
|
||||
break
|
||||
|
||||
while self.state.paused:
|
||||
@@ -142,13 +108,15 @@ class BrowserUseAgent(Agent):
|
||||
await self.log_completion()
|
||||
break
|
||||
else:
|
||||
logger.info('❌ Failed to complete task in maximum steps')
|
||||
logger.info("❌ Failed to complete task in maximum steps")
|
||||
|
||||
return self.state.history
|
||||
|
||||
except KeyboardInterrupt:
|
||||
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
|
||||
logger.info('Got KeyboardInterrupt during execution, returning current history')
|
||||
logger.info(
|
||||
"Got KeyboardInterrupt during execution, returning current history"
|
||||
)
|
||||
return self.state.history
|
||||
|
||||
finally:
|
||||
@@ -171,8 +139,10 @@ class BrowserUseAgent(Agent):
|
||||
await self.close()
|
||||
|
||||
if self.settings.generate_gif:
|
||||
output_path: str = 'agent_history.gif'
|
||||
output_path: str = "agent_history.gif"
|
||||
if isinstance(self.settings.generate_gif, str):
|
||||
output_path = self.settings.generate_gif
|
||||
|
||||
create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
|
||||
create_history_gif(
|
||||
task=self.task, history=self.state.history, output_path=output_path
|
||||
)
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
import asyncio
|
||||
import pdb
|
||||
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import (
|
||||
from patchright.async_api import Browser as PlaywrightBrowser
|
||||
from patchright.async_api import (
|
||||
BrowserContext as PlaywrightBrowserContext,
|
||||
)
|
||||
from playwright.async_api import (
|
||||
from patchright.async_api import (
|
||||
Playwright,
|
||||
async_playwright,
|
||||
)
|
||||
from browser_use.browser.browser import Browser, IN_DOCKER
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
from patchright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
import logging
|
||||
|
||||
from browser_use.browser.chrome import (
|
||||
|
||||
@@ -4,8 +4,8 @@ import os
|
||||
|
||||
from browser_use.browser.browser import Browser, IN_DOCKER
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
from patchright.async_api import Browser as PlaywrightBrowser
|
||||
from patchright.async_api import BrowserContext as PlaywrightBrowserContext
|
||||
from typing import Optional
|
||||
from browser_use.browser.context import BrowserContextState
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ async def test_browser_use_agent():
|
||||
async def test_browser_use_parallel():
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
from patchright.async_api import async_playwright
|
||||
from browser_use.browser.browser import Browser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
@@ -6,7 +6,7 @@ load_dotenv()
|
||||
|
||||
def test_connect_browser():
|
||||
import os
|
||||
from playwright.sync_api import sync_playwright
|
||||
from patchright.sync_api import sync_playwright
|
||||
|
||||
chrome_exe = os.getenv("CHROME_PATH", "")
|
||||
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
|
||||
|
||||
Reference in New Issue
Block a user