diff --git a/.gitignore b/.gitignore index 548d48d..a7a55cd 100644 --- a/.gitignore +++ b/.gitignore @@ -187,4 +187,6 @@ data/ # For Config Files (Current Settings) .config.pkl -*.pdf \ No newline at end of file +*.pdf + +workflow \ No newline at end of file diff --git a/README.md b/README.md index 355ff76..91fb7fa 100644 --- a/README.md +++ b/README.md @@ -68,12 +68,7 @@ uv pip install -r requirements.txt Install Browsers in Playwright: You can install specific browsers by running: ```bash -playwright install --with-deps chromium -``` - -To install all browsers: -```bash -playwright install +patchright install chromium ``` #### Step 4: Configure Environment diff --git a/src/agent/browser_use/browser_use_agent.py b/src/agent/browser_use/browser_use_agent.py index a38211e..9234bca 100644 --- a/src/agent/browser_use/browser_use_agent.py +++ b/src/agent/browser_use/browser_use_agent.py @@ -1,75 +1,37 @@ from __future__ import annotations import asyncio -import gc -import inspect -import json import logging import os -import re -import time -from pathlib import Path -from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union - -from dotenv import load_dotenv -from langchain_core.language_models.chat_models import BaseChatModel -from langchain_core.messages import ( - BaseMessage, - HumanMessage, - SystemMessage, -) # from lmnr.sdk.decorators import observe -from pydantic import BaseModel, ValidationError - from browser_use.agent.gif import create_history_gif -from browser_use.agent.memory.service import Memory, MemorySettings -from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings -from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation -from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt -from browser_use.agent.views import ( - REQUIRED_LLM_API_ENV_VARS, - ActionResult, - AgentError, - AgentHistory, - AgentHistoryList, - AgentOutput, - AgentSettings, - AgentState, - AgentStepInfo, - StepMetadata, - ToolCallingMethod, -) -from browser_use.browser.browser import Browser -from browser_use.browser.context import BrowserContext -from browser_use.browser.views import BrowserState, BrowserStateHistory -from browser_use.controller.registry.views import ActionModel -from browser_use.controller.service import Controller -from browser_use.dom.history_tree_processor.service import ( - DOMHistoryElement, - HistoryTreeProcessor, -) -from browser_use.exceptions import LLMException -from browser_use.telemetry.service import ProductTelemetry -from browser_use.telemetry.views import ( - AgentEndTelemetryEvent, - AgentRunTelemetryEvent, - AgentStepTelemetryEvent, -) -from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync from browser_use.agent.service import Agent, AgentHookFunc +from browser_use.agent.views import ( + AgentHistoryList, + AgentStepInfo, +) +from browser_use.telemetry.views import ( + AgentEndTelemetryEvent, +) +from browser_use.utils import time_execution_async +from dotenv import load_dotenv load_dotenv() logger = logging.getLogger(__name__) -SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1' +SKIP_LLM_API_KEY_VERIFICATION = ( + os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1" +) class BrowserUseAgent(Agent): - @time_execution_async('--run (agent)') + @time_execution_async("--run (agent)") async def run( - self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None, - on_step_end: AgentHookFunc | None = None + self, + max_steps: int = 100, + on_step_start: AgentHookFunc | None = None, + on_step_end: AgentHookFunc | None = None, ) -> AgentHistoryList: """Execute the task with maximum number of steps""" @@ -88,7 +50,7 @@ class BrowserUseAgent(Agent): signal_handler.register() # Wait for verification task to complete if it exists - if hasattr(self, '_verification_task') and not self._verification_task.done(): + if hasattr(self, "_verification_task") and not self._verification_task.done(): try: await self._verification_task except Exception: @@ -100,7 +62,9 @@ class BrowserUseAgent(Agent): # Execute initial actions if provided if self.initial_actions: - result = await self.multi_act(self.initial_actions, check_for_new_elements=False) + result = await self.multi_act( + self.initial_actions, check_for_new_elements=False + ) self.state.last_result = result for step in range(max_steps): @@ -112,12 +76,14 @@ class BrowserUseAgent(Agent): # Check if we should stop due to too many failures if self.state.consecutive_failures >= self.settings.max_failures: - logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures') + logger.error( + f"❌ Stopping due to {self.settings.max_failures} consecutive failures" + ) break # Check control flags before each step if self.state.stopped: - logger.info('Agent stopped') + logger.info("Agent stopped") break while self.state.paused: @@ -142,13 +108,15 @@ class BrowserUseAgent(Agent): await self.log_completion() break else: - logger.info('❌ Failed to complete task in maximum steps') + logger.info("❌ Failed to complete task in maximum steps") return self.state.history except KeyboardInterrupt: # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well - logger.info('Got KeyboardInterrupt during execution, returning current history') + logger.info( + "Got KeyboardInterrupt during execution, returning current history" + ) return self.state.history finally: @@ -171,8 +139,10 @@ class BrowserUseAgent(Agent): await self.close() if self.settings.generate_gif: - output_path: str = 'agent_history.gif' + output_path: str = "agent_history.gif" if isinstance(self.settings.generate_gif, str): output_path = self.settings.generate_gif - create_history_gif(task=self.task, history=self.state.history, output_path=output_path) \ No newline at end of file + create_history_gif( + task=self.task, history=self.state.history, output_path=output_path + ) diff --git a/src/browser/custom_browser.py b/src/browser/custom_browser.py index 6db980f..02875e3 100644 --- a/src/browser/custom_browser.py +++ b/src/browser/custom_browser.py @@ -1,17 +1,17 @@ import asyncio import pdb -from playwright.async_api import Browser as PlaywrightBrowser -from playwright.async_api import ( +from patchright.async_api import Browser as PlaywrightBrowser +from patchright.async_api import ( BrowserContext as PlaywrightBrowserContext, ) -from playwright.async_api import ( +from patchright.async_api import ( Playwright, async_playwright, ) from browser_use.browser.browser import Browser, IN_DOCKER from browser_use.browser.context import BrowserContext, BrowserContextConfig -from playwright.async_api import BrowserContext as PlaywrightBrowserContext +from patchright.async_api import BrowserContext as PlaywrightBrowserContext import logging from browser_use.browser.chrome import ( diff --git a/src/browser/custom_context.py b/src/browser/custom_context.py index 43a67a8..753b4c5 100644 --- a/src/browser/custom_context.py +++ b/src/browser/custom_context.py @@ -4,8 +4,8 @@ import os from browser_use.browser.browser import Browser, IN_DOCKER from browser_use.browser.context import BrowserContext, BrowserContextConfig -from playwright.async_api import Browser as PlaywrightBrowser -from playwright.async_api import BrowserContext as PlaywrightBrowserContext +from patchright.async_api import Browser as PlaywrightBrowser +from patchright.async_api import BrowserContext as PlaywrightBrowserContext from typing import Optional from browser_use.browser.context import BrowserContextState diff --git a/tests/test_agents.py b/tests/test_agents.py index 23a6fb0..ffa743f 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -169,7 +169,7 @@ async def test_browser_use_agent(): async def test_browser_use_parallel(): from browser_use.browser.context import BrowserContextWindowSize from browser_use.browser.browser import BrowserConfig - from playwright.async_api import async_playwright + from patchright.async_api import async_playwright from browser_use.browser.browser import Browser from src.browser.custom_context import BrowserContextConfig from src.controller.custom_controller import CustomController diff --git a/tests/test_playwright.py b/tests/test_playwright.py index 6704a02..5a522fd 100644 --- a/tests/test_playwright.py +++ b/tests/test_playwright.py @@ -6,7 +6,7 @@ load_dotenv() def test_connect_browser(): import os - from playwright.sync_api import sync_playwright + from patchright.sync_api import sync_playwright chrome_exe = os.getenv("CHROME_PATH", "") chrome_use_data = os.getenv("CHROME_USER_DATA", "")