mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
fixed file formatting
This commit is contained in:
@@ -4,99 +4,85 @@
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: custom_agent.py
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pdb
|
||||
import textwrap
|
||||
import time
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Type, TypeVar
|
||||
from typing import Optional, Type
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
SystemMessage,
|
||||
)
|
||||
from openai import RateLimitError
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
from browser_use.agent.message_manager.service import MessageManager
|
||||
from browser_use.agent.prompts import AgentMessagePrompt, SystemPrompt
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import (
|
||||
ActionResult,
|
||||
AgentError,
|
||||
AgentHistory,
|
||||
AgentHistoryList,
|
||||
AgentOutput,
|
||||
AgentStepInfo,
|
||||
)
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.views import BrowserState, BrowserStateHistory
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.dom.history_tree_processor.service import (
|
||||
DOMHistoryElement,
|
||||
HistoryTreeProcessor,
|
||||
)
|
||||
from browser_use.telemetry.service import ProductTelemetry
|
||||
from browser_use.telemetry.views import (
|
||||
AgentEndTelemetryEvent,
|
||||
AgentRunTelemetryEvent,
|
||||
AgentStepErrorTelemetryEvent,
|
||||
)
|
||||
from browser_use.utils import time_execution_async
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
)
|
||||
|
||||
from .custom_views import CustomAgentOutput, CustomAgentStepInfo
|
||||
from .custom_massage_manager import CustomMassageManager
|
||||
from .custom_views import CustomAgentOutput, CustomAgentStepInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomAgent(Agent):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task: str,
|
||||
llm: BaseChatModel,
|
||||
add_infos: str = '',
|
||||
browser: Browser | None = None,
|
||||
browser_context: BrowserContext | None = None,
|
||||
controller: Controller = Controller(),
|
||||
use_vision: bool = True,
|
||||
save_conversation_path: Optional[str] = None,
|
||||
max_failures: int = 5,
|
||||
retry_delay: int = 10,
|
||||
system_prompt_class: Type[SystemPrompt] = SystemPrompt,
|
||||
max_input_tokens: int = 128000,
|
||||
validate_output: bool = False,
|
||||
include_attributes: list[str] = [
|
||||
'title',
|
||||
'type',
|
||||
'name',
|
||||
'role',
|
||||
'tabindex',
|
||||
'aria-label',
|
||||
'placeholder',
|
||||
'value',
|
||||
'alt',
|
||||
'aria-expanded',
|
||||
],
|
||||
max_error_length: int = 400,
|
||||
max_actions_per_step: int = 10,
|
||||
self,
|
||||
task: str,
|
||||
llm: BaseChatModel,
|
||||
add_infos: str = "",
|
||||
browser: Browser | None = None,
|
||||
browser_context: BrowserContext | None = None,
|
||||
controller: Controller = Controller(),
|
||||
use_vision: bool = True,
|
||||
save_conversation_path: Optional[str] = None,
|
||||
max_failures: int = 5,
|
||||
retry_delay: int = 10,
|
||||
system_prompt_class: Type[SystemPrompt] = SystemPrompt,
|
||||
max_input_tokens: int = 128000,
|
||||
validate_output: bool = False,
|
||||
include_attributes: list[str] = [
|
||||
"title",
|
||||
"type",
|
||||
"name",
|
||||
"role",
|
||||
"tabindex",
|
||||
"aria-label",
|
||||
"placeholder",
|
||||
"value",
|
||||
"alt",
|
||||
"aria-expanded",
|
||||
],
|
||||
max_error_length: int = 400,
|
||||
max_actions_per_step: int = 10,
|
||||
):
|
||||
super().__init__(task, llm, browser, browser_context, controller, use_vision, save_conversation_path,
|
||||
max_failures, retry_delay, system_prompt_class, max_input_tokens, validate_output,
|
||||
include_attributes, max_error_length, max_actions_per_step)
|
||||
super().__init__(
|
||||
task,
|
||||
llm,
|
||||
browser,
|
||||
browser_context,
|
||||
controller,
|
||||
use_vision,
|
||||
save_conversation_path,
|
||||
max_failures,
|
||||
retry_delay,
|
||||
system_prompt_class,
|
||||
max_input_tokens,
|
||||
validate_output,
|
||||
include_attributes,
|
||||
max_error_length,
|
||||
max_actions_per_step,
|
||||
)
|
||||
self.add_infos = add_infos
|
||||
self.message_manager = CustomMassageManager(
|
||||
llm=self.llm,
|
||||
@@ -118,24 +104,26 @@ class CustomAgent(Agent):
|
||||
|
||||
def _log_response(self, response: CustomAgentOutput) -> None:
|
||||
"""Log the model's response"""
|
||||
if 'Success' in response.current_state.prev_action_evaluation:
|
||||
emoji = '✅'
|
||||
elif 'Failed' in response.current_state.prev_action_evaluation:
|
||||
emoji = '❌'
|
||||
if "Success" in response.current_state.prev_action_evaluation:
|
||||
emoji = "✅"
|
||||
elif "Failed" in response.current_state.prev_action_evaluation:
|
||||
emoji = "❌"
|
||||
else:
|
||||
emoji = '🤷'
|
||||
emoji = "🤷"
|
||||
|
||||
logger.info(f'{emoji} Eval: {response.current_state.prev_action_evaluation}')
|
||||
logger.info(f'🧠 New Memory: {response.current_state.important_contents}')
|
||||
logger.info(f'⏳ Task Progress: {response.current_state.completed_contents}')
|
||||
logger.info(f'🤔 Thought: {response.current_state.thought}')
|
||||
logger.info(f'🎯 Summary: {response.current_state.summary}')
|
||||
logger.info(f"{emoji} Eval: {response.current_state.prev_action_evaluation}")
|
||||
logger.info(f"🧠 New Memory: {response.current_state.important_contents}")
|
||||
logger.info(f"⏳ Task Progress: {response.current_state.completed_contents}")
|
||||
logger.info(f"🤔 Thought: {response.current_state.thought}")
|
||||
logger.info(f"🎯 Summary: {response.current_state.summary}")
|
||||
for i, action in enumerate(response.action):
|
||||
logger.info(
|
||||
f'🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}'
|
||||
f"🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}"
|
||||
)
|
||||
|
||||
def update_step_info(self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None):
|
||||
def update_step_info(
|
||||
self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None
|
||||
):
|
||||
"""
|
||||
update step info
|
||||
"""
|
||||
@@ -144,19 +132,23 @@ class CustomAgent(Agent):
|
||||
|
||||
step_info.step_number += 1
|
||||
important_contents = model_output.current_state.important_contents
|
||||
if important_contents and 'None' not in important_contents and important_contents not in step_info.memory:
|
||||
step_info.memory += important_contents + '\n'
|
||||
if (
|
||||
important_contents
|
||||
and "None" not in important_contents
|
||||
and important_contents not in step_info.memory
|
||||
):
|
||||
step_info.memory += important_contents + "\n"
|
||||
|
||||
completed_contents = model_output.current_state.completed_contents
|
||||
if completed_contents and 'None' not in completed_contents:
|
||||
if completed_contents and "None" not in completed_contents:
|
||||
step_info.task_progress = completed_contents
|
||||
|
||||
@time_execution_async('--get_next_action')
|
||||
@time_execution_async("--get_next_action")
|
||||
async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
|
||||
"""Get next action from LLM based on current state"""
|
||||
|
||||
ret = self.llm.invoke(input_messages)
|
||||
parsed_json = json.loads(ret.content.replace('```json', '').replace("```", ""))
|
||||
parsed_json = json.loads(ret.content.replace("```json", "").replace("```", ""))
|
||||
parsed: AgentOutput = self.AgentOutput(**parsed_json)
|
||||
# cut the number of actions to max_actions_per_step
|
||||
parsed.action = parsed.action[: self.max_actions_per_step]
|
||||
@@ -165,10 +157,10 @@ class CustomAgent(Agent):
|
||||
|
||||
return parsed
|
||||
|
||||
@time_execution_async('--step')
|
||||
@time_execution_async("--step")
|
||||
async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
|
||||
"""Execute one step of the task"""
|
||||
logger.info(f'\n📍 Step {self.n_steps}')
|
||||
logger.info(f"\n📍 Step {self.n_steps}")
|
||||
state = None
|
||||
model_output = None
|
||||
result: list[ActionResult] = []
|
||||
@@ -179,7 +171,7 @@ class CustomAgent(Agent):
|
||||
input_messages = self.message_manager.get_messages()
|
||||
model_output = await self.get_next_action(input_messages)
|
||||
self.update_step_info(model_output, step_info)
|
||||
logger.info(f'🧠 All Memory: {step_info.memory}')
|
||||
logger.info(f"🧠 All Memory: {step_info.memory}")
|
||||
self._save_conversation(input_messages, model_output)
|
||||
self.message_manager._remove_last_state_message() # we dont want the whole state in the chat history
|
||||
self.message_manager.add_model_output(model_output)
|
||||
@@ -190,7 +182,7 @@ class CustomAgent(Agent):
|
||||
self._last_result = result
|
||||
|
||||
if len(result) > 0 and result[-1].is_done:
|
||||
logger.info(f'📄 Result: {result[-1].extracted_content}')
|
||||
logger.info(f"📄 Result: {result[-1].extracted_content}")
|
||||
|
||||
self.consecutive_failures = 0
|
||||
|
||||
@@ -215,7 +207,7 @@ class CustomAgent(Agent):
|
||||
async def run(self, max_steps: int = 100) -> AgentHistoryList:
|
||||
"""Execute the task with maximum number of steps"""
|
||||
try:
|
||||
logger.info(f'🚀 Starting task: {self.task}')
|
||||
logger.info(f"🚀 Starting task: {self.task}")
|
||||
|
||||
self.telemetry.capture(
|
||||
AgentRunTelemetryEvent(
|
||||
@@ -224,13 +216,14 @@ class CustomAgent(Agent):
|
||||
)
|
||||
)
|
||||
|
||||
step_info = CustomAgentStepInfo(task=self.task,
|
||||
add_infos=self.add_infos,
|
||||
step_number=1,
|
||||
max_steps=max_steps,
|
||||
memory='',
|
||||
task_progress=''
|
||||
)
|
||||
step_info = CustomAgentStepInfo(
|
||||
task=self.task,
|
||||
add_infos=self.add_infos,
|
||||
step_number=1,
|
||||
max_steps=max_steps,
|
||||
memory="",
|
||||
task_progress="",
|
||||
)
|
||||
|
||||
for step in range(max_steps):
|
||||
if self._too_many_failures():
|
||||
@@ -240,15 +233,15 @@ class CustomAgent(Agent):
|
||||
|
||||
if self.history.is_done():
|
||||
if (
|
||||
self.validate_output and step < max_steps - 1
|
||||
self.validate_output and step < max_steps - 1
|
||||
): # if last step, we dont need to validate
|
||||
if not await self._validate_output():
|
||||
continue
|
||||
|
||||
logger.info('✅ Task completed successfully')
|
||||
logger.info("✅ Task completed successfully")
|
||||
break
|
||||
else:
|
||||
logger.info('❌ Failed to complete task in maximum steps')
|
||||
logger.info("❌ Failed to complete task in maximum steps")
|
||||
|
||||
return self.history
|
||||
|
||||
|
||||
@@ -7,23 +7,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from browser_use.agent.message_manager.service import MessageManager
|
||||
from browser_use.agent.message_manager.views import MessageHistory
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.views import ActionResult, AgentStepInfo
|
||||
from browser_use.browser.views import BrowserState
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
HumanMessage,
|
||||
)
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use.agent.message_manager.views import MessageHistory, MessageMetadata
|
||||
from browser_use.agent.prompts import AgentMessagePrompt, SystemPrompt
|
||||
from browser_use.agent.views import ActionResult, AgentOutput, AgentStepInfo
|
||||
from browser_use.browser.views import BrowserState
|
||||
from browser_use.agent.message_manager.service import MessageManager
|
||||
|
||||
from .custom_prompts import CustomAgentMessagePrompt
|
||||
|
||||
@@ -32,31 +26,40 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class CustomMassageManager(MessageManager):
|
||||
def __init__(
|
||||
self,
|
||||
llm: BaseChatModel,
|
||||
task: str,
|
||||
action_descriptions: str,
|
||||
system_prompt_class: Type[SystemPrompt],
|
||||
max_input_tokens: int = 128000,
|
||||
estimated_tokens_per_character: int = 3,
|
||||
image_tokens: int = 800,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
max_actions_per_step: int = 10,
|
||||
self,
|
||||
llm: BaseChatModel,
|
||||
task: str,
|
||||
action_descriptions: str,
|
||||
system_prompt_class: Type[SystemPrompt],
|
||||
max_input_tokens: int = 128000,
|
||||
estimated_tokens_per_character: int = 3,
|
||||
image_tokens: int = 800,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
max_actions_per_step: int = 10,
|
||||
):
|
||||
super().__init__(llm, task, action_descriptions, system_prompt_class, max_input_tokens,
|
||||
estimated_tokens_per_character, image_tokens, include_attributes, max_error_length,
|
||||
max_actions_per_step)
|
||||
super().__init__(
|
||||
llm,
|
||||
task,
|
||||
action_descriptions,
|
||||
system_prompt_class,
|
||||
max_input_tokens,
|
||||
estimated_tokens_per_character,
|
||||
image_tokens,
|
||||
include_attributes,
|
||||
max_error_length,
|
||||
max_actions_per_step,
|
||||
)
|
||||
|
||||
# Move Task info to state_message
|
||||
self.history = MessageHistory()
|
||||
self._add_message_with_tokens(self.system_prompt)
|
||||
|
||||
def add_state_message(
|
||||
self,
|
||||
state: BrowserState,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
step_info: Optional[AgentStepInfo] = None,
|
||||
self,
|
||||
state: BrowserState,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
step_info: Optional[AgentStepInfo] = None,
|
||||
) -> None:
|
||||
"""Add browser state as human message"""
|
||||
|
||||
@@ -68,7 +71,9 @@ class CustomMassageManager(MessageManager):
|
||||
msg = HumanMessage(content=str(r.extracted_content))
|
||||
self._add_message_with_tokens(msg)
|
||||
if r.error:
|
||||
msg = HumanMessage(content=str(r.error)[-self.max_error_length:])
|
||||
msg = HumanMessage(
|
||||
content=str(r.error)[-self.max_error_length :]
|
||||
)
|
||||
self._add_message_with_tokens(msg)
|
||||
result = None # if result in history, we dont want to add it again
|
||||
|
||||
|
||||
@@ -4,14 +4,12 @@
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: custom_prompts.py
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
|
||||
from browser_use.agent.views import ActionResult, AgentStepInfo
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.views import BrowserState
|
||||
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
|
||||
from .custom_views import CustomAgentStepInfo
|
||||
|
||||
@@ -93,7 +91,7 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
|
||||
- only use multiple actions if it makes sense.
|
||||
"""
|
||||
text += f' - use maximum {self.max_actions_per_step} actions per sequence'
|
||||
text += f" - use maximum {self.max_actions_per_step} actions per sequence"
|
||||
return text
|
||||
|
||||
def input_format(self) -> str:
|
||||
@@ -128,7 +126,7 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
Returns:
|
||||
str: Formatted system prompt
|
||||
"""
|
||||
time_str = self.current_date.strftime('%Y-%m-%d %H:%M')
|
||||
time_str = self.current_date.strftime("%Y-%m-%d %H:%M")
|
||||
|
||||
AGENT_PROMPT = f"""You are a precise browser automation agent that interacts with websites through structured commands. Your role is to:
|
||||
1. Analyze the provided webpage elements and structure
|
||||
@@ -150,12 +148,12 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
|
||||
class CustomAgentMessagePrompt:
|
||||
def __init__(
|
||||
self,
|
||||
state: BrowserState,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
step_info: Optional[CustomAgentStepInfo] = None,
|
||||
self,
|
||||
state: BrowserState,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
step_info: Optional[CustomAgentStepInfo] = None,
|
||||
):
|
||||
self.state = state
|
||||
self.result = result
|
||||
@@ -182,22 +180,24 @@ class CustomAgentMessagePrompt:
|
||||
if self.result:
|
||||
for i, result in enumerate(self.result):
|
||||
if result.extracted_content:
|
||||
state_description += (
|
||||
f'\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}'
|
||||
)
|
||||
state_description += f"\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}"
|
||||
if result.error:
|
||||
# only use last 300 characters of error
|
||||
error = result.error[-self.max_error_length:]
|
||||
state_description += f'\nError of action {i + 1}/{len(self.result)}: ...{error}'
|
||||
error = result.error[-self.max_error_length :]
|
||||
state_description += (
|
||||
f"\nError of action {i + 1}/{len(self.result)}: ...{error}"
|
||||
)
|
||||
|
||||
if self.state.screenshot:
|
||||
# Format message for vision model
|
||||
return HumanMessage(
|
||||
content=[
|
||||
{'type': 'text', 'text': state_description},
|
||||
{"type": "text", "text": state_description},
|
||||
{
|
||||
'type': 'image_url',
|
||||
'image_url': {'url': f'data:image/png;base64,{self.state.screenshot}'},
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/png;base64,{self.state.screenshot}"
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -6,9 +6,10 @@
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Type
|
||||
from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
|
||||
from browser_use.agent.views import AgentOutput
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
from pydantic import BaseModel, ConfigDict, Field, create_model
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -43,11 +44,16 @@ class CustomAgentOutput(AgentOutput):
|
||||
action: list[ActionModel]
|
||||
|
||||
@staticmethod
|
||||
def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['CustomAgentOutput']:
|
||||
def type_with_custom_actions(
|
||||
custom_actions: Type[ActionModel],
|
||||
) -> Type["CustomAgentOutput"]:
|
||||
"""Extend actions with custom actions"""
|
||||
return create_model(
|
||||
'AgentOutput',
|
||||
"AgentOutput",
|
||||
__base__=CustomAgentOutput,
|
||||
action=(list[custom_actions], Field(...)), # Properly annotated field with no default
|
||||
action=(
|
||||
list[custom_actions],
|
||||
Field(...),
|
||||
), # Properly annotated field with no default
|
||||
__module__=CustomAgentOutput.__module__,
|
||||
)
|
||||
|
||||
@@ -4,16 +4,17 @@
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: browser.py
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig, BrowserContext
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
|
||||
from .custom_context import CustomBrowserContext
|
||||
|
||||
|
||||
class CustomBrowser(Browser):
|
||||
|
||||
async def new_context(
|
||||
self, config: BrowserContextConfig = BrowserContextConfig(), context: CustomBrowserContext = None
|
||||
self,
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: CustomBrowserContext = None,
|
||||
) -> BrowserContext:
|
||||
"""Create a browser context"""
|
||||
return CustomBrowserContext(config=config, browser=self, context=context)
|
||||
|
||||
@@ -5,26 +5,23 @@
|
||||
# @Project : browser-use-webui
|
||||
# @FileName: context.py
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomBrowserContext(BrowserContext):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
browser: 'Browser',
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: BrowserContext = None
|
||||
self,
|
||||
browser: "Browser",
|
||||
config: BrowserContextConfig = BrowserContextConfig(),
|
||||
context: BrowserContext = None,
|
||||
):
|
||||
super(CustomBrowserContext, self).__init__(browser, config)
|
||||
self.context = context
|
||||
@@ -42,14 +39,14 @@ class CustomBrowserContext(BrowserContext):
|
||||
viewport=self.config.browser_window_size,
|
||||
no_viewport=False,
|
||||
user_agent=(
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
|
||||
'(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=self.config.disable_security,
|
||||
ignore_https_errors=self.config.disable_security,
|
||||
record_video_dir=self.config.save_recording_path,
|
||||
record_video_size=self.config.browser_window_size # set record video size
|
||||
record_video_size=self.config.browser_window_size, # set record video size
|
||||
)
|
||||
|
||||
if self.config.trace_path:
|
||||
@@ -57,9 +54,11 @@ class CustomBrowserContext(BrowserContext):
|
||||
|
||||
# Load cookies if they exist
|
||||
if self.config.cookies_file and os.path.exists(self.config.cookies_file):
|
||||
with open(self.config.cookies_file, 'r') as f:
|
||||
with open(self.config.cookies_file, "r") as f:
|
||||
cookies = json.load(f)
|
||||
logger.info(f'Loaded {len(cookies)} cookies from {self.config.cookies_file}')
|
||||
logger.info(
|
||||
f"Loaded {len(cookies)} cookies from {self.config.cookies_file}"
|
||||
)
|
||||
await context.add_cookies(cookies)
|
||||
|
||||
# Expose anti-detection scripts
|
||||
|
||||
@@ -5,10 +5,9 @@
|
||||
# @FileName: custom_action.py
|
||||
|
||||
import pyperclip
|
||||
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
|
||||
class CustomController(Controller):
|
||||
@@ -19,12 +18,12 @@ class CustomController(Controller):
|
||||
def _register_custom_actions(self):
|
||||
"""Register all custom browser actions"""
|
||||
|
||||
@self.registry.action('Copy text to clipboard')
|
||||
@self.registry.action("Copy text to clipboard")
|
||||
def copy_to_clipboard(text: str):
|
||||
pyperclip.copy(text)
|
||||
return ActionResult(extracted_content=text)
|
||||
|
||||
@self.registry.action('Paste text from clipboard', requires_browser=True)
|
||||
@self.registry.action("Paste text from clipboard", requires_browser=True)
|
||||
async def paste_from_clipboard(browser: BrowserContext):
|
||||
text = pyperclip.paste()
|
||||
# send text to browser
|
||||
|
||||
@@ -8,10 +8,10 @@
|
||||
import base64
|
||||
import os
|
||||
|
||||
from langchain_openai import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from langchain_ollama import ChatOllama
|
||||
from langchain_openai import AzureChatOpenAI, ChatOpenAI
|
||||
|
||||
|
||||
def get_llm_model(provider: str, **kwargs):
|
||||
@@ -21,7 +21,7 @@ def get_llm_model(provider: str, **kwargs):
|
||||
:param kwargs:
|
||||
:return:
|
||||
"""
|
||||
if provider == 'anthropic':
|
||||
if provider == "anthropic":
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = "https://api.anthropic.com"
|
||||
else:
|
||||
@@ -33,12 +33,12 @@ def get_llm_model(provider: str, **kwargs):
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatAnthropic(
|
||||
model_name=kwargs.get("model_name", 'claude-3-5-sonnet-20240620'),
|
||||
model_name=kwargs.get("model_name", "claude-3-5-sonnet-20240620"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
api_key=api_key,
|
||||
)
|
||||
elif provider == 'openai':
|
||||
elif provider == "openai":
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1")
|
||||
else:
|
||||
@@ -50,12 +50,12 @@ def get_llm_model(provider: str, **kwargs):
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatOpenAI(
|
||||
model=kwargs.get("model_name", 'gpt-4o'),
|
||||
model=kwargs.get("model_name", "gpt-4o"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
api_key=api_key,
|
||||
)
|
||||
elif provider == 'deepseek':
|
||||
elif provider == "deepseek":
|
||||
if not kwargs.get("base_url", ""):
|
||||
base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
|
||||
else:
|
||||
@@ -67,24 +67,24 @@ def get_llm_model(provider: str, **kwargs):
|
||||
api_key = kwargs.get("api_key")
|
||||
|
||||
return ChatOpenAI(
|
||||
model=kwargs.get("model_name", 'deepseek-chat'),
|
||||
model=kwargs.get("model_name", "deepseek-chat"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
base_url=base_url,
|
||||
api_key=api_key
|
||||
api_key=api_key,
|
||||
)
|
||||
elif provider == 'gemini':
|
||||
elif provider == "gemini":
|
||||
if not kwargs.get("api_key", ""):
|
||||
api_key = os.getenv("GOOGLE_API_KEY", "")
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
return ChatGoogleGenerativeAI(
|
||||
model=kwargs.get("model_name", 'gemini-2.0-flash-exp'),
|
||||
model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
google_api_key=api_key,
|
||||
)
|
||||
elif provider == 'ollama':
|
||||
elif provider == "ollama":
|
||||
return ChatOllama(
|
||||
model=kwargs.get("model_name", 'qwen2.5:7b'),
|
||||
model=kwargs.get("model_name", "qwen2.5:7b"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
)
|
||||
elif provider == "azure_openai":
|
||||
@@ -97,14 +97,14 @@ def get_llm_model(provider: str, **kwargs):
|
||||
else:
|
||||
api_key = kwargs.get("api_key")
|
||||
return AzureChatOpenAI(
|
||||
model=kwargs.get("model_name", 'gpt-4o'),
|
||||
model=kwargs.get("model_name", "gpt-4o"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
api_version="2024-05-01-preview",
|
||||
azure_endpoint=base_url,
|
||||
api_key=api_key
|
||||
api_key=api_key,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f'Unsupported provider: {provider}')
|
||||
raise ValueError(f"Unsupported provider: {provider}")
|
||||
|
||||
|
||||
def encode_image(img_path):
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
# @Author : wenshao
|
||||
# @ProjectName: browser-use-webui
|
||||
# @FileName: test_browser_use.py
|
||||
import pdb
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -11,11 +10,11 @@ load_dotenv()
|
||||
import sys
|
||||
|
||||
sys.path.append(".")
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
import asyncio
|
||||
from browser_use import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
|
||||
@@ -25,16 +24,16 @@ from src.utils import utils
|
||||
async def test_browser_use_org():
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import (
|
||||
BrowserContext,
|
||||
BrowserContextConfig,
|
||||
BrowserContextWindowSize,
|
||||
)
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.8,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", "")
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
@@ -43,16 +42,18 @@ async def test_browser_use_org():
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path='./tmp/traces',
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
||||
)
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/traces",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
)
|
||||
) as browser_context:
|
||||
agent = Agent(
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
@@ -61,32 +62,31 @@ async def test_browser_use_org():
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
print('Final Result:')
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print('\nErrors:')
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print('\nModel Outputs:')
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print('\nThoughts:')
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def test_browser_use_custom():
|
||||
from playwright.async_api import async_playwright
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from src.browser.custom_browser import CustomBrowser, BrowserConfig
|
||||
from src.browser.custom_context import BrowserContext, BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
from src.browser.custom_context import CustomBrowserContext
|
||||
from src.browser.custom_browser import BrowserConfig, CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
|
||||
@@ -112,9 +112,7 @@ async def test_browser_use_custom():
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="ollama",
|
||||
model_name="qwen2.5:7b",
|
||||
temperature=0.8
|
||||
provider="ollama", model_name="qwen2.5:7b", temperature=0.8
|
||||
)
|
||||
|
||||
controller = CustomController()
|
||||
@@ -134,14 +132,14 @@ async def test_browser_use_custom():
|
||||
no_viewport=False,
|
||||
headless=False, # 保持浏览器窗口可见
|
||||
user_agent=(
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
|
||||
'(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=disable_security,
|
||||
ignore_https_errors=disable_security,
|
||||
record_video_dir="./tmp/record_videos",
|
||||
record_video_size={'width': window_w, 'height': window_h}
|
||||
record_video_size={"width": window_w, "height": window_h},
|
||||
)
|
||||
else:
|
||||
browser_context_ = None
|
||||
@@ -150,18 +148,20 @@ async def test_browser_use_custom():
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path='./tmp/result_processing',
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/result_processing",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
context=browser_context_
|
||||
),
|
||||
context=browser_context_,
|
||||
) as browser_context:
|
||||
agent = CustomAgent(
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
@@ -170,25 +170,26 @@ async def test_browser_use_custom():
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
use_vision=use_vision
|
||||
use_vision=use_vision,
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
print('Final Result:')
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print('\nErrors:')
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print('\nModel Outputs:')
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print('\nThoughts:')
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
@@ -202,6 +203,6 @@ async def test_browser_use_custom():
|
||||
await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
# asyncio.run(test_browser_use_org())
|
||||
asyncio.run(test_browser_use_custom())
|
||||
|
||||
2
webui.py
2
webui.py
@@ -22,7 +22,7 @@ from playwright.async_api import async_playwright
|
||||
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
from src.browser.custom_browser import BrowserConfig, CustomBrowser
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
from src.utils import utils
|
||||
|
||||
Reference in New Issue
Block a user