mirror of
https://github.com/browser-use/web-ui.git
synced 2026-03-22 11:17:17 +08:00
Merge pull request #184 from vvincent1234/fix/adapt_latest_browser-use
Fix/adapt latest browser use
This commit is contained in:
@@ -108,7 +108,7 @@ playwright install
|
||||
- `--dark-mode`: Enables dark mode for the user interface.
|
||||
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
|
||||
4. **Using Your Own Browser(Optional):**
|
||||
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
|
||||
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data.
|
||||
- Windows
|
||||
```env
|
||||
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
|
||||
@@ -118,7 +118,7 @@ playwright install
|
||||
- Mac
|
||||
```env
|
||||
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
|
||||
CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
|
||||
```
|
||||
- Close all Chrome windows
|
||||
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
browser-use==0.1.29
|
||||
pyperclip==1.9.0
|
||||
gradio==5.10.0
|
||||
json-repair
|
||||
|
||||
@@ -8,10 +8,11 @@ import os
|
||||
import base64
|
||||
import io
|
||||
import platform
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import (
|
||||
ActionResult,
|
||||
ActionModel,
|
||||
AgentHistoryList,
|
||||
AgentOutput,
|
||||
AgentHistory,
|
||||
@@ -30,6 +31,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
)
|
||||
from json_repair import repair_json
|
||||
from src.utils.agent_state import AgentState
|
||||
|
||||
from .custom_massage_manager import CustomMassageManager
|
||||
@@ -52,6 +54,7 @@ class CustomAgent(Agent):
|
||||
max_failures: int = 5,
|
||||
retry_delay: int = 10,
|
||||
system_prompt_class: Type[SystemPrompt] = SystemPrompt,
|
||||
agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
|
||||
max_input_tokens: int = 128000,
|
||||
validate_output: bool = False,
|
||||
include_attributes: list[str] = [
|
||||
@@ -98,7 +101,7 @@ class CustomAgent(Agent):
|
||||
register_done_callback=register_done_callback,
|
||||
tool_calling_method=tool_calling_method
|
||||
)
|
||||
if self.model_name in ["deepseek-reasoner"] or self.model_name.startswith("deepseek-r1"):
|
||||
if self.model_name in ["deepseek-reasoner"] or "deepseek-r1" in self.model_name:
|
||||
# deepseek-reasoner does not support function calling
|
||||
self.use_deepseek_r1 = True
|
||||
# deepseek-reasoner only support 64000 context
|
||||
@@ -106,20 +109,23 @@ class CustomAgent(Agent):
|
||||
else:
|
||||
self.use_deepseek_r1 = False
|
||||
|
||||
# record last actions
|
||||
self._last_actions = None
|
||||
# custom new info
|
||||
self.add_infos = add_infos
|
||||
# agent_state for Stop
|
||||
self.agent_state = agent_state
|
||||
self.agent_prompt_class = agent_prompt_class
|
||||
self.message_manager = CustomMassageManager(
|
||||
llm=self.llm,
|
||||
task=self.task,
|
||||
action_descriptions=self.controller.registry.get_prompt_description(),
|
||||
system_prompt_class=self.system_prompt_class,
|
||||
agent_prompt_class=agent_prompt_class,
|
||||
max_input_tokens=self.max_input_tokens,
|
||||
include_attributes=self.include_attributes,
|
||||
max_error_length=self.max_error_length,
|
||||
max_actions_per_step=self.max_actions_per_step,
|
||||
use_deepseek_r1=self.use_deepseek_r1
|
||||
max_actions_per_step=self.max_actions_per_step
|
||||
)
|
||||
|
||||
def _setup_action_models(self) -> None:
|
||||
@@ -186,9 +192,11 @@ class CustomAgent(Agent):
|
||||
logger.info(ai_message.reasoning_content)
|
||||
logger.info(f"🤯 End Deep Thinking")
|
||||
if isinstance(ai_message.content, list):
|
||||
parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", ""))
|
||||
ai_content = ai_message.content[0].replace("```json", "").replace("```", "")
|
||||
else:
|
||||
parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", ""))
|
||||
ai_content = ai_message.content.replace("```json", "").replace("```", "")
|
||||
ai_content = repair_json(ai_content)
|
||||
parsed_json = json.loads(ai_content)
|
||||
parsed: AgentOutput = self.AgentOutput(**parsed_json)
|
||||
if parsed is None:
|
||||
logger.debug(ai_message.content)
|
||||
@@ -197,9 +205,11 @@ class CustomAgent(Agent):
|
||||
ai_message = self.llm.invoke(input_messages)
|
||||
self.message_manager._add_message_with_tokens(ai_message)
|
||||
if isinstance(ai_message.content, list):
|
||||
parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", ""))
|
||||
ai_content = ai_message.content[0].replace("```json", "").replace("```", "")
|
||||
else:
|
||||
parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", ""))
|
||||
ai_content = ai_message.content.replace("```json", "").replace("```", "")
|
||||
ai_content = repair_json(ai_content)
|
||||
parsed_json = json.loads(ai_content)
|
||||
parsed: AgentOutput = self.AgentOutput(**parsed_json)
|
||||
if parsed is None:
|
||||
logger.debug(ai_message.content)
|
||||
@@ -222,7 +232,7 @@ class CustomAgent(Agent):
|
||||
|
||||
try:
|
||||
state = await self.browser_context.get_state(use_vision=self.use_vision)
|
||||
self.message_manager.add_state_message(state, self._last_result, step_info)
|
||||
self.message_manager.add_state_message(state, self._last_actions, self._last_result, step_info)
|
||||
input_messages = self.message_manager.get_messages()
|
||||
try:
|
||||
model_output = await self.get_next_action(input_messages)
|
||||
@@ -231,27 +241,31 @@ class CustomAgent(Agent):
|
||||
self.update_step_info(model_output, step_info)
|
||||
logger.info(f"🧠 All Memory: \n{step_info.memory}")
|
||||
self._save_conversation(input_messages, model_output)
|
||||
# should we remove last state message? at least, deepseek-reasoner cannot remove
|
||||
if self.model_name != "deepseek-reasoner":
|
||||
self.message_manager._remove_last_state_message()
|
||||
# remove prev message
|
||||
self.message_manager._remove_state_message_by_index(-1)
|
||||
except Exception as e:
|
||||
# model call failed, remove last state message from history
|
||||
self.message_manager._remove_last_state_message()
|
||||
self.message_manager._remove_state_message_by_index(-1)
|
||||
raise e
|
||||
|
||||
actions: list[ActionModel] = model_output.action
|
||||
result: list[ActionResult] = await self.controller.multi_act(
|
||||
model_output.action, self.browser_context
|
||||
actions, self.browser_context
|
||||
)
|
||||
if len(result) != len(model_output.action):
|
||||
if len(result) != len(actions):
|
||||
# I think something changes, such information should let LLM know
|
||||
for ri in range(len(result), len(model_output.action)):
|
||||
for ri in range(len(result), len(actions)):
|
||||
result.append(ActionResult(extracted_content=None,
|
||||
include_in_memory=True,
|
||||
error=f"{model_output.action[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \
|
||||
Something new appeared after action {model_output.action[len(result) - 1].model_dump_json(exclude_unset=True)}",
|
||||
error=f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \
|
||||
Something new appeared after action {actions[len(result) - 1].model_dump_json(exclude_unset=True)}",
|
||||
is_done=False))
|
||||
if len(actions) == 0:
|
||||
# TODO: fix no action case
|
||||
result = [ActionResult(is_done=True, extracted_content=step_info.memory, include_in_memory=True)]
|
||||
self._last_result = result
|
||||
|
||||
self._last_actions = actions
|
||||
if len(result) > 0 and result[-1].is_done:
|
||||
logger.info(f"📄 Result: {result[-1].extracted_content}")
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@ from typing import List, Optional, Type
|
||||
|
||||
from browser_use.agent.message_manager.service import MessageManager
|
||||
from browser_use.agent.message_manager.views import MessageHistory
|
||||
from browser_use.agent.prompts import SystemPrompt
|
||||
from browser_use.agent.views import ActionResult, AgentStepInfo
|
||||
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
|
||||
from browser_use.agent.views import ActionResult, AgentStepInfo, ActionModel
|
||||
from browser_use.browser.views import BrowserState
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
@@ -31,14 +31,14 @@ class CustomMassageManager(MessageManager):
|
||||
task: str,
|
||||
action_descriptions: str,
|
||||
system_prompt_class: Type[SystemPrompt],
|
||||
agent_prompt_class: Type[AgentMessagePrompt],
|
||||
max_input_tokens: int = 128000,
|
||||
estimated_characters_per_token: int = 3,
|
||||
image_tokens: int = 800,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
max_actions_per_step: int = 10,
|
||||
message_context: Optional[str] = None,
|
||||
use_deepseek_r1: bool = False
|
||||
message_context: Optional[str] = None
|
||||
):
|
||||
super().__init__(
|
||||
llm=llm,
|
||||
@@ -53,8 +53,7 @@ class CustomMassageManager(MessageManager):
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
message_context=message_context
|
||||
)
|
||||
self.tool_id = 1
|
||||
self.use_deepseek_r1 = use_deepseek_r1
|
||||
self.agent_prompt_class = agent_prompt_class
|
||||
# Custom: Move Task info to state_message
|
||||
self.history = MessageHistory()
|
||||
self._add_message_with_tokens(self.system_prompt)
|
||||
@@ -75,13 +74,15 @@ class CustomMassageManager(MessageManager):
|
||||
def add_state_message(
|
||||
self,
|
||||
state: BrowserState,
|
||||
actions: Optional[List[ActionModel]] = None,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
step_info: Optional[AgentStepInfo] = None,
|
||||
) -> None:
|
||||
"""Add browser state as human message"""
|
||||
# otherwise add state message and result to next message (which will not stay in memory)
|
||||
state_message = CustomAgentMessagePrompt(
|
||||
state_message = self.agent_prompt_class(
|
||||
state,
|
||||
actions,
|
||||
result,
|
||||
include_attributes=self.include_attributes,
|
||||
max_error_length=self.max_error_length,
|
||||
@@ -102,3 +103,15 @@ class CustomMassageManager(MessageManager):
|
||||
len(text) // self.estimated_characters_per_token
|
||||
) # Rough estimate if no tokenizer available
|
||||
return tokens
|
||||
|
||||
def _remove_state_message_by_index(self, remove_ind=-1) -> None:
|
||||
"""Remove last state message from history"""
|
||||
i = len(self.history.messages) - 1
|
||||
remove_cnt = 0
|
||||
while i >= 0:
|
||||
if isinstance(self.history.messages[i].message, HumanMessage):
|
||||
remove_cnt += 1
|
||||
if remove_cnt == abs(remove_ind):
|
||||
self.history.remove_message(i)
|
||||
break
|
||||
i -= 1
|
||||
@@ -2,7 +2,7 @@ import pdb
|
||||
from typing import List, Optional
|
||||
|
||||
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.agent.views import ActionResult, ActionModel
|
||||
from browser_use.browser.views import BrowserState
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
|
||||
@@ -56,7 +56,7 @@ class CustomSystemPrompt(SystemPrompt):
|
||||
- Use scroll to find elements you are looking for
|
||||
|
||||
5. TASK COMPLETION:
|
||||
- If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the done action to terminate the operation process.
|
||||
- If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the **Done** action to terminate the operation process.
|
||||
- Don't hallucinate actions.
|
||||
- If the task requires specific information - make sure to include everything in the done function. This is what the user will see.
|
||||
- If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action.
|
||||
@@ -140,6 +140,7 @@ class CustomAgentMessagePrompt(AgentMessagePrompt):
|
||||
def __init__(
|
||||
self,
|
||||
state: BrowserState,
|
||||
actions: Optional[List[ActionModel]] = None,
|
||||
result: Optional[List[ActionResult]] = None,
|
||||
include_attributes: list[str] = [],
|
||||
max_error_length: int = 400,
|
||||
@@ -151,10 +152,11 @@ class CustomAgentMessagePrompt(AgentMessagePrompt):
|
||||
max_error_length=max_error_length,
|
||||
step_info=step_info
|
||||
)
|
||||
self.actions = actions
|
||||
|
||||
def get_user_message(self) -> HumanMessage:
|
||||
if self.step_info:
|
||||
step_info_description = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}'
|
||||
step_info_description = f'Current step: {self.step_info.step_number}/{self.step_info.max_steps}\n'
|
||||
else:
|
||||
step_info_description = ''
|
||||
|
||||
@@ -181,7 +183,7 @@ class CustomAgentMessagePrompt(AgentMessagePrompt):
|
||||
|
||||
state_description = f"""
|
||||
{step_info_description}
|
||||
1. Task: {self.step_info.task}
|
||||
1. Task: {self.step_info.task}.
|
||||
2. Hints(Optional):
|
||||
{self.step_info.add_infos}
|
||||
3. Memory:
|
||||
@@ -193,17 +195,20 @@ class CustomAgentMessagePrompt(AgentMessagePrompt):
|
||||
{elements_text}
|
||||
"""
|
||||
|
||||
if self.result:
|
||||
|
||||
if self.actions and self.result:
|
||||
state_description += "\n **Previous Actions** \n"
|
||||
state_description += f'Previous step: {self.step_info.step_number-1}/{self.step_info.max_steps} \n'
|
||||
for i, result in enumerate(self.result):
|
||||
action = self.actions[i]
|
||||
state_description += f"Previous action {i + 1}/{len(self.result)}: {action.model_dump_json(exclude_unset=True)}\n"
|
||||
if result.include_in_memory:
|
||||
if result.extracted_content:
|
||||
state_description += f"\nResult of previous action {i + 1}/{len(self.result)}: {result.extracted_content}"
|
||||
state_description += f"Result of previous action {i + 1}/{len(self.result)}: {result.extracted_content}\n"
|
||||
if result.error:
|
||||
# only use last 300 characters of error
|
||||
error = result.error[-self.max_error_length:]
|
||||
state_description += (
|
||||
f"\nError of previous action {i + 1}/{len(self.result)}: ...{error}"
|
||||
f"Error of previous action {i + 1}/{len(self.result)}: ...{error}\n"
|
||||
)
|
||||
|
||||
if self.state.screenshot:
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Optional, Type
|
||||
from pydantic import BaseModel
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.controller.service import Controller, DoneAction
|
||||
|
||||
|
||||
class CustomController(Controller):
|
||||
|
||||
@@ -94,9 +94,9 @@ def get_llm_model(provider: str, **kwargs):
|
||||
else:
|
||||
base_url = kwargs.get("base_url")
|
||||
|
||||
if kwargs.get("model_name", "qwen2.5:7b").startswith("deepseek-r1"):
|
||||
if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
|
||||
return DeepSeekR1ChatOllama(
|
||||
model=kwargs.get("model_name", "deepseek-r1:7b"),
|
||||
model=kwargs.get("model_name", "deepseek-r1:14b"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
num_ctx=kwargs.get("num_ctx", 32000),
|
||||
base_url=kwargs.get("base_url", base_url),
|
||||
@@ -106,6 +106,7 @@ def get_llm_model(provider: str, **kwargs):
|
||||
model=kwargs.get("model_name", "qwen2.5:7b"),
|
||||
temperature=kwargs.get("temperature", 0.0),
|
||||
num_ctx=kwargs.get("num_ctx", 32000),
|
||||
num_predict=kwargs.get("num_predict", 1024),
|
||||
base_url=kwargs.get("base_url", base_url),
|
||||
)
|
||||
elif provider == "azure_openai":
|
||||
|
||||
@@ -32,10 +32,14 @@ async def test_browser_use_org():
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-chat",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="deepseek",
|
||||
model_name="deepseek-chat",
|
||||
temperature=0.8
|
||||
provider="ollama", model_name="deepseek-r1:14b", temperature=0.5
|
||||
)
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
@@ -99,151 +103,29 @@ async def test_browser_use_custom():
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# provider="openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# base_url=os.getenv("OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
llm = utils.get_llm_model(
|
||||
provider="gemini",
|
||||
model_name="gemini-2.0-flash-exp",
|
||||
temperature=1.0,
|
||||
api_key=os.getenv("GOOGLE_API_KEY", "")
|
||||
provider="azure_openai",
|
||||
model_name="gpt-4o",
|
||||
temperature=0.8,
|
||||
base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
)
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="deepseek",
|
||||
# model_name="deepseek-chat",
|
||||
# temperature=0.8
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="ollama", model_name="qwen2.5:7b", temperature=0.8
|
||||
# )
|
||||
|
||||
controller = CustomController()
|
||||
use_own_browser = False
|
||||
disable_security = True
|
||||
use_vision = True # Set to False when using DeepSeek
|
||||
tool_call_in_content = True # Set to True when using Ollama
|
||||
max_actions_per_step = 1
|
||||
playwright = None
|
||||
browser_context_ = None
|
||||
try:
|
||||
if use_own_browser:
|
||||
playwright = await async_playwright().start()
|
||||
chrome_exe = os.getenv("CHROME_PATH", "")
|
||||
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
|
||||
browser_context_ = await playwright.chromium.launch_persistent_context(
|
||||
user_data_dir=chrome_use_data,
|
||||
executable_path=chrome_exe,
|
||||
no_viewport=False,
|
||||
headless=False, # 保持浏览器窗口可见
|
||||
user_agent=(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
|
||||
),
|
||||
java_script_enabled=True,
|
||||
bypass_csp=disable_security,
|
||||
ignore_https_errors=disable_security,
|
||||
record_video_dir="./tmp/record_videos",
|
||||
record_video_size={"width": window_w, "height": window_h},
|
||||
)
|
||||
else:
|
||||
browser_context_ = None
|
||||
|
||||
browser = CustomBrowser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
disable_security=True,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
)
|
||||
)
|
||||
|
||||
async with await browser.new_context(
|
||||
config=BrowserContextConfig(
|
||||
trace_path="./tmp/result_processing",
|
||||
save_recording_path="./tmp/record_videos",
|
||||
no_viewport=False,
|
||||
browser_window_size=BrowserContextWindowSize(
|
||||
width=window_w, height=window_h
|
||||
),
|
||||
),
|
||||
context=browser_context_,
|
||||
) as browser_context:
|
||||
agent = CustomAgent(
|
||||
task="go to google.com and type 'OpenAI' click search and give me the first url",
|
||||
add_infos="", # some hints for llm to complete the task
|
||||
llm=llm,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
use_vision=use_vision,
|
||||
tool_call_in_content=tool_call_in_content,
|
||||
max_actions_per_step=max_actions_per_step
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
|
||||
print("\nErrors:")
|
||||
pprint(history.errors(), indent=4)
|
||||
|
||||
# e.g. xPaths the model clicked on
|
||||
print("\nModel Outputs:")
|
||||
pprint(history.model_actions(), indent=4)
|
||||
|
||||
print("\nThoughts:")
|
||||
pprint(history.model_thoughts(), indent=4)
|
||||
# close browser
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
# 显式关闭持久化上下文
|
||||
if browser_context_:
|
||||
await browser_context_.close()
|
||||
|
||||
# 关闭 Playwright 对象
|
||||
if playwright:
|
||||
await playwright.stop()
|
||||
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def test_browser_use_custom_v2():
|
||||
from browser_use.browser.context import BrowserContextWindowSize
|
||||
from browser_use.browser.browser import BrowserConfig
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.browser.custom_context import BrowserContextConfig
|
||||
from src.controller.custom_controller import CustomController
|
||||
|
||||
window_w, window_h = 1920, 1080
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="azure_openai",
|
||||
# model_name="gpt-4o",
|
||||
# temperature=0.8,
|
||||
# base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
|
||||
# api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
|
||||
# )
|
||||
|
||||
# llm = utils.get_llm_model(
|
||||
# provider="gemini",
|
||||
# model_name="gemini-2.0-flash-exp",
|
||||
@@ -272,20 +154,24 @@ async def test_browser_use_custom_v2():
|
||||
# )
|
||||
|
||||
controller = CustomController()
|
||||
use_own_browser = False
|
||||
use_own_browser = True
|
||||
disable_security = True
|
||||
use_vision = False # Set to False when using DeepSeek
|
||||
|
||||
max_actions_per_step = 10
|
||||
max_actions_per_step = 1
|
||||
playwright = None
|
||||
browser = None
|
||||
browser_context = None
|
||||
|
||||
try:
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
browser = CustomBrowser(
|
||||
@@ -293,7 +179,7 @@ async def test_browser_use_custom_v2():
|
||||
headless=False,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
extra_chromium_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
browser_context = await browser.new_context(
|
||||
@@ -307,17 +193,18 @@ async def test_browser_use_custom_v2():
|
||||
)
|
||||
)
|
||||
agent = CustomAgent(
|
||||
task="go to google.com and type 'Nvidia' click search and give me the first url",
|
||||
task="Search 'Nvidia' and give me the first url",
|
||||
add_infos="", # some hints for llm to complete the task
|
||||
llm=llm,
|
||||
browser=browser,
|
||||
browser_context=browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
agent_prompt_class=CustomAgentMessagePrompt,
|
||||
use_vision=use_vision,
|
||||
max_actions_per_step=max_actions_per_step
|
||||
)
|
||||
history: AgentHistoryList = await agent.run(max_steps=10)
|
||||
history: AgentHistoryList = await agent.run(max_steps=100)
|
||||
|
||||
print("Final Result:")
|
||||
pprint(history.final_result(), indent=4)
|
||||
@@ -349,5 +236,4 @@ async def test_browser_use_custom_v2():
|
||||
|
||||
if __name__ == "__main__":
|
||||
# asyncio.run(test_browser_use_org())
|
||||
# asyncio.run(test_browser_use_custom())
|
||||
asyncio.run(test_browser_use_custom_v2())
|
||||
asyncio.run(test_browser_use_custom())
|
||||
|
||||
@@ -156,7 +156,7 @@ if __name__ == '__main__':
|
||||
# test_openai_model()
|
||||
# test_gemini_model()
|
||||
# test_azure_openai_model()
|
||||
# test_deepseek_model()
|
||||
test_deepseek_model()
|
||||
# test_ollama_model()
|
||||
test_deepseek_r1_model()
|
||||
# test_deepseek_r1_model()
|
||||
# test_deepseek_r1_ollama_model()
|
||||
17
webui.py
17
webui.py
@@ -28,7 +28,7 @@ from src.utils.agent_state import AgentState
|
||||
from src.utils import utils
|
||||
from src.agent.custom_agent import CustomAgent
|
||||
from src.browser.custom_browser import CustomBrowser
|
||||
from src.agent.custom_prompts import CustomSystemPrompt
|
||||
from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
|
||||
from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext
|
||||
from src.controller.custom_controller import CustomController
|
||||
from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base
|
||||
@@ -224,20 +224,24 @@ async def run_org_agent(
|
||||
# Clear any previous stop request
|
||||
_global_agent_state.clear_stop()
|
||||
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
|
||||
if _global_browser is None:
|
||||
_global_browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
extra_chromium_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -315,10 +319,14 @@ async def run_custom_agent(
|
||||
# Clear any previous stop request
|
||||
_global_agent_state.clear_stop()
|
||||
|
||||
extra_chromium_args = [f"--window-size={window_w},{window_h}"]
|
||||
if use_own_browser:
|
||||
chrome_path = os.getenv("CHROME_PATH", None)
|
||||
if chrome_path == "":
|
||||
chrome_path = None
|
||||
chrome_user_data = os.getenv("CHROME_USER_DATA", None)
|
||||
if chrome_user_data:
|
||||
extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
|
||||
else:
|
||||
chrome_path = None
|
||||
|
||||
@@ -331,7 +339,7 @@ async def run_custom_agent(
|
||||
headless=headless,
|
||||
disable_security=disable_security,
|
||||
chrome_instance_path=chrome_path,
|
||||
extra_chromium_args=[f"--window-size={window_w},{window_h}"],
|
||||
extra_chromium_args=extra_chromium_args,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -357,6 +365,7 @@ async def run_custom_agent(
|
||||
browser_context=_global_browser_context,
|
||||
controller=controller,
|
||||
system_prompt_class=CustomSystemPrompt,
|
||||
agent_prompt_class=CustomAgentMessagePrompt,
|
||||
max_actions_per_step=max_actions_per_step,
|
||||
agent_state=_global_agent_state,
|
||||
tool_calling_method=tool_calling_method
|
||||
|
||||
Reference in New Issue
Block a user