commit 08fd44b6efb7fb6b4b2a1b39d498c12135c94b4c Author: warmshao Date: Thu Jan 2 09:30:46 2025 +0800 add gemini supported diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..46b6e5b --- /dev/null +++ b/.env.example @@ -0,0 +1,11 @@ +OPENAI_API_KEY= +ANTHROPIC_API_KEY= +GOOGLE_API_KEY= +AZURE_OPENAI_ENDPOINT= +AZURE_OPENAI_KEY= + +# Set to false to disable anonymized telemetry +ANONYMIZED_TELEMETRY=true + +# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info +BROWSER_USE_LOGGING_LEVEL=info \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c45cf01 --- /dev/null +++ b/.gitignore @@ -0,0 +1,179 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +test_env/ + + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ +temp +tmp + + +.DS_Store + +private_example.py +private_example + +browser_cookies.json +cookies.json +AgentHistory.json +cv_04_24.pdf +AgentHistoryList.json +*.gif \ No newline at end of file diff --git a/assets/examples/test.png b/assets/examples/test.png new file mode 100644 index 0000000..4e3ae5e Binary files /dev/null and b/assets/examples/test.png differ diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..93fbe7f --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# @Time : 2025/1/1 +# @Author : wenshao +# @Email : wenshaoguo1026@gmail.com +# @Project : browser-use-webui +# @FileName: __init__.py.py diff --git a/src/agent/__init__.py b/src/agent/__init__.py new file mode 100644 index 0000000..93fbe7f --- /dev/null +++ b/src/agent/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# @Time : 2025/1/1 +# @Author : wenshao +# @Email : wenshaoguo1026@gmail.com +# @Project : browser-use-webui +# @FileName: __init__.py.py diff --git a/src/browser/__init__.py b/src/browser/__init__.py new 
import asyncio
import base64
import json
import logging
import os

from playwright.async_api import Browser as PlaywrightBrowser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.browser.browser import Browser

logger = logging.getLogger(__name__)


class CustomBrowserContext(BrowserContext):
    """Customized ``BrowserContext`` that overrides how the Playwright context is created."""

    def __init__(
        self,
        browser: 'Browser',
        config: 'BrowserContextConfig | None' = None,
    ):
        """Initialise the context wrapper.

        :param browser: browser wrapper this context belongs to.
        :param config: context configuration; when omitted, a fresh
            ``BrowserContextConfig()`` is created for this instance.
        """
        # A default of ``BrowserContextConfig()`` in the signature would be
        # evaluated once and shared by every instance, so build it per call.
        if config is None:
            config = BrowserContextConfig()
        super().__init__(browser, config)

    async def _create_context(self, browser: PlaywrightBrowser):
        """Create (or reuse) a Playwright context, load cookies and add the init script.

        When ``self.browser.config.chrome_instance_path`` is set and the
        Playwright browser already has contexts, the first existing context is
        reused; otherwise a new context is created from ``self.config``.

        :param browser: Playwright browser to take or create the context from.
        :return: the Playwright browser context.
        """
        if self.browser.config.chrome_instance_path and browser.contexts:
            # Connected to an existing Chrome instance: reuse its first context.
            context = browser.contexts[0]
        else:
            context = await browser.new_context(
                viewport=self.config.browser_window_size,
                no_viewport=False,
                user_agent=(
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                    '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
                ),
                java_script_enabled=True,
                bypass_csp=self.config.disable_security,
                ignore_https_errors=self.config.disable_security,
                record_video_dir=self.config.save_recording_path,
                # Record video at the same size as the browser window.
                record_video_size=self.config.browser_window_size,
            )

        if self.config.trace_path:
            await context.tracing.start(screenshots=True, snapshots=True, sources=True)

        # Load cookies if a cookies file is configured and present.
        cookies_file = self.config.cookies_file
        if cookies_file and os.path.exists(cookies_file):
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(cookies_file, 'r', encoding='utf-8') as f:
                cookies = json.load(f)
            logger.info(f'Loaded {len(cookies)} cookies from {self.config.cookies_file}')
            await context.add_cookies(cookies)

        # Script injected into every page to mask common automation fingerprints.
        await context.add_init_script(
            """
            // Webdriver property
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });

            // Languages
            Object.defineProperty(navigator, 'languages', {
                get: () => ['en-US', 'en']
            });

            // Plugins
            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5]
            });

            // Chrome runtime
            window.chrome = { runtime: {} };

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            """
        )

        return context
import base64


def get_llm_model(provider: str, **kwargs):
    """Build a LangChain chat model for the requested provider.

    The provider SDK is imported only inside the branch that needs it, so
    only the selected provider's package has to be installed and an unknown
    provider is rejected without importing anything.

    :param provider: one of ``'claude'``, ``'openai'``, ``'gemini'`` or
        ``'azure_openai'``.
    :param kwargs: optional settings:
        ``model_name`` (provider-specific default),
        ``temperature`` (default ``0.0``),
        ``base_url`` (endpoint; not used for ``'gemini'``),
        ``api_key`` (default ``None``),
        ``api_version`` (``'azure_openai'`` only, default
        ``"2024-05-01-preview"``).
    :return: the constructed chat model instance.
    :raises ValueError: if ``provider`` is not supported.
    """
    temperature = kwargs.get("temperature", 0.0)
    api_key = kwargs.get("api_key", None)

    if provider == 'claude':
        from langchain_anthropic import ChatAnthropic

        return ChatAnthropic(
            model_name=kwargs.get("model_name", 'claude-3-5-sonnet-20240620'),
            temperature=temperature,
            base_url=kwargs.get("base_url", "https://api.anthropic.com"),
            api_key=api_key,
        )
    elif provider == 'openai':
        from langchain_openai import ChatOpenAI

        return ChatOpenAI(
            model=kwargs.get("model_name", 'gpt-4o'),
            temperature=temperature,
            base_url=kwargs.get("base_url", "https://api.openai.com/v1/"),
            api_key=api_key,
        )
    elif provider == 'gemini':
        from langchain_google_genai import ChatGoogleGenerativeAI

        return ChatGoogleGenerativeAI(
            model=kwargs.get("model_name", 'gemini-2.0-flash-exp'),
            temperature=temperature,
            google_api_key=api_key,
        )
    elif provider == "azure_openai":
        from langchain_openai import AzureChatOpenAI

        return AzureChatOpenAI(
            model=kwargs.get("model_name", 'gpt-4o'),
            temperature=temperature,
            # Overridable, but defaults to the version that used to be hard-coded.
            api_version=kwargs.get("api_version", "2024-05-01-preview"),
            azure_endpoint=kwargs.get("base_url", ""),
            api_key=api_key,
        )
    else:
        raise ValueError(f'Unsupported provider: {provider}')


def encode_image(img_path):
    """Return the content of the file at ``img_path`` as base64 text.

    :param img_path: path of the file to read; a falsy value (``None``,
        ``""``) yields ``None`` without touching the file system.
    :return: base64-encoded file content as a ``str``, or ``None``.
    """
    if not img_path:
        return None
    with open(img_path, "rb") as fin:
        return base64.b64encode(fin.read()).decode("utf-8")
import mimetypes
import os
import pdb

from dotenv import load_dotenv

# Load API keys and endpoints from a local .env file before they are read below.
load_dotenv()

import sys

# Allow ``from src...`` imports; this only resolves when the script is run from
# the repository root.
sys.path.append(".")

# Sample image sent to every model under test.
TEST_IMAGE_PATH = "assets/examples/test.png"


def _describe_test_image(llm, image_path=TEST_IMAGE_PATH):
    """Send ``image_path`` with a "describe this image" prompt to ``llm`` and print the reply."""
    from langchain_core.messages import HumanMessage
    from src.utils import utils

    image_data = utils.encode_image(image_path)
    # Label the data URL with the file's real type; the sample is a PNG, so a
    # fixed "image/jpeg" label would not match the payload.
    mime_type = mimetypes.guess_type(image_path)[0] or "application/octet-stream"
    message = HumanMessage(
        content=[
            {"type": "text", "text": "describe this image"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
            },
        ]
    )
    ai_msg = llm.invoke([message])
    print(ai_msg.content)


def test_gemini_model():
    """Smoke-test the Gemini provider with an image prompt (needs ``GOOGLE_API_KEY``)."""
    # you need to enable your api key first: https://ai.google.dev/palm_docs/oauth_quickstart
    from src.utils import utils

    llm = utils.get_llm_model(
        provider="gemini",
        model_name="gemini-2.0-flash-exp",
        temperature=0.8,
        api_key=os.getenv("GOOGLE_API_KEY", "")
    )
    _describe_test_image(llm)


def test_azure_openai_model():
    """Smoke-test the Azure OpenAI provider with an image prompt.

    Needs ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_KEY`` in the environment.
    """
    from src.utils import utils

    llm = utils.get_llm_model(
        provider="azure_openai",
        model_name="gpt-4o",
        temperature=0.8,
        base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
        api_key=os.getenv("AZURE_OPENAI_KEY", "")
    )
    _describe_test_image(llm)


if __name__ == '__main__':
    test_gemini_model()
    # test_azure_openai_model()