mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Switch dependency to browsergym-core (#5242)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
4b633782e5
commit
cd22817004
4
.github/workflows/ghcr-build.yml
vendored
4
.github/workflows/ghcr-build.yml
vendored
@ -291,7 +291,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=false \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
@ -368,7 +368,7 @@ jobs:
|
||||
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
RUN_AS_OPENHANDS=true \
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
|
||||
265
poetry.lock
generated
265
poetry.lock
generated
@ -601,43 +601,6 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >
|
||||
[package.extras]
|
||||
crt = ["awscrt (==0.22.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "browsergym"
|
||||
version = "0.10.2"
|
||||
description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
|
||||
optional = false
|
||||
python-versions = ">3.7"
|
||||
files = [
|
||||
{file = "browsergym-0.10.2-py3-none-any.whl", hash = "sha256:9581d1d1f1fcd1cf35266cf30c881d60c147a0d374b3491eeaebb07d9690f868"},
|
||||
{file = "browsergym-0.10.2.tar.gz", hash = "sha256:3cdd7520cca857421aa7ec0a965968df4bcef721299a424397f86d7cad078ab0"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
browsergym-assistantbench = "0.10.2"
|
||||
browsergym-core = "0.10.2"
|
||||
browsergym-experiments = "0.10.2"
|
||||
browsergym-miniwob = "0.10.2"
|
||||
browsergym-visualwebarena = "0.10.2"
|
||||
browsergym-webarena = "0.10.2"
|
||||
browsergym-workarena = ">=0.4.1"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-assistantbench"
|
||||
version = "0.10.2"
|
||||
description = "AssistantBench benchmark for BrowserGym"
|
||||
optional = false
|
||||
python-versions = ">3.7"
|
||||
files = [
|
||||
{file = "browsergym_assistantbench-0.10.2-py3-none-any.whl", hash = "sha256:af0d3a3e23686066b070feca38f8740262bed6d65ccf9098f393334a005987c0"},
|
||||
{file = "browsergym_assistantbench-0.10.2.tar.gz", hash = "sha256:de18eb7c010403d5d467b927b4713b56f6e97a59493bee4c42599d4d7cb54dce"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
browsergym-core = "0.10.2"
|
||||
datasets = "*"
|
||||
numpy = "*"
|
||||
scipy = "*"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-core"
|
||||
version = "0.10.2"
|
||||
@ -658,22 +621,6 @@ pillow = ">=10.1"
|
||||
playwright = ">=1.39,<2.0"
|
||||
pyparsing = ">=3"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-experiments"
|
||||
version = "0.10.2"
|
||||
description = "Experimentation tools for BrowserGym"
|
||||
optional = false
|
||||
python-versions = ">3.7"
|
||||
files = [
|
||||
{file = "browsergym_experiments-0.10.2-py3-none-any.whl", hash = "sha256:60a626b3159ef63b5ff72a6c8156c8f3cf82a9278dfc5a9d3ece39c2b1913595"},
|
||||
{file = "browsergym_experiments-0.10.2.tar.gz", hash = "sha256:b49bc27f315ad12014ff21580c7c7aca6489ca4106e7ab46502f716674efa236"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
browsergym-core = "0.10.2"
|
||||
dataclasses-json = "*"
|
||||
tiktoken = ">=0.4"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-miniwob"
|
||||
version = "0.10.2"
|
||||
@ -688,22 +635,6 @@ files = [
|
||||
[package.dependencies]
|
||||
browsergym-core = "0.10.2"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-visualwebarena"
|
||||
version = "0.10.2"
|
||||
description = "VisualWebArena benchmark for BrowserGym"
|
||||
optional = false
|
||||
python-versions = ">3.7"
|
||||
files = [
|
||||
{file = "browsergym_visualwebarena-0.10.2-py3-none-any.whl", hash = "sha256:87c913ccd4d12a79c625b5c4d9ead7e0bc50b298d19e413204bb586a67736d83"},
|
||||
{file = "browsergym_visualwebarena-0.10.2.tar.gz", hash = "sha256:5f84a4f33a21106c9b650cecb0362b78af2546d9927255828c273fe800d776a1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
browsergym-core = "0.10.2"
|
||||
libvisualwebarena = "0.0.14"
|
||||
requests = "*"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-webarena"
|
||||
version = "0.10.2"
|
||||
@ -719,26 +650,6 @@ files = [
|
||||
browsergym-core = "0.10.2"
|
||||
libwebarena = "0.0.3"
|
||||
|
||||
[[package]]
|
||||
name = "browsergym-workarena"
|
||||
version = "0.4.1"
|
||||
description = "WorkArena benchmark for BrowserGym"
|
||||
optional = false
|
||||
python-versions = ">3.7"
|
||||
files = [
|
||||
{file = "browsergym_workarena-0.4.1-py3-none-any.whl", hash = "sha256:b8f04b2e3801fd32962b7d99f0685c507b258841e2b4bfdb46d041091d2f1b89"},
|
||||
{file = "browsergym_workarena-0.4.1.tar.gz", hash = "sha256:ba2958d804b80836c7f81360d66b99c6c655c5070eddc5fae9c1c88306a23403"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
browsergym-core = ">=0.2"
|
||||
english-words = ">=2.0.1"
|
||||
faker = ">=24.8.0"
|
||||
numpy = ">=1.14"
|
||||
requests = ">=2.31"
|
||||
tenacity = ">=8.2.3"
|
||||
tqdm = ">=4.66.2"
|
||||
|
||||
[[package]]
|
||||
name = "build"
|
||||
version = "1.2.2.post1"
|
||||
@ -1659,16 +1570,6 @@ typing-extensions = ">=4.8.0"
|
||||
urllib3 = ">=1.25.3"
|
||||
websockets = ">=11.0.3"
|
||||
|
||||
[[package]]
|
||||
name = "english-words"
|
||||
version = "2.0.1"
|
||||
description = "Generate sets of english words by combining different word lists"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "english-words-2.0.1.tar.gz", hash = "sha256:a4105c57493bb757a3d8973fcf8e1dc05e7ca09c836dff467c3fb445f84bc43d"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "evaluate"
|
||||
version = "0.4.3"
|
||||
@ -1732,21 +1633,6 @@ files = [
|
||||
[package.extras]
|
||||
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
|
||||
|
||||
[[package]]
|
||||
name = "faker"
|
||||
version = "33.0.0"
|
||||
description = "Faker is a Python package that generates fake data for you."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "Faker-33.0.0-py3-none-any.whl", hash = "sha256:68e5580cb6b4226710886e595eabc13127149d6e71e9d1db65506a7fbe2c7fce"},
|
||||
{file = "faker-33.0.0.tar.gz", hash = "sha256:9b01019c1ddaf2253ca2308c0472116e993f4ad8fc9905f82fa965e0c6f932e9"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
python-dateutil = ">=2.4"
|
||||
typing-extensions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "farama-notifications"
|
||||
version = "0.0.4"
|
||||
@ -3099,39 +2985,6 @@ files = [
|
||||
[package.extras]
|
||||
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "imageio"
|
||||
version = "2.36.0"
|
||||
description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "imageio-2.36.0-py3-none-any.whl", hash = "sha256:471f1eda55618ee44a3c9960911c35e647d9284c68f077e868df633398f137f0"},
|
||||
{file = "imageio-2.36.0.tar.gz", hash = "sha256:1c8f294db862c256e9562354d65aa54725b8dafed7f10f02bb3ec20ec1678850"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = "*"
|
||||
pillow = ">=8.3.2"
|
||||
|
||||
[package.extras]
|
||||
all-plugins = ["astropy", "av", "imageio-ffmpeg", "numpy (>2)", "pillow-heif", "psutil", "rawpy", "tifffile"]
|
||||
all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"]
|
||||
build = ["wheel"]
|
||||
dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"]
|
||||
docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"]
|
||||
ffmpeg = ["imageio-ffmpeg", "psutil"]
|
||||
fits = ["astropy"]
|
||||
full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpy (>2)", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "rawpy", "sphinx (<6)", "tifffile", "wheel"]
|
||||
gdal = ["gdal"]
|
||||
itk = ["itk"]
|
||||
linting = ["black", "flake8"]
|
||||
pillow-heif = ["pillow-heif"]
|
||||
pyav = ["av"]
|
||||
rawpy = ["numpy (>2)", "rawpy"]
|
||||
test = ["fsspec[github]", "pytest", "pytest-cov"]
|
||||
tifffile = ["tifffile"]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-metadata"
|
||||
version = "7.1.0"
|
||||
@ -3885,52 +3738,6 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0"
|
||||
[package.extras]
|
||||
adal = ["adal (>=1.0.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "lazy-loader"
|
||||
version = "0.4"
|
||||
description = "Makes it easy to load subpackages and functions on demand."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
|
||||
{file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
packaging = "*"
|
||||
|
||||
[package.extras]
|
||||
dev = ["changelist (==0.5)"]
|
||||
lint = ["pre-commit (==3.7.0)"]
|
||||
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
|
||||
|
||||
[[package]]
|
||||
name = "libvisualwebarena"
|
||||
version = "0.0.14"
|
||||
description = "This is an unofficial, use-at-your-own risks port of the visualwebarena benchmark, for use as a standalone library package."
|
||||
optional = false
|
||||
python-versions = "<4,>=3.7"
|
||||
files = [
|
||||
{file = "libvisualwebarena-0.0.14-py3-none-any.whl", hash = "sha256:636b06ca1d52f1a363503b5b563492e83f2482efaf85bb26b69744565a499f0f"},
|
||||
{file = "libvisualwebarena-0.0.14.tar.gz", hash = "sha256:7e660179f60f1df8d884204f2b742a2117e7fe050823d839ca5744ea1c0709a7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiolimiter = "*"
|
||||
beartype = "0.12.0"
|
||||
evaluate = "*"
|
||||
flask = "*"
|
||||
gymnasium = "*"
|
||||
nltk = "*"
|
||||
openai = ">=1"
|
||||
Pillow = "*"
|
||||
playwright = ">=1.32,<1.40"
|
||||
scikit-image = ">=0.16"
|
||||
text-generation = "*"
|
||||
tiktoken = "*"
|
||||
transformers = "*"
|
||||
types-tqdm = "*"
|
||||
|
||||
[[package]]
|
||||
name = "libwebarena"
|
||||
version = "0.0.3"
|
||||
@ -8114,54 +7921,6 @@ files = [
|
||||
attrs = ">=18.0.0"
|
||||
pathspec = ">=0.10.1"
|
||||
|
||||
[[package]]
|
||||
name = "scikit-image"
|
||||
version = "0.24.0"
|
||||
description = "Image processing in Python"
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
files = [
|
||||
{file = "scikit_image-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb3bc0264b6ab30b43c4179ee6156bc18b4861e78bb329dd8d16537b7bbf827a"},
|
||||
{file = "scikit_image-0.24.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9c7a52e20cdd760738da38564ba1fed7942b623c0317489af1a598a8dedf088b"},
|
||||
{file = "scikit_image-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93f46e6ce42e5409f4d09ce1b0c7f80dd7e4373bcec635b6348b63e3c886eac8"},
|
||||
{file = "scikit_image-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39ee0af13435c57351a3397eb379e72164ff85161923eec0c38849fecf1b4764"},
|
||||
{file = "scikit_image-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:7ac7913b028b8aa780ffae85922894a69e33d1c0bf270ea1774f382fe8bf95e7"},
|
||||
{file = "scikit_image-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:272909e02a59cea3ed4aa03739bb88df2625daa809f633f40b5053cf09241831"},
|
||||
{file = "scikit_image-0.24.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:190ebde80b4470fe8838764b9b15f232a964f1a20391663e31008d76f0c696f7"},
|
||||
{file = "scikit_image-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c98cc695005faf2b79904e4663796c977af22586ddf1b12d6af2fa22842dc2"},
|
||||
{file = "scikit_image-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa27b3a0dbad807b966b8db2d78da734cb812ca4787f7fbb143764800ce2fa9c"},
|
||||
{file = "scikit_image-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:dacf591ac0c272a111181afad4b788a27fe70d213cfddd631d151cbc34f8ca2c"},
|
||||
{file = "scikit_image-0.24.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6fccceb54c9574590abcddc8caf6cefa57c13b5b8b4260ab3ff88ad8f3c252b3"},
|
||||
{file = "scikit_image-0.24.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ccc01e4760d655aab7601c1ba7aa4ddd8b46f494ac46ec9c268df6f33ccddf4c"},
|
||||
{file = "scikit_image-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18836a18d3a7b6aca5376a2d805f0045826bc6c9fc85331659c33b4813e0b563"},
|
||||
{file = "scikit_image-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8579bda9c3f78cb3b3ed8b9425213c53a25fa7e994b7ac01f2440b395babf660"},
|
||||
{file = "scikit_image-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:82ab903afa60b2da1da2e6f0c8c65e7c8868c60a869464c41971da929b3e82bc"},
|
||||
{file = "scikit_image-0.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef04360eda372ee5cd60aebe9be91258639c86ae2ea24093fb9182118008d009"},
|
||||
{file = "scikit_image-0.24.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e9aadb442360a7e76f0c5c9d105f79a83d6df0e01e431bd1d5757e2c5871a1f3"},
|
||||
{file = "scikit_image-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e37de6f4c1abcf794e13c258dc9b7d385d5be868441de11c180363824192ff7"},
|
||||
{file = "scikit_image-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4688c18bd7ec33c08d7bf0fd19549be246d90d5f2c1d795a89986629af0a1e83"},
|
||||
{file = "scikit_image-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:56dab751d20b25d5d3985e95c9b4e975f55573554bd76b0aedf5875217c93e69"},
|
||||
{file = "scikit_image-0.24.0.tar.gz", hash = "sha256:5d16efe95da8edbeb363e0c4157b99becbd650a60b77f6e3af5768b66cf007ab"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
imageio = ">=2.33"
|
||||
lazy-loader = ">=0.4"
|
||||
networkx = ">=2.8"
|
||||
numpy = ">=1.23"
|
||||
packaging = ">=21"
|
||||
pillow = ">=9.1"
|
||||
scipy = ">=1.9"
|
||||
tifffile = ">=2022.8.12"
|
||||
|
||||
[package.extras]
|
||||
build = ["Cython (>=3.0.4)", "build", "meson-python (>=0.15)", "ninja", "numpy (>=2.0.0rc1)", "packaging (>=21)", "pythran", "setuptools (>=67)", "spin (==0.8)", "wheel"]
|
||||
data = ["pooch (>=1.6.0)"]
|
||||
developer = ["ipython", "pre-commit", "tomli"]
|
||||
docs = ["PyWavelets (>=1.1.1)", "dask[array] (>=2022.9.2)", "ipykernel", "ipywidgets", "kaleido", "matplotlib (>=3.6)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=1.5)", "plotly (>=5.10)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.15.2)", "pytest-doctestplus", "pytest-runner", "scikit-learn (>=1.1)", "seaborn (>=0.11)", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-gallery (>=0.14)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
|
||||
optional = ["PyWavelets (>=1.1.1)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=0.2.1)", "dask[array] (>=2021.1.0)", "matplotlib (>=3.6)", "pooch (>=1.6.0)", "pyamg", "scikit-learn (>=1.1)"]
|
||||
test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
|
||||
|
||||
[[package]]
|
||||
name = "scikit-learn"
|
||||
version = "1.5.2"
|
||||
@ -8843,28 +8602,6 @@ files = [
|
||||
{file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tifffile"
|
||||
version = "2024.9.20"
|
||||
description = "Read and write TIFF files"
|
||||
optional = false
|
||||
python-versions = ">=3.10"
|
||||
files = [
|
||||
{file = "tifffile-2024.9.20-py3-none-any.whl", hash = "sha256:c54dc85bc1065d972cb8a6ffb3181389d597876aa80177933459733e4ed243dd"},
|
||||
{file = "tifffile-2024.9.20.tar.gz", hash = "sha256:3fbf3be2f995a7051a8ae05a4be70c96fc0789f22ed6f1c4104c973cf68a640b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = "*"
|
||||
|
||||
[package.extras]
|
||||
all = ["defusedxml", "fsspec", "imagecodecs (>=2023.8.12)", "lxml", "matplotlib", "zarr"]
|
||||
codecs = ["imagecodecs (>=2023.8.12)"]
|
||||
plot = ["matplotlib"]
|
||||
test = ["cmapfile", "czifile", "dask", "defusedxml", "fsspec", "imagecodecs", "lfdfiles", "lxml", "ndtiff", "oiffile", "psdtags", "pytest", "roifile", "xarray", "zarr"]
|
||||
xml = ["defusedxml", "lxml"]
|
||||
zarr = ["fsspec", "zarr"]
|
||||
|
||||
[[package]]
|
||||
name = "tiktoken"
|
||||
version = "0.8.0"
|
||||
@ -10350,4 +10087,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "ff370b7b5077720b73fe3b90cc1b7fb9c7a262bfbd35885bb717369061e8a466"
|
||||
content-hash = "56a80082afb76e518239060855598921d94a0373123b2d9222cf8c7b6238b7ad"
|
||||
|
||||
@ -28,7 +28,7 @@ uvicorn = "*"
|
||||
types-toml = "*"
|
||||
numpy = "*"
|
||||
json-repair = "*"
|
||||
browsergym = "0.10.2" # integrate browsergym as the browsing interface
|
||||
browsergym-core = "0.10.2" # integrate browsergym-core as the browsing interface
|
||||
html2text = "*"
|
||||
e2b = ">=0.17.1,<1.1.0"
|
||||
pexpect = "*"
|
||||
@ -63,6 +63,7 @@ opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
|
||||
modal = "^0.66.26"
|
||||
runloop-api-client = "0.10.0"
|
||||
pygithub = "^2.5.0"
|
||||
joblib = "*"
|
||||
openhands-aci = "^0.1.1"
|
||||
python-socketio = "^5.11.4"
|
||||
redis = "^5.2.0"
|
||||
@ -142,6 +143,8 @@ gdown = "*"
|
||||
matplotlib = "*"
|
||||
seaborn = "*"
|
||||
tabulate = "*"
|
||||
browsergym-webarena = "0.10.2"
|
||||
browsergym-miniwob = "0.10.2"
|
||||
|
||||
[tool.poetry-dynamic-versioning]
|
||||
enable = true
|
||||
|
||||
73
tests/runtime/test_browsergym_envs.py
Normal file
73
tests/runtime/test_browsergym_envs.py
Normal file
@ -0,0 +1,73 @@
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action.browse import BrowseInteractiveAction
|
||||
from openhands.events.observation.browse import BrowserOutputObservation
|
||||
from tests.runtime.conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
|
||||
def has_miniwob() -> bool:
    """Report whether the ``browsergym.miniwob`` module can be located.

    Used as the ``skipif`` condition for tests that need the optional
    ``browsergym-miniwob`` package. Only the import machinery is consulted:
    the ``browsergym.miniwob`` module itself is never executed.

    Returns:
        True when a module spec for ``browsergym.miniwob`` is found,
        False otherwise.
    """
    import importlib.util

    try:
        # For a dotted name, find_spec imports the parent package first and
        # raises ModuleNotFoundError (an ImportError) when it is missing.
        # It raises ValueError when the module is already in sys.modules
        # but carries no usable __spec__.
        spec = importlib.util.find_spec('browsergym.miniwob')
    except (ImportError, ValueError):
        return False
    return spec is not None
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not has_miniwob(),
    reason='Requires browsergym-miniwob package to be installed',
)
def test_browsergym_eval_env(runtime_cls, temp_dir):
    """Check goal, observation and reward retrieval in a BrowserGym eval env.

    Loads a runtime configured with the ``browsergym/miniwob.choose-list``
    evaluation environment, then issues three browser actions: fetch the
    goal, run a ``noop()`` to obtain an observation, and fetch the rewards.

    Args:
        runtime_cls: Runtime class passed through to ``_load_runtime``.
        temp_dir: Temporary directory passed through to ``_load_runtime``.
    """
    runtime = _load_runtime(
        temp_dir,
        runtime_cls=runtime_cls,
        run_as_openhands=False,  # need root permission to access file
        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
        browsergym_eval_env='browsergym/miniwob.choose-list',
        force_rebuild_runtime=True,
    )
    # Everything after the runtime is created runs under try/finally so the
    # runtime is closed even when an assertion or action fails.
    try:
        from openhands.runtime.browser.browser_env import (
            BROWSER_EVAL_GET_GOAL_ACTION,
            BROWSER_EVAL_GET_REWARDS_ACTION,
        )

        # Test browse
        action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Select' in obs.content
        assert 'from the list and click Submit' in obs.content

        # Make sure the browser can produce observation in eval env
        action = BrowseInteractiveAction(browser_actions='noop()')
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            obs.url.strip()
            == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
        )

        # Make sure the rewards are working
        action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert json.loads(obs.content) == [0.0]
    finally:
        _close_test_runtime(runtime)
|
||||
@ -1,12 +1,9 @@
|
||||
"""Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
|
||||
|
||||
import json
|
||||
|
||||
from conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import (
|
||||
BrowseInteractiveAction,
|
||||
BrowseURLAction,
|
||||
CmdRunAction,
|
||||
)
|
||||
@ -16,7 +13,8 @@ from openhands.events.observation import (
|
||||
)
|
||||
|
||||
# ============================================================================================================================
|
||||
# Browsing tests
|
||||
# Browsing tests, without evaluation (poetry install --without evaluation)
|
||||
# Eval-environment tests live in tests/runtime/test_browsergym_envs.py and need the evaluation dependencies (plain `poetry install`)
|
||||
# ============================================================================================================================
|
||||
|
||||
# Command prefix that runs python3 through `micromamba run` in the environment
# named `openhands` (presumably the interpreter inside the sandbox image; confirm).
PY3_FOR_TESTING = '/openhands/micromamba/bin/micromamba run -n openhands python3'
|
||||
@ -66,48 +64,3 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
|
||||
assert obs.exit_code == 0
|
||||
|
||||
_close_test_runtime(runtime)
|
||||
|
||||
|
||||
def test_browsergym_eval_env(runtime_cls, temp_dir):
    """Check goal, observation and reward retrieval in a BrowserGym eval env.

    Loads a runtime configured with the ``browsergym/miniwob.choose-list``
    evaluation environment, then issues three browser actions: fetch the
    goal, run a ``noop()`` to obtain an observation, and fetch the rewards.

    Args:
        runtime_cls: Runtime class passed through to ``_load_runtime``.
        temp_dir: Temporary directory passed through to ``_load_runtime``.
    """
    runtime = _load_runtime(
        temp_dir,
        runtime_cls=runtime_cls,
        run_as_openhands=False,  # need root permission to access file
        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
        browsergym_eval_env='browsergym/miniwob.choose-list',
        force_rebuild_runtime=True,
    )
    # Everything after the runtime is created runs under try/finally so the
    # runtime is closed even when an assertion or action fails.
    try:
        from openhands.runtime.browser.browser_env import (
            BROWSER_EVAL_GET_GOAL_ACTION,
            BROWSER_EVAL_GET_REWARDS_ACTION,
        )

        # Test browse
        action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})

        assert isinstance(obs, BrowserOutputObservation)
        assert not obs.error
        assert 'Select' in obs.content
        assert 'from the list and click Submit' in obs.content

        # Make sure the browser can produce observation in eval env
        action = BrowseInteractiveAction(browser_actions='noop()')
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert (
            obs.url.strip()
            == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
        )

        # Make sure the rewards are working
        action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
        logger.info(action, extra={'msg_type': 'ACTION'})
        obs = runtime.run_action(action)
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert json.loads(obs.content) == [0.0]
    finally:
        _close_test_runtime(runtime)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user