Switch dependency to browsergym-core (#5242)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Engel Nyst 2024-12-02 14:52:02 +01:00 committed by GitHub
parent 4b633782e5
commit cd22817004
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 82 additions and 316 deletions

View File

@ -291,7 +291,7 @@ jobs:
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=false \
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
@ -368,7 +368,7 @@ jobs:
SANDBOX_RUNTIME_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=true \
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime
poetry run pytest -n 3 -raRs --reruns 2 --reruns-delay 5 --cov=openhands --cov-report=xml -s ./tests/runtime --ignore=tests/runtime/test_browsergym_envs.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:

265
poetry.lock generated
View File

@ -601,43 +601,6 @@ urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >
[package.extras]
crt = ["awscrt (==0.22.0)"]
[[package]]
name = "browsergym"
version = "0.10.2"
description = "BrowserGym: a gym environment for web task automation in the Chromium browser"
optional = false
python-versions = ">3.7"
files = [
{file = "browsergym-0.10.2-py3-none-any.whl", hash = "sha256:9581d1d1f1fcd1cf35266cf30c881d60c147a0d374b3491eeaebb07d9690f868"},
{file = "browsergym-0.10.2.tar.gz", hash = "sha256:3cdd7520cca857421aa7ec0a965968df4bcef721299a424397f86d7cad078ab0"},
]
[package.dependencies]
browsergym-assistantbench = "0.10.2"
browsergym-core = "0.10.2"
browsergym-experiments = "0.10.2"
browsergym-miniwob = "0.10.2"
browsergym-visualwebarena = "0.10.2"
browsergym-webarena = "0.10.2"
browsergym-workarena = ">=0.4.1"
[[package]]
name = "browsergym-assistantbench"
version = "0.10.2"
description = "AssistantBench benchmark for BrowserGym"
optional = false
python-versions = ">3.7"
files = [
{file = "browsergym_assistantbench-0.10.2-py3-none-any.whl", hash = "sha256:af0d3a3e23686066b070feca38f8740262bed6d65ccf9098f393334a005987c0"},
{file = "browsergym_assistantbench-0.10.2.tar.gz", hash = "sha256:de18eb7c010403d5d467b927b4713b56f6e97a59493bee4c42599d4d7cb54dce"},
]
[package.dependencies]
browsergym-core = "0.10.2"
datasets = "*"
numpy = "*"
scipy = "*"
[[package]]
name = "browsergym-core"
version = "0.10.2"
@ -658,22 +621,6 @@ pillow = ">=10.1"
playwright = ">=1.39,<2.0"
pyparsing = ">=3"
[[package]]
name = "browsergym-experiments"
version = "0.10.2"
description = "Experimentation tools for BrowserGym"
optional = false
python-versions = ">3.7"
files = [
{file = "browsergym_experiments-0.10.2-py3-none-any.whl", hash = "sha256:60a626b3159ef63b5ff72a6c8156c8f3cf82a9278dfc5a9d3ece39c2b1913595"},
{file = "browsergym_experiments-0.10.2.tar.gz", hash = "sha256:b49bc27f315ad12014ff21580c7c7aca6489ca4106e7ab46502f716674efa236"},
]
[package.dependencies]
browsergym-core = "0.10.2"
dataclasses-json = "*"
tiktoken = ">=0.4"
[[package]]
name = "browsergym-miniwob"
version = "0.10.2"
@ -688,22 +635,6 @@ files = [
[package.dependencies]
browsergym-core = "0.10.2"
[[package]]
name = "browsergym-visualwebarena"
version = "0.10.2"
description = "VisualWebArena benchmark for BrowserGym"
optional = false
python-versions = ">3.7"
files = [
{file = "browsergym_visualwebarena-0.10.2-py3-none-any.whl", hash = "sha256:87c913ccd4d12a79c625b5c4d9ead7e0bc50b298d19e413204bb586a67736d83"},
{file = "browsergym_visualwebarena-0.10.2.tar.gz", hash = "sha256:5f84a4f33a21106c9b650cecb0362b78af2546d9927255828c273fe800d776a1"},
]
[package.dependencies]
browsergym-core = "0.10.2"
libvisualwebarena = "0.0.14"
requests = "*"
[[package]]
name = "browsergym-webarena"
version = "0.10.2"
@ -719,26 +650,6 @@ files = [
browsergym-core = "0.10.2"
libwebarena = "0.0.3"
[[package]]
name = "browsergym-workarena"
version = "0.4.1"
description = "WorkArena benchmark for BrowserGym"
optional = false
python-versions = ">3.7"
files = [
{file = "browsergym_workarena-0.4.1-py3-none-any.whl", hash = "sha256:b8f04b2e3801fd32962b7d99f0685c507b258841e2b4bfdb46d041091d2f1b89"},
{file = "browsergym_workarena-0.4.1.tar.gz", hash = "sha256:ba2958d804b80836c7f81360d66b99c6c655c5070eddc5fae9c1c88306a23403"},
]
[package.dependencies]
browsergym-core = ">=0.2"
english-words = ">=2.0.1"
faker = ">=24.8.0"
numpy = ">=1.14"
requests = ">=2.31"
tenacity = ">=8.2.3"
tqdm = ">=4.66.2"
[[package]]
name = "build"
version = "1.2.2.post1"
@ -1659,16 +1570,6 @@ typing-extensions = ">=4.8.0"
urllib3 = ">=1.25.3"
websockets = ">=11.0.3"
[[package]]
name = "english-words"
version = "2.0.1"
description = "Generate sets of english words by combining different word lists"
optional = false
python-versions = "*"
files = [
{file = "english-words-2.0.1.tar.gz", hash = "sha256:a4105c57493bb757a3d8973fcf8e1dc05e7ca09c836dff467c3fb445f84bc43d"},
]
[[package]]
name = "evaluate"
version = "0.4.3"
@ -1732,21 +1633,6 @@ files = [
[package.extras]
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
[[package]]
name = "faker"
version = "33.0.0"
description = "Faker is a Python package that generates fake data for you."
optional = false
python-versions = ">=3.8"
files = [
{file = "Faker-33.0.0-py3-none-any.whl", hash = "sha256:68e5580cb6b4226710886e595eabc13127149d6e71e9d1db65506a7fbe2c7fce"},
{file = "faker-33.0.0.tar.gz", hash = "sha256:9b01019c1ddaf2253ca2308c0472116e993f4ad8fc9905f82fa965e0c6f932e9"},
]
[package.dependencies]
python-dateutil = ">=2.4"
typing-extensions = "*"
[[package]]
name = "farama-notifications"
version = "0.0.4"
@ -3099,39 +2985,6 @@ files = [
[package.extras]
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
[[package]]
name = "imageio"
version = "2.36.0"
description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
optional = false
python-versions = ">=3.9"
files = [
{file = "imageio-2.36.0-py3-none-any.whl", hash = "sha256:471f1eda55618ee44a3c9960911c35e647d9284c68f077e868df633398f137f0"},
{file = "imageio-2.36.0.tar.gz", hash = "sha256:1c8f294db862c256e9562354d65aa54725b8dafed7f10f02bb3ec20ec1678850"},
]
[package.dependencies]
numpy = "*"
pillow = ">=8.3.2"
[package.extras]
all-plugins = ["astropy", "av", "imageio-ffmpeg", "numpy (>2)", "pillow-heif", "psutil", "rawpy", "tifffile"]
all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"]
build = ["wheel"]
dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"]
docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"]
ffmpeg = ["imageio-ffmpeg", "psutil"]
fits = ["astropy"]
full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpy (>2)", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "rawpy", "sphinx (<6)", "tifffile", "wheel"]
gdal = ["gdal"]
itk = ["itk"]
linting = ["black", "flake8"]
pillow-heif = ["pillow-heif"]
pyav = ["av"]
rawpy = ["numpy (>2)", "rawpy"]
test = ["fsspec[github]", "pytest", "pytest-cov"]
tifffile = ["tifffile"]
[[package]]
name = "importlib-metadata"
version = "7.1.0"
@ -3885,52 +3738,6 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0"
[package.extras]
adal = ["adal (>=1.0.2)"]
[[package]]
name = "lazy-loader"
version = "0.4"
description = "Makes it easy to load subpackages and functions on demand."
optional = false
python-versions = ">=3.7"
files = [
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
{file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"},
]
[package.dependencies]
packaging = "*"
[package.extras]
dev = ["changelist (==0.5)"]
lint = ["pre-commit (==3.7.0)"]
test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"]
[[package]]
name = "libvisualwebarena"
version = "0.0.14"
description = "This is an unofficial, use-at-your-own risks port of the visualwebarena benchmark, for use as a standalone library package."
optional = false
python-versions = "<4,>=3.7"
files = [
{file = "libvisualwebarena-0.0.14-py3-none-any.whl", hash = "sha256:636b06ca1d52f1a363503b5b563492e83f2482efaf85bb26b69744565a499f0f"},
{file = "libvisualwebarena-0.0.14.tar.gz", hash = "sha256:7e660179f60f1df8d884204f2b742a2117e7fe050823d839ca5744ea1c0709a7"},
]
[package.dependencies]
aiolimiter = "*"
beartype = "0.12.0"
evaluate = "*"
flask = "*"
gymnasium = "*"
nltk = "*"
openai = ">=1"
Pillow = "*"
playwright = ">=1.32,<1.40"
scikit-image = ">=0.16"
text-generation = "*"
tiktoken = "*"
transformers = "*"
types-tqdm = "*"
[[package]]
name = "libwebarena"
version = "0.0.3"
@ -8114,54 +7921,6 @@ files = [
attrs = ">=18.0.0"
pathspec = ">=0.10.1"
[[package]]
name = "scikit-image"
version = "0.24.0"
description = "Image processing in Python"
optional = false
python-versions = ">=3.9"
files = [
{file = "scikit_image-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb3bc0264b6ab30b43c4179ee6156bc18b4861e78bb329dd8d16537b7bbf827a"},
{file = "scikit_image-0.24.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9c7a52e20cdd760738da38564ba1fed7942b623c0317489af1a598a8dedf088b"},
{file = "scikit_image-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93f46e6ce42e5409f4d09ce1b0c7f80dd7e4373bcec635b6348b63e3c886eac8"},
{file = "scikit_image-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39ee0af13435c57351a3397eb379e72164ff85161923eec0c38849fecf1b4764"},
{file = "scikit_image-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:7ac7913b028b8aa780ffae85922894a69e33d1c0bf270ea1774f382fe8bf95e7"},
{file = "scikit_image-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:272909e02a59cea3ed4aa03739bb88df2625daa809f633f40b5053cf09241831"},
{file = "scikit_image-0.24.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:190ebde80b4470fe8838764b9b15f232a964f1a20391663e31008d76f0c696f7"},
{file = "scikit_image-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c98cc695005faf2b79904e4663796c977af22586ddf1b12d6af2fa22842dc2"},
{file = "scikit_image-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa27b3a0dbad807b966b8db2d78da734cb812ca4787f7fbb143764800ce2fa9c"},
{file = "scikit_image-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:dacf591ac0c272a111181afad4b788a27fe70d213cfddd631d151cbc34f8ca2c"},
{file = "scikit_image-0.24.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6fccceb54c9574590abcddc8caf6cefa57c13b5b8b4260ab3ff88ad8f3c252b3"},
{file = "scikit_image-0.24.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ccc01e4760d655aab7601c1ba7aa4ddd8b46f494ac46ec9c268df6f33ccddf4c"},
{file = "scikit_image-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18836a18d3a7b6aca5376a2d805f0045826bc6c9fc85331659c33b4813e0b563"},
{file = "scikit_image-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8579bda9c3f78cb3b3ed8b9425213c53a25fa7e994b7ac01f2440b395babf660"},
{file = "scikit_image-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:82ab903afa60b2da1da2e6f0c8c65e7c8868c60a869464c41971da929b3e82bc"},
{file = "scikit_image-0.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef04360eda372ee5cd60aebe9be91258639c86ae2ea24093fb9182118008d009"},
{file = "scikit_image-0.24.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e9aadb442360a7e76f0c5c9d105f79a83d6df0e01e431bd1d5757e2c5871a1f3"},
{file = "scikit_image-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e37de6f4c1abcf794e13c258dc9b7d385d5be868441de11c180363824192ff7"},
{file = "scikit_image-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4688c18bd7ec33c08d7bf0fd19549be246d90d5f2c1d795a89986629af0a1e83"},
{file = "scikit_image-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:56dab751d20b25d5d3985e95c9b4e975f55573554bd76b0aedf5875217c93e69"},
{file = "scikit_image-0.24.0.tar.gz", hash = "sha256:5d16efe95da8edbeb363e0c4157b99becbd650a60b77f6e3af5768b66cf007ab"},
]
[package.dependencies]
imageio = ">=2.33"
lazy-loader = ">=0.4"
networkx = ">=2.8"
numpy = ">=1.23"
packaging = ">=21"
pillow = ">=9.1"
scipy = ">=1.9"
tifffile = ">=2022.8.12"
[package.extras]
build = ["Cython (>=3.0.4)", "build", "meson-python (>=0.15)", "ninja", "numpy (>=2.0.0rc1)", "packaging (>=21)", "pythran", "setuptools (>=67)", "spin (==0.8)", "wheel"]
data = ["pooch (>=1.6.0)"]
developer = ["ipython", "pre-commit", "tomli"]
docs = ["PyWavelets (>=1.1.1)", "dask[array] (>=2022.9.2)", "ipykernel", "ipywidgets", "kaleido", "matplotlib (>=3.6)", "myst-parser", "numpydoc (>=1.7)", "pandas (>=1.5)", "plotly (>=5.10)", "pooch (>=1.6)", "pydata-sphinx-theme (>=0.15.2)", "pytest-doctestplus", "pytest-runner", "scikit-learn (>=1.1)", "seaborn (>=0.11)", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-gallery (>=0.14)", "sphinx_design (>=0.5)", "tifffile (>=2022.8.12)"]
optional = ["PyWavelets (>=1.1.1)", "SimpleITK", "astropy (>=5.0)", "cloudpickle (>=0.2.1)", "dask[array] (>=2021.1.0)", "matplotlib (>=3.6)", "pooch (>=1.6.0)", "pyamg", "scikit-learn (>=1.1)"]
test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest-cov (>=2.11.0)", "pytest-doctestplus", "pytest-faulthandler", "pytest-localserver"]
[[package]]
name = "scikit-learn"
version = "1.5.2"
@ -8843,28 +8602,6 @@ files = [
{file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"},
]
[[package]]
name = "tifffile"
version = "2024.9.20"
description = "Read and write TIFF files"
optional = false
python-versions = ">=3.10"
files = [
{file = "tifffile-2024.9.20-py3-none-any.whl", hash = "sha256:c54dc85bc1065d972cb8a6ffb3181389d597876aa80177933459733e4ed243dd"},
{file = "tifffile-2024.9.20.tar.gz", hash = "sha256:3fbf3be2f995a7051a8ae05a4be70c96fc0789f22ed6f1c4104c973cf68a640b"},
]
[package.dependencies]
numpy = "*"
[package.extras]
all = ["defusedxml", "fsspec", "imagecodecs (>=2023.8.12)", "lxml", "matplotlib", "zarr"]
codecs = ["imagecodecs (>=2023.8.12)"]
plot = ["matplotlib"]
test = ["cmapfile", "czifile", "dask", "defusedxml", "fsspec", "imagecodecs", "lfdfiles", "lxml", "ndtiff", "oiffile", "psdtags", "pytest", "roifile", "xarray", "zarr"]
xml = ["defusedxml", "lxml"]
zarr = ["fsspec", "zarr"]
[[package]]
name = "tiktoken"
version = "0.8.0"
@ -10350,4 +10087,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "ff370b7b5077720b73fe3b90cc1b7fb9c7a262bfbd35885bb717369061e8a466"
content-hash = "56a80082afb76e518239060855598921d94a0373123b2d9222cf8c7b6238b7ad"

View File

@ -28,7 +28,7 @@ uvicorn = "*"
types-toml = "*"
numpy = "*"
json-repair = "*"
browsergym = "0.10.2" # integrate browsergym as the browsing interface
browsergym-core = "0.10.2" # integrate browsergym-core as the browsing interface
html2text = "*"
e2b = ">=0.17.1,<1.1.0"
pexpect = "*"
@ -63,6 +63,7 @@ opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
modal = "^0.66.26"
runloop-api-client = "0.10.0"
pygithub = "^2.5.0"
joblib = "*"
openhands-aci = "^0.1.1"
python-socketio = "^5.11.4"
redis = "^5.2.0"
@ -142,6 +143,8 @@ gdown = "*"
matplotlib = "*"
seaborn = "*"
tabulate = "*"
browsergym-webarena = "0.10.2"
browsergym-miniwob = "0.10.2"
[tool.poetry-dynamic-versioning]
enable = true

View File

@ -0,0 +1,73 @@
import json
import pytest
from openhands.core.logger import openhands_logger as logger
from openhands.events.action.browse import BrowseInteractiveAction
from openhands.events.observation.browse import BrowserOutputObservation
from tests.runtime.conftest import _close_test_runtime, _load_runtime
def has_miniwob():
try:
import importlib.util
# try to find this browser environment, if it was installed
spec = importlib.util.find_spec('browsergym.miniwob')
if spec is None:
return False
# try to import this environment
importlib.util.module_from_spec(spec)
return True
except ImportError:
return False
@pytest.mark.skipif(
not has_miniwob(),
reason='Requires browsergym-miniwob package to be installed',
)
def test_browsergym_eval_env(runtime_cls, temp_dir):
runtime = _load_runtime(
temp_dir,
runtime_cls=runtime_cls,
run_as_openhands=False, # need root permission to access file
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
browsergym_eval_env='browsergym/miniwob.choose-list',
force_rebuild_runtime=True,
)
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
# Test browse
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, BrowserOutputObservation)
assert not obs.error
assert 'Select' in obs.content
assert 'from the list and click Submit' in obs.content
# Make sure the browser can produce observation in eval env
action = BrowseInteractiveAction(browser_actions='noop()')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert (
obs.url.strip()
== 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
)
# Make sure the rewards are working
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert json.loads(obs.content) == [0.0]
_close_test_runtime(runtime)

View File

@ -1,12 +1,9 @@
"""Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
import json
from conftest import _close_test_runtime, _load_runtime
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
BrowseInteractiveAction,
BrowseURLAction,
CmdRunAction,
)
@ -16,7 +13,8 @@ from openhands.events.observation import (
)
# ============================================================================================================================
# Browsing tests
# Browsing tests, without evaluation (poetry install --without evaluation)
# For eval environments, tests need to run with poetry install
# ============================================================================================================================
PY3_FOR_TESTING = '/openhands/micromamba/bin/micromamba run -n openhands python3'
@ -66,48 +64,3 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
assert obs.exit_code == 0
_close_test_runtime(runtime)
def test_browsergym_eval_env(runtime_cls, temp_dir):
runtime = _load_runtime(
temp_dir,
runtime_cls=runtime_cls,
run_as_openhands=False, # need root permission to access file
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
browsergym_eval_env='browsergym/miniwob.choose-list',
force_rebuild_runtime=True,
)
from openhands.runtime.browser.browser_env import (
BROWSER_EVAL_GET_GOAL_ACTION,
BROWSER_EVAL_GET_REWARDS_ACTION,
)
# Test browse
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, BrowserOutputObservation)
assert not obs.error
assert 'Select' in obs.content
assert 'from the list and click Submit' in obs.content
# Make sure the browser can produce observation in eva[l
action = BrowseInteractiveAction(browser_actions='noop()')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert (
obs.url.strip()
== 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
)
# Make sure the rewards are working
action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert json.loads(obs.content) == [0.0]
_close_test_runtime(runtime)