From 19ca52f95412d70988010d4a4e39b8681c6857eb Mon Sep 17 00:00:00 2001 From: Boxuan Li Date: Mon, 21 Jul 2025 08:34:11 -0700 Subject: [PATCH] Skip browser dependency build in Dockerfile when browser is disabled (#9815) --- .../runtime/impl/docker/docker_runtime.py | 1 + .../runtime/impl/remote/remote_runtime.py | 1 + openhands/runtime/utils/runtime_build.py | 30 ++++++++++++++--- .../utils/runtime_templates/Dockerfile.j2 | 2 ++ tests/runtime/conftest.py | 2 +- tests/runtime/test_aci_edit.py | 4 ++- tests/runtime/test_browsergym_envs.py | 1 + tests/runtime/test_browsing.py | 32 ++++++++++++++----- tests/runtime/test_mcp_action.py | 8 ++++- tests/unit/test_runtime_build.py | 31 ++++++++++++++++-- .../runtime/impl/modal/modal_runtime.py | 3 +- 11 files changed, 97 insertions(+), 18 deletions(-) diff --git a/openhands/runtime/impl/docker/docker_runtime.py b/openhands/runtime/impl/docker/docker_runtime.py index 1785576dca..03dfb7b949 100644 --- a/openhands/runtime/impl/docker/docker_runtime.py +++ b/openhands/runtime/impl/docker/docker_runtime.py @@ -210,6 +210,7 @@ class DockerRuntime(ActionExecutionClient): extra_deps=self.config.sandbox.runtime_extra_deps, force_rebuild=self.config.sandbox.force_rebuild_runtime, extra_build_args=self.config.sandbox.runtime_extra_build_args, + enable_browser=self.config.enable_browser, ) @staticmethod diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 146670e9ad..14fc54ff29 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -250,6 +250,7 @@ class RemoteRuntime(ActionExecutionClient): platform=self.config.sandbox.platform, extra_deps=self.config.sandbox.runtime_extra_deps, force_rebuild=self.config.sandbox.force_rebuild_runtime, + enable_browser=self.config.enable_browser, ) response = self._send_runtime_api_request( diff --git a/openhands/runtime/utils/runtime_build.py b/openhands/runtime/utils/runtime_build.py index ff85c7d681..65702d15df 100644 --- a/openhands/runtime/utils/runtime_build.py +++ b/openhands/runtime/utils/runtime_build.py @@ -32,6 +32,7 @@ def _generate_dockerfile( base_image: str, build_from: BuildFromImageType = BuildFromImageType.SCRATCH, extra_deps: str | None = None, + enable_browser: bool = True, ) -> str: """Generate the Dockerfile content for the runtime image based on the base image. @@ -39,6 +40,7 @@ def _generate_dockerfile( - base_image (str): The base image provided for the runtime image - build_from (BuildFromImageType): The build method for the runtime image. - extra_deps (str): + - enable_browser (bool): Whether to enable browser support (install Playwright) Returns: - str: The resulting Dockerfile content @@ -55,6 +57,7 @@ def _generate_dockerfile( build_from_scratch=build_from == BuildFromImageType.SCRATCH, build_from_versioned=build_from == BuildFromImageType.VERSIONED, extra_deps=extra_deps if extra_deps is not None else '', + enable_browser=enable_browser, ) return dockerfile_content @@ -111,6 +114,7 @@ def build_runtime_image( dry_run: bool = False, force_rebuild: bool = False, extra_build_args: list[str] | None = None, + enable_browser: bool = True, ) -> str: """Prepares the final docker build folder. @@ -125,6 +129,7 @@ def build_runtime_image( - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image - force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image - extra_build_args (List[str]): Additional build arguments to pass to the builder + - enable_browser (bool): Whether to enable browser support (install Playwright) Returns: - str: :. Where MD5 hash is the hash of the docker build folder @@ -142,6 +147,7 @@ def build_runtime_image( force_rebuild=force_rebuild, platform=platform, extra_build_args=extra_build_args, + enable_browser=enable_browser, ) return result @@ -154,6 +160,7 @@ def build_runtime_image( force_rebuild=force_rebuild, platform=platform, extra_build_args=extra_build_args, + enable_browser=enable_browser, ) return result @@ -167,9 +174,10 @@ def build_runtime_image_in_folder( force_rebuild: bool, platform: str | None = None, extra_build_args: list[str] | None = None, + enable_browser: bool = True, ) -> str: runtime_image_repo, _ = get_runtime_image_repo_and_tag(base_image) - lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image)}' + lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image, enable_browser)}' versioned_tag = ( # truncate the base image to 96 characters to fit in the tag max length (128 characters) f'oh_v{oh_version}_{get_tag_for_versioned_image(base_image)}' @@ -188,6 +196,7 @@ def build_runtime_image_in_folder( base_image, build_from=BuildFromImageType.SCRATCH, extra_deps=extra_deps, + enable_browser=enable_browser, ) if not dry_run: _build_sandbox_image( @@ -226,7 +235,7 @@ def build_runtime_image_in_folder( else: logger.debug(f'Build [{hash_image_name}] from scratch') - prep_build_folder(build_folder, base_image, build_from, extra_deps) + prep_build_folder(build_folder, base_image, build_from, extra_deps, enable_browser) if not dry_run: _build_sandbox_image( build_folder, @@ -251,6 +260,7 @@ def prep_build_folder( base_image: str, build_from: BuildFromImageType, extra_deps: str | None, + enable_browser: bool = True, ) -> None: # Copy the source code to directory. It will end up in build_folder/code # If package is not found, build from source code @@ -282,6 +292,7 @@ def prep_build_folder( base_image, build_from=build_from, extra_deps=extra_deps, + enable_browser=enable_browser, ) dockerfile_path = Path(build_folder, 'Dockerfile') with open(str(dockerfile_path), 'w') as f: @@ -301,10 +312,13 @@ def truncate_hash(hash: str) -> str: return ''.join(result) -def get_hash_for_lock_files(base_image: str) -> str: +def get_hash_for_lock_files(base_image: str, enable_browser: bool = True) -> str: openhands_source_dir = Path(openhands.__file__).parent md5 = hashlib.md5() md5.update(base_image.encode()) + # Only include enable_browser in hash when it's False for backward compatibility + if not enable_browser: + md5.update(str(enable_browser).encode()) for file in ['pyproject.toml', 'poetry.lock']: src = Path(openhands_source_dir, file) if not src.exists(): @@ -378,6 +392,10 @@ if __name__ == '__main__': parser.add_argument('--build_folder', type=str, default=None) parser.add_argument('--force_rebuild', action='store_true', default=False) parser.add_argument('--platform', type=str, default=None) + parser.add_argument('--enable_browser', action='store_true', default=True) + parser.add_argument( + '--no_enable_browser', dest='enable_browser', action='store_false' + ) args = parser.parse_args() if args.build_folder is not None: @@ -409,6 +427,7 @@ if __name__ == '__main__': dry_run=True, force_rebuild=args.force_rebuild, platform=args.platform, + enable_browser=args.enable_browser, ) _runtime_image_repo, runtime_image_source_tag = ( @@ -444,6 +463,9 @@ if __name__ == '__main__': logger.debug('Building image in a temporary folder') docker_builder = DockerRuntimeBuilder(docker.from_env()) image_name = build_runtime_image( - args.base_image, docker_builder, platform=args.platform + args.base_image, + docker_builder, + platform=args.platform, + enable_browser=args.enable_browser, ) logger.debug(f'\nBuilt image: {image_name}\n') diff --git a/openhands/runtime/utils/runtime_templates/Dockerfile.j2 b/openhands/runtime/utils/runtime_templates/Dockerfile.j2 index 8b86016516..14c1cb37e7 100644 --- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2 +++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2 @@ -127,7 +127,9 @@ RUN \ /openhands/micromamba/bin/micromamba run -n openhands poetry install --only main,runtime --no-interaction --no-root && \ # Update and install additional tools # (There used to be an "apt-get update" here, hopefully we can skip it.) + {% if enable_browser %} /openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \ + {% endif %} # Set environment variables /openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \ # Set permissions diff --git a/tests/runtime/conftest.py b/tests/runtime/conftest.py index 05f9a5417f..3e1aa0632a 100644 --- a/tests/runtime/conftest.py +++ b/tests/runtime/conftest.py @@ -212,7 +212,7 @@ def _load_runtime( runtime_startup_env_vars: dict[str, str] | None = None, docker_runtime_kwargs: dict[str, str] | None = None, override_mcp_config: MCPConfig | None = None, - enable_browser: bool = True, + enable_browser: bool = False, ) -> tuple[Runtime, OpenHandsConfig]: sid = 'rt_' + str(random.randint(100000, 999999)) diff --git a/tests/runtime/test_aci_edit.py b/tests/runtime/test_aci_edit.py index 68d2f8c0c5..401fc98f23 100644 --- a/tests/runtime/test_aci_edit.py +++ b/tests/runtime/test_aci_edit.py @@ -38,7 +38,9 @@ def test_view_file(temp_dir, runtime_cls, run_as_openhands): def test_view_directory(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create test file test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt') diff --git a/tests/runtime/test_browsergym_envs.py b/tests/runtime/test_browsergym_envs.py index ad31d84647..028693102d 100644 --- a/tests/runtime/test_browsergym_envs.py +++ b/tests/runtime/test_browsergym_envs.py @@ -36,6 +36,7 @@ def test_browsergym_eval_env(runtime_cls, temp_dir): base_container_image='xingyaoww/od-eval-miniwob:v1.0', browsergym_eval_env='browsergym/miniwob.choose-list', force_rebuild_runtime=True, + enable_browser=True, ) from openhands.runtime.browser.browser_env import ( BROWSER_EVAL_GET_GOAL_ACTION, diff --git a/tests/runtime/test_browsing.py b/tests/runtime/test_browsing.py index e5566958ed..d800c7af04 100644 --- a/tests/runtime/test_browsing.py +++ b/tests/runtime/test_browsing.py @@ -144,7 +144,9 @@ def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands): def test_simple_browse(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) # Test browse action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &') @@ -189,7 +191,9 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands): def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands): """Test browser navigation actions: goto, go_back, go_forward, noop.""" - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create test HTML pages page1_content = """ @@ -322,7 +326,9 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands): def test_browser_form_interactions(temp_dir, runtime_cls, run_as_openhands): """Test browser form interaction actions: fill, click, select_option, clear.""" - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create a test form page form_content = """ @@ -536,7 +542,9 @@ fill("{textarea_bid}", "This is a test message") def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands): """Test browser interactive actions: scroll, hover, fill, press, focus.""" - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create a test page with scrollable content scroll_content = """ @@ -742,7 +750,9 @@ scroll(0, 400) def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands): """Test browser file upload action.""" - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create a test file to upload test_file_content = 'This is a test file for upload testing.' @@ -897,7 +907,9 @@ def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands): def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create a PDF file using reportlab in the host environment from reportlab.lib.pagesizes import letter @@ -969,7 +981,9 @@ def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands): def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands): - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Create a PNG file using PIL in the host environment from PIL import Image, ImageDraw @@ -1037,7 +1051,9 @@ def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands): def test_download_file(temp_dir, runtime_cls, run_as_openhands): """Test downloading a file using the browser.""" - runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + runtime, config = _load_runtime( + temp_dir, runtime_cls, run_as_openhands, enable_browser=True + ) try: # Minimal PDF content for testing pdf_content = b"""%PDF-1.4 diff --git a/tests/runtime/test_mcp_action.py b/tests/runtime/test_mcp_action.py index 983f2a22eb..e3d29fa18a 100644 --- a/tests/runtime/test_mcp_action.py +++ b/tests/runtime/test_mcp_action.py @@ -128,7 +128,11 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands): ) override_mcp_config = MCPConfig(stdio_servers=[mcp_stdio_server_config]) runtime, config = _load_runtime( - temp_dir, runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config + temp_dir, + runtime_cls, + run_as_openhands, + override_mcp_config=override_mcp_config, + enable_browser=True, ) # Test browser server @@ -220,6 +224,7 @@ async def test_both_stdio_and_sse_mcp( runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config, + enable_browser=True, ) # ======= Test SSE server ======= @@ -297,6 +302,7 @@ async def test_microagent_and_one_stdio_mcp_in_config( runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config, + enable_browser=True, ) # NOTE: this simulate the case where the microagent adds a new stdio server to the runtime diff --git a/tests/unit/test_runtime_build.py b/tests/unit/test_runtime_build.py index ae04f499e3..2ee8d17e52 100644 --- a/tests/unit/test_runtime_build.py +++ b/tests/unit/test_runtime_build.py @@ -101,7 +101,7 @@ def test_prep_build_folder(temp_dir): def test_get_hash_for_lock_files(): with patch('builtins.open', mock_open(read_data='mock-data'.encode())): - hash = get_hash_for_lock_files('some_base_image') + hash = get_hash_for_lock_files('some_base_image', enable_browser=True) # Since we mocked open to always return "mock_data", the hash is the result # of hashing the name of the base image followed by "mock-data" twice md5 = hashlib.md5() @@ -111,6 +111,31 @@ def test_get_hash_for_lock_files(): assert hash == truncate_hash(md5.hexdigest()) +def test_get_hash_for_lock_files_different_enable_browser(): + with patch('builtins.open', mock_open(read_data='mock-data'.encode())): + hash_true = get_hash_for_lock_files('some_base_image', enable_browser=True) + hash_false = get_hash_for_lock_files('some_base_image', enable_browser=False) + + # Hash with enable_browser=True should not include the enable_browser value + md5_true = hashlib.md5() + md5_true.update('some_base_image'.encode()) + for _ in range(2): + md5_true.update('mock-data'.encode()) + expected_hash_true = truncate_hash(md5_true.hexdigest()) + + # Hash with enable_browser=False should include the enable_browser value + md5_false = hashlib.md5() + md5_false.update('some_base_image'.encode()) + md5_false.update('False'.encode()) # enable_browser=False is included + for _ in range(2): + md5_false.update('mock-data'.encode()) + expected_hash_false = truncate_hash(md5_false.hexdigest()) + + assert hash_true == expected_hash_true + assert hash_false == expected_hash_false + assert hash_true != hash_false # They should be different + + def test_get_hash_for_source_files(): dirhash_mock = MagicMock() dirhash_mock.return_value = '1f69bd20d68d9e3874d5bf7f7459709b' @@ -247,7 +272,7 @@ def test_build_runtime_image_from_scratch(): == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag' ) mock_prep_build_folder.assert_called_once_with( - ANY, base_image, BuildFromImageType.SCRATCH, None + ANY, base_image, BuildFromImageType.SCRATCH, None, True ) @@ -342,6 +367,7 @@ def test_build_runtime_image_exact_hash_not_exist_and_lock_exist(): f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag', BuildFromImageType.LOCK, None, + True, ) @@ -401,6 +427,7 @@ def test_build_runtime_image_exact_hash_not_exist_and_lock_not_exist_and_version f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag', BuildFromImageType.VERSIONED, None, + True, ) diff --git a/third_party/runtime/impl/modal/modal_runtime.py b/third_party/runtime/impl/modal/modal_runtime.py index dcc4814aa2..aa1811654d 100644 --- a/third_party/runtime/impl/modal/modal_runtime.py +++ b/third_party/runtime/impl/modal/modal_runtime.py @@ -60,7 +60,7 @@ class ModalRuntime(ActionExecutionClient): # Read Modal API credentials from environment variables modal_token_id = os.getenv('MODAL_TOKEN_ID') modal_token_secret = os.getenv('MODAL_TOKEN_SECRET') - + if not modal_token_id: raise ValueError('MODAL_TOKEN_ID environment variable is required for Modal runtime') if not modal_token_secret: @@ -186,6 +186,7 @@ class ModalRuntime(ActionExecutionClient): base_image=base_container_image_id, build_from=BuildFromImageType.SCRATCH, extra_deps=runtime_extra_deps, + enable_browser=True, ) base_runtime_image = modal.Image.from_dockerfile(