Skip browser dependency build in Dockerfile when browser is disabled (#9815)

2025-12-26 05:48:36 +08:00 · 2025-07-21 08:34:11 -07:00 · 2025-07-21 08:34:11 -07:00 · 19ca52f954
commit 19ca52f954
parent df75116184
11 changed files with 97 additions and 18 deletions
--- a/openhands/runtime/impl/docker/docker_runtime.py
+++ b/openhands/runtime/impl/docker/docker_runtime.py
@ -210,6 +210,7 @@ class DockerRuntime(ActionExecutionClient):
                extra_deps=self.config.sandbox.runtime_extra_deps,
                force_rebuild=self.config.sandbox.force_rebuild_runtime,
                extra_build_args=self.config.sandbox.runtime_extra_build_args,
+                enable_browser=self.config.enable_browser,
            )

    @staticmethod
--- a/openhands/runtime/impl/remote/remote_runtime.py
+++ b/openhands/runtime/impl/remote/remote_runtime.py
@ -250,6 +250,7 @@ class RemoteRuntime(ActionExecutionClient):
            platform=self.config.sandbox.platform,
            extra_deps=self.config.sandbox.runtime_extra_deps,
            force_rebuild=self.config.sandbox.force_rebuild_runtime,
+            enable_browser=self.config.enable_browser,
        )

        response = self._send_runtime_api_request(
--- a/openhands/runtime/utils/runtime_build.py
+++ b/openhands/runtime/utils/runtime_build.py
@ -32,6 +32,7 @@ def _generate_dockerfile(
    base_image: str,
    build_from: BuildFromImageType = BuildFromImageType.SCRATCH,
    extra_deps: str | None = None,
+    enable_browser: bool = True,
 ) -> str:
    """Generate the Dockerfile content for the runtime image based on the base image.

@ -39,6 +40,7 @@ def _generate_dockerfile(
    - base_image (str): The base image provided for the runtime image
    - build_from (BuildFromImageType): The build method for the runtime image.
    - extra_deps (str):
+    - enable_browser (bool): Whether to enable browser support (install Playwright)

    Returns:
    - str: The resulting Dockerfile content
@ -55,6 +57,7 @@ def _generate_dockerfile(
        build_from_scratch=build_from == BuildFromImageType.SCRATCH,
        build_from_versioned=build_from == BuildFromImageType.VERSIONED,
        extra_deps=extra_deps if extra_deps is not None else '',
+        enable_browser=enable_browser,
    )
    return dockerfile_content

@ -111,6 +114,7 @@ def build_runtime_image(
    dry_run: bool = False,
    force_rebuild: bool = False,
    extra_build_args: list[str] | None = None,
+    enable_browser: bool = True,
 ) -> str:
    """Prepares the final docker build folder.

@ -125,6 +129,7 @@ def build_runtime_image(
    - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image
    - force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image
    - extra_build_args (List[str]): Additional build arguments to pass to the builder
+    - enable_browser (bool): Whether to enable browser support (install Playwright)

    Returns:
    - str: <image_repo>:<MD5 hash>. Where MD5 hash is the hash of the docker build folder
@ -142,6 +147,7 @@ def build_runtime_image(
                force_rebuild=force_rebuild,
                platform=platform,
                extra_build_args=extra_build_args,
+                enable_browser=enable_browser,
            )
            return result

@ -154,6 +160,7 @@ def build_runtime_image(
        force_rebuild=force_rebuild,
        platform=platform,
        extra_build_args=extra_build_args,
+        enable_browser=enable_browser,
    )
    return result

@ -167,9 +174,10 @@ def build_runtime_image_in_folder(
    force_rebuild: bool,
    platform: str | None = None,
    extra_build_args: list[str] | None = None,
+    enable_browser: bool = True,
 ) -> str:
    runtime_image_repo, _ = get_runtime_image_repo_and_tag(base_image)
-    lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image)}'
+    lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image, enable_browser)}'
    versioned_tag = (
        # truncate the base image to 96 characters to fit in the tag max length (128 characters)
        f'oh_v{oh_version}_{get_tag_for_versioned_image(base_image)}'
@ -188,6 +196,7 @@ def build_runtime_image_in_folder(
            base_image,
            build_from=BuildFromImageType.SCRATCH,
            extra_deps=extra_deps,
+            enable_browser=enable_browser,
        )
        if not dry_run:
            _build_sandbox_image(
@ -226,7 +235,7 @@ def build_runtime_image_in_folder(
    else:
        logger.debug(f'Build [{hash_image_name}] from scratch')

-    prep_build_folder(build_folder, base_image, build_from, extra_deps)
+    prep_build_folder(build_folder, base_image, build_from, extra_deps, enable_browser)
    if not dry_run:
        _build_sandbox_image(
            build_folder,
@ -251,6 +260,7 @@ def prep_build_folder(
    base_image: str,
    build_from: BuildFromImageType,
    extra_deps: str | None,
+    enable_browser: bool = True,
 ) -> None:
    # Copy the source code to directory. It will end up in build_folder/code
    # If package is not found, build from source code
@ -282,6 +292,7 @@ def prep_build_folder(
        base_image,
        build_from=build_from,
        extra_deps=extra_deps,
+        enable_browser=enable_browser,
    )
    dockerfile_path = Path(build_folder, 'Dockerfile')
    with open(str(dockerfile_path), 'w') as f:
@ -301,10 +312,13 @@ def truncate_hash(hash: str) -> str:
    return ''.join(result)


-def get_hash_for_lock_files(base_image: str) -> str:
+def get_hash_for_lock_files(base_image: str, enable_browser: bool = True) -> str:
    openhands_source_dir = Path(openhands.__file__).parent
    md5 = hashlib.md5()
    md5.update(base_image.encode())
+    # Only include enable_browser in hash when it's False for backward compatibility
+    if not enable_browser:
+        md5.update(str(enable_browser).encode())
    for file in ['pyproject.toml', 'poetry.lock']:
        src = Path(openhands_source_dir, file)
        if not src.exists():
@ -378,6 +392,10 @@ if __name__ == '__main__':
    parser.add_argument('--build_folder', type=str, default=None)
    parser.add_argument('--force_rebuild', action='store_true', default=False)
    parser.add_argument('--platform', type=str, default=None)
+    parser.add_argument('--enable_browser', action='store_true', default=True)
+    parser.add_argument(
+        '--no_enable_browser', dest='enable_browser', action='store_false'
+    )
    args = parser.parse_args()

    if args.build_folder is not None:
@ -409,6 +427,7 @@ if __name__ == '__main__':
                dry_run=True,
                force_rebuild=args.force_rebuild,
                platform=args.platform,
+                enable_browser=args.enable_browser,
            )

            _runtime_image_repo, runtime_image_source_tag = (
@ -444,6 +463,9 @@ if __name__ == '__main__':
        logger.debug('Building image in a temporary folder')
        docker_builder = DockerRuntimeBuilder(docker.from_env())
        image_name = build_runtime_image(
-            args.base_image, docker_builder, platform=args.platform
+            args.base_image,
+            docker_builder,
+            platform=args.platform,
+            enable_browser=args.enable_browser,
        )
        logger.debug(f'\nBuilt image: {image_name}\n')
--- a/openhands/runtime/utils/runtime_templates/Dockerfile.j2
+++ b/openhands/runtime/utils/runtime_templates/Dockerfile.j2
@ -127,7 +127,9 @@ RUN \
    /openhands/micromamba/bin/micromamba run -n openhands poetry install --only main,runtime --no-interaction --no-root && \
    # Update and install additional tools
    # (There used to be an "apt-get update" here, hopefully we can skip it.)
+    {% if enable_browser %}
    /openhands/micromamba/bin/micromamba run -n openhands poetry run playwright install --with-deps chromium && \
+    {% endif %}
    # Set environment variables
    /openhands/micromamba/bin/micromamba run -n openhands poetry run python -c "import sys; print('OH_INTERPRETER_PATH=' + sys.executable)" >> /etc/environment && \
    # Set permissions
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@ -212,7 +212,7 @@ def _load_runtime(
    runtime_startup_env_vars: dict[str, str] | None = None,
    docker_runtime_kwargs: dict[str, str] | None = None,
    override_mcp_config: MCPConfig | None = None,
-    enable_browser: bool = True,
+    enable_browser: bool = False,
 ) -> tuple[Runtime, OpenHandsConfig]:
    sid = 'rt_' + str(random.randint(100000, 999999))

--- a/tests/runtime/test_aci_edit.py
+++ b/tests/runtime/test_aci_edit.py
@ -38,7 +38,9 @@ def test_view_file(temp_dir, runtime_cls, run_as_openhands):


 def test_view_directory(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create test file
        test_file = os.path.join(config.workspace_mount_path_in_sandbox, 'test.txt')
--- a/tests/runtime/test_browsergym_envs.py
+++ b/tests/runtime/test_browsergym_envs.py
@ -36,6 +36,7 @@ def test_browsergym_eval_env(runtime_cls, temp_dir):
        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
        browsergym_eval_env='browsergym/miniwob.choose-list',
        force_rebuild_runtime=True,
+        enable_browser=True,
    )
    from openhands.runtime.browser.browser_env import (
        BROWSER_EVAL_GET_GOAL_ACTION,
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@ -144,7 +144,9 @@ def test_browser_disabled(temp_dir, runtime_cls, run_as_openhands):


 def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )

    # Test browse
    action_cmd = CmdRunAction(command='python3 -m http.server 8000 > server.log 2>&1 &')
@ -189,7 +191,9 @@ def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):

 def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):
    """Test browser navigation actions: goto, go_back, go_forward, noop."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create test HTML pages
        page1_content = """
@ -322,7 +326,9 @@ def test_browser_navigation_actions(temp_dir, runtime_cls, run_as_openhands):

 def test_browser_form_interactions(temp_dir, runtime_cls, run_as_openhands):
    """Test browser form interaction actions: fill, click, select_option, clear."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create a test form page
        form_content = """
@ -536,7 +542,9 @@ fill("{textarea_bid}", "This is a test message")

 def test_browser_interactive_actions(temp_dir, runtime_cls, run_as_openhands):
    """Test browser interactive actions: scroll, hover, fill, press, focus."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create a test page with scrollable content
        scroll_content = """
@ -742,7 +750,9 @@ scroll(0, 400)

 def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):
    """Test browser file upload action."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create a test file to upload
        test_file_content = 'This is a test file for upload testing.'
@ -897,7 +907,9 @@ def test_browser_file_upload(temp_dir, runtime_cls, run_as_openhands):


 def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create a PDF file using reportlab in the host environment
        from reportlab.lib.pagesizes import letter
@ -969,7 +981,9 @@ def test_read_pdf_browse(temp_dir, runtime_cls, run_as_openhands):


 def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Create a PNG file using PIL in the host environment
        from PIL import Image, ImageDraw
@ -1037,7 +1051,9 @@ def test_read_png_browse(temp_dir, runtime_cls, run_as_openhands):

 def test_download_file(temp_dir, runtime_cls, run_as_openhands):
    """Test downloading a file using the browser."""
-    runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    runtime, config = _load_runtime(
+        temp_dir, runtime_cls, run_as_openhands, enable_browser=True
+    )
    try:
        # Minimal PDF content for testing
        pdf_content = b"""%PDF-1.4
--- a/tests/runtime/test_mcp_action.py
+++ b/tests/runtime/test_mcp_action.py
@ -128,7 +128,11 @@ async def test_fetch_mcp_via_stdio(temp_dir, runtime_cls, run_as_openhands):
    )
    override_mcp_config = MCPConfig(stdio_servers=[mcp_stdio_server_config])
    runtime, config = _load_runtime(
-        temp_dir, runtime_cls, run_as_openhands, override_mcp_config=override_mcp_config
+        temp_dir,
+        runtime_cls,
+        run_as_openhands,
+        override_mcp_config=override_mcp_config,
+        enable_browser=True,
    )

    # Test browser server
@ -220,6 +224,7 @@ async def test_both_stdio_and_sse_mcp(
            runtime_cls,
            run_as_openhands,
            override_mcp_config=override_mcp_config,
+            enable_browser=True,
        )

        # ======= Test SSE server =======
@ -297,6 +302,7 @@ async def test_microagent_and_one_stdio_mcp_in_config(
            runtime_cls,
            run_as_openhands,
            override_mcp_config=override_mcp_config,
+            enable_browser=True,
        )

        # NOTE: this simulate the case where the microagent adds a new stdio server to the runtime
--- a/tests/unit/test_runtime_build.py
+++ b/tests/unit/test_runtime_build.py
@ -101,7 +101,7 @@ def test_prep_build_folder(temp_dir):

 def test_get_hash_for_lock_files():
    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
-        hash = get_hash_for_lock_files('some_base_image')
+        hash = get_hash_for_lock_files('some_base_image', enable_browser=True)
        # Since we mocked open to always return "mock_data", the hash is the result
        # of hashing the name of the base image followed by "mock-data" twice
        md5 = hashlib.md5()
@ -111,6 +111,31 @@ def test_get_hash_for_lock_files():
        assert hash == truncate_hash(md5.hexdigest())


+def test_get_hash_for_lock_files_different_enable_browser():
+    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
+        hash_true = get_hash_for_lock_files('some_base_image', enable_browser=True)
+        hash_false = get_hash_for_lock_files('some_base_image', enable_browser=False)
+
+        # Hash with enable_browser=True should not include the enable_browser value
+        md5_true = hashlib.md5()
+        md5_true.update('some_base_image'.encode())
+        for _ in range(2):
+            md5_true.update('mock-data'.encode())
+        expected_hash_true = truncate_hash(md5_true.hexdigest())
+
+        # Hash with enable_browser=False should include the enable_browser value
+        md5_false = hashlib.md5()
+        md5_false.update('some_base_image'.encode())
+        md5_false.update('False'.encode())  # enable_browser=False is included
+        for _ in range(2):
+            md5_false.update('mock-data'.encode())
+        expected_hash_false = truncate_hash(md5_false.hexdigest())
+
+        assert hash_true == expected_hash_true
+        assert hash_false == expected_hash_false
+        assert hash_true != hash_false  # They should be different
+
+
 def test_get_hash_for_source_files():
    dirhash_mock = MagicMock()
    dirhash_mock.return_value = '1f69bd20d68d9e3874d5bf7f7459709b'
@ -247,7 +272,7 @@ def test_build_runtime_image_from_scratch():
            == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
        )
        mock_prep_build_folder.assert_called_once_with(
-            ANY, base_image, BuildFromImageType.SCRATCH, None
+            ANY, base_image, BuildFromImageType.SCRATCH, None, True
        )


@ -342,6 +367,7 @@ def test_build_runtime_image_exact_hash_not_exist_and_lock_exist():
            f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag',
            BuildFromImageType.LOCK,
            None,
+            True,
        )


@ -401,6 +427,7 @@ def test_build_runtime_image_exact_hash_not_exist_and_lock_not_exist_and_version
            f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag',
            BuildFromImageType.VERSIONED,
            None,
+            True,
        )


--- a/third_party/runtime/impl/modal/modal_runtime.py
+++ b/third_party/runtime/impl/modal/modal_runtime.py
@ -60,7 +60,7 @@ class ModalRuntime(ActionExecutionClient):
        # Read Modal API credentials from environment variables
        modal_token_id = os.getenv('MODAL_TOKEN_ID')
        modal_token_secret = os.getenv('MODAL_TOKEN_SECRET')
-        
+
        if not modal_token_id:
            raise ValueError('MODAL_TOKEN_ID environment variable is required for Modal runtime')
        if not modal_token_secret:
@ -186,6 +186,7 @@ class ModalRuntime(ActionExecutionClient):
                base_image=base_container_image_id,
                build_from=BuildFromImageType.SCRATCH,
                extra_deps=runtime_extra_deps,
+                enable_browser=True,
            )

            base_runtime_image = modal.Image.from_dockerfile(