Revert "tests: reorganize unit tests into subdirectories mirroring source modules" (#10437)

This commit is contained in:
Engel Nyst
2025-08-17 02:33:17 +02:00
committed by GitHub
parent 95ef8965b7
commit 315d391414
124 changed files with 4 additions and 4 deletions

View File

@@ -1,676 +0,0 @@
import hashlib
import os
import tempfile
import uuid
from importlib.metadata import version
from pathlib import Path
from unittest.mock import ANY, MagicMock, mock_open, patch
import docker
import pytest
import toml
from pytest import TempPathFactory
import openhands
from openhands import __version__ as oh_version
from openhands.core.logger import openhands_logger as logger
from openhands.runtime.builder.docker import DockerRuntimeBuilder
from openhands.runtime.utils.runtime_build import (
BuildFromImageType,
_generate_dockerfile,
build_runtime_image,
get_hash_for_lock_files,
get_hash_for_source_files,
get_runtime_image_repo,
get_runtime_image_repo_and_tag,
prep_build_folder,
truncate_hash,
)
# Tag prefix shared by all runtime images built from this OpenHands version.
OH_VERSION = f'oh_v{oh_version}'
# Base image used by tests that do not specify one explicitly.
DEFAULT_BASE_IMAGE = 'nikolaik/python-nodejs:python3.12-nodejs22'
@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
    """Return a fresh temporary directory path for a single test."""
    return str(tmp_path_factory.mktemp('test_runtime_build'))
@pytest.fixture
def mock_docker_client():
    """A MagicMock docker client whose reported Engine version passes the builder's check."""
    mock_client = MagicMock(spec=docker.DockerClient)
    mock_client.version.return_value = {
        'Version': '20.10.0',
        'Components': [{'Name': 'Engine', 'Version': '20.10.0'}],
    }  # Ensure version is >= 18.09
    return mock_client
@pytest.fixture
def docker_runtime_builder():
    """A DockerRuntimeBuilder backed by the real local docker daemon."""
    client = docker.from_env()
    return DockerRuntimeBuilder(client)
def _check_source_code_in_dir(temp_dir):
    """Assert that `temp_dir/code` holds exactly the files needed to build the runtime.

    The copied tree must contain only the `openhands` package plus
    pyproject.toml and poetry.lock, and the packaged version must match the
    installed `openhands-ai` distribution.
    """
    code_dir = os.path.join(temp_dir, 'code')
    # assert there is a folder called 'code' in the temp_dir
    assert os.path.exists(code_dir)
    assert os.path.isdir(code_dir)
    # check the source file is the same as the current code base
    assert os.path.exists(os.path.join(code_dir, 'pyproject.toml'))
    # Only the openhands package and the two packaging files may be present.
    expected_entries = {'openhands', 'pyproject.toml', 'poetry.lock'}
    assert set(os.listdir(code_dir)) == expected_entries
    openhands_dir = os.path.join(code_dir, 'openhands')
    assert os.path.exists(openhands_dir)
    assert os.path.isdir(openhands_dir)
    # make sure the version from the pyproject.toml is the same as the current version
    with open(os.path.join(code_dir, 'pyproject.toml'), 'r') as f:
        pyproject = toml.load(f)
    assert pyproject['tool']['poetry']['version'] == version('openhands-ai')
def test_prep_build_folder(temp_dir):
    """prep_build_folder should copy the source tree and render a Dockerfile."""
    shutil_mock = MagicMock()
    with patch(f'{prep_build_folder.__module__}.shutil', shutil_mock):
        prep_build_folder(
            temp_dir,
            base_image=DEFAULT_BASE_IMAGE,
            build_from=BuildFromImageType.SCRATCH,
            extra_deps=None,
        )
    # make sure that the code (openhands/) and microagents folder were copied
    assert shutil_mock.copytree.call_count == 2
    assert shutil_mock.copy2.call_count == 2
    # Now check dockerfile is in the folder
    dockerfile_path = os.path.join(temp_dir, 'Dockerfile')
    assert os.path.exists(dockerfile_path)
    assert os.path.isfile(dockerfile_path)
def test_get_hash_for_lock_files():
    """The lock hash is md5(base image name + both lock files' bytes), truncated."""
    with patch('builtins.open', mock_open(read_data=b'mock-data')):
        result = get_hash_for_lock_files('some_base_image', enable_browser=True)
    # Since open() is mocked to always yield b'mock-data', the expected digest
    # is the base image name followed by the mocked contents once per lock file.
    digest = hashlib.md5(b'some_base_image' + b'mock-data' * 2)
    assert result == truncate_hash(digest.hexdigest())
def test_get_hash_for_lock_files_different_enable_browser():
    """enable_browser must affect the lock-file hash, and only its False value is hashed."""
    with patch('builtins.open', mock_open(read_data='mock-data'.encode())):
        hash_true = get_hash_for_lock_files('some_base_image', enable_browser=True)
        hash_false = get_hash_for_lock_files('some_base_image', enable_browser=False)
    # Hash with enable_browser=True should not include the enable_browser value
    md5_true = hashlib.md5()
    md5_true.update('some_base_image'.encode())
    for _ in range(2):
        md5_true.update('mock-data'.encode())
    expected_hash_true = truncate_hash(md5_true.hexdigest())
    # Hash with enable_browser=False should include the enable_browser value
    md5_false = hashlib.md5()
    md5_false.update('some_base_image'.encode())
    md5_false.update('False'.encode())  # enable_browser=False is included
    for _ in range(2):
        md5_false.update('mock-data'.encode())
    expected_hash_false = truncate_hash(md5_false.hexdigest())
    assert hash_true == expected_hash_true
    assert hash_false == expected_hash_false
    assert hash_true != hash_false  # They should be different
def test_get_hash_for_source_files():
    """The source hash is dirhash('md5') over the openhands package, truncated."""
    dirhash_mock = MagicMock()
    dirhash_mock.return_value = '1f69bd20d68d9e3874d5bf7f7459709b'
    with patch(f'{get_hash_for_source_files.__module__}.dirhash', dirhash_mock):
        result = get_hash_for_source_files()
    assert result == truncate_hash(dirhash_mock.return_value)
    # Caches/compiled files must be excluded so the hash is reproducible.
    dirhash_mock.assert_called_once_with(
        Path(openhands.__file__).parent,
        'md5',
        ignore=[
            '.*/',  # hidden directories
            '__pycache__/',
            '*.pyc',
        ],
    )
def test_generate_dockerfile_build_from_scratch():
    """A from-scratch Dockerfile must set up the full toolchain and copy the code."""
    base_image = 'debian:11'
    content = _generate_dockerfile(
        base_image,
        build_from=BuildFromImageType.SCRATCH,
    )
    required_snippets = [
        base_image,
        'apt-get update',
        'wget curl',
        'poetry',
        '-c conda-forge',
        'python=3.12',
        # the code-refresh step
        'COPY ./code/openhands /openhands/code/openhands',
        # dependency install runs inside the micromamba environment
        '/openhands/micromamba/bin/micromamba run -n openhands poetry install',
    ]
    for snippet in required_snippets:
        assert snippet in content
def test_generate_dockerfile_build_from_lock():
    """Building from a lock image must skip toolchain setup but still refresh the code."""
    content = _generate_dockerfile(
        'debian:11',
        build_from=BuildFromImageType.LOCK,
    )
    # Full environment setup belongs to scratch builds only.
    forbidden_snippets = [
        'wget curl sudo apt-utils git',
        '-c conda-forge',
        'python=3.12',
        'https://micro.mamba.pm/install.sh',
        'poetry install',
    ]
    for snippet in forbidden_snippets:
        assert snippet not in content
    # The source-code refresh step must always be present.
    assert 'COPY ./code/openhands /openhands/code/openhands' in content
def test_generate_dockerfile_build_from_versioned():
    """Building from a versioned image skips system setup but reinstalls dependencies."""
    content = _generate_dockerfile(
        'debian:11',
        build_from=BuildFromImageType.VERSIONED,
    )
    # System/toolchain bootstrap must not be repeated.
    forbidden_snippets = [
        'wget curl sudo apt-utils git',
        '-c conda-forge',
        'python=3.12',
        'https://micro.mamba.pm/install.sh',
    ]
    for snippet in forbidden_snippets:
        assert snippet not in content
    # Unlike a lock build, dependencies are re-resolved, and code is refreshed.
    assert 'poetry install' in content
    assert 'COPY ./code/openhands /openhands/code/openhands' in content
def test_get_runtime_image_repo_and_tag_eventstream():
    """The repo is always the runtime repo; the tag encodes version, image name and tag."""
    cases = [
        ('debian:11', f'{OH_VERSION}_image_debian_tag_11'),
        (
            DEFAULT_BASE_IMAGE,
            f'{OH_VERSION}_image_nikolaik_s_python-nodejs_tag_python3.12-nodejs22',
        ),
        # An untagged base image falls back to 'latest'.
        ('ubuntu', f'{OH_VERSION}_image_ubuntu_tag_latest'),
    ]
    for base_image, expected_tag in cases:
        img_repo, img_tag = get_runtime_image_repo_and_tag(base_image)
        assert img_repo == get_runtime_image_repo()
        assert img_tag == expected_tag
def test_build_runtime_image_from_scratch():
    """When no cached image exists at all, build from scratch with all three tags."""
    base_image = 'debian:11'
    mock_lock_hash = MagicMock()
    mock_lock_hash.return_value = 'mock-lock-tag'
    mock_versioned_tag = MagicMock()
    mock_versioned_tag.return_value = 'mock-versioned-tag'
    mock_source_hash = MagicMock()
    mock_source_hash.return_value = 'mock-source-tag'
    mock_runtime_builder = MagicMock()
    mock_runtime_builder.image_exists.return_value = False  # nothing is cached
    mock_runtime_builder.build.return_value = (
        f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
    )
    mock_prep_build_folder = MagicMock()
    mod = build_runtime_image.__module__
    with (
        patch(f'{mod}.get_hash_for_lock_files', mock_lock_hash),
        patch(f'{mod}.get_hash_for_source_files', mock_source_hash),
        patch(f'{mod}.get_tag_for_versioned_image', mock_versioned_tag),
        patch(
            f'{build_runtime_image.__module__}.prep_build_folder',
            mock_prep_build_folder,
        ),
    ):
        image_name = build_runtime_image(base_image, mock_runtime_builder)
        # A scratch build tags the result with source, lock, AND versioned tags.
        mock_runtime_builder.build.assert_called_once_with(
            path=ANY,
            tags=[
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag',
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag',
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag',
            ],
            platform=None,
            extra_build_args=None,
        )
        assert (
            image_name
            == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
        )
        mock_prep_build_folder.assert_called_once_with(
            ANY, base_image, BuildFromImageType.SCRATCH, None, True
        )
def test_build_runtime_image_exact_hash_exist():
    """If the exact lock+source image already exists, no build is performed at all."""
    base_image = 'debian:11'
    mock_lock_hash = MagicMock()
    mock_lock_hash.return_value = 'mock-lock-tag'
    mock_source_hash = MagicMock()
    mock_source_hash.return_value = 'mock-source-tag'
    mock_versioned_tag = MagicMock()
    mock_versioned_tag.return_value = 'mock-versioned-tag'
    mock_runtime_builder = MagicMock()
    mock_runtime_builder.image_exists.return_value = True  # exact hash is cached
    mock_runtime_builder.build.return_value = (
        f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
    )
    mock_prep_build_folder = MagicMock()
    mod = build_runtime_image.__module__
    with (
        patch(f'{mod}.get_hash_for_lock_files', mock_lock_hash),
        patch(f'{mod}.get_hash_for_source_files', mock_source_hash),
        patch(f'{mod}.get_tag_for_versioned_image', mock_versioned_tag),
        patch(
            f'{build_runtime_image.__module__}.prep_build_folder',
            mock_prep_build_folder,
        ),
    ):
        image_name = build_runtime_image(base_image, mock_runtime_builder)
        assert (
            image_name
            == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
        )
        # Cache hit: neither the build nor the build-folder prep should run.
        mock_runtime_builder.build.assert_not_called()
        mock_prep_build_folder.assert_not_called()
def test_build_runtime_image_exact_hash_not_exist_and_lock_exist():
    """Source hash misses but lock hash hits: rebuild starting from the lock image."""
    base_image = 'debian:11'
    mock_lock_hash = MagicMock()
    mock_lock_hash.return_value = 'mock-lock-tag'
    mock_source_hash = MagicMock()
    mock_source_hash.return_value = 'mock-source-tag'
    mock_versioned_tag = MagicMock()
    mock_versioned_tag.return_value = 'mock-versioned-tag'
    mock_runtime_builder = MagicMock()

    def image_exists_side_effect(image_name, *args):
        # Only the lock-tagged image is available.
        if 'mock-lock-tag_mock-source-tag' in image_name:
            return False
        elif 'mock-lock-tag' in image_name:
            return True
        elif 'mock-versioned-tag' in image_name:
            # just to test we should never include versioned tag in a non-from-scratch build
            # in real case it should be True when lock exists
            return False
        else:
            raise ValueError(f'Unexpected image name: {image_name}')

    mock_runtime_builder.image_exists.side_effect = image_exists_side_effect
    mock_runtime_builder.build.return_value = (
        f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
    )
    mock_prep_build_folder = MagicMock()
    mod = build_runtime_image.__module__
    with (
        patch(f'{mod}.get_hash_for_lock_files', mock_lock_hash),
        patch(f'{mod}.get_hash_for_source_files', mock_source_hash),
        patch(f'{mod}.get_tag_for_versioned_image', mock_versioned_tag),
        patch(
            f'{build_runtime_image.__module__}.prep_build_folder',
            mock_prep_build_folder,
        ),
    ):
        image_name = build_runtime_image(base_image, mock_runtime_builder)
        assert (
            image_name
            == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
        )
        mock_runtime_builder.build.assert_called_once_with(
            path=ANY,
            tags=[
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag',
                # lock tag will NOT be included - since it already exists
                # VERSION tag will NOT be included except from scratch
            ],
            platform=None,
            extra_build_args=None,
        )
        # The base for the rebuild is the existing lock-tagged image.
        mock_prep_build_folder.assert_called_once_with(
            ANY,
            f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag',
            BuildFromImageType.LOCK,
            None,
            True,
        )
def test_build_runtime_image_exact_hash_not_exist_and_lock_not_exist_and_versioned_exist():
    """Only the versioned image exists: rebuild from it and also tag the lock image."""
    base_image = 'debian:11'
    mock_lock_hash = MagicMock()
    mock_lock_hash.return_value = 'mock-lock-tag'
    mock_source_hash = MagicMock()
    mock_source_hash.return_value = 'mock-source-tag'
    mock_versioned_tag = MagicMock()
    mock_versioned_tag.return_value = 'mock-versioned-tag'
    mock_runtime_builder = MagicMock()

    def image_exists_side_effect(image_name, *args):
        # Neither the exact nor the lock image exists; only the versioned one does.
        if 'mock-lock-tag_mock-source-tag' in image_name:
            return False
        elif 'mock-lock-tag' in image_name:
            return False
        elif 'mock-versioned-tag' in image_name:
            return True
        else:
            raise ValueError(f'Unexpected image name: {image_name}')

    mock_runtime_builder.image_exists.side_effect = image_exists_side_effect
    mock_runtime_builder.build.return_value = (
        f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
    )
    mock_prep_build_folder = MagicMock()
    mod = build_runtime_image.__module__
    with (
        patch(f'{mod}.get_hash_for_lock_files', mock_lock_hash),
        patch(f'{mod}.get_hash_for_source_files', mock_source_hash),
        patch(f'{mod}.get_tag_for_versioned_image', mock_versioned_tag),
        patch(
            f'{build_runtime_image.__module__}.prep_build_folder',
            mock_prep_build_folder,
        ),
    ):
        image_name = build_runtime_image(base_image, mock_runtime_builder)
        assert (
            image_name
            == f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag'
        )
        mock_runtime_builder.build.assert_called_once_with(
            path=ANY,
            tags=[
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag_mock-source-tag',
                f'{get_runtime_image_repo()}:{OH_VERSION}_mock-lock-tag',
                # VERSION tag will NOT be included except from scratch
            ],
            platform=None,
            extra_build_args=None,
        )
        # The base for the rebuild is the existing versioned image.
        mock_prep_build_folder.assert_called_once_with(
            ANY,
            f'{get_runtime_image_repo()}:{OH_VERSION}_mock-versioned-tag',
            BuildFromImageType.VERSIONED,
            None,
            True,
        )
# ==============================
# DockerRuntimeBuilder Tests
# ==============================
def test_output_build_progress(docker_runtime_builder):
    """A 'Downloading' status event updates the per-layer progress bookkeeping."""
    layers = {}
    docker_runtime_builder._output_build_progress(
        {
            'id': 'layer1',
            'status': 'Downloading',
            'progressDetail': {'current': 50, 'total': 100},
        },
        layers,
        0,
    )
    assert layers['layer1']['status'] == 'Downloading'
    assert layers['layer1']['progress'] == ''
    assert layers['layer1']['last_logged'] == 50.0  # 50/100 -> 50%
@pytest.fixture(scope='function')
def live_docker_image():
    """Build a real multi-stage test image, yield it, and always clean up its tags.

    Yields the built docker Image object; the base and intermediate stages are
    tagged as well so tests can reference every stage.
    """
    client = docker.from_env()
    unique_id = str(uuid.uuid4())[:8]  # Use first 8 characters of a UUID
    unique_prefix = f'test_image_{unique_id}'
    dockerfile_content = f"""
# syntax=docker/dockerfile:1.4
FROM {DEFAULT_BASE_IMAGE} AS base
RUN apt-get update && apt-get install -y wget curl sudo apt-utils
FROM base AS intermediate
RUN mkdir -p /openhands
FROM intermediate AS final
RUN echo "Hello, OpenHands!" > /openhands/hello.txt
"""
    # Bug fix: all_tags must be defined BEFORE the try block. It is referenced
    # in the finally clause, so when client.images.build raised, the old code
    # hit a NameError in cleanup that masked the original build failure.
    all_tags = [
        f'{unique_prefix}:final',
        f'{unique_prefix}:base',
        f'{unique_prefix}:intermediate',
    ]
    with tempfile.TemporaryDirectory() as temp_dir:
        dockerfile_path = os.path.join(temp_dir, 'Dockerfile')
        with open(dockerfile_path, 'w') as f:
            f.write(dockerfile_content)
        try:
            image, logs = client.images.build(
                path=temp_dir,
                tag=f'{unique_prefix}:final',
                buildargs={'DOCKER_BUILDKIT': '1'},
                labels={'test': 'true'},
                rm=True,
                forcerm=True,
            )
            # Tag intermediary stages
            client.api.tag(image.id, unique_prefix, 'base')
            client.api.tag(image.id, unique_prefix, 'intermediate')
            print(f'\nImage ID: {image.id}')
            print(f'Image tags: {all_tags}\n')
            yield image
        finally:
            # Clean up all tagged images; removal of a tag that was never
            # created simply logs the error and continues.
            for tag in all_tags:
                try:
                    client.images.remove(tag, force=True)
                    print(f'Removed image: {tag}')
                except Exception as e:
                    print(f'Error removing image {tag}: {str(e)}')
def test_init(docker_runtime_builder):
    """The builder wraps a docker client and starts with an empty 10-line rolling log."""
    assert isinstance(docker_runtime_builder.docker_client, docker.DockerClient)
    assert docker_runtime_builder.rolling_logger.max_lines == 10
    assert docker_runtime_builder.rolling_logger.log_lines == [''] * 10
def test_build_image_from_scratch(docker_runtime_builder, tmp_path):
    """End-to-end: build a trivial image through the builder against a live daemon."""
    context_path = str(tmp_path)
    tags = ['test_build:latest']
    # Create a minimal Dockerfile in the context path
    with open(os.path.join(context_path, 'Dockerfile'), 'w') as f:
        f.write("""FROM php:latest
CMD ["sh", "-c", "echo 'Hello, World!'"]
""")
    built_image_name = None
    container = None  # no container is started here; kept for symmetric cleanup
    client = docker.from_env()
    try:
        built_image_name = docker_runtime_builder.build(
            context_path,
            tags,
            use_local_cache=False,
        )
        assert built_image_name == f'{tags[0]}'
        # Verify the image was created
        image = client.images.get(tags[0])
        assert image is not None
    except docker.errors.ImageNotFound:
        pytest.fail('test_build_image_from_scratch: test image not found!')
    except Exception as e:
        pytest.fail(f'test_build_image_from_scratch: Build failed with error: {str(e)}')
    finally:
        # Clean up the container
        if container:
            try:
                container.remove(force=True)
                logger.info(f'Removed test container: `{container.id}`')
            except Exception as e:
                logger.warning(
                    f'Failed to remove test container `{container.id}`: {str(e)}'
                )
        # Clean up the image
        if built_image_name:
            try:
                client.images.remove(built_image_name, force=True)
                logger.info(f'Removed test image: `{built_image_name}`')
            except Exception as e:
                logger.warning(
                    f'Failed to remove test image `{built_image_name}`: {str(e)}'
                )
        else:
            logger.warning('No image was built, so no image cleanup was necessary.')
def _format_size_to_gb(bytes_size):
"""Convert bytes to gigabytes with two decimal places."""
return round(bytes_size / (1024**3), 2)
def test_list_dangling_images():
    """Log every dangling image and its size; informational only, never fails."""
    client = docker.from_env()
    dangling_images = client.images.list(filters={'dangling': True})
    if dangling_images and len(dangling_images) > 0:
        for image in dangling_images:
            # 'Size' may be missing or non-int depending on daemon/API version.
            if 'Size' in image.attrs and isinstance(image.attrs['Size'], int):
                size_gb = _format_size_to_gb(image.attrs['Size'])
                logger.info(f'Dangling image: {image.tags}, Size: {size_gb} GB')
            else:
                logger.info(f'Dangling image: {image.tags}, Size: n/a')
    else:
        logger.info('No dangling images found')
def test_build_image_from_repo(docker_runtime_builder, tmp_path):
    """End-to-end: build an image based on the default base image via the builder."""
    context_path = str(tmp_path)
    tags = ['alpine:latest']
    # Create a minimal Dockerfile in the context path
    with open(os.path.join(context_path, 'Dockerfile'), 'w') as f:
        f.write(f"""FROM {DEFAULT_BASE_IMAGE}
CMD ["sh", "-c", "echo 'Hello, World!'"]
""")
    built_image_name = None
    container = None  # no container is started here; kept for symmetric cleanup
    client = docker.from_env()
    try:
        built_image_name = docker_runtime_builder.build(
            context_path,
            tags,
            use_local_cache=False,
        )
        assert built_image_name == f'{tags[0]}'
        image = client.images.get(tags[0])
        assert image is not None
    except docker.errors.ImageNotFound:
        pytest.fail('test_build_image_from_repo: test image not found!')
    finally:
        # Clean up the container
        if container:
            try:
                container.remove(force=True)
                logger.info(f'Removed test container: `{container.id}`')
            except Exception as e:
                logger.warning(
                    f'Failed to remove test container `{container.id}`: {str(e)}'
                )
        # Clean up the image
        if built_image_name:
            try:
                client.images.remove(built_image_name, force=True)
                logger.info(f'Removed test image: `{built_image_name}`')
            except Exception as e:
                logger.warning(
                    f'Failed to remove test image `{built_image_name}`: {str(e)}'
                )
        else:
            logger.warning('No image was built, so no image cleanup was necessary.')
def test_image_exists_local(docker_runtime_builder):
    """image_exists is truthy when the local images lookup on the client succeeds."""
    fake_client = MagicMock()
    fake_client.version.return_value = {
        'Version': '20.10.0',
        'Components': [{'Name': 'Engine', 'Version': '20.10.0'}],
    }  # Ensure version is >= 18.09
    builder = DockerRuntimeBuilder(fake_client)
    # A MagicMock's images.get succeeds by default, simulating a local hit.
    assert builder.image_exists('existing-local:image')
def test_image_exists_not_found():
    """image_exists returns False after both the local lookup and the registry pull fail."""
    mock_client = MagicMock()
    mock_client.version.return_value = {
        'Version': '20.10.0',
        'Components': [{'Name': 'Engine', 'Version': '20.10.0'}],
    }  # Ensure version is >= 18.09
    mock_client.images.get.side_effect = docker.errors.ImageNotFound(
        "He doesn't like you!"
    )
    mock_client.api.pull.side_effect = docker.errors.ImageNotFound(
        "I don't like you either!"
    )
    builder = DockerRuntimeBuilder(mock_client)
    assert not builder.image_exists('nonexistent:image')
    # Local lookup first, then a streaming pull with repo/tag split apart.
    mock_client.images.get.assert_called_once_with('nonexistent:image')
    mock_client.api.pull.assert_called_once_with(
        'nonexistent', tag='image', stream=True, decode=True
    )
def test_truncate_hash():
    """truncate_hash deterministically shortens hex digests to 16-character strings."""
    expected = {
        'b08f254d76b1c6a7ad924708c0032251': 'pma2wc71uq3c9a85',
        '102aecc0cea025253c0278f54ebef078': '4titk6gquia3taj5',
    }
    for digest, truncated in expected.items():
        assert truncate_hash(digest) == truncated

View File

@@ -1,167 +0,0 @@
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import OpenHandsConfig
from openhands.events import EventStream
from openhands.runtime.impl.docker.docker_runtime import DockerRuntime
@pytest.fixture
def mock_docker_client():
    """Patch docker.from_env with a client whose container exposes ports via env vars."""
    with patch('docker.from_env') as mock_client:
        container_mock = MagicMock()
        container_mock.status = 'running'
        container_mock.attrs = {
            'Config': {
                'Env': ['port=12345', 'VSCODE_PORT=54321'],
                'ExposedPorts': {'12345/tcp': {}, '54321/tcp': {}},
            }
        }
        mock_client.return_value.containers.get.return_value = container_mock
        mock_client.return_value.containers.run.return_value = container_mock
        # Mock version info for BuildKit check
        mock_client.return_value.version.return_value = {
            'Version': '20.10.0',
            'Components': [{'Name': 'Engine', 'Version': '20.10.0'}],
        }  # Ensure version is >= 18.09
        yield mock_client.return_value
@pytest.fixture
def config():
    """OpenHandsConfig with keep_runtime_alive disabled (containers stop on close)."""
    config = OpenHandsConfig()
    config.sandbox.keep_runtime_alive = False
    return config
@pytest.fixture
def event_stream():
    """A MagicMock standing in for the EventStream dependency."""
    return MagicMock(spec=EventStream)
@patch('openhands.runtime.impl.docker.docker_runtime.stop_all_containers')
def test_container_stopped_when_keep_runtime_alive_false(
    mock_stop_containers, mock_docker_client, config, event_stream
):
    """close() must stop the sid-scoped containers when keep_runtime_alive is False."""
    # Arrange
    runtime = DockerRuntime(config, event_stream, sid='test-sid')
    runtime.container = mock_docker_client.containers.get.return_value
    # Act
    runtime.close()
    # Assert
    mock_stop_containers.assert_called_once_with('openhands-runtime-test-sid')
@patch('openhands.runtime.impl.docker.docker_runtime.stop_all_containers')
def test_container_not_stopped_when_keep_runtime_alive_true(
    mock_stop_containers, mock_docker_client, config, event_stream
):
    """close() must leave containers running when keep_runtime_alive is True."""
    # Arrange
    config.sandbox.keep_runtime_alive = True
    runtime = DockerRuntime(config, event_stream, sid='test-sid')
    runtime.container = mock_docker_client.containers.get.return_value
    # Act
    runtime.close()
    # Assert
    mock_stop_containers.assert_not_called()
def test_volumes_mode_extraction():
    """Test that the mount mode is correctly extracted from sandbox.volumes."""
    import os

    from openhands.runtime.impl.docker.docker_runtime import DockerRuntime

    # Create a DockerRuntime instance with a mock config
    # (__new__ skips __init__, so no container is started)
    runtime = DockerRuntime.__new__(DockerRuntime)
    runtime.config = MagicMock()
    runtime.config.sandbox.volumes = '/host/path:/container/path:ro'
    runtime.config.workspace_mount_path = '/host/path'
    runtime.config.workspace_mount_path_in_sandbox = '/container/path'
    # Call the actual method that processes volumes
    volumes = runtime._process_volumes()
    # Assert that the mode was correctly set to 'ro'
    assert volumes[os.path.abspath('/host/path')]['mode'] == 'ro'


# This test has been replaced by test_volumes_multiple_mounts
def test_volumes_multiple_mounts():
    """Test that multiple mounts in sandbox.volumes are correctly processed."""
    import os

    from openhands.runtime.impl.docker.docker_runtime import DockerRuntime

    # Create a DockerRuntime instance with a mock config
    # (__new__ skips __init__, so no container is started)
    runtime = DockerRuntime.__new__(DockerRuntime)
    runtime.config = MagicMock()
    runtime.config.runtime_mount = None
    runtime.config.sandbox.volumes = (
        '/host/path1:/container/path1,/host/path2:/container/path2:ro'
    )
    runtime.config.workspace_mount_path = '/host/path1'
    runtime.config.workspace_mount_path_in_sandbox = '/container/path1'
    # Call the actual method that processes volumes
    volumes = runtime._process_volumes()
    # Assert that both mounts were processed correctly
    assert len(volumes) == 2
    assert volumes[os.path.abspath('/host/path1')]['bind'] == '/container/path1'
    assert volumes[os.path.abspath('/host/path1')]['mode'] == 'rw'  # Default mode
    assert volumes[os.path.abspath('/host/path2')]['bind'] == '/container/path2'
    assert volumes[os.path.abspath('/host/path2')]['mode'] == 'ro'  # Specified mode
def test_multiple_volumes():
    """Test that multiple volumes are correctly processed."""
    import os

    from openhands.runtime.impl.docker.docker_runtime import DockerRuntime

    # Create a DockerRuntime instance with a mock config
    # (__new__ skips __init__, so no container is started)
    runtime = DockerRuntime.__new__(DockerRuntime)
    runtime.config = MagicMock()
    runtime.config.sandbox.volumes = '/host/path1:/container/path1,/host/path2:/container/path2,/host/path3:/container/path3:ro'
    runtime.config.workspace_mount_path = '/host/path1'
    runtime.config.workspace_mount_path_in_sandbox = '/container/path1'
    # Call the actual method that processes volumes
    volumes = runtime._process_volumes()
    # Assert that all mounts were processed correctly
    assert len(volumes) == 3
    assert volumes[os.path.abspath('/host/path1')]['bind'] == '/container/path1'
    assert volumes[os.path.abspath('/host/path1')]['mode'] == 'rw'
    assert volumes[os.path.abspath('/host/path2')]['bind'] == '/container/path2'
    assert volumes[os.path.abspath('/host/path2')]['mode'] == 'rw'
    assert volumes[os.path.abspath('/host/path3')]['bind'] == '/container/path3'
    assert volumes[os.path.abspath('/host/path3')]['mode'] == 'ro'
def test_volumes_default_mode():
    """Test that the default mount mode (rw) is used when not specified in sandbox.volumes."""
    import os

    from openhands.runtime.impl.docker.docker_runtime import DockerRuntime

    # Create a DockerRuntime instance with a mock config
    # (__new__ skips __init__, so no container is started)
    runtime = DockerRuntime.__new__(DockerRuntime)
    runtime.config = MagicMock()
    runtime.config.sandbox.volumes = '/host/path:/container/path'
    runtime.config.workspace_mount_path = '/host/path'
    runtime.config.workspace_mount_path_in_sandbox = '/container/path'
    # Call the actual method that processes volumes
    volumes = runtime._process_volumes()
    # Assert that the mode remains 'rw' (default)
    assert volumes[os.path.abspath('/host/path')]['mode'] == 'rw'

View File

@@ -1,245 +0,0 @@
"""Unit tests for LocalRuntime's URL-related methods."""
import os
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import OpenHandsConfig
from openhands.events import EventStream
from openhands.runtime.impl.local.local_runtime import LocalRuntime
@pytest.fixture
def config():
    """Create a mock OpenHandsConfig for testing."""
    config = OpenHandsConfig()
    config.sandbox.local_runtime_url = 'http://localhost'
    config.workspace_mount_path_in_sandbox = '/workspace'
    return config
@pytest.fixture
def event_stream():
    """Create a mock EventStream for testing."""
    return MagicMock(spec=EventStream)
@pytest.fixture
def local_runtime(config, event_stream):
    """Create a LocalRuntime instance for testing.

    NOTE(review): `type(runtime).runtime_url = property(...)` assigns the
    property on the LocalRuntime class itself, not on the instance, so it
    leaks into subsequent tests — confirm whether that is intended.
    """
    # Use __new__ to avoid calling __init__ which would start the server
    runtime = LocalRuntime.__new__(LocalRuntime)
    runtime.config = config
    runtime.event_stream = event_stream
    runtime._vscode_port = 8080
    runtime._app_ports = [12000, 12001]
    runtime._runtime_initialized = True
    # Add required attributes for testing
    runtime._vscode_enabled = True
    runtime._vscode_token = 'test-token'

    # Mock the runtime_url property for testing
    def mock_runtime_url(self):
        return 'http://localhost'

    # Create a property mock for runtime_url
    type(runtime).runtime_url = property(mock_runtime_url)
    return runtime
class TestLocalRuntime:
"""Tests for LocalRuntime's URL-related methods."""
def test_runtime_url_with_env_var(self):
"""Test runtime_url when RUNTIME_URL environment variable is set."""
# Create a fresh instance for this test
config = OpenHandsConfig()
config.sandbox.local_runtime_url = 'http://localhost'
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
with patch.dict(os.environ, {'RUNTIME_URL': 'http://custom-url'}, clear=True):
# Call the actual runtime_url property
original_property = LocalRuntime.runtime_url
try:
assert original_property.__get__(runtime) == 'http://custom-url'
finally:
# Restore the original property
LocalRuntime.runtime_url = original_property
def test_runtime_url_with_pattern(self):
"""Test runtime_url when RUNTIME_URL_PATTERN environment variable is set."""
# Create a fresh instance for this test
config = OpenHandsConfig()
config.sandbox.local_runtime_url = 'http://localhost'
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
env_vars = {
'RUNTIME_URL_PATTERN': 'http://runtime-{runtime_id}.example.com',
'HOSTNAME': 'runtime-abc123-xyz',
}
with patch.dict(os.environ, env_vars, clear=True):
# Call the actual runtime_url property
original_property = LocalRuntime.runtime_url
try:
assert (
original_property.__get__(runtime)
== 'http://runtime-abc123.example.com'
)
finally:
# Restore the original property
LocalRuntime.runtime_url = original_property
def test_runtime_url_fallback(self):
"""Test runtime_url fallback to local_runtime_url."""
# Create a fresh instance for this test
config = OpenHandsConfig()
config.sandbox.local_runtime_url = 'http://localhost'
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
with patch.dict(os.environ, {}, clear=True):
# Call the actual runtime_url property
original_property = LocalRuntime.runtime_url
try:
assert original_property.__get__(runtime) == 'http://localhost'
finally:
# Restore the original property
LocalRuntime.runtime_url = original_property
def test_create_url_with_localhost(self):
"""Test _create_url when runtime_url contains 'localhost'."""
# Create a fresh instance for this test
config = OpenHandsConfig()
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
runtime._vscode_port = 8080
# Create a mock method for runtime_url that accepts self parameter
def mock_runtime_url(self):
return 'http://localhost'
# Temporarily replace the runtime_url property
original_property = LocalRuntime.runtime_url
try:
LocalRuntime.runtime_url = property(mock_runtime_url)
url = runtime._create_url('test-prefix', 9000)
assert url == 'http://localhost:8080'
finally:
# Restore the original property
LocalRuntime.runtime_url = original_property
def test_create_url_with_remote_url(self):
"""Test _create_url when runtime_url is a remote URL."""
# Create a fresh instance for this test
config = OpenHandsConfig()
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
# Create a mock method for runtime_url that accepts self parameter
def mock_runtime_url(self):
return 'https://example.com'
# Temporarily replace the runtime_url property
original_property = LocalRuntime.runtime_url
try:
LocalRuntime.runtime_url = property(mock_runtime_url)
url = runtime._create_url('test-prefix', 9000)
assert url == 'https://test-prefix-example.com'
finally:
# Restore the original property
LocalRuntime.runtime_url = original_property
def test_vscode_url_with_token(self):
"""Test vscode_url when token is available."""
# Create a fresh instance for this test
config = OpenHandsConfig()
config.workspace_mount_path_in_sandbox = '/workspace'
runtime = LocalRuntime.__new__(LocalRuntime)
runtime.config = config
# Add required attributes
runtime._vscode_enabled = True
runtime._runtime_initialized = True
runtime._vscode_token = 'test-token'
# Create a direct implementation of the method to test
def mock_vscode_url(self):
# Simplified version of the actual method
token = 'test-token' # Mocked token
if not token:
return None
vscode_url = 'https://vscode-example.com' # Mocked URL
return f'{vscode_url}/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}'
# Temporarily replace the vscode_url method
original_method = LocalRuntime.vscode_url
try:
LocalRuntime.vscode_url = property(mock_vscode_url)
url = runtime.vscode_url
assert url == 'https://vscode-example.com/?tkn=test-token&folder=/workspace'
finally:
# Restore the original method
LocalRuntime.vscode_url = original_method
def test_vscode_url_without_token(self):
    """Test vscode_url when token is not available."""
    # Create a fresh instance for this test
    config = OpenHandsConfig()
    runtime = LocalRuntime.__new__(LocalRuntime)
    runtime.config = config

    # Create a direct implementation of the method to test
    def mock_vscode_url(self):
        # Simplified version that returns None (no token)
        return None

    # Temporarily replace the vscode_url method
    original_method = LocalRuntime.vscode_url
    try:
        LocalRuntime.vscode_url = property(mock_vscode_url)
        assert runtime.vscode_url is None
    finally:
        # Restore the original method
        LocalRuntime.vscode_url = original_method
def test_web_hosts_with_multiple_ports(self):
    """Test web_hosts with multiple app ports."""
    # Create a fresh instance for this test
    config = OpenHandsConfig()
    runtime = LocalRuntime.__new__(LocalRuntime)
    runtime.config = config
    runtime._app_ports = [12000, 12001]

    # Mock _create_url to return predictable values
    def mock_create_url(prefix, port):
        return f'https://{prefix}-example.com'

    with patch.object(runtime, '_create_url', side_effect=mock_create_url):
        # Call the web_hosts property
        hosts = runtime.web_hosts
        # Verify the result: one entry per app port, keyed by URL.
        assert len(hosts) == 2
        assert 'https://work-1-example.com' in hosts
        assert hosts['https://work-1-example.com'] == 12000
        assert 'https://work-2-example.com' in hosts
        assert hosts['https://work-2-example.com'] == 12001
def test_web_hosts_with_no_ports(self):
    """Test web_hosts with no app ports."""
    # Create a fresh instance for this test
    config = OpenHandsConfig()
    runtime = LocalRuntime.__new__(LocalRuntime)
    runtime.config = config
    runtime._app_ports = []
    # Call the web_hosts property
    hosts = runtime.web_hosts
    # Verify the result is an empty dictionary
    assert hosts == {}

View File

@@ -1,717 +0,0 @@
import contextlib
import io
import sys
import docx
import pytest
from openhands.runtime.plugins.agent_skills.file_ops.file_ops import (
WINDOW,
_print_window,
find_file,
goto_line,
open_file,
scroll_down,
scroll_up,
search_dir,
search_file,
)
from openhands.runtime.plugins.agent_skills.file_reader.file_readers import (
parse_docx,
parse_latex,
parse_pdf,
parse_pptx,
)
# CURRENT_FILE must be reset for each test
@pytest.fixture(autouse=True)
def reset_current_file():
    """Autouse fixture: clear agentskills' module-level CURRENT_FILE between tests."""
    from openhands.runtime.plugins.agent_skills import agentskills

    agentskills.CURRENT_FILE = None
def _numbered_test_lines(start, end) -> str:
return ('\n'.join(f'{i}|' for i in range(start, end + 1))) + '\n'
def _generate_test_file_with_lines(temp_path, num_lines) -> str:
file_path = temp_path / 'test_file.py'
file_path.write_text('\n' * num_lines)
return file_path
def _generate_ruby_test_file_with_lines(temp_path, num_lines) -> str:
file_path = temp_path / 'test_file.rb'
file_path.write_text('\n' * num_lines)
return file_path
def _calculate_window_bounds(current_line, total_lines, window_size):
"""Calculate the bounds of the window around the current line."""
half_window = window_size // 2
if current_line - half_window < 0:
start = 1
end = window_size
else:
start = current_line - half_window
end = current_line + half_window
return start, end
def _capture_file_operation_error(operation, expected_error_msg):
with io.StringIO() as buf:
with contextlib.redirect_stdout(buf):
operation()
result = buf.getvalue().strip()
assert result == expected_error_msg
# Separator line used when composing expected output strings in assertions below.
SEP = '-' * 49 + '\n'

# =============================================================================
def test_open_file_unexist_path():
    """open_file on a missing path prints an ERROR message instead of raising."""
    _capture_file_operation_error(
        lambda: open_file('/unexist/path/a.txt'),
        'ERROR: File /unexist/path/a.txt not found.',
    )


def test_open_file(tmp_path):
    """open_file prints the whole file with 1-based line numbers and begin/end markers."""
    assert tmp_path is not None
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = (
        f'[File: {temp_file_path} (5 lines total)]\n'
        '(this is the beginning of the file)\n'
        '1|Line 1\n'
        '2|Line 2\n'
        '3|Line 3\n'
        '4|Line 4\n'
        '5|Line 5\n'
        '(this is the end of the file)\n'
    )
    assert result.split('\n') == expected.split('\n')


def test_open_file_with_indentation(tmp_path):
    """Leading whitespace in file content is preserved in the printed window."""
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\n Line 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = (
        f'[File: {temp_file_path} (5 lines total)]\n'
        '(this is the beginning of the file)\n'
        '1|Line 1\n'
        '2| Line 2\n'
        '3|Line 3\n'
        '4|Line 4\n'
        '5|Line 5\n'
        '(this is the end of the file)\n'
    )
    assert result.split('\n') == expected.split('\n')


def test_open_file_long(tmp_path):
    """An explicit 50-line window shows lines 1-50 plus a 'more lines below' note."""
    temp_file_path = tmp_path / 'a.txt'
    content = '\n'.join([f'Line {i}' for i in range(1, 1001)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path), 1, 50)
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
    expected += '(this is the beginning of the file)\n'
    for i in range(1, 51):
        expected += f'{i}|Line {i}\n'
    expected += '(950 more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result.split('\n') == expected.split('\n')
def test_open_file_long_with_lineno(tmp_path):
    """Opening at a specific line centers the window via _calculate_window_bounds."""
    temp_file_path = tmp_path / 'a.txt'
    content = '\n'.join([f'Line {i}' for i in range(1, 1001)])
    temp_file_path.write_text(content)
    cur_line = 100
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path), cur_line)
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} (1000 lines total)]\n'
    # since 100 is < WINDOW and 100 - WINDOW//2 < 0, so it should show all lines from 1 to WINDOW
    start, end = _calculate_window_bounds(cur_line, 1000, WINDOW)
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == 1000:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({1000 - end} more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result.split('\n') == expected.split('\n')


def test_goto_line(tmp_path):
    """goto_line re-centers the window of the currently open file."""
    temp_file_path = tmp_path / 'a.txt'
    total_lines = 1000
    content = '\n'.join([f'Line {i}' for i in range(1, total_lines + 1)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    expected += '(this is the beginning of the file)\n'
    for i in range(1, WINDOW + 1):
        expected += f'{i}|Line {i}\n'
    expected += f'({total_lines - WINDOW} more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result.split('\n') == expected.split('\n')
    # Jump to the middle of the file and verify the re-centered window.
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            goto_line(500)
        result = buf.getvalue()
    assert result is not None
    cur_line = 500
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    start, end = _calculate_window_bounds(cur_line, total_lines, WINDOW)
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == total_lines:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({total_lines - end} more lines below)\n'
    assert result.split('\n') == expected.split('\n')


def test_goto_line_negative(tmp_path):
    """goto_line rejects line numbers below 1 with an ERROR message."""
    temp_file_path = tmp_path / 'a.txt'
    content = '\n'.join([f'Line {i}' for i in range(1, 5)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
    _capture_file_operation_error(
        lambda: goto_line(-1), 'ERROR: Line number must be between 1 and 4.'
    )


def test_goto_line_out_of_bound(tmp_path):
    """goto_line rejects line numbers past the end of the file."""
    temp_file_path = tmp_path / 'a.txt'
    content = '\n'.join([f'Line {i}' for i in range(1, 10)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
    _capture_file_operation_error(
        lambda: goto_line(100), 'ERROR: Line number must be between 1 and 9.'
    )
def test_scroll_down(tmp_path):
    """scroll_down advances the window by WINDOW lines."""
    temp_file_path = tmp_path / 'a.txt'
    total_lines = 1000
    content = '\n'.join([f'Line {i}' for i in range(1, total_lines + 1)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    start, end = _calculate_window_bounds(1, total_lines, WINDOW)
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == total_lines:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({total_lines - end} more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result.split('\n') == expected.split('\n')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            scroll_down()
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    # The next window starts right after the previous one.
    start = WINDOW + 1
    end = 2 * WINDOW + 1
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == total_lines:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({total_lines - end} more lines below)\n'
    assert result.split('\n') == expected.split('\n')


def test_scroll_up(tmp_path):
    """scroll_up moves the window back by WINDOW lines."""
    temp_file_path = tmp_path / 'a.txt'
    total_lines = 1000
    content = '\n'.join([f'Line {i}' for i in range(1, total_lines + 1)])
    temp_file_path.write_text(content)
    cur_line = 300
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path), cur_line)
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    start, end = _calculate_window_bounds(cur_line, total_lines, WINDOW)
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == total_lines:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({total_lines - end} more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result.split('\n') == expected.split('\n')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            scroll_up()
        result = buf.getvalue()
    assert result is not None
    # After scrolling up the current line moves back by WINDOW.
    cur_line = cur_line - WINDOW
    expected = f'[File: {temp_file_path} ({total_lines} lines total)]\n'
    start = cur_line
    end = cur_line + WINDOW
    if start == 1:
        expected += '(this is the beginning of the file)\n'
    else:
        expected += f'({start - 1} more lines above)\n'
    for i in range(start, end + 1):
        expected += f'{i}|Line {i}\n'
    if end == total_lines:
        expected += '(this is the end of the file)\n'
    else:
        expected += f'({total_lines - end} more lines below)\n'
    assert result.split('\n') == expected.split('\n')


def test_scroll_down_edge(tmp_path):
    """scroll_down at the end of a short file leaves the view unchanged."""
    temp_file_path = tmp_path / 'a.txt'
    content = '\n'.join([f'Line {i}' for i in range(1, 10)])
    temp_file_path.write_text(content)
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            open_file(str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[File: {temp_file_path} (9 lines total)]\n'
    expected += '(this is the beginning of the file)\n'
    for i in range(1, 10):
        expected += f'{i}|Line {i}\n'
    expected += '(this is the end of the file)\n'
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            scroll_down()
        result = buf.getvalue()
    assert result is not None
    # expected should be unchanged
    assert result.split('\n') == expected.split('\n')
def test_print_window_internal(tmp_path):
    """_print_window renders a +/- window around current_line, backticks intact."""
    test_file_path = tmp_path / 'a.txt'
    test_file_path.write_text('')
    open_file(str(test_file_path))
    with open(test_file_path, 'w') as file:
        for i in range(1, 101):
            file.write(f'Line `{i}`\n')
    # Define the parameters for the test
    current_line = 50
    window = 2
    # Test _print_window especially with backticks
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            _print_window(str(test_file_path), current_line, window, return_str=False)
        result = buf.getvalue()
    expected = (
        '(48 more lines above)\n'
        '49|Line `49`\n'
        '50|Line `50`\n'
        '51|Line `51`\n'
        '(49 more lines below)\n'
    )
    assert result == expected


def test_open_file_large_line_number(tmp_path):
    """open_file near the end of a long file shows the correct centered window."""
    test_file_path = tmp_path / 'a.txt'
    test_file_path.write_text('')
    open_file(str(test_file_path))
    with open(test_file_path, 'w') as file:
        for i in range(1, 1000):
            file.write(f'Line `{i}`\n')
    # Define the parameters for the test
    current_line = 800
    window = 100
    # Test _print_window especially with backticks
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            # _print_window(str(test_file_path), current_line, window, return_str=False)
            open_file(str(test_file_path), current_line, window)
        result = buf.getvalue()
    expected = f'[File: {test_file_path} (999 lines total)]\n'
    expected += '(749 more lines above)\n'
    for i in range(750, 850 + 1):
        expected += f'{i}|Line `{i}`\n'
    expected += '(149 more lines below)\n'
    expected += '[Use `scroll_down` to view the next 100 lines of the file!]\n'
    assert result == expected
def test_search_dir(tmp_path):
    """search_dir reports a single match with its file and line number."""
    # create files with the search term "bingo"
    for i in range(1, 101):
        temp_file_path = tmp_path / f'a{i}.txt'
        with open(temp_file_path, 'w') as file:
            file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n')
            if i == 50:
                file.write('bingo')
    # test
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_dir('bingo', str(tmp_path))
        result = buf.getvalue()
    assert result is not None
    expected = (
        f'[Found 1 matches for "bingo" in {tmp_path}]\n'
        f'{tmp_path}/a50.txt (Line 6): bingo\n'
        f'[End of matches for "bingo" in {tmp_path}]\n'
    )
    assert result.split('\n') == expected.split('\n')


def test_search_dir_not_exist_term(tmp_path):
    """search_dir prints a 'No matches' message for an absent term."""
    # create files with the search term "bingo"
    for i in range(1, 101):
        temp_file_path = tmp_path / f'a{i}.txt'
        with open(temp_file_path, 'w') as file:
            file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n')
    # test
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_dir('non-exist', str(tmp_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'No matches found for "non-exist" in {tmp_path}\n'
    assert result.split('\n') == expected.split('\n')


def test_search_dir_too_much_match(tmp_path):
    """search_dir asks to narrow the search when too many files match."""
    # create files with the search term "Line 5"
    for i in range(1, 1000):
        temp_file_path = tmp_path / f'a{i}.txt'
        with open(temp_file_path, 'w') as file:
            file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_dir('Line 5', str(tmp_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'More than 999 files matched for "Line 5" in {tmp_path}. Please narrow your search.\n'
    assert result.split('\n') == expected.split('\n')


def test_search_dir_cwd(tmp_path, monkeypatch):
    """search_dir defaults to the current working directory ('./')."""
    # Using pytest's monkeypatch to change directory without affecting other tests
    monkeypatch.chdir(tmp_path)
    # create files with the search term "bingo"
    for i in range(1, 101):
        temp_file_path = tmp_path / f'a{i}.txt'
        with open(temp_file_path, 'w') as file:
            file.write('Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n')
            if i == 50:
                file.write('bingo')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_dir('bingo')
        result = buf.getvalue()
    assert result is not None
    expected = (
        '[Found 1 matches for "bingo" in ./]\n'
        './a50.txt (Line 6): bingo\n'
        '[End of matches for "bingo" in ./]\n'
    )
    assert result.split('\n') == expected.split('\n')
def test_search_file(tmp_path):
    """search_file reports matching lines within a single file."""
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_file('Line 5', str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[Found 1 matches for "Line 5" in {temp_file_path}]\n'
    expected += 'Line 5: Line 5\n'
    expected += f'[End of matches for "Line 5" in {temp_file_path}]\n'
    assert result.split('\n') == expected.split('\n')


def test_search_file_not_exist_term(tmp_path):
    """search_file prints a 'No matches' message for an absent term."""
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            search_file('Line 6', str(temp_file_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[No matches found for "Line 6" in {temp_file_path}]\n'
    assert result.split('\n') == expected.split('\n')


def test_search_file_not_exist_file():
    """search_file on a missing file prints an ERROR message."""
    _capture_file_operation_error(
        lambda: search_file('Line 6', '/unexist/path/a.txt'),
        'ERROR: File /unexist/path/a.txt not found.',
    )


def test_find_file(tmp_path):
    """find_file lists files whose name matches in the given directory."""
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            find_file('a.txt', str(tmp_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[Found 1 matches for "a.txt" in {tmp_path}]\n'
    expected += f'{tmp_path}/a.txt\n'
    expected += f'[End of matches for "a.txt" in {tmp_path}]\n'
    assert result.split('\n') == expected.split('\n')


def test_find_file_cwd(tmp_path, monkeypatch):
    """find_file defaults to the current working directory."""
    monkeypatch.chdir(tmp_path)
    temp_file_path = tmp_path / 'a.txt'
    temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            find_file('a.txt')
        result = buf.getvalue()
    assert result is not None


def test_find_file_not_exist_file():
    """find_file prints a 'No matches' message when nothing matches in cwd."""
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            find_file('nonexist.txt')
        result = buf.getvalue()
    assert result is not None
    expected = '[No matches found for "nonexist.txt" in ./]\n'
    assert result.split('\n') == expected.split('\n')


def test_find_file_not_exist_file_specific_path(tmp_path):
    """find_file prints a 'No matches' message for a specific directory."""
    with io.StringIO() as buf:
        with contextlib.redirect_stdout(buf):
            find_file('nonexist.txt', str(tmp_path))
        result = buf.getvalue()
    assert result is not None
    expected = f'[No matches found for "nonexist.txt" in {tmp_path}]\n'
    assert result.split('\n') == expected.split('\n')
def test_parse_docx(tmp_path):
    """parse_docx prints each paragraph under an '@@ Page N @@' heading."""
    # Create a DOCX file with some content
    test_docx_path = tmp_path / 'test.docx'
    doc = docx.Document()
    doc.add_paragraph('Hello, this is a test document.')
    doc.add_paragraph('This is the second paragraph.')
    doc.save(str(test_docx_path))
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    # Call the parse_docx function
    parse_docx(str(test_docx_path))
    # Capture the output
    output = sys.stdout.getvalue()
    sys.stdout = old_stdout
    # Check if the output is correct
    expected_output = (
        f'[Reading DOCX file from {test_docx_path}]\n'
        '@@ Page 1 @@\nHello, this is a test document.\n\n'
        '@@ Page 2 @@\nThis is the second paragraph.\n\n\n'
    )
    assert output == expected_output, f'Expected output does not match. Got: {output}'


def test_parse_latex(tmp_path):
    """parse_latex extracts the document body text from a .tex file."""
    # Create a LaTeX file with some content
    test_latex_path = tmp_path / 'test.tex'
    with open(test_latex_path, 'w') as f:
        f.write(r"""
\documentclass{article}
\begin{document}
Hello, this is a test LaTeX document.
\end{document}
""")
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    # Call the parse_latex function
    parse_latex(str(test_latex_path))
    # Capture the output
    output = sys.stdout.getvalue()
    sys.stdout = old_stdout
    # Check if the output is correct
    expected_output = (
        f'[Reading LaTex file from {test_latex_path}]\n'
        'Hello, this is a test LaTeX document.\n'
    )
    assert output == expected_output, f'Expected output does not match. Got: {output}'


def test_parse_pdf(tmp_path):
    """parse_pdf prints extracted text under an '@@ Page N @@' heading."""
    # Create a PDF file with some content
    test_pdf_path = tmp_path / 'test.pdf'
    from reportlab.lib.pagesizes import letter
    from reportlab.pdfgen import canvas

    c = canvas.Canvas(str(test_pdf_path), pagesize=letter)
    c.drawString(100, 750, 'Hello, this is a test PDF document.')
    c.save()
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    # Call the parse_pdf function
    parse_pdf(str(test_pdf_path))
    # Capture the output
    output = sys.stdout.getvalue()
    sys.stdout = old_stdout
    # Check if the output is correct
    expected_output = (
        f'[Reading PDF file from {test_pdf_path}]\n'
        '@@ Page 1 @@\n'
        'Hello, this is a test PDF document.\n'
    )
    assert output == expected_output, f'Expected output does not match. Got: {output}'


def test_parse_pptx(tmp_path):
    """parse_pptx prints each slide's title under an '@@ Slide N @@' heading."""
    test_pptx_path = tmp_path / 'test.pptx'
    from pptx import Presentation

    pres = Presentation()
    slide1 = pres.slides.add_slide(pres.slide_layouts[0])
    title1 = slide1.shapes.title
    title1.text = 'Hello, this is the first test PPTX slide.'
    slide2 = pres.slides.add_slide(pres.slide_layouts[0])
    title2 = slide2.shapes.title
    title2.text = 'Hello, this is the second test PPTX slide.'
    pres.save(str(test_pptx_path))
    old_stdout = sys.stdout
    sys.stdout = io.StringIO()
    parse_pptx(str(test_pptx_path))
    output = sys.stdout.getvalue()
    sys.stdout = old_stdout
    expected_output = (
        f'[Reading PowerPoint file from {test_pptx_path}]\n'
        '@@ Slide 1 @@\n'
        'Hello, this is the first test PPTX slide.\n\n'
        '@@ Slide 2 @@\n'
        'Hello, this is the second test PPTX slide.\n\n'
    )
    assert output == expected_output, f'Expected output does not match. Got: {output}'

View File

@@ -1,158 +0,0 @@
from unittest.mock import MagicMock, call
import pytest
from openhands.events.action import CmdRunAction, FileReadAction
from openhands.events.observation import (
CmdOutputObservation,
ErrorObservation,
FileReadObservation,
)
from openhands.runtime.base import Runtime
class TestGitHooks:
    """Tests for Runtime.maybe_setup_git_hooks, driven through a MagicMock runtime."""

    @pytest.fixture
    def mock_runtime(self):
        # Create a mock runtime
        mock_runtime = MagicMock(spec=Runtime)
        mock_runtime.status_callback = None

        # Set up read to return different values based on the path
        def mock_read(action):
            if action.path == '.openhands/pre-commit.sh':
                return FileReadObservation(
                    content="#!/bin/bash\necho 'Test pre-commit hook'\nexit 0",
                    path='.openhands/pre-commit.sh',
                )
            elif action.path == '.git/hooks/pre-commit':
                # Simulate no existing pre-commit hook
                return ErrorObservation(content='File not found')
            return ErrorObservation(content='Unexpected path')

        mock_runtime.read.side_effect = mock_read
        # All shell commands succeed by default.
        mock_runtime.run_action.return_value = CmdOutputObservation(
            content='', exit_code=0, command='test command'
        )
        mock_runtime.write.return_value = None
        return mock_runtime

    def test_maybe_setup_git_hooks_success(self, mock_runtime):
        # Test successful setup of git hooks
        Runtime.maybe_setup_git_hooks(mock_runtime)
        # Verify that the runtime tried to read the pre-commit script
        assert mock_runtime.read.call_args_list[0] == call(
            FileReadAction(path='.openhands/pre-commit.sh')
        )
        # Verify that the runtime created the git hooks directory
        # We can't directly compare the CmdRunAction objects, so we check if run_action was called
        assert mock_runtime.run_action.called
        # Verify that the runtime made the pre-commit script executable
        # We can't directly compare the CmdRunAction objects, so we check if run_action was called
        assert mock_runtime.run_action.called
        # Verify that the runtime wrote the pre-commit hook
        assert mock_runtime.write.called
        # Verify that the runtime made the pre-commit hook executable
        # We can't directly compare the CmdRunAction objects, so we check if run_action was called
        assert mock_runtime.run_action.call_count >= 3
        # Verify that the runtime logged success
        mock_runtime.log.assert_called_with(
            'info', 'Git pre-commit hook installed successfully'
        )

    def test_maybe_setup_git_hooks_no_script(self, mock_runtime):
        # Test when pre-commit script doesn't exist
        mock_runtime.read.side_effect = lambda action: ErrorObservation(
            content='File not found'
        )
        Runtime.maybe_setup_git_hooks(mock_runtime)
        # Verify that the runtime tried to read the pre-commit script
        mock_runtime.read.assert_called_with(
            FileReadAction(path='.openhands/pre-commit.sh')
        )
        # Verify that no other actions were taken
        mock_runtime.run_action.assert_not_called()
        mock_runtime.write.assert_not_called()

    def test_maybe_setup_git_hooks_mkdir_failure(self, mock_runtime):
        # Test failure to create git hooks directory
        def mock_run_action(action):
            if (
                isinstance(action, CmdRunAction)
                and action.command == 'mkdir -p .git/hooks'
            ):
                return CmdOutputObservation(
                    content='Permission denied',
                    exit_code=1,
                    command='mkdir -p .git/hooks',
                )
            return CmdOutputObservation(content='', exit_code=0, command=action.command)

        mock_runtime.run_action.side_effect = mock_run_action
        Runtime.maybe_setup_git_hooks(mock_runtime)
        # Verify that the runtime tried to create the git hooks directory
        assert mock_runtime.run_action.called
        # Verify that the runtime logged an error
        mock_runtime.log.assert_called_with(
            'error', 'Failed to create git hooks directory: Permission denied'
        )
        # Verify that no other actions were taken
        mock_runtime.write.assert_not_called()

    def test_maybe_setup_git_hooks_with_existing_hook(self, mock_runtime):
        # Test when there's an existing pre-commit hook
        def mock_read(action):
            if action.path == '.openhands/pre-commit.sh':
                return FileReadObservation(
                    content="#!/bin/bash\necho 'Test pre-commit hook'\nexit 0",
                    path='.openhands/pre-commit.sh',
                )
            elif action.path == '.git/hooks/pre-commit':
                # Simulate existing pre-commit hook
                return FileReadObservation(
                    content="#!/bin/bash\necho 'Existing hook'\nexit 0",
                    path='.git/hooks/pre-commit',
                )
            return ErrorObservation(content='Unexpected path')

        mock_runtime.read.side_effect = mock_read
        Runtime.maybe_setup_git_hooks(mock_runtime)
        # Verify that the runtime tried to read both scripts
        assert len(mock_runtime.read.call_args_list) >= 2
        # Verify that the runtime preserved the existing hook
        assert mock_runtime.log.call_args_list[0] == call(
            'info', 'Preserving existing pre-commit hook'
        )
        # Verify that the runtime moved the existing hook
        move_calls = [
            call
            for call in mock_runtime.run_action.call_args_list
            if isinstance(call[0][0], CmdRunAction) and 'mv' in call[0][0].command
        ]
        assert len(move_calls) > 0
        # Verify that the runtime wrote the new pre-commit hook
        assert mock_runtime.write.called
        # Verify that the runtime logged success
        assert mock_runtime.log.call_args_list[-1] == call(
            'info', 'Git pre-commit hook installed successfully'
        )

View File

@@ -1,434 +0,0 @@
from types import MappingProxyType
from unittest.mock import MagicMock, patch
import pytest
from pydantic import SecretStr
from openhands.core.config import OpenHandsConfig
from openhands.core.config.mcp_config import MCPConfig, MCPStdioServerConfig
from openhands.events.action import Action
from openhands.events.action.commands import CmdRunAction
from openhands.events.observation import NullObservation, Observation
from openhands.events.stream import EventStream
from openhands.integrations.provider import ProviderHandler, ProviderToken, ProviderType
from openhands.integrations.service_types import AuthenticationError, Repository
from openhands.runtime.base import Runtime
from openhands.storage import get_file_store
class TestRuntime(Runtime):
    """A concrete implementation of Runtime for testing"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Record every action passed to run_action so tests can assert on it.
        self.run_action_calls = []
        self._execute_shell_fn_git_handler = MagicMock(
            return_value=MagicMock(exit_code=0, stdout='', stderr='')
        )

    async def connect(self):
        pass

    def close(self):
        pass

    def browse(self, action):
        return NullObservation(content='')

    def browse_interactive(self, action):
        return NullObservation(content='')

    def run(self, action):
        return NullObservation(content='')

    def run_ipython(self, action):
        return NullObservation(content='')

    def read(self, action):
        return NullObservation(content='')

    def write(self, action):
        return NullObservation(content='')

    def copy_from(self, path):
        return ''

    def copy_to(self, path, content):
        pass

    def list_files(self, path):
        return []

    def run_action(self, action: Action) -> Observation:
        # Capture the action for later inspection by tests.
        self.run_action_calls.append(action)
        return NullObservation(content='')

    def call_tool_mcp(self, action):
        return NullObservation(content='')

    def edit(self, action):
        return NullObservation(content='')

    def get_mcp_config(
        self, extra_stdio_servers: list[MCPStdioServerConfig] | None = None
    ):
        return MCPConfig()
@pytest.fixture
def temp_dir(tmp_path_factory: pytest.TempPathFactory) -> str:
    """Return a fresh temporary directory path (as str) for the event stream store."""
    return str(tmp_path_factory.mktemp('test_event_stream'))
@pytest.fixture
def runtime(temp_dir):
    """Fixture for runtime testing"""
    config = OpenHandsConfig()
    # A single GitHub token backs the provider handler in these tests.
    git_provider_tokens = MappingProxyType(
        {ProviderType.GITHUB: ProviderToken(token=SecretStr('test_token'))}
    )
    file_store = get_file_store('local', temp_dir)
    event_stream = EventStream('abc', file_store)
    runtime = TestRuntime(
        config=config,
        event_stream=event_stream,
        sid='test',
        user_id='test_user',
        git_provider_tokens=git_provider_tokens,
    )
    return runtime
def mock_repo_and_patch(monkeypatch, provider=ProviderType.GITHUB, is_public=True):
    """Patch ProviderHandler.verify_repo_provider to return a canned Repository."""
    repo = Repository(
        id='123', full_name='owner/repo', git_provider=provider, is_public=is_public
    )

    async def mock_verify_repo_provider(*_args, **_kwargs):
        return repo

    monkeypatch.setattr(
        ProviderHandler, 'verify_repo_provider', mock_verify_repo_provider
    )
    return repo
@pytest.mark.asyncio
async def test_export_latest_git_provider_tokens_no_user_id(temp_dir):
    """Test that no token export happens when user_id is not set"""
    config = OpenHandsConfig()
    file_store = get_file_store('local', temp_dir)
    event_stream = EventStream('abc', file_store)
    runtime = TestRuntime(config=config, event_stream=event_stream, sid='test')
    # Create a command that would normally trigger token export
    cmd = CmdRunAction(command='echo $GITHUB_TOKEN')
    # This should not raise any errors and should return None
    await runtime._export_latest_git_provider_tokens(cmd)
    # Verify no secrets were set
    assert not event_stream.secrets


@pytest.mark.asyncio
async def test_export_latest_git_provider_tokens_no_token_ref(temp_dir):
    """Test that no token export happens when command doesn't reference tokens"""
    config = OpenHandsConfig()
    file_store = get_file_store('local', temp_dir)
    event_stream = EventStream('abc', file_store)
    runtime = TestRuntime(
        config=config, event_stream=event_stream, sid='test', user_id='test_user'
    )
    # Create a command that doesn't reference any tokens
    cmd = CmdRunAction(command='echo "hello"')
    # This should not raise any errors and should return None
    await runtime._export_latest_git_provider_tokens(cmd)
    # Verify no secrets were set
    assert not event_stream.secrets


@pytest.mark.asyncio
async def test_export_latest_git_provider_tokens_success(runtime):
    """Test successful token export when command references tokens"""
    # Create a command that references the GitHub token
    cmd = CmdRunAction(command='echo $GITHUB_TOKEN')
    # Export the tokens
    await runtime._export_latest_git_provider_tokens(cmd)
    # Verify that the token was exported to the event stream
    assert runtime.event_stream.secrets == {'github_token': 'test_token'}
@pytest.mark.asyncio
async def test_export_latest_git_provider_tokens_multiple_refs(temp_dir):
"""Test token export with multiple token references"""
config = OpenHandsConfig()
# Initialize with both GitHub and GitLab tokens
git_provider_tokens = MappingProxyType(
{
ProviderType.GITHUB: ProviderToken(token=SecretStr('github_token')),
ProviderType.GITLAB: ProviderToken(token=SecretStr('gitlab_token')),
}
)
file_store = get_file_store('local', temp_dir)
event_stream = EventStream('abc', file_store)
runtime = TestRuntime(
config=config,
event_stream=event_stream,
sid='test',
user_id='test_user',
git_provider_tokens=git_provider_tokens,
)
# Create a command that references multiple tokens
cmd = CmdRunAction(command='echo $GITHUB_TOKEN && echo $GITLAB_TOKEN')
# Export the tokens
await runtime._export_latest_git_provider_tokens(cmd)
# Verify that both tokens were exported
assert event_stream.secrets == {
'github_token': 'github_token',
'gitlab_token': 'gitlab_token',
}
@pytest.mark.asyncio
async def test_export_latest_git_provider_tokens_token_update(runtime):
    """Test that token updates are handled correctly"""
    action = CmdRunAction(command='echo $GITHUB_TOKEN')
    # First export uses the fixture's initial token.
    await runtime._export_latest_git_provider_tokens(action)
    # Swap in a replacement token on the provider handler.
    updated_token = 'new_test_token'
    runtime.provider_handler._provider_tokens = MappingProxyType(
        {ProviderType.GITHUB: ProviderToken(token=SecretStr(updated_token))}
    )
    # A second export must pick up the replacement, not the stale value.
    await runtime._export_latest_git_provider_tokens(action)
    assert runtime.event_stream.secrets == {'github_token': updated_token}
@pytest.mark.asyncio
async def test_clone_or_init_repo_no_repo_init_git_in_empty_workspace(temp_dir):
    """Test that git init is run when no repository is selected and init_git_in_empty_workspace"""
    config = OpenHandsConfig()
    config.init_git_in_empty_workspace = True
    stream = EventStream('abc', get_file_store('local', temp_dir))
    runtime = TestRuntime(
        config=config, event_stream=stream, sid='test', user_id=None
    )
    # No repository selected: the runtime should initialize an empty repo.
    result = await runtime.clone_or_init_repo(None, None, None)
    assert result == ''
    # Exactly one command: git init plus the safe.directory config.
    assert len(runtime.run_action_calls) == 1
    init_action = runtime.run_action_calls[0]
    assert isinstance(init_action, CmdRunAction)
    expected_cmd = (
        f'git init && git config --global --add safe.directory {runtime.workspace_root}'
    )
    assert init_action.command == expected_cmd
@pytest.mark.asyncio
async def test_clone_or_init_repo_no_repo_no_user_id_with_workspace_base(temp_dir):
    """Test that git init is not run when no repository is selected, no user_id, but workspace_base is set"""
    config = OpenHandsConfig()
    # A configured workspace_base suppresses the empty-workspace git init.
    config.workspace_base = '/some/path'
    stream = EventStream('abc', get_file_store('local', temp_dir))
    runtime = TestRuntime(
        config=config, event_stream=stream, sid='test', user_id=None
    )
    result = await runtime.clone_or_init_repo(None, None, None)
    # No commands should have been executed at all.
    assert not runtime.run_action_calls
    assert result == ''
@pytest.mark.asyncio
async def test_clone_or_init_repo_auth_error(temp_dir):
    """Test that RuntimeError is raised when authentication fails"""
    config = OpenHandsConfig()
    stream = EventStream('abc', get_file_store('local', temp_dir))
    runtime = TestRuntime(
        config=config, event_stream=stream, sid='test', user_id='test_user'
    )
    # Force provider verification to fail with an authentication error.
    with patch.object(
        ProviderHandler,
        'verify_repo_provider',
        side_effect=AuthenticationError('Auth failed'),
    ):
        with pytest.raises(Exception) as excinfo:
            await runtime.clone_or_init_repo(None, 'owner/repo', None)
        # The surfaced error should explain the git-provider auth problem.
        message = str(excinfo.value)
        assert 'Git provider authentication issue when getting remote URL' in message
@pytest.mark.asyncio
async def test_clone_or_init_repo_github_with_token(temp_dir, monkeypatch):
    """Cloning a GitHub repo with a token embeds the token in the clone URL."""
    config = OpenHandsConfig()
    stream = EventStream('abc', get_file_store('local', temp_dir))
    token_value = 'github_test_token'
    provider_tokens = MappingProxyType(
        {ProviderType.GITHUB: ProviderToken(token=SecretStr(token_value))}
    )
    runtime = TestRuntime(
        config=config,
        event_stream=stream,
        sid='test',
        user_id='test_user',
        git_provider_tokens=provider_tokens,
    )
    mock_repo_and_patch(monkeypatch, provider=ProviderType.GITHUB)
    result = await runtime.clone_or_init_repo(provider_tokens, 'owner/repo', None)
    assert result == 'repo'
    # Clone and checkout are issued as two separate commands.
    assert len(runtime.run_action_calls) == 2
    clone_action, checkout_action = runtime.run_action_calls
    assert isinstance(clone_action, CmdRunAction)
    assert isinstance(checkout_action, CmdRunAction)
    # The clone URL must embed the token for authenticated access.
    assert (
        f'git clone https://{token_value}@github.com/owner/repo.git repo'
        in clone_action.command
    )
    # The checkout step enters the repo and creates a workspace branch.
    assert 'cd repo' in checkout_action.command
    assert 'git checkout -b openhands-workspace-' in checkout_action.command
@pytest.mark.asyncio
async def test_clone_or_init_repo_github_no_token(temp_dir, monkeypatch):
    """Test cloning a GitHub repository without a token"""
    config = OpenHandsConfig()
    stream = EventStream('abc', get_file_store('local', temp_dir))
    runtime = TestRuntime(
        config=config, event_stream=stream, sid='test', user_id='test_user'
    )
    mock_repo_and_patch(monkeypatch, provider=ProviderType.GITHUB)
    result = await runtime.clone_or_init_repo(None, 'owner/repo', None)
    assert result == 'repo'
    # Two commands: an anonymous clone followed by a branch checkout.
    assert len(runtime.run_action_calls) == 2
    clone_action, checkout_action = runtime.run_action_calls
    assert isinstance(clone_action, CmdRunAction)
    assert isinstance(checkout_action, CmdRunAction)
    # Without a token the clone URL carries no credentials.
    assert 'git clone https://github.com/owner/repo.git repo' in clone_action.command
    # Checkout enters the repo and creates a fresh workspace branch.
    assert 'cd repo' in checkout_action.command
    assert 'git checkout -b openhands-workspace-' in checkout_action.command
@pytest.mark.asyncio
async def test_clone_or_init_repo_gitlab_with_token(temp_dir, monkeypatch):
    """Cloning a GitLab repo with a token uses the oauth2 URL scheme."""
    config = OpenHandsConfig()
    stream = EventStream('abc', get_file_store('local', temp_dir))
    token_value = 'gitlab_test_token'
    provider_tokens = MappingProxyType(
        {ProviderType.GITLAB: ProviderToken(token=SecretStr(token_value))}
    )
    runtime = TestRuntime(
        config=config,
        event_stream=stream,
        sid='test',
        user_id='test_user',
        git_provider_tokens=provider_tokens,
    )
    mock_repo_and_patch(monkeypatch, provider=ProviderType.GITLAB)
    result = await runtime.clone_or_init_repo(provider_tokens, 'owner/repo', None)
    assert result == 'repo'
    # Clone and checkout are issued as two separate commands.
    assert len(runtime.run_action_calls) == 2
    clone_action, checkout_action = runtime.run_action_calls
    assert isinstance(clone_action, CmdRunAction)
    assert isinstance(checkout_action, CmdRunAction)
    # GitLab token auth uses the oauth2:<token>@ URL form.
    assert (
        f'git clone https://oauth2:{token_value}@gitlab.com/owner/repo.git repo'
        in clone_action.command
    )
    # The checkout step enters the repo and creates a workspace branch.
    assert 'cd repo' in checkout_action.command
    assert 'git checkout -b openhands-workspace-' in checkout_action.command
@pytest.mark.asyncio
async def test_clone_or_init_repo_with_branch(temp_dir, monkeypatch):
    """Test cloning a repository with a specified branch"""
    config = OpenHandsConfig()
    stream = EventStream('abc', get_file_store('local', temp_dir))
    runtime = TestRuntime(
        config=config, event_stream=stream, sid='test', user_id='test_user'
    )
    mock_repo_and_patch(monkeypatch, provider=ProviderType.GITHUB)
    result = await runtime.clone_or_init_repo(None, 'owner/repo', 'feature-branch')
    assert result == 'repo'
    # Two commands: clone, then checkout of the requested branch.
    assert len(runtime.run_action_calls) == 2
    clone_action, checkout_action = runtime.run_action_calls
    assert isinstance(clone_action, CmdRunAction)
    assert isinstance(checkout_action, CmdRunAction)
    assert 'git clone https://github.com/owner/repo.git repo' in clone_action.command
    # The requested branch is checked out directly...
    assert 'cd repo' in checkout_action.command
    assert 'git checkout feature-branch' in checkout_action.command
    # ...without creating a new workspace branch.
    assert 'git checkout -b' not in checkout_action.command

View File

@@ -1,324 +0,0 @@
"""Tests for GitLab alternative directory support for microagents."""
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from openhands.core.config import OpenHandsConfig, SandboxConfig
from openhands.events import EventStream
from openhands.integrations.service_types import ProviderType, Repository
from openhands.microagent.microagent import (
RepoMicroagent,
)
from openhands.runtime.base import Runtime
class MockRuntime(Runtime):
    """Mock runtime for testing.

    Provides minimal concrete implementations of Runtime's abstract methods so
    microagent-loading logic can be exercised without a real sandbox.
    """

    def __init__(self, workspace_root: Path):
        # Create a minimal config for testing
        config = OpenHandsConfig()
        config.workspace_mount_path_in_sandbox = str(workspace_root)
        config.sandbox = SandboxConfig()
        # Create a mock event stream
        event_stream = MagicMock(spec=EventStream)
        # Initialize the parent class properly
        super().__init__(
            config=config, event_stream=event_stream, sid='test', git_provider_tokens={}
        )
        self._workspace_root = workspace_root
        # Captured (level, message) tuples from log() calls.
        self._logs = []

    @property
    def workspace_root(self) -> Path:
        """Return the workspace root path."""
        return self._workspace_root

    def log(self, level: str, message: str):
        """Mock log method: records (level, message) instead of emitting."""
        self._logs.append((level, message))

    def run_action(self, action):
        """Mock run_action method."""
        # For testing, we'll simulate successful cloning
        from openhands.events.observation import CmdOutputObservation

        return CmdOutputObservation(content='', exit_code=0)

    def read(self, action):
        """Mock read method: always behaves as if the file is missing."""
        from openhands.events.observation import ErrorObservation

        return ErrorObservation('File not found')

    def _load_microagents_from_directory(self, directory: Path, source: str):
        """Mock microagent loading.

        Builds one RepoMicroagent per *.md file found (recursively), skipping
        README.md; returns an empty list when the directory does not exist.
        """
        if not directory.exists():
            return []
        # Create mock microagents based on directory structure
        microagents = []
        for md_file in directory.rglob('*.md'):
            if md_file.name == 'README.md':
                continue
            # Create a simple mock microagent
            from openhands.microagent.types import MicroagentMetadata, MicroagentType

            agent = RepoMicroagent(
                name=f'mock_{md_file.stem}',
                content=f'Mock content from {md_file}',
                metadata=MicroagentMetadata(name=f'mock_{md_file.stem}'),
                source=str(md_file),
                type=MicroagentType.REPO_KNOWLEDGE,
            )
            microagents.append(agent)
        return microagents

    # Implement abstract methods with minimal functionality
    def connect(self):
        pass

    def run(self, action):
        from openhands.events.observation import CmdOutputObservation

        return CmdOutputObservation(content='', exit_code=0)

    def run_ipython(self, action):
        from openhands.events.observation import IPythonRunCellObservation

        return IPythonRunCellObservation(content='', code='')

    def edit(self, action):
        from openhands.events.observation import FileEditObservation

        return FileEditObservation(content='', path='')

    def browse(self, action):
        from openhands.events.observation import BrowserObservation

        return BrowserObservation(content='', url='', screenshot='')

    def browse_interactive(self, action):
        from openhands.events.observation import BrowserObservation

        return BrowserObservation(content='', url='', screenshot='')

    def write(self, action):
        from openhands.events.observation import FileWriteObservation

        return FileWriteObservation(content='', path='')

    def copy_to(self, host_src, sandbox_dest, recursive=False):
        pass

    def copy_from(self, sandbox_src, host_dest, recursive=False):
        pass

    def list_files(self, path=None):
        return []

    def get_mcp_config(self, extra_stdio_servers=None):
        from openhands.core.config.mcp_config import MCPConfig

        return MCPConfig()

    def call_tool_mcp(self, action):
        from openhands.events.observation import MCPObservation

        return MCPObservation(content='', tool='', result='')
def create_test_microagents(base_dir: Path, config_dir_name: str = '.openhands'):
    """Create test microagent files in the specified directory.

    Writes a single minimal repo-type microagent (YAML frontmatter + body)
    under <base_dir>/<config_dir_name>/microagents and returns that directory.
    """
    target_dir = base_dir / config_dir_name / 'microagents'
    target_dir.mkdir(parents=True, exist_ok=True)
    agent_markdown = """---
name: test_agent
type: repo
version: 1.0.0
agent: CodeActAgent
---
# Test Agent
This is a test microagent.
"""
    (target_dir / 'test.md').write_text(agent_markdown)
    return target_dir
@pytest.fixture
def temp_workspace():
    """Create a temporary workspace directory.

    Yields the directory as a Path; it is removed when the test finishes.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        yield Path(temp_dir)
def test_is_gitlab_repository_github(temp_workspace):
    """Test that GitHub repositories are correctly identified as non-GitLab."""
    runtime = MockRuntime(temp_workspace)
    gh_repo = Repository(
        id='123',
        full_name='owner/repo',
        git_provider=ProviderType.GITHUB,
        is_public=True,
    )
    # Provider lookup resolves to a GitHub repository.
    with patch('openhands.runtime.base.ProviderHandler') as handler_cls:
        handler_cls.return_value = MagicMock()
        with patch('openhands.runtime.base.call_async_from_sync') as sync_call:
            sync_call.return_value = gh_repo
            assert runtime._is_gitlab_repository('github.com/owner/repo') is False
def test_is_gitlab_repository_gitlab(temp_workspace):
    """Test that GitLab repositories are correctly identified."""
    runtime = MockRuntime(temp_workspace)
    gl_repo = Repository(
        id='456',
        full_name='owner/repo',
        git_provider=ProviderType.GITLAB,
        is_public=True,
    )
    # Provider lookup resolves to a GitLab repository.
    with patch('openhands.runtime.base.ProviderHandler') as handler_cls:
        handler_cls.return_value = MagicMock()
        with patch('openhands.runtime.base.call_async_from_sync') as sync_call:
            sync_call.return_value = gl_repo
            assert runtime._is_gitlab_repository('gitlab.com/owner/repo') is True
def test_is_gitlab_repository_exception(temp_workspace):
    """Test that exceptions in provider detection return False."""
    runtime = MockRuntime(temp_workspace)
    # Any failure during provider detection must be treated as "not GitLab".
    with patch('openhands.runtime.base.ProviderHandler') as handler_cls:
        handler_cls.side_effect = Exception('Provider error')
        assert runtime._is_gitlab_repository('unknown.com/owner/repo') is False
def test_get_microagents_from_org_or_user_github(temp_workspace):
    """Test that GitHub repositories only try .openhands directory."""
    runtime = MockRuntime(temp_workspace)
    with patch.object(runtime, '_is_gitlab_repository', return_value=False):
        # Simulate the org-level repository being absent.
        with patch('openhands.runtime.base.call_async_from_sync') as sync_call:
            sync_call.side_effect = Exception('Repository not found')
            loaded = runtime.get_microagents_from_org_or_user('github.com/owner/repo')
            # Nothing loaded, and exactly one attempt was made
            # (.openhands only, never openhands-config).
            assert len(loaded) == 0
            assert sync_call.call_count == 1
def test_get_microagents_from_org_or_user_gitlab_success_with_config(temp_workspace):
    """Test that GitLab repositories use openhands-config and succeed."""
    runtime = MockRuntime(temp_workspace)
    # Pre-populate an org directory with microagents placed directly inside it.
    org_dir = temp_workspace / 'org_openhands_owner'
    create_test_microagents(org_dir, '.')
    with patch.object(runtime, '_is_gitlab_repository', return_value=True):
        # Simulate a successful clone URL for openhands-config.
        with patch('openhands.runtime.base.call_async_from_sync') as sync_call:
            sync_call.return_value = 'https://gitlab.com/owner/openhands-config.git'
            loaded = runtime.get_microagents_from_org_or_user('gitlab.com/owner/repo')
            # May legitimately be empty; the key check is the single attempt.
            assert len(loaded) >= 0
            assert sync_call.call_count == 1
def test_get_microagents_from_org_or_user_gitlab_failure(temp_workspace):
    """Test that GitLab repositories handle failure gracefully when openhands-config doesn't exist."""
    runtime = MockRuntime(temp_workspace)
    with patch.object(runtime, '_is_gitlab_repository', return_value=True):
        # openhands-config cannot be resolved for this org.
        with patch('openhands.runtime.base.call_async_from_sync') as sync_call:
            sync_call.side_effect = Exception('openhands-config not found')
            loaded = runtime.get_microagents_from_org_or_user('gitlab.com/owner/repo')
            # A missing repo yields no microagents after a single attempt.
            assert len(loaded) == 0
            assert sync_call.call_count == 1
def test_get_microagents_from_selected_repo_gitlab_uses_openhands(temp_workspace):
    """Test that GitLab repositories use .openhands directory for repository-specific microagents."""
    runtime = MockRuntime(temp_workspace)
    # Lay out a cloned repository containing .openhands microagents.
    repo_dir = temp_workspace / 'repo'
    repo_dir.mkdir()
    create_test_microagents(repo_dir, '.openhands')
    with patch.object(runtime, '_is_gitlab_repository', return_value=True):
        # No org-level microagents for this scenario.
        with patch.object(
            runtime, 'get_microagents_from_org_or_user', return_value=[]
        ):
            loaded = runtime.get_microagents_from_selected_repo('gitlab.com/owner/repo')
            # Loading must complete without raising and yield a list.
            assert isinstance(loaded, list)
def test_get_microagents_from_selected_repo_github_only_openhands(temp_workspace):
    """Test that GitHub repositories only check .openhands directory."""
    runtime = MockRuntime(temp_workspace)
    # Lay out a repository with microagents in BOTH candidate directories.
    repo_dir = temp_workspace / 'repo'
    repo_dir.mkdir()
    create_test_microagents(repo_dir, 'openhands-config')
    create_test_microagents(repo_dir, '.openhands')
    with patch.object(runtime, '_is_gitlab_repository', return_value=False):
        # No org-level microagents for this scenario.
        with patch.object(
            runtime, 'get_microagents_from_org_or_user', return_value=[]
        ):
            loaded = runtime.get_microagents_from_selected_repo('github.com/owner/repo')
            # Only .openhands should be consulted; call must not raise.
            assert isinstance(loaded, list)

View File

@@ -1,153 +0,0 @@
"""Test that the runtime import system is robust against broken third-party dependencies.
This test specifically addresses the issue where broken third-party runtime dependencies
(like runloop-api-client with incompatible httpx_aiohttp versions) would break the entire
OpenHands CLI and system.
"""
import logging
import sys
import pytest
def test_cli_import_with_broken_third_party_runtime():
    """Test that CLI can be imported even with broken third-party runtime dependencies."""
    # Evict cached openhands/third_party modules so the import runs fresh.
    stale = [
        name for name in sys.modules.keys() if 'openhands' in name or 'third_party' in name
    ]
    for name in stale:
        del sys.modules[name]
    # Importing the CLI must not raise, even with broken optional runtimes.
    try:
        import openhands.cli.main  # noqa: F401
    except Exception as e:
        pytest.fail(f'CLI import failed: {e}')
def test_runtime_import_robustness():
    """Test that runtime import system is robust against broken dependencies."""
    # Evict cached runtime modules so the import path is exercised fresh.
    stale = [name for name in sys.modules.keys() if 'openhands.runtime' in name]
    for name in stale:
        del sys.modules[name]
    # The import must succeed even with broken third-party runtimes.
    try:
        import openhands.runtime  # noqa: F401
    except Exception as e:
        pytest.fail(f'Runtime import failed: {e}')
def test_get_runtime_cls_works():
    """Test that get_runtime_cls works even when third-party runtimes are broken."""
    import openhands.runtime

    # Core runtimes must always resolve to a class.
    for runtime_name in ('docker', 'local'):
        assert openhands.runtime.get_runtime_cls(runtime_name) is not None
    # Unknown runtimes raise a descriptive ValueError.
    with pytest.raises(ValueError, match='Runtime nonexistent not supported'):
        openhands.runtime.get_runtime_cls('nonexistent')
def test_runtime_exception_handling():
    """Test that the runtime discovery code properly handles exceptions."""
    # A successful import of this module means the discovery-time exception
    # handling in openhands/runtime/__init__.py caught everything (not just
    # ImportError) while scanning third-party runtimes.
    import openhands.runtime

    for attr in ('get_runtime_cls', '_THIRD_PARTY_RUNTIME_CLASSES'):
        assert hasattr(openhands.runtime, attr)
def test_runtime_import_exception_handling_behavior():
    """Test that runtime import handles ImportError silently but logs other exceptions.

    Reproduces the exception-handling logic from openhands/runtime/__init__.py
    and verifies its logging behavior through a capturing handler. The dead
    first assignment to module_path (it was overwritten before any use) has
    been removed.
    """
    from io import StringIO

    from openhands.core.logger import openhands_logger as logger

    # Capture WARNING-level output from the OpenHands logger.
    log_capture = StringIO()
    handler = logging.StreamHandler(log_capture)
    handler.setLevel(logging.WARNING)
    logger.addHandler(handler)
    original_level = logger.level
    logger.setLevel(logging.WARNING)
    try:
        # Case 1: ImportError (library not installed) is swallowed silently.
        try:
            raise ImportError("No module named 'missing_library'")
        except ImportError:
            # This is the exact code from runtime init: just pass, no logging
            pass
        # Case 2: any other exception (library present but broken) is logged.
        module_path = 'third_party.runtime.impl.runloop.runloop_runtime'
        try:
            raise AttributeError(
                "module 'httpx_aiohttp' has no attribute 'HttpxAiohttpClient'"
            )
        except ImportError:
            # ImportError means the library is not installed (expected for optional dependencies)
            pass
        except Exception as e:
            # Other exceptions mean the library is present but broken, which should be logged
            # This is the exact code from runtime init
            logger.warning(f'Failed to import third-party runtime {module_path}: {e}')
        log_output = log_capture.getvalue()
        # The broken-library warning was emitted...
        assert 'Failed to import third-party runtime' in log_output
        assert 'HttpxAiohttpClient' in log_output
        # ...but the silently-handled ImportError left no trace.
        assert 'missing_library' not in log_output
    finally:
        # Restore the logger regardless of assertion outcome.
        logger.removeHandler(handler)
        logger.setLevel(original_level)
def test_import_error_handled_silently(caplog):
    """Test that ImportError is handled silently (no logging) as it means library is not installed.

    The previous bare `logging.getLogger('openhands.runtime')` call discarded
    its result and had no effect on the test, so it has been removed.
    """
    with caplog.at_level(logging.WARNING):
        # Simulate the ImportError path from runtime init: it must not log.
        try:
            raise ImportError("No module named 'optional_runtime_library'")
        except ImportError:
            # This is the exact code from runtime init: just pass, no logging
            pass
    # No WARNING record may have been produced for the ImportError.
    warning_records = [
        record for record in caplog.records if record.levelname == 'WARNING'
    ]
    assert len(warning_records) == 0, (
        f'ImportError should not generate warnings, but got: {warning_records}'
    )

View File

@@ -1,89 +0,0 @@
from unittest.mock import MagicMock, Mock
import httpx
import pytest
from openhands.core.exceptions import (
AgentRuntimeDisconnectedError,
AgentRuntimeTimeoutError,
)
from openhands.events.action import CmdRunAction
from openhands.runtime.base import Runtime
@pytest.fixture
def mock_session():
    """Bare mock standing in for the runtime's HTTP session."""
    return Mock()
@pytest.fixture
def runtime(mock_session):
    """Mocked Runtime whose send_action_for_execution can be programmed per test."""
    runtime = Mock(spec=Runtime)
    runtime.session = mock_session
    # Tests set side_effect on this to simulate timeouts/disconnects.
    runtime.send_action_for_execution = Mock()
    return runtime
def test_runtime_timeout_error(runtime, mock_session):
    """A runtime timeout surfaces as AgentRuntimeTimeoutError with its message intact."""
    # Build a command with a 120s hard timeout.
    action = CmdRunAction(command='test command')
    action.set_hard_timeout(120)
    timeout_message = (
        'Runtime failed to return execute_action before the requested timeout of 120s'
    )
    runtime.send_action_for_execution.side_effect = AgentRuntimeTimeoutError(
        timeout_message
    )
    # Executing the action must raise the timeout error unchanged.
    with pytest.raises(AgentRuntimeTimeoutError) as exc_info:
        runtime.send_action_for_execution(action)
    assert str(exc_info.value) == timeout_message
@pytest.mark.parametrize(
    'status_code,expected_message',
    [
        (404, 'Runtime is not responding. This may be temporary, please try again.'),
        (
            502,
            'Runtime is temporarily unavailable. This may be due to a restart or network issue, please try again.',
        ),
    ],
)
def test_runtime_disconnected_error(
    runtime, mock_session, status_code, expected_message
):
    """A runtime disconnect surfaces as AgentRuntimeDisconnectedError with the expected message.

    NOTE: the httpx response mock previously built here was never wired to
    anything (the runtime mock raises directly), so that dead setup has been
    removed. `status_code` now only labels the parametrized scenario.
    """
    # Program the mocked runtime to raise the disconnect error directly.
    runtime.send_action_for_execution.side_effect = AgentRuntimeDisconnectedError(
        expected_message
    )
    action = CmdRunAction(command='test command')
    action.set_hard_timeout(120)
    # Executing the action must raise with the user-facing message intact.
    with pytest.raises(AgentRuntimeDisconnectedError) as exc_info:
        runtime.send_action_for_execution(action)
    assert str(exc_info.value) == expected_message

View File

@@ -1,73 +0,0 @@
"""Unit tests for the setup script functionality."""
from unittest.mock import MagicMock, patch
from openhands.events.action import CmdRunAction, FileReadAction
from openhands.events.event import EventSource
from openhands.events.observation import ErrorObservation, FileReadObservation
from openhands.runtime.base import Runtime
def test_maybe_run_setup_script_executes_action():
    """Test that maybe_run_setup_script executes the action after adding it to the event stream."""
    runtime = MagicMock(spec=Runtime)
    # Simulate a repo that ships an .openhands/setup.sh script.
    runtime.read.return_value = FileReadObservation(
        content="#!/bin/bash\necho 'test'", path='.openhands/setup.sh'
    )
    runtime.event_stream = MagicMock()
    runtime.status_callback = None
    # Invoke the real implementation against the mocked runtime.
    with patch.object(
        Runtime, 'maybe_run_setup_script', Runtime.maybe_run_setup_script
    ):
        Runtime.maybe_run_setup_script(runtime)
    # The setup script was read from the expected path.
    runtime.read.assert_called_once_with(FileReadAction(path='.openhands/setup.sh'))
    # The setup command was published as an ENVIRONMENT event...
    runtime.event_stream.add_event.assert_called_once()
    publish_args, _ = runtime.event_stream.add_event.call_args
    published_action, published_source = publish_args
    assert isinstance(published_action, CmdRunAction)
    assert published_source == EventSource.ENVIRONMENT
    # ...and then actually executed with the expected command line.
    runtime.run_action.assert_called_once()
    run_args, _ = runtime.run_action.call_args
    executed_action = run_args[0]
    assert isinstance(executed_action, CmdRunAction)
    assert (
        executed_action.command
        == 'chmod +x .openhands/setup.sh && source .openhands/setup.sh'
    )
def test_maybe_run_setup_script_skips_when_file_not_found():
    """Test that maybe_run_setup_script skips execution when the setup script is not found."""
    runtime = MagicMock(spec=Runtime)
    # Reading the setup script fails, as when the file does not exist.
    runtime.read.return_value = ErrorObservation(content='File not found', error_id='')
    runtime.event_stream = MagicMock()
    # Invoke the real implementation against the mocked runtime.
    with patch.object(
        Runtime, 'maybe_run_setup_script', Runtime.maybe_run_setup_script
    ):
        Runtime.maybe_run_setup_script(runtime)
    # The read was attempted at the expected path...
    runtime.read.assert_called_once_with(FileReadAction(path='.openhands/setup.sh'))
    # ...but nothing was published or executed when the script is missing.
    runtime.event_stream.add_event.assert_not_called()
    runtime.run_action.assert_not_called()

View File

@@ -1,468 +0,0 @@
import pytest
from openhands.runtime.utils.bash import escape_bash_special_chars, split_bash_commands
def test_split_commands_util():
    """split_bash_commands must split a joined script back into the original commands.

    Covers quoting, escaped quotes, line continuations, multi-line strings,
    heredocs, and shell loops. Fix over the previous version: the manual
    `for i in range(len(cmds))` index loop is replaced with zip/enumerate,
    and an explicit length check guards against the splitter producing a
    different number of commands (extras were previously ignored silently).
    """
    cmds = [
        'ls -l',
        'echo -e "hello\nworld"',
        """
echo -e "hello it\\'s me"
""".strip(),
        """
echo \\
-e 'hello' \\
-v
""".strip(),
        """
echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
""".strip(),
        """
echo -e 'hello
world
are
you\\n
there?'
""".strip(),
        """
echo -e 'hello
world "
'
""".strip(),
        """
kubectl apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
name: busybox-sleep
spec:
containers:
- name: busybox
image: busybox:1.28
args:
- sleep
- "1000000"
EOF
""".strip(),
        """
mkdir -p _modules && \
for month in {01..04}; do
for day in {01..05}; do
touch "_modules/2024-${month}-${day}-sample.md"
done
done
""".strip(),
    ]
    joined_cmds = '\n'.join(cmds)
    split_cmds = split_bash_commands(joined_cmds)
    # Debug aid: show each split command when the test fails.
    for s in split_cmds:
        print('\nCMD')
        print(s)
    # Guard against silent truncation or over-splitting before comparing.
    assert len(split_cmds) == len(cmds)
    for i, (actual, expected) in enumerate(zip(split_cmds, cmds)):
        assert actual.strip() == expected.strip(), (
            f'At index {i}: {actual} != {expected}.'
        )
@pytest.mark.parametrize(
    'input_command, expected_output',
    [
        ('ls -l', ['ls -l']),
        ("echo 'Hello, world!'", ["echo 'Hello, world!'"]),
        ('cd /tmp && touch test.txt', ['cd /tmp && touch test.txt']),
        ("echo -e 'line1\\nline2\\nline3'", ["echo -e 'line1\\nline2\\nline3'"]),
        (
            "grep 'pattern' file.txt | sort | uniq",
            ["grep 'pattern' file.txt | sort | uniq"],
        ),
        ('for i in {1..5}; do echo $i; done', ['for i in {1..5}; do echo $i; done']),
        (
            "echo 'Single quotes don\\'t escape'",
            ["echo 'Single quotes don\\'t escape'"],
        ),
        (
            'echo "Double quotes \\"do\\" escape"',
            ['echo "Double quotes \\"do\\" escape"'],
        ),
    ],
)
def test_single_commands(input_command, expected_output):
    # Each case is a single logical command (quoting, pipes, &&, loops):
    # the splitter must return it unchanged as a one-element list.
    assert split_bash_commands(input_command) == expected_output
def test_heredoc():
    """A heredoc stays one command; commands after it are split off."""
    script = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
    expected = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
    assert split_bash_commands(script) == expected
def test_backslash_continuation():
    """Backslash-newline continuations are folded into a single command."""
    # NOTE: the trailing single backslashes below are Python line
    # continuations inside the (non-raw) literal, so the first echo is one
    # physical line in the actual input string.
    script = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
    expected = [
        'echo "This is a long command that spans multiple lines"',
        'echo "Next command"',
    ]
    assert split_bash_commands(script) == expected
def test_comments():
    """Trailing and standalone comments attach to the preceding command."""
    script = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
    expected = [
        'echo "Hello" # This is a comment\n# This is another comment',
        'ls -l',
    ]
    assert split_bash_commands(script) == expected
def test_complex_quoting():
    # Escape-sensitive fixture. NOTE: inside this (non-raw) triple-quoted
    # literal, \' is simply an escaped single quote, so the second input line
    # already reads: echo 'This is a '''single-quoted''' string' — matching
    # the expected output below. \\" produces a literal \" in both.
    input_commands = """
echo "This is a \\"quoted\\" string"
echo 'This is a '\''single-quoted'\'' string'
echo "Mixed 'quotes' in \\"double quotes\\""
"""
    expected_output = [
        'echo "This is a \\"quoted\\" string"',
        "echo 'This is a '''single-quoted''' string'",
        'echo "Mixed \'quotes\' in \\"double quotes\\""',
    ]
    assert split_bash_commands(input_commands) == expected_output
def test_invalid_syntax():
    """Unparseable input falls back to the original string as one command."""
    for bad_input in (
        'echo "Unclosed quote',
        "echo 'Unclosed quote",
        'cat <<EOF\nUnclosed heredoc',
    ):
        # it will fall back to return the original input
        assert split_bash_commands(bad_input) == [bad_input]
def test_unclosed_backtick():
    """Regression test for issue #7391: unclosed backticks must not crash.

    Parsing a command with an unclosed backtick used to raise
    TypeError: ParsingError.__init__() missing 2 required positional
    arguments: 's' and 'position'. The parser should instead fall back to
    returning the original input unchanged.

    The previous try/except blocks caught TypeError only to re-raise it
    unchanged, which added nothing — letting the call raise directly fails
    the test in exactly the same way, so they have been removed.
    """
    command = 'echo `unclosed backtick'
    # Must not raise; unparseable input is returned as a single command.
    assert split_bash_commands(command) == [command]
    # Also test with the original command from the issue (with placeholder org/repo)
    curl_command = 'curl -X POST "https://api.github.com/repos/example-org/example-repo/pulls" \\ -H "Authorization: Bearer $GITHUB_TOKEN" \\ -H "Accept: application/vnd.github.v3+json" \\ -d \'{ "title": "XXX", "head": "XXX", "base": "main", "draft": false }\' `echo unclosed'
    assert split_bash_commands(curl_command) == [curl_command]
def test_over_escaped_command():
    """Regression test for issue #8369, example 1.

    The command below is heavily over-escaped (literal ``\\n`` sequences and
    stacked backslashes).  Parsing must not raise; on failure the splitter
    falls back to returning the original command unchanged.
    """
    over_escaped_command = r'# 0. Setup directory\\nrm -rf /workspace/repro_sphinx_bug && mkdir -p /workspace/repro_sphinx_bug && cd /workspace/repro_sphinx_bug\\n\\n# 1. Run sphinx-quickstart\\nsphinx-quickstart --no-sep --project myproject --author me -v 0.1.0 --release 0.1.0 --language en . -q\\n\\n# 2. Create index.rst\\necho -e \'Welcome\\\\\\\\n=======\\\\\\\\n\\\\\\\\n.. toctree::\\\\n :maxdepth: 2\\\\\\\\n\\\\\\\\n mypackage_file\\\\\\\\n\' > index.rst'
    # Should not raise any exception
    try:
        result = split_bash_commands(over_escaped_command)
        # If parsing fails, it should return the original command
        assert result == [over_escaped_command]
    except Exception as e:
        # This is the error we're trying to fix
        pytest.fail(f'split_bash_commands raised {type(e).__name__} unexpectedly: {e}')
@pytest.fixture
def sample_commands():
    """Single commands covering quoting, pipelines, loops, heredocs, and
    escaped characters; each should survive splitting as one unit."""
    return [
        'ls -l',
        'echo "Hello, world!"',
        'cd /tmp && touch test.txt',
        'echo -e "line1\\nline2\\nline3"',
        'grep "pattern" file.txt | sort | uniq',
        'for i in {1..5}; do echo $i; done',
        'cat <<EOF\nmultiline\ntext\nEOF',
        'echo "Escaped \\"quotes\\""',
        "echo 'Single quotes don\\'t escape'",
        'echo "Command with a trailing backslash \\\n and continuation"',
    ]
def test_split_single_commands(sample_commands):
    """Each sample command is already a single unit and must not be split."""
    for command in sample_commands:
        parts = split_bash_commands(command)
        assert len(parts) == 1, f'Expected single command, got: {parts}'
def test_split_commands_with_heredoc():
    """A heredoc is one command; text after it is split off normally."""
    script = """
cat <<EOF
multiline
text
EOF
echo "Done"
"""
    expected_output = ['cat <<EOF\nmultiline\ntext\nEOF', 'echo "Done"']
    result = split_bash_commands(script)
    assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_backslash_continuation():
    """Backslash-newline continuations are folded into one logical command.

    Note: the triple-quoted string below contains *literal* trailing
    backslashes at end-of-line; the splitter consumes them when joining.
    """
    input_commands = """
echo "This is a long \
command that spans \
multiple lines"
echo "Next command"
"""
    expected_output = [
        'echo "This is a long command that spans multiple lines"',
        'echo "Next command"',
    ]
    result = split_bash_commands(input_commands)
    assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_empty_lines():
    """Surrounding whitespace lines do not produce empty command entries."""
    script = """
ls -l
echo "Hello"
cd /tmp
"""
    expected_output = ['ls -l', 'echo "Hello"', 'cd /tmp']
    result = split_bash_commands(script)
    assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_comments():
    """Inline and standalone comments are kept with the command they follow."""
    script = """
echo "Hello" # This is a comment
# This is another comment
ls -l
"""
    expected_output = [
        'echo "Hello" # This is a comment\n# This is another comment',
        'ls -l',
    ]
    result = split_bash_commands(script)
    assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_complex_quoting():
    """Splitting preserves escaped quotes inside commands.

    The single-quote-embedding case is currently disabled; it is kept below
    as commented-out lines for reference.
    """
    input_commands = """
echo "This is a \\"quoted\\" string"
echo "Mixed 'quotes' in \\"double quotes\\""
"""
    # echo 'This is a '\''single-quoted'\'' string'
    expected_output = [
        'echo "This is a \\"quoted\\" string"',
        'echo "Mixed \'quotes\' in \\"double quotes\\""',
    ]
    # "echo 'This is a '\\''single-quoted'\\'' string'",
    result = split_bash_commands(input_commands)
    assert result == expected_output, f'Expected {expected_output}, got {result}'
def test_split_commands_with_invalid_input():
    """Inputs that cannot be parsed are returned whole, unchanged."""
    for bad_input in (
        'echo "Unclosed quote',
        "echo 'Unclosed quote",
        'cat <<EOF\nUnclosed heredoc',
    ):
        # The splitter falls back to echoing the original input on failure.
        assert split_bash_commands(bad_input) == [bad_input]
def test_escape_bash_special_chars():
    """Backslash-escaped shell metacharacters outside quotes get their
    backslash doubled; quoted content is left untouched."""
    test_cases = [
        # Basic cases - use raw strings (r'') to avoid Python escape sequence warnings
        ('echo test \\; ls', 'echo test \\\\; ls'),
        ('grep pattern \\| sort', 'grep pattern \\\\| sort'),
        ('cmd1 \\&\\& cmd2', 'cmd1 \\\\&\\\\& cmd2'),
        ('cat file \\> output.txt', 'cat file \\\\> output.txt'),
        ('cat \\< input.txt', 'cat \\\\< input.txt'),
        # Quoted strings should remain unchanged
        ('echo "test \\; unchanged"', 'echo "test \\; unchanged"'),
        ("echo 'test \\| unchanged'", "echo 'test \\| unchanged'"),
        # Mixed quoted and unquoted
        (
            'echo "quoted \\;" \\; "more" \\| grep',
            'echo "quoted \\;" \\\\; "more" \\\\| grep',
        ),
        # Multiple escapes in sequence
        ('cmd1 \\;\\|\\& cmd2', 'cmd1 \\\\;\\\\|\\\\& cmd2'),
        # Commands with other backslashes
        ('echo test\\ntest', 'echo test\\ntest'),
        ('echo "test\\ntest"', 'echo "test\\ntest"'),
        # Edge cases
        ('', ''),  # Empty string
        ('\\\\', '\\\\'),  # Double backslash
        ('\\"', '\\"'),  # Escaped quote
    ]
    for input_cmd, expected in test_cases:
        result = escape_bash_special_chars(input_cmd)
        assert result == expected, (
            f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
        )
def test_escape_bash_special_chars_with_invalid_syntax():
    """Unparseable commands are returned unmodified instead of escaped."""
    invalid_inputs = (
        'echo "unclosed quote',
        "echo 'unclosed quote",
        'cat <<EOF\nunclosed heredoc',
    )
    for input_cmd in invalid_inputs:
        # On a parse failure the escaper must echo back the original input.
        result = escape_bash_special_chars(input_cmd)
        assert result == input_cmd, f'Failed to handle invalid input: {input_cmd}'
def test_escape_bash_special_chars_with_heredoc():
    """Backslash sequences inside a heredoc body must not be escaped."""
    input_cmd = r"""cat <<EOF
line1 \; not escaped
line2 \| not escaped
EOF"""
    # Heredoc content should not be escaped
    expected = input_cmd
    result = escape_bash_special_chars(input_cmd)
    assert result == expected, (
        f'Failed to handle heredoc correctly\nExpected: {expected}\nGot: {result}'
    )
def test_escape_bash_special_chars_with_parameter_expansion():
    """Parameter expansions ($VAR, ${VAR...}) pass through unescaped while
    surrounding escaped metacharacters are still doubled."""
    test_cases = [
        # Parameter expansion should be preserved
        ('echo $HOME', 'echo $HOME'),
        ('echo ${HOME}', 'echo ${HOME}'),
        ('echo ${HOME:-default}', 'echo ${HOME:-default}'),
        # Mixed with special chars
        ('echo $HOME \\; ls', 'echo $HOME \\\\; ls'),
        ('echo ${PATH} \\| grep bin', 'echo ${PATH} \\\\| grep bin'),
        # Quoted parameter expansion
        ('echo "$HOME"', 'echo "$HOME"'),
        ('echo "${HOME}"', 'echo "${HOME}"'),
        # Complex parameter expansions
        ('echo ${var:=default} \\; ls', 'echo ${var:=default} \\\\; ls'),
        ('echo ${!prefix*} \\| sort', 'echo ${!prefix*} \\\\| sort'),
    ]
    for input_cmd, expected in test_cases:
        result = escape_bash_special_chars(input_cmd)
        assert result == expected, (
            f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
        )
def test_escape_bash_special_chars_with_command_substitution():
    """Command substitutions ($(...) and backticks) are preserved as-is,
    including when nested or mixed with escaped metacharacters."""
    test_cases = [
        # Basic command substitution
        ('echo $(pwd)', 'echo $(pwd)'),
        ('echo `pwd`', 'echo `pwd`'),
        # Mixed with special chars
        ('echo $(pwd) \\; ls', 'echo $(pwd) \\\\; ls'),
        ('echo `pwd` \\| grep home', 'echo `pwd` \\\\| grep home'),
        # Nested command substitution
        ('echo $(echo `pwd`)', 'echo $(echo `pwd`)'),
        # Complex command substitution
        ('echo $(find . -name "*.txt" \\; ls)', 'echo $(find . -name "*.txt" \\; ls)'),
        # Mixed with quotes
        ('echo "$(pwd)"', 'echo "$(pwd)"'),
        ('echo "`pwd`"', 'echo "`pwd`"'),
    ]
    for input_cmd, expected in test_cases:
        result = escape_bash_special_chars(input_cmd)
        assert result == expected, (
            f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
        )
def test_escape_bash_special_chars_mixed_nodes():
    """Combinations of parameter expansion and command substitution are
    preserved; only the unquoted escaped metacharacters get doubled."""
    test_cases = [
        # Mix of parameter expansion and command substitution
        ('echo $HOME/$(pwd)', 'echo $HOME/$(pwd)'),
        # Mix with special chars
        ('echo $HOME/$(pwd) \\; ls', 'echo $HOME/$(pwd) \\\\; ls'),
        # Complex mixed cases
        (
            'echo "${HOME}/$(basename `pwd`) \\; next"',
            'echo "${HOME}/$(basename `pwd`) \\; next"',
        ),
        (
            'VAR=${HOME} \\; echo $(pwd)',
            'VAR=${HOME} \\\\; echo $(pwd)',
        ),
        # Real-world examples
        (
            'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \\;',
            'find . -name "*.txt" -exec grep "${PATTERN:-default}" {} \\\\;',
        ),
        (
            'echo "Current path: ${PWD}/$(basename `pwd`)" \\| grep home',
            'echo "Current path: ${PWD}/$(basename `pwd`)" \\\\| grep home',
        ),
    ]
    for input_cmd, expected in test_cases:
        result = escape_bash_special_chars(input_cmd)
        assert result == expected, (
            f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
        )
def test_escape_bash_special_chars_with_chained_commands():
    """'&&' chains are left intact; escaped metacharacters inside each
    chained command are still doubled."""
    test_cases = [
        # Basic chained commands
        ('ls && pwd', 'ls && pwd'),
        ('echo "hello" && ls', 'echo "hello" && ls'),
        # Chained commands with special chars
        ('ls \\; pwd && echo test', 'ls \\\\; pwd && echo test'),
        ('echo test && grep pattern \\| sort', 'echo test && grep pattern \\\\| sort'),
        # Complex chained cases
        ('echo ${HOME} && ls \\; pwd', 'echo ${HOME} && ls \\\\; pwd'),
        (
            'echo "$(pwd)" && cat file \\> out.txt',
            'echo "$(pwd)" && cat file \\\\> out.txt',
        ),
        # Multiple chains
        ('cmd1 && cmd2 && cmd3', 'cmd1 && cmd2 && cmd3'),
        (
            'cmd1 \\; ls && cmd2 \\| grep && cmd3',
            'cmd1 \\\\; ls && cmd2 \\\\| grep && cmd3',
        ),
    ]
    for input_cmd, expected in test_cases:
        result = escape_bash_special_chars(input_cmd)
        assert result == expected, (
            f'Failed on input "{input_cmd}"\nExpected: "{expected}"\nGot: "{result}"'
        )

View File

@@ -1,339 +0,0 @@
import json
from openhands.events.observation.commands import (
CMD_OUTPUT_METADATA_PS1_REGEX,
CMD_OUTPUT_PS1_BEGIN,
CMD_OUTPUT_PS1_END,
CmdOutputMetadata,
CmdOutputObservation,
)
def test_ps1_metadata_format():
    """The generated PS1 prompt is wrapped in the expected begin/end markers."""
    prompt = CmdOutputMetadata.to_ps1_prompt()
    print(prompt)
    begin_marker = '\n###PS1JSON###\n'
    end_marker = '\n###PS1END###\n'
    assert prompt.startswith(begin_marker)
    assert prompt.endswith(end_marker)
    # JSON keys inside the prompt are emitted with backslash-escaped quotes.
    assert r'\"exit_code\"' in prompt, 'PS1 prompt should contain escaped double quotes'
def test_ps1_metadata_json_structure():
    """The PS1 prompt body parses as JSON with exactly the expected keys."""
    prompt = CmdOutputMetadata.to_ps1_prompt()
    # Strip the markers, unescape the quotes, and drop any trailing content.
    body = prompt.replace('###PS1JSON###\n', '').replace('\n###PS1END###\n', '')
    body = body.replace(r'\"', '"')
    body = body.split('###PS1END###')[0].strip()
    parsed = json.loads(body)
    # The prompt must expose exactly these fields, no more and no fewer.
    assert set(parsed.keys()) == {
        'pid',
        'exit_code',
        'username',
        'hostname',
        'working_dir',
        'py_interpreter_path',
    }
def test_ps1_metadata_parsing():
    """A well-formed PS1 block round-trips into CmdOutputMetadata."""
    test_data = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }
    ps1_str = f"""###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    # Every field survives the round trip unchanged.
    for field_name, expected_value in test_data.items():
        assert getattr(metadata, field_name) == expected_value
def test_ps1_metadata_parsing_string():
    """String-valued fields in the PS1 JSON are parsed and coerced
    (exit_code arrives as the string "0" and becomes the int 0)."""
    ps1_str = r"""###PS1JSON###
{
"exit_code": "0",
"username": "myname",
"hostname": "myhostname",
"working_dir": "~/mydir",
"py_interpreter_path": "/my/python/path"
}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == 0
    assert metadata.username == 'myname'
    assert metadata.hostname == 'myhostname'
    assert metadata.working_dir == '~/mydir'
    assert metadata.py_interpreter_path == '/my/python/path'
def test_ps1_metadata_parsing_string_real_example():
    """Parse a verbatim PS1 block captured from a real CI run, including an
    empty pid field and a leading newline before the begin marker."""
    ps1_str = r"""
###PS1JSON###
{
"pid": "",
"exit_code": "0",
"username": "runner",
"hostname": "fv-az1055-610",
"working_dir": "/home/runner/work/OpenHands/OpenHands",
"py_interpreter_path": "/home/runner/.cache/pypoetry/virtualenvs/openhands-ai-ULPBlkAi-py3.12/bin/python"
}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == 0
    assert metadata.username == 'runner'
    assert metadata.hostname == 'fv-az1055-610'
    assert metadata.working_dir == '/home/runner/work/OpenHands/OpenHands'
    assert (
        metadata.py_interpreter_path
        == '/home/runner/.cache/pypoetry/virtualenvs/openhands-ai-ULPBlkAi-py3.12/bin/python'
    )
def test_ps1_metadata_parsing_additional_prefix():
    """Text preceding the PS1 block does not interfere with parsing."""
    test_data = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }
    ps1_str = f"""
This is something that not part of the PS1 prompt
###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    # All fields must still round-trip despite the extra prefix text.
    for field_name, expected_value in test_data.items():
        assert getattr(metadata, field_name) == expected_value
def test_ps1_metadata_parsing_invalid():
    """Test parsing invalid PS1 output returns default metadata.

    Invalid JSON, missing markers, and empty blocks all yield zero matches;
    a block containing valid JSON with surrounding whitespace still parses.
    """
    # Test with invalid JSON
    invalid_json = """###PS1JSON###
{invalid json}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(invalid_json)
    assert len(matches) == 0  # No matches should be found for invalid JSON

    # Test with missing markers
    invalid_format = """NOT A VALID PS1 PROMPT"""
    matches = CmdOutputMetadata.matches_ps1_metadata(invalid_format)
    assert len(matches) == 0

    # Test with empty PS1 metadata
    empty_metadata = """###PS1JSON###
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(empty_metadata)
    assert len(matches) == 0  # No matches should be found for empty metadata

    # Test with whitespace in PS1 metadata
    whitespace_metadata = """###PS1JSON###
{
"exit_code": "0",
"pid": "123",
"username": "test",
"hostname": "localhost",
"working_dir": "/home/test",
"py_interpreter_path": "/usr/bin/python"
}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(whitespace_metadata)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    # String-valued numbers are coerced to ints.
    assert metadata.exit_code == 0
    assert metadata.pid == 123
def test_ps1_metadata_missing_fields():
    """Test handling of missing fields in PS1 metadata.

    Absent optional fields default to None; a missing exit_code defaults
    to -1.
    """
    # Test with only required fields
    minimal_data = {'exit_code': 0, 'pid': 123}
    ps1_str = f"""###PS1JSON###
{json.dumps(minimal_data)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == 0
    assert metadata.pid == 123
    assert metadata.username is None
    assert metadata.hostname is None
    assert metadata.working_dir is None
    assert metadata.py_interpreter_path is None

    # Test with missing exit_code but valid pid
    no_exit_code = {'pid': 123, 'username': 'test'}
    ps1_str = f"""###PS1JSON###
{json.dumps(no_exit_code)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == -1  # default value
    assert metadata.pid == 123
    assert metadata.username == 'test'
def test_ps1_metadata_multiple_blocks():
    """Every PS1 block in the output is found and parsed independently."""
    test_data = {
        'exit_code': 0,
        'username': 'testuser',
        'hostname': 'localhost',
        'working_dir': '/home/testuser',
        'py_interpreter_path': '/usr/bin/python',
    }
    ps1_str = f"""###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
Some other content
###PS1JSON###
{json.dumps(test_data, indent=2)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 2  # Should find both blocks
    # Both blocks should parse successfully and agree on the exit code.
    for match in matches:
        parsed = CmdOutputMetadata.from_ps1_match(match)
        assert parsed.exit_code == test_data['exit_code']
def test_ps1_metadata_regex_pattern():
    """The PS1 extraction regex captures the payload in varied contexts."""
    cases = [
        # (haystack, expected captured payload)
        (f'{CMD_OUTPUT_PS1_BEGIN}test\n{CMD_OUTPUT_PS1_END}', 'test'),
        (f'prefix\n{CMD_OUTPUT_PS1_BEGIN}test\n{CMD_OUTPUT_PS1_END}suffix', 'test'),
        (
            f'{CMD_OUTPUT_PS1_BEGIN}line1\nline2\nline3\n{CMD_OUTPUT_PS1_END}',
            'line1\nline2\nline3',
        ),
    ]
    for haystack, payload in cases:
        match = next(CMD_OUTPUT_METADATA_PS1_REGEX.finditer(haystack))
        assert match.group(1).strip() == payload
def test_cmd_output_observation_properties():
    """CmdOutputObservation exposes metadata through convenience properties."""
    # Successful command: exit code 0 is not an error.
    ok = CmdOutputObservation(
        command='ls',
        content='file1\nfile2',
        metadata=CmdOutputMetadata(exit_code=0, pid=123),
    )
    assert ok.command_id == 123
    assert ok.exit_code == 0
    assert not ok.error
    assert 'exit code 0' in ok.message
    assert 'ls' in ok.message
    rendered = str(ok)
    assert 'file1' in rendered
    assert 'file2' in rendered
    assert 'metadata' in rendered

    # Failed command: non-zero exit code flips the error flag.
    failed = CmdOutputObservation(
        command='invalid',
        content='error',
        metadata=CmdOutputMetadata(exit_code=1, pid=456),
    )
    assert failed.command_id == 456
    assert failed.exit_code == 1
    assert failed.error
    assert 'exit code 1' in failed.message
    assert 'invalid' in failed.message
    assert 'error' in str(failed)
def test_ps1_metadata_empty_fields():
    """Test handling of empty fields in PS1 metadata.

    Empty strings are preserved as-is, and irregular-but-valid JSON spacing
    still parses.
    """
    # Test with empty strings
    empty_data = {
        'exit_code': 0,
        'pid': 123,
        'username': '',
        'hostname': '',
        'working_dir': '',
        'py_interpreter_path': '',
    }
    ps1_str = f"""###PS1JSON###
{json.dumps(empty_data)}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(ps1_str)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == 0
    assert metadata.pid == 123
    assert metadata.username == ''
    assert metadata.hostname == ''
    assert metadata.working_dir == ''
    assert metadata.py_interpreter_path == ''

    # Test with malformed but valid JSON
    malformed_json = """###PS1JSON###
{
"exit_code":0,
"pid" : 123,
"username": "test" ,
"hostname": "host",
"working_dir" :"dir",
"py_interpreter_path":"path"
}
###PS1END###
"""
    matches = CmdOutputMetadata.matches_ps1_metadata(malformed_json)
    assert len(matches) == 1
    metadata = CmdOutputMetadata.from_ps1_match(matches[0])
    assert metadata.exit_code == 0
    assert metadata.pid == 123
    assert metadata.username == 'test'
    assert metadata.hostname == 'host'
    assert metadata.working_dir == 'dir'
    assert metadata.py_interpreter_path == 'path'

View File

@@ -1,388 +0,0 @@
import os
import tempfile
import time
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.runtime.utils.bash import BashCommandStatus, BashSession
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_no_change_timeout_suffix(timeout_seconds):
    """Build the suffix BashSession appends after a no-new-output timeout."""
    message = (
        f'The command has no new output after {timeout_seconds} seconds. '
        f'{TIMEOUT_MESSAGE_TEMPLATE}'
    )
    return f'\n[{message}]'
def test_session_initialization():
    """BashSession starts in the requested working dir and embeds the
    username in its underlying session name."""
    # Test with custom working directory
    with tempfile.TemporaryDirectory() as temp_dir:
        session = BashSession(work_dir=temp_dir)
        session.initialize()
        obs = session.execute(CmdRunAction('pwd'))
        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
        assert temp_dir in obs.content
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
        session.close()

    # Test with custom username
    session = BashSession(work_dir=os.getcwd(), username='nobody')
    session.initialize()
    # NOTE(review): session.session.name is assumed to follow the
    # 'openhands-<username>' convention — confirm against BashSession.
    assert 'openhands-nobody' in session.session.name
    session.close()
def test_cwd_property(tmp_path):
    """session.cwd tracks directory changes made by executed commands."""
    session = BashSession(work_dir=tmp_path)
    session.initialize()
    # Change directory and verify pwd updates
    random_dir = tmp_path / 'random'
    random_dir.mkdir()
    session.execute(CmdRunAction(f'cd {random_dir}'))
    assert session.cwd == str(random_dir)
    session.close()
def test_basic_command():
    """Happy-path execution: a successful command, a failing command, and a
    '&&' chain all complete and report their exit codes."""
    session = BashSession(work_dir=os.getcwd())
    session.initialize()

    # Test simple command
    obs = session.execute(CmdRunAction("echo 'hello world'"))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'hello world' in obs.content
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    assert obs.metadata.prefix == ''
    assert obs.metadata.exit_code == 0
    assert session.prev_status == BashCommandStatus.COMPLETED

    # Test command with error (127 = command not found)
    obs = session.execute(CmdRunAction('nonexistent_command'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == 127
    assert 'nonexistent_command: command not found' in obs.content
    assert obs.metadata.suffix == '\n[The command completed with exit code 127.]'
    assert obs.metadata.prefix == ''
    assert session.prev_status == BashCommandStatus.COMPLETED

    # Test multiple commands in sequence
    obs = session.execute(CmdRunAction('echo "first" && echo "second" && echo "third"'))
    assert 'first' in obs.content
    assert 'second' in obs.content
    assert 'third' in obs.content
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    assert obs.metadata.prefix == ''
    assert obs.metadata.exit_code == 0
    assert session.prev_status == BashCommandStatus.COMPLETED
    session.close()
def test_long_running_command_follow_by_execute():
    """A slow command hits the no-change timeout; empty is_input executes
    keep streaming its output, and issuing a *new* command while it runs
    returns the previous command's output with a warning suffix."""
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
    session.initialize()

    # Test command that produces output slowly
    obs = session.execute(
        CmdRunAction('for i in {1..3}; do echo $i; sleep 3; done', blocking=False)
    )
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '1' in obs.content  # First number should appear before timeout
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
    assert obs.metadata.prefix == ''

    # Continue watching output
    obs = session.execute(CmdRunAction('', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '2' in obs.content
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
    assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    # Test command that produces no output
    obs = session.execute(CmdRunAction('sleep 15'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '3' not in obs.content
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
    assert 'The previous command is still running' in obs.metadata.suffix
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    time.sleep(3)
    # Run it again, this time it should produce output
    obs = session.execute(CmdRunAction('sleep 15'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert '3' in obs.content
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'
    assert 'The previous command is still running' in obs.metadata.suffix
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    session.close()
def test_interactive_command():
    """Interactive reads: prompts trigger a no-change timeout, then inputs
    are fed with is_input=True — including a heredoc typed line by line."""
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=3)
    session.initialize()

    # Test interactive command with blocking=True
    obs = session.execute(
        CmdRunAction(
            'read -p \'Enter name: \' name && echo "Hello $name"',
        )
    )
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Enter name:' in obs.content
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
    assert obs.metadata.prefix == ''

    # Send input
    obs = session.execute(CmdRunAction('John', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Hello John' in obs.content
    assert obs.metadata.exit_code == 0
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    assert obs.metadata.prefix == ''
    assert session.prev_status == BashCommandStatus.COMPLETED

    # Test multiline command input: each heredoc line keeps the command running
    obs = session.execute(CmdRunAction('cat << EOF'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
    assert obs.metadata.prefix == ''

    obs = session.execute(CmdRunAction('line 1', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

    obs = session.execute(CmdRunAction('line 2', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    assert obs.metadata.suffix == get_no_change_timeout_suffix(3)
    assert obs.metadata.prefix == '[Below is the output of the previous command.]\n'

    # Terminating the heredoc completes the command and echoes its body.
    obs = session.execute(CmdRunAction('EOF', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'line 1' in obs.content and 'line 2' in obs.content
    assert obs.metadata.exit_code == 0
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    assert obs.metadata.prefix == ''
    session.close()
def test_ctrl_c():
    """Sending 'C-c' as input interrupts a running loop; the interrupted
    process reports exit code 1 or 130 depending on shell/OS."""
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
    session.initialize()

    # Start infinite loop
    obs = session.execute(
        CmdRunAction("while true; do echo 'looping'; sleep 3; done"),
    )
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'looping' in obs.content
    assert obs.metadata.suffix == get_no_change_timeout_suffix(2)
    assert obs.metadata.prefix == ''
    assert obs.metadata.exit_code == -1  # -1 indicates command is still running
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT

    # Send Ctrl+C
    obs = session.execute(CmdRunAction('C-c', is_input=True))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    # Check that the process was interrupted (exit code can be 1 or 130 depending on the shell/OS)
    assert obs.metadata.exit_code in (
        1,
        130,
    )  # Accept both common exit codes for interrupted processes
    assert 'CTRL+C was sent' in obs.metadata.suffix
    assert obs.metadata.prefix == ''
    assert session.prev_status == BashCommandStatus.COMPLETED
    session.close()
def test_empty_command_errors():
    """An empty command with nothing running yields an explicit error
    observation rather than executing anything."""
    session = BashSession(work_dir=os.getcwd())
    session.initialize()
    # Test empty command without previous command
    obs = session.execute(CmdRunAction(''))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.content == 'ERROR: No previous running command to retrieve logs from.'
    assert obs.metadata.exit_code == -1
    assert obs.metadata.prefix == ''
    assert obs.metadata.suffix == ''
    assert session.prev_status is None
    session.close()
def test_command_output_continuation():
    """Test that we can continue to get output from a long-running command.

    This test has been modified to be more robust against timing issues:
    it tolerates the command either finishing immediately or timing out,
    and in the latter case polls with empty is_input executes until all
    five numbers and the completion marker are seen.
    """
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=1)
    session.initialize()
    # Start a command that produces output slowly but with longer sleep time
    # to ensure we hit the timeout
    obs = session.execute(CmdRunAction('for i in {1..5}; do echo $i; sleep 2; done'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})

    # Check if the command completed immediately or timed out
    if session.prev_status == BashCommandStatus.COMPLETED:
        # If the command completed immediately, verify we got all the output
        logger.info('Command completed immediately', extra={'msg_type': 'TEST_INFO'})
        assert '1' in obs.content
        assert '2' in obs.content
        assert '3' in obs.content
        assert '4' in obs.content
        assert '5' in obs.content
        assert '[The command completed with exit code 0.]' in obs.metadata.suffix
    else:
        # If the command timed out, verify we got the timeout message
        assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
        assert '1' in obs.content
        assert '[The command has no new output after 1 seconds.' in obs.metadata.suffix
        # Continue getting output until we see all numbers
        numbers_seen = set()
        for i in range(1, 6):
            if str(i) in obs.content:
                numbers_seen.add(i)
        # We need to see numbers 2-5 and then the command completion
        while (
            len(numbers_seen) < 5 or session.prev_status != BashCommandStatus.COMPLETED
        ):
            obs = session.execute(CmdRunAction('', is_input=True))
            logger.info(obs, extra={'msg_type': 'OBSERVATION'})
            # Check for numbers in the output
            for i in range(1, 6):
                if str(i) in obs.content and i not in numbers_seen:
                    numbers_seen.add(i)
                    logger.info(
                        f'Found number {i} in output', extra={'msg_type': 'TEST_INFO'}
                    )
            # Check if the command has completed
            if session.prev_status == BashCommandStatus.COMPLETED:
                assert (
                    '[The command completed with exit code 0.]' in obs.metadata.suffix
                )
                break
            else:
                assert (
                    '[The command has no new output after 1 seconds.'
                    in obs.metadata.suffix
                )
                assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
        # Verify we've seen all numbers
        assert numbers_seen == {1, 2, 3, 4, 5}, (
            f'Expected to see numbers 1-5, but saw {numbers_seen}'
        )
    # Verify the command completed
    assert session.prev_status == BashCommandStatus.COMPLETED
    session.close()
def test_long_output():
    """5000 lines of output are captured completely (no buffer truncation)."""
    session = BashSession(work_dir=os.getcwd())
    session.initialize()
    # Generate a long output that may exceed buffer size
    obs = session.execute(CmdRunAction('for i in {1..5000}; do echo "Line $i"; done'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Line 1' in obs.content
    assert 'Line 5000' in obs.content
    assert obs.metadata.exit_code == 0
    assert obs.metadata.prefix == ''
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    session.close()
def test_long_output_exceed_history_limit():
    """Output beyond the history limit is truncated from the front, keeping
    the most recent lines and flagging the truncation in the prefix."""
    session = BashSession(work_dir=os.getcwd())
    session.initialize()
    # Generate a long output that may exceed buffer size
    obs = session.execute(CmdRunAction('for i in {1..50000}; do echo "Line $i"; done'))
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'Previous command outputs are truncated' in obs.metadata.prefix
    # The tail of the output survives truncation.
    assert 'Line 40000' in obs.content
    assert 'Line 50000' in obs.content
    assert obs.metadata.exit_code == 0
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    session.close()
def test_multiline_command():
    """A multi-line if/fi block runs as a single command."""
    session = BashSession(work_dir=os.getcwd())
    session.initialize()
    # Test multiline command with PS2 prompt disabled
    obs = session.execute(
        CmdRunAction("""if true; then
echo "inside if"
fi""")
    )
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert 'inside if' in obs.content
    assert obs.metadata.exit_code == 0
    assert obs.metadata.prefix == ''
    assert obs.metadata.suffix == '\n[The command completed with exit code 0.]'
    session.close()
def test_python_interactive_input():
    """Inputs can be fed to an interactive python3 process via is_input."""
    session = BashSession(work_dir=os.getcwd(), no_change_timeout_seconds=2)
    session.initialize()
    # One-liner that prompts twice, then prints a greeting.
    python_script = """name = input('Enter your name: '); age = input('Enter your age: '); print(f'Hello {name}, you are {age} years old')"""
    # Launch the interactive script; it blocks waiting on the first prompt.
    result = session.execute(CmdRunAction(f'python3 -c "{python_script}"'))
    logger.info(result, extra={'msg_type': 'OBSERVATION'})
    assert 'Enter your name:' in result.content
    # exit code -1: the process is still running, waiting for input
    assert result.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    # Answer the first prompt (name)
    result = session.execute(CmdRunAction('Alice', is_input=True))
    logger.info(result, extra={'msg_type': 'OBSERVATION'})
    assert 'Enter your age:' in result.content
    assert result.metadata.exit_code == -1
    assert session.prev_status == BashCommandStatus.NO_CHANGE_TIMEOUT
    # Answer the second prompt (age); the script should now finish.
    result = session.execute(CmdRunAction('25', is_input=True))
    logger.info(result, extra={'msg_type': 'OBSERVATION'})
    assert 'Hello Alice, you are 25 years old' in result.content
    assert result.metadata.exit_code == 0
    assert result.metadata.suffix == '\n[The command completed with exit code 0.]'
    assert session.prev_status == BashCommandStatus.COMPLETED
    session.close()

View File

@@ -1,294 +0,0 @@
import os
import shutil
import subprocess
import sys
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
import pytest
from openhands.runtime.utils import git_changes, git_diff, git_handler
from openhands.runtime.utils.git_handler import CommandResult, GitHandler
@pytest.mark.skipif(sys.platform == 'win32', reason='Windows is not supported')
class TestGitHandler(unittest.TestCase):
    """Exercise GitHandler against real git repositories created on disk.

    setUp builds an ``origin`` repo plus a ``local`` clone carrying committed,
    staged and unstaged additions/modifications/deletions, so each test can
    assert on the full matrix of statuses reported by ``get_git_changes`` and
    the before/after contents reported by ``get_git_diff``.
    """

    def setUp(self):
        """Create the repos and a GitHandler wired to real shell execution."""
        # Create temporary directories for our test repositories
        self.test_dir = tempfile.mkdtemp()
        self.origin_dir = os.path.join(self.test_dir, 'origin')
        self.local_dir = os.path.join(self.test_dir, 'local')
        # Create the directories
        os.makedirs(self.origin_dir, exist_ok=True)
        os.makedirs(self.local_dir, exist_ok=True)
        # Track executed commands for verification
        self.executed_commands = []
        self.created_files = []
        # Initialize the GitHandler with our mock functions
        self.git_handler = GitHandler(
            execute_shell_fn=self._execute_command, create_file_fn=self._create_file
        )
        self.git_handler.set_cwd(self.local_dir)
        # Point the handler at the real helper scripts so they run via python3.
        self.git_handler.git_changes_cmd = f'python3 {git_changes.__file__}'
        self.git_handler.git_diff_cmd = f'python3 {git_diff.__file__} "{{file_path}}"'
        # Set up the git repositories
        self._setup_git_repos()

    def tearDown(self):
        # Clean up the temporary directories
        shutil.rmtree(self.test_dir)

    def _execute_command(self, cmd, cwd=None):
        """Execute a shell command and return the result."""
        result = subprocess.run(
            args=cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=cwd,
        )
        stderr = result.stderr or b''
        stdout = result.stdout or b''
        # stderr is concatenated before stdout into a single content string.
        return CommandResult((stderr + stdout).decode(), result.returncode)

    def run_command(self, cmd, cwd=None):
        """Run a setup command, raising RuntimeError on a non-zero exit."""
        result = self._execute_command(cmd, cwd)
        if result.exit_code != 0:
            raise RuntimeError(
                f'command_error:{cmd};{result.exit_code};{result.content}'
            )

    def _create_file(self, path, content):
        """Mock function for creating files."""
        self.created_files.append((path, content))
        try:
            with open(path, 'w') as f:
                f.write(content)
            return 0
        except Exception:
            return -1

    def write_file(
        self,
        dir: str,
        name: str,
        additional_content: tuple[str, ...] = ('Line 1', 'Line 2', 'Line 3'),
    ):
        """Write a file whose first line is its own name, then extra lines."""
        with open(os.path.join(dir, name), 'w') as f:
            f.write(name)
            for line in additional_content:
                f.write('\n')
                f.write(line)
        assert os.path.exists(os.path.join(dir, name))

    def _setup_git_repos(self):
        """Set up real git repositories for testing."""
        # Set up origin repository
        self.run_command('git init --initial-branch=main', self.origin_dir)
        self._execute_command(
            "git config user.email 'test@example.com'", self.origin_dir
        )
        self._execute_command("git config user.name 'Test User'", self.origin_dir)
        # Set up the initial state...
        self.write_file(self.origin_dir, 'unchanged.txt')
        self.write_file(self.origin_dir, 'committed_modified.txt')
        self.write_file(self.origin_dir, 'staged_modified.txt')
        self.write_file(self.origin_dir, 'unstaged_modified.txt')
        self.write_file(self.origin_dir, 'committed_delete.txt')
        self.write_file(self.origin_dir, 'staged_delete.txt')
        self.write_file(self.origin_dir, 'unstaged_delete.txt')
        self.run_command("git add . && git commit -m 'Initial Commit'", self.origin_dir)
        # Clone the origin repository to local
        self.run_command(f'git clone "{self.origin_dir}" "{self.local_dir}"')
        self._execute_command(
            "git config user.email 'test@example.com'", self.local_dir
        )
        self._execute_command("git config user.name 'Test User'", self.local_dir)
        self.run_command('git checkout -b feature-branch', self.local_dir)
        # Setup committed changes...
        self.write_file(self.local_dir, 'committed_modified.txt', ('Line 4',))
        self.write_file(self.local_dir, 'committed_add.txt')
        os.remove(os.path.join(self.local_dir, 'committed_delete.txt'))
        self.run_command(
            "git add . && git commit -m 'First batch of changes'", self.local_dir
        )
        # Setup staged changes...
        self.write_file(self.local_dir, 'staged_modified.txt', ('Line 4',))
        self.write_file(self.local_dir, 'staged_add.txt')
        os.remove(os.path.join(self.local_dir, 'staged_delete.txt'))
        self.run_command('git add .', self.local_dir)
        # Setup unstaged changes...
        self.write_file(self.local_dir, 'unstaged_modified.txt', ('Line 4',))
        self.write_file(self.local_dir, 'unstaged_add.txt')
        os.remove(os.path.join(self.local_dir, 'unstaged_delete.txt'))

    def setup_nested(self):
        """Create two nested repos (one with a space in its name) inside local."""
        nested_1 = Path(self.local_dir, 'nested 1')
        nested_1.mkdir()
        nested_1 = str(nested_1)
        self.run_command('git init --initial-branch=main', nested_1)
        self._execute_command("git config user.email 'test@example.com'", nested_1)
        self._execute_command("git config user.name 'Test User'", nested_1)
        self.write_file(nested_1, 'committed_add.txt')
        self.run_command('git add .', nested_1)
        self.run_command('git commit -m "Initial Commit"', nested_1)
        self.write_file(nested_1, 'staged_add.txt')
        nested_2 = Path(self.local_dir, 'nested_2')
        nested_2.mkdir()
        nested_2 = str(nested_2)
        self.run_command('git init --initial-branch=main', nested_2)
        self._execute_command("git config user.email 'test@example.com'", nested_2)
        self._execute_command("git config user.name 'Test User'", nested_2)
        self.write_file(nested_2, 'committed_add.txt')
        self.run_command('git add .', nested_2)
        self.run_command('git commit -m "Initial Commit"', nested_2)
        self.write_file(nested_2, 'unstaged_add.txt')

    def test_get_git_changes(self):
        """Test with unpushed commits, staged commits, and unstaged commits"""
        changes = self.git_handler.get_git_changes()
        expected_changes = [
            {'status': 'A', 'path': 'committed_add.txt'},
            {'status': 'D', 'path': 'committed_delete.txt'},
            {'status': 'M', 'path': 'committed_modified.txt'},
            {'status': 'A', 'path': 'staged_add.txt'},
            {'status': 'D', 'path': 'staged_delete.txt'},
            {'status': 'M', 'path': 'staged_modified.txt'},
            {'status': 'A', 'path': 'unstaged_add.txt'},
            {'status': 'D', 'path': 'unstaged_delete.txt'},
            {'status': 'M', 'path': 'unstaged_modified.txt'},
        ]
        assert changes == expected_changes

    def test_get_git_changes_after_push(self):
        """Test with staged commits, and unstaged commits"""
        # After pushing, committed changes are no longer reported.
        self.run_command('git push -u origin feature-branch', self.local_dir)
        changes = self.git_handler.get_git_changes()
        expected_changes = [
            {'status': 'A', 'path': 'staged_add.txt'},
            {'status': 'D', 'path': 'staged_delete.txt'},
            {'status': 'M', 'path': 'staged_modified.txt'},
            {'status': 'A', 'path': 'unstaged_add.txt'},
            {'status': 'D', 'path': 'unstaged_delete.txt'},
            {'status': 'M', 'path': 'unstaged_modified.txt'},
        ]
        assert changes == expected_changes

    def test_get_git_changes_nested_repos(self):
        """Test with staged commits, and unstaged commits"""
        # Changes in nested repos are reported with paths relative to cwd.
        self.setup_nested()
        changes = self.git_handler.get_git_changes()
        expected_changes = [
            {'status': 'A', 'path': 'committed_add.txt'},
            {'status': 'D', 'path': 'committed_delete.txt'},
            {'status': 'M', 'path': 'committed_modified.txt'},
            {'status': 'A', 'path': 'nested 1/committed_add.txt'},
            {'status': 'A', 'path': 'nested 1/staged_add.txt'},
            {'status': 'A', 'path': 'nested_2/committed_add.txt'},
            {'status': 'A', 'path': 'nested_2/unstaged_add.txt'},
            {'status': 'A', 'path': 'staged_add.txt'},
            {'status': 'D', 'path': 'staged_delete.txt'},
            {'status': 'M', 'path': 'staged_modified.txt'},
            {'status': 'A', 'path': 'unstaged_add.txt'},
            {'status': 'D', 'path': 'unstaged_delete.txt'},
            {'status': 'M', 'path': 'unstaged_modified.txt'},
        ]
        assert changes == expected_changes

    def test_get_git_diff_staged_modified(self):
        """Test on a staged modified"""
        diff = self.git_handler.get_git_diff('staged_modified.txt')
        expected_diff = {
            'original': 'staged_modified.txt\nLine 1\nLine 2\nLine 3',
            'modified': 'staged_modified.txt\nLine 4',
        }
        assert diff == expected_diff

    def test_get_git_diff_unchanged(self):
        """Test that get_git_diff delegates to the git_diff module."""
        diff = self.git_handler.get_git_diff('unchanged.txt')
        expected_diff = {
            'original': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
            'modified': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
        }
        assert diff == expected_diff

    def test_get_git_diff_unpushed(self):
        """Test that get_git_diff delegates to the git_diff module."""
        diff = self.git_handler.get_git_diff('committed_modified.txt')
        expected_diff = {
            'original': 'committed_modified.txt\nLine 1\nLine 2\nLine 3',
            'modified': 'committed_modified.txt\nLine 4',
        }
        assert diff == expected_diff

    def test_get_git_diff_unstaged_add(self):
        """Test that get_git_diff delegates to the git_diff module."""
        # A brand-new file has an empty "original" side.
        diff = self.git_handler.get_git_diff('unstaged_add.txt')
        expected_diff = {
            'original': '',
            'modified': 'unstaged_add.txt\nLine 1\nLine 2\nLine 3',
        }
        assert diff == expected_diff

    def test_get_git_changes_fallback(self):
        """Test that get_git_changes falls back to creating a script file when needed."""
        # Break the git changes command
        with patch(
            'openhands.runtime.utils.git_handler.GIT_CHANGES_CMD',
            'non-existant-command',
        ):
            self.git_handler.git_changes_cmd = git_handler.GIT_CHANGES_CMD
            changes = self.git_handler.get_git_changes()
            expected_changes = [
                {'status': 'A', 'path': 'committed_add.txt'},
                {'status': 'D', 'path': 'committed_delete.txt'},
                {'status': 'M', 'path': 'committed_modified.txt'},
                {'status': 'A', 'path': 'staged_add.txt'},
                {'status': 'D', 'path': 'staged_delete.txt'},
                {'status': 'M', 'path': 'staged_modified.txt'},
                {'status': 'A', 'path': 'unstaged_add.txt'},
                {'status': 'D', 'path': 'unstaged_delete.txt'},
                {'status': 'M', 'path': 'unstaged_modified.txt'},
            ]
            assert changes == expected_changes

    def test_get_git_diff_fallback(self):
        """Test that get_git_diff delegates to the git_diff module."""
        # Break the git diff command
        with patch(
            'openhands.runtime.utils.git_handler.GIT_DIFF_CMD', 'non-existant-command'
        ):
            self.git_handler.git_diff_cmd = git_handler.GIT_DIFF_CMD
            diff = self.git_handler.get_git_diff('unchanged.txt')
            expected_diff = {
                'original': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
                'modified': 'unchanged.txt\nLine 1\nLine 2\nLine 3',
            }
            assert diff == expected_diff

View File

@@ -1,90 +0,0 @@
from unittest.mock import Mock
import pytest
from openhands.runtime.utils.log_streamer import LogStreamer
@pytest.fixture
def mock_container():
    """Provide a bare Mock standing in for a docker container."""
    container = Mock()
    return container
@pytest.fixture
def mock_log_fn():
    """Provide a Mock callable to capture (level, message) log calls."""
    log_fn = Mock()
    return log_fn
def test_init_failure_handling(mock_container, mock_log_fn):
    """LogStreamer should degrade gracefully when container.logs() raises."""
    # Make the container's logs() call blow up during construction.
    mock_container.logs.side_effect = Exception('Test error')
    log_streamer = LogStreamer(mock_container, mock_log_fn)
    # Neither the reader thread nor the generator should have been created.
    assert log_streamer.stdout_thread is None
    assert log_streamer.log_generator is None
    mock_log_fn.assert_called_with(
        'error', 'Failed to initialize log streaming: Test error'
    )
def test_stream_logs_without_generator(mock_container, mock_log_fn):
    """_stream_logs must not crash when the log generator is absent."""
    log_streamer = LogStreamer(mock_container, mock_log_fn)
    # Simulate a missing generator and invoke the streaming loop directly.
    log_streamer.log_generator = None
    log_streamer._stream_logs()
    mock_log_fn.assert_called_with('error', 'Log generator not initialized')
def test_cleanup_without_thread(mock_container, mock_log_fn):
    """close() must succeed even when no stdout thread was ever started."""
    log_streamer = LogStreamer(mock_container, mock_log_fn)
    log_streamer.stdout_thread = None
    # Should not raise any exceptions.
    log_streamer.close()
def test_normal_operation(mock_container, mock_log_fn):
    """Test normal operation of LogStreamer."""

    # Create a mock generator class that mimics Docker's log generator:
    # iterable over byte lines, with a close() that ends iteration.
    class MockLogGenerator:
        def __init__(self, logs):
            self.logs = iter(logs)
            self.closed = False

        def __iter__(self):
            return self

        def __next__(self):
            # Once closed, behave like an exhausted stream.
            if self.closed:
                raise StopIteration
            return next(self.logs)

        def close(self):
            self.closed = True

    mock_logs = MockLogGenerator([b'test log 1\n', b'test log 2\n'])
    mock_container.logs.return_value = mock_logs
    streamer = LogStreamer(mock_container, mock_log_fn)
    assert streamer.stdout_thread is not None
    assert streamer.log_generator is not None
    # close() joins the reader thread, so all lines are consumed by now.
    streamer.close()
    # Verify logs were processed
    expected_calls = [
        ('debug', '[inside container] test log 1'),
        ('debug', '[inside container] test log 2'),
    ]
    actual_calls = [(args[0], args[1]) for args, _ in mock_log_fn.call_args_list]
    for expected in expected_calls:
        assert expected in actual_calls
def test_del_without_thread(mock_container, mock_log_fn):
    """__del__ must tolerate a missing stdout_thread attribute."""
    log_streamer = LogStreamer(mock_container, mock_log_fn)
    # Simulate case where the thread was never created.
    delattr(log_streamer, 'stdout_thread')
    # Should not raise any exceptions.
    log_streamer.__del__()

View File

@@ -1,602 +0,0 @@
import os
import sys
import tempfile
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from openhands.events.action import CmdRunAction
from openhands.events.observation import ErrorObservation
from openhands.events.observation.commands import (
CmdOutputObservation,
)
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE
def get_timeout_suffix(timeout_seconds):
    """Build the suffix the session appends when a command times out."""
    message = (
        f'[The command timed out after {timeout_seconds} seconds. '
        f'{TIMEOUT_MESSAGE_TEMPLATE}]'
    )
    return message
# Skip all tests in this module if not running on Windows.
# (pytest still collects the module on other platforms, but every test
# is reported as skipped via this module-level mark.)
pytestmark = pytest.mark.skipif(
    sys.platform != 'win32', reason='WindowsPowershellSession tests require Windows'
)
@pytest.fixture
def temp_work_dir():
    """Yield a throwaway working directory, removed after the test."""
    with tempfile.TemporaryDirectory() as work_dir:
        yield work_dir
@pytest.fixture
def windows_bash_session(temp_work_dir):
    """Create a WindowsPowershellSession instance for testing."""
    # Instantiate the class. Initialization happens in __init__.
    session = WindowsPowershellSession(
        work_dir=temp_work_dir,
        username=None,
    )
    assert session._initialized  # Should be true after __init__
    yield session
    # Ensure cleanup happens even if test fails
    # (code after yield in a pytest fixture runs as teardown)
    session.close()
# Import lazily: this module is only loadable on Windows (presumably it
# pulls in Windows-only dependencies — hence the platform guard).
if sys.platform == 'win32':
    from openhands.runtime.utils.windows_bash import WindowsPowershellSession
def test_command_execution(windows_bash_session):
    """Test basic command execution."""
    # Test a simple command
    action = CmdRunAction(command="Write-Output 'Hello World'")
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Check content, stripping potential trailing newlines
    content = result.content.strip()
    assert content == 'Hello World'
    assert result.exit_code == 0
    # Test a simple command with multiline input but single line output
    # (the trailing backtick is PowerShell's line-continuation character)
    action = CmdRunAction(
        command="""Write-Output `
('hello ' + `
'world')"""
    )
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Check content, stripping potential trailing newlines
    content = result.content.strip()
    assert content == 'hello world'
    assert result.exit_code == 0
    # Test a simple command with a newline
    # (the escaped \\n is expected to pass through literally, not expand)
    action = CmdRunAction(command='Write-Output "Hello\\n World"')
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Check content, stripping potential trailing newlines
    content = result.content.strip()
    assert content == 'Hello\\n World'
    assert result.exit_code == 0
def test_command_with_error(windows_bash_session):
    """Write-Error output is surfaced and flips the exit code to 1."""
    observation = windows_bash_session.execute(
        CmdRunAction(command="Write-Error 'Test Error'")
    )
    assert isinstance(observation, CmdOutputObservation)
    # The error stream is captured and merged into the output content.
    assert 'ERROR' in observation.content
    # Errors on the stream are reported as exit code 1.
    assert observation.exit_code == 1
def test_command_failure_exit_code(windows_bash_session):
    """An unknown cmdlet yields an error in the output and exit code 1."""
    observation = windows_bash_session.execute(
        CmdRunAction(command='Get-NonExistentCmdlet')
    )
    assert isinstance(observation, CmdOutputObservation)
    # The failure must be surfaced in the captured output.
    assert 'ERROR' in observation.content
    not_recognized = 'is not recognized' in observation.content
    not_found = 'CommandNotFoundException' in observation.content
    assert not_recognized or not_found
    assert observation.exit_code == 1
def test_control_commands(windows_bash_session):
    """Test handling of control commands (not supported)."""
    # Test Ctrl+C - should return ErrorObservation if no command is running
    action_c = CmdRunAction(command='C-c', is_input=True)
    result_c = windows_bash_session.execute(action_c)
    assert isinstance(result_c, ErrorObservation)
    assert 'No previous running command to interact with' in result_c.content
    # Run a long-running command
    action_long_running = CmdRunAction(command='Start-Sleep -Seconds 100')
    result_long_running = windows_bash_session.execute(action_long_running)
    assert isinstance(result_long_running, CmdOutputObservation)
    # exit code -1: the command is still running in the background
    assert result_long_running.exit_code == -1
    # Test unsupported control command
    action_d = CmdRunAction(command='C-d', is_input=True)
    result_d = windows_bash_session.execute(action_d)
    assert "Your input command 'C-d' was NOT processed" in result_d.metadata.suffix
    assert (
        'Direct input to running processes (is_input=True) is not supported by this PowerShell session implementation.'
        in result_d.metadata.suffix
    )
    assert 'You can use C-c to stop the process' in result_d.metadata.suffix
    # Ctrl+C now can cancel the long-running command
    action_c = CmdRunAction(command='C-c', is_input=True)
    result_c = windows_bash_session.execute(action_c)
    assert isinstance(result_c, CmdOutputObservation)
    assert result_c.exit_code == 0
def test_command_timeout(windows_bash_session):
    """A command exceeding its hard timeout returns exit code -1 promptly."""
    test_timeout_sec = 1
    # Sleep far longer than the timeout so the deadline always fires.
    action = CmdRunAction(command='Start-Sleep -Seconds 5')
    action.set_hard_timeout(test_timeout_sec)
    started = time.monotonic()
    observation = windows_bash_session.execute(action)
    elapsed = time.monotonic() - started
    assert isinstance(observation, CmdOutputObservation)
    # The timeout is reported in the metadata suffix, not the content.
    assert 'timed out' in observation.metadata.suffix.lower()
    # Timeout should result in exit code -1.
    assert observation.exit_code == -1
    # The call should return close to the configured timeout (small buffer).
    assert abs(elapsed - test_timeout_sec) < 0.5
def test_long_running_command(windows_bash_session):
    """A timed-out server keeps running and can be interrupted with C-c."""
    action = CmdRunAction(command='python -u -m http.server 8081')
    action.set_hard_timeout(1)
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Verify the initial output was captured
    assert 'Serving HTTP on' in result.content
    # Check for timeout specific metadata
    assert get_timeout_suffix(1.0) in result.metadata.suffix
    assert result.exit_code == -1
    # The action timed out, but the command should be still running
    # We should now be able to interrupt it
    action = CmdRunAction(command='C-c', is_input=True)
    action.set_hard_timeout(30)  # Give it enough time to stop
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # On Windows, Stop-Job termination doesn't inherently return output.
    # The CmdOutputObservation will have content="" and exit_code=0 if successful.
    # The KeyboardInterrupt message assertion is removed as it's added manually
    # by the wrapper and might not be guaranteed depending on timing/implementation details.
    assert result.exit_code == 0
    # Verify the server is actually stopped by starting another one on the same port
    # (binding would fail with "address in use" if the first were alive).
    action = CmdRunAction(command='python -u -m http.server 8081')
    action.set_hard_timeout(1)  # Set a short timeout to check if it starts
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Verify the initial output was captured, indicating the port was free
    assert 'Serving HTTP on' in result.content
    # The command will time out again, so the exit code should be -1
    assert result.exit_code == -1
    # Clean up the second server process
    action = CmdRunAction(command='C-c', is_input=True)
    action.set_hard_timeout(30)
    result = windows_bash_session.execute(action)
    assert result.exit_code == 0
def test_multiple_commands_rejected_and_individual_execution(windows_bash_session):
    """Test that executing multiple commands separated by newline is rejected,
    but individual commands (including multiline) execute correctly.
    """
    # Define a list of commands, including multiline and special characters
    # (backtick is both PowerShell's escape char and line continuation).
    cmds = [
        'Get-ChildItem',
        'Write-Output "hello`nworld"',
        """Write-Output "hello it's me\"""",
        """Write-Output `
'hello' `
-NoNewline""",
        """Write-Output 'hello`nworld`nare`nyou`nthere?'""",
        """Write-Output 'hello`nworld`nare`nyou`n`nthere?'""",
        """Write-Output 'hello`nworld `"'""",  # Escape the trailing double quote
    ]
    joined_cmds = '\n'.join(cmds)
    # 1. Test that executing multiple commands at once fails
    action_multi = CmdRunAction(command=joined_cmds)
    result_multi = windows_bash_session.execute(action_multi)
    assert isinstance(result_multi, ErrorObservation)
    assert 'ERROR: Cannot execute multiple commands at once' in result_multi.content
    # 2. Now run each command individually and verify they work
    results = []
    for cmd in cmds:
        action_single = CmdRunAction(command=cmd)
        obs = windows_bash_session.execute(action_single)
        assert isinstance(obs, CmdOutputObservation)
        assert obs.exit_code == 0
        results.append(obs.content.strip())  # Strip trailing newlines for comparison
def test_working_directory(windows_bash_session, temp_work_dir):
    """Test working directory handling."""
    initial_cwd = windows_bash_session._cwd
    abs_temp_work_dir = os.path.abspath(temp_work_dir)
    assert initial_cwd == abs_temp_work_dir
    # Create a subdirectory
    sub_dir_path = Path(abs_temp_work_dir) / 'subdir'
    sub_dir_path.mkdir()
    assert sub_dir_path.is_dir()
    # Test changing directory
    action_cd = CmdRunAction(command='Set-Location subdir')
    result_cd = windows_bash_session.execute(action_cd)
    assert isinstance(result_cd, CmdOutputObservation)
    assert result_cd.exit_code == 0
    # Check that the session's internal CWD state was updated - only check the last component of path
    assert windows_bash_session._cwd.lower().endswith('\\subdir')
    # Check that the metadata reflects the directory *after* the command
    assert result_cd.metadata.working_dir.lower().endswith('\\subdir')
    # Execute a command in the new directory to confirm
    action_pwd = CmdRunAction(command='(Get-Location).Path')
    result_pwd = windows_bash_session.execute(action_pwd)
    assert isinstance(result_pwd, CmdOutputObservation)
    assert result_pwd.exit_code == 0
    # Check the command output reflects the new directory
    assert result_pwd.content.strip().lower().endswith('\\subdir')
    # Metadata should also reflect the current directory
    assert result_pwd.metadata.working_dir.lower().endswith('\\subdir')
    # Test changing back to original directory
    action_cd_back = CmdRunAction(command=f"Set-Location '{abs_temp_work_dir}'")
    result_cd_back = windows_bash_session.execute(action_cd_back)
    assert isinstance(result_cd_back, CmdOutputObservation)
    assert result_cd_back.exit_code == 0
    # Check only the base name of the temp directory
    # (path casing may differ on Windows, hence the .lower() comparisons)
    temp_dir_basename = os.path.basename(abs_temp_work_dir)
    assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower())
    assert result_cd_back.metadata.working_dir.lower().endswith(
        temp_dir_basename.lower()
    )
def test_cleanup(windows_bash_session):
    """Closing the session releases the runspace and updates its flags."""
    # Preconditions: the fixture hands us a live, initialized session.
    assert windows_bash_session._initialized
    assert windows_bash_session.runspace is not None
    # Close the session.
    windows_bash_session.close()
    # Postconditions: everything is torn down.
    assert not windows_bash_session._initialized
    assert windows_bash_session.runspace is None
    assert windows_bash_session._closed
def test_syntax_error_handling(windows_bash_session):
    """A syntactically invalid command yields an ErrorObservation."""
    observation = windows_bash_session.execute(
        CmdRunAction(command="Write-Output 'Missing Quote")
    )
    assert isinstance(observation, ErrorObservation)
    # PowerShell reports the parse error via its error stream.
    lowered = observation.content.lower()
    assert 'missing' in lowered or 'terminator' in lowered
def test_special_characters_handling(windows_bash_session):
    """Test handling of commands containing special characters."""
    # Test command with special characters, each backtick-escaped so
    # PowerShell treats them literally inside the double-quoted string.
    special_chars_cmd = '''Write-Output "Special Chars: \\`& \\`| \\`< \\`> \\`\\` \\`' \\`\" \\`! \\`$ \\`% \\`^ \\`( \\`) \\`- \\`= \\`+ \\`[ \\`] \\`{ \\`} \\`; \\`: \\`, \\`. \\`? \\`/ \\`~"'''
    action = CmdRunAction(command=special_chars_cmd)
    result = windows_bash_session.execute(action)
    assert isinstance(result, CmdOutputObservation)
    # Check output contains the special characters
    assert 'Special Chars:' in result.content
    assert '&' in result.content and '|' in result.content
    assert result.exit_code == 0
def test_empty_command(windows_bash_session):
    """An empty command with nothing running yields a descriptive message."""
    observation = windows_bash_session.execute(CmdRunAction(command=''))
    assert isinstance(observation, CmdOutputObservation)
    # Mirrors the bash implementation: the message lands in the content
    # while the exit code stays 0 for this particular "error".
    assert (
        'ERROR: No previous running command to retrieve logs from.'
        in observation.content
    )
    assert observation.exit_code == 0
def test_exception_during_execution(windows_bash_session):
    """Test handling of exceptions during command execution."""
    # Patch the PowerShell class itself within the module where it's used
    patch_target = 'openhands.runtime.utils.windows_bash.PowerShell'
    # Create a mock PowerShell class
    mock_powershell_class = MagicMock()
    # Configure its Create method (which is called in execute) to raise an exception
    # This simulates an error during the creation of the PowerShell object itself.
    mock_powershell_class.Create.side_effect = Exception(
        'Test exception from mocked Create'
    )
    with patch(patch_target, mock_powershell_class):
        action = CmdRunAction(command="Write-Output 'Test'")
        # Now, when execute calls PowerShell.Create(), it will hit our mock and raise the exception
        result = windows_bash_session.execute(action)
        # The exception should be caught by the try...except block in execute()
        assert isinstance(result, ErrorObservation)
        # Check the error message generated by the execute method's exception handler
        assert 'Failed to start PowerShell job' in result.content
        assert 'Test exception from mocked Create' in result.content
def test_streaming_output(windows_bash_session):
    """Output produced incrementally over time is collected completely."""
    # Emit three lines with a short pause between each one.
    command = """
1..3 | ForEach-Object {
Write-Output "Line $_"
Start-Sleep -Milliseconds 100
}
"""
    observation = windows_bash_session.execute(CmdRunAction(command=command))
    assert isinstance(observation, CmdOutputObservation)
    # Every emitted line must be present despite the delays.
    assert 'Line 1' in observation.content
    assert 'Line 2' in observation.content
    assert 'Line 3' in observation.content
    assert observation.exit_code == 0
def test_shutdown_signal_handling(windows_bash_session):
    """A short blocking command completes and execute() returns normally."""
    # Mocking the shutdown listener would be complex; instead we just verify
    # that a long-running command can be executed and returns properly.
    observation = windows_bash_session.execute(
        CmdRunAction(command='Start-Sleep -Seconds 1')
    )
    assert isinstance(observation, CmdOutputObservation)
    assert observation.exit_code == 0
def test_runspace_state_after_error(windows_bash_session):
    """The runspace stays usable after a failed command."""
    # A bogus command should fail with exit code 1 ...
    failed = windows_bash_session.execute(CmdRunAction(command='NonExistentCommand'))
    assert isinstance(failed, CmdOutputObservation)
    assert failed.exit_code == 1
    # ... and a subsequent valid command must still succeed.
    succeeded = windows_bash_session.execute(
        CmdRunAction(command="Write-Output 'Still working'")
    )
    assert isinstance(succeeded, CmdOutputObservation)
    assert 'Still working' in succeeded.content
    assert succeeded.exit_code == 0
def test_stateful_file_operations(windows_bash_session, temp_work_dir):
    """Test file operations to verify runspace state persistence.
    This test verifies that:
    1. The working directory state persists between commands
    2. File operations work correctly relative to the current directory
    3. The runspace maintains state for path-dependent operations
    """
    abs_temp_work_dir = os.path.abspath(temp_work_dir)
    # 1. Create a subdirectory
    sub_dir_name = 'file_test_dir'
    sub_dir_path = Path(abs_temp_work_dir) / sub_dir_name
    # Use PowerShell to create directory
    create_dir_action = CmdRunAction(
        command=f'New-Item -Path "{sub_dir_name}" -ItemType Directory'
    )
    result = windows_bash_session.execute(create_dir_action)
    assert result.exit_code == 0
    # Verify directory exists on disk
    assert sub_dir_path.exists() and sub_dir_path.is_dir()
    # 2. Change to the new directory
    cd_action = CmdRunAction(command=f"Set-Location '{sub_dir_name}'")
    result = windows_bash_session.execute(cd_action)
    assert result.exit_code == 0
    # Check only the last directory component (casing may differ on Windows)
    assert windows_bash_session._cwd.lower().endswith(f'\\{sub_dir_name.lower()}')
    # 3. Create a file in the current directory (which should be the subdirectory)
    test_content = 'This is a test file created by PowerShell'
    create_file_action = CmdRunAction(
        command=f'Set-Content -Path "test_file.txt" -Value "{test_content}"'
    )
    result = windows_bash_session.execute(create_file_action)
    assert result.exit_code == 0
    # 4. Verify file exists at the expected path (in the subdirectory)
    expected_file_path = sub_dir_path / 'test_file.txt'
    assert expected_file_path.exists() and expected_file_path.is_file()
    # 5. Read file contents using PowerShell and verify
    read_file_action = CmdRunAction(command='Get-Content -Path "test_file.txt"')
    result = windows_bash_session.execute(read_file_action)
    assert result.exit_code == 0
    assert test_content in result.content
    # 6. Go back to parent and try to access file using relative path
    cd_parent_action = CmdRunAction(command='Set-Location ..')
    result = windows_bash_session.execute(cd_parent_action)
    assert result.exit_code == 0
    # Check only the base name of the temp directory
    temp_dir_basename = os.path.basename(abs_temp_work_dir)
    assert windows_bash_session._cwd.lower().endswith(temp_dir_basename.lower())
    # 7. Read the file using relative path
    read_from_parent_action = CmdRunAction(
        command=f'Get-Content -Path "{sub_dir_name}/test_file.txt"'
    )
    result = windows_bash_session.execute(read_from_parent_action)
    assert result.exit_code == 0
    assert test_content in result.content
    # 8. Clean up
    remove_file_action = CmdRunAction(
        command=f'Remove-Item -Path "{sub_dir_name}/test_file.txt" -Force'
    )
    result = windows_bash_session.execute(remove_file_action)
    assert result.exit_code == 0
def test_command_output_continuation(windows_bash_session):
    """Verify that empty follow-up commands stream the remaining output of a timed-out command."""
    # Kick off a slow PowerShell pipeline that prints 1..5, pausing 3s between numbers,
    # and time it out after 2.5s so only the first number is captured.
    slow_cmd = CmdRunAction('1..5 | ForEach-Object { Write-Output $_; Start-Sleep 3 }')
    slow_cmd.set_hard_timeout(2.5)
    observation = windows_bash_session.execute(slow_cmd)
    assert observation.content.strip() == '1'
    assert observation.metadata.prefix == ''
    assert '[The command timed out after 2.5 seconds.' in observation.metadata.suffix
    # Drain the remaining numbers one timeout window at a time via empty commands.
    for expected in ['2', '3', '4', '5']:
        poll = CmdRunAction('')
        poll.set_hard_timeout(2.5)
        observation = windows_bash_session.execute(poll)
        assert (
            '[Below is the output of the previous command.]'
            in observation.metadata.prefix
        )
        assert observation.content.strip() == expected
        assert '[The command timed out after 2.5 seconds.' in observation.metadata.suffix
    # A final empty command (no timeout) lets the session report completion.
    final_poll = CmdRunAction('')
    observation = windows_bash_session.execute(final_poll)
    assert '[The command completed with exit code 0.]' in observation.metadata.suffix
def test_long_running_command_followed_by_execute(windows_bash_session):
    """Check how the session reacts when a new command arrives while a timed-out command still runs."""
    continuation_prefix = '[Below is the output of the previous command.]\n'
    timeout_notice = '[The command timed out after 2.5 seconds.'
    # Launch a pipeline that emits 1..3 with 3s pauses, then time out early.
    slow_action = CmdRunAction('1..3 | ForEach-Object { Write-Output $_; Start-Sleep 3 }')
    slow_action.set_hard_timeout(2.5)
    observation = windows_bash_session.execute(slow_action)
    assert '1' in observation.content  # First number should appear before timeout
    assert observation.metadata.exit_code == -1  # -1 marks a still-running command
    assert timeout_notice in observation.metadata.suffix
    assert observation.metadata.prefix == ''
    # An empty command continues streaming the previous command's output.
    continue_action = CmdRunAction('')
    continue_action.set_hard_timeout(2.5)
    observation = windows_bash_session.execute(continue_action)
    assert '2' in observation.content
    assert observation.metadata.prefix == continuation_prefix
    assert timeout_notice in observation.metadata.suffix
    assert observation.metadata.exit_code == -1  # still running
    # A brand-new command while the old one runs: the session keeps reporting
    # the previous command's progress instead of starting the new one.
    new_action = CmdRunAction('sleep 15')
    new_action.set_hard_timeout(2.5)
    observation = windows_bash_session.execute(new_action)
    assert '3' not in observation.content
    assert observation.metadata.prefix == continuation_prefix
    assert 'The previous command is still running' in observation.metadata.suffix
    assert observation.metadata.exit_code == -1  # still running
    # One more empty command collects the final output and the completion notice.
    final_action = CmdRunAction('')
    observation = windows_bash_session.execute(final_action)
    assert '3' in observation.content
    assert '[The command completed with exit code 0.]' in observation.metadata.suffix
def test_command_non_existent_file(windows_bash_session):
    """A Get-Content call on a missing file must surface a non-zero exit code."""
    # Get-Content fails when the target path does not exist.
    missing_file_action = CmdRunAction(command='Get-Content non_existent_file.txt')
    observation = windows_bash_session.execute(missing_file_action)
    assert isinstance(observation, CmdOutputObservation)
    # The session's trailing '$?' check maps the cmdlet failure to exit code 1.
    assert observation.exit_code == 1
    # PowerShell's error text should be captured in the output stream.
    assert (
        'Cannot find path' in observation.content
        or 'does not exist' in observation.content
    )
def test_interactive_input(windows_bash_session):
    """Read-Host must fail because the non-interactive host cannot prompt the user."""
    prompt_action = CmdRunAction('$name = Read-Host "Enter name"')
    observation = windows_bash_session.execute(prompt_action)
    assert isinstance(observation, CmdOutputObservation)
    # PowerShell reports that the host does not support user interaction.
    expected_error = 'A command that prompts the user failed because the host program or the command type does not support user interaction. The host was attempting to request confirmation with the following message'
    assert expected_error in observation.content
    assert observation.exit_code == 1
def test_windows_path_handling(windows_bash_session, temp_work_dir):
    """Test that os.chdir works with both forward slashes and escaped backslashes on Windows."""
    # Create a test directory inside the session's temp working dir
    test_dir = Path(temp_work_dir) / 'test_dir'
    test_dir.mkdir()
    # Test both path formats that a caller might hand to a Python one-liner
    path_formats = [
        str(test_dir).replace('\\', '/'),  # Forward slashes
        str(test_dir).replace('\\', '\\\\'),  # Escaped backslashes (doubled so Python's parser sees a single '\')
    ]
    for path in path_formats:
        # Test changing directory using os.chdir through PowerShell; the path is
        # interpolated into a single-quoted Python string inside the -c argument
        action = CmdRunAction(command=f'python -c "import os; os.chdir(\'{path}\')"')
        result = windows_bash_session.execute(action)
        assert isinstance(result, CmdOutputObservation)
        assert result.exit_code == 0, f'Failed with path format: {path}'