(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (#3791)

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
tobitege 2024-09-14 21:51:30 +02:00 committed by GitHub
parent 57390eb26b
commit 554636cf2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 867 additions and 702 deletions

View File

@ -1,5 +1,5 @@
# Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository
name: Build, Test and Publish Runtime Image
name: Build, Test and Publish RT Image
# Only run one workflow of the same group at a time.
# There can be at most one running and one pending job in a concurrency group at any time.
@ -104,9 +104,9 @@ jobs:
name: runtime-${{ matrix.base_image.tag }}
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
# Run unit tests with the EventStream runtime Docker images
test_runtime:
name: Test Runtime
# Run unit tests with the EventStream runtime Docker images as root
test_runtime_root:
name: RT Unit Tests (Root)
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
strategy:
@ -164,11 +164,84 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
RUN_AS_OPENHANDS=false \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# Run unit tests with the EventStream runtime Docker images as openhands user
test_runtime_oh:
name: RT Unit Tests (openhands)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
matrix:
base_image: ['nikolaik']
steps:
- uses: actions/checkout@v4
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: github.event.pull_request.head.repo.fork
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: github.event.pull_request.head.repo.fork
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Run runtime tests
run: |
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
# them across more than 2 CPUs for some reason
poetry run pip install pytest-xdist
# Install to be able to retry on failures for flaky tests
poetry run pip install pytest-rerunfailures
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=true \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
@ -176,7 +249,7 @@ jobs:
# Run integration tests with the eventstream runtime Docker image
runtime_integration_tests_on_linux:
name: Runtime Integration Tests on Linux
name: RT Integration Tests (Linux)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
@ -237,7 +310,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: All tests passed
run: echo "All runtime tests have passed successfully!"
@ -246,7 +319,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: Some tests failed
run: |

View File

@ -507,7 +507,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
if isinstance(value, dict):
try:
if key is not None and key.lower() == 'agent':
logger.openhands_logger.info(
logger.openhands_logger.debug(
'Attempt to load default agent config from config toml'
)
non_dict_fields = {
@ -517,13 +517,13 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
cfg.set_agent_config(agent_config, 'agent')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.openhands_logger.info(
logger.openhands_logger.debug(
f'Attempt to load group {nested_key} from config toml as agent config'
)
agent_config = AgentConfig(**nested_value)
cfg.set_agent_config(agent_config, nested_key)
elif key is not None and key.lower() == 'llm':
logger.openhands_logger.info(
logger.openhands_logger.debug(
'Attempt to load default LLM config from config toml'
)
non_dict_fields = {
@ -533,7 +533,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
cfg.set_llm_config(llm_config, 'llm')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.openhands_logger.info(
logger.openhands_logger.debug(
f'Attempt to load group {nested_key} from config toml as llm config'
)
llm_config = LLMConfig(**nested_value)
@ -584,10 +584,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
def finalize_config(cfg: AppConfig):
"""More tweaks to the config after it's been loaded."""
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
# Set workspace_mount_path if not set by the user
if cfg.workspace_mount_path is UndefinedString.UNDEFINED:
cfg.workspace_mount_path = os.path.abspath(cfg.workspace_base)
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
cfg.workspace_mount_path = cfg.workspace_base
if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path:
# TODO why do we need to check if workspace_mount_path is None?

View File

@ -68,6 +68,10 @@ class DockerRuntimeBuilder(RuntimeBuilder):
Returns:
bool: Whether the Docker image exists in the registry or in the local store
"""
if not image_name:
logger.error(f'Invalid image name: `{image_name}`')
return False
try:
logger.info(f'Checking, if image exists locally:\n{image_name}')
self.docker_client.images.get(image_name)

View File

@ -84,7 +84,6 @@ class RuntimeClient:
self.lock = asyncio.Lock()
self.plugins: dict[str, Plugin] = {}
self.browser = BrowserEnv(browsergym_eval_env)
self._initial_pwd = work_dir
@property
def initial_pwd(self):
@ -116,27 +115,85 @@ class RuntimeClient:
logger.info('Runtime client initialized.')
def _init_user(self, username: str, user_id: int) -> None:
"""Create user if not exists."""
"""Create working directory and user if not exists.
It performs the following steps effectively:
* Creates the Working Directory:
- Uses mkdir -p to create the directory.
- Sets ownership to username:root.
- Adjusts permissions to be readable and writable by group and others.
* User Verification and Creation:
- Checks if the user exists using id -u.
- If the user exists with the correct UID, it skips creation.
- If the UID differs, it logs a warning and updates self.user_id.
- If the user doesn't exist, it proceeds to create the user.
* Sudo Configuration:
- Appends %sudo ALL=(ALL) NOPASSWD:ALL to /etc/sudoers to grant
passwordless sudo access to the sudo group.
- Adds the user to the sudo group with the useradd command, handling
UID conflicts by incrementing the UID if necessary.
"""
# First create the working directory, independent of the user
logger.info(f'Client working directory: {self.initial_pwd}')
command = f'umask 002; mkdir -p {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()
command = f'chown -R {username}:root {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
command = f'chmod g+rw {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'Created working directory. Output: [{out_str}]')
# Skip root since it is already created
if username == 'root':
return
# Check if the username already exists
existing_user_id = -1
try:
subprocess.run(
result = subprocess.run(
f'id -u {username}', shell=True, check=True, capture_output=True
)
logger.debug(f'User {username} already exists. Skipping creation.')
existing_user_id = int(result.stdout.decode().strip())
# The user ID already exists, skip setup
if existing_user_id == user_id:
logger.debug(
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
)
else:
logger.warning(
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
)
self.user_id = existing_user_id
return
except subprocess.CalledProcessError:
pass # User does not exist, continue with creation
except subprocess.CalledProcessError as e:
# Returncode 1 indicates, that the user does not exist yet
if e.returncode == 1:
logger.debug(
f'User `{username}` does not exist. Proceeding with user creation.'
)
else:
logger.error(
f'Error checking user `{username}`, skipping setup:\n{e}\n'
)
raise
# Add sudoer
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
# Grant passwordless sudo to members of the `sudo` group via a drop-in file.
# NOTE: this has to be a regular (non-raw) string literal so that the trailing
# "\n" is written as a real newline. A raw string would emit a literal
# backslash followed by "n", leaving a malformed, unterminated sudoers entry.
sudoer_line = '%sudo ALL=(ALL) NOPASSWD:ALL\n'
sudoers_path = '/etc/sudoers.d/99_sudo'
# Only create the drop-in once; an existing file is left untouched.
if not Path(sudoers_path).exists():
    with open(sudoers_path, 'w') as f:
        f.write(sudoer_line)
    # Restrict the file to read-only for owner and group (mode 0440).
    output = subprocess.run(['chmod', '0440', sudoers_path])
    if output.returncode != 0:
        logger.error('Failed to chmod 99_sudo file!')
    else:
        logger.debug('Added sudoer successfully.')
# Attempt to add the user, retrying with incremented user_id if necessary
while True:
@ -144,16 +201,10 @@ class RuntimeClient:
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)
if not os.path.exists(self.initial_pwd):
command += f' && mkdir -p {self.initial_pwd}'
command += f' && chown -R {username}:root {self.initial_pwd}'
command += f' && chmod g+s {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user {username} successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
break
elif f'UID {user_id} is not unique' in output.stderr.decode():
@ -163,7 +214,7 @@ class RuntimeClient:
user_id += 1
else:
raise RuntimeError(
f'Failed to create user {username}: {output.stderr.decode()}'
f'Failed to create user `{username}`! Output: [{output.stderr.decode()}]'
)
def _init_bash_shell(self, work_dir: str, username: str) -> None:
@ -181,8 +232,8 @@ class RuntimeClient:
# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
# Set umask to allow group write permissions
self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
self.shell.expect(self.__bash_expect_regex)
self.shell.sendline(
@ -190,8 +241,11 @@ class RuntimeClient:
)
self.shell.expect(self.__bash_expect_regex)
logger.debug(
f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}'
f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
)
# Ensure the group has write permissions on the working directory
self.shell.sendline(f'chmod g+rw "{work_dir}"')
self.shell.expect(self.__bash_expect_regex)
async def _init_bash_commands(self):
logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
@ -295,14 +349,14 @@ class RuntimeClient:
bash_prompt = self._get_bash_prompt_and_update_pwd()
if keep_prompt:
output += '\r\n' + bash_prompt
logger.debug(f'Command output: {output}')
# logger.debug(f'Command output:\n{output}')
return output, exit_code
async def run_action(self, action) -> Observation:
action_type = action.action
logger.debug(f'Running action: {action}')
logger.debug(f'Running action:\n{action}')
observation = await getattr(self, action_type)(action)
logger.debug(f'Action output: {observation}')
logger.debug(f'Action output:\n{observation}')
return observation
async def run(self, action: CmdRunAction) -> CmdOutputObservation:
@ -355,10 +409,9 @@ class RuntimeClient:
_jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore
# This is used to make AgentSkills in Jupyter aware of the
# current working directory in Bash
if self.pwd != getattr(self, '_jupyter_pwd', None):
logger.debug(
f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD"
)
jupyter_pwd = getattr(self, '_jupyter_pwd', None)
if self.pwd != jupyter_pwd:
logger.debug(f'{self.pwd} != {jupyter_pwd} -> reset Jupyter PWD')
reset_jupyter_pwd_code = f'import os; os.chdir("{self.pwd}")'
_aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
_reset_obs = await _jupyter_plugin.run(_aux_action)
@ -450,7 +503,7 @@ class RuntimeClient:
os.chown(filepath, file_stat.st_uid, file_stat.st_gid)
else:
# set the new file permissions if the file is new
os.chmod(filepath, 0o644)
os.chmod(filepath, 0o664)
os.chown(filepath, self.user_id, self.user_id)
except FileNotFoundError:

View File

@ -38,8 +38,7 @@ from openhands.runtime.utils.runtime_build import build_runtime_image
class LogBuffer:
"""
Synchronous buffer for Docker container logs.
"""Synchronous buffer for Docker container logs.
This class provides a thread-safe way to collect, store, and retrieve logs
from a Docker container. It uses a list to store log lines and provides methods
@ -94,7 +93,7 @@ class LogBuffer:
)
self.close(timeout=5)
def close(self, timeout: float = 10.0):
def close(self, timeout: float = 5.0):
self._stop_event.set()
self.log_stream_thread.join(timeout)
@ -102,6 +101,14 @@ class LogBuffer:
class EventStreamRuntime(Runtime):
"""This runtime will subscribe the event stream.
When receive an event, it will send the event to runtime-client which run inside the docker environment.
From the sid also an instance_id is generated in combination with a UID.
Args:
config (AppConfig): The application configuration.
event_stream (EventStream): The event stream to subscribe to.
sid (str, optional): The session ID. Defaults to 'default'.
plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None.
env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None.
"""
container_name_prefix = 'openhands-sandbox-'
@ -115,13 +122,16 @@ class EventStreamRuntime(Runtime):
env_vars: dict[str, str] | None = None,
):
self.config = config
self._port = find_available_tcp_port()
self.api_url = f'http://{self.config.sandbox.api_hostname}:{self._port}'
self._host_port = 30000 # initial dummy value
self._container_port = 30001 # initial dummy value
self.api_url = (
f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
)
self.session = requests.Session()
self.instance_id = (
sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
)
self.docker_client: docker.DockerClient = self._init_docker_client()
self.base_container_image = self.config.sandbox.base_container_image
self.runtime_container_image = self.config.sandbox.runtime_container_image
@ -131,7 +141,7 @@ class EventStreamRuntime(Runtime):
self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
self.runtime_builder = DockerRuntimeBuilder(self.docker_client)
logger.debug(f'EventStreamRuntime `{sid}`')
logger.debug(f'EventStreamRuntime `{self.instance_id}`')
# Buffer for container logs
self.log_buffer: LogBuffer | None = None
@ -140,7 +150,9 @@ class EventStreamRuntime(Runtime):
logger.info(
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
)
self.skip_container_logs = (
os.environ.get('SKIP_CONTAINER_LOGS', 'false').lower() == 'true'
)
if self.runtime_container_image is None:
if self.base_container_image is None:
raise ValueError(
@ -152,19 +164,18 @@ class EventStreamRuntime(Runtime):
extra_deps=self.config.sandbox.runtime_extra_deps,
)
self.container = self._init_container(
self.sandbox_workspace_dir,
mount_dir=self.config.workspace_mount_path,
sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox, # e.g. /workspace
mount_dir=self.config.workspace_mount_path, # e.g. /opt/openhands/_test_workspace
plugins=plugins,
)
# will initialize both the event stream and the env vars
super().__init__(config, event_stream, sid, plugins, env_vars)
self._wait_until_alive()
logger.info(
f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}'
)
logger.info(f'Container initialized with env vars: {env_vars}')
time.sleep(1)
@staticmethod
def _init_docker_client() -> docker.DockerClient:
@ -196,24 +207,48 @@ class EventStreamRuntime(Runtime):
f'--plugins {" ".join([plugin.name for plugin in plugins])} '
)
network_mode: str | None = None
port_mapping: dict[str, int] | None = None
if self.config.sandbox.use_host_network:
network_mode = 'host'
self._host_port = self._find_available_port()
self._container_port = (
self._host_port
) # in future this might differ from host port
self.api_url = (
f'http://{self.config.sandbox.api_hostname}:{self._container_port}'
)
use_host_network = self.config.sandbox.use_host_network
network_mode: str | None = 'host' if use_host_network else None
port_mapping: dict[str, list[dict[str, str]]] | None = (
None
if use_host_network
else {
f'{self._container_port}/tcp': [{'HostPort': str(self._host_port)}]
}
)
if use_host_network:
logger.warn(
'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
)
else:
port_mapping = {f'{self._port}/tcp': self._port}
if mount_dir is not None:
# Combine environment variables
environment = {
'port': str(self._container_port),
'PYTHONUNBUFFERED': 1,
}
if self.config.debug:
environment['DEBUG'] = 'true'
logger.info(f'Workspace Base: {self.config.workspace_base}')
if mount_dir is not None and sandbox_workspace_dir is not None:
# e.g. result would be: {"/home/user/openhands/workspace": {'bind': "/workspace", 'mode': 'rw'}}
volumes = {mount_dir: {'bind': sandbox_workspace_dir, 'mode': 'rw'}}
logger.info(f'Mount dir: {sandbox_workspace_dir}')
logger.info(f'Mount dir: {mount_dir}')
else:
logger.warn(
'Mount dir is not set, will not mount the workspace directory to the container.'
'Warning: Mount dir is not set, will not mount the workspace directory to the container!\n'
)
volumes = None
logger.info(f'Sandbox workspace: {sandbox_workspace_dir}')
if self.config.sandbox.browsergym_eval_env is not None:
browsergym_arg = (
@ -225,9 +260,9 @@ class EventStreamRuntime(Runtime):
self.runtime_container_image,
command=(
f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
'PYTHONUNBUFFERED=1 poetry run '
f'python -u -m openhands.runtime.client.client {self._port} '
f'--working-dir {sandbox_workspace_dir} '
f'poetry run '
f'python -u -m openhands.runtime.client.client {self._container_port} '
f'--working-dir "{sandbox_workspace_dir}" '
f'{plugin_arg}'
f'--username {"openhands" if self.config.run_as_openhands else "root"} '
f'--user-id {self.config.sandbox.user_id} '
@ -235,24 +270,26 @@ class EventStreamRuntime(Runtime):
),
network_mode=network_mode,
ports=port_mapping,
working_dir='/openhands/code/',
working_dir='/openhands/code/', # do not change this!
name=self.container_name,
detach=True,
environment={'DEBUG': 'true'} if self.config.debug else None,
environment=environment,
volumes=volumes,
)
self.log_buffer = LogBuffer(container)
logger.info(f'Container started. Server url: {self.api_url}')
return container
except Exception as e:
logger.error('Failed to start container')
logger.error(
f'Error: Instance {self.instance_id} FAILED to start container!\n'
)
logger.exception(e)
self.close(close_client=False)
raise e
@tenacity.retry(
stop=tenacity.stop_after_attempt(10),
wait=tenacity.wait_exponential(multiplier=2, min=10, max=60),
wait=tenacity.wait_exponential(multiplier=2, min=1, max=20),
reraise=(ConnectionRefusedError,),
)
def _wait_until_alive(self):
@ -278,10 +315,11 @@ class EventStreamRuntime(Runtime):
)
if not self.log_buffer.client_ready:
time.sleep(1)
attempts = 0
while not self.log_buffer.client_ready and attempts < 5:
while not self.log_buffer.client_ready and attempts < 4:
attempts += 1
time.sleep(1)
time.sleep(2)
logs = self.log_buffer.get_and_clear()
if logs:
formatted_logs = '\n'.join([f' |{log}' for log in logs])
@ -303,13 +341,8 @@ class EventStreamRuntime(Runtime):
logger.error(msg)
raise RuntimeError(msg)
@property
def sandbox_workspace_dir(self):
return self.config.workspace_mount_path_in_sandbox
def close(self, close_client: bool = True, rm_all_containers: bool = True):
"""
Closes the EventStreamRuntime and associated objects
"""Closes the EventStreamRuntime and associated objects
Parameters:
- close_client (bool): Whether to close the DockerClient
@ -322,23 +355,29 @@ class EventStreamRuntime(Runtime):
if self.session:
self.session.close()
containers = self.docker_client.containers.list(all=True)
for container in containers:
try:
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
# that all 'openhands-sandbox-' containers are removed as well.
if rm_all_containers and container.name.startswith(
self.container_name_prefix
):
container.remove(force=True)
elif container.name == self.container_name:
logs = container.logs(tail=1000).decode('utf-8')
logger.debug(
f'==== Container logs ====\n{logs}\n==== End of container logs ===='
)
container.remove(force=True)
except docker.errors.NotFound:
pass
try:
containers = self.docker_client.containers.list(all=True)
for container in containers:
try:
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
# that all 'openhands-sandbox-' containers are removed as well.
if rm_all_containers and container.name.startswith(
self.container_name_prefix
):
container.remove(force=True)
elif container.name == self.container_name:
if not self.skip_container_logs:
logs = container.logs(tail=1000).decode('utf-8')
logger.debug(
f'==== Container logs on close ====\n{logs}\n==== End of container logs ===='
)
container.remove(force=True)
except docker.errors.APIError:
pass
except docker.errors.NotFound:
pass
except docker.errors.NotFound: # yes, this can happen!
pass
if close_client:
self.docker_client.close()
@ -494,3 +533,20 @@ class EventStreamRuntime(Runtime):
raise TimeoutError('List files operation timed out')
except Exception as e:
raise RuntimeError(f'List files operation failed: {str(e)}')
def _is_port_in_use_docker(self, port):
containers = self.docker_client.containers.list()
for container in containers:
container_ports = container.ports
if str(port) in str(container_ports):
return True
return False
def _find_available_port(self, max_attempts=5):
    """Pick a host port in 30000-39999 that no running container uses.

    Each attempt asks ``find_available_tcp_port`` for a candidate and then
    checks it against the running Docker containers.

    Args:
        max_attempts: Number of candidates to try before giving up.

    Returns:
        int: The first candidate not used by a Docker container. If every
        attempt fails, the last real candidate is returned as a best effort
        (or 39999 when no candidate could be obtained at all).
    """
    port = 39999  # fallback when not a single candidate could be obtained
    for _ in range(max_attempts):
        candidate = find_available_tcp_port(30000, 39999)
        if candidate == -1:
            # -1 is the "nothing found" sentinel; never hand it out as if
            # it were a real port number.
            continue
        port = candidate
        if not self._is_port_in_use_docker(port):
            return port
    # If no port is found after max_attempts, return the last tried port
    return port

View File

@ -19,7 +19,7 @@ class JupyterPlugin(Plugin):
name: str = 'jupyter'
async def initialize(self, username: str, kernel_id: str = 'openhands-default'):
self.kernel_gateway_port = find_available_tcp_port()
self.kernel_gateway_port = find_available_tcp_port(40000, 49999)
self.kernel_id = kernel_id
self.gateway_process = subprocess.Popen(
(

View File

@ -142,7 +142,7 @@ class RemoteRuntime(Runtime):
f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
'PYTHONUNBUFFERED=1 poetry run '
f'python -u -m openhands.runtime.client.client {self.port} '
f'--working-dir {self.sandbox_workspace_dir} '
f'--working-dir {self.config.workspace_mount_path_in_sandbox} '
f'{plugin_arg}'
f'--username {"openhands" if self.config.run_as_openhands else "root"} '
f'--user-id {self.config.sandbox.user_id} '
@ -203,10 +203,6 @@ class RemoteRuntime(Runtime):
logger.warning(msg)
raise RuntimeError(msg)
@property
def sandbox_workspace_dir(self):
return self.config.workspace_mount_path_in_sandbox
def close(self):
if self.runtime_id:
try:

View File

@ -67,7 +67,6 @@ class Runtime:
self.config = copy.deepcopy(config)
self.DEFAULT_ENV_VARS = _default_env_vars(config.sandbox)
atexit.register(self.close)
logger.debug(f'Runtime `{sid}`')
if self.DEFAULT_ENV_VARS:
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')

View File

@ -1,4 +1,7 @@
from openhands.runtime.utils.bash import split_bash_commands
from openhands.runtime.utils.system import find_available_tcp_port
from openhands.runtime.utils.system import (
display_number_matrix,
find_available_tcp_port,
)
__all__ = ['find_available_tcp_port', 'split_bash_commands']
__all__ = ['display_number_matrix', 'find_available_tcp_port', 'split_bash_commands']

View File

@ -55,7 +55,7 @@ def _put_source_code_to_dir(temp_dir: str):
' ', r'\ '
) # escape spaces in the project root
result = subprocess.run(
f'python -m build -s -o {temp_dir} {_cleaned_project_root}',
f'python -m build -s -o "{temp_dir}" {_cleaned_project_root}',
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@ -142,13 +142,14 @@ def prep_docker_build_folder(
skip_init=skip_init,
extra_deps=extra_deps,
)
logger.debug(
(
f'===== Dockerfile content start =====\n'
f'{dockerfile_content}\n'
f'===== Dockerfile content end ====='
if os.getenv('SKIP_CONTAINER_LOGS', 'false') != 'true':
logger.debug(
(
f'===== Dockerfile content start =====\n'
f'{dockerfile_content}\n'
f'===== Dockerfile content end ====='
)
)
)
with open(os.path.join(dir_path, 'Dockerfile'), 'w') as file:
file.write(dockerfile_content)

View File

@ -60,6 +60,7 @@ RUN cd /openhands/code && \
{{ extra_deps }} {% if extra_deps %} && {% endif %} \
/openhands/miniforge3/bin/mamba run -n base poetry cache clear --all . && \
{% if not skip_init %}chmod -R g+rws /openhands/poetry && {% endif %} \
mkdir -p /openhands/workspace && chmod -R g+rws,o+rw /openhands/workspace && \
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
/openhands/miniforge3/bin/mamba clean --all

View File

@ -1,17 +1,34 @@
import random
import socket
import time
def find_available_tcp_port() -> int:
"""Find an available TCP port, return -1 if none available."""
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
sock.bind(('localhost', 0))
port = sock.getsockname()[1]
return port
except Exception:
return -1
finally:
sock.close()
def find_available_tcp_port(min_port=30000, max_port=39999, max_attempts=10) -> int:
    """Find an available TCP port in a specified range.

    Up to ``max_attempts`` distinct ports are drawn at random from the
    inclusive range and probed by binding a socket on ``localhost``. The
    probing socket is closed before returning, so the port is only known to
    have been free at the time of the check.

    Args:
        min_port (int): The lower bound of the port range (default: 30000)
        max_port (int): The upper bound of the port range (default: 39999)
        max_attempts (int): Maximum number of attempts to find an available port (default: 10)

    Returns:
        int: An available port number, or -1 if none found after max_attempts
    """
    candidates = range(min_port, max_port + 1)
    # Never ask for more samples than there are ports, and treat a
    # non-positive attempt count as "do not try at all".
    attempts = max(0, min(max_attempts, len(candidates)))
    rng = random.SystemRandom()
    # Sampling draws only the ports that will actually be probed instead of
    # materialising and shuffling the whole range.
    for port in rng.sample(candidates, attempts):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            try:
                sock.bind(('localhost', port))
                return port
            except OSError:
                time.sleep(0.1)  # Short delay to further reduce chance of collisions
    return -1
def display_number_matrix(number: int) -> str | None:

View File

@ -1,2 +1,3 @@
[pytest]
addopts = -p no:warnings
asyncio_default_fixture_loop_scope = function

View File

@ -5,6 +5,8 @@ set -eo pipefail
## CONSTANTS AND ENVIRONMENTAL VARIABLES ##
##############################################################
echo -e "\n\n============================================================"
# unset environmental variables that might disturb testing
unset OPENAI_API_KEY
unset SANDBOX_ENV_OPENAI_API_KEY
@ -16,7 +18,7 @@ get_script_dir() {
local source="${BASH_SOURCE[0]}"
while [ -h "$source" ]; do
local dir="$( cd -P "$( dirname "$source" )" && pwd )"
source="$(readlink "$source")"
source="$(readlink -f "$source" 2>/dev/null || echo "$source")"
[[ $source != /* ]] && source="$dir/$source"
done
echo "$( cd -P "$( dirname "$source" )" && pwd )"
@ -27,9 +29,6 @@ TMP_FILE="${TMP_FILE:-tmp.log}"
if [ -z "$WORKSPACE_BASE" ]; then
WORKSPACE_BASE=$(pwd)
fi
if [ -z "$WORKSPACE_MOUNT_PATH" ]; then
WORKSPACE_MOUNT_PATH=$WORKSPACE_BASE
fi
DEBUG=true # needed for llm logging to create mock files!
@ -39,7 +38,7 @@ fi
export SCRIPT_DIR=$(get_script_dir)
export PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
export LOG_DIR=$PROJECT_ROOT/logs
export LOG_DIR="$PROJECT_ROOT/logs"
echo "Current working directory: $(pwd)"
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "PROJECT_ROOT: $PROJECT_ROOT"
@ -47,22 +46,29 @@ echo "LOG_DIR: $LOG_DIR"
echo "LOG_TO_FILE: $LOG_TO_FILE"
WORKSPACE_BASE=${WORKSPACE_BASE}/_test_workspace
mkdir -p $WORKSPACE_BASE
chmod -R 777 $WORKSPACE_BASE
WORKSPACE_BASE=$(realpath $WORKSPACE_BASE)
mkdir -p "$WORKSPACE_BASE"
chmod -R 777 "$WORKSPACE_BASE"
WORKSPACE_BASE=$(realpath "$WORKSPACE_BASE")
WORKSPACE_MOUNT_PATH=${WORKSPACE_MOUNT_PATH}/_test_workspace
mkdir -p $WORKSPACE_MOUNT_PATH
chmod -R 777 $WORKSPACE_MOUNT_PATH
WORKSPACE_MOUNT_PATH=$(realpath $WORKSPACE_MOUNT_PATH)
if [ -z "$WORKSPACE_MOUNT_PATH" ]; then
WORKSPACE_MOUNT_PATH="$WORKSPACE_BASE"
else
WORKSPACE_MOUNT_PATH="${WORKSPACE_MOUNT_PATH}/_test_workspace"
mkdir -p "$WORKSPACE_MOUNT_PATH"
chmod -R 755 "$WORKSPACE_MOUNT_PATH"
WORKSPACE_MOUNT_PATH=$(realpath "$WORKSPACE_MOUNT_PATH")
fi
WORKSPACE_MOUNT_PATH_IN_SANDBOX="${WORKSPACE_MOUNT_PATH_IN_SANDBOX:-/workspace}"
echo "WORKSPACE_BASE: $WORKSPACE_BASE"
echo "WORKSPACE_MOUNT_PATH: $WORKSPACE_MOUNT_PATH"
echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
# Ensure we're in the correct directory
cd "$PROJECT_ROOT" || exit 1
mkdir -p $WORKSPACE_BASE
mkdir -p "$WORKSPACE_BASE"
# use environmental variable if exists
TEST_RUNTIME="${TEST_RUNTIME:-eventstream}"
@ -178,7 +184,7 @@ cleanup() {
kill $HTTP_SERVER_PID || true
unset HTTP_SERVER_PID
fi
[ -f $TMP_FILE ] && rm $TMP_FILE
[ -f "$TMP_FILE" ] && rm "$TMP_FILE"
echo "Cleanup done!"
}
@ -200,14 +206,14 @@ regenerate_without_llm() {
PROJECT_ROOT="$PROJECT_ROOT" \
WORKSPACE_BASE="$WORKSPACE_BASE" \
WORKSPACE_MOUNT_PATH="$WORKSPACE_MOUNT_PATH" \
MAX_ITERATIONS=$MAX_ITERATIONS \
MAX_ITERATIONS="$MAX_ITERATIONS" \
FORCE_APPLY_PROMPTS=true \
DEFAULT_AGENT=$agent \
DEFAULT_AGENT="$agent" \
TEST_RUNTIME="$TEST_RUNTIME" \
LLM=$LLM \
DEBUG=$DEBUG \
LOG_TO_FILE=$LOG_TO_FILE \
FORCE_REGENERATE=$FORCE_REGENERATE \
LLM="$LLM" \
DEBUG="$DEBUG" \
LOG_TO_FILE="$LOG_TO_FILE" \
FORCE_REGENERATE="$FORCE_REGENERATE" \
SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
poetry run pytest -s "$SCRIPT_DIR/test_agent.py::$test_name"
set +x
@ -216,12 +222,12 @@ regenerate_without_llm() {
regenerate_with_llm() {
cd "$PROJECT_ROOT"
rm -rf $WORKSPACE_BASE/*
# Empty the workspace. The glob has to sit outside the quotes or it is passed
# to rm literally and nothing is removed; ${VAR:?} aborts instead of expanding
# to "/*" should WORKSPACE_BASE ever be empty.
rm -rf "${WORKSPACE_BASE:?}"/*
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE"
fi
rm -rf logs
rm -rf "$LOG_DIR"
# Remove the previously recorded mock files for this test. The glob is kept
# outside the quotes so the shell actually expands it.
rm -rf "$SCRIPT_DIR/mock/${TEST_RUNTIME}_runtime/$agent/$test_name"/*
# set -x to print the command being executed
set -x
@ -233,12 +239,12 @@ regenerate_with_llm() {
DEFAULT_AGENT=$agent \
RUNTIME="$TEST_RUNTIME" \
SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
LLM=$LLM \
DEBUG=$DEBUG \
LOG_TO_FILE=$LOG_TO_FILE \
FORCE_REGENERATE=$FORCE_REGENERATE \
LLM="$LLM" \
DEBUG="$DEBUG" \
LOG_TO_FILE="$LOG_TO_FILE" \
FORCE_REGENERATE="$FORCE_REGENERATE" \
poetry run python "$PROJECT_ROOT/openhands/core/main.py" \
-i $MAX_ITERATIONS \
-i "$MAX_ITERATIONS" \
-t "$task Do not ask me for confirmation at any point." \
-c $agent
set +x
@ -256,8 +262,8 @@ if [ "$num_of_tests" -ne "${#test_names[@]}" ]; then
exit 1
fi
rm -rf logs
rm -rf $WORKSPACE_BASE/*
rm -rf "$LOG_DIR"
# Empty the workspace before the test loop. A quoted "*" is not expanded, so
# the glob stays outside the quotes; ${VAR:?} guards against an empty path.
rm -rf "${WORKSPACE_BASE:?}"/*
for ((i = 0; i < num_of_tests; i++)); do
task=${tasks[i]}
test_name=${test_names[i]}
@ -286,9 +292,9 @@ for ((i = 0; i < num_of_tests; i++)); do
cd "$PROJECT_ROOT/tests"
cd "$PROJECT_ROOT"
rm -rf $WORKSPACE_BASE/*
# Start every test from an empty workspace. The glob must be unquoted to be
# expanded; ${VAR:?} stops the script rather than running "rm -rf /*".
rm -rf "${WORKSPACE_BASE:?}"/*
if [ -d "$SCRIPT_DIR/workspace/$test_name" ]; then
cp -r "$SCRIPT_DIR/workspace/$test_name"/* $WORKSPACE_BASE
cp -r "$SCRIPT_DIR/workspace/$test_name"/* "$WORKSPACE_BASE"
fi
if [ "$TEST_ONLY" ]; then
@ -395,7 +401,7 @@ for ((i = 0; i < num_of_tests; i++)); do
fi
done
rm -rf logs
rm -rf $WORKSPACE_BASE
rm -rf "$LOG_DIR"
rm -rf "$WORKSPACE_BASE"
echo "Done!"
cd "$PROJECT_ROOT"

View File

@ -6,7 +6,7 @@ import subprocess
import pytest
from openhands.controller.state.state import State
from openhands.core.config import AppConfig, SandboxConfig, load_from_env
from openhands.core.config import load_app_config
from openhands.core.main import run_controller
from openhands.core.schema import AgentState
from openhands.events.action import (
@ -21,36 +21,23 @@ TEST_RUNTIME = os.getenv('TEST_RUNTIME')
assert TEST_RUNTIME in ['eventstream', 'remote']
_ = get_runtime_cls(TEST_RUNTIME) # make sure it does not raise an error
CONFIG = AppConfig(
max_iterations=int(os.getenv('MAX_ITERATIONS', 20)),
max_budget_per_task=int(os.getenv('MAX_BUDGET_PER_TASK', 15)),
runtime=TEST_RUNTIME,
default_agent=os.getenv('DEFAULT_AGENT'),
workspace_base=os.getenv('WORKSPACE_BASE'),
workspace_mount_path=os.getenv('WORKSPACE_MOUNT_PATH'),
sandbox=SandboxConfig(
use_host_network=True,
),
CONFIG = load_app_config()
CONFIG.max_iterations = int(os.getenv('MAX_ITERATIONS', 20))
CONFIG.max_budget_per_task = int(os.getenv('MAX_BUDGET_PER_TASK', 15))
CONFIG.runtime = TEST_RUNTIME
CONFIG.default_agent = os.getenv('DEFAULT_AGENT')
CONFIG.workspace_base = os.getenv('WORKSPACE_BASE')
CONFIG.workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH')
CONFIG.workspace_mount_path_in_sandbox = os.getenv(
'WORKSPACE_MOUNT_PATH_IN_SANDBOX', '/workspace'
)
load_from_env(CONFIG, os.environ)
CONFIG.sandbox.use_host_network = True
print('\nPaths used:')
print(f'workspace_base: {CONFIG.workspace_base}')
print(f'workspace_mount_path: {CONFIG.workspace_mount_path}')
print(f'workspace_mount_path_in_sandbox: {CONFIG.workspace_mount_path_in_sandbox}')
# Check if running in WSL environment
if 'WSL_DISTRO_NAME' in os.environ:
if (
CONFIG.workspace_base
and CONFIG.workspace_mount_path
and CONFIG.workspace_base != CONFIG.workspace_mount_path
):
print(
'\n**********\nWARNING: if WORKSPACE_MOUNT_PATH is set differently to'
'\nWORKSPACE_BASE some file operation tests may fail!\n**********\n'
)
def get_number_of_prompts(test_name: str):
mock_dir = os.path.join(

View File

@ -1,11 +1,15 @@
import os
import random
import shutil
import stat
import time
from pathlib import Path
import pytest
from pytest import TempPathFactory
from openhands.core.config import AppConfig, SandboxConfig, load_from_env
from openhands.core.config import load_app_config
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.runtime.client.runtime import EventStreamRuntime
from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
@ -13,19 +17,86 @@ from openhands.runtime.remote.runtime import RemoteRuntime
from openhands.runtime.runtime import Runtime
from openhands.storage import get_file_store
TEST_IN_CI = os.getenv('TEST_IN_CI', 'False').lower() in ['true', '1', 'yes']
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream').lower()
RUN_AS_OPENHANDS = os.getenv('RUN_AS_OPENHANDS', 'True').lower() in ['true', '1', 'yes']
test_mount_path = ''
project_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
sandbox_test_folder = '/openhands/workspace'
def _get_runtime_sid(runtime: Runtime):
    """Return the `sid` attribute of `runtime`, writing it to the debug log first."""
    session_id = runtime.sid
    logger.debug(f'\nruntime.sid: {session_id}')
    return session_id
def _get_host_folder(runtime: Runtime):
    """Return `workspace_mount_path` from the runtime's config."""
    runtime_config = runtime.config
    return runtime_config.workspace_mount_path
def _get_sandbox_folder(runtime: Runtime):
    """Build the per-runtime folder path below `sandbox_test_folder`.

    Returns:
    - Path | None: `sandbox_test_folder/<sid>`, or None when the runtime's
      sid is empty.
    """
    session_id = _get_runtime_sid(runtime)
    if not session_id:
        return None
    return Path(os.path.join(sandbox_test_folder, session_id))
def _remove_folder(folder: str) -> bool:
    """Delete `folder` from disk on a best-effort basis.

    `os.rmdir` is tried first; if it raises OSError, `shutil.rmtree` is tried.
    OSError from either call is swallowed.

    Parameters:
    - folder (str): Path of the directory to delete

    Returns:
    - bool: True if one of the two calls succeeded, False otherwise (also
      when `folder` is empty or is not an existing directory)
    """
    removed = False
    if folder and os.path.isdir(folder):
        for delete in (os.rmdir, shutil.rmtree):
            try:
                delete(folder)
            except OSError:
                # fall through to the next strategy, if there is one
                continue
            removed = True
            break
        outcome = '[OK]' if removed else '[FAILED]'
        logger.debug(f'\nCleanup: `{folder}`: ' + outcome)
    return removed
def _close_test_runtime(runtime: Runtime):
    """Close `runtime`, then pause for one second.

    An `EventStreamRuntime` is closed with `rm_all_containers=False`; any
    other runtime is closed without arguments.
    """
    close_kwargs = {}
    if isinstance(runtime, EventStreamRuntime):
        close_kwargs['rm_all_containers'] = False
    runtime.close(**close_kwargs)
    time.sleep(1)
def _reset_pwd():
    """Change the working directory back to the module-level `project_dir`.

    Any exception is logged as an error instead of being raised, so a failed
    `chdir` does not abort the caller.
    """
    # `project_dir` is only read here, so no `global` declaration is required.
    try:
        os.chdir(project_dir)
        logger.info(f'Changed back to project directory `{project_dir}`')
    except Exception as e:
        logger.error(f'Failed to change back to project directory: {e}')
# *****************************************************************************
# *****************************************************************************
@pytest.fixture(autouse=True)
def print_method_name(request):
print('\n########################################################################')
print(
'\n\n########################################################################'
)
print(f'Running test: {request.node.name}')
print('########################################################################')
yield
print(
'########################################################################\n\n'
)
@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
"""
Creates a unique temporary directory
def temp_dir(tmp_path_factory: TempPathFactory, request) -> str:
"""Creates a unique temporary directory.
Upon finalization, the temporary directory and its content is removed.
The cleanup function is also called upon KeyboardInterrupt.
Parameters:
- tmp_path_factory (TempPathFactory): A TempPathFactory class
@ -33,15 +104,23 @@ def temp_dir(tmp_path_factory: TempPathFactory) -> str:
Returns:
- str: The temporary directory path that was created
"""
unique_suffix = random.randint(10000, 99999)
temp_directory = tmp_path_factory.mktemp(
f'test_runtime_{unique_suffix}', numbered=False
temp_dir = tmp_path_factory.mktemp(
'rt_' + str(random.randint(100000, 999999)), numbered=False
)
return str(temp_directory)
logger.info(f'\n*** {request.node.name}\n>> temp folder: {temp_dir}\n')
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
# Set permissions to ensure the directory is writable and deletable
os.chmod(temp_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) # 0777 permissions
def cleanup():
global project_dir
os.chdir(project_dir)
_remove_folder(temp_dir)
request.addfinalizer(cleanup)
return str(temp_dir)
# Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
@ -55,28 +134,47 @@ def get_box_classes():
raise ValueError(f'Invalid runtime: {runtime}')
def get_run_as_openhands():
    """Print the user the tests run as and return the fixture parameter list.

    Returns:
    - list[bool]: A single-element list holding `RUN_AS_OPENHANDS`, used as
      `params` of the `run_as_openhands` fixture
    """
    # The conditional expression binds more loosely than `+`, so the user name
    # is resolved first; otherwise the 'USER: ' prefix is lost for root.
    user = 'openhands' if RUN_AS_OPENHANDS else 'root'
    print(
        '\n\n########################################################################'
    )
    print('USER: ' + user)
    print(
        '########################################################################\n\n'
    )
    return [RUN_AS_OPENHANDS]
@pytest.fixture(scope='module') # for xdist
def runtime_setup_module():
    """Module-scoped fixture: call `_reset_pwd()` before and after the yield."""
    _reset_pwd()
    yield
    _reset_pwd()
@pytest.fixture(scope='session') # not for xdist
def runtime_setup_session():
    """Session-scoped fixture: call `_reset_pwd()` before and after the yield."""
    _reset_pwd()
    yield
    _reset_pwd()
# This ensures that all tests run together per runtime, not alternating between them,
# which causes errors (especially outside GitHub Actions).
@pytest.fixture(scope='module', params=get_box_classes())
def box_class(request):
time.sleep(2)
time.sleep(1)
return request.param
# TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
# since `EventStreamRuntime` supports running as an arbitrary user.
@pytest.fixture(scope='module', params=[True, False])
@pytest.fixture(scope='module', params=get_run_as_openhands())
def run_as_openhands(request):
time.sleep(1)
return request.param
@pytest.fixture(scope='module', params=[True, False])
def enable_auto_lint(request):
    """Module-scoped fixture parametrized over True and False.

    Sleeps for one second, then returns the current parameter.
    """
    time.sleep(1)
    auto_lint_enabled = request.param
    return auto_lint_enabled
@pytest.fixture(scope='module', params=None)
def base_container_image(request):
time.sleep(1)
@ -96,21 +194,12 @@ def base_container_image(request):
if request.param is None:
request.param = pytest.param(
'nikolaik/python-nodejs:python3.11-nodejs22',
'python:3.11-bookworm',
'node:22-bookworm',
'golang:1.23-bookworm',
)
print(f'Container image: {request.param}')
return request.param
@pytest.fixture
def runtime(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
yield runtime
time.sleep(1)
def _load_runtime(
temp_dir,
box_class,
@ -118,29 +207,45 @@ def _load_runtime(
enable_auto_lint: bool = False,
base_container_image: str | None = None,
browsergym_eval_env: str | None = None,
use_workspace: bool | None = None,
) -> Runtime:
sid = 'test'
cli_session = 'main_test'
sid = 'rt_' + str(random.randint(100000, 999999))
# AgentSkills need to be initialized **before** Jupyter
# otherwise Jupyter will not access the proper dependencies installed by AgentSkills
plugins = [AgentSkillsRequirement(), JupyterRequirement()]
config = AppConfig(
workspace_base=temp_dir,
workspace_mount_path=temp_dir,
sandbox=SandboxConfig(
use_host_network=True,
browsergym_eval_env=browsergym_eval_env,
),
)
load_from_env(config, os.environ)
config = load_app_config()
config.run_as_openhands = run_as_openhands
# Folder where all tests create their own folder
global test_mount_path
if use_workspace:
test_mount_path = os.path.join(config.workspace_base, 'rt')
else:
test_mount_path = os.path.join(
temp_dir, sid
) # need a subfolder to avoid conflicts
config.workspace_mount_path = test_mount_path
# Mounting folder specific for this test inside the sandbox
config.workspace_mount_path_in_sandbox = f'{sandbox_test_folder}/{sid}'
print('\nPaths used:')
print(f'use_host_network: {config.sandbox.use_host_network}')
print(f'workspace_base: {config.workspace_base}')
print(f'workspace_mount_path: {config.workspace_mount_path}')
print(
f'workspace_mount_path_in_sandbox: {config.workspace_mount_path_in_sandbox}\n'
)
config.sandbox.browsergym_eval_env = browsergym_eval_env
config.sandbox.enable_auto_lint = enable_auto_lint
if base_container_image is not None:
config.sandbox.base_container_image = base_container_image
file_store = get_file_store(config.file_store, config.file_store_path)
event_stream = EventStream(cli_session, file_store)
event_stream = EventStream(sid, file_store)
runtime = box_class(
config=config,
@ -148,9 +253,14 @@ def _load_runtime(
sid=sid,
plugins=plugins,
)
time.sleep(1)
time.sleep(2)
return runtime
# Export necessary function
__all__ = ['_load_runtime']
__all__ = [
'_load_runtime',
'_get_host_folder',
'_get_sandbox_folder',
'_remove_folder',
]

View File

@ -1,11 +1,14 @@
"""Bash-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import os
import tempfile
import time
import pytest
from conftest import _load_runtime
from conftest import (
TEST_IN_CI,
_close_test_runtime,
_get_sandbox_folder,
_load_runtime,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
@ -16,77 +19,63 @@ from openhands.events.observation import CmdOutputObservation
# ============================================================================================================================
def _run_cmd_action(runtime, custom_command: str, keep_prompt=True):
    """Run `custom_command` in `runtime` and return the resulting observation.

    The action and the observation are both logged at info level. The
    function asserts that the result is a `CmdOutputObservation`.

    Parameters:
    - runtime: Object whose `run_action` method executes the action
    - custom_command (str): The command passed to `CmdRunAction`
    - keep_prompt (bool): Forwarded unchanged to `CmdRunAction`

    Returns:
    - CmdOutputObservation: The observation returned by `runtime.run_action`
    """
    cmd_action = CmdRunAction(command=custom_command, keep_prompt=keep_prompt)
    logger.info(cmd_action, extra={'msg_type': 'ACTION'})
    observation = runtime.run_action(cmd_action)
    assert isinstance(observation, CmdOutputObservation)
    logger.info(observation, extra={'msg_type': 'OBSERVATION'})
    return observation
def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
try:
# We set env var PS1="\u@\h:\w $"
# and construct the PEXCEPT prompt base on it.
# When run `env`, bad implementation of CmdRunAction will be pexcepted by this
# and failed to pexcept the right content, causing it fail to get error code.
obs = runtime.run_action(CmdRunAction(command='env'))
# We set env var PS1="\u@\h:\w $"
# and construct the pexpect prompt based on it.
# When running `env`, a bad implementation of CmdRunAction will match the PS1 value in the output
# instead of the real prompt, and fail to expect the right content, causing it to fail to get the exit code.
obs = runtime.run_action(CmdRunAction(command='env'))
# For example:
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
# CONDA_EXE=/openhands/miniforge3/bin/conda
# [...]
# LC_CTYPE=C.UTF-8
# PS1=\u@\h:\w $
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
# CONDA_DEFAULT_ENV=base
# For example:
# 02:16:13 - openhands:DEBUG: client.py:78 - Executing command: env
# 02:16:13 - openhands:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
# CONDA_EXE=/openhands/miniforge3/bin/conda
# [...]
# LC_CTYPE=C.UTF-8
# PS1=\u@\h:\w $
# 02:16:13 - openhands:DEBUG: client.py:89 - Executing command for exit code: env
# 02:16:13 - openhands:DEBUG: client.py:92 - Exit code Output:
# CONDA_DEFAULT_ENV=base
# As long as the exit code is 0, the test will pass.
assert isinstance(
obs, CmdOutputObservation
), 'The observation should be a CmdOutputObservation.'
assert obs.exit_code == 0, 'The exit code should be 0.'
runtime.close(rm_all_containers=False)
time.sleep(1)
# As long as the exit code is 0, the test will pass.
assert isinstance(
obs, CmdOutputObservation
), 'The observation should be a CmdOutputObservation.'
assert obs.exit_code == 0, 'The exit code should be 0.'
finally:
_close_test_runtime(runtime)
def test_single_multiline_command(temp_dir, box_class):
def test_multiline_commands(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
try:
# single multiline command
obs = _run_cmd_action(runtime, 'echo \\\n -e "foo"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'foo' in obs.content
action = CmdRunAction(command='echo \\\n -e "foo"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'foo' in obs.content
# test multiline echo
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
def test_multiline_echo(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
action = CmdRunAction(command='echo -e "hello\nworld"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
def test_runtime_whitespace(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
action = CmdRunAction(command='echo -e "\\n\\n\\n"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '\r\n\r\n\r\n' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
# test whitespace
obs = _run_cmd_action(runtime, 'echo -e "\\n\\n\\n"')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '\r\n\r\n\r\n' in obs.content
finally:
_close_test_runtime(runtime)
def test_multiple_multiline_commands(temp_dir, box_class, run_as_openhands):
@ -120,48 +109,36 @@ world "
joined_cmds = '\n'.join(cmds)
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
try:
obs = _run_cmd_action(runtime, joined_cmds)
assert obs.exit_code == 0, 'The exit code should be 0.'
action = CmdRunAction(command=joined_cmds)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'total 0' in obs.content
assert 'hello\r\nworld' in obs.content
assert "hello it\\'s me" in obs.content
assert 'hello -v' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
assert 'hello\r\nworld "\r\n' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
assert 'total 0' in obs.content
assert 'hello\r\nworld' in obs.content
assert "hello it\\'s me" in obs.content
assert 'hello -v' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
assert 'hello\r\nworld "\r\n' in obs.content
finally:
_close_test_runtime(runtime)
def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
"""Test that the PS2 sign is not added to the output of a multiline command."""
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
try:
obs = _run_cmd_action(runtime, 'echo -e "hello\nworld"')
assert obs.exit_code == 0, 'The exit code should be 0.'
action = CmdRunAction(command='echo -e "hello\nworld"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert 'hello\r\nworld' in obs.content
assert '>' not in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
assert 'hello\r\nworld' in obs.content
assert '>' not in obs.content
finally:
_close_test_runtime(runtime)
def test_multiline_command_loop(temp_dir, box_class):
# https://github.com/All-Hands-AI/OpenHands/issues/3143
runtime = _load_runtime(temp_dir, box_class)
init_cmd = """
mkdir -p _modules && \
for month in {01..04}; do
@ -171,15 +148,6 @@ for month in {01..04}; do
done
echo "created files"
"""
action = CmdRunAction(command=init_cmd)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'created files' in obs.content
follow_up_cmd = """
for file in _modules/*.md; do
new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
@ -187,153 +155,104 @@ for file in _modules/*.md; do
done
echo "success"
"""
action = CmdRunAction(command=follow_up_cmd)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
runtime = _load_runtime(temp_dir, box_class)
try:
obs = _run_cmd_action(runtime, init_cmd)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'created files' in obs.content
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'success' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, follow_up_cmd)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'success' in obs.content
finally:
_close_test_runtime(runtime)
def test_cmd_run(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
try:
obs = _run_cmd_action(runtime, 'ls -l /openhands/workspace')
assert obs.exit_code == 0
action = CmdRunAction(command='ls -l')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'total 0' in obs.content
obs = _run_cmd_action(runtime, 'ls -l')
assert obs.exit_code == 0
assert 'total 0' in obs.content
action = CmdRunAction(command='mkdir test')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
obs = _run_cmd_action(runtime, 'mkdir test')
assert obs.exit_code == 0
action = CmdRunAction(command='ls -l')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
if run_as_openhands:
assert 'openhands' in obs.content
else:
assert 'root' in obs.content
assert 'test' in obs.content
obs = _run_cmd_action(runtime, 'ls -l')
assert obs.exit_code == 0
if run_as_openhands:
assert 'openhands' in obs.content
else:
assert 'root' in obs.content
assert 'test' in obs.content
action = CmdRunAction(command='touch test/foo.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
obs = _run_cmd_action(runtime, 'touch test/foo.txt')
assert obs.exit_code == 0
action = CmdRunAction(command='ls -l test')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'foo.txt' in obs.content
obs = _run_cmd_action(runtime, 'ls -l test')
assert obs.exit_code == 0
assert 'foo.txt' in obs.content
# clean up: this is needed, since CI will not be
# run as root, and this test may leave a file
# owned by root
action = CmdRunAction(command='rm -rf test')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
# clean up: this is needed, since CI will not be
# run as root, and this test may leave a file
# owned by root
# Keep the observation of the cleanup command itself; without the assignment
# the assertion below would re-check the result of the previous `ls -l test`.
obs = _run_cmd_action(runtime, 'rm -rf test')
assert obs.exit_code == 0
finally:
_close_test_runtime(runtime)
def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
action = CmdRunAction(command='cd ~ && pwd')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
if run_as_openhands:
assert '/home/openhands' in obs.content
else:
assert '/root' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
try:
obs = _run_cmd_action(runtime, 'cd ~ && pwd')
assert obs.exit_code == 0
if run_as_openhands:
assert '/home/openhands' in obs.content
else:
assert '/root' in obs.content
finally:
_close_test_runtime(runtime)
def test_multi_cmd_run_in_single_line(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
action = CmdRunAction(command='pwd && ls -l')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert '/workspace' in obs.content
assert 'total 0' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
try:
obs = _run_cmd_action(runtime, 'pwd && ls -l')
assert obs.exit_code == 0
assert '/workspace' in obs.content
assert 'total 0' in obs.content
finally:
_close_test_runtime(runtime)
def test_stateful_cmd(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
sandbox_dir = _get_sandbox_folder(runtime)
try:
obs = _run_cmd_action(runtime, 'mkdir -p test')
assert obs.exit_code == 0, 'The exit code should be 0.'
action = CmdRunAction(command='mkdir test')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
obs = _run_cmd_action(runtime, 'cd test')
assert obs.exit_code == 0, 'The exit code should be 0.'
action = CmdRunAction(command='cd test')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
action = CmdRunAction(command='pwd')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '/workspace/test' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, 'pwd')
assert obs.exit_code == 0, 'The exit code should be 0.'
assert f'{sandbox_dir}/test' in obs.content
finally:
_close_test_runtime(runtime)
def test_failed_cmd(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
action = CmdRunAction(command='non_existing_command')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
runtime.close(rm_all_containers=False)
time.sleep(1)
try:
obs = _run_cmd_action(runtime, 'non_existing_command')
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
finally:
_close_test_runtime(runtime)
def _create_test_file(host_temp_dir):
@ -344,154 +263,121 @@ def _create_test_file(host_temp_dir):
def test_copy_single_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
try:
sandbox_dir = _get_sandbox_folder(runtime)
sandbox_file = os.path.join(sandbox_dir, 'test_file.txt')
_create_test_file(temp_dir)
runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
with tempfile.TemporaryDirectory() as host_temp_dir:
_create_test_file(host_temp_dir)
runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace')
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
assert obs.exit_code == 0
assert 'test_file.txt' in obs.content
action = CmdRunAction(command='ls -alh /workspace')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'test_file.txt' in obs.content
action = CmdRunAction(command='cat /workspace/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, f'cat {sandbox_file}')
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
finally:
_close_test_runtime(runtime)
def _create_test_dir_with_files(host_temp_dir):
os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
with open(os.path.join(host_temp_dir, 'test_dir', 'file1.txt'), 'w') as f:
def _create_host_test_dir_with_files(test_dir):
logger.debug(f'creating `{test_dir}`')
if not os.path.isdir(test_dir):
os.makedirs(test_dir, exist_ok=True)
logger.debug('creating test files in `test_dir`')
with open(os.path.join(test_dir, 'file1.txt'), 'w') as f:
f.write('File 1 content')
with open(os.path.join(host_temp_dir, 'test_dir', 'file2.txt'), 'w') as f:
with open(os.path.join(test_dir, 'file2.txt'), 'w') as f:
f.write('File 2 content')
def test_copy_directory_recursively(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
with tempfile.TemporaryDirectory() as host_temp_dir:
sandbox_dir = _get_sandbox_folder(runtime)
try:
temp_dir_copy = os.path.join(temp_dir, 'test_dir')
# We need a separate directory, since temp_dir is mounted to /workspace
_create_test_dir_with_files(host_temp_dir)
runtime.copy_to(
os.path.join(host_temp_dir, 'test_dir'), '/workspace', recursive=True
)
_create_host_test_dir_with_files(temp_dir_copy)
action = CmdRunAction(command='ls -alh /workspace')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'test_dir' in obs.content
assert 'file1.txt' not in obs.content
assert 'file2.txt' not in obs.content
runtime.copy_to(temp_dir_copy, sandbox_dir, recursive=True)
action = CmdRunAction(command='ls -alh /workspace/test_dir')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'file1.txt' in obs.content
assert 'file2.txt' in obs.content
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
assert obs.exit_code == 0
assert 'test_dir' in obs.content
assert 'file1.txt' not in obs.content
assert 'file2.txt' not in obs.content
action = CmdRunAction(command='cat /workspace/test_dir/file1.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'File 1 content' in obs.content
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}/test_dir')
assert obs.exit_code == 0
assert 'file1.txt' in obs.content
assert 'file2.txt' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_dir/file1.txt')
assert obs.exit_code == 0
assert 'File 1 content' in obs.content
finally:
_close_test_runtime(runtime)
def test_copy_to_non_existent_directory(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
with tempfile.TemporaryDirectory() as host_temp_dir:
_create_test_file(host_temp_dir)
try:
sandbox_dir = _get_sandbox_folder(runtime)
_create_test_file(temp_dir)
runtime.copy_to(
os.path.join(host_temp_dir, 'test_file.txt'), '/workspace/new_dir'
os.path.join(temp_dir, 'test_file.txt'), f'{sandbox_dir}/new_dir'
)
action = CmdRunAction(command='cat /workspace/new_dir/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/new_dir/test_file.txt')
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
finally:
_close_test_runtime(runtime)
def test_overwrite_existing_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
try:
sandbox_dir = _get_sandbox_folder(runtime)
# touch a file in /workspace
action = CmdRunAction(command='touch /workspace/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
assert obs.exit_code == 0
action = CmdRunAction(command='cat /workspace/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Hello, World!' not in obs.content
obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
assert obs.exit_code == 0
with tempfile.TemporaryDirectory() as host_temp_dir:
_create_test_file(host_temp_dir)
runtime.copy_to(os.path.join(host_temp_dir, 'test_file.txt'), '/workspace')
obs = _run_cmd_action(runtime, f'ls -alh {sandbox_dir}')
assert obs.exit_code == 0
action = CmdRunAction(command='cat /workspace/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt')
assert obs.exit_code == 0
assert 'Hello, World!' not in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
_create_test_file(temp_dir)
runtime.copy_to(os.path.join(temp_dir, 'test_file.txt'), sandbox_dir)
obs = _run_cmd_action(runtime, f'cat {sandbox_dir}/test_file.txt')
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
finally:
_close_test_runtime(runtime)
def test_copy_non_existent_file(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
try:
sandbox_dir = _get_sandbox_folder(runtime)
with pytest.raises(FileNotFoundError):
runtime.copy_to(
os.path.join(sandbox_dir, 'non_existent_file.txt'),
f'{sandbox_dir}/should_not_exist.txt',
)
with pytest.raises(FileNotFoundError):
runtime.copy_to(
os.path.join(temp_dir, 'non_existent_file.txt'),
'/workspace/should_not_exist.txt',
)
action = CmdRunAction(command='ls /workspace/should_not_exist.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0 # File should not exist
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(runtime, f'ls {sandbox_dir}/should_not_exist.txt')
assert obs.exit_code != 0 # File should not exist
finally:
_close_test_runtime(runtime)
def test_keep_prompt(box_class, temp_dir):
@ -500,27 +386,26 @@ def test_keep_prompt(box_class, temp_dir):
box_class=box_class,
run_as_openhands=False,
)
try:
sandbox_dir = _get_sandbox_folder(runtime)
action = CmdRunAction(command='touch /workspace/test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'root@' in obs.content
obs = _run_cmd_action(runtime, f'touch {sandbox_dir}/test_file.txt')
assert obs.exit_code == 0
assert 'root@' in obs.content
action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
assert 'root@' not in obs.content
runtime.close(rm_all_containers=False)
time.sleep(1)
obs = _run_cmd_action(
runtime, f'cat {sandbox_dir}/test_file.txt', keep_prompt=False
)
assert obs.exit_code == 0
assert 'root@' not in obs.content
finally:
_close_test_runtime(runtime)
@pytest.mark.skipif(
TEST_IN_CI != 'True',
reason='This test is not working in WSL (file ownership)',
)
def test_git_operation(box_class):
# do not mount workspace, since workspace mount by tests will be owned by root
# while the user_id we get via os.getuid() is different from root
@ -531,69 +416,43 @@ def test_git_operation(box_class):
# Need to use non-root user to expose issues
run_as_openhands=True,
)
# this will happen if permission of runtime is not properly configured
# fatal: detected dubious ownership in repository at '/workspace'
try:
# check the ownership of the current directory
obs = _run_cmd_action(runtime, 'ls -alh .')
assert obs.exit_code == 0
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
for line in obs.content.split('\r\n'):
if ' ..' in line:
# parent directory should be owned by root
assert 'root' in line
assert 'openhands' not in line
elif ' .' in line:
# current directory should be owned by openhands
# and its group should be root
assert 'openhands' in line
assert 'root' in line
# check the ownership of the current directory
action = CmdRunAction(command='ls -alh .')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
# drwx--S--- 2 openhands root 64 Aug 7 23:32 .
# drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
for line in obs.content.split('\r\n'):
if ' ..' in line:
# parent directory should be owned by root
assert 'root' in line
assert 'openhands' not in line
elif ' .' in line:
# current directory should be owned by openhands
# and its group should be root
assert 'openhands' in line
assert 'root' in line
# make sure all git operations are allowed
obs = _run_cmd_action(runtime, 'git init')
assert obs.exit_code == 0
# make sure all git operations are allowed
action = CmdRunAction(command='git init')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
# create a file
obs = _run_cmd_action(runtime, 'echo "hello" > test_file.txt')
assert obs.exit_code == 0
# create a file
action = CmdRunAction(command='echo "hello" > test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
# git add
obs = _run_cmd_action(runtime, 'git add test_file.txt')
assert obs.exit_code == 0
# git add
action = CmdRunAction(command='git add test_file.txt')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
# git diff
obs = _run_cmd_action(runtime, 'git diff')
assert obs.exit_code == 0
# git diff
action = CmdRunAction(command='git diff')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
# git commit
action = CmdRunAction(command='git commit -m "test commit"')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
# git commit
obs = _run_cmd_action(runtime, 'git commit -m "test commit"')
assert obs.exit_code == 0
finally:
_close_test_runtime(runtime)

View File

@ -1,9 +1,8 @@
"""Browsing-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import json
import time
from conftest import _load_runtime
from conftest import _close_test_runtime, _load_runtime
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
@ -66,8 +65,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_browsergym_eval_env(box_class, temp_dir):
@ -111,5 +109,4 @@ def test_browsergym_eval_env(box_class, temp_dir):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert json.loads(obs.content) == [0.0]
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)

View File

@ -1,10 +1,9 @@
"""Env vars related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import os
import time
from unittest.mock import patch
from conftest import _load_runtime
from conftest import _close_test_runtime, _load_runtime
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
@ -30,8 +29,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
), f'Output: [{obs.content}] for {box_class}'
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_env_vars_runtime_operations(temp_dir, box_class):
@ -66,5 +64,4 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
)
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)

View File

@ -1,9 +1,7 @@
"""Image-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import time
import pytest
from conftest import _load_runtime
from conftest import _close_test_runtime, _load_runtime
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
@ -17,7 +15,6 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
"""Make sure Python is available in bash."""
if base_container_image not in [
'python:3.11-bookworm',
'nikolaik/python-nodejs:python3.11-nodejs22',
]:
pytest.skip('This test is only for python-related images')
@ -45,15 +42,13 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'pip' in obs.content # Check that pip is available
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_nodejs_22_version(temp_dir, box_class, base_container_image):
"""Make sure Node.js is available in bash."""
if base_container_image not in [
'node:22-bookworm',
'nikolaik/python-nodejs:python3.11-nodejs22',
]:
pytest.skip('This test is only for nodejs-related images')
@ -68,8 +63,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'v22' in obs.content # Check for specific version
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_go_version(temp_dir, box_class, base_container_image):
@ -90,5 +84,4 @@ def test_go_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'go1.23' in obs.content # Check for specific version
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)

View File

@ -1,8 +1,12 @@
"""Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
import time
from conftest import _load_runtime
import pytest
from conftest import (
TEST_IN_CI,
_close_test_runtime,
_get_sandbox_folder,
_load_runtime,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
@ -28,6 +32,8 @@ from openhands.runtime.client.runtime import EventStreamRuntime
def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
sandbox_dir = _get_sandbox_folder(runtime)
# Test run command
action_cmd = CmdRunAction(command='ls -l')
logger.info(action_cmd, extra={'msg_type': 'ACTION'})
@ -48,7 +54,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content.strip() == (
'Hello, `World`!\n'
'[Jupyter current working directory: /workspace]\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
)
@ -69,7 +75,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
assert obs.content == ''
# event stream runtime will always use absolute path
assert obs.path == '/workspace/hello.sh'
assert obs.path == f'{sandbox_dir}/hello.sh'
# Test read file (file should exist)
action_read = FileReadAction(path='hello.sh')
@ -81,7 +87,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.content == 'echo "Hello, World!"\n'
assert obs.path == '/workspace/hello.sh'
assert obs.path == f'{sandbox_dir}/hello.sh'
# clean up
action = CmdRunAction(command='rm -rf hello.sh')
@ -90,10 +96,13 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
@pytest.mark.skipif(
TEST_IN_CI != 'True',
reason='This test is not working in WSL (file ownership)',
)
def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
@ -111,7 +120,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
else:
assert 'root' in obs.content
# print pwd
# print the current working directory
test_code = 'import os; print(os.getcwd())'
action_ipython = IPythonRunCellAction(code=test_code)
logger.info(action_ipython, extra={'msg_type': 'ACTION'})
@ -152,7 +161,6 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
if run_as_openhands:
# -rw-r--r-- 1 openhands root 13 Jul 28 03:53 test.txt
assert 'openhands' in obs.content.split('\r\n')[0]
assert 'root' in obs.content.split('\r\n')[0]
else:
# -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
assert 'root' in obs.content.split('\r\n')[0]
@ -164,12 +172,12 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_ipython_simple(temp_dir, box_class):
runtime = _load_runtime(temp_dir, box_class)
sandbox_dir = _get_sandbox_folder(runtime)
# Test run ipython
# get username
@ -183,20 +191,20 @@ def test_ipython_simple(temp_dir, box_class):
obs.content.strip()
== (
'1\n'
'[Jupyter current working directory: /workspace]\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
).strip()
)
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def _test_ipython_agentskills_fileop_pwd_impl(
runtime: EventStreamRuntime, enable_auto_lint: bool
):
sandbox_dir = _get_sandbox_folder(runtime)
# remove everything in /workspace
action = CmdRunAction(command='rm -rf /workspace/*')
action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@ -215,12 +223,12 @@ def _test_ipython_agentskills_fileop_pwd_impl(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, IPythonRunCellObservation)
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
'[File: /workspace/hello.py (1 lines total)]\n'
f'[File: {sandbox_dir}/hello.py (1 lines total)]\n'
'(this is the beginning of the file)\n'
'1|\n'
'(this is the end of the file)\n'
'[File hello.py created.]\n'
'[Jupyter current working directory: /workspace]\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
).strip().split('\n')
@ -239,12 +247,12 @@ def _test_ipython_agentskills_fileop_pwd_impl(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, IPythonRunCellObservation)
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
'[File: /workspace/test/hello.py (1 lines total)]\n'
f'[File: {sandbox_dir}/test/hello.py (1 lines total)]\n'
'(this is the beginning of the file)\n'
'1|\n'
'(this is the end of the file)\n'
'[File hello.py created.]\n'
'[Jupyter current working directory: /workspace/test]\n'
f'[Jupyter current working directory: {sandbox_dir}/test]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
).strip().split('\n')
@ -258,10 +266,10 @@ def _test_ipython_agentskills_fileop_pwd_impl(
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, IPythonRunCellObservation)
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
"""
f"""
[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
ERRORS:
/workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
{sandbox_dir}/test/hello.py:1:3: E999 IndentationError: unexpected indent
[This is how your edit would have looked if applied]
-------------------------------------------------
(this is the beginning of the file)
@ -278,7 +286,7 @@ ERRORS:
Your changes have NOT been applied. Please fix your edit command and try again.
You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
DO NOT re-run the same failed edit command. Running it again will lead to the same error.
[Jupyter current working directory: /workspace/test]
[Jupyter current working directory: {sandbox_dir}/test]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
"""
).strip().split('\n')
@ -292,39 +300,44 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert isinstance(obs, IPythonRunCellObservation)
assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
"""
[File: /workspace/test/hello.py (1 lines total after edit)]
f"""
[File: {sandbox_dir}/test/hello.py (1 lines total after edit)]
(this is the beginning of the file)
1|print("hello world")
(this is the end of the file)
[File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
[Jupyter current working directory: /workspace/test]
[Jupyter current working directory: {sandbox_dir}/test]
[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
"""
).strip().split('\n')
action = CmdRunAction(command='rm -rf /workspace/*')
action = CmdRunAction(command=f'rm -rf {sandbox_dir}/*')
logger.info(action, extra={'msg_type': 'ACTION'})
obs = runtime.run_action(action)
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close(rm_all_containers=False)
time.sleep(1)
def test_ipython_agentskills_fileop_pwd(
temp_dir, box_class, run_as_openhands, enable_auto_lint
def test_ipython_agentskills_fileop_pwd_with_lint(
temp_dir, box_class, run_as_openhands
):
"""Make sure that cd in bash also update the current working directory in ipython."""
runtime = _load_runtime(
temp_dir, box_class, run_as_openhands, enable_auto_lint=enable_auto_lint
temp_dir, box_class, run_as_openhands, enable_auto_lint=True
)
_test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
_test_ipython_agentskills_fileop_pwd_impl(runtime, True)
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_ipython_agentskills_fileop_pwd_without_lint(
temp_dir, box_class, run_as_openhands
):
runtime = _load_runtime(
temp_dir, box_class, run_as_openhands, enable_auto_lint=False
)
_test_ipython_agentskills_fileop_pwd_impl(runtime, False)
_close_test_runtime(runtime)
def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
@ -392,13 +405,13 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
).strip().split('\n')
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)
def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
"""Make sure that cd in bash also update the current working directory in ipython."""
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
sandbox_dir = _get_sandbox_folder(runtime)
# It should error out since pymsgbox is not installed
action = IPythonRunCellAction(code='import pymsgbox')
@ -424,9 +437,8 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
# import should not error out
assert obs.content.strip() == (
'[Code executed successfully with no output]\n'
'[Jupyter current working directory: /workspace]\n'
f'[Jupyter current working directory: {sandbox_dir}]\n'
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
)
runtime.close(rm_all_containers=False)
time.sleep(1)
_close_test_runtime(runtime)