Reduce runtime tests duration by running them across CPUs (#3779)

* Reduce runtime tests duration by running them across CPUs

* fix hardcoded image name

* test two cpus

* Test folder change

* Up the CPU to 4 again to test

* Change to 3 CPUs

* Down to 2

* Add param to remove all openhands containers

* Add comment

* Add reruns just in case

* Fix ordering of if
This commit is contained in:
mamoodi 2024-09-10 14:31:17 -04:00 committed by GitHub
parent 97a03faf33
commit f3b2085f9b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 76 additions and 41 deletions

View File

@ -107,9 +107,10 @@ jobs:
# Run unit tests with the EventStream runtime Docker images
test_runtime:
name: Test Runtime
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
base_image: ['nikolaik']
steps:
@ -153,6 +154,13 @@ jobs:
run: make install-python-dependencies
- name: Run runtime tests
run: |
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
# then across more than 2 CPUs for some reason
poetry run pip install pytest-xdist
# Install to be able to retry on failures for flaky tests
poetry run pip install pytest-rerunfailures
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
@ -160,7 +168,7 @@ jobs:
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:

View File

@ -305,7 +305,15 @@ class EventStreamRuntime(Runtime):
def sandbox_workspace_dir(self):
return self.config.workspace_mount_path_in_sandbox
def close(self, close_client: bool = True):
def close(self, close_client: bool = True, rm_all_containers: bool = True):
"""
Closes the EventStreamRuntime and associated objects
Parameters:
- close_client (bool): Whether to close the DockerClient
- rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix
"""
if self.log_buffer:
self.log_buffer.close()
@ -315,7 +323,13 @@ class EventStreamRuntime(Runtime):
containers = self.docker_client.containers.list(all=True)
for container in containers:
try:
if container.name.startswith(self.container_name_prefix):
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
# that all 'openhands-sandbox-' containers are removed as well.
if rm_all_containers and container.name.startswith(
self.container_name_prefix
):
container.remove(force=True)
elif container.name == self.container_name:
logs = container.logs(tail=1000).decode('utf-8')
logger.debug(
f'==== Container logs ====\n{logs}\n==== End of container logs ===='
@ -323,6 +337,7 @@ class EventStreamRuntime(Runtime):
container.remove(force=True)
except docker.errors.NotFound:
pass
if close_client:
self.docker_client.close()

View File

@ -1,4 +1,5 @@
import os
import random
import time
import pytest
@ -23,7 +24,21 @@ def print_method_name(request):
@pytest.fixture
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
return str(tmp_path_factory.mktemp('test_runtime'))
"""
Creates a unique temporary directory
Parameters:
- tmp_path_factory (TempPathFactory): A TempPathFactory class
Returns:
- str: The temporary directory path that was created
"""
unique_suffix = random.randint(10000, 99999)
temp_directory = tmp_path_factory.mktemp(
f'test_runtime_{unique_suffix}', numbered=False
)
return str(temp_directory)
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
@ -93,7 +108,6 @@ def base_container_image(request):
def runtime(temp_dir, box_class, run_as_openhands):
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
yield runtime
runtime.close()
time.sleep(1)

View File

@ -42,7 +42,7 @@ def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
), 'The observation should be a CmdOutputObservation.'
assert obs.exit_code == 0, 'The exit code should be 0.'
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -56,7 +56,7 @@ def test_single_multiline_command(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'foo' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -70,7 +70,7 @@ def test_multiline_echo(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'hello\r\nworld' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -85,7 +85,7 @@ def test_runtime_whitespace(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '\r\n\r\n\r\n' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -137,7 +137,7 @@ world "
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
assert 'hello\r\nworld "\r\n' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -153,7 +153,7 @@ def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
assert 'hello\r\nworld' in obs.content
assert '>' not in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -196,7 +196,7 @@ echo "success"
assert obs.exit_code == 0, 'The exit code should be 0.'
assert 'success' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -255,7 +255,7 @@ def test_cmd_run(temp_dir, box_class, run_as_openhands):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -273,7 +273,7 @@ def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
else:
assert '/root' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -289,7 +289,7 @@ def test_multi_cmd_run_in_single_line(temp_dir, box_class):
assert '/workspace' in obs.content
assert 'total 0' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -318,7 +318,7 @@ def test_stateful_cmd(temp_dir, box_class):
assert obs.exit_code == 0, 'The exit code should be 0.'
assert '/workspace/test' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -332,7 +332,7 @@ def test_failed_cmd(temp_dir, box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -365,7 +365,7 @@ def test_copy_single_file(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -414,7 +414,7 @@ def test_copy_directory_recursively(temp_dir, box_class):
assert obs.exit_code == 0
assert 'File 1 content' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -435,7 +435,7 @@ def test_copy_to_non_existent_directory(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -470,7 +470,7 @@ def test_overwrite_existing_file(temp_dir, box_class):
assert obs.exit_code == 0
assert 'Hello, World!' in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -490,7 +490,7 @@ def test_copy_non_existent_file(temp_dir, box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code != 0 # File should not exist
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -517,7 +517,7 @@ def test_keep_prompt(box_class, temp_dir):
assert obs.exit_code == 0
assert 'root@' not in obs.content
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -595,7 +595,5 @@ def test_git_operation(box_class):
assert isinstance(obs, CmdOutputObservation)
assert obs.exit_code == 0
runtime.close()
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)

View File

@ -66,7 +66,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -111,5 +111,5 @@ def test_browsergym_eval_env(box_class, temp_dir):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert json.loads(obs.content) == [0.0]
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)

View File

@ -30,7 +30,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
), f'Output: [{obs.content}] for {box_class}'
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -66,5 +66,5 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
)
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)

View File

@ -45,7 +45,7 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'pip' in obs.content # Check that pip is available
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -68,7 +68,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'v22' in obs.content # Check for specific version
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -90,5 +90,5 @@ def test_go_version(temp_dir, box_class, base_container_image):
assert obs.exit_code == 0
assert 'go1.23' in obs.content # Check for specific version
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)

View File

@ -90,7 +90,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -164,7 +164,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -188,7 +188,7 @@ def test_ipython_simple(temp_dir, box_class):
).strip()
)
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -309,7 +309,7 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
assert obs.exit_code == 0
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -323,7 +323,7 @@ def test_ipython_agentskills_fileop_pwd(
)
_test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -392,7 +392,7 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
).strip().split('\n')
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)
@ -428,5 +428,5 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
)
runtime.close()
runtime.close(rm_all_containers=False)
time.sleep(1)