mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Reduce runtime tests duration by running them across CPUs (#3779)
* Reduce runtime tests duration by running them across CPUs * fix hardcoded image name * test two cpus * Test folder change * Up the CPU to 4 again to test * Change to 3 CPUs * Down to 2 * Add param to remove all openhands containers * Add comment * Add reruns just in case * Fix ordering of if
This commit is contained in:
parent
97a03faf33
commit
f3b2085f9b
12
.github/workflows/ghcr_runtime.yml
vendored
12
.github/workflows/ghcr_runtime.yml
vendored
@ -107,9 +107,10 @@ jobs:
|
||||
# Run unit tests with the EventStream runtime Docker images
|
||||
test_runtime:
|
||||
name: Test Runtime
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ghcr_build_runtime]
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
base_image: ['nikolaik']
|
||||
steps:
|
||||
@ -153,6 +154,13 @@ jobs:
|
||||
run: make install-python-dependencies
|
||||
- name: Run runtime tests
|
||||
run: |
|
||||
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
|
||||
# then across more than 2 CPUs for some reason
|
||||
poetry run pip install pytest-xdist
|
||||
|
||||
# Install to be able to retry on failures for flaky tests
|
||||
poetry run pip install pytest-rerunfailures
|
||||
|
||||
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
|
||||
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
@ -160,7 +168,7 @@ jobs:
|
||||
SANDBOX_USER_ID=$(id -u) \
|
||||
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
|
||||
TEST_IN_CI=true \
|
||||
poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
|
||||
@ -305,7 +305,15 @@ class EventStreamRuntime(Runtime):
|
||||
def sandbox_workspace_dir(self):
|
||||
return self.config.workspace_mount_path_in_sandbox
|
||||
|
||||
def close(self, close_client: bool = True):
|
||||
def close(self, close_client: bool = True, rm_all_containers: bool = True):
|
||||
"""
|
||||
Closes the EventStreamRuntime and associated objects
|
||||
|
||||
Parameters:
|
||||
- close_client (bool): Whether to close the DockerClient
|
||||
- rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix
|
||||
"""
|
||||
|
||||
if self.log_buffer:
|
||||
self.log_buffer.close()
|
||||
|
||||
@ -315,7 +323,13 @@ class EventStreamRuntime(Runtime):
|
||||
containers = self.docker_client.containers.list(all=True)
|
||||
for container in containers:
|
||||
try:
|
||||
if container.name.startswith(self.container_name_prefix):
|
||||
# If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
|
||||
# that all 'openhands-sandbox-' containers are removed as well.
|
||||
if rm_all_containers and container.name.startswith(
|
||||
self.container_name_prefix
|
||||
):
|
||||
container.remove(force=True)
|
||||
elif container.name == self.container_name:
|
||||
logs = container.logs(tail=1000).decode('utf-8')
|
||||
logger.debug(
|
||||
f'==== Container logs ====\n{logs}\n==== End of container logs ===='
|
||||
@ -323,6 +337,7 @@ class EventStreamRuntime(Runtime):
|
||||
container.remove(force=True)
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
|
||||
if close_client:
|
||||
self.docker_client.close()
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
import pytest
|
||||
@ -23,7 +24,21 @@ def print_method_name(request):
|
||||
|
||||
@pytest.fixture
|
||||
def temp_dir(tmp_path_factory: TempPathFactory) -> str:
|
||||
return str(tmp_path_factory.mktemp('test_runtime'))
|
||||
"""
|
||||
Creates a unique temporary directory
|
||||
|
||||
Parameters:
|
||||
- tmp_path_factory (TempPathFactory): A TempPathFactory class
|
||||
|
||||
Returns:
|
||||
- str: The temporary directory path that was created
|
||||
"""
|
||||
|
||||
unique_suffix = random.randint(10000, 99999)
|
||||
temp_directory = tmp_path_factory.mktemp(
|
||||
f'test_runtime_{unique_suffix}', numbered=False
|
||||
)
|
||||
return str(temp_directory)
|
||||
|
||||
|
||||
TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
|
||||
@ -93,7 +108,6 @@ def base_container_image(request):
|
||||
def runtime(temp_dir, box_class, run_as_openhands):
|
||||
runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
|
||||
yield runtime
|
||||
runtime.close()
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
|
||||
@ -42,7 +42,7 @@ def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
|
||||
), 'The observation should be a CmdOutputObservation.'
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -56,7 +56,7 @@ def test_single_multiline_command(temp_dir, box_class):
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'foo' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -70,7 +70,7 @@ def test_multiline_echo(temp_dir, box_class):
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -85,7 +85,7 @@ def test_runtime_whitespace(temp_dir, box_class):
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '\r\n\r\n\r\n' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -137,7 +137,7 @@ world "
|
||||
assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
|
||||
assert 'hello\r\nworld "\r\n' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -153,7 +153,7 @@ def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
|
||||
assert 'hello\r\nworld' in obs.content
|
||||
assert '>' not in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -196,7 +196,7 @@ echo "success"
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert 'success' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -255,7 +255,7 @@ def test_cmd_run(temp_dir, box_class, run_as_openhands):
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -273,7 +273,7 @@ def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
|
||||
else:
|
||||
assert '/root' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -289,7 +289,7 @@ def test_multi_cmd_run_in_single_line(temp_dir, box_class):
|
||||
assert '/workspace' in obs.content
|
||||
assert 'total 0' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -318,7 +318,7 @@ def test_stateful_cmd(temp_dir, box_class):
|
||||
assert obs.exit_code == 0, 'The exit code should be 0.'
|
||||
assert '/workspace/test' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -332,7 +332,7 @@ def test_failed_cmd(temp_dir, box_class):
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -365,7 +365,7 @@ def test_copy_single_file(temp_dir, box_class):
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -414,7 +414,7 @@ def test_copy_directory_recursively(temp_dir, box_class):
|
||||
assert obs.exit_code == 0
|
||||
assert 'File 1 content' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -435,7 +435,7 @@ def test_copy_to_non_existent_directory(temp_dir, box_class):
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -470,7 +470,7 @@ def test_overwrite_existing_file(temp_dir, box_class):
|
||||
assert obs.exit_code == 0
|
||||
assert 'Hello, World!' in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -490,7 +490,7 @@ def test_copy_non_existent_file(temp_dir, box_class):
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code != 0 # File should not exist
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -517,7 +517,7 @@ def test_keep_prompt(box_class, temp_dir):
|
||||
assert obs.exit_code == 0
|
||||
assert 'root@' not in obs.content
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -595,7 +595,5 @@ def test_git_operation(box_class):
|
||||
assert isinstance(obs, CmdOutputObservation)
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
@ -66,7 +66,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -111,5 +111,5 @@ def test_browsergym_eval_env(box_class, temp_dir):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert json.loads(obs.content) == [0.0]
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
@ -30,7 +30,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
|
||||
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -66,5 +66,5 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
|
||||
and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
|
||||
)
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
@ -45,7 +45,7 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'pip' in obs.content # Check that pip is available
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -68,7 +68,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'v22' in obs.content # Check for specific version
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -90,5 +90,5 @@ def test_go_version(temp_dir, box_class, base_container_image):
|
||||
assert obs.exit_code == 0
|
||||
assert 'go1.23' in obs.content # Check for specific version
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
@ -90,7 +90,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -164,7 +164,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -188,7 +188,7 @@ def test_ipython_simple(temp_dir, box_class):
|
||||
).strip()
|
||||
)
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -309,7 +309,7 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
assert obs.exit_code == 0
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -323,7 +323,7 @@ def test_ipython_agentskills_fileop_pwd(
|
||||
)
|
||||
_test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -392,7 +392,7 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
).strip().split('\n')
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
@ -428,5 +428,5 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
|
||||
'[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
|
||||
)
|
||||
|
||||
runtime.close()
|
||||
runtime.close(rm_all_containers=False)
|
||||
time.sleep(1)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user