mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
feat: support running docker runtime stresstest in CI (#6100)
Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
This commit is contained in:
parent
affbc49b08
commit
77aa843d53
@ -35,6 +35,10 @@ class SandboxConfig:
|
||||
remote_runtime_resource_factor: Factor to scale the resource allocation for remote runtime.
|
||||
Must be one of [1, 2, 4, 8]. Will only be used if the runtime is remote.
|
||||
enable_gpu: Whether to enable GPU.
|
||||
docker_runtime_kwargs: Additional keyword arguments to pass to the Docker runtime when running containers.
|
||||
This should be a JSON string that will be parsed into a dictionary of keyword arguments.
|
||||
Example in config.toml:
|
||||
docker_runtime_kwargs = '{"mem_limit": "4g", "cpu_quota": 100000}'
|
||||
"""
|
||||
|
||||
remote_runtime_api_url: str = 'http://localhost:8000'
|
||||
@ -61,6 +65,7 @@ class SandboxConfig:
|
||||
close_delay: int = 900
|
||||
remote_runtime_resource_factor: int = 1
|
||||
enable_gpu: bool = False
|
||||
docker_runtime_kwargs: str | None = None
|
||||
|
||||
def defaults_to_dict(self) -> dict:
|
||||
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
|
||||
|
||||
@ -267,13 +267,11 @@ class DockerRuntime(ActionExecutionClient):
|
||||
environment=environment,
|
||||
volumes=volumes,
|
||||
device_requests=(
|
||||
[docker.types.DeviceRequest(
|
||||
capabilities=[['gpu']],
|
||||
count=-1
|
||||
)]
|
||||
[docker.types.DeviceRequest(capabilities=[['gpu']], count=-1)]
|
||||
if self.config.sandbox.enable_gpu
|
||||
else None
|
||||
),
|
||||
**(self.config.sandbox.docker_runtime_kwargs or {}),
|
||||
)
|
||||
self.log('debug', f'Container started. Server url: {self.api_url}')
|
||||
self.send_status_message('STATUS$CONTAINER_STARTED')
|
||||
|
||||
@ -215,6 +215,7 @@ def _load_runtime(
|
||||
use_workspace: bool | None = None,
|
||||
force_rebuild_runtime: bool = False,
|
||||
runtime_startup_env_vars: dict[str, str] | None = None,
|
||||
docker_runtime_kwargs: dict[str, str] | None = None,
|
||||
) -> Runtime:
|
||||
sid = 'rt_' + str(random.randint(100000, 999999))
|
||||
|
||||
@ -226,6 +227,7 @@ def _load_runtime(
|
||||
config.run_as_openhands = run_as_openhands
|
||||
config.sandbox.force_rebuild_runtime = force_rebuild_runtime
|
||||
config.sandbox.keep_runtime_alive = False
|
||||
config.sandbox.docker_runtime_kwargs = docker_runtime_kwargs
|
||||
# Folder where all tests create their own folder
|
||||
global test_mount_path
|
||||
if use_workspace:
|
||||
|
||||
@ -1,18 +1,21 @@
|
||||
"""Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox."""
|
||||
|
||||
import pytest
|
||||
from conftest import TEST_IN_CI, _close_test_runtime, _load_runtime
|
||||
from conftest import _close_test_runtime, _load_runtime
|
||||
|
||||
from openhands.core.logger import openhands_logger as logger
|
||||
from openhands.events.action import CmdRunAction
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
TEST_IN_CI,
|
||||
reason='This test should only be run locally, not in CI.',
|
||||
)
|
||||
def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
|
||||
runtime = _load_runtime(temp_dir, runtime_cls)
|
||||
runtime = _load_runtime(
|
||||
temp_dir,
|
||||
runtime_cls,
|
||||
docker_runtime_kwargs={
|
||||
'cpu_period': 100000, # 100ms
|
||||
'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU)
|
||||
'mem_limit': '4G', # 4 GB of memory
|
||||
},
|
||||
)
|
||||
|
||||
action = CmdRunAction(
|
||||
command='sudo apt-get update && sudo apt-get install -y stress-ng'
|
||||
@ -23,11 +26,9 @@ def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1):
|
||||
assert obs.exit_code == 0
|
||||
|
||||
for _ in range(repeat):
|
||||
# run stress-ng stress tests for 5 minutes
|
||||
# FIXME: this would make Docker daemon die, even though running this
|
||||
# command on its own in the same container is fine
|
||||
action = CmdRunAction(command='stress-ng --all 1 -t 5m')
|
||||
action.timeout = 600
|
||||
# run stress-ng stress tests for 1 minute
|
||||
action = CmdRunAction(command='stress-ng --all 1 -t 1m')
|
||||
action.timeout = 120
|
||||
logger.info(action, extra={'msg_type': 'ACTION'})
|
||||
obs = runtime.run_action(action)
|
||||
logger.info(obs, extra={'msg_type': 'OBSERVATION'})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user