mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
fix: runtime test for mac (#3005)
* move use_host_network to sandbox config
* fix test runtime tests
* fix kwargs to make it clearer
This commit is contained in:
parent
d6642c26be
commit
ff6ddc831f
@ -25,9 +25,6 @@ workspace_base = "./workspace"
|
||||
# Disable color in terminal output
|
||||
#disable_color = false
|
||||
|
||||
# Enable auto linting after editing
|
||||
#enable_auto_lint = false
|
||||
|
||||
# Enable saving and restoring the session when run from CLI
|
||||
#enable_cli_session = false
|
||||
|
||||
@ -76,8 +73,6 @@ persist_sandbox = false
|
||||
# SSH port for the sandbox
|
||||
#ssh_port = 63710
|
||||
|
||||
# Use host network
|
||||
#use_host_network = false
|
||||
|
||||
# Name of the default agent
|
||||
#default_agent = "CodeActAgent"
|
||||
@ -197,6 +192,12 @@ llm_config = 'gpt3'
|
||||
# Container image to use for the sandbox
|
||||
#container_image = "ghcr.io/opendevin/sandbox:main"
|
||||
|
||||
# Use host network
|
||||
#use_host_network = false
|
||||
|
||||
# Enable auto linting after editing
|
||||
#enable_auto_lint = false
|
||||
|
||||
#################################### Eval ####################################
|
||||
# Configuration for the evaluation, please refer to the specific evaluation
|
||||
# plugin for the available options
|
||||
|
||||
@ -33,13 +33,15 @@ workspace_mount_path = "/path/to/your/workspace"
|
||||
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
run_as_devin = false
|
||||
|
||||
[sandbox]
|
||||
# SWEBench eval specific - but you can tweak it to your needs
|
||||
use_host_network = false
|
||||
run_as_devin = false
|
||||
# linting python after editing helps LLM fix indentations
|
||||
enable_auto_lint = true
|
||||
|
||||
[sandbox]
|
||||
|
||||
box_type = "ssh"
|
||||
timeout = 120
|
||||
|
||||
|
||||
@ -20,12 +20,12 @@ workspace_mount_path = "/path/to/workspace"
|
||||
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
use_host_network = false
|
||||
# AgentBench specific
|
||||
run_as_devin = true
|
||||
enable_auto_lint = true
|
||||
|
||||
[sandbox]
|
||||
use_host_network = false
|
||||
enable_auto_lint = true
|
||||
box_type = "ssh"
|
||||
timeout = 120
|
||||
|
||||
|
||||
@ -217,7 +217,7 @@ class BiocoderSSHBox(DockerSSHBox):
|
||||
config.workspace_mount_path = workspace_base
|
||||
|
||||
# linting python after editing helps LLM fix indentations
|
||||
config.enable_auto_lint = True
|
||||
config.sandbox.enable_auto_lint = True
|
||||
|
||||
# create folder for transferring files back/forth
|
||||
biocoder_cache_folder = 'biocoder_cache'
|
||||
@ -268,7 +268,7 @@ class BiocoderSSHBox(DockerSSHBox):
|
||||
f.write(json.dumps(testcase_json, indent=4))
|
||||
|
||||
# linting python after editing helps LLM fix indentations
|
||||
config.enable_auto_lint = True
|
||||
config.sandbox.enable_auto_lint = True
|
||||
|
||||
sandbox = cls(
|
||||
container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
|
||||
|
||||
@ -18,6 +18,8 @@ Add the following configurations:
|
||||
max_iterations = 100
|
||||
cache_dir = "/tmp/cache"
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
[sandbox]
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
|
||||
@ -36,6 +36,8 @@ Add the following configurations:
|
||||
max_iterations = 100
|
||||
cache_dir = "/tmp/cache"
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
[sandbox]
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
|
||||
@ -18,6 +18,8 @@ Add the following configurations:
|
||||
max_iterations = 100
|
||||
cache_dir = "/tmp/cache"
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
[sandbox]
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
|
||||
@ -13,6 +13,8 @@ Add the following configurations:
|
||||
max_iterations = 100
|
||||
cache_dir = "/tmp/cache"
|
||||
ssh_hostname = "localhost"
|
||||
|
||||
[sandbox]
|
||||
enable_auto_lint = true
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
|
||||
@ -25,10 +25,13 @@ Add the following configurations:
|
||||
max_iterations = 100
|
||||
cache_dir = "/tmp/cache"
|
||||
ssh_hostname = "localhost"
|
||||
enable_auto_lint = true
|
||||
run_as_devin = false
|
||||
sandbox_container_image = "public.ecr.aws/i5g0m1f6/ml-bench" # Use the latest image from the ML-Bench repository
|
||||
|
||||
[sandbox]
|
||||
enable_auto_lint = true
|
||||
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[llm.eval_gpt4_1106_preview]
|
||||
model = "gpt-4-1106-preview"
|
||||
|
||||
@ -50,11 +50,13 @@ ssh_hostname = "localhost"
|
||||
box_type = "ssh"
|
||||
timeout = 120
|
||||
|
||||
run_as_devin = false
|
||||
max_budget_per_task = 4 # 4 USD
|
||||
|
||||
[sandbox]
|
||||
# SWEBench eval specific
|
||||
use_host_network = false
|
||||
run_as_devin = false
|
||||
enable_auto_lint = true
|
||||
max_budget_per_task = 4 # 4 USD
|
||||
|
||||
# TODO: Change these to the model you want to evaluate
|
||||
[llm.eval_gpt4_1106_preview_llm]
|
||||
|
||||
@ -139,7 +139,9 @@ class SandboxConfig(metaclass=Singleton):
|
||||
container_image: The container image to use for the sandbox.
|
||||
user_id: The user ID for the sandbox.
|
||||
timeout: The timeout for the sandbox.
|
||||
|
||||
enable_auto_lint: Whether to enable auto-lint.
|
||||
use_host_network: Whether to use the host network.
|
||||
initialize_plugins: Whether to initialize plugins.
|
||||
"""
|
||||
|
||||
box_type: str = 'ssh'
|
||||
@ -153,6 +155,7 @@ class SandboxConfig(metaclass=Singleton):
|
||||
enable_auto_lint: bool = (
|
||||
False # once enabled, OpenDevin would lint files after editing
|
||||
)
|
||||
use_host_network: bool = False
|
||||
initialize_plugins: bool = True
|
||||
|
||||
def defaults_to_dict(self) -> dict:
|
||||
@ -201,7 +204,6 @@ class AppConfig(metaclass=Singleton):
|
||||
max_iterations: The maximum number of iterations.
|
||||
max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop.
|
||||
e2b_api_key: The E2B API key.
|
||||
use_host_network: Whether to use the host network.
|
||||
ssh_hostname: The SSH hostname.
|
||||
disable_color: Whether to disable color. For terminals that don't support color.
|
||||
debug: Whether to enable debugging.
|
||||
@ -230,7 +232,6 @@ class AppConfig(metaclass=Singleton):
|
||||
max_iterations: int = 100
|
||||
max_budget_per_task: float | None = None
|
||||
e2b_api_key: str = ''
|
||||
use_host_network: bool = False
|
||||
ssh_hostname: str = 'localhost'
|
||||
disable_color: bool = False
|
||||
persist_sandbox: bool = False
|
||||
@ -531,7 +532,7 @@ def finalize_config(cfg: AppConfig):
|
||||
if llm.embedding_base_url is None:
|
||||
llm.embedding_base_url = llm.base_url
|
||||
|
||||
if cfg.use_host_network and platform.system() == 'Darwin':
|
||||
if cfg.sandbox.use_host_network and platform.system() == 'Darwin':
|
||||
logger.opendevin_logger.warning(
|
||||
'Please upgrade to Docker Desktop 4.29.0 or later to use host network mode on macOS. '
|
||||
'See https://github.com/docker/roadmap/issues/238#issuecomment-2044688144 for more information.'
|
||||
|
||||
@ -64,7 +64,9 @@ class RuntimeClient:
|
||||
self.__bash_PS1 = r'[PEXPECT_BEGIN] \u@\h:\w [PEXPECT_END]'
|
||||
|
||||
# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
|
||||
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\] ([a-z_][a-z0-9_-]*)@([a-zA-Z][a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
|
||||
self.__bash_expect_regex = (
|
||||
r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]'
|
||||
)
|
||||
|
||||
self.shell.sendline(f'export PS1="{self.__bash_PS1}"')
|
||||
self.shell.expect(self.__bash_expect_regex)
|
||||
|
||||
@ -118,6 +118,17 @@ class EventStreamRuntime(Runtime):
|
||||
if plugins is None:
|
||||
plugins = []
|
||||
plugin_names = ' '.join([plugin.name for plugin in plugins])
|
||||
|
||||
network_mode: str | None = None
|
||||
port_mapping: dict[str, int] | None = None
|
||||
if self.sandbox_config.use_host_network:
|
||||
network_mode = 'host'
|
||||
logger.warn(
|
||||
'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop'
|
||||
)
|
||||
else:
|
||||
port_mapping = {f'{self._port}/tcp': self._port}
|
||||
|
||||
container = self.docker_client.containers.run(
|
||||
self.container_image,
|
||||
command=(
|
||||
@ -127,7 +138,8 @@ class EventStreamRuntime(Runtime):
|
||||
f'--working-dir {sandbox_workspace_dir} '
|
||||
f'--plugins {plugin_names}'
|
||||
),
|
||||
network_mode='host',
|
||||
network_mode=network_mode,
|
||||
ports=port_mapping,
|
||||
working_dir='/opendevin/code/',
|
||||
name=self.container_name,
|
||||
detach=True,
|
||||
@ -148,7 +160,7 @@ class EventStreamRuntime(Runtime):
|
||||
return self.session
|
||||
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_attempt(5),
|
||||
stop=tenacity.stop_after_attempt(10),
|
||||
wait=tenacity.wait_exponential(multiplier=2, min=4, max=600),
|
||||
)
|
||||
async def _wait_until_alive(self):
|
||||
|
||||
@ -120,7 +120,6 @@ class DockerSSHBox(Sandbox):
|
||||
workspace_mount_path: str,
|
||||
sandbox_workspace_dir: str,
|
||||
cache_dir: str,
|
||||
use_host_network: bool,
|
||||
run_as_devin: bool,
|
||||
ssh_hostname: str = 'host.docker.internal',
|
||||
ssh_password: str | None = None,
|
||||
@ -131,7 +130,7 @@ class DockerSSHBox(Sandbox):
|
||||
self.workspace_mount_path = workspace_mount_path
|
||||
self.sandbox_workspace_dir = sandbox_workspace_dir
|
||||
self.cache_dir = cache_dir
|
||||
self.use_host_network = use_host_network
|
||||
self.use_host_network = config.use_host_network
|
||||
self.run_as_devin = run_as_devin
|
||||
logger.info(
|
||||
f'SSHBox is running as {"opendevin" if self.run_as_devin else "root"} user with USER_ID={config.user_id} in the sandbox'
|
||||
@ -641,7 +640,6 @@ if __name__ == '__main__':
|
||||
workspace_mount_path='/path/to/workspace',
|
||||
cache_dir='/path/to/cache',
|
||||
sandbox_workspace_dir='/sandbox',
|
||||
use_host_network=False,
|
||||
persist_sandbox=False,
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@ -44,7 +44,6 @@ def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox:
|
||||
workspace_mount_path=config.workspace_mount_path,
|
||||
sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
|
||||
cache_dir=config.cache_dir,
|
||||
use_host_network=config.use_host_network,
|
||||
run_as_devin=config.run_as_devin,
|
||||
ssh_hostname=config.ssh_hostname,
|
||||
ssh_password=config.ssh_password,
|
||||
|
||||
@ -63,8 +63,10 @@ def _generate_dockerfile(
|
||||
dockerfile_content = (
|
||||
f'FROM {base_image}\n'
|
||||
# FIXME: make this more generic / cross-platform
|
||||
'RUN apt update && apt install -y wget sudo\n'
|
||||
'RUN apt-get update && apt-get install -y libgl1-mesa-glx\n' # Extra dependency for OpenCV
|
||||
# Install necessary packages
|
||||
# libgl1-mesa-glx is extra dependency for OpenCV
|
||||
'RUN apt-get update && apt-get install -y wget sudo libgl1-mesa-glx\n'
|
||||
'RUN apt-get clean && rm -rf /var/lib/apt/lists/*\n' # Clean up the apt cache to reduce image size
|
||||
'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
|
||||
'RUN echo "" > /opendevin/bash.bashrc\n'
|
||||
'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
|
||||
@ -150,13 +152,14 @@ def _build_sandbox_image(
|
||||
else:
|
||||
logger.info(str(log))
|
||||
|
||||
# check if the image is built successfully
|
||||
image = docker_client.images.get(target_image_name)
|
||||
if image is None:
|
||||
raise RuntimeError(f'Build failed: Image {target_image_name} not found')
|
||||
logger.info(f'Image {target_image_name} built successfully')
|
||||
except docker.errors.BuildError as e:
|
||||
logger.error(f'Sandbox image build failed: {e}')
|
||||
raise e
|
||||
except Exception as e:
|
||||
logger.error(f'An error occurred during sandbox image build: {e}')
|
||||
raise e
|
||||
|
||||
|
||||
def _get_new_image_name(base_image: str, dev_mode: bool = False) -> str:
|
||||
@ -200,7 +203,7 @@ def build_runtime_image(
|
||||
docker_client.images.pull(new_image_name)
|
||||
except Exception as e:
|
||||
logger.info(f'Error pulling image {new_image_name}, building it from scratch')
|
||||
logger.error(f'Error: {e}')
|
||||
logger.info(f'Non-fatal error: {e}')
|
||||
|
||||
# Detect if the sandbox image is built
|
||||
image_exists = _check_image_exists(new_image_name, docker_client)
|
||||
|
||||
@ -90,7 +90,6 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir):
|
||||
workspace_mount_path=config.workspace_mount_path,
|
||||
sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
|
||||
cache_dir=config.cache_dir,
|
||||
use_host_network=config.use_host_network,
|
||||
run_as_devin=config.run_as_devin,
|
||||
ssh_hostname=config.ssh_hostname,
|
||||
ssh_password=config.ssh_password,
|
||||
|
||||
@ -29,7 +29,9 @@ def temp_dir(monkeypatch):
|
||||
|
||||
|
||||
async def _load_runtime(box_class, event_stream, plugins, sid):
|
||||
sandbox_config = SandboxConfig()
|
||||
sandbox_config = SandboxConfig(
|
||||
use_host_network=False,
|
||||
)
|
||||
if box_class == EventStreamRuntime:
|
||||
runtime = EventStreamRuntime(
|
||||
sandbox_config=sandbox_config,
|
||||
@ -85,6 +87,8 @@ async def test_env_vars_os_environ():
|
||||
obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
|
||||
await runtime.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_env_vars_runtime_add_env_var():
|
||||
@ -105,6 +109,7 @@ async def test_env_vars_runtime_add_env_var():
|
||||
assert (
|
||||
obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
await runtime.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@ -126,6 +131,7 @@ async def test_env_vars_runtime_add_multiple_env_vars():
|
||||
assert (
|
||||
obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
await runtime.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@ -148,6 +154,7 @@ async def test_env_vars_runtime_add_env_var_overwrite():
|
||||
assert (
|
||||
obs.content.strip().split('\r\n')[0].strip() == 'xyz'
|
||||
), f'Output: [{obs.content}] for {box_class}'
|
||||
await runtime.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -26,7 +26,6 @@ def create_docker_box_from_app_config(
|
||||
workspace_mount_path=path,
|
||||
sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
|
||||
cache_dir=config.cache_dir,
|
||||
use_host_network=config.use_host_network,
|
||||
run_as_devin=True,
|
||||
ssh_hostname=config.ssh_hostname,
|
||||
ssh_password=config.ssh_password,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user