diff --git a/config.template.toml b/config.template.toml index c5fc9768a0..1ce48dd517 100644 --- a/config.template.toml +++ b/config.template.toml @@ -25,9 +25,6 @@ workspace_base = "./workspace" # Disable color in terminal output #disable_color = false -# Enable auto linting after editing -#enable_auto_lint = false - # Enable saving and restoring the session when run from CLI #enable_cli_session = false @@ -76,8 +73,6 @@ persist_sandbox = false # SSH port for the sandbox #ssh_port = 63710 -# Use host network -#use_host_network = false # Name of the default agent #default_agent = "CodeActAgent" @@ -197,6 +192,12 @@ llm_config = 'gpt3' # Container image to use for the sandbox #container_image = "ghcr.io/opendevin/sandbox:main" +# Use host network +#use_host_network = false + +# Enable auto linting after editing +#enable_auto_lint = false + #################################### Eval #################################### # Configuration for the evaluation, please refer to the specific evaluation # plugin for the available options diff --git a/evaluation/TUTORIAL.md b/evaluation/TUTORIAL.md index 2d78a7a87d..5906d7ff58 100644 --- a/evaluation/TUTORIAL.md +++ b/evaluation/TUTORIAL.md @@ -33,13 +33,15 @@ workspace_mount_path = "/path/to/your/workspace" ssh_hostname = "localhost" +run_as_devin = false + +[sandbox] # SWEBench eval specific - but you can tweak it to your needs use_host_network = false -run_as_devin = false # linting python after editing helps LLM fix indentations enable_auto_lint = true -[sandbox] + box_type = "ssh" timeout = 120 diff --git a/evaluation/agent_bench/README.md b/evaluation/agent_bench/README.md index afcea7a2c3..6da710e1c1 100644 --- a/evaluation/agent_bench/README.md +++ b/evaluation/agent_bench/README.md @@ -20,12 +20,12 @@ workspace_mount_path = "/path/to/workspace" ssh_hostname = "localhost" -use_host_network = false # AgentBench specific run_as_devin = true -enable_auto_lint = true [sandbox] +use_host_network = false +enable_auto_lint = true box_type = "ssh" timeout = 120 diff --git a/evaluation/biocoder/biocoder_env_box.py b/evaluation/biocoder/biocoder_env_box.py index 9b2667f0cf..cdf1248d04 100644 --- a/evaluation/biocoder/biocoder_env_box.py +++ b/evaluation/biocoder/biocoder_env_box.py @@ -217,7 +217,7 @@ class BiocoderSSHBox(DockerSSHBox): config.workspace_mount_path = workspace_base # linting python after editing helps LLM fix indentations - config.enable_auto_lint = True + config.sandbox.enable_auto_lint = True # create folder for transferring files back/forth biocoder_cache_folder = 'biocoder_cache' @@ -268,7 +268,7 @@ class BiocoderSSHBox(DockerSSHBox): f.write(json.dumps(testcase_json, indent=4)) # linting python after editing helps LLM fix indentations - config.enable_auto_lint = True + config.sandbox.enable_auto_lint = True sandbox = cls( container_image=BIOCODER_BENCH_CONTAINER_IMAGE, diff --git a/evaluation/bird/README.md b/evaluation/bird/README.md index dc30bca29e..05e0fd8021 100644 --- a/evaluation/bird/README.md +++ b/evaluation/bird/README.md @@ -18,6 +18,8 @@ Add the following configurations: max_iterations = 100 cache_dir = "/tmp/cache" ssh_hostname = "localhost" + +[sandbox] enable_auto_lint = true # TODO: Change these to the model you want to evaluate diff --git a/evaluation/gpqa/README.md b/evaluation/gpqa/README.md index 8bc4785957..330da56762 100644 --- a/evaluation/gpqa/README.md +++ b/evaluation/gpqa/README.md @@ -36,6 +36,8 @@ Add the following configurations: max_iterations = 100 cache_dir = "/tmp/cache" ssh_hostname = "localhost" + +[sandbox] enable_auto_lint = true # TODO: Change these to the model you want to evaluate diff --git a/evaluation/humanevalfix/README.md b/evaluation/humanevalfix/README.md index 7ac679ddbd..d231be1a61 100644 --- a/evaluation/humanevalfix/README.md +++ b/evaluation/humanevalfix/README.md @@ -18,6 +18,8 @@ Add the following configurations: max_iterations = 100 cache_dir = "/tmp/cache" ssh_hostname = "localhost" + +[sandbox] enable_auto_lint = true # TODO: Change these to the model you want to evaluate diff --git a/evaluation/logic_reasoning/README.md b/evaluation/logic_reasoning/README.md index d2b21325ad..c0e313cf8b 100644 --- a/evaluation/logic_reasoning/README.md +++ b/evaluation/logic_reasoning/README.md @@ -13,6 +13,8 @@ Add the following configurations: max_iterations = 100 cache_dir = "/tmp/cache" ssh_hostname = "localhost" + +[sandbox] enable_auto_lint = true # TODO: Change these to the model you want to evaluate diff --git a/evaluation/ml_bench/README.md b/evaluation/ml_bench/README.md index 9cf1acbb6f..51e59cad79 100644 --- a/evaluation/ml_bench/README.md +++ b/evaluation/ml_bench/README.md @@ -25,10 +25,13 @@ Add the following configurations: max_iterations = 100 cache_dir = "/tmp/cache" ssh_hostname = "localhost" -enable_auto_lint = true run_as_devin = false sandbox_container_image = "public.ecr.aws/i5g0m1f6/ml-bench" # Use the latest image from the ML-Bench repository +[sandbox] +enable_auto_lint = true + + # TODO: Change these to the model you want to evaluate [llm.eval_gpt4_1106_preview] model = "gpt-4-1106-preview" diff --git a/evaluation/swe_bench/README.md b/evaluation/swe_bench/README.md index 7e75915472..39713ecfc9 100644 --- a/evaluation/swe_bench/README.md +++ b/evaluation/swe_bench/README.md @@ -50,11 +50,13 @@ ssh_hostname = "localhost" box_type = "ssh" timeout = 120 +run_as_devin = false +max_budget_per_task = 4 # 4 USD + +[sandbox] # SWEBench eval specific use_host_network = false -run_as_devin = false enable_auto_lint = true -max_budget_per_task = 4 # 4 USD # TODO: Change these to the model you want to evaluate [llm.eval_gpt4_1106_preview_llm] diff --git a/opendevin/core/config.py b/opendevin/core/config.py index de33d4d197..77e8eb049b 100644 --- a/opendevin/core/config.py +++ b/opendevin/core/config.py @@ -139,7 +139,9 @@ class SandboxConfig(metaclass=Singleton): container_image: The container image to use for the sandbox. user_id: The user ID for the sandbox. timeout: The timeout for the sandbox. - + enable_auto_lint: Whether to enable auto-lint. + use_host_network: Whether to use the host network. + initialize_plugins: Whether to initialize plugins. """ box_type: str = 'ssh' @@ -153,6 +155,7 @@ class SandboxConfig(metaclass=Singleton): enable_auto_lint: bool = ( False # once enabled, OpenDevin would lint files after editing ) + use_host_network: bool = False initialize_plugins: bool = True def defaults_to_dict(self) -> dict: @@ -201,7 +204,6 @@ class AppConfig(metaclass=Singleton): max_iterations: The maximum number of iterations. max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop. e2b_api_key: The E2B API key. - use_host_network: Whether to use the host network. ssh_hostname: The SSH hostname. disable_color: Whether to disable color. For terminals that don't support color. debug: Whether to enable debugging. @@ -230,7 +232,6 @@ class AppConfig(metaclass=Singleton): max_iterations: int = 100 max_budget_per_task: float | None = None e2b_api_key: str = '' - use_host_network: bool = False ssh_hostname: str = 'localhost' disable_color: bool = False persist_sandbox: bool = False @@ -531,7 +532,7 @@ def finalize_config(cfg: AppConfig): if llm.embedding_base_url is None: llm.embedding_base_url = llm.base_url - if cfg.use_host_network and platform.system() == 'Darwin': + if cfg.sandbox.use_host_network and platform.system() == 'Darwin': logger.opendevin_logger.warning( 'Please upgrade to Docker Desktop 4.29.0 or later to use host network mode on macOS. ' 'See https://github.com/docker/roadmap/issues/238#issuecomment-2044688144 for more information.' diff --git a/opendevin/runtime/client/client.py b/opendevin/runtime/client/client.py index a33e977ce3..30b2236ca3 100644 --- a/opendevin/runtime/client/client.py +++ b/opendevin/runtime/client/client.py @@ -64,7 +64,9 @@ class RuntimeClient: self.__bash_PS1 = r'[PEXPECT_BEGIN] \u@\h:\w [PEXPECT_END]' # This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed - self.__bash_expect_regex = r'\[PEXPECT_BEGIN\] ([a-z_][a-z0-9_-]*)@([a-zA-Z][a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]' + self.__bash_expect_regex = ( + r'\[PEXPECT_BEGIN\] ([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+) \[PEXPECT_END\]' + ) self.shell.sendline(f'export PS1="{self.__bash_PS1}"') self.shell.expect(self.__bash_expect_regex) diff --git a/opendevin/runtime/client/runtime.py b/opendevin/runtime/client/runtime.py index 40c5214205..3220db514d 100644 --- a/opendevin/runtime/client/runtime.py +++ b/opendevin/runtime/client/runtime.py @@ -118,6 +118,17 @@ class EventStreamRuntime(Runtime): if plugins is None: plugins = [] plugin_names = ' '.join([plugin.name for plugin in plugins]) + + network_mode: str | None = None + port_mapping: dict[str, int] | None = None + if self.sandbox_config.use_host_network: + network_mode = 'host' + logger.warn( + 'Using host network mode. If you are using MacOS, please make sure you have the latest version of Docker Desktop and enabled host network feature: https://docs.docker.com/network/drivers/host/#docker-desktop' + ) + else: + port_mapping = {f'{self._port}/tcp': self._port} + container = self.docker_client.containers.run( self.container_image, command=( @@ -127,7 +138,8 @@ class EventStreamRuntime(Runtime): f'--working-dir {sandbox_workspace_dir} ' f'--plugins {plugin_names}' ), - network_mode='host', + network_mode=network_mode, + ports=port_mapping, working_dir='/opendevin/code/', name=self.container_name, detach=True, @@ -148,7 +160,7 @@ class EventStreamRuntime(Runtime): return self.session @tenacity.retry( - stop=tenacity.stop_after_attempt(5), + stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_exponential(multiplier=2, min=4, max=600), ) async def _wait_until_alive(self): diff --git a/opendevin/runtime/docker/ssh_box.py b/opendevin/runtime/docker/ssh_box.py index 59919266e4..9bd7423cf2 100644 --- a/opendevin/runtime/docker/ssh_box.py +++ b/opendevin/runtime/docker/ssh_box.py @@ -120,7 +120,6 @@ class DockerSSHBox(Sandbox): workspace_mount_path: str, sandbox_workspace_dir: str, cache_dir: str, - use_host_network: bool, run_as_devin: bool, ssh_hostname: str = 'host.docker.internal', ssh_password: str | None = None, @@ -131,7 +130,7 @@ class DockerSSHBox(Sandbox): self.workspace_mount_path = workspace_mount_path self.sandbox_workspace_dir = sandbox_workspace_dir self.cache_dir = cache_dir - self.use_host_network = use_host_network + self.use_host_network = config.use_host_network self.run_as_devin = run_as_devin logger.info( f'SSHBox is running as {"opendevin" if self.run_as_devin else "root"} user with USER_ID={config.user_id} in the sandbox' @@ -641,7 +640,6 @@ if __name__ == '__main__': workspace_mount_path='/path/to/workspace', cache_dir='/path/to/cache', sandbox_workspace_dir='/sandbox', - use_host_network=False, persist_sandbox=False, ) except Exception as e: diff --git a/opendevin/runtime/server/runtime.py b/opendevin/runtime/server/runtime.py index 951f81115d..efc2bd3647 100644 --- a/opendevin/runtime/server/runtime.py +++ b/opendevin/runtime/server/runtime.py @@ -44,7 +44,6 @@ def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox: workspace_mount_path=config.workspace_mount_path, sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, cache_dir=config.cache_dir, - use_host_network=config.use_host_network, run_as_devin=config.run_as_devin, ssh_hostname=config.ssh_hostname, ssh_password=config.ssh_password, diff --git a/opendevin/runtime/utils/runtime_build.py b/opendevin/runtime/utils/runtime_build.py index e31cdb3689..b49e777906 100644 --- a/opendevin/runtime/utils/runtime_build.py +++ b/opendevin/runtime/utils/runtime_build.py @@ -63,8 +63,10 @@ def _generate_dockerfile( dockerfile_content = ( f'FROM {base_image}\n' # FIXME: make this more generic / cross-platform - 'RUN apt update && apt install -y wget sudo\n' - 'RUN apt-get update && apt-get install -y libgl1-mesa-glx\n' # Extra dependency for OpenCV + # Install necessary packages + # libgl1-mesa-glx is extra dependency for OpenCV + 'RUN apt-get update && apt-get install -y wget sudo libgl1-mesa-glx\n' + 'RUN apt-get clean && rm -rf /var/lib/apt/lists/*\n' # Clean up the apt cache to reduce image size 'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n' 'RUN echo "" > /opendevin/bash.bashrc\n' 'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n' @@ -150,13 +152,14 @@ def _build_sandbox_image( else: logger.info(str(log)) + # check if the image is built successfully + image = docker_client.images.get(target_image_name) + if image is None: + raise RuntimeError(f'Build failed: Image {target_image_name} not found') logger.info(f'Image {target_image_name} built successfully') except docker.errors.BuildError as e: logger.error(f'Sandbox image build failed: {e}') raise e - except Exception as e: - logger.error(f'An error occurred during sandbox image build: {e}') - raise e def _get_new_image_name(base_image: str, dev_mode: bool = False) -> str: @@ -200,7 +203,7 @@ def build_runtime_image( docker_client.images.pull(new_image_name) except Exception as e: logger.info(f'Error pulling image {new_image_name}, building it from scratch') - logger.error(f'Error: {e}') + logger.info(f'Non-fatal error: {e}') # Detect if the sandbox image is built image_exists = _check_image_exists(new_image_name, docker_client) diff --git a/tests/unit/test_ipython.py b/tests/unit/test_ipython.py index c789940330..4e52487e47 100644 --- a/tests/unit/test_ipython.py +++ b/tests/unit/test_ipython.py @@ -90,7 +90,6 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir): workspace_mount_path=config.workspace_mount_path, sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, cache_dir=config.cache_dir, - use_host_network=config.use_host_network, run_as_devin=config.run_as_devin, ssh_hostname=config.ssh_hostname, ssh_password=config.ssh_password, diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py index 7fbb8dd2ea..0ef9a5413b 100644 --- a/tests/unit/test_runtime.py +++ b/tests/unit/test_runtime.py @@ -29,7 +29,9 @@ def temp_dir(monkeypatch): async def _load_runtime(box_class, event_stream, plugins, sid): - sandbox_config = SandboxConfig() + sandbox_config = SandboxConfig( + use_host_network=False, + ) if box_class == EventStreamRuntime: runtime = EventStreamRuntime( sandbox_config=sandbox_config, @@ -85,6 +87,8 @@ async def test_env_vars_os_environ(): obs.content.strip().split('\n\r')[0].strip() == 'BAZ' ), f'Output: [{obs.content}] for {box_class}' + await runtime.close() + @pytest.mark.asyncio async def test_env_vars_runtime_add_env_var(): @@ -105,6 +109,7 @@ async def test_env_vars_runtime_add_env_var(): assert ( obs.content.strip().split('\r\n')[0].strip() == 'abc"def' ), f'Output: [{obs.content}] for {box_class}' + await runtime.close() @pytest.mark.asyncio @@ -126,6 +131,7 @@ async def test_env_vars_runtime_add_multiple_env_vars(): assert ( obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz' ), f'Output: [{obs.content}] for {box_class}' + await runtime.close() @pytest.mark.asyncio @@ -148,6 +154,7 @@ async def test_env_vars_runtime_add_env_var_overwrite(): assert ( obs.content.strip().split('\r\n')[0].strip() == 'xyz' ), f'Output: [{obs.content}] for {box_class}' + await runtime.close() @pytest.mark.asyncio diff --git a/tests/unit/test_sandbox.py b/tests/unit/test_sandbox.py index 3e8d25b6b4..aec3804fc4 100644 --- a/tests/unit/test_sandbox.py +++ b/tests/unit/test_sandbox.py @@ -26,7 +26,6 @@ def create_docker_box_from_app_config( workspace_mount_path=path, sandbox_workspace_dir=config.workspace_mount_path_in_sandbox, cache_dir=config.cache_dir, - use_host_network=config.use_host_network, run_as_devin=True, ssh_hostname=config.ssh_hostname, ssh_password=config.ssh_password,