[Arch] Shrink runtime image size (#3051)

* test_runtime_client.py to test _execute_bash()

* runtime_build and runtime tweaks

* fix in docker script

* revert bash changes

* use sandbox_config.update_source_code to control source code update

* add od_version to the sandbox tag

* add doc instruction for update source code

* do not remove whole poetry folder;
add mamba clean

* add missing newlines

---------

Co-authored-by: tobitege <tobitege@gmx.de>
This commit is contained in:
Xingyao Wang 2024-07-22 02:34:45 +08:00 committed by GitHub
parent f3c23e8039
commit ce8a11a62f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 128 additions and 37 deletions

3
.gitignore vendored
View File

@ -210,6 +210,7 @@ cache
# configuration
config.toml
config.toml_
config.toml.bak
containers/agnostic_sandbox
@ -217,3 +218,5 @@ containers/agnostic_sandbox
# swe-bench-eval
image_build_logs
run_instance_logs
od_runtime_*.tar

View File

@ -142,6 +142,8 @@ class SandboxConfig(metaclass=Singleton):
enable_auto_lint: Whether to enable auto-lint.
use_host_network: Whether to use the host network.
initialize_plugins: Whether to initialize plugins.
update_source_code: Whether to update the source code in the EventStreamRuntime.
Used for development of EventStreamRuntime.
"""
box_type: str = 'ssh'
@ -157,6 +159,7 @@ class SandboxConfig(metaclass=Singleton):
)
use_host_network: bool = False
initialize_plugins: bool = True
update_source_code: bool = False
def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

View File

@ -1,3 +1,14 @@
"""
This is the main file for the runtime client.
It is responsible for executing actions received from OpenDevin backend and producing observations.
NOTE: this will be executed inside the docker sandbox.
If you already have a pre-built docker image but have changed the code in this file OR its dependencies, you need to rebuild the docker image to update the source code.
You should add SANDBOX_UPDATE_SOURCE_CODE=True to any `python XXX.py` command you run to update the source code.
"""
import argparse
import asyncio
import os

View File

@ -81,14 +81,15 @@ class EventStreamRuntime(Runtime):
# NOTE: You need to set SANDBOX_UPDATE_SOURCE_CODE=True to update the source code
# inside the container. This is useful when you want to test/debug the
# latest code in the runtime docker container.
update_source_code=config.debug,
update_source_code=self.sandbox_config.update_source_code,
)
self.container = await self._init_container(
self.sandbox_workspace_dir,
mount_dir=config.workspace_mount_path,
plugins=self.plugins,
)
# Initialize the env vars
# MUST call super().ainit() to initialize both default env vars
# AND the ones in env vars!
await super().ainit(env_vars)
@staticmethod

View File

@ -74,8 +74,9 @@ class Runtime:
This method should be called after the runtime's constructor.
"""
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
await self.add_env_vars(self.DEFAULT_ENV_VARS)
if self.DEFAULT_ENV_VARS:
logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
await self.add_env_vars(self.DEFAULT_ENV_VARS)
if env_vars is not None:
logger.debug(f'Adding provided env vars: {env_vars}')
await self.add_env_vars(env_vars)

View File

@ -3,14 +3,23 @@ import os
import shutil
import subprocess
import tempfile
from importlib.metadata import version
import docker
import toml
import opendevin
from opendevin.core.logger import opendevin_logger as logger
def _get_package_version() -> str:
    """Return the project version declared in ``pyproject.toml``.

    The file is looked up in the directory that contains the imported
    ``opendevin`` package, and the value stored under
    ``[tool.poetry] version`` is returned. The version is read from the file
    directly because the other source (installed package metadata) may be
    outdated.

    Raises:
        OSError: If ``pyproject.toml`` cannot be opened.
        KeyError: If the ``tool.poetry.version`` entry is missing.
    """
    package_dir = os.path.dirname(os.path.abspath(opendevin.__file__))
    project_root = os.path.dirname(package_dir)
    pyproject_path = os.path.join(project_root, 'pyproject.toml')
    # TOML documents are UTF-8 by specification, so do not rely on the
    # platform's default text encoding when reading the file.
    with open(pyproject_path, 'r', encoding='utf-8') as f:
        pyproject_data = toml.load(f)
    return pyproject_data['tool']['poetry']['version']
def _create_project_source_dist():
"""Create a source distribution of the project. Return the path to the tarball."""
# Copy the project directory to the container
@ -24,8 +33,10 @@ def _create_project_source_dist():
logger.error(f'Build failed: {result}')
raise Exception(f'Build failed: {result}')
# Fetch the correct version from pyproject.toml
package_version = _get_package_version()
tarball_path = os.path.join(
project_root, 'dist', f'opendevin-{version("opendevin")}.tar.gz'
project_root, 'dist', f'opendevin-{package_version}.tar.gz'
)
if not os.path.exists(tarball_path):
logger.error(f'Source distribution not found at {tarball_path}')
@ -60,44 +71,64 @@ def _generate_dockerfile(
if skip_init:
dockerfile_content = f'FROM {base_image}\n'
else:
# Ubuntu 22.x has libgl1-mesa-glx, but 24.x and above have libgl1!
if 'ubuntu' in base_image and (
base_image.endswith(':latest') or base_image.endswith(':24.04')
):
LIBGL_MESA = 'libgl1'
else:
LIBGL_MESA = 'libgl1-mesa-glx'
dockerfile_content = (
f'FROM {base_image}\n'
# FIXME: make this more generic / cross-platform
# Install necessary packages
# libgl1-mesa-glx is extra dependency for OpenCV
'RUN apt-get update && apt-get install -y wget sudo libgl1-mesa-glx\n'
'RUN apt-get clean && rm -rf /var/lib/apt/lists/*\n' # Clean up the apt cache to reduce image size
'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
'RUN echo "" > /opendevin/bash.bashrc\n'
'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
' wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
' bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
' rm Miniforge3.sh && \\\n'
' chmod -R g+w /opendevin/miniforge3 && \\\n'
' bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
' fi\n'
# Install necessary packages and clean up in one layer
f'RUN apt-get update && apt-get install -y wget sudo apt-utils {LIBGL_MESA} libasound2-plugins && \\\n'
f' apt-get clean && rm -rf /var/lib/apt/lists/*\n'
# Create necessary directories
f'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs && \\\n'
f' echo "" > /opendevin/bash.bashrc\n'
# Install Miniforge3
f'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
f' wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
f' bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
f' rm Miniforge3.sh && \\\n'
f' chmod -R g+w /opendevin/miniforge3 && \\\n'
f' bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
f' fi\n'
'RUN /opendevin/miniforge3/bin/mamba install python=3.11 -y\n'
'RUN /opendevin/miniforge3/bin/mamba install conda-forge::poetry -y\n'
)
# Copy the project directory to the container
dockerfile_content += 'COPY project.tar.gz /opendevin\n'
# remove /opendevin/code if it exists
# Remove /opendevin/code if it exists
dockerfile_content += (
'RUN if [ -d /opendevin/code ]; then rm -rf /opendevin/code; fi\n'
)
# unzip the tarball to /opendevin/code
# Unzip the tarball to /opendevin/code
dockerfile_content += (
'RUN cd /opendevin && tar -xzvf project.tar.gz && rm project.tar.gz\n'
)
dockerfile_content += f'RUN mv /opendevin/{source_code_dirname} /opendevin/code\n'
# install (or update) the dependencies
# ALTERNATIVE, but maybe not complete? (toml error!)
dockerfile_content += (
'RUN cd /opendevin/code && '
'/opendevin/miniforge3/bin/mamba run -n base poetry env use python3.11 && '
'/opendevin/miniforge3/bin/mamba run -n base poetry install\n'
# for browser (update if needed)
'RUN apt-get update && cd /opendevin/code && /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium\n'
'/opendevin/miniforge3/bin/mamba run -n base poetry install --no-interaction --no-root\n'
'RUN /opendevin/miniforge3/bin/mamba run -n base poetry cache clear --all . && \\\n'
'apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* &&\\\n'
'/opendevin/miniforge3/bin/mamba clean --all\n'
)
# For browser (update if needed)
dockerfile_content += (
'RUN apt-get update && \\\n'
' cd /opendevin/code && \\\n'
' /opendevin/miniforge3/bin/mamba run -n base poetry run pip install playwright && \\\n'
' /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \\\n'
' apt-get clean && \\\n'
' rm -rf /var/lib/apt/lists/*\n'
)
return dockerfile_content
@ -176,14 +207,17 @@ def _get_new_image_name(base_image: str, dev_mode: bool = False) -> str:
base_image = base_image + ':latest'
[repo, tag] = base_image.split(':')
repo = repo.replace('/', '___')
return f'{prefix}:{repo}_tag_{tag}'
od_version = _get_package_version()
return f'{prefix}:od_v{od_version}_image_{repo}_tag_{tag}'
def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool:
    """Return True if one of the images listed by the client carries ``image_name``.

    Args:
        image_name: Image reference to look for; it is compared against the
            entries of each listed image's ``tags``.
        docker_client: Client whose ``images.list()`` result is searched.

    Returns:
        True when some listed image has ``image_name`` among its tags,
        False otherwise (including when nothing is listed).
    """
    # `or []` keeps the single scan safe when the listing is empty/falsy,
    # replacing the previous duplicated (guarded + unguarded) loops.
    images = docker_client.images.list() or []
    return any(image_name in image.tags for image in images)
@ -191,31 +225,44 @@ def build_runtime_image(
base_image: str,
docker_client: docker.DockerClient,
update_source_code: bool = False,
save_to_local_store: bool = False, # New parameter to control saving to local store
) -> str:
"""Build the runtime image for the OpenDevin runtime.
This is only used for **eventstream runtime**.
"""
new_image_name = _get_new_image_name(base_image)
logger.info(f'New image name: {new_image_name}')
# Ensure new_image_name contains a colon
if ':' not in new_image_name:
raise ValueError(
f'Invalid image name: {new_image_name}. Expected format "repository:tag".'
)
# Try to pull the new image from the registry
try:
docker_client.images.pull(new_image_name)
except Exception as e:
logger.info(f'Error pulling image {new_image_name}, building it from scratch')
logger.info(f'Non-fatal error: {e}')
except Exception:
logger.info(f'Cannot pull image {new_image_name} directly')
# Detect if the sandbox image is built
image_exists = _check_image_exists(new_image_name, docker_client)
if image_exists:
logger.info(f'Image {new_image_name} exists')
else:
logger.info(f'Image {new_image_name} does not exist')
skip_init = False
if image_exists and not update_source_code:
# If (1) Image exists & we are not updating the source code, we can reuse the existing production image
logger.info('No image build done (not updating source code)')
return new_image_name
elif image_exists and update_source_code:
# If (2) Image exists & we plan to update the source code (in dev mode), we need to rebuild the image
# and give it a special name
# e.g., od_runtime:ubuntu_tag_latest -> od_runtime_dev:ubuntu_tag_latest
logger.info('Image exists, but updating source code requested')
base_image = new_image_name
new_image_name = _get_new_image_name(base_image, dev_mode=True)
@ -223,22 +270,47 @@ def build_runtime_image(
else:
# If (3) Image does not exist, we need to build it from scratch
# e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest
skip_init = False # since we need to build the image from scratch
# This snippet would allow loading the image from an archive:
# tar_path = f'{new_image_name.replace(":", "_")}.tar'
# if os.path.exists(tar_path):
# logger.info(f'Loading image from {tar_path}')
# load_command = ['docker', 'load', '-i', tar_path]
# subprocess.run(load_command, check=True)
# logger.info(f'Image {new_image_name} loaded from {tar_path}')
# return new_image_name
skip_init = False
logger.info(f'Building image [{new_image_name}] from scratch')
if not skip_init:
logger.info(f'Building image [{new_image_name}] from scratch')
_build_sandbox_image(base_image, new_image_name, docker_client, skip_init=skip_init)
# Only for development: allow saving the image as an archive:
if not image_exists and save_to_local_store:
tar_path = f'{new_image_name.replace(":", "_")}.tar'
save_command = ['docker', 'save', '-o', tar_path, new_image_name]
subprocess.run(save_command, check=True)
logger.info(f'Image saved to {tar_path}')
load_command = ['docker', 'load', '-i', tar_path]
subprocess.run(load_command, check=True)
logger.info(f'Image {new_image_name} loaded back into Docker from {tar_path}')
return new_image_name
if __name__ == '__main__':

    def _str2bool(value: str) -> bool:
        """Parse a command-line boolean value.

        `type=bool` must not be used with argparse: it calls `bool()` on the
        raw string, so any non-empty value (including 'False') becomes True.
        """
        lowered = value.strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(f'Expected a boolean value, got {value!r}')

    # Command-line entry point: build (or reuse) the runtime image for the
    # given base image and print the resulting image name.
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_image', type=str, default='ubuntu:22.04')
    # `nargs='?'` + `const=True` lets the flags be passed either bare
    # (`--update_source_code`) or with an explicit value (`... True/False`).
    parser.add_argument(
        '--update_source_code', type=_str2bool, nargs='?', const=True, default=False
    )
    parser.add_argument(
        '--save_to_local_store', type=_str2bool, nargs='?', const=True, default=False
    )
    args = parser.parse_args()

    client = docker.from_env()
    image_name = build_runtime_image(
        args.base_image,
        client,
        update_source_code=args.update_source_code,
        save_to_local_store=args.save_to_local_store,
    )
    print(f'\nBUILT Image: {image_name}\n')