From b6243bb96b668e116f1cc174d35a9ead5ec3447b Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Thu, 15 Aug 2024 04:02:12 +0800 Subject: [PATCH] feat: refactor image building logic into runtime builder (#3395) * feat: refactor building logic into runtime builder * return image name * fix testcases * use runtime builder for eventstream runtime --- opendevin/runtime/builder/__init__.py | 4 + opendevin/runtime/builder/base.py | 37 ++++++ opendevin/runtime/builder/docker.py | 83 +++++++++++++ opendevin/runtime/client/runtime.py | 5 +- opendevin/runtime/utils/runtime_build.py | 145 ++++++++--------------- tests/unit/test_runtime_build.py | 84 +++++++------ 6 files changed, 216 insertions(+), 142 deletions(-) create mode 100644 opendevin/runtime/builder/__init__.py create mode 100644 opendevin/runtime/builder/base.py create mode 100644 opendevin/runtime/builder/docker.py diff --git a/opendevin/runtime/builder/__init__.py b/opendevin/runtime/builder/__init__.py new file mode 100644 index 0000000000..dbd0067203 --- /dev/null +++ b/opendevin/runtime/builder/__init__.py @@ -0,0 +1,4 @@ +from .base import RuntimeBuilder +from .docker import DockerRuntimeBuilder + +__all__ = ['RuntimeBuilder', 'DockerRuntimeBuilder'] diff --git a/opendevin/runtime/builder/base.py b/opendevin/runtime/builder/base.py new file mode 100644 index 0000000000..cd78cc4c0b --- /dev/null +++ b/opendevin/runtime/builder/base.py @@ -0,0 +1,37 @@ +import abc + + +class RuntimeBuilder(abc.ABC): + @abc.abstractmethod + def build( + self, + path: str, + tags: list[str], + ) -> str: + """ + Build the runtime image. + + Args: + path (str): The path to the runtime image's build directory. + tags (list[str]): The tags to apply to the runtime image (e.g., ["repo:my-repo", "sha:my-sha"]). + + Returns: + str: The name of the runtime image (e.g., "repo:sha"). + + Raises: + RuntimeError: If the build failed. + """ + pass + + @abc.abstractmethod + def image_exists(self, image_name: str) -> bool: + """ + Check if the runtime image exists. + + Args: + image_name (str): The name of the runtime image (e.g., "repo:sha"). + + Returns: + bool: Whether the runtime image exists. + """ + pass diff --git a/opendevin/runtime/builder/docker.py b/opendevin/runtime/builder/docker.py new file mode 100644 index 0000000000..c16af0d846 --- /dev/null +++ b/opendevin/runtime/builder/docker.py @@ -0,0 +1,83 @@ +import docker + +from opendevin.core.logger import opendevin_logger as logger + +from .base import RuntimeBuilder + + +class DockerRuntimeBuilder(RuntimeBuilder): + def __init__(self, docker_client: docker.DockerClient): + self.docker_client = docker_client + + def build(self, path: str, tags: list[str]) -> str: + target_image_hash_name = tags[0] + target_image_repo, target_image_hash_tag = target_image_hash_name.split(':') + target_image_tag = tags[1].split(':')[1] if len(tags) > 1 else None + + try: + build_logs = self.docker_client.api.build( + path=path, + tag=target_image_hash_name, + rm=True, + decode=True, + ) + except docker.errors.BuildError as e: + logger.error(f'Sandbox image build failed: {e}') + raise RuntimeError(f'Sandbox image build failed: {e}') + + for log in build_logs: + if 'stream' in log: + print(log['stream'].strip()) + elif 'error' in log: + logger.error(log['error'].strip()) + else: + logger.info(str(log)) + + logger.info(f'Image [{target_image_hash_name}] build finished.') + + assert ( + target_image_tag + ), f'Expected target image tag [{target_image_tag}] is None' + image = self.docker_client.images.get(target_image_hash_name) + image.tag(target_image_repo, target_image_tag) + logger.info( + f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]' + ) + + # Check if the image is built successfully + image = self.docker_client.images.get(target_image_hash_name) + if image is None: + raise RuntimeError( + f'Build failed: Image {target_image_hash_name} not found' + ) + + tags_str = ( + f'{target_image_hash_tag}, {target_image_tag}' + if target_image_tag + else target_image_hash_tag + ) + logger.info( + f'Image {target_image_repo} with tags [{tags_str}] built successfully' + ) + return target_image_hash_name + + def image_exists(self, image_name: str) -> bool: + """Check if the image exists in the registry (try to pull it first) AND in the local store. + + Args: + image_name (str): The Docker image to check (:) + Returns: + bool: Whether the Docker image exists in the registry and in the local store + """ + # Try to pull the Docker image from the registry + try: + self.docker_client.images.pull(image_name) + except Exception: + logger.info(f'Cannot pull image {image_name} directly') + + images = self.docker_client.images.list() + if images: + for image in images: + if image_name in image.tags: + return True + return False diff --git a/opendevin/runtime/client/runtime.py b/opendevin/runtime/client/runtime.py index 451bff3957..a0dfb2e38c 100644 --- a/opendevin/runtime/client/runtime.py +++ b/opendevin/runtime/client/runtime.py @@ -28,6 +28,7 @@ from opendevin.events.observation import ( ) from opendevin.events.serialization import event_to_dict, observation_from_dict from opendevin.events.serialization.action import ACTION_TYPE_TO_CLASS +from opendevin.runtime.builder import DockerRuntimeBuilder from opendevin.runtime.plugins import PluginRequirement from opendevin.runtime.runtime import Runtime from opendevin.runtime.utils import find_available_tcp_port @@ -70,6 +71,8 @@ class EventStreamRuntime(Runtime): self.container = None self.action_semaphore = asyncio.Semaphore(1) # Ensure one action at a time + + self.runtime_builder = DockerRuntimeBuilder(self.docker_client) logger.debug(f'EventStreamRuntime `{sid}` config:\n{self.config}') async def ainit(self, env_vars: dict[str, str] | None = None): @@ -80,7 +83,7 @@ class EventStreamRuntime(Runtime): self.container_image = build_runtime_image( self.container_image, - self.docker_client, + self.runtime_builder, extra_deps=self.config.sandbox.od_runtime_extra_deps, ) self.container = await self._init_container( diff --git a/opendevin/runtime/utils/runtime_build.py b/opendevin/runtime/utils/runtime_build.py index fbfcf44e9a..bed8803a78 100644 --- a/opendevin/runtime/utils/runtime_build.py +++ b/opendevin/runtime/utils/runtime_build.py @@ -11,6 +11,7 @@ from jinja2 import Environment, FileSystemLoader import opendevin from opendevin.core.logger import opendevin_logger as logger +from opendevin.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder RUNTIME_IMAGE_REPO = os.getenv( 'OD_RUNTIME_RUNTIME_IMAGE_REPO', 'ghcr.io/opendevin/od_runtime' @@ -164,67 +165,6 @@ def prep_docker_build_folder( return hash -def _build_sandbox_image( - docker_folder: str, - docker_client: docker.DockerClient, - target_image_repo: str, - target_image_hash_tag: str, - target_image_tag: str, -) -> str: - """Build and tag the sandbox image. - The image will be tagged as both: - - target_image_hash_tag - - target_image_tag - - Parameters: - - docker_folder (str): the path to the docker build folder - - docker_client (docker.DockerClient): the docker client - - target_image_repo (str): the repository name for the target image - - target_image_hash_tag (str): the *hash* tag for the target image that is calculated based - on the contents of the docker build folder (source code and Dockerfile) - e.g. 1234567890abcdef - -target_image_tag (str): the tag for the target image that's generic and based on the base image name - e.g. od_v0.8.3_image_ubuntu_tag_22.04 - """ - # Build the Docker image and tag it with the hash (target_image_hash_tag) - target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}' - try: - build_logs = docker_client.api.build( - path=docker_folder, - tag=target_image_hash_name, - rm=True, - decode=True, - ) - except docker.errors.BuildError as e: - logger.error(f'Sandbox image build failed: {e}') - raise e - - for log in build_logs: - if 'stream' in log: - print(log['stream'].strip()) - elif 'error' in log: - logger.error(log['error'].strip()) - else: - logger.info(str(log)) - - # Re-tag the image with the target_image_tag - logger.info(f'Image [{target_image_hash_name}] build finished.') - image = docker_client.images.get(target_image_hash_name) - image.tag(target_image_repo, target_image_tag) - logger.info( - f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]' - ) - - # Check if the image is built successfully - image = docker_client.images.get(target_image_hash_name) - if image is None: - raise RuntimeError(f'Build failed: Image {target_image_hash_name} not found') - logger.info( - f'Image {target_image_repo} with tags [{target_image_hash_tag}, {target_image_tag}] built successfully' - ) - return target_image_hash_name - - def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]: """Retrieves the Docker repo and tag associated with the Docker image. @@ -254,33 +194,9 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]: return RUNTIME_IMAGE_REPO, f'od_v{od_version}_image_{repo}_tag_{tag}' -def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool: - """Check if the image exists in the registry (try to pull it first) AND in the local store. - - Parameters: - - image_name (str): The Docker image to check (:) - - docker_client (docker.DockerClient): The Docker client - - Returns: - - bool: Whether the Docker image exists in the registry and in the local store - """ - # Try to pull the Docker image from the registry - try: - docker_client.images.pull(image_name) - except Exception: - logger.info(f'Cannot pull image {image_name} directly') - - images = docker_client.images.list() - if images: - for image in images: - if image_name in image.tags: - return True - return False - - def build_runtime_image( base_image: str, - docker_client: docker.DockerClient, + runtime_builder: RuntimeBuilder, extra_deps: str | None = None, docker_build_folder: str | None = None, dry_run: bool = False, @@ -291,7 +207,7 @@ def build_runtime_image( Parameters: - base_image (str): The name of the base Docker image to use - - docker_client (docker.DockerClient): The Docker client + - runtime_builder (RuntimeBuilder): The runtime builder to use - extra_deps (str): - docker_build_folder (str): The directory to use for the build. If not provided a temporary directory will be used - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image @@ -325,7 +241,7 @@ def build_runtime_image( # Scenario 1: If we already have an image with the exact same hash, then it means the image is already built # with the exact same source code and Dockerfile, so we will reuse it. Building it is not required. - if _check_image_exists(hash_runtime_image_name, docker_client): + if runtime_builder.image_exists(hash_runtime_image_name): logger.info( f'Image [{hash_runtime_image_name}] already exists so we will reuse it.' ) @@ -334,10 +250,7 @@ def build_runtime_image( # Scenario 2: If a Docker image with the exact hash is not found, we will FIRST try to re-build it # by leveraging the `generic_runtime_image_name` to save some time # from re-building the dependencies (e.g., poetry install, apt install) - elif ( - _check_image_exists(generic_runtime_image_name, docker_client) - and not force_rebuild - ): + elif runtime_builder.image_exists(generic_runtime_image_name) and not force_rebuild: logger.info( f'Cannot find docker Image [{hash_runtime_image_name}]\n' f'Will try to re-build it from latest [{generic_runtime_image_name}] image to potentially save ' @@ -361,7 +274,7 @@ def build_runtime_image( if not dry_run: _build_sandbox_image( docker_folder=cur_docker_build_folder, - docker_client=docker_client, + runtime_builder=runtime_builder, target_image_repo=runtime_image_repo, # NOTE: WE ALWAYS use the "from_scratch_hash" tag for the target image # otherwise, even if the source code is exactly the same, the image *might* be re-built @@ -400,7 +313,7 @@ def build_runtime_image( if not dry_run: _build_sandbox_image( docker_folder=cur_docker_build_folder, - docker_client=docker_client, + runtime_builder=runtime_builder, target_image_repo=runtime_image_repo, # NOTE: WE ALWAYS use the "from_scratch_hash" tag for the target image target_image_hash_tag=from_scratch_hash, @@ -417,6 +330,44 @@ def build_runtime_image( return f'{runtime_image_repo}:{from_scratch_hash}' +def _build_sandbox_image( + docker_folder: str, + runtime_builder: RuntimeBuilder, + target_image_repo: str, + target_image_hash_tag: str, + target_image_tag: str, +) -> str: + """Build and tag the sandbox image. + The image will be tagged as both: + - target_image_hash_tag + - target_image_tag + + Parameters: + - docker_folder (str): the path to the docker build folder + - runtime_builder (RuntimeBuilder): the runtime builder instance + - target_image_repo (str): the repository name for the target image + - target_image_hash_tag (str): the *hash* tag for the target image that is calculated based + on the contents of the docker build folder (source code and Dockerfile) + e.g. 1234567890abcdef + -target_image_tag (str): the tag for the target image that's generic and based on the base image name + e.g. od_v0.8.3_image_ubuntu_tag_22.04 + """ + target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}' + target_image_generic_name = f'{target_image_repo}:{target_image_tag}' + + try: + success = runtime_builder.build( + path=docker_folder, tags=[target_image_hash_name, target_image_generic_name] + ) + if not success: + raise RuntimeError(f'Build failed for image {target_image_hash_name}') + except Exception as e: + logger.error(f'Sandbox image build failed: {e}') + raise + + return target_image_hash_name + + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( @@ -450,7 +401,7 @@ if __name__ == '__main__': # then obtain the MD5 hash of the folder and return : runtime_image_hash_name = build_runtime_image( args.base_image, - docker_client=docker.from_env(), + runtime_builder=DockerRuntimeBuilder(docker.from_env()), docker_build_folder=temp_dir, dry_run=True, force_rebuild=args.force_rebuild, @@ -487,6 +438,6 @@ if __name__ == '__main__': # If a build_folder is not provided, after copying the required source code and dynamically creating the # Dockerfile, we actually build the Docker image logger.info('Building image in a temporary folder') - client = docker.from_env() - image_name = build_runtime_image(args.base_image, client) + docker_builder = DockerRuntimeBuilder(docker.from_env()) + image_name = build_runtime_image(args.base_image, docker_builder) print(f'\nBUILT Image: {image_name}\n') diff --git a/tests/unit/test_runtime_build.py b/tests/unit/test_runtime_build.py index 438355401e..911cd8dcf7 100644 --- a/tests/unit/test_runtime_build.py +++ b/tests/unit/test_runtime_build.py @@ -194,37 +194,7 @@ def test_get_runtime_image_repo_and_tag_eventstream(): ) -@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient') -def test_build_runtime_image_from_scratch(mock_docker_client, temp_dir): - base_image = 'debian:11' - - mock_docker_client.images.list.return_value = [] - - # for image.tag(target_repo, target_image_tag) - mock_image = MagicMock() - mock_docker_client.images.get.return_value = mock_image - - from_scratch_hash = prep_docker_build_folder( - temp_dir, - base_image, - skip_init=False, - ) - - image_name = build_runtime_image(base_image, mock_docker_client) - - # The build call should be called with the hash tag - mock_docker_client.api.build.assert_called_once_with( - path=ANY, tag=f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}', rm=True, decode=True - ) - # Then the hash tag should be tagged to the version - mock_image.tag.assert_called_once_with( - f'{RUNTIME_IMAGE_REPO}', f'{OD_VERSION}_image_debian_tag_11' - ) - assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}' - - -@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient') -def test_build_runtime_image_exact_hash_exist(mock_docker_client, temp_dir): +def test_build_runtime_image_from_scratch(temp_dir): base_image = 'debian:11' from_scratch_hash = prep_docker_build_folder( @@ -233,20 +203,45 @@ def test_build_runtime_image_exact_hash_exist(mock_docker_client, temp_dir): skip_init=False, ) - mock_docker_client.images.list.return_value = [ - MagicMock(tags=[f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}']) - ] + mock_runtime_builder = MagicMock() + mock_runtime_builder.image_exists.return_value = False + mock_runtime_builder.build.return_value = ( + f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}' + ) - image_name = build_runtime_image(base_image, mock_docker_client) + image_name = build_runtime_image(base_image, mock_runtime_builder) + mock_runtime_builder.build.assert_called_once_with( + path=ANY, + tags=[ + f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}', + f'{RUNTIME_IMAGE_REPO}:{OD_VERSION}_image_debian_tag_11', + ], + ) assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}' - mock_docker_client.api.build.assert_not_called() + + +def test_build_runtime_image_exact_hash_exist(temp_dir): + base_image = 'debian:11' + + from_scratch_hash = prep_docker_build_folder( + temp_dir, + base_image, + skip_init=False, + ) + + mock_runtime_builder = MagicMock() + mock_runtime_builder.image_exists.return_value = True + mock_runtime_builder.build.return_value = ( + f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}' + ) + + image_name = build_runtime_image(base_image, mock_runtime_builder) + assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}' + mock_runtime_builder.build.assert_not_called() @patch('opendevin.runtime.utils.runtime_build._build_sandbox_image') -@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient') -def test_build_runtime_image_exact_hash_not_exist( - mock_docker_client, mock_build_sandbox_image, temp_dir -): +def test_build_runtime_image_exact_hash_not_exist(mock_build_sandbox_image, temp_dir): base_image = 'debian:11' repo, latest_image_tag = get_runtime_image_repo_and_tag(base_image) latest_image_name = f'{repo}:{latest_image_tag}' @@ -263,8 +258,9 @@ def test_build_runtime_image_exact_hash_not_exist( skip_init=True, ) - # latest image exists BUT not the exact hash - mock_docker_client.images.list.return_value = [MagicMock(tags=[latest_image_name])] + mock_runtime_builder = MagicMock() + # Set up mock_runtime_builder.image_exists to return False then True + mock_runtime_builder.image_exists.side_effect = [False, True] with patch( 'opendevin.runtime.utils.runtime_build.prep_docker_build_folder' @@ -274,7 +270,7 @@ def test_build_runtime_image_exact_hash_not_exist( non_from_scratch_hash, ] - image_name = build_runtime_image(base_image, mock_docker_client) + image_name = build_runtime_image(base_image, mock_runtime_builder) mock_prep_docker_build_folder.assert_has_calls( [ @@ -287,7 +283,7 @@ def test_build_runtime_image_exact_hash_not_exist( mock_build_sandbox_image.assert_called_once_with( docker_folder=ANY, - docker_client=mock_docker_client, + runtime_builder=mock_runtime_builder, target_image_repo=repo, target_image_hash_tag=from_scratch_hash, target_image_tag=latest_image_tag,