feat: refactor image building logic into runtime builder (#3395)

* feat: refactor building logic into runtime builder

* return image name

* fix testcases

* use runtime builder for eventstream runtime
This commit is contained in:
Xingyao Wang 2024-08-15 04:02:12 +08:00 committed by GitHub
parent be2ea6ab35
commit b6243bb96b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 216 additions and 142 deletions

View File

@ -0,0 +1,4 @@
from .base import RuntimeBuilder
from .docker import DockerRuntimeBuilder
__all__ = ['RuntimeBuilder', 'DockerRuntimeBuilder']

View File

@ -0,0 +1,37 @@
import abc
class RuntimeBuilder(abc.ABC):
@abc.abstractmethod
def build(
self,
path: str,
tags: list[str],
) -> str:
"""
Build the runtime image.
Args:
path (str): The path to the runtime image's build directory.
tags (list[str]): The tags to apply to the runtime image (e.g., ["repo:my-repo", "sha:my-sha"]).
Returns:
str: The name of the runtime image (e.g., "repo:sha").
Raises:
RuntimeError: If the build failed.
"""
pass
@abc.abstractmethod
def image_exists(self, image_name: str) -> bool:
"""
Check if the runtime image exists.
Args:
image_name (str): The name of the runtime image (e.g., "repo:sha").
Returns:
bool: Whether the runtime image exists.
"""
pass

View File

@ -0,0 +1,83 @@
import docker
from opendevin.core.logger import opendevin_logger as logger
from .base import RuntimeBuilder
class DockerRuntimeBuilder(RuntimeBuilder):
def __init__(self, docker_client: docker.DockerClient):
self.docker_client = docker_client
def build(self, path: str, tags: list[str]) -> str:
target_image_hash_name = tags[0]
target_image_repo, target_image_hash_tag = target_image_hash_name.split(':')
target_image_tag = tags[1].split(':')[1] if len(tags) > 1 else None
try:
build_logs = self.docker_client.api.build(
path=path,
tag=target_image_hash_name,
rm=True,
decode=True,
)
except docker.errors.BuildError as e:
logger.error(f'Sandbox image build failed: {e}')
raise RuntimeError(f'Sandbox image build failed: {e}')
for log in build_logs:
if 'stream' in log:
print(log['stream'].strip())
elif 'error' in log:
logger.error(log['error'].strip())
else:
logger.info(str(log))
logger.info(f'Image [{target_image_hash_name}] build finished.')
assert (
target_image_tag
), f'Expected target image tag [{target_image_tag}] is None'
image = self.docker_client.images.get(target_image_hash_name)
image.tag(target_image_repo, target_image_tag)
logger.info(
f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
)
# Check if the image is built successfully
image = self.docker_client.images.get(target_image_hash_name)
if image is None:
raise RuntimeError(
f'Build failed: Image {target_image_hash_name} not found'
)
tags_str = (
f'{target_image_hash_tag}, {target_image_tag}'
if target_image_tag
else target_image_hash_tag
)
logger.info(
f'Image {target_image_repo} with tags [{tags_str}] built successfully'
)
return target_image_hash_name
def image_exists(self, image_name: str) -> bool:
"""Check if the image exists in the registry (try to pull it first) AND in the local store.
Args:
image_name (str): The Docker image to check (<image repo>:<image tag>)
Returns:
bool: Whether the Docker image exists in the registry and in the local store
"""
# Try to pull the Docker image from the registry
try:
self.docker_client.images.pull(image_name)
except Exception:
logger.info(f'Cannot pull image {image_name} directly')
images = self.docker_client.images.list()
if images:
for image in images:
if image_name in image.tags:
return True
return False

View File

@ -28,6 +28,7 @@ from opendevin.events.observation import (
)
from opendevin.events.serialization import event_to_dict, observation_from_dict
from opendevin.events.serialization.action import ACTION_TYPE_TO_CLASS
from opendevin.runtime.builder import DockerRuntimeBuilder
from opendevin.runtime.plugins import PluginRequirement
from opendevin.runtime.runtime import Runtime
from opendevin.runtime.utils import find_available_tcp_port
@ -70,6 +71,8 @@ class EventStreamRuntime(Runtime):
self.container = None
self.action_semaphore = asyncio.Semaphore(1) # Ensure one action at a time
self.runtime_builder = DockerRuntimeBuilder(self.docker_client)
logger.debug(f'EventStreamRuntime `{sid}` config:\n{self.config}')
async def ainit(self, env_vars: dict[str, str] | None = None):
@ -80,7 +83,7 @@ class EventStreamRuntime(Runtime):
self.container_image = build_runtime_image(
self.container_image,
self.docker_client,
self.runtime_builder,
extra_deps=self.config.sandbox.od_runtime_extra_deps,
)
self.container = await self._init_container(

View File

@ -11,6 +11,7 @@ from jinja2 import Environment, FileSystemLoader
import opendevin
from opendevin.core.logger import opendevin_logger as logger
from opendevin.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder
RUNTIME_IMAGE_REPO = os.getenv(
'OD_RUNTIME_RUNTIME_IMAGE_REPO', 'ghcr.io/opendevin/od_runtime'
@ -164,67 +165,6 @@ def prep_docker_build_folder(
return hash
def _build_sandbox_image(
docker_folder: str,
docker_client: docker.DockerClient,
target_image_repo: str,
target_image_hash_tag: str,
target_image_tag: str,
) -> str:
"""Build and tag the sandbox image.
The image will be tagged as both:
- target_image_hash_tag
- target_image_tag
Parameters:
- docker_folder (str): the path to the docker build folder
- docker_client (docker.DockerClient): the docker client
- target_image_repo (str): the repository name for the target image
- target_image_hash_tag (str): the *hash* tag for the target image that is calculated based
on the contents of the docker build folder (source code and Dockerfile)
e.g. 1234567890abcdef
-target_image_tag (str): the tag for the target image that's generic and based on the base image name
e.g. od_v0.8.3_image_ubuntu_tag_22.04
"""
# Build the Docker image and tag it with the hash (target_image_hash_tag)
target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}'
try:
build_logs = docker_client.api.build(
path=docker_folder,
tag=target_image_hash_name,
rm=True,
decode=True,
)
except docker.errors.BuildError as e:
logger.error(f'Sandbox image build failed: {e}')
raise e
for log in build_logs:
if 'stream' in log:
print(log['stream'].strip())
elif 'error' in log:
logger.error(log['error'].strip())
else:
logger.info(str(log))
# Re-tag the image with the target_image_tag
logger.info(f'Image [{target_image_hash_name}] build finished.')
image = docker_client.images.get(target_image_hash_name)
image.tag(target_image_repo, target_image_tag)
logger.info(
f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
)
# Check if the image is built successfully
image = docker_client.images.get(target_image_hash_name)
if image is None:
raise RuntimeError(f'Build failed: Image {target_image_hash_name} not found')
logger.info(
f'Image {target_image_repo} with tags [{target_image_hash_tag}, {target_image_tag}] built successfully'
)
return target_image_hash_name
def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
"""Retrieves the Docker repo and tag associated with the Docker image.
@ -254,33 +194,9 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
return RUNTIME_IMAGE_REPO, f'od_v{od_version}_image_{repo}_tag_{tag}'
def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool:
"""Check if the image exists in the registry (try to pull it first) AND in the local store.
Parameters:
- image_name (str): The Docker image to check (<image repo>:<image tag>)
- docker_client (docker.DockerClient): The Docker client
Returns:
- bool: Whether the Docker image exists in the registry and in the local store
"""
# Try to pull the Docker image from the registry
try:
docker_client.images.pull(image_name)
except Exception:
logger.info(f'Cannot pull image {image_name} directly')
images = docker_client.images.list()
if images:
for image in images:
if image_name in image.tags:
return True
return False
def build_runtime_image(
base_image: str,
docker_client: docker.DockerClient,
runtime_builder: RuntimeBuilder,
extra_deps: str | None = None,
docker_build_folder: str | None = None,
dry_run: bool = False,
@ -291,7 +207,7 @@ def build_runtime_image(
Parameters:
- base_image (str): The name of the base Docker image to use
- docker_client (docker.DockerClient): The Docker client
- runtime_builder (RuntimeBuilder): The runtime builder to use
- extra_deps (str):
- docker_build_folder (str): The directory to use for the build. If not provided a temporary directory will be used
- dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image
@ -325,7 +241,7 @@ def build_runtime_image(
# Scenario 1: If we already have an image with the exact same hash, then it means the image is already built
# with the exact same source code and Dockerfile, so we will reuse it. Building it is not required.
if _check_image_exists(hash_runtime_image_name, docker_client):
if runtime_builder.image_exists(hash_runtime_image_name):
logger.info(
f'Image [{hash_runtime_image_name}] already exists so we will reuse it.'
)
@ -334,10 +250,7 @@ def build_runtime_image(
# Scenario 2: If a Docker image with the exact hash is not found, we will FIRST try to re-build it
# by leveraging the `generic_runtime_image_name` to save some time
# from re-building the dependencies (e.g., poetry install, apt install)
elif (
_check_image_exists(generic_runtime_image_name, docker_client)
and not force_rebuild
):
elif runtime_builder.image_exists(generic_runtime_image_name) and not force_rebuild:
logger.info(
f'Cannot find docker Image [{hash_runtime_image_name}]\n'
f'Will try to re-build it from latest [{generic_runtime_image_name}] image to potentially save '
@ -361,7 +274,7 @@ def build_runtime_image(
if not dry_run:
_build_sandbox_image(
docker_folder=cur_docker_build_folder,
docker_client=docker_client,
runtime_builder=runtime_builder,
target_image_repo=runtime_image_repo,
# NOTE: WE ALWAYS use the "from_scratch_hash" tag for the target image
# otherwise, even if the source code is exactly the same, the image *might* be re-built
@ -400,7 +313,7 @@ def build_runtime_image(
if not dry_run:
_build_sandbox_image(
docker_folder=cur_docker_build_folder,
docker_client=docker_client,
runtime_builder=runtime_builder,
target_image_repo=runtime_image_repo,
# NOTE: WE ALWAYS use the "from_scratch_hash" tag for the target image
target_image_hash_tag=from_scratch_hash,
@ -417,6 +330,44 @@ def build_runtime_image(
return f'{runtime_image_repo}:{from_scratch_hash}'
def _build_sandbox_image(
docker_folder: str,
runtime_builder: RuntimeBuilder,
target_image_repo: str,
target_image_hash_tag: str,
target_image_tag: str,
) -> str:
"""Build and tag the sandbox image.
The image will be tagged as both:
- target_image_hash_tag
- target_image_tag
Parameters:
- docker_folder (str): the path to the docker build folder
- runtime_builder (RuntimeBuilder): the runtime builder instance
- target_image_repo (str): the repository name for the target image
- target_image_hash_tag (str): the *hash* tag for the target image that is calculated based
on the contents of the docker build folder (source code and Dockerfile)
e.g. 1234567890abcdef
-target_image_tag (str): the tag for the target image that's generic and based on the base image name
e.g. od_v0.8.3_image_ubuntu_tag_22.04
"""
target_image_hash_name = f'{target_image_repo}:{target_image_hash_tag}'
target_image_generic_name = f'{target_image_repo}:{target_image_tag}'
try:
success = runtime_builder.build(
path=docker_folder, tags=[target_image_hash_name, target_image_generic_name]
)
if not success:
raise RuntimeError(f'Build failed for image {target_image_hash_name}')
except Exception as e:
logger.error(f'Sandbox image build failed: {e}')
raise
return target_image_hash_name
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
@ -450,7 +401,7 @@ if __name__ == '__main__':
# then obtain the MD5 hash of the folder and return <image_repo>:<temp_dir_md5_hash>
runtime_image_hash_name = build_runtime_image(
args.base_image,
docker_client=docker.from_env(),
runtime_builder=DockerRuntimeBuilder(docker.from_env()),
docker_build_folder=temp_dir,
dry_run=True,
force_rebuild=args.force_rebuild,
@ -487,6 +438,6 @@ if __name__ == '__main__':
# If a build_folder is not provided, after copying the required source code and dynamically creating the
# Dockerfile, we actually build the Docker image
logger.info('Building image in a temporary folder')
client = docker.from_env()
image_name = build_runtime_image(args.base_image, client)
docker_builder = DockerRuntimeBuilder(docker.from_env())
image_name = build_runtime_image(args.base_image, docker_builder)
print(f'\nBUILT Image: {image_name}\n')

View File

@ -194,37 +194,7 @@ def test_get_runtime_image_repo_and_tag_eventstream():
)
@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient')
def test_build_runtime_image_from_scratch(mock_docker_client, temp_dir):
base_image = 'debian:11'
mock_docker_client.images.list.return_value = []
# for image.tag(target_repo, target_image_tag)
mock_image = MagicMock()
mock_docker_client.images.get.return_value = mock_image
from_scratch_hash = prep_docker_build_folder(
temp_dir,
base_image,
skip_init=False,
)
image_name = build_runtime_image(base_image, mock_docker_client)
# The build call should be called with the hash tag
mock_docker_client.api.build.assert_called_once_with(
path=ANY, tag=f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}', rm=True, decode=True
)
# Then the hash tag should be tagged to the version
mock_image.tag.assert_called_once_with(
f'{RUNTIME_IMAGE_REPO}', f'{OD_VERSION}_image_debian_tag_11'
)
assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient')
def test_build_runtime_image_exact_hash_exist(mock_docker_client, temp_dir):
def test_build_runtime_image_from_scratch(temp_dir):
base_image = 'debian:11'
from_scratch_hash = prep_docker_build_folder(
@ -233,20 +203,45 @@ def test_build_runtime_image_exact_hash_exist(mock_docker_client, temp_dir):
skip_init=False,
)
mock_docker_client.images.list.return_value = [
MagicMock(tags=[f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'])
]
mock_runtime_builder = MagicMock()
mock_runtime_builder.image_exists.return_value = False
mock_runtime_builder.build.return_value = (
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
)
image_name = build_runtime_image(base_image, mock_docker_client)
image_name = build_runtime_image(base_image, mock_runtime_builder)
mock_runtime_builder.build.assert_called_once_with(
path=ANY,
tags=[
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}',
f'{RUNTIME_IMAGE_REPO}:{OD_VERSION}_image_debian_tag_11',
],
)
assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
mock_docker_client.api.build.assert_not_called()
def test_build_runtime_image_exact_hash_exist(temp_dir):
base_image = 'debian:11'
from_scratch_hash = prep_docker_build_folder(
temp_dir,
base_image,
skip_init=False,
)
mock_runtime_builder = MagicMock()
mock_runtime_builder.image_exists.return_value = True
mock_runtime_builder.build.return_value = (
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
)
image_name = build_runtime_image(base_image, mock_runtime_builder)
assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
mock_runtime_builder.build.assert_not_called()
@patch('opendevin.runtime.utils.runtime_build._build_sandbox_image')
@patch('opendevin.runtime.utils.runtime_build.docker.DockerClient')
def test_build_runtime_image_exact_hash_not_exist(
mock_docker_client, mock_build_sandbox_image, temp_dir
):
def test_build_runtime_image_exact_hash_not_exist(mock_build_sandbox_image, temp_dir):
base_image = 'debian:11'
repo, latest_image_tag = get_runtime_image_repo_and_tag(base_image)
latest_image_name = f'{repo}:{latest_image_tag}'
@ -263,8 +258,9 @@ def test_build_runtime_image_exact_hash_not_exist(
skip_init=True,
)
# latest image exists BUT not the exact hash
mock_docker_client.images.list.return_value = [MagicMock(tags=[latest_image_name])]
mock_runtime_builder = MagicMock()
# Set up mock_runtime_builder.image_exists to return False then True
mock_runtime_builder.image_exists.side_effect = [False, True]
with patch(
'opendevin.runtime.utils.runtime_build.prep_docker_build_folder'
@ -274,7 +270,7 @@ def test_build_runtime_image_exact_hash_not_exist(
non_from_scratch_hash,
]
image_name = build_runtime_image(base_image, mock_docker_client)
image_name = build_runtime_image(base_image, mock_runtime_builder)
mock_prep_docker_build_folder.assert_has_calls(
[
@ -287,7 +283,7 @@ def test_build_runtime_image_exact_hash_not_exist(
mock_build_sandbox_image.assert_called_once_with(
docker_folder=ANY,
docker_client=mock_docker_client,
runtime_builder=mock_runtime_builder,
target_image_repo=repo,
target_image_hash_tag=from_scratch_hash,
target_image_tag=latest_image_tag,