feat: support remote runtime (#3406)

* feat: refactor building logic into runtime builder

* return image name

* fix testcases

* use runtime builder for eventstream runtime

* have runtime builder return str

* add api_key to sandbox config

* draft remote runtime

* remove extra if clause

* initialize runtime based on box class

* add build logic

* use base64 for file upload

* get runtime image prefix from API

* replace ___ with _s_ to make it a valid image name

* use /build to start build and /build_status to check the build progress

* update logging

* fix exit code

* always use port

* add remote runtime

* rename runtime

* fix tests import

* make dir first if work_dir does not exist

* update debug print for remote runtime

* fix exit close_sync

* update logging

* add retry for stop

* use all box classes for test keep prompt

* fix test browsing

* add retry stop

* merge init commands to save startup time

* fix await

* remove sandbox url

* support execute through specific runtime url

* fix file ops

* simplify close

* factor out runtime retry code

* fix exception handling

* fix content type error (e.g., bad gateway when runtime is not ready)

* add retry for wait until alive;
add retry for check image exists

* Revert "add retry for wait until alive;"

This reverts commit dd013cd2681a159cd07747497d8c95e145d01c32.

* retry when wait until alive

* clean up msg

* directly save sdist to temp dir for _put_source_code_to_dir

* support running testcases in parallel

* tweak logging;
try to close session

* try to close session even on exception

* update poetry lock

* support remote runtime for running integration tests

* add warning for workspace_base on remote runtime

* set default runtime api

* remove server runtime

* update poetry lock

* support running swe-bench (n=1) eval on RemoteRuntime

* add a timeout of 30 min

* add todo for docker namespace

* update poetry lock
Xingyao Wang, 2024-08-29 10:53:37 -05:00 (committed by GitHub)
parent 296fa8182a
commit 8b1f207d39
17 changed files with 683 additions and 86 deletions

View File

@@ -72,6 +72,12 @@ then your command would be:
./evaluation/swe_bench/scripts/run_infer.sh llm.eval_gpt4_1106_preview HEAD CodeActAgent 10
```
**Evaluate on `RemoteRuntime` (alpha)** (contact Xingyao over slack if you want to try this out!)
```bash
SANDBOX_API_KEY="CONTACT-XINGYAO-TO-GET-A-TESTING-API-KEY" RUNTIME=remote EVAL_DOCKER_IMAGE_PREFIX="us-docker.pkg.dev/evaluation-428620/swe-bench-images" ./evaluation/swe_bench/scripts/run_infer.sh llm.eval HEAD CodeActAgent 300
```
Multi-processing is still WIP.
### Specify a subset of tasks to run infer
If you would like to specify a list of tasks you'd like to benchmark on, you could

View File

@@ -24,6 +24,7 @@ from openhands.core.config import (
AppConfig,
SandboxConfig,
get_llm_config_arg,
load_from_env,
parse_arguments,
)
from openhands.core.logger import openhands_logger as logger
@@ -86,6 +87,19 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
return instruction
# TODO: migrate all swe-bench docker to ghcr.io/openhands
DOCKER_IMAGE_PREFIX = os.environ.get('EVAL_DOCKER_IMAGE_PREFIX', 'docker.io/xingyaoww/')
logger.info(f'Using docker image prefix: {DOCKER_IMAGE_PREFIX}')
def get_instance_docker_image(instance_id: str) -> str:
image_name = 'sweb.eval.x86_64.' + instance_id
image_name = image_name.replace(
'__', '_s_'
) # to comply with docker image naming convention
return DOCKER_IMAGE_PREFIX.rstrip('/') + '/' + image_name
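For illustration, the naming scheme above maps a SWE-bench instance id to a registry path like this (the instance id and prefix are examples, not pinned values):

```python
# Sketch of the image-name mapping above (illustrative values):
prefix = 'docker.io/xingyaoww/'
instance_id = 'django__django-11099'
image_name = ('sweb.eval.x86_64.' + instance_id).replace('__', '_s_')
print(prefix.rstrip('/') + '/' + image_name)
# docker.io/xingyaoww/sweb.eval.x86_64.django_s_django-11099
```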
def get_config(
instance: pd.Series,
metadata: EvalMetadata,
@@ -93,14 +107,14 @@
SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
if USE_INSTANCE_IMAGE:
# We use a different instance image for each instance of swe-bench eval
base_container_image = 'sweb.eval.x86_64.' + instance['instance_id']
base_container_image = get_instance_docker_image(instance['instance_id'])
else:
base_container_image = SWE_BENCH_CONTAINER_IMAGE
logger.info(f'Using swe-bench container image: {base_container_image}')
config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='eventstream',
max_budget_per_task=4,
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
@@ -114,6 +128,15 @@
workspace_base=None,
workspace_mount_path=None,
)
selected_env_vars = {'runtime', 'sandbox_api_key'}
selected_env_vars = {
k: v for k, v in os.environ.items() if k.lower() in selected_env_vars
}
if selected_env_vars:
logger.info(
f'Loading config keys from env vars: {list(selected_env_vars.keys())}'
)
load_from_env(config, selected_env_vars)
config.set_llm_config(metadata.llm_config)
return config
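Since the whitelist is matched case-insensitively against `os.environ`, the effect is roughly the following (a minimal sketch with made-up values):

```python
# Minimal sketch of the env-var selection above (values are made up):
import os

os.environ['RUNTIME'] = 'remote'
os.environ['SANDBOX_API_KEY'] = 'dummy-key'
allowed = {'runtime', 'sandbox_api_key'}
picked = {k: v for k, v in os.environ.items() if k.lower() in allowed}
assert picked == {'RUNTIME': 'remote', 'SANDBOX_API_KEY': 'dummy-key'}
# load_from_env(config, picked) then applies these keys onto the AppConfig.
```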

View File

@@ -201,9 +201,8 @@ class SandboxConfig:
"""
api_hostname: str = 'localhost'
base_container_image: str | None = (
'nikolaik/python-nodejs:python3.11-nodejs22' # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
)
api_key: str | None = None
base_container_image: str = 'nikolaik/python-nodejs:python3.11-nodejs22' # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
runtime_container_image: str | None = None
user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
timeout: int = 120

View File

@@ -11,6 +11,10 @@ def get_runtime_cls(name: str):
from openhands.runtime.e2b.runtime import E2BRuntime
return E2BRuntime
elif name == 'remote':
from openhands.runtime.remote.runtime import RemoteRuntime
return RemoteRuntime
else:
raise ValueError(f'Runtime {name} not supported')

View File

@@ -16,7 +16,9 @@ class RuntimeBuilder(abc.ABC):
tags (list[str]): The tags to apply to the runtime image (e.g., ["repo:my-repo", "sha:my-sha"]).
Returns:
str: The name of the runtime image (e.g., "repo:sha").
str: The name:tag of the runtime image after build (e.g., "repo:sha").
This can be different from the tags input if the builder chooses to mutate the tags (e.g., adding a
registry prefix). This should be used for subsequent use (e.g., `docker run`).
Raises:
RuntimeError: If the build failed.
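A minimal sketch of a builder honoring this contract, assuming the two-method interface used elsewhere in this PR (the registry prefix here is hypothetical):

```python
# Hypothetical builder illustrating the contract above: build() may rewrite
# the requested tag, and callers must use the returned name:tag afterwards.
class PrefixingBuilder(RuntimeBuilder):
    def build(self, path: str, tags: list[str]) -> str:
        # e.g. push under a private registry and return the prefixed name
        return f'registry.example.com/{tags[0]}'

    def image_exists(self, image_name: str) -> bool:
        return False  # toy builder: always rebuild
```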

View File

@@ -0,0 +1,117 @@
import base64
import io
import tarfile
import time
import requests
from openhands.core.logger import openhands_logger as logger
from openhands.runtime.builder import RuntimeBuilder
class RemoteRuntimeBuilder(RuntimeBuilder):
"""This class interacts with the remote Runtime API for building and managing container images."""
def __init__(self, api_url: str, api_key: str):
self.api_url = api_url
self.api_key = api_key
def build(self, path: str, tags: list[str]) -> str:
"""Builds a Docker image using the Runtime API's /build endpoint."""
# Create a tar archive of the build context
tar_buffer = io.BytesIO()
with tarfile.open(fileobj=tar_buffer, mode='w:gz') as tar:
tar.add(path, arcname='.')
tar_buffer.seek(0)
# Encode the tar file as base64
base64_encoded_tar = base64.b64encode(tar_buffer.getvalue()).decode('utf-8')
# Prepare the multipart form data
files = [
('context', ('context.tar.gz', base64_encoded_tar)),
('target_image', (None, tags[0])),
]
# Add additional tags if present
for tag in tags[1:]:
files.append(('tags', (None, tag)))
# Send the POST request to /build
headers = {'X-API-Key': self.api_key}
response = requests.post(f'{self.api_url}/build', files=files, headers=headers)
if response.status_code != 202:
logger.error(f'Build initiation failed: {response.text}')
raise RuntimeError(f'Build initiation failed: {response.text}')
build_data = response.json()
build_id = build_data['build_id']
logger.info(f'Build initiated with ID: {build_id}')
# Poll /build_status until the build is complete
start_time = time.time()
timeout = 30 * 60  # 30 minutes in seconds
while True:
if time.time() - start_time > timeout:
logger.error('Build timed out after 30 minutes')
raise RuntimeError('Build timed out after 30 minutes')
status_response = requests.get(
f'{self.api_url}/build_status',
params={'build_id': build_id},
headers=headers,
)
if status_response.status_code != 200:
logger.error(f'Failed to get build status: {status_response.text}')
raise RuntimeError(
f'Failed to get build status: {status_response.text}'
)
status_data = status_response.json()
status = status_data['status']
logger.info(f'Build status: {status}')
if status == 'SUCCESS':
logger.info(f"Successfully built {status_data['image']}")
return status_data['image']
elif status in [
'FAILURE',
'INTERNAL_ERROR',
'TIMEOUT',
'CANCELLED',
'EXPIRED',
]:
error_message = status_data.get(
'error', f'Build failed with status: {status}'
)
logger.error(error_message)
raise RuntimeError(error_message)
# Wait before polling again
time.sleep(5)
def image_exists(self, image_name: str) -> bool:
"""Checks if an image exists in the remote registry using the /image_exists endpoint."""
params = {'image': image_name}
session = requests.Session()
session.headers.update({'X-API-Key': self.api_key})
response = session.get(f'{self.api_url}/image_exists', params=params)
if response.status_code != 200:
logger.error(f'Failed to check image existence: {response.text}')
raise RuntimeError(f'Failed to check image existence: {response.text}')
result = response.json()
if result['exists']:
logger.info(
f"Image {image_name} exists. "
f"Uploaded at: {result['image']['upload_time']}, "
f"Size: {result['image']['image_size_bytes'] / 1024 / 1024:.2f} MB"
)
else:
logger.info(f'Image {image_name} does not exist.')
return result['exists']
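A usage sketch under the assumptions above (the URL, key, and tag are placeholders):

```python
# Hypothetical usage of RemoteRuntimeBuilder (URL/key/tag are placeholders):
builder = RemoteRuntimeBuilder('https://runtime-api.example.com', 'my-api-key')
tag = 'registry.example.com/runtime:od_v0.9.0_image_debian_tag_11'
if not builder.image_exists(tag):
    # build() blocks, polling /build_status every 5s until SUCCESS,
    # and raises RuntimeError on failure or after the 30-minute timeout.
    tag = builder.build(path='./containers/runtime', tags=[tag])
```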

View File

@@ -58,9 +58,7 @@ class ActionRequest(BaseModel):
ROOT_GID = 0
INIT_COMMANDS = [
'git config --global user.name "openhands"',
'git config --global user.email "openhands@all-hands.dev"',
"alias git='git --no-pager'",
'git config --global user.name "openhands" && git config --global user.email "openhands@all-hands.dev" && alias git="git --no-pager"',
]
@@ -187,7 +185,9 @@ class RuntimeClient:
self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
self.shell.expect(self.__bash_expect_regex)
self.shell.sendline(f'cd {work_dir}')
self.shell.sendline(
f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
)
self.shell.expect(self.__bash_expect_regex)
logger.debug(
f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}'

View File

@@ -0,0 +1,424 @@
import asyncio
import os
import ssl
import tempfile
import uuid
from typing import Any, Optional, Type
from zipfile import ZipFile
import aiohttp
import aiohttp.client_exceptions
import tenacity
from openhands.core.config import AppConfig
from openhands.core.logger import openhands_logger as logger
from openhands.events import EventStream
from openhands.events.action import (
BrowseInteractiveAction,
BrowseURLAction,
CmdRunAction,
FileReadAction,
FileWriteAction,
IPythonRunCellAction,
)
from openhands.events.action.action import Action
from openhands.events.observation import (
ErrorObservation,
NullObservation,
Observation,
)
from openhands.events.serialization import event_to_dict, observation_from_dict
from openhands.events.serialization.action import ACTION_TYPE_TO_CLASS
from openhands.runtime.builder.remote import RemoteRuntimeBuilder
from openhands.runtime.plugins import PluginRequirement
from openhands.runtime.runtime import Runtime
from openhands.runtime.utils.runtime_build import build_runtime_image
DEFAULT_RETRY_EXCEPTIONS = [
ssl.SSLCertVerificationError,
aiohttp.ClientError,
aiohttp.client_exceptions.ContentTypeError,
aiohttp.client_exceptions.ClientConnectorCertificateError,
asyncio.TimeoutError,
]
class RemoteRuntime(Runtime):
"""This runtime will connect to a remote od-runtime-client."""
port: int = 60000 # default port for the remote runtime client
def __init__(
self,
config: AppConfig,
event_stream: EventStream,
sid: str = 'default',
plugins: list[PluginRequirement] | None = None,
):
super().__init__(config, event_stream, sid, plugins)
if self.config.sandbox.api_hostname == 'localhost':
self.config.sandbox.api_hostname = 'api.all-hands.dev/v0/runtime'
logger.warning(
'Using localhost as the API hostname is not supported in the RemoteRuntime. Please set a proper hostname.\n'
'Setting it to default value: api.all-hands.dev/v0/runtime'
)
self.api_url = f'https://{self.config.sandbox.api_hostname.rstrip("/")}'
self.session: Optional[aiohttp.ClientSession] = None
self.action_semaphore = asyncio.Semaphore(1) # Ensure one action at a time
if self.config.workspace_base is not None:
logger.warning(
'Setting workspace_base is not supported in the remote runtime.'
)
if self.config.sandbox.api_key is None:
raise ValueError(
'API key is required to use the remote runtime. '
'Please set the API key in the config (config.toml) or as an environment variable (SANDBOX_API_KEY).'
)
self.runtime_builder = RemoteRuntimeBuilder(
self.api_url, self.config.sandbox.api_key
)
self.runtime_id: str | None = None
self.runtime_url: str | None = None
self.instance_id = (
sid + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
)
if self.config.sandbox.runtime_container_image is not None:
raise ValueError(
'Setting runtime_container_image is not supported in the remote runtime.'
)
self.container_image: str = self.config.sandbox.base_container_image
self.container_name = 'od-remote-runtime-' + self.instance_id
logger.debug(f'RemoteRuntime `{sid}` config:\n{self.config}')
async def _send_request(
self,
method: str,
url: str,
retry_exceptions: list[Type[Exception]] | None = None,
**kwargs: Any,
) -> aiohttp.ClientResponse:
if retry_exceptions is None:
retry_exceptions = DEFAULT_RETRY_EXCEPTIONS
session = await self._ensure_session()
def log_retry(retry_state):
exception = retry_state.outcome.exception()
logger.warning(
f'Retry attempt {retry_state.attempt_number} failed with exception: {exception}'
)
@tenacity.retry(
stop=tenacity.stop_after_attempt(10),
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
retry=tenacity.retry_if_exception_type(tuple(retry_exceptions)),
reraise=True,
after=log_retry,
)
async def _send_request_with_retry():
async with session.request(method, url, **kwargs) as response:
await response.read()
return response
return await _send_request_with_retry()
async def ainit(self, env_vars: dict[str, str] | None = None):
# Check if the container image exists
# Use the /registry_prefix endpoint to get the registry prefix
response = await self._send_request('GET', f'{self.api_url}/registry_prefix')
if response.status != 200:
raise RuntimeError(
f'Failed to get registry prefix: {await response.text()}'
)
response_json = await response.json()
registry_prefix = response_json['registry_prefix']
os.environ['OD_RUNTIME_RUNTIME_IMAGE_REPO'] = (
registry_prefix.rstrip('/') + '/runtime'
)
logger.info(
f'Runtime image repo: {os.environ["OD_RUNTIME_RUNTIME_IMAGE_REPO"]}'
)
if self.config.sandbox.runtime_extra_deps:
logger.info(
f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
)
# Build the container image
self.container_image = build_runtime_image(
self.container_image,
self.runtime_builder,
extra_deps=self.config.sandbox.runtime_extra_deps,
)
# Use the /image_exists endpoint to check if the image exists
response = await self._send_request(
'GET',
f'{self.api_url}/image_exists',
params={'image': self.container_image},
)
if response.status != 200 or not (await response.json())['exists']:
raise RuntimeError(f'Container image {self.container_image} does not exist')
# Prepare the request body for the /start endpoint
plugin_arg = ''
if self.plugins is not None and len(self.plugins) > 0:
plugin_arg = (
f'--plugins {" ".join([plugin.name for plugin in self.plugins])} '
)
if self.config.sandbox.browsergym_eval_env is not None:
browsergym_arg = (
f'--browsergym-eval-env {self.config.sandbox.browsergym_eval_env}'
)
else:
browsergym_arg = ''
start_request = {
'image': self.container_image,
'command': (
f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
'PYTHONUNBUFFERED=1 poetry run '
f'python -u -m openhands.runtime.client.client {self.port} '
f'--working-dir {self.sandbox_workspace_dir} '
f'{plugin_arg}'
f'--username {"openhands" if self.config.run_as_openhands else "root"} '
f'--user-id {self.config.sandbox.user_id} '
f'{browsergym_arg}'
),
'working_dir': '/openhands/code/',
'name': self.container_name,
'environment': {'DEBUG': 'true'} if self.config.debug else {},
}
# Start the sandbox using the /start endpoint
response = await self._send_request(
'POST', f'{self.api_url}/start', json=start_request
)
if response.status != 201:
raise RuntimeError(f'Failed to start sandbox: {await response.text()}')
start_response = await response.json()
self.runtime_id = start_response['runtime_id']
self.runtime_url = start_response['url']
logger.info(
f'Sandbox started. Runtime ID: {self.runtime_id}, URL: {self.runtime_url}'
)
# Initialize environment variables
await super().ainit(env_vars)
logger.info(
f'Runtime initialized with plugins: {[plugin.name for plugin in self.plugins]}'
)
logger.info(f'Runtime initialized with env vars: {env_vars}')
assert (
self.runtime_id is not None
), 'Runtime ID is not set. This should never happen.'
assert (
self.runtime_url is not None
), 'Runtime URL is not set. This should never happen.'
async def _ensure_session(self):
if self.session is None or self.session.closed:
self.session = aiohttp.ClientSession(
headers={'X-API-Key': self.config.sandbox.api_key}
)
return self.session
@tenacity.retry(
stop=tenacity.stop_after_attempt(10),
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
retry=tenacity.retry_if_exception_type(RuntimeError),
reraise=True,
)
async def _wait_until_alive(self):
logger.info('Waiting for sandbox to be alive...')
response = await self._send_request('GET', f'{self.runtime_url}/alive')
if response.status == 200:
return
else:
msg = f'Runtime is not alive (id={self.runtime_id}). Status: {response.status}.'
logger.warning(msg)
raise RuntimeError(msg)
@property
def sandbox_workspace_dir(self):
return self.config.workspace_mount_path_in_sandbox
async def close(self):
if self.runtime_id:
try:
response = await self._send_request(
'POST', f'{self.api_url}/stop', json={'runtime_id': self.runtime_id}
)
if response.status != 200:
logger.error(f'Failed to stop sandbox: {await response.text()}')
else:
logger.info(f'Sandbox stopped. Runtime ID: {self.runtime_id}')
except Exception as e:
raise e
finally:
if self.session is not None:
await self.session.close()
self.session = None
async def run_action(self, action: Action) -> Observation:
if action.timeout is None:
action.timeout = self.config.sandbox.timeout
async with self.action_semaphore:
if not action.runnable:
return NullObservation('')
action_type = action.action # type: ignore[attr-defined]
if action_type not in ACTION_TYPE_TO_CLASS:
return ErrorObservation(f'Action {action_type} does not exist.')
if not hasattr(self, action_type):
return ErrorObservation(
f'Action {action_type} is not supported in the current runtime.'
)
await self._wait_until_alive()
assert action.timeout is not None
try:
logger.info('Executing action')
request_body = {'action': event_to_dict(action)}
logger.debug(f'Request body: {request_body}')
response = await self._send_request(
'POST',
f'{self.runtime_url}/execute_action',
json=request_body,
timeout=action.timeout,
retry_exceptions=list(
filter(
lambda e: e != asyncio.TimeoutError,
DEFAULT_RETRY_EXCEPTIONS,
)
),
)
if response.status == 200:
output = await response.json()
obs = observation_from_dict(output)
obs._cause = action.id # type: ignore[attr-defined]
return obs
else:
error_message = await response.text()
logger.error(f'Error from server: {error_message}')
obs = ErrorObservation(f'Action execution failed: {error_message}')
except asyncio.TimeoutError:
logger.error('No response received within the timeout period.')
obs = ErrorObservation('Action execution timed out')
except Exception as e:
logger.error(f'Error during action execution: {e}')
obs = ErrorObservation(f'Action execution failed: {str(e)}')
return obs
async def run(self, action: CmdRunAction) -> Observation:
return await self.run_action(action)
async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
return await self.run_action(action)
async def read(self, action: FileReadAction) -> Observation:
return await self.run_action(action)
async def write(self, action: FileWriteAction) -> Observation:
return await self.run_action(action)
async def browse(self, action: BrowseURLAction) -> Observation:
return await self.run_action(action)
async def browse_interactive(self, action: BrowseInteractiveAction) -> Observation:
return await self.run_action(action)
async def copy_to(
self, host_src: str, sandbox_dest: str, recursive: bool = False
) -> None:
if not os.path.exists(host_src):
raise FileNotFoundError(f'Source file {host_src} does not exist')
await self._wait_until_alive()
try:
if recursive:
with tempfile.NamedTemporaryFile(
suffix='.zip', delete=False
) as temp_zip:
temp_zip_path = temp_zip.name
with ZipFile(temp_zip_path, 'w') as zipf:
for root, _, files in os.walk(host_src):
for file in files:
file_path = os.path.join(root, file)
arcname = os.path.relpath(
file_path, os.path.dirname(host_src)
)
zipf.write(file_path, arcname)
upload_data = {'file': open(temp_zip_path, 'rb')}
else:
upload_data = {'file': open(host_src, 'rb')}
params = {'destination': sandbox_dest, 'recursive': str(recursive).lower()}
response = await self._send_request(
'POST',
f'{self.runtime_url}/upload_file',
data=upload_data,
params=params,
retry_exceptions=list(
filter(
lambda e: e != asyncio.TimeoutError, DEFAULT_RETRY_EXCEPTIONS
)
),
)
if response.status == 200:
logger.info(
f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}. Response: {await response.text()}'
)
return
else:
error_message = await response.text()
raise Exception(f'Copy operation failed: {error_message}')
except asyncio.TimeoutError:
raise TimeoutError('Copy operation timed out')
except Exception as e:
raise RuntimeError(f'Copy operation failed: {str(e)}')
finally:
if recursive:
os.unlink(temp_zip_path)
logger.info(f'Copy completed: host:{host_src} -> runtime:{sandbox_dest}')
async def list_files(self, path: str | None = None) -> list[str]:
await self._wait_until_alive()
try:
data = {}
if path is not None:
data['path'] = path
response = await self._send_request(
'POST',
f'{self.runtime_url}/list_files',
json=data,
retry_exceptions=list(
filter(
lambda e: e != asyncio.TimeoutError, DEFAULT_RETRY_EXCEPTIONS
)
),
)
if response.status == 200:
response_json = await response.json()
assert isinstance(response_json, list)
return response_json
else:
error_message = await response.text()
raise Exception(f'List files operation failed: {error_message}')
except asyncio.TimeoutError:
raise TimeoutError('List files operation timed out')
except Exception as e:
raise RuntimeError(f'List files operation failed: {str(e)}')
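Putting the pieces together, a driver for this runtime might look like the following sketch (config and event_stream are assumed to be set up elsewhere, with config.sandbox.api_key populated from SANDBOX_API_KEY):

```python
# Hypothetical end-to-end sketch for RemoteRuntime (setup assumed elsewhere):
async def demo(config, event_stream):
    runtime = RemoteRuntime(config, event_stream, sid='demo')
    try:
        # ainit() builds or locates the image, then POSTs /start and waits.
        await runtime.ainit()
        obs = await runtime.run(CmdRunAction(command='echo hello'))
        print(obs)
    finally:
        # close() POSTs /stop for the runtime and closes the aiohttp session.
        await runtime.close()
```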

View File

@@ -87,16 +87,16 @@ class Runtime:
def close_sync(self) -> None:
try:
loop = asyncio.get_running_loop()
except RuntimeError:
# No running event loop, use asyncio.run()
asyncio.run(self.close())
else:
# There is a running event loop, create a task
loop = asyncio.get_event_loop()
if loop.is_closed():
return
if loop.is_running():
loop.create_task(self.close())
else:
loop.run_until_complete(self.close())
except RuntimeError:
# Event loop is already closed, nothing to do
pass
# ====================================================================

View File

@@ -13,9 +13,9 @@ import openhands
from openhands.core.logger import openhands_logger as logger
from openhands.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder
RUNTIME_IMAGE_REPO = os.getenv(
'OD_RUNTIME_RUNTIME_IMAGE_REPO', 'ghcr.io/all-hands-ai/runtime'
)
def get_runtime_image_repo():
return os.getenv('OD_RUNTIME_RUNTIME_IMAGE_REPO', 'ghcr.io/all-hands-ai/runtime')
def _get_package_version():
@@ -31,18 +31,27 @@ def _get_package_version():
return pyproject_data['tool']['poetry']['version']
def _create_project_source_dist():
"""Create a source distribution of the project.
def _put_source_code_to_dir(temp_dir: str):
"""Builds the project source tarball directly in temp_dir and unpacks it.
The OpenHands source code ends up in the temp_dir/code directory.
Returns:
- str: The path to the project tarball
Parameters:
- temp_dir (str): The directory to put the source code in
"""
project_root = os.path.dirname(os.path.dirname(os.path.abspath(openhands.__file__)))
logger.info(f'Using project root: {project_root}')
# run "python -m build -s" on project_root to create project tarball
# Fetch the correct version from pyproject.toml
package_version = _get_package_version()
tarball_filename = f'openhands_ai-{package_version}.tar.gz'
tarball_path = os.path.join(temp_dir, tarball_filename)
# Run "python -m build -s" on project_root to create project tarball directly in temp_dir
_cleaned_project_root = project_root.replace(
' ', r'\ '
) # escape spaces in the project root
result = subprocess.run(
'python -m build -s ' + project_root.replace(' ', r'\ '),
f'python -m build -s -o {temp_dir} {_cleaned_project_root}',
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@@ -56,47 +65,20 @@ def _create_project_source_dist():
logger.error(f'Build failed: {result}')
raise Exception(f'Build failed: {result}')
# Fetch the correct version from pyproject.toml
package_version = _get_package_version()
tarball_path = os.path.join(
project_root, 'dist', f'openhands_ai-{package_version}.tar.gz'
)
if not os.path.exists(tarball_path):
logger.error(f'Source distribution not found at {tarball_path}')
raise Exception(f'Source distribution not found at {tarball_path}')
logger.info(f'Source distribution created at {tarball_path}')
return tarball_path
def _put_source_code_to_dir(temp_dir: str):
"""Builds the project source tarball. Copies it to temp_dir and unpacks it.
The OpenHands source code ends up in the temp_dir/code directory
Parameters:
- temp_dir (str): The directory to put the source code in
"""
project_tar = 'project.tar.gz'
project_path = os.path.join(temp_dir, project_tar)
logger.info('Building source distribution...')
# Build the project source tarball
tarball_path = _create_project_source_dist()
filename = os.path.basename(tarball_path)
filename = filename.removesuffix('.tar.gz')
# Move the project tarball to temp_dir
_res = shutil.copy(tarball_path, project_path)
if _res:
os.remove(tarball_path)
logger.info('Source distribution moved to ' + project_path)
# Unzip the tarball
shutil.unpack_archive(project_path, temp_dir)
shutil.unpack_archive(tarball_path, temp_dir)
# Remove the tarball
os.remove(project_path)
os.remove(tarball_path)
# Rename the directory containing the code to 'code'
os.rename(os.path.join(temp_dir, filename), os.path.join(temp_dir, 'code'))
os.rename(
os.path.join(temp_dir, f'openhands_ai-{package_version}'),
os.path.join(temp_dir, 'code'),
)
logger.info(f'Unpacked source code directory: {os.path.join(temp_dir, "code")}')
@@ -187,7 +169,7 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
- tuple[str, str]: The Docker repo and tag of the Docker image
"""
if RUNTIME_IMAGE_REPO in base_image:
if get_runtime_image_repo() in base_image:
logger.info(
f'The provided image [{base_image}] is already a valid runtime image.\n'
f'Will try to reuse it as is.'
@@ -201,9 +183,11 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
if ':' not in base_image:
base_image = base_image + ':latest'
[repo, tag] = base_image.split(':')
repo = repo.replace('/', '___')
# replace '/' with '_s_' to avoid '/' in the image name
# while making it a valid docker image name
repo = repo.replace('/', '_s_')
od_version = _get_package_version()
return RUNTIME_IMAGE_REPO, f'od_v{od_version}_image_{repo}_tag_{tag}'
return get_runtime_image_repo(), f'od_v{od_version}_image_{repo}_tag_{tag}'
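Concretely, the scheme above maps a Docker Hub base image as follows (mirroring the unit test later in this diff; the version number is illustrative):

```python
# Worked example of the repo/tag mapping above (od_version is illustrative):
base_image = 'nikolaik/python-nodejs:python3.11-nodejs22'
repo, tag = base_image.split(':')
repo = repo.replace('/', '_s_')  # 'nikolaik_s_python-nodejs'
od_version = '0.9.1'             # stand-in for _get_package_version()
print(f'od_v{od_version}_image_{repo}_tag_{tag}')
# od_v0.9.1_image_nikolaik_s_python-nodejs_tag_python3.11-nodejs22
```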
def build_runtime_image(
@@ -368,16 +352,16 @@ def _build_sandbox_image(
target_image_generic_name = f'{target_image_repo}:{target_image_tag}'
try:
success = runtime_builder.build(
image_name = runtime_builder.build(
path=docker_folder, tags=[target_image_hash_name, target_image_generic_name]
)
if not success:
if not image_name:
raise RuntimeError(f'Build failed for image {target_image_hash_name}')
except Exception as e:
logger.error(f'Sandbox image build failed: {e}')
raise
return target_image_hash_name
return image_name
if __name__ == '__main__':

poetry.lock (generated)
View File

@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "aenum"
@@ -1607,6 +1607,20 @@ tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
tests = ["Werkzeug (>=1.0.1)", "absl-py", "accelerate", "bert-score (>=0.3.6)", "cer (>=1.2.0)", "charcut (>=1.1.1)", "jiwer", "mauve-text", "nltk", "pytest", "pytest-datadir", "pytest-xdist", "requests-file (>=1.5.1)", "rouge-score (>=0.1.2)", "sacrebleu", "sacremoses", "scikit-learn", "scipy (>=1.10.0)", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1,<=2.10)", "texttable (>=1.6.3)", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "torch", "transformers", "trectools", "unidecode (>=1.3.4)"]
torch = ["torch"]
[[package]]
name = "execnet"
version = "2.1.1"
description = "execnet: rapid multi-Python deployment"
optional = false
python-versions = ">=3.8"
files = [
{file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
{file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
]
[package.extras]
testing = ["hatch", "pre-commit", "pytest", "tox"]
[[package]]
name = "executing"
version = "2.0.1"
@@ -6542,6 +6556,26 @@ files = [
py = "*"
pytest = ">=3.10"
[[package]]
name = "pytest-xdist"
version = "3.6.1"
description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
optional = false
python-versions = ">=3.8"
files = [
{file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"},
{file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"},
]
[package.dependencies]
execnet = ">=2.1"
pytest = ">=7.0.0"
[package.extras]
psutil = ["psutil (>=3.0)"]
setproctitle = ["setproctitle"]
testing = ["filelock"]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -9477,4 +9511,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "b7a2c28cf99b0e85de3148ab3edbeaf1e721ad8430f8c57cb0cc7f6ccafc5666"
content-hash = "d69e66db7f0ba4063db8c7d5f98313f536c514e843637ebdccc2b5ac02f0d54c"

View File

@@ -75,6 +75,7 @@ pytest = "*"
pytest-cov = "*"
pytest-asyncio = "*"
pytest-forked = "*"
pytest-xdist = "*"
flake8 = "*"
openai = "*"
opencv-python = "*"
@@ -84,6 +85,7 @@ reportlab = "*"
[tool.coverage.run]
concurrency = ["gevent"]
[tool.poetry.group.runtime.dependencies]
jupyterlab = "*"
notebook = "*"
@@ -114,6 +116,7 @@ ignore = ["D1"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.poetry.group.evaluation.dependencies]
streamlit = "*"
whatthepatch = "*"

View File

@@ -18,7 +18,7 @@ from openhands.events.observation.delegate import AgentDelegateObservation
from openhands.runtime import get_runtime_cls
TEST_RUNTIME = os.getenv('TEST_RUNTIME')
assert TEST_RUNTIME in ['eventstream', 'server']
assert TEST_RUNTIME in ['eventstream', 'remote']
_ = get_runtime_cls(TEST_RUNTIME) # make sure it does not raise an error
CONFIG = AppConfig(

View File

@@ -9,6 +9,7 @@ from openhands.core.config import AppConfig, SandboxConfig, load_from_env
from openhands.events import EventStream
from openhands.runtime.client.runtime import EventStreamRuntime
from openhands.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
from openhands.runtime.remote.runtime import RemoteRuntime
from openhands.runtime.runtime import Runtime
from openhands.storage import get_file_store
@@ -34,6 +35,8 @@ def get_box_classes():
runtime = TEST_RUNTIME
if runtime.lower() == 'eventstream':
return [EventStreamRuntime]
elif runtime.lower() == 'remote':
return [RemoteRuntime]
else:
raise ValueError(f'Invalid runtime: {runtime}')

View File

@@ -10,7 +10,6 @@ from conftest import _load_runtime
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.observation import CmdOutputObservation
from openhands.runtime.client.runtime import EventStreamRuntime
# ============================================================================================================================
# Bash-specific tests
@@ -517,10 +516,11 @@ async def test_copy_non_existent_file(temp_dir, box_class):
@pytest.mark.asyncio
async def test_keep_prompt(temp_dir):
# only EventStreamRuntime supports keep_prompt
async def test_keep_prompt(box_class, temp_dir):
runtime = await _load_runtime(
temp_dir, box_class=EventStreamRuntime, run_as_openhands=False
temp_dir,
box_class=box_class,
run_as_openhands=False,
)
action = CmdRunAction(command='touch /workspace/test_file.txt')

View File

@@ -16,7 +16,6 @@ from openhands.events.observation import (
BrowserOutputObservation,
CmdOutputObservation,
)
from openhands.runtime.client.runtime import EventStreamRuntime
# ============================================================================================================================
# Browsing tests
@@ -74,11 +73,10 @@ async def test_simple_browse(temp_dir, box_class, run_as_openhands):
@pytest.mark.asyncio
async def test_browsergym_eval_env(temp_dir):
async def test_browsergym_eval_env(box_class, temp_dir):
runtime = await _load_runtime(
temp_dir,
# only supported in event stream runtime
box_class=EventStreamRuntime,
box_class=box_class,
run_as_openhands=False, # need root permission to access file
base_container_image='xingyaoww/od-eval-miniwob:v1.0',
browsergym_eval_env='browsergym/miniwob.choose-list',

View File

@@ -8,11 +8,11 @@ import toml
from pytest import TempPathFactory
from openhands.runtime.utils.runtime_build import (
RUNTIME_IMAGE_REPO,
_generate_dockerfile,
_get_package_version,
_put_source_code_to_dir,
build_runtime_image,
get_runtime_image_repo,
get_runtime_image_repo_and_tag,
prep_docker_build_folder,
)
@@ -175,22 +175,22 @@ def test_get_runtime_image_repo_and_tag_eventstream():
base_image = 'debian:11'
img_repo, img_tag = get_runtime_image_repo_and_tag(base_image)
assert (
img_repo == f'{RUNTIME_IMAGE_REPO}'
img_repo == f'{get_runtime_image_repo()}'
and img_tag == f'{OD_VERSION}_image_debian_tag_11'
)
base_image = 'nikolaik/python-nodejs:python3.11-nodejs22'
img_repo, img_tag = get_runtime_image_repo_and_tag(base_image)
assert (
img_repo == f'{RUNTIME_IMAGE_REPO}'
img_repo == f'{get_runtime_image_repo()}'
and img_tag
== f'{OD_VERSION}_image_nikolaik___python-nodejs_tag_python3.11-nodejs22'
== f'{OD_VERSION}_image_nikolaik_s_python-nodejs_tag_python3.11-nodejs22'
)
base_image = 'ubuntu'
img_repo, img_tag = get_runtime_image_repo_and_tag(base_image)
assert (
img_repo == f'{RUNTIME_IMAGE_REPO}'
img_repo == f'{get_runtime_image_repo()}'
and img_tag == f'{OD_VERSION}_image_ubuntu_tag_latest'
)
@@ -207,18 +207,18 @@ def test_build_runtime_image_from_scratch(temp_dir):
mock_runtime_builder = MagicMock()
mock_runtime_builder.image_exists.return_value = False
mock_runtime_builder.build.return_value = (
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
f'{get_runtime_image_repo()}:{from_scratch_hash}'
)
image_name = build_runtime_image(base_image, mock_runtime_builder)
mock_runtime_builder.build.assert_called_once_with(
path=ANY,
tags=[
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}',
f'{RUNTIME_IMAGE_REPO}:{OD_VERSION}_image_debian_tag_11',
f'{get_runtime_image_repo()}:{from_scratch_hash}',
f'{get_runtime_image_repo()}:{OD_VERSION}_image_debian_tag_11',
],
)
assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
assert image_name == f'{get_runtime_image_repo()}:{from_scratch_hash}'
def test_build_runtime_image_exact_hash_exist(temp_dir):
@@ -233,11 +233,11 @@ def test_build_runtime_image_exact_hash_exist(temp_dir):
mock_runtime_builder = MagicMock()
mock_runtime_builder.image_exists.return_value = True
mock_runtime_builder.build.return_value = (
f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
f'{get_runtime_image_repo()}:{from_scratch_hash}'
)
image_name = build_runtime_image(base_image, mock_runtime_builder)
assert image_name == f'{RUNTIME_IMAGE_REPO}:{from_scratch_hash}'
assert image_name == f'{get_runtime_image_repo()}:{from_scratch_hash}'
mock_runtime_builder.build.assert_not_called()