mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 13:47:19 +08:00
fix: improve remote runtime reliability on large-scale evaluation (#4869)
This commit is contained in:
@@ -83,6 +83,7 @@ def get_config(instance: pd.Series) -> AppConfig:
|
||||
timeout=1800,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@@ -146,6 +146,7 @@ def get_config(
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_remote_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@@ -14,7 +14,8 @@ class SandboxConfig:
|
||||
base_container_image: The base container image from which to build the runtime image.
|
||||
runtime_container_image: The runtime container image to use.
|
||||
user_id: The user ID for the sandbox.
|
||||
timeout: The timeout for the sandbox.
|
||||
timeout: The timeout for the default sandbox action execution.
|
||||
remote_runtime_init_timeout: The timeout for the remote runtime to start.
|
||||
enable_auto_lint: Whether to enable auto-lint.
|
||||
use_host_network: Whether to use the host network.
|
||||
initialize_plugins: Whether to initialize plugins.
|
||||
@@ -41,6 +42,7 @@ class SandboxConfig:
|
||||
runtime_container_image: str | None = None
|
||||
user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
|
||||
timeout: int = 120
|
||||
remote_runtime_init_timeout: int = 180
|
||||
enable_auto_lint: bool = (
|
||||
False # once enabled, OpenHands would lint files after editing
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
from zipfile import ZipFile
|
||||
|
||||
@@ -260,13 +260,19 @@ class RemoteRuntime(Runtime):
|
||||
{'X-Session-API-Key': start_response['session_api_key']}
|
||||
)
|
||||
|
||||
@tenacity.retry(
|
||||
stop=tenacity.stop_after_delay(180) | stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
)
|
||||
def _wait_until_alive(self):
|
||||
retry_decorator = tenacity.retry(
|
||||
stop=tenacity.stop_after_delay(
|
||||
self.config.sandbox.remote_runtime_init_timeout
|
||||
)
|
||||
| stop_if_should_exit(),
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
)
|
||||
return retry_decorator(self._wait_until_alive_impl)()
|
||||
|
||||
def _wait_until_alive_impl(self):
|
||||
self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
|
||||
runtime_info_response = self._send_request(
|
||||
'GET',
|
||||
|
||||
Reference in New Issue
Block a user