From 9b262dd057de603cce86aa89f1226a7dfd992194 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sun, 16 Mar 2025 21:18:54 -0400 Subject: [PATCH] fix retry on ConnectionError & retry on remote runtime by default (#7294) --- openhands/core/config/sandbox_config.py | 3 ++- openhands/runtime/impl/remote/remote_runtime.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index 91055853c9..5cdecea8ef 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -15,6 +15,7 @@ class SandboxConfig(BaseModel): timeout: The timeout for the default sandbox action execution. remote_runtime_init_timeout: The timeout for the remote runtime to start. remote_runtime_api_timeout: The timeout for the remote runtime API requests. + remote_runtime_enable_retries: Whether to enable retries (on recoverable errors like requests.ConnectionError) for the remote runtime API requests. enable_auto_lint: Whether to enable auto-lint. use_host_network: Whether to use the host network. runtime_binding_address: The binding address for the runtime ports. It specifies which network interface on the host machine Docker should bind the runtime ports to. @@ -53,7 +54,7 @@ class SandboxConfig(BaseModel): timeout: int = Field(default=120) remote_runtime_init_timeout: int = Field(default=180) remote_runtime_api_timeout: int = Field(default=10) - remote_runtime_enable_retries: bool = Field(default=False) + remote_runtime_enable_retries: bool = Field(default=True) remote_runtime_class: str | None = Field( default=None ) # can be "None" (default to gvisor) or "sysbox" (support docker inside runtime + more stable) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 428760001e..6492dbc724 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -1,3 +1,4 @@ +import logging import os from typing import Callable from urllib.parse import urlparse @@ -425,10 +426,11 @@ class RemoteRuntime(ActionExecutionClient): return self._send_action_server_request_impl(method, url, **kwargs) retry_decorator = tenacity.retry( - retry=tenacity.retry_if_exception_type(ConnectionError), + retry=tenacity.retry_if_exception_type(requests.ConnectionError), stop=tenacity.stop_after_attempt(3) | stop_if_should_exit() | self._stop_if_closed, + before_sleep=tenacity.before_sleep_log(logger, logging.WARNING), wait=tenacity.wait_exponential(multiplier=1, min=4, max=60), ) return retry_decorator(self._send_action_server_request_impl)(