mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Fix for issue where retries continue on a closed runtime (#6564)
Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
This commit is contained in:
parent
622fc5213d
commit
bbfdc62139
@ -69,6 +69,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -53,6 +53,7 @@ def get_config(
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -61,6 +61,7 @@ def get_config(
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=1800,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -67,6 +67,7 @@ def get_config(
|
||||
base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -80,6 +80,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -45,6 +45,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=False,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
workspace_base=None,
|
||||
workspace_mount_path=None,
|
||||
|
||||
@ -135,6 +135,7 @@ def get_config(
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_init_timeout=3600,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -71,6 +71,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -56,6 +56,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -49,6 +49,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -70,6 +70,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -91,6 +91,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -55,6 +55,7 @@ def get_config(
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps='$OH_INTERPRETER_PATH -m pip install scitools-pyke',
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -70,6 +70,7 @@ def get_config(
|
||||
remote_runtime_init_timeout=1800,
|
||||
keep_runtime_alive=False,
|
||||
timeout=120,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -113,6 +113,7 @@ def get_config(
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -73,6 +73,7 @@ def get_config(
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
|
||||
keep_runtime_alive=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -144,6 +144,7 @@ def get_config(
|
||||
dataset_name=metadata.dataset,
|
||||
instance_id=instance['instance_id'],
|
||||
),
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -50,6 +50,7 @@ def get_config(
|
||||
# large enough timeout, since some testcases take very long to run
|
||||
timeout=300,
|
||||
api_key=os.environ.get('ALLHANDS_API_KEY', None),
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# we mount trajectories path so that trajectories, generated by OpenHands
|
||||
# controller, can be accessible to the evaluator file in the runtime container
|
||||
|
||||
@ -50,6 +50,7 @@ def get_config(
|
||||
base_container_image='python:3.12-bookworm',
|
||||
enable_auto_lint=True,
|
||||
use_host_network=False,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -79,6 +79,7 @@ def get_config(
|
||||
'VWA_HOMEPAGE': f'{base_url}:4399',
|
||||
},
|
||||
timeout=300,
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -71,6 +71,7 @@ def get_config(
|
||||
'MAP': f'{base_url}:3000',
|
||||
'HOMEPAGE': f'{base_url}:4399',
|
||||
},
|
||||
remote_runtime_enable_retries=True,
|
||||
),
|
||||
# do not mount workspace
|
||||
workspace_base=None,
|
||||
|
||||
@ -51,6 +51,7 @@ class SandboxConfig(BaseModel):
|
||||
timeout: int = Field(default=120)
|
||||
remote_runtime_init_timeout: int = Field(default=180)
|
||||
remote_runtime_api_timeout: int = Field(default=10)
|
||||
remote_runtime_enable_retries: bool = Field(default=False)
|
||||
enable_auto_lint: bool = Field(
|
||||
default=False # once enabled, OpenHands would lint files after editing
|
||||
)
|
||||
|
||||
@ -291,7 +291,7 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
stop=tenacity.stop_after_delay(
|
||||
self.config.sandbox.remote_runtime_init_timeout
|
||||
)
|
||||
| stop_if_should_exit(),
|
||||
| stop_if_should_exit() | self._stop_if_closed,
|
||||
reraise=True,
|
||||
retry=tenacity.retry_if_exception_type(AgentRuntimeNotReadyError),
|
||||
wait=tenacity.wait_fixed(2),
|
||||
@ -388,12 +388,18 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
)
|
||||
raise
|
||||
|
||||
@tenacity.retry(
|
||||
retry=tenacity.retry_if_exception_type(ConnectionError),
|
||||
stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
|
||||
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
|
||||
)
|
||||
def _send_action_server_request(self, method, url, **kwargs):
    """Send a request to the action server, retrying only when enabled.

    When ``self.config.sandbox.remote_runtime_enable_retries`` is false the
    request is delegated once to ``_send_action_server_request_impl``.
    Otherwise that same implementation is wrapped in a tenacity retry that
    re-attempts on ``ConnectionError`` with exponential backoff and stops
    after three attempts, when ``stop_if_should_exit()`` fires, or when
    ``_stop_if_closed`` reports true.

    Args:
        method: Passed through unchanged to the implementation.
        url: Passed through unchanged to the implementation.
        **kwargs: Extra keyword arguments forwarded to the implementation.

    Returns:
        Whatever ``_send_action_server_request_impl`` returns.
    """
    if not self.config.sandbox.remote_runtime_enable_retries:
        # Delegate to the implementation, not to this method: calling
        # ourselves here would recurse without bound.
        return self._send_action_server_request_impl(method, url, **kwargs)

    # NOTE(review): ``reraise`` is not set, so exhausted retries presumably
    # surface as ``tenacity.RetryError`` rather than the original
    # ``ConnectionError`` -- confirm this is what callers expect.
    retry_decorator = tenacity.retry(
        retry=tenacity.retry_if_exception_type(ConnectionError),
        stop=(
            tenacity.stop_after_attempt(3)
            | stop_if_should_exit()
            | self._stop_if_closed
        ),
        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
    )
    return retry_decorator(self._send_action_server_request_impl)(
        method, url, **kwargs
    )
|
||||
|
||||
def _send_action_server_request_impl(self, method, url, **kwargs):
|
||||
try:
|
||||
return super()._send_action_server_request(method, url, **kwargs)
|
||||
except requests.Timeout:
|
||||
@ -424,3 +430,6 @@ class RemoteRuntime(ActionExecutionClient):
|
||||
) from e
|
||||
else:
|
||||
raise e
|
||||
|
||||
def _stop_if_closed(self, retry_state: tenacity.RetryCallState) -> bool:
    """Stop predicate for retries: report the runtime's closed flag.

    ``retry_state`` is accepted so the method can be used where a
    stop callable taking the retry state is expected; it is not inspected.
    The result is the current value of ``self._runtime_closed``.
    """
    closed = self._runtime_closed
    return closed
|
||||
|
||||
@ -150,7 +150,7 @@ class StandaloneConversationManager(ConversationManager):
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f'error_cleaning_stale: {str(e)}')
|
||||
logger.error(f'error_cleaning_stale')
|
||||
await asyncio.sleep(_CLEANUP_INTERVAL)
|
||||
|
||||
async def get_running_agent_loops(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user