From bbfdc62139d73e3b573f988b5b3870519ab8651e Mon Sep 17 00:00:00 2001 From: tofarr Date: Mon, 3 Feb 2025 08:44:09 -0700 Subject: [PATCH] Fix for issue where retries continue on a closed runtime (#6564) Co-authored-by: Xingyao Wang --- evaluation/benchmarks/EDA/run_infer.py | 1 + .../benchmarks/agent_bench/run_infer.py | 1 + .../benchmarks/aider_bench/run_infer.py | 1 + evaluation/benchmarks/biocoder/run_infer.py | 1 + evaluation/benchmarks/bird/run_infer.py | 1 + .../browsing_delegation/run_infer.py | 1 + .../benchmarks/commit0_bench/run_infer.py | 1 + .../benchmarks/discoverybench/run_infer.py | 1 + evaluation/benchmarks/gaia/run_infer.py | 1 + evaluation/benchmarks/gorilla/run_infer.py | 1 + evaluation/benchmarks/gpqa/run_infer.py | 1 + .../benchmarks/humanevalfix/run_infer.py | 1 + .../benchmarks/logic_reasoning/run_infer.py | 1 + evaluation/benchmarks/miniwob/run_infer.py | 1 + evaluation/benchmarks/mint/run_infer.py | 1 + .../benchmarks/scienceagentbench/run_infer.py | 1 + evaluation/benchmarks/swe_bench/run_infer.py | 1 + .../benchmarks/the_agent_company/run_infer.py | 1 + evaluation/benchmarks/toolqa/run_infer.py | 1 + .../benchmarks/visualwebarena/run_infer.py | 1 + evaluation/benchmarks/webarena/run_infer.py | 1 + openhands/core/config/sandbox_config.py | 1 + .../runtime/impl/remote/remote_runtime.py | 21 +++++++++++++------ .../standalone_conversation_manager.py | 2 +- 24 files changed, 38 insertions(+), 7 deletions(-) diff --git a/evaluation/benchmarks/EDA/run_infer.py b/evaluation/benchmarks/EDA/run_infer.py index 26756d3ea5..2d65e19438 100644 --- a/evaluation/benchmarks/EDA/run_infer.py +++ b/evaluation/benchmarks/EDA/run_infer.py @@ -69,6 +69,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=False, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/agent_bench/run_infer.py b/evaluation/benchmarks/agent_bench/run_infer.py 
index fb221e15ba..8c1f08b377 100644 --- a/evaluation/benchmarks/agent_bench/run_infer.py +++ b/evaluation/benchmarks/agent_bench/run_infer.py @@ -53,6 +53,7 @@ def get_config( remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), keep_runtime_alive=False, remote_runtime_init_timeout=3600, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/aider_bench/run_infer.py b/evaluation/benchmarks/aider_bench/run_infer.py index 926f4634ed..8045f948d3 100644 --- a/evaluation/benchmarks/aider_bench/run_infer.py +++ b/evaluation/benchmarks/aider_bench/run_infer.py @@ -61,6 +61,7 @@ def get_config( remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), keep_runtime_alive=False, remote_runtime_init_timeout=1800, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/biocoder/run_infer.py b/evaluation/benchmarks/biocoder/run_infer.py index dc6dc8fd3b..20f3dc4870 100644 --- a/evaluation/benchmarks/biocoder/run_infer.py +++ b/evaluation/benchmarks/biocoder/run_infer.py @@ -67,6 +67,7 @@ def get_config( base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE, enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/bird/run_infer.py b/evaluation/benchmarks/bird/run_infer.py index 8570e07b66..02d92aa3ee 100644 --- a/evaluation/benchmarks/bird/run_infer.py +++ b/evaluation/benchmarks/bird/run_infer.py @@ -80,6 +80,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/browsing_delegation/run_infer.py b/evaluation/benchmarks/browsing_delegation/run_infer.py index 33d6ef4805..164e117e26 100644 --- 
a/evaluation/benchmarks/browsing_delegation/run_infer.py +++ b/evaluation/benchmarks/browsing_delegation/run_infer.py @@ -45,6 +45,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=False, use_host_network=False, + remote_runtime_enable_retries=True, ), workspace_base=None, workspace_mount_path=None, diff --git a/evaluation/benchmarks/commit0_bench/run_infer.py b/evaluation/benchmarks/commit0_bench/run_infer.py index e690952ab9..2e0fc528f7 100644 --- a/evaluation/benchmarks/commit0_bench/run_infer.py +++ b/evaluation/benchmarks/commit0_bench/run_infer.py @@ -135,6 +135,7 @@ def get_config( remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), keep_runtime_alive=False, remote_runtime_init_timeout=3600, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/discoverybench/run_infer.py b/evaluation/benchmarks/discoverybench/run_infer.py index 30af2d19d4..fc5d74b135 100644 --- a/evaluation/benchmarks/discoverybench/run_infer.py +++ b/evaluation/benchmarks/discoverybench/run_infer.py @@ -71,6 +71,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/gaia/run_infer.py b/evaluation/benchmarks/gaia/run_infer.py index b4c704e497..a8b4428192 100644 --- a/evaluation/benchmarks/gaia/run_infer.py +++ b/evaluation/benchmarks/gaia/run_infer.py @@ -56,6 +56,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/gorilla/run_infer.py b/evaluation/benchmarks/gorilla/run_infer.py index e97be5ed83..d107151fc5 100644 --- a/evaluation/benchmarks/gorilla/run_infer.py +++ b/evaluation/benchmarks/gorilla/run_infer.py @@ 
-49,6 +49,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/gpqa/run_infer.py b/evaluation/benchmarks/gpqa/run_infer.py index cf4106b971..b92a30b859 100644 --- a/evaluation/benchmarks/gpqa/run_infer.py +++ b/evaluation/benchmarks/gpqa/run_infer.py @@ -70,6 +70,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/humanevalfix/run_infer.py b/evaluation/benchmarks/humanevalfix/run_infer.py index fec040079c..c2cccf90c7 100644 --- a/evaluation/benchmarks/humanevalfix/run_infer.py +++ b/evaluation/benchmarks/humanevalfix/run_infer.py @@ -91,6 +91,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/logic_reasoning/run_infer.py b/evaluation/benchmarks/logic_reasoning/run_infer.py index acd07edef2..e37c5b4ab0 100644 --- a/evaluation/benchmarks/logic_reasoning/run_infer.py +++ b/evaluation/benchmarks/logic_reasoning/run_infer.py @@ -55,6 +55,7 @@ def get_config( enable_auto_lint=True, use_host_network=False, runtime_extra_deps='$OH_INTERPRETER_PATH -m pip install scitools-pyke', + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/miniwob/run_infer.py b/evaluation/benchmarks/miniwob/run_infer.py index acc1431c81..023cbe9cab 100644 --- a/evaluation/benchmarks/miniwob/run_infer.py +++ b/evaluation/benchmarks/miniwob/run_infer.py @@ -70,6 +70,7 @@ def get_config( remote_runtime_init_timeout=1800, keep_runtime_alive=False, timeout=120, + 
remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/mint/run_infer.py b/evaluation/benchmarks/mint/run_infer.py index ddfef0ea68..4c356f26d9 100644 --- a/evaluation/benchmarks/mint/run_infer.py +++ b/evaluation/benchmarks/mint/run_infer.py @@ -113,6 +113,7 @@ def get_config( enable_auto_lint=True, use_host_network=False, runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}', + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/scienceagentbench/run_infer.py b/evaluation/benchmarks/scienceagentbench/run_infer.py index 2ca5c2b403..09619fb718 100644 --- a/evaluation/benchmarks/scienceagentbench/run_infer.py +++ b/evaluation/benchmarks/scienceagentbench/run_infer.py @@ -73,6 +73,7 @@ def get_config( api_key=os.environ.get('ALLHANDS_API_KEY', None), remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'), keep_runtime_alive=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index e1a1764f21..5e3f0e6a5b 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -144,6 +144,7 @@ def get_config( dataset_name=metadata.dataset, instance_id=instance['instance_id'], ), + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py index 376df6c47c..5cd7c027e2 100644 --- a/evaluation/benchmarks/the_agent_company/run_infer.py +++ b/evaluation/benchmarks/the_agent_company/run_infer.py @@ -50,6 +50,7 @@ def get_config( # large enough timeout, since some testcases take very long to run timeout=300, api_key=os.environ.get('ALLHANDS_API_KEY', None), + 
remote_runtime_enable_retries=True, ), # we mount trajectories path so that trajectories, generated by OpenHands # controller, can be accessible to the evaluator file in the runtime container diff --git a/evaluation/benchmarks/toolqa/run_infer.py b/evaluation/benchmarks/toolqa/run_infer.py index 8306292d8f..45b9febed2 100644 --- a/evaluation/benchmarks/toolqa/run_infer.py +++ b/evaluation/benchmarks/toolqa/run_infer.py @@ -50,6 +50,7 @@ def get_config( base_container_image='python:3.12-bookworm', enable_auto_lint=True, use_host_network=False, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/visualwebarena/run_infer.py b/evaluation/benchmarks/visualwebarena/run_infer.py index 5010daa42e..8986d3ab8f 100644 --- a/evaluation/benchmarks/visualwebarena/run_infer.py +++ b/evaluation/benchmarks/visualwebarena/run_infer.py @@ -79,6 +79,7 @@ def get_config( 'VWA_HOMEPAGE': f'{base_url}:4399', }, timeout=300, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/evaluation/benchmarks/webarena/run_infer.py b/evaluation/benchmarks/webarena/run_infer.py index 79b7fc4371..ad846190d8 100644 --- a/evaluation/benchmarks/webarena/run_infer.py +++ b/evaluation/benchmarks/webarena/run_infer.py @@ -71,6 +71,7 @@ def get_config( 'MAP': f'{base_url}:3000', 'HOMEPAGE': f'{base_url}:4399', }, + remote_runtime_enable_retries=True, ), # do not mount workspace workspace_base=None, diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index 55ff384e0c..12edbbd4d9 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -51,6 +51,7 @@ class SandboxConfig(BaseModel): timeout: int = Field(default=120) remote_runtime_init_timeout: int = Field(default=180) remote_runtime_api_timeout: int = Field(default=10) + remote_runtime_enable_retries: bool = Field(default=False) enable_auto_lint: bool = Field( 
default=False # once enabled, OpenHands would lint files after editing ) diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index 02e31fa364..eb1b58440b 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -291,7 +291,7 @@ class RemoteRuntime(ActionExecutionClient): stop=tenacity.stop_after_delay( self.config.sandbox.remote_runtime_init_timeout ) - | stop_if_should_exit(), + | stop_if_should_exit() | self._stop_if_closed, reraise=True, retry=tenacity.retry_if_exception_type(AgentRuntimeNotReadyError), wait=tenacity.wait_fixed(2), @@ -388,12 +388,18 @@ class RemoteRuntime(ActionExecutionClient): ) raise - @tenacity.retry( - retry=tenacity.retry_if_exception_type(ConnectionError), - stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(), - wait=tenacity.wait_exponential(multiplier=1, min=4, max=60), - ) def _send_action_server_request(self, method, url, **kwargs): + if not self.config.sandbox.remote_runtime_enable_retries: + return self._send_action_server_request_impl(method, url, **kwargs) + + retry_decorator = tenacity.retry( + retry=tenacity.retry_if_exception_type(ConnectionError), + stop=tenacity.stop_after_attempt(3) | stop_if_should_exit() | self._stop_if_closed, + wait=tenacity.wait_exponential(multiplier=1, min=4, max=60), + ) + return retry_decorator(self._send_action_server_request_impl)(method, url, **kwargs) + + def _send_action_server_request_impl(self, method, url, **kwargs): try: return super()._send_action_server_request(method, url, **kwargs) except requests.Timeout: @@ -424,3 +430,6 @@ class RemoteRuntime(ActionExecutionClient): ) from e else: raise e + + def _stop_if_closed(self, retry_state: tenacity.RetryCallState) -> bool: + return self._runtime_closed diff --git a/openhands/server/conversation_manager/standalone_conversation_manager.py b/openhands/server/conversation_manager/standalone_conversation_manager.py index
078f012a53..a1748038d6 100644 --- a/openhands/server/conversation_manager/standalone_conversation_manager.py +++ b/openhands/server/conversation_manager/standalone_conversation_manager.py @@ -150,7 +150,7 @@ class StandaloneConversationManager(ConversationManager): ) return except Exception as e: - logger.warning(f'error_cleaning_stale: {str(e)}') + logger.error(f'error_cleaning_stale: {str(e)}') await asyncio.sleep(_CLEANUP_INTERVAL) async def get_running_agent_loops(