Fix for issue where retries continue on a closed runtime (#6564)

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
This commit is contained in:
tofarr 2025-02-03 08:44:09 -07:00 committed by GitHub
parent 622fc5213d
commit bbfdc62139
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 38 additions and 7 deletions

View File

@ -69,6 +69,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=False,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -53,6 +53,7 @@ def get_config(
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_runtime_alive=False,
remote_runtime_init_timeout=3600,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -61,6 +61,7 @@ def get_config(
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_runtime_alive=False,
remote_runtime_init_timeout=1800,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -67,6 +67,7 @@ def get_config(
base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -80,6 +80,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -45,6 +45,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=False,
use_host_network=False,
remote_runtime_enable_retries=True,
),
workspace_base=None,
workspace_mount_path=None,

View File

@ -135,6 +135,7 @@ def get_config(
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_runtime_alive=False,
remote_runtime_init_timeout=3600,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -71,6 +71,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -56,6 +56,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -49,6 +49,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -70,6 +70,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -91,6 +91,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -55,6 +55,7 @@ def get_config(
enable_auto_lint=True,
use_host_network=False,
runtime_extra_deps='$OH_INTERPRETER_PATH -m pip install scitools-pyke',
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -70,6 +70,7 @@ def get_config(
remote_runtime_init_timeout=1800,
keep_runtime_alive=False,
timeout=120,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -113,6 +113,7 @@ def get_config(
enable_auto_lint=True,
use_host_network=False,
runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -73,6 +73,7 @@ def get_config(
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_runtime_alive=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -144,6 +144,7 @@ def get_config(
dataset_name=metadata.dataset,
instance_id=instance['instance_id'],
),
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -50,6 +50,7 @@ def get_config(
# large enough timeout, since some testcases take very long to run
timeout=300,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_enable_retries=True,
),
# we mount trajectories path so that trajectories, generated by OpenHands
# controller, can be accessible to the evaluator file in the runtime container

View File

@ -50,6 +50,7 @@ def get_config(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -79,6 +79,7 @@ def get_config(
'VWA_HOMEPAGE': f'{base_url}:4399',
},
timeout=300,
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -71,6 +71,7 @@ def get_config(
'MAP': f'{base_url}:3000',
'HOMEPAGE': f'{base_url}:4399',
},
remote_runtime_enable_retries=True,
),
# do not mount workspace
workspace_base=None,

View File

@ -51,6 +51,7 @@ class SandboxConfig(BaseModel):
timeout: int = Field(default=120)
remote_runtime_init_timeout: int = Field(default=180)
remote_runtime_api_timeout: int = Field(default=10)
remote_runtime_enable_retries: bool = Field(default=False)
enable_auto_lint: bool = Field(
default=False # once enabled, OpenHands would lint files after editing
)

View File

@ -291,7 +291,7 @@ class RemoteRuntime(ActionExecutionClient):
stop=tenacity.stop_after_delay(
self.config.sandbox.remote_runtime_init_timeout
)
| stop_if_should_exit(),
| stop_if_should_exit() | self._stop_if_closed,
reraise=True,
retry=tenacity.retry_if_exception_type(AgentRuntimeNotReadyError),
wait=tenacity.wait_fixed(2),
@ -388,12 +388,18 @@ class RemoteRuntime(ActionExecutionClient):
)
raise
@tenacity.retry(
retry=tenacity.retry_if_exception_type(ConnectionError),
stop=tenacity.stop_after_attempt(3) | stop_if_should_exit(),
wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
)
def _send_action_server_request(self, method, url, **kwargs):
    """Send a request to the action server, retrying only when enabled.

    When ``config.sandbox.remote_runtime_enable_retries`` is false the request
    is issued exactly once through ``_send_action_server_request_impl``.
    Otherwise the same implementation is wrapped in a tenacity retry that
    re-attempts on ``ConnectionError`` with exponential backoff and stops
    after three attempts, when ``stop_if_should_exit()`` fires, or when
    ``self._stop_if_closed`` reports true.

    Args:
        method: Forwarded unchanged to ``_send_action_server_request_impl``.
        url: Forwarded unchanged to ``_send_action_server_request_impl``.
        **kwargs: Forwarded unchanged to ``_send_action_server_request_impl``.

    Returns:
        Whatever ``_send_action_server_request_impl`` returns.
    """
    if not self.config.sandbox.remote_runtime_enable_retries:
        # Delegate to the implementation method. Calling this method again
        # here would recurse without bound whenever retries are disabled.
        return self._send_action_server_request_impl(method, url, **kwargs)

    # The decorator is built per call (not at class-definition time) because
    # the stop condition needs the bound method `self._stop_if_closed`.
    retry_decorator = tenacity.retry(
        retry=tenacity.retry_if_exception_type(ConnectionError),
        stop=tenacity.stop_after_attempt(3)
        | stop_if_should_exit()
        | self._stop_if_closed,
        wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
    )
    return retry_decorator(self._send_action_server_request_impl)(
        method, url, **kwargs
    )
def _send_action_server_request_impl(self, method, url, **kwargs):
try:
return super()._send_action_server_request(method, url, **kwargs)
except requests.Timeout:
@ -424,3 +430,6 @@ class RemoteRuntime(ActionExecutionClient):
) from e
else:
raise e
# Stop predicate for tenacity retries: reports whether this runtime has been
# marked closed, by returning the `_runtime_closed` attribute as-is.
# It is OR-ed into the `stop=` conditions of the retry loops in this class so
# that retrying ends once the runtime is closed.
# `retry_state` is accepted because the predicate is invoked with the retry
# call state, but it is not read here.
def _stop_if_closed(self, retry_state: tenacity.RetryCallState) -> bool:
return self._runtime_closed

View File

@ -150,7 +150,7 @@ class StandaloneConversationManager(ConversationManager):
)
return
except Exception as e:
logger.warning(f'error_cleaning_stale: {str(e)}')
logger.error(f'error_cleaning_stale')
await asyncio.sleep(_CLEANUP_INTERVAL)
async def get_running_agent_loops(