mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
fix(RemoteRuntime): add retry for pod status after /start (#4825)
This commit is contained in:
parent
5615d54f81
commit
f9fa1d95cb
@ -90,9 +90,8 @@ class RemoteRuntime(Runtime):
|
||||
self.runtime_url: str | None = None
|
||||
|
||||
async def connect(self):
|
||||
await call_sync_from_async(self._start_or_attach_to_runtime)
|
||||
try:
|
||||
await call_sync_from_async(self._wait_until_alive)
|
||||
await call_sync_from_async(self._start_or_attach_to_runtime)
|
||||
except RuntimeNotReadyError:
|
||||
self.log('error', 'Runtime failed to start, timed out before ready')
|
||||
raise
|
||||
@ -277,6 +276,14 @@ class RemoteRuntime(Runtime):
|
||||
assert runtime_data['runtime_id'] == self.runtime_id
|
||||
assert 'pod_status' in runtime_data
|
||||
pod_status = runtime_data['pod_status']
|
||||
|
||||
# FIXME: We should fix it at the backend of /start endpoint, make sure
|
||||
# the pod is created before returning the response.
|
||||
# Retry a period of time to give the cluster time to start the pod
|
||||
if pod_status == 'Not Found':
|
||||
raise RuntimeNotReadyError(
|
||||
f'Runtime (ID={self.runtime_id}) is not yet ready. Status: {pod_status}'
|
||||
)
|
||||
if pod_status == 'Ready':
|
||||
try:
|
||||
self._send_request(
|
||||
@ -291,7 +298,7 @@ class RemoteRuntime(Runtime):
|
||||
f'Runtime /alive failed to respond with 200: {e}'
|
||||
)
|
||||
return
|
||||
if pod_status in ('Failed', 'Unknown', 'Not Found'):
|
||||
if pod_status in ('Failed', 'Unknown'):
|
||||
# clean up the runtime
|
||||
self.close()
|
||||
raise RuntimeError(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user