Add debug logging for sandbox startup health checks (#12814)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Tim O'Farrell
2026-02-10 14:23:43 +00:00
committed by GitHub
parent f20c956196
commit affe0af361
2 changed files with 14 additions and 2 deletions

View File

@@ -230,7 +230,7 @@ class DockerSandboxService(SandboxService):
except asyncio.CancelledError:
raise
except Exception as exc:
# If the server is
# If the server has exceeded the startup grace period, it's an error
if sandbox_info.created_at < utc_now() - timedelta(
seconds=self.startup_grace_seconds
):
@@ -239,6 +239,10 @@ class DockerSandboxService(SandboxService):
)
sandbox_info.status = SandboxStatus.ERROR
else:
_logger.debug(
f'Sandbox server not yet available (still starting): '
f'{app_server_url} : {exc}'
)
sandbox_info.status = SandboxStatus.STARTING
sandbox_info.exposed_urls = None
sandbox_info.session_api_key = None

View File

@@ -1,4 +1,5 @@
import asyncio
import logging
import time
from abc import ABC, abstractmethod
@@ -18,6 +19,8 @@ from openhands.app_server.utils.docker_utils import (
from openhands.sdk.utils.models import DiscriminatedUnionMixin
from openhands.sdk.utils.paging import page_iterator
_logger = logging.getLogger(__name__)
SESSION_API_KEY_VARIABLE = 'OH_SESSION_API_KEYS_0'
WEBHOOK_CALLBACK_VARIABLE = 'OH_WEBHOOKS_0_BASE_URL'
ALLOW_CORS_ORIGINS_VARIABLE = 'OH_ALLOW_CORS_ORIGINS_0'
@@ -133,12 +136,17 @@ class SandboxService(ABC):
Returns:
True if agent server is alive, False otherwise
"""
url = None
try:
agent_server_url = self._get_agent_server_url(sandbox)
url = f'{agent_server_url.rstrip("/")}/alive'
response = await httpx_client.get(url, timeout=5.0)
return response.is_success
except Exception:
except Exception as exc:
_logger.debug(
f'Agent server health check failed for sandbox {sandbox.id}'
f'{f" at {url}" if url else ""}: {exc}'
)
return False
def _get_agent_server_url(self, sandbox: SandboxInfo) -> str: