From affe0af361f26993d7710d2cd09a0c9d0c5a6b9b Mon Sep 17 00:00:00 2001 From: Tim O'Farrell Date: Tue, 10 Feb 2026 14:23:43 +0000 Subject: [PATCH] Add debug logging for sandbox startup health checks (#12814) Co-authored-by: openhands --- openhands/app_server/sandbox/docker_sandbox_service.py | 6 +++++- openhands/app_server/sandbox/sandbox_service.py | 10 +++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/openhands/app_server/sandbox/docker_sandbox_service.py b/openhands/app_server/sandbox/docker_sandbox_service.py index 1114fde577..01829976c6 100644 --- a/openhands/app_server/sandbox/docker_sandbox_service.py +++ b/openhands/app_server/sandbox/docker_sandbox_service.py @@ -230,7 +230,7 @@ class DockerSandboxService(SandboxService): except asyncio.CancelledError: raise except Exception as exc: - # If the server is + # If the server has exceeded the startup grace period, it's an error if sandbox_info.created_at < utc_now() - timedelta( seconds=self.startup_grace_seconds ): @@ -239,6 +239,10 @@ class DockerSandboxService(SandboxService): ) sandbox_info.status = SandboxStatus.ERROR else: + _logger.debug( + f'Sandbox server not yet available (still starting): ' + f'{app_server_url} : {exc}' + ) sandbox_info.status = SandboxStatus.STARTING sandbox_info.exposed_urls = None sandbox_info.session_api_key = None diff --git a/openhands/app_server/sandbox/sandbox_service.py b/openhands/app_server/sandbox/sandbox_service.py index 8fbeb02626..4c8f0f3315 100644 --- a/openhands/app_server/sandbox/sandbox_service.py +++ b/openhands/app_server/sandbox/sandbox_service.py @@ -1,4 +1,5 @@ import asyncio +import logging import time from abc import ABC, abstractmethod @@ -18,6 +19,8 @@ from openhands.app_server.utils.docker_utils import ( from openhands.sdk.utils.models import DiscriminatedUnionMixin from openhands.sdk.utils.paging import page_iterator +_logger = logging.getLogger(__name__) + SESSION_API_KEY_VARIABLE = 'OH_SESSION_API_KEYS_0' WEBHOOK_CALLBACK_VARIABLE = 'OH_WEBHOOKS_0_BASE_URL' ALLOW_CORS_ORIGINS_VARIABLE = 'OH_ALLOW_CORS_ORIGINS_0' @@ -133,12 +136,17 @@ class SandboxService(ABC): Returns: True if agent server is alive, False otherwise """ + url = None try: agent_server_url = self._get_agent_server_url(sandbox) url = f'{agent_server_url.rstrip("/")}/alive' response = await httpx_client.get(url, timeout=5.0) return response.is_success - except Exception: + except Exception as exc: + _logger.debug( + f'Agent server health check failed for sandbox {sandbox.id}' + f'{f" at {url}" if url else ""}: {exc}' + ) return False def _get_agent_server_url(self, sandbox: SandboxInfo) -> str: