mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
Fix runtime status error on conversation resume (#12718)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -15,6 +15,7 @@ import re
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Annotated
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import base62
|
||||
import httpx
|
||||
@@ -40,6 +41,7 @@ from openhands.app_server.config import (
|
||||
depends_httpx_client,
|
||||
depends_sandbox_service,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import SandboxStatus
|
||||
from openhands.app_server.sandbox.sandbox_service import SandboxService
|
||||
from openhands.app_server.services.db_session_injector import set_db_session_keep_open
|
||||
from openhands.app_server.services.httpx_client_injector import (
|
||||
@@ -119,6 +121,7 @@ app_conversation_info_service_dependency = depends_app_conversation_info_service
|
||||
sandbox_service_dependency = depends_sandbox_service()
|
||||
db_session_dependency = depends_db_session()
|
||||
httpx_client_dependency = depends_httpx_client()
|
||||
_RESUME_GRACE_PERIOD = 60
|
||||
|
||||
|
||||
def _filter_conversations_by_age(
|
||||
@@ -470,6 +473,7 @@ async def get_conversation(
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_store: ConversationStore = Depends(get_conversation_store),
|
||||
app_conversation_service: AppConversationService = app_conversation_service_dependency,
|
||||
httpx_client: httpx.AsyncClient = httpx_client_dependency,
|
||||
) -> ConversationInfo | None:
|
||||
"""Get a single conversation by ID.
|
||||
|
||||
@@ -484,6 +488,44 @@ async def get_conversation(
|
||||
conversation_uuid
|
||||
)
|
||||
if app_conversation:
|
||||
if (
|
||||
app_conversation.sandbox_status == SandboxStatus.RUNNING
|
||||
and app_conversation.execution_status is None
|
||||
):
|
||||
# The sandbox is running, but we were unable to determine a status for
|
||||
# the conversation. It may be that it is still starting, or that the
|
||||
# conversation has been stopped / deleted.
|
||||
try:
|
||||
# Check the server info is available
|
||||
conversation_url = urlparse(app_conversation.conversation_url)
|
||||
sandbox_info_url = f'{str(conversation_url.scheme)}://{str(conversation_url.netloc)}/server_info'
|
||||
response = await httpx_client.get(sandbox_info_url)
|
||||
response.raise_for_status()
|
||||
server_info = response.json()
|
||||
|
||||
# If the server has not been running long, we consider it still starting
|
||||
uptime = int(server_info.get('uptime'))
|
||||
if uptime < _RESUME_GRACE_PERIOD:
|
||||
app_conversation.sandbox_status = SandboxStatus.STARTING
|
||||
|
||||
except Exception:
|
||||
# The sandbox is marked as RUNNING, but the server is not responding.
|
||||
# There is a bug in runtime API which means that the server is marked
|
||||
# as RUNNING before it is actually started. (Primarily affecting resumed
|
||||
# runtimes) As a temporary work around for this, we mark the server as
|
||||
# STARTING. If the sandbox is actually in an error state, the API will
|
||||
# discover this quite quickly and mark the sandbox as ERROR
|
||||
logger.warning(
|
||||
'get_sandbox_info_failed',
|
||||
extra={
|
||||
'conversation_id': app_conversation.id,
|
||||
'sandbox_id': app_conversation.sandbox_id,
|
||||
},
|
||||
exc_info=True,
|
||||
stack_info=True,
|
||||
)
|
||||
app_conversation.sandbox_status = SandboxStatus.STARTING
|
||||
|
||||
return _to_conversation_info(app_conversation)
|
||||
except (ValueError, TypeError, Exception):
|
||||
# Not a V1 conversation or service error
|
||||
|
||||
Reference in New Issue
Block a user