mirror of
https://github.com/OpenHands/OpenHands.git
synced 2026-03-22 05:37:20 +08:00
Fix runtime status error on conversation resume (#12718)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
@@ -15,6 +15,7 @@ import re
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Annotated
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import base62
|
||||
import httpx
|
||||
@@ -40,6 +41,7 @@ from openhands.app_server.config import (
|
||||
depends_httpx_client,
|
||||
depends_sandbox_service,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import SandboxStatus
|
||||
from openhands.app_server.sandbox.sandbox_service import SandboxService
|
||||
from openhands.app_server.services.db_session_injector import set_db_session_keep_open
|
||||
from openhands.app_server.services.httpx_client_injector import (
|
||||
@@ -119,6 +121,7 @@ app_conversation_info_service_dependency = depends_app_conversation_info_service
|
||||
sandbox_service_dependency = depends_sandbox_service()
|
||||
db_session_dependency = depends_db_session()
|
||||
httpx_client_dependency = depends_httpx_client()
|
||||
_RESUME_GRACE_PERIOD = 60
|
||||
|
||||
|
||||
def _filter_conversations_by_age(
|
||||
@@ -470,6 +473,7 @@ async def get_conversation(
|
||||
conversation_id: str = Depends(validate_conversation_id),
|
||||
conversation_store: ConversationStore = Depends(get_conversation_store),
|
||||
app_conversation_service: AppConversationService = app_conversation_service_dependency,
|
||||
httpx_client: httpx.AsyncClient = httpx_client_dependency,
|
||||
) -> ConversationInfo | None:
|
||||
"""Get a single conversation by ID.
|
||||
|
||||
@@ -484,6 +488,44 @@ async def get_conversation(
|
||||
conversation_uuid
|
||||
)
|
||||
if app_conversation:
|
||||
if (
|
||||
app_conversation.sandbox_status == SandboxStatus.RUNNING
|
||||
and app_conversation.execution_status is None
|
||||
):
|
||||
# The sandbox is running, but we were unable to determine a status for
|
||||
# the conversation. It may be that it is still starting, or that the
|
||||
# conversation has been stopped / deleted.
|
||||
try:
|
||||
# Check the server info is available
|
||||
conversation_url = urlparse(app_conversation.conversation_url)
|
||||
sandbox_info_url = f'{str(conversation_url.scheme)}://{str(conversation_url.netloc)}/server_info'
|
||||
response = await httpx_client.get(sandbox_info_url)
|
||||
response.raise_for_status()
|
||||
server_info = response.json()
|
||||
|
||||
# If the server has not been running long, we consider it still starting
|
||||
uptime = int(server_info.get('uptime'))
|
||||
if uptime < _RESUME_GRACE_PERIOD:
|
||||
app_conversation.sandbox_status = SandboxStatus.STARTING
|
||||
|
||||
except Exception:
|
||||
# The sandbox is marked as RUNNING, but the server is not responding.
|
||||
# There is a bug in runtime API which means that the server is marked
|
||||
# as RUNNING before it is actually started. (Primarily affecting resumed
|
||||
# runtimes) As a temporary work around for this, we mark the server as
|
||||
# STARTING. If the sandbox is actually in an error state, the API will
|
||||
# discover this quite quickly and mark the sandbox as ERROR
|
||||
logger.warning(
|
||||
'get_sandbox_info_failed',
|
||||
extra={
|
||||
'conversation_id': app_conversation.id,
|
||||
'sandbox_id': app_conversation.sandbox_id,
|
||||
},
|
||||
exc_info=True,
|
||||
stack_info=True,
|
||||
)
|
||||
app_conversation.sandbox_status = SandboxStatus.STARTING
|
||||
|
||||
return _to_conversation_info(app_conversation)
|
||||
except (ValueError, TypeError, Exception):
|
||||
# Not a V1 conversation or service error
|
||||
|
||||
@@ -3,6 +3,7 @@ from datetime import datetime, timezone
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from uuid import uuid4
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from fastapi import status
|
||||
from fastapi.responses import JSONResponse
|
||||
@@ -12,6 +13,7 @@ from openhands.app_server.app_conversation.app_conversation_info_service import
|
||||
)
|
||||
from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
AgentType,
|
||||
AppConversation,
|
||||
AppConversationInfo,
|
||||
AppConversationPage,
|
||||
AppConversationStartRequest,
|
||||
@@ -21,8 +23,10 @@ from openhands.app_server.app_conversation.app_conversation_models import (
|
||||
from openhands.app_server.app_conversation.app_conversation_service import (
|
||||
AppConversationService,
|
||||
)
|
||||
from openhands.app_server.sandbox.sandbox_models import SandboxStatus
|
||||
from openhands.microagent.microagent import KnowledgeMicroagent, RepoMicroagent
|
||||
from openhands.microagent.types import MicroagentMetadata, MicroagentType
|
||||
from openhands.server.data_models.conversation_info import ConversationStatus
|
||||
from openhands.server.data_models.conversation_info_result_set import (
|
||||
ConversationInfoResultSet,
|
||||
)
|
||||
@@ -32,7 +36,9 @@ from openhands.server.routes.conversation import (
|
||||
get_microagents,
|
||||
)
|
||||
from openhands.server.routes.manage_conversations import (
|
||||
_RESUME_GRACE_PERIOD,
|
||||
UpdateConversationRequest,
|
||||
get_conversation,
|
||||
search_conversations,
|
||||
update_conversation,
|
||||
)
|
||||
@@ -1459,3 +1465,102 @@ async def test_search_conversations_include_sub_conversations_with_other_filters
|
||||
assert call_kwargs.get('include_sub_conversations') is True
|
||||
assert call_kwargs.get('page_id') == 'test_v1_page_id'
|
||||
assert call_kwargs.get('limit') == 50
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize(
|
||||
'sandbox_status,execution_status,server_uptime,server_error,expected_status,should_call_server',
|
||||
[
|
||||
# RUNNING sandbox, no execution_status, server responds within grace period -> STARTING
|
||||
(SandboxStatus.RUNNING, None, 30, None, ConversationStatus.STARTING, True),
|
||||
# RUNNING sandbox, no execution_status, server responds past grace period -> RUNNING
|
||||
(
|
||||
SandboxStatus.RUNNING,
|
||||
None,
|
||||
_RESUME_GRACE_PERIOD + 10,
|
||||
None,
|
||||
ConversationStatus.RUNNING,
|
||||
True,
|
||||
),
|
||||
# RUNNING sandbox, no execution_status, server unresponsive -> STARTING
|
||||
(
|
||||
SandboxStatus.RUNNING,
|
||||
None,
|
||||
None,
|
||||
httpx.ConnectError('Connection refused'),
|
||||
ConversationStatus.STARTING,
|
||||
True,
|
||||
),
|
||||
# RUNNING sandbox, WITH execution_status -> RUNNING, skip server check
|
||||
(SandboxStatus.RUNNING, 'IDLE', None, None, ConversationStatus.RUNNING, False),
|
||||
# Non-RUNNING sandbox -> STARTING, skip server check
|
||||
(SandboxStatus.STARTING, None, None, None, ConversationStatus.STARTING, False),
|
||||
],
|
||||
ids=[
|
||||
'running_no_exec_status_within_grace_period',
|
||||
'running_no_exec_status_past_grace_period',
|
||||
'running_no_exec_status_server_unresponsive',
|
||||
'running_with_exec_status_skips_check',
|
||||
'non_running_skips_check',
|
||||
],
|
||||
)
|
||||
async def test_get_conversation_resume_status_handling(
|
||||
sandbox_status,
|
||||
execution_status,
|
||||
server_uptime,
|
||||
server_error,
|
||||
expected_status,
|
||||
should_call_server,
|
||||
):
|
||||
"""Test get_conversation handles resume status correctly for various scenarios."""
|
||||
from openhands.sdk.conversation.state import ConversationExecutionStatus
|
||||
|
||||
conversation_id = uuid4()
|
||||
|
||||
# Convert string execution_status to enum if provided
|
||||
exec_status = None
|
||||
if execution_status == 'IDLE':
|
||||
exec_status = ConversationExecutionStatus.IDLE
|
||||
|
||||
mock_app_conversation = AppConversation(
|
||||
id=conversation_id,
|
||||
created_by_user_id='test_user',
|
||||
sandbox_id='test_sandbox',
|
||||
sandbox_status=sandbox_status,
|
||||
execution_status=exec_status,
|
||||
conversation_url='https://sandbox.example.com/conversation',
|
||||
created_at=datetime.now(timezone.utc),
|
||||
updated_at=datetime.now(timezone.utc),
|
||||
)
|
||||
|
||||
mock_app_conversation_service = AsyncMock(spec=AppConversationService)
|
||||
mock_app_conversation_service.get_app_conversation.return_value = (
|
||||
mock_app_conversation
|
||||
)
|
||||
mock_conversation_store = AsyncMock(spec=ConversationStore)
|
||||
mock_httpx_client = AsyncMock(spec=httpx.AsyncClient)
|
||||
|
||||
if server_error:
|
||||
mock_httpx_client.get.side_effect = server_error
|
||||
elif server_uptime is not None:
|
||||
mock_response = MagicMock()
|
||||
mock_response.json.return_value = {'uptime': server_uptime}
|
||||
mock_response.raise_for_status = MagicMock()
|
||||
mock_httpx_client.get.return_value = mock_response
|
||||
|
||||
result = await get_conversation(
|
||||
conversation_id=str(conversation_id),
|
||||
conversation_store=mock_conversation_store,
|
||||
app_conversation_service=mock_app_conversation_service,
|
||||
httpx_client=mock_httpx_client,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.status == expected_status
|
||||
|
||||
if should_call_server:
|
||||
mock_httpx_client.get.assert_called_once_with(
|
||||
'https://sandbox.example.com/server_info'
|
||||
)
|
||||
else:
|
||||
mock_httpx_client.get.assert_not_called()
|
||||
|
||||
Reference in New Issue
Block a user