diff --git a/openhands/server/routes/manage_conversations.py b/openhands/server/routes/manage_conversations.py index 0273abc6e7..968d4a81bf 100644 --- a/openhands/server/routes/manage_conversations.py +++ b/openhands/server/routes/manage_conversations.py @@ -15,6 +15,7 @@ import re import uuid from datetime import datetime, timedelta, timezone from typing import Annotated +from urllib.parse import urlparse import base62 import httpx @@ -40,6 +41,7 @@ from openhands.app_server.config import ( depends_httpx_client, depends_sandbox_service, ) +from openhands.app_server.sandbox.sandbox_models import SandboxStatus from openhands.app_server.sandbox.sandbox_service import SandboxService from openhands.app_server.services.db_session_injector import set_db_session_keep_open from openhands.app_server.services.httpx_client_injector import ( @@ -119,6 +121,7 @@ app_conversation_info_service_dependency = depends_app_conversation_info_service sandbox_service_dependency = depends_sandbox_service() db_session_dependency = depends_db_session() httpx_client_dependency = depends_httpx_client() +_RESUME_GRACE_PERIOD = 60 def _filter_conversations_by_age( @@ -470,6 +473,7 @@ async def get_conversation( conversation_id: str = Depends(validate_conversation_id), conversation_store: ConversationStore = Depends(get_conversation_store), app_conversation_service: AppConversationService = app_conversation_service_dependency, + httpx_client: httpx.AsyncClient = httpx_client_dependency, ) -> ConversationInfo | None: """Get a single conversation by ID. @@ -484,6 +488,44 @@ async def get_conversation( conversation_uuid ) if app_conversation: + if ( + app_conversation.sandbox_status == SandboxStatus.RUNNING + and app_conversation.execution_status is None + ): + # The sandbox is running, but we were unable to determine a status for + # the conversation. It may be that it is still starting, or that the + # conversation has been stopped / deleted. + try: + # Check the server info is available + conversation_url = urlparse(app_conversation.conversation_url) + sandbox_info_url = f'{str(conversation_url.scheme)}://{str(conversation_url.netloc)}/server_info' + response = await httpx_client.get(sandbox_info_url) + response.raise_for_status() + server_info = response.json() + + # If the server has not been running long, we consider it still starting + uptime = int(server_info.get('uptime')) + if uptime < _RESUME_GRACE_PERIOD: + app_conversation.sandbox_status = SandboxStatus.STARTING + + except Exception: + # The sandbox is marked as RUNNING, but the server is not responding. + # There is a bug in runtime API which means that the server is marked + # as RUNNING before it is actually started. (Primarily affecting resumed + # runtimes) As a temporary work around for this, we mark the server as + # STARTING. If the sandbox is actually in an error state, the API will + # discover this quite quickly and mark the sandbox as ERROR + logger.warning( + 'get_sandbox_info_failed', + extra={ + 'conversation_id': app_conversation.id, + 'sandbox_id': app_conversation.sandbox_id, + }, + exc_info=True, + stack_info=True, + ) + app_conversation.sandbox_status = SandboxStatus.STARTING + return _to_conversation_info(app_conversation) except (ValueError, TypeError, Exception): # Not a V1 conversation or service error diff --git a/tests/unit/server/routes/test_conversation_routes.py b/tests/unit/server/routes/test_conversation_routes.py index 343894cefa..3892906ed0 100644 --- a/tests/unit/server/routes/test_conversation_routes.py +++ b/tests/unit/server/routes/test_conversation_routes.py @@ -3,6 +3,7 @@ from datetime import datetime, timezone from unittest.mock import AsyncMock, MagicMock, patch from uuid import uuid4 +import httpx import pytest from fastapi import status from fastapi.responses import JSONResponse @@ -12,6 +13,7 @@ from openhands.app_server.app_conversation.app_conversation_info_service import ) from openhands.app_server.app_conversation.app_conversation_models import ( AgentType, + AppConversation, AppConversationInfo, AppConversationPage, AppConversationStartRequest, @@ -21,8 +23,10 @@ from openhands.app_server.app_conversation.app_conversation_models import ( from openhands.app_server.app_conversation.app_conversation_service import ( AppConversationService, ) +from openhands.app_server.sandbox.sandbox_models import SandboxStatus from openhands.microagent.microagent import KnowledgeMicroagent, RepoMicroagent from openhands.microagent.types import MicroagentMetadata, MicroagentType +from openhands.server.data_models.conversation_info import ConversationStatus from openhands.server.data_models.conversation_info_result_set import ( ConversationInfoResultSet, ) @@ -32,7 +36,9 @@ from openhands.server.routes.conversation import ( get_microagents, ) from openhands.server.routes.manage_conversations import ( + _RESUME_GRACE_PERIOD, UpdateConversationRequest, + get_conversation, search_conversations, update_conversation, ) @@ -1459,3 +1465,102 @@ async def test_search_conversations_include_sub_conversations_with_other_filters assert call_kwargs.get('include_sub_conversations') is True assert call_kwargs.get('page_id') == 'test_v1_page_id' assert call_kwargs.get('limit') == 50 + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + 'sandbox_status,execution_status,server_uptime,server_error,expected_status,should_call_server', + [ + # RUNNING sandbox, no execution_status, server responds within grace period -> STARTING + (SandboxStatus.RUNNING, None, 30, None, ConversationStatus.STARTING, True), + # RUNNING sandbox, no execution_status, server responds past grace period -> RUNNING + ( + SandboxStatus.RUNNING, + None, + _RESUME_GRACE_PERIOD + 10, + None, + ConversationStatus.RUNNING, + True, + ), + # RUNNING sandbox, no execution_status, server unresponsive -> STARTING + ( + SandboxStatus.RUNNING, + None, + None, + httpx.ConnectError('Connection refused'), + ConversationStatus.STARTING, + True, + ), + # RUNNING sandbox, WITH execution_status -> RUNNING, skip server check + (SandboxStatus.RUNNING, 'IDLE', None, None, ConversationStatus.RUNNING, False), + # Non-RUNNING sandbox -> STARTING, skip server check + (SandboxStatus.STARTING, None, None, None, ConversationStatus.STARTING, False), + ], + ids=[ + 'running_no_exec_status_within_grace_period', + 'running_no_exec_status_past_grace_period', + 'running_no_exec_status_server_unresponsive', + 'running_with_exec_status_skips_check', + 'non_running_skips_check', + ], +) +async def test_get_conversation_resume_status_handling( + sandbox_status, + execution_status, + server_uptime, + server_error, + expected_status, + should_call_server, +): + """Test get_conversation handles resume status correctly for various scenarios.""" + from openhands.sdk.conversation.state import ConversationExecutionStatus + + conversation_id = uuid4() + + # Convert string execution_status to enum if provided + exec_status = None + if execution_status == 'IDLE': + exec_status = ConversationExecutionStatus.IDLE + + mock_app_conversation = AppConversation( + id=conversation_id, + created_by_user_id='test_user', + sandbox_id='test_sandbox', + sandbox_status=sandbox_status, + execution_status=exec_status, + conversation_url='https://sandbox.example.com/conversation', + created_at=datetime.now(timezone.utc), + updated_at=datetime.now(timezone.utc), + ) + + mock_app_conversation_service = AsyncMock(spec=AppConversationService) + mock_app_conversation_service.get_app_conversation.return_value = ( + mock_app_conversation + ) + mock_conversation_store = AsyncMock(spec=ConversationStore) + mock_httpx_client = AsyncMock(spec=httpx.AsyncClient) + + if server_error: + mock_httpx_client.get.side_effect = server_error + elif server_uptime is not None: + mock_response = MagicMock() + mock_response.json.return_value = {'uptime': server_uptime} + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + result = await get_conversation( + conversation_id=str(conversation_id), + conversation_store=mock_conversation_store, + app_conversation_service=mock_app_conversation_service, + httpx_client=mock_httpx_client, + ) + + assert result is not None + assert result.status == expected_status + + if should_call_server: + mock_httpx_client.get.assert_called_once_with( + 'https://sandbox.example.com/server_info' + ) + else: + mock_httpx_client.get.assert_not_called()