Improve budget exceeded error handling in V1 callback processors (#13219)

Co-authored-by: openhands <openhands@all-hands.dev>
2026-03-22 13:47:19 +08:00 · 2026-03-04 15:25:46 -05:00
parent 518fb2ee24
commit c32934ed2f
7 changed files with 368 additions and 61 deletions
--- a/enterprise/integrations/github/github_v1_callback_processor.py
+++ b/enterprise/integrations/github/github_v1_callback_processor.py
@@ -4,7 +4,8 @@ from uuid import UUID

 import httpx
 from github import Auth, Github, GithubIntegration
-from integrations.utils import CONVERSATION_URL, get_summary_instruction
+from integrations.utils import get_summary_instruction
+from integrations.v1_utils import handle_callback_error
 from pydantic import Field
 from server.auth.constants import GITHUB_APP_CLIENT_ID, GITHUB_APP_PRIVATE_KEY

@@ -42,7 +43,6 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
        event: Event,
    ) -> EventCallbackResult | None:
        """Process events for GitHub V1 integration."""
-
        # Only handle ConversationStateUpdateEvent
        if not isinstance(event, ConversationStateUpdateEvent):
            return None
@@ -78,25 +78,20 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
                detail=summary,
            )
        except Exception as e:
-            _logger.exception('[GitHub V1] Error processing callback: %s', e)
-
-            # Only try to post error to GitHub if we have basic requirements
-            try:
-                # Check if we have installation ID and credentials before posting
-                if (
-                    self.github_view_data.get('installation_id')
-                    and GITHUB_APP_CLIENT_ID
-                    and GITHUB_APP_PRIVATE_KEY
-                ):
-                    await self._post_summary_to_github(
-                        f'OpenHands encountered an error: **{str(e)}**.\n\n'
-                        f'[See the conversation]({CONVERSATION_URL.format(conversation_id)})'
-                        'for more information.'
-                    )
-            except Exception as post_error:
-                _logger.warning(
-                    '[GitHub V1] Failed to post error message to GitHub: %s', post_error
-                )
+            # Check if we have installation ID and credentials before posting
+            can_post_error = bool(
+                self.github_view_data.get('installation_id')
+                and GITHUB_APP_CLIENT_ID
+                and GITHUB_APP_PRIVATE_KEY
+            )
+            await handle_callback_error(
+                error=e,
+                conversation_id=conversation_id,
+                service_name='GitHub',
+                service_logger=_logger,
+                can_post_error=can_post_error,
+                post_error_func=self._post_summary_to_github,
+            )

            return EventCallbackResult(
                status=EventCallbackResultStatus.ERROR,
@@ -167,8 +162,8 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
        send_message_request = AskAgentRequest(question=message_content)

        url = (
-            f'{agent_server_url.rstrip("/")}'
-            f'/api/conversations/{conversation_id}/ask_agent'
+            f"{agent_server_url.rstrip('/')}"
+            f"/api/conversations/{conversation_id}/ask_agent"
        )
        headers = {'X-Session-API-Key': session_api_key}
        payload = send_message_request.model_dump()
@@ -230,8 +225,7 @@ class GithubV1CallbackProcessor(EventCallbackProcessor):
    # -------------------------------------------------------------------------

    async def _request_summary(self, conversation_id: UUID) -> str:
-        """
-        Ask the agent to produce a summary of its work and return the agent response.
+        """Ask the agent to produce a summary of its work and return the agent response.

        NOTE: This method now returns a string (the agent server's response text)
        and raises exceptions on errors. The wrapping into EventCallbackResult
--- a/enterprise/integrations/gitlab/gitlab_v1_callback_processor.py
+++ b/enterprise/integrations/gitlab/gitlab_v1_callback_processor.py
@@ -3,7 +3,8 @@ from typing import Any
 from uuid import UUID

 import httpx
-from integrations.utils import CONVERSATION_URL, get_summary_instruction
+from integrations.utils import get_summary_instruction
+from integrations.v1_utils import handle_callback_error
 from pydantic import Field

 from openhands.agent_server.models import AskAgentRequest, AskAgentResponse
@@ -75,20 +76,15 @@ class GitlabV1CallbackProcessor(EventCallbackProcessor):
                detail=summary,
            )
        except Exception as e:
-            _logger.exception('[GitLab V1] Error processing callback: %s', e)
-
-            # Only try to post error to GitLab if we have basic requirements
-            try:
-                if self.gitlab_view_data.get('keycloak_user_id'):
-                    await self._post_summary_to_gitlab(
-                        f'OpenHands encountered an error: **{str(e)}**.\n\n'
-                        f'[See the conversation]({CONVERSATION_URL.format(conversation_id)}) '
-                        'for more information.'
-                    )
-            except Exception as post_error:
-                _logger.warning(
-                    '[GitLab V1] Failed to post error message to GitLab: %s', post_error
-                )
+            can_post_error = bool(self.gitlab_view_data.get('keycloak_user_id'))
+            await handle_callback_error(
+                error=e,
+                conversation_id=conversation_id,
+                service_name='GitLab',
+                service_logger=_logger,
+                can_post_error=can_post_error,
+                post_error_func=self._post_summary_to_gitlab,
+            )

            return EventCallbackResult(
                status=EventCallbackResultStatus.ERROR,
@@ -149,8 +145,8 @@ class GitlabV1CallbackProcessor(EventCallbackProcessor):
        send_message_request = AskAgentRequest(question=message_content)

        url = (
-            f'{agent_server_url.rstrip("/")}'
-            f'/api/conversations/{conversation_id}/ask_agent'
+            f"{agent_server_url.rstrip('/')}"
+            f"/api/conversations/{conversation_id}/ask_agent"
        )
        headers = {'X-Session-API-Key': session_api_key}
        payload = send_message_request.model_dump()
--- a/enterprise/integrations/slack/slack_v1_callback_processor.py
+++ b/enterprise/integrations/slack/slack_v1_callback_processor.py
@@ -2,7 +2,8 @@ import logging
 from uuid import UUID

 import httpx
-from integrations.utils import CONVERSATION_URL, get_summary_instruction
+from integrations.utils import get_summary_instruction
+from integrations.v1_utils import handle_callback_error
 from pydantic import Field
 from slack_sdk import WebClient
 from storage.slack_team_store import SlackTeamStore
@@ -39,7 +40,6 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
        event: Event,
    ) -> EventCallbackResult | None:
        """Process events for Slack V1 integration."""
-
        # Only handle ConversationStateUpdateEvent
        if not isinstance(event, ConversationStateUpdateEvent):
            return None
@@ -62,19 +62,14 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
                detail=summary,
            )
        except Exception as e:
-            _logger.exception('[Slack V1] Error processing callback: %s', e)
-
-            # Only try to post error to Slack if we have basic requirements
-            try:
-                await self._post_summary_to_slack(
-                    f'OpenHands encountered an error: **{str(e)}**.\n\n'
-                    f'[See the conversation]({CONVERSATION_URL.format(conversation_id)})'
-                    'for more information.'
-                )
-            except Exception as post_error:
-                _logger.warning(
-                    '[Slack V1] Failed to post error message to Slack: %s', post_error
-                )
+            await handle_callback_error(
+                error=e,
+                conversation_id=conversation_id,
+                service_name='Slack',
+                service_logger=_logger,
+                can_post_error=True,  # Slack always attempts to post errors
+                post_error_func=self._post_summary_to_slack,
+            )

            return EventCallbackResult(
                status=EventCallbackResultStatus.ERROR,
@@ -149,8 +144,8 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
        send_message_request = AskAgentRequest(question=message_content)

        url = (
-            f'{agent_server_url.rstrip("/")}'
-            f'/api/conversations/{conversation_id}/ask_agent'
+            f"{agent_server_url.rstrip('/')}"
+            f"/api/conversations/{conversation_id}/ask_agent"
        )
        headers = {'X-Session-API-Key': session_api_key}
        payload = send_message_request.model_dump()
@@ -212,8 +207,7 @@ class SlackV1CallbackProcessor(EventCallbackProcessor):
    # -------------------------------------------------------------------------

    async def _request_summary(self, conversation_id: UUID) -> str:
-        """
-        Ask the agent to produce a summary of its work and return the agent response.
+        """Ask the agent to produce a summary of its work and return the agent response.

        NOTE: This method now returns a string (the agent server's response text)
        and raises exceptions on errors. The wrapping into EventCallbackResult
--- a/enterprise/integrations/v1_utils.py
+++ b/enterprise/integrations/v1_utils.py
@@ -1,3 +1,8 @@
+import logging
+from typing import Callable, Coroutine
+from uuid import UUID
+
+from integrations.utils import CONVERSATION_URL
 from pydantic import SecretStr
 from server.auth.saas_user_auth import SaasUserAuth
 from server.auth.token_manager import TokenManager
@@ -6,6 +11,78 @@ from openhands.core.logger import openhands_logger as logger
 from openhands.server.user_auth.user_auth import UserAuth


+def is_budget_exceeded_error(error_message: str) -> bool:
+    """Check if an error message indicates a budget exceeded condition.
+
+    This is used to downgrade error logs to info logs for budget exceeded errors
+    since they are expected cost control behavior rather than unexpected errors.
+    """
+    lower_message = error_message.lower()
+    return 'budget' in lower_message and 'exceeded' in lower_message
+
+
+BUDGET_EXCEEDED_USER_MESSAGE = 'LLM budget has been exceeded, please re-fill.'
+
+
+async def handle_callback_error(
+    error: Exception,
+    conversation_id: UUID,
+    service_name: str,
+    service_logger: logging.Logger,
+    can_post_error: bool,
+    post_error_func: Callable[[str], Coroutine],
+) -> None:
+    """Handle callback processing errors with appropriate logging and user messages.
+
+    This centralizes the error handling logic for V1 callback processors to:
+    - Log budget exceeded errors at INFO level (expected cost control behavior)
+    - Log other errors at EXCEPTION level
+    - Post user-friendly error messages to the integration platform
+
+    Args:
+        error: The exception that occurred
+        conversation_id: The conversation ID for logging and linking
+        service_name: The service name for log messages (e.g., "GitHub", "GitLab", "Slack")
+        service_logger: The logger instance to use for logging
+        can_post_error: Whether the prerequisites are met to post an error message
+        post_error_func: Async function to post the error message to the platform
+    """
+    error_str = str(error)
+    budget_exceeded = is_budget_exceeded_error(error_str)
+
+    # Log appropriately based on error type
+    if budget_exceeded:
+        service_logger.info(
+            '[%s V1] Budget exceeded for conversation %s: %s',
+            service_name,
+            conversation_id,
+            error,
+        )
+    else:
+        service_logger.exception(
+            '[%s V1] Error processing callback: %s', service_name, error
+        )
+
+    # Try to post error message to the platform
+    if can_post_error:
+        try:
+            error_detail = (
+                BUDGET_EXCEEDED_USER_MESSAGE if budget_exceeded else error_str
+            )
+            await post_error_func(
+                f'OpenHands encountered an error: **{error_detail}**\n\n'
+                f'[See the conversation]({CONVERSATION_URL.format(conversation_id)}) '
+                'for more information.'
+            )
+        except Exception as post_error:
+            service_logger.warning(
+                '[%s V1] Failed to post error message to %s: %s',
+                service_name,
+                service_name,
+                post_error,
+            )
+
+
 async def get_saas_user_auth(
    keycloak_user_id: str, token_manager: TokenManager
 ) -> UserAuth:
--- a/enterprise/tests/unit/integrations/github/test_github_v1_callback_processor.py
+++ b/enterprise/tests/unit/integrations/github/test_github_v1_callback_processor.py
@@ -809,3 +809,94 @@ class TestGithubV1CallbackProcessor:
        )
        assert f'conversations/{conversation_id}' in error_comment
        assert 'for more information.' in error_comment
+
+    @patch(
+        'integrations.github.github_v1_callback_processor.GITHUB_APP_CLIENT_ID',
+        'test_client_id',
+    )
+    @patch(
+        'integrations.github.github_v1_callback_processor.GITHUB_APP_PRIVATE_KEY',
+        'test_private_key',
+    )
+    @patch('integrations.github.github_v1_callback_processor.get_summary_instruction')
+    @patch('openhands.app_server.config.get_httpx_client')
+    @patch('openhands.app_server.config.get_sandbox_service')
+    @patch('openhands.app_server.config.get_app_conversation_info_service')
+    @patch('integrations.github.github_v1_callback_processor._logger')
+    async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
+        self,
+        mock_logger,
+        mock_get_app_conversation_info_service,
+        mock_get_sandbox_service,
+        mock_get_httpx_client,
+        mock_get_summary_instruction,
+        github_callback_processor,
+        conversation_state_update_event,
+        event_callback,
+        mock_app_conversation_info,
+        mock_sandbox_info,
+    ):
+        """Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
+        conversation_id = uuid4()
+
+        mock_httpx_client = await _setup_happy_path_services(
+            mock_get_app_conversation_info_service,
+            mock_get_sandbox_service,
+            mock_get_httpx_client,
+            mock_app_conversation_info,
+            mock_sandbox_info,
+        )
+        # Simulate a budget exceeded error from the agent server
+        budget_error_msg = (
+            'HTTP 500 error: {"detail":"Internal Server Error",'
+            '"exception":"litellm.BadRequestError: Litellm_proxyException - '
+            'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
+        )
+        mock_httpx_client.post.side_effect = Exception(budget_error_msg)
+        mock_get_summary_instruction.return_value = 'Please provide a summary'
+
+        with (
+            patch(
+                'integrations.github.github_v1_callback_processor.GithubIntegration'
+            ) as mock_github_integration,
+            patch(
+                'integrations.github.github_v1_callback_processor.Github'
+            ) as mock_github,
+        ):
+            mock_integration = MagicMock()
+            mock_github_integration.return_value = mock_integration
+            mock_integration.get_access_token.return_value.token = 'test_token'
+
+            mock_gh = MagicMock()
+            mock_github.return_value.__enter__.return_value = mock_gh
+            mock_repo = MagicMock()
+            mock_issue = MagicMock()
+            mock_repo.get_issue.return_value = mock_issue
+            mock_gh.get_repo.return_value = mock_repo
+
+            result = await github_callback_processor(
+                conversation_id=conversation_id,
+                callback=event_callback,
+                event=conversation_state_update_event,
+            )
+
+        assert result is not None
+        assert result.status == EventCallbackResultStatus.ERROR
+
+        # Verify exception was NOT called (budget exceeded uses info instead)
+        mock_logger.exception.assert_not_called()
+
+        # Verify budget exceeded info log was called
+        info_calls = [str(call) for call in mock_logger.info.call_args_list]
+        budget_log_found = any('Budget exceeded' in call for call in info_calls)
+        assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
+
+        # Verify user-friendly message was posted to GitHub
+        mock_issue.create_comment.assert_called_once()
+        call_args = mock_issue.create_comment.call_args
+        posted_comment = call_args[1].get('body') or call_args[0][0]
+        assert 'OpenHands encountered an error' in posted_comment
+        assert 'LLM budget has been exceeded' in posted_comment
+        assert 'please re-fill' in posted_comment
+        # Should NOT contain the raw error message
+        assert 'litellm.BadRequestError' not in posted_comment
--- a/enterprise/tests/unit/integrations/gitlab/test_gitlab_v1_callback_processor.py
+++ b/enterprise/tests/unit/integrations/gitlab/test_gitlab_v1_callback_processor.py
@@ -374,3 +374,72 @@ class TestGitlabV1CallbackProcessor:
        assert 'OpenHands encountered an error' in error_comment
        assert 'Simulated agent server error' in error_comment
        assert f'conversations/{conversation_id}' in error_comment
+
+    @patch('openhands.app_server.config.get_app_conversation_info_service')
+    @patch('openhands.app_server.config.get_sandbox_service')
+    @patch('openhands.app_server.config.get_httpx_client')
+    @patch('integrations.gitlab.gitlab_v1_callback_processor.get_summary_instruction')
+    @patch('integrations.gitlab.gitlab_service.SaaSGitLabService')
+    @patch('integrations.gitlab.gitlab_v1_callback_processor._logger')
+    async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
+        self,
+        mock_logger,
+        mock_saas_gitlab_service_cls,
+        mock_get_summary_instruction,
+        mock_get_httpx_client,
+        mock_get_sandbox_service,
+        mock_get_app_conversation_info_service,
+        gitlab_callback_processor,
+        conversation_state_update_event,
+        event_callback,
+        mock_app_conversation_info,
+        mock_sandbox_info,
+    ):
+        """Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
+        conversation_id = uuid4()
+
+        mock_httpx_client = await _setup_happy_path_services(
+            mock_get_app_conversation_info_service,
+            mock_get_sandbox_service,
+            mock_get_httpx_client,
+            mock_app_conversation_info,
+            mock_sandbox_info,
+        )
+        # Simulate a budget exceeded error from the agent server
+        budget_error_msg = (
+            'HTTP 500 error: {"detail":"Internal Server Error",'
+            '"exception":"litellm.BadRequestError: Litellm_proxyException - '
+            'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
+        )
+        mock_httpx_client.post.side_effect = Exception(budget_error_msg)
+        mock_get_summary_instruction.return_value = 'Please provide a summary'
+
+        mock_gitlab_service = AsyncMock()
+        mock_saas_gitlab_service_cls.return_value = mock_gitlab_service
+
+        result = await gitlab_callback_processor(
+            conversation_id=conversation_id,
+            callback=event_callback,
+            event=conversation_state_update_event,
+        )
+
+        assert result is not None
+        assert result.status == EventCallbackResultStatus.ERROR
+
+        # Verify exception was NOT called (budget exceeded uses info instead)
+        mock_logger.exception.assert_not_called()
+
+        # Verify budget exceeded info log was called
+        info_calls = [str(call) for call in mock_logger.info.call_args_list]
+        budget_log_found = any('Budget exceeded' in call for call in info_calls)
+        assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
+
+        # Verify user-friendly message was posted to GitLab
+        mock_gitlab_service.reply_to_issue.assert_called_once()
+        call_args = mock_gitlab_service.reply_to_issue.call_args
+        posted_comment = call_args[0][3]  # 4th positional arg is the body
+        assert 'OpenHands encountered an error' in posted_comment
+        assert 'LLM budget has been exceeded' in posted_comment
+        assert 'please re-fill' in posted_comment
+        # Should NOT contain the raw error message
+        assert 'litellm.BadRequestError' not in posted_comment
--- a/enterprise/tests/unit/integrations/slack/test_slack_v1_callback_processor.py
+++ b/enterprise/tests/unit/integrations/slack/test_slack_v1_callback_processor.py
@@ -429,3 +429,89 @@ class TestSlackV1CallbackProcessor:
        assert result is not None
        assert result.status == EventCallbackResultStatus.ERROR
        assert expected_error_fragment in result.detail
+
+    @patch('storage.slack_team_store.SlackTeamStore.get_instance')
+    @patch('openhands.app_server.config.get_httpx_client')
+    @patch('openhands.app_server.config.get_sandbox_service')
+    @patch('openhands.app_server.config.get_app_conversation_info_service')
+    @patch('integrations.slack.slack_v1_callback_processor.get_summary_instruction')
+    @patch('integrations.slack.slack_v1_callback_processor._logger')
+    @patch('integrations.slack.slack_v1_callback_processor.WebClient')
+    async def test_budget_exceeded_error_logs_info_and_sends_friendly_message(
+        self,
+        mock_web_client_cls,
+        mock_logger,
+        mock_get_summary_instruction,
+        mock_get_app_conversation_info_service,
+        mock_get_sandbox_service,
+        mock_get_httpx_client,
+        mock_slack_team_store,
+        slack_callback_processor,
+        finish_event,
+        event_callback,
+        mock_app_conversation_info,
+        mock_sandbox_info,
+    ):
+        """Test that budget exceeded errors are logged at INFO level and user gets friendly message."""
+        conversation_id = uuid4()
+
+        # Mock SlackTeamStore
+        mock_store = MagicMock()
+        mock_store.get_team_bot_token = AsyncMock(return_value='xoxb-test-token')
+        mock_slack_team_store.return_value = mock_store
+
+        mock_get_summary_instruction.return_value = 'Please provide a summary'
+
+        # Mock services
+        mock_app_conversation_info_service = AsyncMock()
+        mock_app_conversation_info_service.get_app_conversation_info.return_value = (
+            mock_app_conversation_info
+        )
+        mock_get_app_conversation_info_service.return_value.__aenter__.return_value = (
+            mock_app_conversation_info_service
+        )
+
+        mock_sandbox_service = AsyncMock()
+        mock_sandbox_service.get_sandbox.return_value = mock_sandbox_info
+        mock_get_sandbox_service.return_value.__aenter__.return_value = (
+            mock_sandbox_service
+        )
+
+        # Simulate a budget exceeded error from the agent server
+        budget_error_msg = (
+            'HTTP 500 error: {"detail":"Internal Server Error",'
+            '"exception":"litellm.BadRequestError: Litellm_proxyException - '
+            'Budget has been exceeded! Current cost: 12.65, Max budget: 12.62"}'
+        )
+        mock_httpx_client = AsyncMock()
+        mock_httpx_client.post.side_effect = Exception(budget_error_msg)
+        mock_get_httpx_client.return_value.__aenter__.return_value = mock_httpx_client
+
+        # Mock Slack WebClient
+        mock_slack_client = MagicMock()
+        mock_web_client_cls.return_value = mock_slack_client
+
+        result = await slack_callback_processor(
+            conversation_id, event_callback, finish_event
+        )
+
+        assert result is not None
+        assert result.status == EventCallbackResultStatus.ERROR
+
+        # Verify exception was NOT called (budget exceeded uses info instead)
+        mock_logger.exception.assert_not_called()
+
+        # Verify budget exceeded info log was called
+        info_calls = [str(call) for call in mock_logger.info.call_args_list]
+        budget_log_found = any('Budget exceeded' in call for call in info_calls)
+        assert budget_log_found, f'Expected budget exceeded log, got: {info_calls}'
+
+        # Verify user-friendly message was posted to Slack
+        mock_slack_client.chat_postMessage.assert_called_once()
+        call_kwargs = mock_slack_client.chat_postMessage.call_args[1]
+        posted_message = call_kwargs.get('text', '')
+        assert 'OpenHands encountered an error' in posted_message
+        assert 'LLM budget has been exceeded' in posted_message
+        assert 'please re-fill' in posted_message
+        # Should NOT contain the raw error message
+        assert 'litellm.BadRequestError' not in posted_message