diff --git a/openhands/core/exceptions.py b/openhands/core/exceptions.py
index 342e0db0e7..c154152d4b 100644
--- a/openhands/core/exceptions.py
+++ b/openhands/core/exceptions.py
@@ -88,6 +88,16 @@ class LLMResponseError(Exception):
         super().__init__(message)
 
 
+# This exception should be retried
+# Typically, after retry with a non-zero temperature, the LLM will return a response
+class LLMNoResponseError(Exception):
+    def __init__(
+        self,
+        message: str = 'LLM did not return a response. This is only seen in Gemini models so far.',
+    ) -> None:
+        super().__init__(message)
+
+
 class UserCancelledError(Exception):
     def __init__(self, message: str = 'User cancelled the request') -> None:
         super().__init__(message)
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 42ffe5eac8..27a948fd1c 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -23,6 +23,7 @@ from litellm.exceptions import (
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.llm.debug_mixin import DebugMixin
@@ -37,7 +38,12 @@ from openhands.llm.retry_mixin import RetryMixin
 __all__ = ['LLM']
 
 # tuple of exceptions to retry on
-LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
+    RateLimitError,
+    litellm.Timeout,
+    litellm.InternalServerError,
+    LLMNoResponseError,
+)
 
 # cache prompt supporting models
 # remove this when we gemini and deepseek are supported
@@ -63,6 +69,7 @@ FUNCTION_CALLING_SUPPORTED_MODELS = [
     'o1-2024-12-17',
     'o3-mini-2025-01-31',
     'o3-mini',
+    'gemini-2.5-pro',
 ]
 
 REASONING_EFFORT_SUPPORTED_MODELS = [
@@ -267,8 +274,12 @@ class LLM(RetryMixin, DebugMixin):
 
             # if we mocked function calling, and we have tools, convert the response back to function calling format
             if mock_function_calling and mock_fncall_tools is not None:
-                logger.debug(f'Response choices: {len(resp.choices)}')
-                assert len(resp.choices) >= 1
+                if len(resp.choices) < 1:
+                    raise LLMNoResponseError(
+                        'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
+                        + str(resp)
+                    )
+
                 non_fncall_response_message = resp.choices[0].message
                 fn_call_messages_with_response = (
                     convert_non_fncall_messages_to_fncall_messages(
@@ -282,6 +293,13 @@
                 )
                 resp.choices[0].message = fn_call_response_message
 
+            # Check if resp has 'choices' key with at least one item
+            if not resp.get('choices') or len(resp['choices']) < 1:
+                raise LLMNoResponseError(
+                    'Response choices is less than 1 - This is only seen in Gemini models so far. Response: '
+                    + str(resp)
+                )
+
             message_back: str = resp['choices'][0]['message']['content'] or ''
             tool_calls: list[ChatCompletionMessageToolCall] = resp['choices'][0][
                 'message'
diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py
index 08a8add639..19b7c9689f 100644
--- a/openhands/llm/retry_mixin.py
+++ b/openhands/llm/retry_mixin.py
@@ -5,6 +5,7 @@ from tenacity import (
     wait_exponential,
 )
 
+from openhands.core.exceptions import LLMNoResponseError
 from openhands.core.logger import openhands_logger as logger
 from openhands.utils.tenacity_stop import stop_if_should_exit
 
@@ -35,6 +36,22 @@ class RetryMixin:
             if retry_listener:
                 retry_listener(retry_state.attempt_number, num_retries)
 
+            # Check if the exception is LLMNoResponseError
+            exception = retry_state.outcome.exception()
+            if isinstance(exception, LLMNoResponseError):
+                if hasattr(retry_state, 'kwargs'):
+                    # Only change temperature if it's zero or not set
+                    current_temp = retry_state.kwargs.get('temperature', 0)
+                    if current_temp == 0:
+                        retry_state.kwargs['temperature'] = 1.0
+                        logger.warning(
+                            'LLMNoResponseError detected with temperature=0, setting temperature to 1.0 for next attempt.'
+                        )
+                    else:
+                        logger.warning(
+                            f'LLMNoResponseError detected with temperature={current_temp}, keeping original temperature'
+                        )
+
         return retry(
             before_sleep=before_sleep,
             stop=stop_after_attempt(num_retries) | stop_if_should_exit(),
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 87db3bc62e..6d243ed577 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -10,7 +10,7 @@ from litellm.exceptions import (
 )
 
 from openhands.core.config import LLMConfig
-from openhands.core.exceptions import OperationCancelled
+from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
 from openhands.core.message import Message, TextContent
 from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
@@ -385,6 +385,223 @@ def test_completion_keyboard_interrupt_handler(mock_litellm_completion, default_
     _should_exit = False
 
 
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_zero_temp(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator properly handles LLMNoResponseError by:
+    1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError
+    2. Second call should have temperature=1.0 and return a successful response
+    """
+
+    # Define a side effect function that checks the temperature parameter
+    # and returns different responses based on it
+    def side_effect(*args, **kwargs):
+        temperature = kwargs.get('temperature', 0)
+
+        # First call with temperature=0 should raise LLMNoResponseError
+        if temperature == 0:
+            raise LLMNoResponseError('LLM did not return a response')
+
+        else:
+            return {
+                'choices': [
+                    {'message': {'content': f'Response with temperature={temperature}'}}
+                ]
+            }
+
+    mock_litellm_completion.side_effect = side_effect
+
+    # Create LLM instance and make a completion call
+    llm = LLM(config=default_config)
+    response = llm.completion(
+        messages=[{'role': 'user', 'content': 'Hello!'}],
+        stream=False,
+        temperature=0,  # Initial temperature is 0
+    )
+
+    # Verify the response after retry
+    assert (
+        response['choices'][0]['message']['content'] == 'Response with temperature=1.0'
+    )
+
+    # Verify that litellm_completion was called twice
+    assert mock_litellm_completion.call_count == 2
+
+    # Check the temperature in the first call (should be 0)
+    first_call_kwargs = mock_litellm_completion.call_args_list[0][1]
+    assert first_call_kwargs.get('temperature') == 0
+
+    # Check the temperature in the second call (should be 1.0)
+    second_call_kwargs = mock_litellm_completion.call_args_list[1][1]
+    assert second_call_kwargs.get('temperature') == 1.0
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_nonzero_temp(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator works for LLMNoResponseError when the initial temperature is non-zero,
+    and keeps the original temperature on retry.
+
+    This test verifies that when LLMNoResponseError is raised with a non-zero temperature:
+    1. The retry mechanism is triggered
+    2. The temperature remains unchanged (not reset to 1.0)
+    3. After all retries are exhausted, the exception is propagated
+    """
+    # Define a side effect function that always raises LLMNoResponseError
+    mock_litellm_completion.side_effect = LLMNoResponseError(
+        'LLM did not return a response'
+    )
+
+    # Create LLM instance and make a completion call with non-zero temperature
+    llm = LLM(config=default_config)
+
+    # We expect this to raise an exception after all retries are exhausted
+    with pytest.raises(LLMNoResponseError):
+        llm.completion(
+            messages=[{'role': 'user', 'content': 'Hello!'}],
+            stream=False,
+            temperature=0.7,  # Initial temperature is non-zero
+        )
+
+    # Verify that litellm_completion was called multiple times (retries happened)
+    # The default_config has num_retries=2, so it should be called 2 times
+    assert mock_litellm_completion.call_count == 2
+
+    # Check the temperature in the first call (should be 0.7)
+    first_call_kwargs = mock_litellm_completion.call_args_list[0][1]
+    assert first_call_kwargs.get('temperature') == 0.7
+
+    # Check the temperature in the second call (should remain 0.7, not changed to 1.0)
+    second_call_kwargs = mock_litellm_completion.call_args_list[1][1]
+    assert second_call_kwargs.get('temperature') == 0.7
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator works for LLMNoResponseError with non-zero temperature
+    and successfully retries while preserving the original temperature.
+
+    This test verifies that:
+    1. First call to llm_completion with temperature=0.7 throws LLMNoResponseError
+    2. Second call with the same temperature=0.7 returns a successful response
+    """
+
+    # Define a side effect function that raises LLMNoResponseError on first call
+    # and returns a successful response on second call
+    def side_effect(*args, **kwargs):
+        temperature = kwargs.get('temperature', 0)
+
+        if mock_litellm_completion.call_count == 1:
+            # First call should raise LLMNoResponseError
+            raise LLMNoResponseError('LLM did not return a response')
+        else:
+            # Second call should return a successful response
+            return {
+                'choices': [
+                    {
+                        'message': {
+                            'content': f'Successful response with temperature={temperature}'
+                        }
+                    }
+                ]
+            }
+
+    mock_litellm_completion.side_effect = side_effect
+
+    # Create LLM instance and make a completion call with non-zero temperature
+    llm = LLM(config=default_config)
+    response = llm.completion(
+        messages=[{'role': 'user', 'content': 'Hello!'}],
+        stream=False,
+        temperature=0.7,  # Non-zero temperature
+    )
+
+    # Verify the response after retry
+    assert (
+        response['choices'][0]['message']['content']
+        == 'Successful response with temperature=0.7'
+    )
+
+    # Verify that litellm_completion was called twice
+    assert mock_litellm_completion.call_count == 2
+
+    # Check the temperature in the first call (should be 0.7)
+    first_call_kwargs = mock_litellm_completion.call_args_list[0][1]
+    assert first_call_kwargs.get('temperature') == 0.7
+
+    # Check the temperature in the second call (should still be 0.7)
+    second_call_kwargs = mock_litellm_completion.call_args_list[1][1]
+    assert second_call_kwargs.get('temperature') == 0.7
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_retry_with_llm_no_response_error_successful_retry(
+    mock_litellm_completion, default_config
+):
+    """
+    Test that the retry decorator works for LLMNoResponseError with zero temperature
+    and successfully retries with temperature=1.0.
+
+    This test verifies that:
+    1. First call to llm_completion with temperature=0 throws LLMNoResponseError
+    2. Second call with temperature=1.0 returns a successful response
+    """
+
+    # Define a side effect function that raises LLMNoResponseError on first call
+    # and returns a successful response on second call
+    def side_effect(*args, **kwargs):
+        temperature = kwargs.get('temperature', 0)
+
+        if mock_litellm_completion.call_count == 1:
+            # First call should raise LLMNoResponseError
+            raise LLMNoResponseError('LLM did not return a response')
+        else:
+            # Second call should return a successful response
+            return {
+                'choices': [
+                    {
+                        'message': {
+                            'content': f'Successful response with temperature={temperature}'
+                        }
+                    }
+                ]
+            }
+
+    mock_litellm_completion.side_effect = side_effect
+
+    # Create LLM instance and make a completion call with explicit temperature=0
+    llm = LLM(config=default_config)
+    response = llm.completion(
+        messages=[{'role': 'user', 'content': 'Hello!'}],
+        stream=False,
+        temperature=0,  # Explicitly set temperature to 0
+    )
+
+    # Verify the response after retry
+    assert (
+        response['choices'][0]['message']['content']
+        == 'Successful response with temperature=1.0'
+    )
+
+    # Verify that litellm_completion was called twice
+    assert mock_litellm_completion.call_count == 2
+
+    # Check the temperature in the first call (should be 0)
+    first_call_kwargs = mock_litellm_completion.call_args_list[0][1]
+    assert first_call_kwargs.get('temperature') == 0
+
+    # Check the temperature in the second call (should be 1.0)
+    second_call_kwargs = mock_litellm_completion.call_args_list[1][1]
+    assert second_call_kwargs.get('temperature') == 1.0
+
+
 @patch('openhands.llm.llm.litellm_completion')
 def test_completion_with_litellm_mock(mock_litellm_completion, default_config):
     mock_response = {