diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index dcfeecd9c6..91e0c79f15 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -163,6 +163,7 @@ class LLM(RetryMixin, DebugMixin):
             'temperature': self.config.temperature,
             'max_completion_tokens': self.config.max_output_tokens,
         }
+
         if self.config.top_k is not None:
             # openai doesn't expose top_k
             # litellm will handle it a bit differently than the openai-compatible params
@@ -486,26 +487,6 @@ class LLM(RetryMixin, DebugMixin):
                 # Safe fallback for any potentially viable model
                 self.config.max_input_tokens = 4096
 
-        if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
-                if 'max_output_tokens' in self.model_info and isinstance(
-                    self.model_info['max_output_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_output_tokens']
-                elif 'max_tokens' in self.model_info and isinstance(
-                    self.model_info['max_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_tokens']
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
-
         # Initialize function calling capability
         # Check if model name is in our supported list
         model_name_supported = (
diff --git a/tests/unit/test_llm.py b/tests/unit/test_llm.py
index 77c24f7da1..94840bf39d 100644
--- a/tests/unit/test_llm.py
+++ b/tests/unit/test_llm.py
@@ -132,7 +132,7 @@ def test_llm_init_with_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 8000
-    assert llm.config.max_output_tokens == 2000
+    assert llm.config.max_output_tokens is None
 
 
 @patch('openhands.llm.llm.litellm.get_model_info')
@@ -141,7 +141,7 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_output_tokens is None
 
 
 def test_llm_init_with_custom_config():
@@ -260,7 +260,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 7000
-    assert llm.config.max_output_tokens == 1500
+    assert llm.config.max_output_tokens is None
     mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
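
For context, a minimal standalone sketch of the behaviour this patch aims for: with the
defaulting logic removed, an unset max_output_tokens stays None and is forwarded as
max_completion_tokens, leaving the choice of output limit to litellm and the provider.
DummyConfig and build_completion_kwargs below are illustrative stand-ins, not the
repository's LLMConfig or LLM code.

# Hypothetical sketch, not part of the patch: names mirror the diff but the
# classes and functions here are invented for illustration only.
from typing import Any


class DummyConfig:
    """Stand-in for the LLM config with only the fields used in this sketch."""

    def __init__(
        self,
        temperature: float = 0.0,
        max_output_tokens: int | None = None,
        top_k: int | None = None,
    ):
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self.top_k = top_k


def build_completion_kwargs(config: DummyConfig) -> dict[str, Any]:
    kwargs: dict[str, Any] = {
        'temperature': config.temperature,
        # No local default any more: None means "let the provider decide".
        'max_completion_tokens': config.max_output_tokens,
    }
    if config.top_k is not None:
        # openai doesn't expose top_k; litellm handles it a bit differently
        # than the openai-compatible params (comment mirrors the diff)
        kwargs['top_k'] = config.top_k
    return kwargs


# An unset max_output_tokens is passed through untouched, matching the updated tests.
assert build_completion_kwargs(DummyConfig())['max_completion_tokens'] is None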