Simplify max_output_tokens handling in LLM classes (#9296)

Co-authored-by: openhands <openhands@all-hands.dev>
Graham Neubig authored 2025-06-23 06:48:45 -04:00; committed by GitHub
parent 8b90d610c6
commit 1e33624951
2 changed files with 4 additions and 23 deletions


@@ -163,6 +163,7 @@ class LLM(RetryMixin, DebugMixin):
             'temperature': self.config.temperature,
+            'max_completion_tokens': self.config.max_output_tokens,
         }
         if self.config.top_k is not None:
             # openai doesn't expose top_k
             # litellm will handle it a bit differently than the openai-compatible params
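With this change the completion kwargs carry max_completion_tokens straight from the config, so a value the user never set stays None and is simply passed through to litellm instead of being replaced by a locally computed default. A minimal sketch of that construction, assuming a config object with temperature, max_output_tokens, and top_k attributes (the helper name is hypothetical):

def build_completion_kwargs(config):
    # Mirrors the hunk above: max_completion_tokens is taken directly from
    # config; when it is None, the litellm/provider default applies.
    kwargs = {
        'temperature': config.temperature,
        'max_completion_tokens': config.max_output_tokens,
    }
    if config.top_k is not None:
        # openai doesn't expose top_k; litellm forwards it where supported
        kwargs['top_k'] = config.top_k
    return kwargs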
@@ -486,26 +487,6 @@ class LLM(RetryMixin, DebugMixin):
             # Safe fallback for any potentially viable model
             self.config.max_input_tokens = 4096
-        if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
-                if 'max_output_tokens' in self.model_info and isinstance(
-                    self.model_info['max_output_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_output_tokens']
-                elif 'max_tokens' in self.model_info and isinstance(
-                    self.model_info['max_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_tokens']
-
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set

         # Initialize function calling capability
         # Check if model name is in our supported list
         model_name_supported = (
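The deleted block is what init_model_info used to do: default max_output_tokens to 4096, then override it from litellm's model info ('max_output_tokens' taking precedence over 'max_tokens'), with a hard-coded 64000 for claude-3-7-sonnet / claude-3.7-sonnet. After this commit, config.max_output_tokens is left untouched unless the user sets it. A caller that still wants the model's advertised output limit can read it from litellm directly; a hedged sketch, where the helper name is hypothetical and litellm.get_model_info is the same call the class already uses:

import litellm

def lookup_max_output_tokens(model: str) -> int | None:
    # Returns the model's advertised output-token limit, or None if litellm
    # has no mapping for it. 'max_output_tokens' wins over 'max_tokens',
    # matching the precedence the removed code used.
    try:
        info = litellm.get_model_info(model)
    except Exception:
        return None
    for key in ('max_output_tokens', 'max_tokens'):
        value = info.get(key)
        if isinstance(value, int):
            return value
    return None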


@@ -132,7 +132,7 @@ def test_llm_init_with_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 8000
-    assert llm.config.max_output_tokens == 2000
+    assert llm.config.max_output_tokens is None


 @patch('openhands.llm.llm.litellm.get_model_info')
@@ -141,7 +141,7 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_output_tokens is None


 def test_llm_init_with_custom_config():
@@ -260,7 +260,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 7000
-    assert llm.config.max_output_tokens == 1500
+    assert llm.config.max_output_tokens is None
     mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
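All three updated tests pin the same expectation: init_model_info still resolves max_input_tokens (from model info, or the 4096 fallback), while max_output_tokens stays None when the user did not set it. A hedged sketch of that expectation in the style of the tests above; the test name and mocked model-info payload are illustrative, the patch target and the default_config fixture come from this test file:

from unittest.mock import patch

from openhands.llm.llm import LLM


@patch('openhands.llm.llm.litellm.get_model_info')
def test_max_output_tokens_stays_unset(mock_get_model_info, default_config):
    # Model info advertises an output limit, but it is no longer copied
    # into the config; only max_input_tokens is filled in.
    mock_get_model_info.return_value = {
        'max_input_tokens': 8000,
        'max_output_tokens': 2000,
    }
    llm = LLM(default_config)
    llm.init_model_info()
    assert llm.config.max_input_tokens == 8000
    assert llm.config.max_output_tokens is None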