Change default max_output_tokens to None and add comprehensive model tests (#9366)

Co-authored-by: openhands <openhands@all-hands.dev>
commit 17853cd5bd
parent c992b6d2a0
Author: Graham Neubig
Date:   2025-06-29 21:57:34 -04:00 (committed by GitHub)
3 changed files with 111 additions and 24 deletions
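
In effect, when litellm has no metadata for a model, both token limits now stay unset instead of silently falling back to 4096, so callers must treat None as "no known limit". A minimal illustration of the new behavior, assuming the import paths the test suite uses:

from openhands.core.config import LLMConfig
from openhands.llm.llm import LLM

# A model litellm has no metadata for: both limits now stay None
# (previously they defaulted to 4096).
llm = LLM(LLMConfig(model='non-existent-model', api_key='test_key'))
assert llm.config.max_input_tokens is None
assert llm.config.max_output_tokens is None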


@@ -482,24 +482,26 @@ class LLM(RetryMixin, DebugMixin):
         )
         self.config.top_p = 0.9 if self.config.top_p == 1 else self.config.top_p
 
-        # Set the max tokens in an LM-specific way if not set
-        if self.config.max_input_tokens is None:
-            if (
-                self.model_info is not None
-                and 'max_input_tokens' in self.model_info
-                and isinstance(self.model_info['max_input_tokens'], int)
-            ):
-                self.config.max_input_tokens = self.model_info['max_input_tokens']
-            else:
-                # Safe fallback for any potentially viable model
-                self.config.max_input_tokens = 4096
+        # Set max_input_tokens from model info if not explicitly set
+        if (
+            self.config.max_input_tokens is None
+            and self.model_info is not None
+            and 'max_input_tokens' in self.model_info
+            and isinstance(self.model_info['max_input_tokens'], int)
+        ):
+            self.config.max_input_tokens = self.model_info['max_input_tokens']
 
+        # Set max_output_tokens from model info if not explicitly set
         if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
+            # Special case for Claude 3.7 Sonnet models
+            if any(
+                model in self.config.model
+                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
+            ):
+                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
+            # Try to get from model info
+            elif self.model_info is not None:
+                # max_output_tokens has precedence over max_tokens
                 if 'max_output_tokens' in self.model_info and isinstance(
                     self.model_info['max_output_tokens'], int
                 ):
@@ -508,11 +510,6 @@ class LLM(RetryMixin, DebugMixin):
                     self.model_info['max_tokens'], int
                 ):
                     self.config.max_output_tokens = self.model_info['max_tokens']
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
 
         # Initialize function calling capability
         # Check if model name is in our supported list
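
Taken together, the two hunks above leave the following resolution order for max_output_tokens. This is a condensed, free-standing sketch (the helper name is hypothetical; model_info is the litellm-style dict the diff checks):

from typing import Any, Optional

def resolve_max_output_tokens(
    model: str, configured: Optional[int], model_info: Optional[dict[str, Any]]
) -> Optional[int]:
    # 1. An explicit config value always wins.
    if configured is not None:
        return configured
    # 2. Claude 3.7 Sonnet is special-cased ahead of the model_info lookup:
    #    litellm reports up to 128k output tokens, but that requires a header.
    if any(m in model for m in ['claude-3-7-sonnet', 'claude-3.7-sonnet']):
        return 64000
    # 3. Otherwise fall back to model info, where max_output_tokens has
    #    precedence over max_tokens.
    if model_info is not None:
        if isinstance(model_info.get('max_output_tokens'), int):
            return model_info['max_output_tokens']
        if isinstance(model_info.get('max_tokens'), int):
            return model_info['max_tokens']
    # 4. No explicit value, no special case, no model info: stay None
    #    (previously this fell back to 4096).
    return None

Before this change, step 4 returned 4096, and the Claude 3.7 special case ran after the model_info lookup instead of before it.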


@@ -296,7 +296,6 @@ class Memory:
                 self.knowledge_microagents[name] = agent_knowledge
             for name, agent_repo in repo_agents.items():
                 self.repo_microagents[name] = agent_repo
-
         except Exception as e:
             logger.warning(
                 f'Failed to load user microagents from {USER_MICROAGENTS_DIR}: {str(e)}'


@@ -140,8 +140,8 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     mock_get_model_info.side_effect = Exception('Model info not available')
     llm = LLM(default_config)
     llm.init_model_info()
-    assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens is None
+    assert llm.config.max_output_tokens is None
 
 
 def test_llm_init_with_custom_config():
@@ -981,3 +981,94 @@ def test_llm_base_url_auto_protocol_patch(mock_get):
     called_url = mock_get.call_args[0][0]
     assert called_url.startswith('http://') or called_url.startswith('https://')
+
+
+# Tests for max_output_tokens configuration and usage
+def test_unknown_model_token_limits():
+    """Test that models without known token limits get None for both max_output_tokens and max_input_tokens."""
+    # Create LLM instance with a non-existent model to avoid litellm having model info for it
+    config = LLMConfig(model='non-existent-model', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens and max_input_tokens are initialized to None (default value)
+    assert llm.config.max_output_tokens is None
+    assert llm.config.max_input_tokens is None
+
+
+def test_max_tokens_from_model_info():
+    """Test that max_output_tokens and max_input_tokens are correctly initialized from model info."""
+    # Create LLM instance with GPT-4 model which has known token limits
+    config = LLMConfig(model='gpt-4', api_key='test_key')
+    llm = LLM(config)
+
+    # GPT-4 has specific token limits
+    # These are the expected values from litellm
+    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_input_tokens == 8192
+
+
+def test_claude_3_7_sonnet_max_output_tokens():
+    """Test that Claude 3.7 Sonnet models get the special 64000 max_output_tokens value and default max_input_tokens."""
+    # Create LLM instance with Claude 3.7 Sonnet model
+    config = LLMConfig(model='claude-3-7-sonnet', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to 64000 for Claude 3.7 Sonnet
+    assert llm.config.max_output_tokens == 64000
+
+    # Verify max_input_tokens is set to None (default value)
+    assert llm.config.max_input_tokens is None
+
+
+def test_claude_sonnet_4_max_output_tokens():
+    """Test that Claude Sonnet 4 models get the correct max_output_tokens and max_input_tokens values."""
+    # Create LLM instance with a Claude Sonnet 4 model
+    config = LLMConfig(model='claude-sonnet-4-20250514', api_key='test_key')
+    llm = LLM(config)
+
+    # Verify max_output_tokens is set to the expected value
+    assert llm.config.max_output_tokens == 64000
+
+    # Verify max_input_tokens is set to the expected value
+    # For Claude models, we expect a specific value from litellm
+    assert llm.config.max_input_tokens == 200000
+
+
+def test_sambanova_deepseek_model_max_output_tokens():
+    """Test that SambaNova DeepSeek-V3-0324 model gets the correct max_output_tokens value."""
+    # Create LLM instance with SambaNova DeepSeek model
+    config = LLMConfig(model='sambanova/DeepSeek-V3-0324', api_key='test_key')
+    llm = LLM(config)
+
+    # SambaNova DeepSeek model has specific token limits
+    # This is the expected value from litellm
+    assert llm.config.max_output_tokens == 32768
+
+
+def test_max_output_tokens_override_in_config():
+    """Test that max_output_tokens can be overridden in the config."""
+    # Create LLM instance with minimal config and overridden max_output_tokens
+    config = LLMConfig(
+        model='claude-sonnet-4-20250514', api_key='test_key', max_output_tokens=2048
+    )
+    llm = LLM(config)
+
+    # Verify the config has the overridden max_output_tokens value
+    assert llm.config.max_output_tokens == 2048
+
+
+def test_azure_model_default_max_tokens():
+    """Test that Azure models have the default max_output_tokens value."""
+    # Create minimal config for Azure model (without specifying max_output_tokens)
+    azure_config = LLMConfig(
+        model='azure/non-existent-model',  # Use a non-existent model to avoid litellm having model info for it
+        api_key='test_key',
+        base_url='https://test.openai.azure.com/',
+        api_version='2024-12-01-preview',
+    )
+
+    # Create LLM instance with Azure model
+    llm = LLM(azure_config)
+
+    # Verify the config has the default max_output_tokens value
+    assert llm.config.max_output_tokens is None  # Default value