fix - Set claude sonnet output limit (#11098)

2025-12-26 05:48:36 +08:00 · 2025-09-24 08:31:20 -05:00 · 2025-09-24 08:31:20 -05:00 · 61fd49d351
commit 61fd49d351
parent 135d3aea09
2 changed files with 18 additions and 11 deletions
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@ -502,11 +502,13 @@ class LLM(RetryMixin, DebugMixin):

        # Set max_output_tokens from model info if not explicitly set
        if self.config.max_output_tokens is None:
-            # Special case for Claude 3.7 Sonnet models
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
+            # Special case for Claude Sonnet models
+            sonnet_models = [
+                'claude-3-7-sonnet',
+                'claude-3.7-sonnet',
+                'claude-sonnet-4',
+            ]
+            if any(model in self.config.model for model in sonnet_models):
                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
            # Try to get from model info
            elif self.model_info is not None:
--- a/tests/unit/llm/test_llm.py
+++ b/tests/unit/llm/test_llm.py
@ -1053,17 +1053,22 @@ def test_claude_3_7_sonnet_max_output_tokens():
    assert llm.config.max_input_tokens is None


-def test_claude_sonnet_4_max_output_tokens():
+@patch('openhands.llm.llm.litellm.get_model_info')
+def test_claude_sonnet_4_max_output_tokens(mock_get_model_info):
    """Test that Claude Sonnet 4 models get the correct max_output_tokens and max_input_tokens values."""
+    mock_get_model_info.return_value = {
+        'max_input_tokens': 100000,
+        'max_output_tokens': 100000,
+    }
    # Create LLM instance with a Claude Sonnet 4 model
    config = LLMConfig(model='claude-sonnet-4-20250514', api_key='test_key')
    llm = LLM(config, service_id='test-service')
+    llm.init_model_info()

-    # Verify max_output_tokens is set to the expected value
-    assert llm.config.max_output_tokens == 64000
-    # Verify max_input_tokens is set to the expected value
-    # For Claude models, we expect a specific value from litellm
-    assert llm.config.max_input_tokens == 200000
+    assert llm.config.max_output_tokens == 64000, 'output max should be decreased'
+    assert llm.config.max_input_tokens == 100000, (
+        'input max should be the litellm value'
+    )


 def test_sambanova_deepseek_model_max_output_tokens():