Mirror of https://github.com/OpenHands/OpenHands.git (synced 2025-12-26 05:48:36 +08:00)
Simplify max_output_tokens handling in LLM classes (#9296)
Co-authored-by: openhands <openhands@all-hands.dev>
parent 8b90d610c6
commit 1e33624951
@@ -163,6 +163,7 @@ class LLM(RetryMixin, DebugMixin):
             'temperature': self.config.temperature,
+            'max_completion_tokens': self.config.max_output_tokens,
         }

        if self.config.top_k is not None:
            # openai doesn't expose top_k
            # litellm will handle it a bit differently than the openai-compatible params
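For reference, these kwargs are the ones forwarded to litellm's completion call. A minimal sketch of the resulting call shape under assumed values (the model name, messages, and temperature below are illustrative and not taken from this diff); litellm.completion declares max_completion_tokens as an optional parameter defaulting to None, so forwarding an unset config value is the same as not setting an output limit at all:

# Sketch only: how config.max_output_tokens flows into the litellm call.
# Model name, messages, and temperature are made-up example values; running
# this for real requires provider credentials.
import litellm

max_output_tokens = None  # unset in config -> no explicit output cap

kwargs = {
    'temperature': 0.0,
    'max_completion_tokens': max_output_tokens,
}

# Passing max_completion_tokens=None is equivalent to omitting it entirely,
# so the provider's own default output limit applies.
response = litellm.completion(
    model='gpt-4o-mini',
    messages=[{'role': 'user', 'content': 'Say hello.'}],
    **kwargs,
)
print(response.choices[0].message.content)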
@@ -486,26 +487,6 @@ class LLM(RetryMixin, DebugMixin):
             # Safe fallback for any potentially viable model
             self.config.max_input_tokens = 4096

-        if self.config.max_output_tokens is None:
-            # Safe default for any potentially viable model
-            self.config.max_output_tokens = 4096
-            if self.model_info is not None:
-                # max_output_tokens has precedence over max_tokens, if either exists.
-                # litellm has models with both, one or none of these 2 parameters!
-                if 'max_output_tokens' in self.model_info and isinstance(
-                    self.model_info['max_output_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_output_tokens']
-                elif 'max_tokens' in self.model_info and isinstance(
-                    self.model_info['max_tokens'], int
-                ):
-                    self.config.max_output_tokens = self.model_info['max_tokens']
-            if any(
-                model in self.config.model
-                for model in ['claude-3-7-sonnet', 'claude-3.7-sonnet']
-            ):
-                self.config.max_output_tokens = 64000  # litellm set max to 128k, but that requires a header to be set
-
        # Initialize function calling capability
        # Check if model name is in our supported list
        model_name_supported = (
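The block removed above is the entire fallback chain that used to pick a concrete output limit when none was configured. Rewritten as a standalone function for clarity (a sketch reconstructed from the deleted lines, with model_info as a plain dict standing in for the result of litellm.get_model_info):

# Sketch of the defaulting logic this commit removes, reconstructed from the
# deleted lines above.
from typing import Optional


def resolve_max_output_tokens_old(
    configured: Optional[int],
    model_info: Optional[dict],
    model: str,
) -> Optional[int]:
    if configured is not None:
        return configured  # an explicit config value always won
    resolved = 4096  # old "safe default for any potentially viable model"
    if model_info is not None:
        # max_output_tokens had precedence over max_tokens, if either existed
        if isinstance(model_info.get('max_output_tokens'), int):
            resolved = model_info['max_output_tokens']
        elif isinstance(model_info.get('max_tokens'), int):
            resolved = model_info['max_tokens']
    if any(m in model for m in ['claude-3-7-sonnet', 'claude-3.7-sonnet']):
        resolved = 64000  # litellm allows 128k, but only with an extra header
    return resolved


# After this commit none of the above runs: config.max_output_tokens simply
# stays None unless the user sets it, and that value is forwarded to litellm
# as max_completion_tokens.
print(resolve_max_output_tokens_old(None, {'max_tokens': 8192}, 'gpt-4o'))    # 8192
print(resolve_max_output_tokens_old(None, None, 'claude-3-7-sonnet'))         # 64000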
@@ -132,7 +132,7 @@ def test_llm_init_with_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 8000
-    assert llm.config.max_output_tokens == 2000
+    assert llm.config.max_output_tokens is None


 @patch('openhands.llm.llm.litellm.get_model_info')
@@ -141,7 +141,7 @@ def test_llm_init_without_model_info(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 4096
-    assert llm.config.max_output_tokens == 4096
+    assert llm.config.max_output_tokens is None


 def test_llm_init_with_custom_config():
@@ -260,7 +260,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
     llm = LLM(default_config)
     llm.init_model_info()
     assert llm.config.max_input_tokens == 7000
-    assert llm.config.max_output_tokens == 1500
+    assert llm.config.max_output_tokens is None
     mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')

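The test updates all follow the same pattern: even when the mocked litellm.get_model_info reports output limits for the model, config.max_output_tokens is now expected to stay None. A hedged sketch of that pattern (the LLMConfig import path and field values are assumptions; the patch target and the assertions mirror the diff):

# Sketch of the test pattern used above, not a verbatim copy of the suite.
from unittest.mock import patch

from openhands.core.config import LLMConfig
from openhands.llm.llm import LLM


@patch('openhands.llm.llm.litellm.get_model_info')
def test_max_output_tokens_stays_none(mock_get_model_info):
    # Even when litellm reports an output limit for the model, the config
    # value is no longer overwritten; None means "let the provider decide".
    mock_get_model_info.return_value = {
        'max_input_tokens': 8000,
        'max_output_tokens': 2000,
    }
    llm = LLM(LLMConfig(model='gpt-4o', api_key='test-key'))
    llm.init_model_info()
    assert llm.config.max_input_tokens == 8000
    assert llm.config.max_output_tokens is None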