Mirror of https://github.com/OpenHands/OpenHands.git, synced 2025-12-26 05:48:36 +08:00
Centralize model feature checks (#10414)
Co-authored-by: OpenHands-GPT-5 <openhands@all-hands.dev>
This commit is contained in:
parent aa6b454772
commit bb0e24d23b
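A quick sketch of the pattern this commit applies at every call site, adapted from the hunks below; the helper name apply_reasoning_effort is hypothetical and only for illustration, not a line from the diff:

# Illustrative sketch only, adapted from the async_llm.py hunk below.
from openhands.llm.model_features import get_features

def apply_reasoning_effort(model: str, reasoning_effort: str | None, kwargs: dict) -> None:
    # Before this commit: `if model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:`
    # After: one centralized feature lookup shared by LLM, AsyncLLM and StreamingLLM.
    if get_features(model).supports_reasoning_effort:
        kwargs['reasoning_effort'] = reasoning_effort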
@@ -172,9 +172,6 @@ class LLMConfig(BaseModel):
# Set reasoning_effort to 'high' by default for non-Gemini models
# Gemini models use optimized thinking budget when reasoning_effort is None
logger.debug(
    f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
)
if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
    self.reasoning_effort = 'high'
@@ -9,8 +9,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import (
    LLM,
    LLM_RETRY_EXCEPTIONS,
    REASONING_EFFORT_SUPPORTED_MODELS,
)
from openhands.llm.model_features import get_features
from openhands.utils.shutdown_listener import should_continue

@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
messages = kwargs['messages']

# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
    kwargs['reasoning_effort'] = self.config.reasoning_effort

# ensure we work with a list of messages
@@ -9,6 +9,7 @@ import httpx
from openhands.core.config import LLMConfig
from openhands.llm.metrics import Metrics
from openhands.llm.model_features import get_features

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
@@ -49,79 +50,6 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
    LLMNoResponseError,
)

# cache prompt supporting models
# remove this when we gemini and deepseek are supported
CACHE_PROMPT_SUPPORTED_MODELS = [
    'claude-3-7-sonnet-20250219',
    'claude-sonnet-3-7-latest',
    'claude-3.7-sonnet',
    'claude-3-5-sonnet-20241022',
    'claude-3-5-sonnet-20240620',
    'claude-3-5-haiku-20241022',
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
    'claude-opus-4-1-20250805',
]

# function calling supporting models
FUNCTION_CALLING_SUPPORTED_MODELS = [
    'claude-3-7-sonnet-20250219',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet',
    'claude-3-5-sonnet-20240620',
    'claude-3-5-sonnet-20241022',
    'claude-3.5-haiku',
    'claude-3-5-haiku-20241022',
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
    'claude-opus-4-1-20250805',
    'gpt-4o-mini',
    'gpt-4o',
    'o1-2024-12-17',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o3',
    'o3-2025-04-16',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-pro',
    'gpt-4.1',
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'Qwen3-Coder-480B-A35B-Instruct',
    'qwen3-coder',  # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
    'gpt-5',
    'gpt-5-2025-08-07',
]

REASONING_EFFORT_SUPPORTED_MODELS = [
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gpt-5',
    'gpt-5-2025-08-07',
    'claude-opus-4-1-20250805',  # we need to remove top_p for opus 4.1
]

MODELS_WITHOUT_STOP_WORDS = [
    'o1-mini',
    'o1-preview',
    'o1',
    'o1-2024-12-17',
    'xai/grok-4-0709',
]


class LLM(RetryMixin, DebugMixin):
    """The LLM class represents a Language Model instance.
@@ -154,6 +82,7 @@ class LLM(RetryMixin, DebugMixin):
)

self.model_info: ModelInfo | None = None
self._function_calling_active: bool = False
self.retry_listener = retry_listener
if self.config.log_completions:
    if self.config.log_completions_folder is None:

@@ -202,10 +131,8 @@ class LLM(RetryMixin, DebugMixin):
    f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
)

if (
    self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
    or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
):
features = get_features(self.config.model)
if features.supports_reasoning_effort:
    # For Gemini models, only map 'low' to optimized thinking budget
    # Let other reasoning_effort values pass through to API as-is
    if 'gemini-2.5-pro' in self.config.model:

@@ -312,7 +239,7 @@ class LLM(RetryMixin, DebugMixin):

# add stop words if the model supports it and stop words are not disabled
if (
    self.config.model not in MODELS_WITHOUT_STOP_WORDS
    get_features(self.config.model).supports_stop_words
    and not self.config.disable_stop_word
):
    kwargs['stop'] = STOP_WORDS

@@ -556,17 +483,10 @@ class LLM(RetryMixin, DebugMixin):
):
    self.config.max_output_tokens = self.model_info['max_tokens']

# Initialize function calling capability
# Check if model name is in our supported list
model_name_supported = (
    self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
    or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
    or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
)

# Handle native_tool_calling user-defined configuration
# Initialize function calling using centralized model features
features = get_features(self.config.model)
if self.config.native_tool_calling is None:
    self._function_calling_active = model_name_supported
    self._function_calling_active = features.supports_function_calling
else:
    self._function_calling_active = self.config.native_tool_calling

@@ -601,14 +521,10 @@ class LLM(RetryMixin, DebugMixin):
Returns:
    boolean: True if prompt caching is supported and enabled for the given model.
"""
return (
    self.config.caching_prompt is True
    and (
        self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
        or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
    )
    # We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
)
if not self.config.caching_prompt:
    return False
# We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
return get_features(self.config.model).supports_prompt_cache

def is_function_calling_active(self) -> bool:
    """Returns whether function calling is supported and enabled for this LLM instance.
openhands/llm/model_features.py (new file, 139 lines)
@@ -0,0 +1,139 @@
from __future__ import annotations

from dataclasses import dataclass
from fnmatch import fnmatch


def normalize_model_name(model: str) -> str:
    """Normalize a model string to a canonical, comparable name.

    Strategy:
    - Trim whitespace
    - Lowercase
    - If there is a '/', keep only the basename after the last '/'
      (handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.)
      and treat ':' inside that basename as an Ollama-style variant tag to be removed
    - There is no provider:model form; providers, when present, use 'provider/model'
    - Drop a trailing "-gguf" suffix if present
    """
    raw = (model or '').strip().lower()
    if '/' in raw:
        name = raw.split('/')[-1]
        if ':' in name:
            # Drop Ollama-style variant tag in basename
            name = name.split(':', 1)[0]
    else:
        # No '/', keep the whole raw name (we do not support provider:model)
        name = raw
    if name.endswith('-gguf'):
        name = name[: -len('-gguf')]
    return name


def model_matches(model: str, patterns: list[str]) -> bool:
    """Return True if the model matches any of the glob patterns.

    If a pattern contains a '/', it is treated as provider-qualified and matched
    against the full, lowercased model string (including provider prefix).
    Otherwise, it is matched against the normalized basename.
    """
    raw = (model or '').strip().lower()
    name = normalize_model_name(model)
    for pat in patterns:
        pat_l = pat.lower()
        if '/' in pat_l:
            if fnmatch(raw, pat_l):
                return True
        else:
            if fnmatch(name, pat_l):
                return True
    return False


@dataclass(frozen=True)
class ModelFeatures:
    supports_function_calling: bool
    supports_reasoning_effort: bool
    supports_prompt_cache: bool
    supports_stop_words: bool


# Pattern tables capturing current behavior. Keep patterns lowercase.
FUNCTION_CALLING_PATTERNS: list[str] = [
    # Anthropic families
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3.5-haiku*',
    'claude-3-5-haiku*',
    'claude-sonnet-4*',
    'claude-opus-4*',
    # OpenAI families
    'gpt-4o*',
    'gpt-4.1',
    'gpt-5*',
    # o-series (keep exact o1 support per existing list)
    'o1-2024-12-17',
    'o3*',
    'o4-mini*',
    # Google Gemini
    'gemini-2.5-pro*',
    # Others
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'qwen3-coder*',
    'qwen3-coder-480b-a35b-instruct',
]

REASONING_EFFORT_PATTERNS: list[str] = [
    # Mirror main behavior exactly (no unintended expansion), plus DeepSeek support
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gpt-5',
    'gpt-5-2025-08-07',
    'claude-opus-4-1-20250805',
    # DeepSeek reasoning family
    'deepseek-r1-0528*',
]

PROMPT_CACHE_PATTERNS: list[str] = [
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3-5-haiku*',
    'claude-3.5-haiku*',
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    'claude-sonnet-4*',
    'claude-opus-4*',
]

SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
    # o1 family doesn't support stop words
    'o1*',
    # grok-4 specific model name (basename)
    'grok-4-0709',
    # DeepSeek R1 family
    'deepseek-r1-0528*',
]


def get_features(model: str) -> ModelFeatures:
    return ModelFeatures(
        supports_function_calling=model_matches(model, FUNCTION_CALLING_PATTERNS),
        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
        supports_stop_words=not model_matches(
            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
        ),
    )
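A short usage sketch (not part of the diff) showing how the new helpers behave; the inputs mirror cases from the tests further down:

# Illustrative only: exercises the helpers defined in model_features.py above.
from openhands.llm.model_features import get_features, normalize_model_name

# Provider prefixes and Ollama-style variant tags are stripped before matching.
assert normalize_model_name('litellm_proxy/claude-3.7-sonnet') == 'claude-3.7-sonnet'
assert normalize_model_name('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL') == 'deepseek-r1-0528'

# Call sites ask for a ModelFeatures record instead of scanning hardcoded lists.
features = get_features('openrouter/o3-mini')
assert features.supports_reasoning_effort is True
assert features.supports_prompt_cache is False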
@@ -5,7 +5,7 @@ from typing import Any, Callable
from openhands.core.exceptions import UserCancelledError
from openhands.core.logger import openhands_logger as logger
from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
from openhands.llm.model_features import get_features


class StreamingLLM(AsyncLLM):

@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
)

# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
    kwargs['reasoning_effort'] = self.config.reasoning_effort

self.log_prompt(messages)
poetry.lock (generated, 8 lines changed)
@@ -11387,14 +11387,14 @@ test = ["pytest", "pytest-cov"]

[[package]]
name = "xlsxwriter"
version = "3.2.3"
version = "3.2.5"
description = "A Python module for creating Excel XLSX files."
optional = false
python-versions = ">=3.6"
python-versions = ">=3.8"
groups = ["main"]
files = [
    {file = "XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d"},
    {file = "xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5"},
    {file = "xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd"},
    {file = "xlsxwriter-3.2.5.tar.gz", hash = "sha256:7e88469d607cdc920151c0ab3ce9cf1a83992d4b7bc730c5ffdd1a12115a7dbe"},
]

[[package]]
@@ -12,8 +12,10 @@ from litellm.exceptions import (
from openhands.core.config import LLMConfig
from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
from openhands.core.message import Message, TextContent
from openhands.llm.async_llm import AsyncLLM
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics, TokenUsage
from openhands.llm.streaming_llm import StreamingLLM


@pytest.fixture(autouse=True)

@@ -252,7 +254,7 @@ def test_response_latency_tracking(mock_time, mock_litellm_completion):

@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    default_config.model = 'openrouter:gpt-4o-mini'
    default_config.model = 'openrouter/gpt-4o-mini'
    mock_get_model_info.return_value = {
        'max_input_tokens': 7000,
        'max_output_tokens': 1500,

@@ -261,7 +263,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    llm.init_model_info()
    assert llm.config.max_input_tokens == 7000
    assert llm.config.max_output_tokens == 1500
    mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
    mock_get_model_info.assert_called_once_with('openrouter/gpt-4o-mini')


@patch('openhands.llm.llm.litellm_completion')

@@ -1201,6 +1203,92 @@ def test_gemini_medium_reasoning_effort_passes_through(mock_completion):
    assert call_kwargs.get('reasoning_effort') == 'medium'


@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_reasoning_pops_temperature_top_p(mock_completion):
    mock_completion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='anthropic/claude-opus-4-1-20250805',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    llm = LLM(config, service_id='svc')
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_completion.call_args[1]
    assert 'temperature' not in call_kwargs
    assert 'top_p' not in call_kwargs


@patch('openhands.llm.llm.litellm_completion')
def test_opus_4_keeps_temperature_top_p(mock_completion):
    mock_completion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='anthropic/claude-opus-4-20250514',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    llm = LLM(config, service_id='svc')
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_completion.call_args[1]
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.llm.litellm.get_model_info')
def test_is_caching_prompt_active_anthropic_prefixed(mock_get_model_info):
    # Avoid external calls, but behavior shouldn't depend on model info
    mock_get_model_info.side_effect = Exception('skip')
    config = LLMConfig(
        model='anthropic/claude-3-7-sonnet', api_key='k', caching_prompt=True
    )
    llm = LLM(config, service_id='svc')
    assert llm.is_caching_prompt_active() is True


@patch('openhands.llm.llm.httpx.get')
@patch('openhands.llm.llm.litellm.get_model_info')
def test_openhands_provider_rewrite_and_caching_prompt(
    mock_get_model_info, mock_httpx_get
):
    # Mock LiteLLM proxy /v1/model/info response
    mock_httpx_get.return_value = type(
        'Resp',
        (),
        {
            'json': lambda self=None: {
                'data': [
                    {
                        'model_name': 'claude-3.7-sonnet',
                        'model_info': {
                            'max_input_tokens': 200000,
                            'max_output_tokens': 64000,
                            'supports_vision': True,
                        },
                    }
                ]
            }
        },
    )()
    mock_get_model_info.return_value = {
        'max_input_tokens': 200000,
        'max_output_tokens': 64000,
    }

    config = LLMConfig(
        model='openhands/claude-3.7-sonnet', api_key='k', caching_prompt=True
    )
    llm = LLM(config, service_id='svc')
    # Model should be rewritten to litellm_proxy/...
    assert llm.config.model.startswith('litellm_proxy/claude-3.7-sonnet')
    # Caching prompt should be active for Claude
    assert llm.is_caching_prompt_active() is True


@patch('openhands.llm.llm.litellm_completion')
def test_gemini_high_reasoning_effort_passes_through(mock_completion):
    """Test that Gemini with reasoning_effort='high' passes through to litellm."""

@@ -1239,10 +1327,61 @@ def test_non_gemini_uses_reasoning_effort(mock_completion):
    sample_messages = [{'role': 'user', 'content': 'Hello, how are you?'}]
    llm.completion(messages=sample_messages)

    # Verify that reasoning_effort was used and thinking budget was not set
    call_kwargs = mock_completion.call_args[1]


@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_reasoning_effort_passthrough(mock_acompletion):
    mock_acompletion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
    )
    llm = AsyncLLM(config, service_id='svc')
    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_acompletion.call_args[1]
    assert call_kwargs.get('reasoning_effort') == 'low'
    # Async path does not pop temperature/top_p (parity with main)
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.streaming_llm.AsyncLLM._call_acompletion')
@pytest.mark.asyncio
async def test_streaming_reasoning_effort_passthrough(mock_call):
    async def fake_stream(*args, **kwargs):
        class Dummy:
            async def __aiter__(self):
                yield {'choices': [{'delta': {'content': 'x'}}]}

        return Dummy()

    mock_call.side_effect = fake_stream
    config = LLMConfig(
        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
    )
    sllm = StreamingLLM(config, service_id='svc')
    async for _ in sllm.async_streaming_completion(
        messages=[{'role': 'user', 'content': 'hi'}]
    ):
        break
    call_kwargs = mock_call.call_args[1]
    assert call_kwargs.get('reasoning_effort') == 'low'
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_streaming_no_thinking_for_gemini(mock_acompletion):
    mock_acompletion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(model='gemini-2.5-pro', api_key='k', reasoning_effort='low')
    llm = AsyncLLM(config, service_id='svc')
    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_acompletion.call_args[1]
    assert 'thinking' not in call_kwargs
    assert call_kwargs.get('reasoning_effort') == 'high'


@patch('openhands.llm.llm.litellm_completion')
tests/unit/llm/test_model_features.py (new file, 293 lines)
@@ -0,0 +1,293 @@
import pytest

from openhands.llm.model_features import (
    ModelFeatures,
    get_features,
    model_matches,
    normalize_model_name,
)


@pytest.mark.parametrize(
    'raw,expected',
    [
        (' OPENAI/gpt-4o ', 'gpt-4o'),
        ('anthropic/claude-3-7-sonnet', 'claude-3-7-sonnet'),
        ('litellm_proxy/gemini-2.5-pro', 'gemini-2.5-pro'),
        ('qwen3-coder-480b-a35b-instruct', 'qwen3-coder-480b-a35b-instruct'),
        ('gpt-5', 'gpt-5'),
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'deepseek-r1-0528'),
        ('openai/GLM-4.5-GGUF', 'glm-4.5'),
        ('openrouter/gpt-4o-mini', 'gpt-4o-mini'),
        (
            'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0',
            'anthropic.claude-3-5-sonnet-20241022-v2',
        ),
        ('', ''),
        (None, ''),  # type: ignore[arg-type]
    ],
)
def test_normalize_model_name(raw, expected):
    assert normalize_model_name(raw) == expected


@pytest.mark.parametrize(
    'name,pattern,expected',
    [
        ('gpt-4o', 'gpt-4o*', True),
        ('openai/gpt-4o', 'gpt-4o*', True),
        ('litellm_proxy/gpt-4o-mini', 'gpt-4o*', True),
        ('claude-3-7-sonnet-20250219', 'claude-3-7-sonnet*', True),
        ('o1-2024-12-17', 'o1*', True),
        ('grok-4-0709', 'grok-4-0709', True),
        ('grok-4-0801', 'grok-4-0709', False),
    ],
)
def test_model_matches(name, pattern, expected):
    assert model_matches(name, [pattern]) is expected


@pytest.mark.parametrize(
    'name,pattern,expected',
    [
        ('openai/gpt-4o', 'openai/gpt-4o*', True),
        ('openrouter/gpt-4o', 'openai/gpt-4o*', False),
        ('litellm_proxy/gpt-4o-mini', 'litellm_proxy/gpt-4o*', True),
        (
            'gpt-4o',
            'openai/gpt-4o*',
            False,
        ),  # basename alone should not match provider-qualified
        ('unknown-model', 'gpt-5*', False),
    ],
)
def test_model_matches_provider_qualified(name, pattern, expected):
    assert model_matches(name, [pattern]) is expected


@pytest.mark.parametrize(
    'model,expect',
    [
        (
            'gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'gpt-5',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o3-mini',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o1-2024-12-17',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'xai/grok-4-0709',
            ModelFeatures(
                supports_function_calling=False,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'anthropic/claude-3-7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'litellm_proxy/claude-3.7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'gemini-2.5-pro',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'openai/gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),  # provider-qualified still matches basename patterns
    ],
)
def test_get_features(model, expect):
    features = get_features(model)
    assert features == expect


@pytest.mark.parametrize(
    'model',
    [
        # Anthropic families
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3.5-haiku',
        'claude-3-5-haiku-20241022',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
        # OpenAI families
        'gpt-4o',
        'gpt-4.1',
        'gpt-5',
        # o-series
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        # Google Gemini
        'gemini-2.5-pro',
        # Others
        'kimi-k2-0711-preview',
        'kimi-k2-instruct',
        'qwen3-coder',
        'qwen3-coder-480b-a35b-instruct',
    ],
)
def test_function_calling_models(model):
    features = get_features(model)
    assert features.supports_function_calling is True


@pytest.mark.parametrize(
    'model',
    [
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        'gemini-2.5-flash',
        'gemini-2.5-pro',
        'gpt-5',
        'claude-opus-4-1-20250805',
    ],
)
def test_reasoning_effort_models(model):
    features = get_features(model)
    assert features.supports_reasoning_effort is True


@pytest.mark.parametrize(
    'model',
    [
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_deepseek_reasoning_effort_models(model):
    features = get_features(model)
    assert features.supports_reasoning_effort is True


@pytest.mark.parametrize(
    'model',
    [
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3-5-haiku-20241022',
        'claude-3-haiku-20240307',
        'claude-3-opus-20240229',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
    ],
)
def test_prompt_cache_models(model):
    features = get_features(model)
    assert features.supports_prompt_cache is True


@pytest.mark.parametrize(
    'model,expected',
    [
        # Positive cases: exactly those supported on main
        ('o1', True),
        ('o1-2024-12-17', True),
        ('o3', True),
        ('o3-2025-04-16', True),
        ('o3-mini', True),
        ('o3-mini-2025-01-31', True),
        ('o4-mini', True),
        ('o4-mini-2025-04-16', True),
        ('gemini-2.5-flash', True),
        ('gemini-2.5-pro', True),
        ('gpt-5', True),
        ('gpt-5-2025-08-07', True),
        ('claude-opus-4-1-20250805', True),
        # DeepSeek
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', True),
        ('DeepSeek-R1-0528', True),
        # Negative cases: ensure we didn't unintentionally expand
        ('o1-mini', False),
        ('o1-preview', False),
        ('gemini-1.0-pro', False),
    ],
)
def test_reasoning_effort_parity_with_main(model, expected):
    assert get_features(model).supports_reasoning_effort is expected


def test_prompt_cache_haiku_variants():
    assert get_features('claude-3-5-haiku-20241022').supports_prompt_cache is True
    assert get_features('claude-3.5-haiku-20241022').supports_prompt_cache is True


def test_stop_words_grok_provider_prefixed():
    assert get_features('xai/grok-4-0709').supports_stop_words is False
    assert get_features('grok-4-0709').supports_stop_words is False


@pytest.mark.parametrize(
    'model',
    [
        'o1-mini',
        'o1-2024-12-17',
        'xai/grok-4-0709',
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_supports_stop_words_false_models(model):
    features = get_features(model)
    assert features.supports_stop_words is False