diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py
index 5942c99f7e..40d27ea78b 100644
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -172,9 +172,6 @@ class LLMConfig(BaseModel):
 
         # Set reasoning_effort to 'high' by default for non-Gemini models
         # Gemini models use optimized thinking budget when reasoning_effort is None
-        logger.debug(
-            f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
-        )
         if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
             self.reasoning_effort = 'high'
 
diff --git a/openhands/llm/async_llm.py b/openhands/llm/async_llm.py
index ef3d4e1848..10ae80a19e 100644
--- a/openhands/llm/async_llm.py
+++ b/openhands/llm/async_llm.py
@@ -9,8 +9,8 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.llm.llm import (
     LLM,
     LLM_RETRY_EXCEPTIONS,
-    REASONING_EFFORT_SUPPORTED_MODELS,
 )
+from openhands.llm.model_features import get_features
 from openhands.utils.shutdown_listener import should_continue
@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
             messages = kwargs['messages']
 
             # Set reasoning effort for models that support it
-            if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
+            if get_features(self.config.model).supports_reasoning_effort:
                 kwargs['reasoning_effort'] = self.config.reasoning_effort
 
             # ensure we work with a list of messages
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 3d76116af9..f8a92793a4 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -9,6 +9,7 @@ import httpx
 
 from openhands.core.config import LLMConfig
 from openhands.llm.metrics import Metrics
+from openhands.llm.model_features import get_features
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
@@ -49,79 +50,6 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
     LLMNoResponseError,
 )
 
-# cache prompt supporting models
-# remove this when we gemini and deepseek are supported
-CACHE_PROMPT_SUPPORTED_MODELS = [
-    'claude-3-7-sonnet-20250219',
-    'claude-sonnet-3-7-latest',
-    'claude-3.7-sonnet',
-    'claude-3-5-sonnet-20241022',
-    'claude-3-5-sonnet-20240620',
-    'claude-3-5-haiku-20241022',
-    'claude-3-haiku-20240307',
-    'claude-3-opus-20240229',
-    'claude-sonnet-4-20250514',
-    'claude-sonnet-4',
-    'claude-opus-4-20250514',
-    'claude-opus-4-1-20250805',
-]
-
-# function calling supporting models
-FUNCTION_CALLING_SUPPORTED_MODELS = [
-    'claude-3-7-sonnet-20250219',
-    'claude-sonnet-3-7-latest',
-    'claude-3-5-sonnet',
-    'claude-3-5-sonnet-20240620',
-    'claude-3-5-sonnet-20241022',
-    'claude-3.5-haiku',
-    'claude-3-5-haiku-20241022',
-    'claude-sonnet-4-20250514',
-    'claude-sonnet-4',
-    'claude-opus-4-20250514',
-    'claude-opus-4-1-20250805',
-    'gpt-4o-mini',
-    'gpt-4o',
-    'o1-2024-12-17',
-    'o3-mini-2025-01-31',
-    'o3-mini',
-    'o3',
-    'o3-2025-04-16',
-    'o4-mini',
-    'o4-mini-2025-04-16',
-    'gemini-2.5-pro',
-    'gpt-4.1',
-    'kimi-k2-0711-preview',
-    'kimi-k2-instruct',
-    'Qwen3-Coder-480B-A35B-Instruct',
-    'qwen3-coder',  # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
-    'gpt-5',
-    'gpt-5-2025-08-07',
-]
-
-REASONING_EFFORT_SUPPORTED_MODELS = [
-    'o1-2024-12-17',
-    'o1',
-    'o3',
-    'o3-2025-04-16',
-    'o3-mini-2025-01-31',
-    'o3-mini',
-    'o4-mini',
-    'o4-mini-2025-04-16',
-    'gemini-2.5-flash',
-    'gemini-2.5-pro',
-    'gpt-5',
-    'gpt-5-2025-08-07',
-    'claude-opus-4-1-20250805',  # we need to remove top_p for opus 4.1
-]
-
-MODELS_WITHOUT_STOP_WORDS = [
-    'o1-mini',
-    'o1-preview',
-    'o1',
-    'o1-2024-12-17',
-    'xai/grok-4-0709',
-]
-
 
 class LLM(RetryMixin, DebugMixin):
     """The LLM class represents a Language Model instance.
@@ -154,6 +82,7 @@ class LLM(RetryMixin, DebugMixin):
         )
         self.model_info: ModelInfo | None = None
+        self._function_calling_active: bool = False
         self.retry_listener = retry_listener
         if self.config.log_completions:
             if self.config.log_completions_folder is None:
@@ -202,10 +131,8 @@ class LLM(RetryMixin, DebugMixin):
                 f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
             )
 
-        if (
-            self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
-            or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
-        ):
+        features = get_features(self.config.model)
+        if features.supports_reasoning_effort:
             # For Gemini models, only map 'low' to optimized thinking budget
             # Let other reasoning_effort values pass through to API as-is
             if 'gemini-2.5-pro' in self.config.model:
@@ -312,7 +239,7 @@ class LLM(RetryMixin, DebugMixin):
 
             # add stop words if the model supports it and stop words are not disabled
             if (
-                self.config.model not in MODELS_WITHOUT_STOP_WORDS
+                get_features(self.config.model).supports_stop_words
                 and not self.config.disable_stop_word
             ):
                 kwargs['stop'] = STOP_WORDS
@@ -556,17 +483,10 @@ class LLM(RetryMixin, DebugMixin):
             ):
                 self.config.max_output_tokens = self.model_info['max_tokens']
 
-        # Initialize function calling capability
-        # Check if model name is in our supported list
-        model_name_supported = (
-            self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
-            or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
-            or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
-        )
-
-        # Handle native_tool_calling user-defined configuration
+        # Initialize function calling using centralized model features
+        features = get_features(self.config.model)
         if self.config.native_tool_calling is None:
-            self._function_calling_active = model_name_supported
+            self._function_calling_active = features.supports_function_calling
         else:
             self._function_calling_active = self.config.native_tool_calling
@@ -601,14 +521,10 @@ class LLM(RetryMixin, DebugMixin):
         Returns:
             boolean: True if prompt caching is supported and enabled for the given model.
         """
-        return (
-            self.config.caching_prompt is True
-            and (
-                self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
-                or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
-            )
-            # We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
-        )
+        if not self.config.caching_prompt:
+            return False
+        # We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
+        return get_features(self.config.model).supports_prompt_cache
 
     def is_function_calling_active(self) -> bool:
         """Returns whether function calling is supported and enabled for this LLM instance.
diff --git a/openhands/llm/model_features.py b/openhands/llm/model_features.py
new file mode 100644
index 0000000000..c863963649
--- /dev/null
+++ b/openhands/llm/model_features.py
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from fnmatch import fnmatch
+
+
+def normalize_model_name(model: str) -> str:
+    """Normalize a model string to a canonical, comparable name.
+
+    Strategy:
+    - Trim whitespace
+    - Lowercase
+    - If there is a '/', keep only the basename after the last '/'
+      (handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.)
+      and treat ':' inside that basename as an Ollama-style variant tag to be removed
+    - There is no provider:model form; providers, when present, use 'provider/model'
+    - Drop a trailing "-gguf" suffix if present
+    """
+    raw = (model or '').strip().lower()
+    if '/' in raw:
+        name = raw.split('/')[-1]
+        if ':' in name:
+            # Drop Ollama-style variant tag in basename
+            name = name.split(':', 1)[0]
+    else:
+        # No '/', keep the whole raw name (we do not support provider:model)
+        name = raw
+    if name.endswith('-gguf'):
+        name = name[: -len('-gguf')]
+    return name
+
+
+def model_matches(model: str, patterns: list[str]) -> bool:
+    """Return True if the model matches any of the glob patterns.
+
+    If a pattern contains a '/', it is treated as provider-qualified and matched
+    against the full, lowercased model string (including provider prefix).
+    Otherwise, it is matched against the normalized basename.
+    """
+    raw = (model or '').strip().lower()
+    name = normalize_model_name(model)
+    for pat in patterns:
+        pat_l = pat.lower()
+        if '/' in pat_l:
+            if fnmatch(raw, pat_l):
+                return True
+        else:
+            if fnmatch(name, pat_l):
+                return True
+    return False
+
+
+@dataclass(frozen=True)
+class ModelFeatures:
+    supports_function_calling: bool
+    supports_reasoning_effort: bool
+    supports_prompt_cache: bool
+    supports_stop_words: bool
+
+
+# Pattern tables capturing current behavior. Keep patterns lowercase.
+FUNCTION_CALLING_PATTERNS: list[str] = [
+    # Anthropic families
+    'claude-3-7-sonnet*',
+    'claude-3.7-sonnet*',
+    'claude-sonnet-3-7-latest',
+    'claude-3-5-sonnet*',
+    'claude-3.5-haiku*',
+    'claude-3-5-haiku*',
+    'claude-sonnet-4*',
+    'claude-opus-4*',
+    # OpenAI families
+    'gpt-4o*',
+    'gpt-4.1',
+    'gpt-5*',
+    # o-series (keep exact o1 support per existing list)
+    'o1-2024-12-17',
+    'o3*',
+    'o4-mini*',
+    # Google Gemini
+    'gemini-2.5-pro*',
+    # Others
+    'kimi-k2-0711-preview',
+    'kimi-k2-instruct',
+    'qwen3-coder*',
+    'qwen3-coder-480b-a35b-instruct',
+]
+
+REASONING_EFFORT_PATTERNS: list[str] = [
+    # Mirror main behavior exactly (no unintended expansion), plus DeepSeek support
+    'o1-2024-12-17',
+    'o1',
+    'o3',
+    'o3-2025-04-16',
+    'o3-mini-2025-01-31',
+    'o3-mini',
+    'o4-mini',
+    'o4-mini-2025-04-16',
+    'gemini-2.5-flash',
+    'gemini-2.5-pro',
+    'gpt-5',
+    'gpt-5-2025-08-07',
+    'claude-opus-4-1-20250805',
+    # DeepSeek reasoning family
+    'deepseek-r1-0528*',
+]
+
+PROMPT_CACHE_PATTERNS: list[str] = [
+    'claude-3-7-sonnet*',
+    'claude-3.7-sonnet*',
+    'claude-sonnet-3-7-latest',
+    'claude-3-5-sonnet*',
+    'claude-3-5-haiku*',
+    'claude-3.5-haiku*',
+    'claude-3-haiku-20240307',
+    'claude-3-opus-20240229',
+    'claude-sonnet-4*',
+    'claude-opus-4*',
+]
+
+SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
+    # o1 family doesn't support stop words
+    'o1*',
+    # grok-4 specific model name (basename)
+    'grok-4-0709',
+    # DeepSeek R1 family
+    'deepseek-r1-0528*',
+]
+
+
+def get_features(model: str) -> ModelFeatures:
+    return ModelFeatures(
+        supports_function_calling=model_matches(model, FUNCTION_CALLING_PATTERNS),
+        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
+        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
+        supports_stop_words=not model_matches(
+            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
+        ),
+    )
diff --git a/openhands/llm/streaming_llm.py b/openhands/llm/streaming_llm.py
index d722f80d06..410344bd64 100644
--- a/openhands/llm/streaming_llm.py
+++ b/openhands/llm/streaming_llm.py
@@ -5,7 +5,7 @@ from typing import Any, Callable
 from openhands.core.exceptions import UserCancelledError
 from openhands.core.logger import openhands_logger as logger
 from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
-from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
+from openhands.llm.model_features import get_features
 
 
 class StreamingLLM(AsyncLLM):
@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
                 )
 
             # Set reasoning effort for models that support it
-            if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
+            if get_features(self.config.model).supports_reasoning_effort:
                 kwargs['reasoning_effort'] = self.config.reasoning_effort
 
             self.log_prompt(messages)
diff --git a/poetry.lock b/poetry.lock
index 2fd83fac16..4b973b53ac 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -11387,14 +11387,14 @@ test = ["pytest", "pytest-cov"]
 
 [[package]]
 name = "xlsxwriter"
-version = "3.2.3"
+version = "3.2.5"
 description = "A Python module for creating Excel XLSX files."
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d"},
-    {file = "xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5"},
+    {file = "xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd"},
+    {file = "xlsxwriter-3.2.5.tar.gz", hash = "sha256:7e88469d607cdc920151c0ab3ce9cf1a83992d4b7bc730c5ffdd1a12115a7dbe"},
 ]
 
 [[package]]
diff --git a/tests/unit/llm/test_llm.py b/tests/unit/llm/test_llm.py
index 83c2c50020..3bba8337ac 100644
--- a/tests/unit/llm/test_llm.py
+++ b/tests/unit/llm/test_llm.py
@@ -12,8 +12,10 @@ from litellm.exceptions import (
 
 from openhands.core.config import LLMConfig
 from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
 from openhands.core.message import Message, TextContent
+from openhands.llm.async_llm import AsyncLLM
 from openhands.llm.llm import LLM
 from openhands.llm.metrics import Metrics, TokenUsage
+from openhands.llm.streaming_llm import StreamingLLM
 
 
 @pytest.fixture(autouse=True)
@@ -252,7 +254,7 @@ def test_response_latency_tracking(mock_time, mock_litellm_completion):
 
 @patch('openhands.llm.llm.litellm.get_model_info')
 def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
-    default_config.model = 'openrouter:gpt-4o-mini'
+    default_config.model = 'openrouter/gpt-4o-mini'
     mock_get_model_info.return_value = {
         'max_input_tokens': 7000,
         'max_output_tokens': 1500,
@@ -261,7 +263,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
     llm.init_model_info()
     assert llm.config.max_input_tokens == 7000
     assert llm.config.max_output_tokens == 1500
-    mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
+    mock_get_model_info.assert_called_once_with('openrouter/gpt-4o-mini')
 
 
 @patch('openhands.llm.llm.litellm_completion')
@@ -1201,6 +1203,92 @@ def test_gemini_medium_reasoning_effort_passes_through(mock_completion):
     assert call_kwargs.get('reasoning_effort') == 'medium'
 
 
+@patch('openhands.llm.llm.litellm_completion')
+def test_opus_41_reasoning_pops_temperature_top_p(mock_completion):
+    mock_completion.return_value = {
+        'choices': [{'message': {'content': 'ok'}}],
+    }
+    config = LLMConfig(
+        model='anthropic/claude-opus-4-1-20250805',
+        api_key='k',
+        temperature=0.7,
+        top_p=0.9,
+    )
+    llm = LLM(config, service_id='svc')
+    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
+    call_kwargs = mock_completion.call_args[1]
+    assert 'temperature' not in call_kwargs
+    assert 'top_p' not in call_kwargs
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_opus_4_keeps_temperature_top_p(mock_completion):
+    mock_completion.return_value = {
+        'choices': [{'message': {'content': 'ok'}}],
+    }
+    config = LLMConfig(
+        model='anthropic/claude-opus-4-20250514',
+        api_key='k',
+        temperature=0.7,
+        top_p=0.9,
+    )
+    llm = LLM(config, service_id='svc')
+    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
+    call_kwargs = mock_completion.call_args[1]
+    assert call_kwargs.get('temperature') == 0.7
+    assert call_kwargs.get('top_p') == 0.9
+
+
+@patch('openhands.llm.llm.litellm.get_model_info')
+def test_is_caching_prompt_active_anthropic_prefixed(mock_get_model_info):
+    # Avoid external calls, but behavior shouldn't depend on model info
+    mock_get_model_info.side_effect = Exception('skip')
+    config = LLMConfig(
+        model='anthropic/claude-3-7-sonnet', api_key='k', caching_prompt=True
+    )
+    llm = LLM(config, service_id='svc')
+    assert llm.is_caching_prompt_active() is True
+
+
+@patch('openhands.llm.llm.httpx.get')
+@patch('openhands.llm.llm.litellm.get_model_info')
+def test_openhands_provider_rewrite_and_caching_prompt(
+    mock_get_model_info, mock_httpx_get
+):
+    # Mock LiteLLM proxy /v1/model/info response
+    mock_httpx_get.return_value = type(
+        'Resp',
+        (),
+        {
+            'json': lambda self=None: {
+                'data': [
+                    {
+                        'model_name': 'claude-3.7-sonnet',
+                        'model_info': {
+                            'max_input_tokens': 200000,
+                            'max_output_tokens': 64000,
+                            'supports_vision': True,
+                        },
+                    }
+                ]
+            }
+        },
+    )()
+    mock_get_model_info.return_value = {
+        'max_input_tokens': 200000,
+        'max_output_tokens': 64000,
+    }
+
+    config = LLMConfig(
+        model='openhands/claude-3.7-sonnet', api_key='k', caching_prompt=True
+    )
+    llm = LLM(config, service_id='svc')
+    # Model should be rewritten to litellm_proxy/...
+    assert llm.config.model.startswith('litellm_proxy/claude-3.7-sonnet')
+    # Caching prompt should be active for Claude
+    assert llm.is_caching_prompt_active() is True
+
+
 @patch('openhands.llm.llm.litellm_completion')
 def test_gemini_high_reasoning_effort_passes_through(mock_completion):
     """Test that Gemini with reasoning_effort='high' passes through to litellm."""
@@ -1239,10 +1327,61 @@ def test_non_gemini_uses_reasoning_effort(mock_completion):
     sample_messages = [{'role': 'user', 'content': 'Hello, how are you?'}]
     llm.completion(messages=sample_messages)
 
-    # Verify that reasoning_effort was used and thinking budget was not set
-    call_kwargs = mock_completion.call_args[1]
+
+@patch('openhands.llm.async_llm.litellm_acompletion')
+@pytest.mark.asyncio
+async def test_async_reasoning_effort_passthrough(mock_acompletion):
+    mock_acompletion.return_value = {
+        'choices': [{'message': {'content': 'ok'}}],
+    }
+    config = LLMConfig(
+        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
+    )
+    llm = AsyncLLM(config, service_id='svc')
+    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
+    call_kwargs = mock_acompletion.call_args[1]
+    assert call_kwargs.get('reasoning_effort') == 'low'
+    # Async path does not pop temperature/top_p (parity with main)
+    assert call_kwargs.get('temperature') == 0.7
+    assert call_kwargs.get('top_p') == 0.9
+
+
+@patch('openhands.llm.streaming_llm.AsyncLLM._call_acompletion')
+@pytest.mark.asyncio
+async def test_streaming_reasoning_effort_passthrough(mock_call):
+    async def fake_stream(*args, **kwargs):
+        class Dummy:
+            async def __aiter__(self):
+                yield {'choices': [{'delta': {'content': 'x'}}]}
+
+        return Dummy()
+
+    mock_call.side_effect = fake_stream
+    config = LLMConfig(
+        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
+    )
+    sllm = StreamingLLM(config, service_id='svc')
+    async for _ in sllm.async_streaming_completion(
+        messages=[{'role': 'user', 'content': 'hi'}]
+    ):
+        break
+    call_kwargs = mock_call.call_args[1]
+    assert call_kwargs.get('reasoning_effort') == 'low'
+    assert call_kwargs.get('temperature') == 0.7
+    assert call_kwargs.get('top_p') == 0.9
+
+
+@patch('openhands.llm.async_llm.litellm_acompletion')
+@pytest.mark.asyncio
+async def test_async_streaming_no_thinking_for_gemini(mock_acompletion):
+    mock_acompletion.return_value = {
+        'choices': [{'message': {'content': 'ok'}}],
+    }
+    config = LLMConfig(model='gemini-2.5-pro', api_key='k', reasoning_effort='low')
+    llm = AsyncLLM(config, service_id='svc')
+    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
+    call_kwargs = mock_acompletion.call_args[1]
     assert 'thinking' not in call_kwargs
-    assert call_kwargs.get('reasoning_effort') == 'high'
 
 
 @patch('openhands.llm.llm.litellm_completion')
diff --git a/tests/unit/llm/test_model_features.py b/tests/unit/llm/test_model_features.py
new file mode 100644
index 0000000000..f999fc5839
--- /dev/null
+++ b/tests/unit/llm/test_model_features.py
@@ -0,0 +1,293 @@
+import pytest
+
+from openhands.llm.model_features import (
+    ModelFeatures,
+    get_features,
+    model_matches,
+    normalize_model_name,
+)
+
+
+@pytest.mark.parametrize(
+    'raw,expected',
+    [
+        (' OPENAI/gpt-4o ', 'gpt-4o'),
+        ('anthropic/claude-3-7-sonnet', 'claude-3-7-sonnet'),
+        ('litellm_proxy/gemini-2.5-pro', 'gemini-2.5-pro'),
+        ('qwen3-coder-480b-a35b-instruct', 'qwen3-coder-480b-a35b-instruct'),
+        ('gpt-5', 'gpt-5'),
+        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'deepseek-r1-0528'),
+        ('openai/GLM-4.5-GGUF', 'glm-4.5'),
+        ('openrouter/gpt-4o-mini', 'gpt-4o-mini'),
+        (
+            'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0',
+            'anthropic.claude-3-5-sonnet-20241022-v2',
+        ),
+        ('', ''),
+        (None, ''),  # type: ignore[arg-type]
+    ],
+)
+def test_normalize_model_name(raw, expected):
+    assert normalize_model_name(raw) == expected
+
+
+@pytest.mark.parametrize(
+    'name,pattern,expected',
+    [
+        ('gpt-4o', 'gpt-4o*', True),
+        ('openai/gpt-4o', 'gpt-4o*', True),
+        ('litellm_proxy/gpt-4o-mini', 'gpt-4o*', True),
+        ('claude-3-7-sonnet-20250219', 'claude-3-7-sonnet*', True),
+        ('o1-2024-12-17', 'o1*', True),
+        ('grok-4-0709', 'grok-4-0709', True),
+        ('grok-4-0801', 'grok-4-0709', False),
+    ],
+)
+def test_model_matches(name, pattern, expected):
+    assert model_matches(name, [pattern]) is expected
+
+
+@pytest.mark.parametrize(
+    'name,pattern,expected',
+    [
+        ('openai/gpt-4o', 'openai/gpt-4o*', True),
+        ('openrouter/gpt-4o', 'openai/gpt-4o*', False),
+        ('litellm_proxy/gpt-4o-mini', 'litellm_proxy/gpt-4o*', True),
+        (
+            'gpt-4o',
+            'openai/gpt-4o*',
+            False,
+        ),  # basename alone should not match provider-qualified
+        ('unknown-model', 'gpt-5*', False),
+    ],
+)
+def test_model_matches_provider_qualified(name, pattern, expected):
+    assert model_matches(name, [pattern]) is expected
+
+
+@pytest.mark.parametrize(
+    'model,expect',
+    [
+        (
+            'gpt-4o',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=False,
+                supports_prompt_cache=False,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'gpt-5',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=True,
+                supports_prompt_cache=False,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'o3-mini',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=True,
+                supports_prompt_cache=False,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'o1-2024-12-17',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=True,
+                supports_prompt_cache=False,
+                supports_stop_words=False,
+            ),
+        ),
+        (
+            'xai/grok-4-0709',
+            ModelFeatures(
+                supports_function_calling=False,
+                supports_reasoning_effort=False,
+                supports_prompt_cache=False,
+                supports_stop_words=False,
+            ),
+        ),
+        (
+            'anthropic/claude-3-7-sonnet',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=False,
+                supports_prompt_cache=True,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'litellm_proxy/claude-3.7-sonnet',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=False,
+                supports_prompt_cache=True,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'gemini-2.5-pro',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=True,
+                supports_prompt_cache=False,
+                supports_stop_words=True,
+            ),
+        ),
+        (
+            'openai/gpt-4o',
+            ModelFeatures(
+                supports_function_calling=True,
+                supports_reasoning_effort=False,
+                supports_prompt_cache=False,
+                supports_stop_words=True,
+            ),
+        ),  # provider-qualified still matches basename patterns
+    ],
+)
+def test_get_features(model, expect):
+    features = get_features(model)
+    assert features == expect
+
+
+@pytest.mark.parametrize(
+    'model',
+    [
+        # Anthropic families
+        'claude-3-7-sonnet-20250219',
+        'claude-3.7-sonnet',
+        'claude-sonnet-3-7-latest',
+        'claude-3-5-sonnet',
+        'claude-3.5-haiku',
+        'claude-3-5-haiku-20241022',
+        'claude-sonnet-4-latest',
+        'claude-opus-4-1-20250805',
+        # OpenAI families
+        'gpt-4o',
+        'gpt-4.1',
+        'gpt-5',
+        # o-series
+        'o1-2024-12-17',
+        'o3-mini',
+        'o4-mini',
+        # Google Gemini
+        'gemini-2.5-pro',
+        # Others
+        'kimi-k2-0711-preview',
+        'kimi-k2-instruct',
+        'qwen3-coder',
+        'qwen3-coder-480b-a35b-instruct',
+    ],
+)
+def test_function_calling_models(model):
+    features = get_features(model)
+    assert features.supports_function_calling is True
+
+
+@pytest.mark.parametrize(
+    'model',
+    [
+        'o1-2024-12-17',
+        'o3-mini',
+        'o4-mini',
+        'gemini-2.5-flash',
+        'gemini-2.5-pro',
+        'gpt-5',
+        'claude-opus-4-1-20250805',
+    ],
+)
+def test_reasoning_effort_models(model):
+    features = get_features(model)
+    assert features.supports_reasoning_effort is True
+
+
+@pytest.mark.parametrize(
+    'model',
+    [
+        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
+        'DeepSeek-R1-0528',
+    ],
+)
+def test_deepseek_reasoning_effort_models(model):
+    features = get_features(model)
+    assert features.supports_reasoning_effort is True
+
+
+@pytest.mark.parametrize(
+    'model',
+    [
+        'claude-3-7-sonnet-20250219',
+        'claude-3.7-sonnet',
+        'claude-sonnet-3-7-latest',
+        'claude-3-5-sonnet',
+        'claude-3-5-haiku-20241022',
+        'claude-3-haiku-20240307',
+        'claude-3-opus-20240229',
+        'claude-sonnet-4-latest',
+        'claude-opus-4-1-20250805',
+    ],
+)
+def test_prompt_cache_models(model):
+    features = get_features(model)
+    assert features.supports_prompt_cache is True
+
+
+@pytest.mark.parametrize(
+    'model,expected',
+    [
+        # Positive cases: exactly those supported on main
+        ('o1', True),
+        ('o1-2024-12-17', True),
+        ('o3', True),
+        ('o3-2025-04-16', True),
+        ('o3-mini', True),
+        ('o3-mini-2025-01-31', True),
+        ('o4-mini', True),
+        ('o4-mini-2025-04-16', True),
+        ('gemini-2.5-flash', True),
+        ('gemini-2.5-pro', True),
+        ('gpt-5', True),
+        ('gpt-5-2025-08-07', True),
+        ('claude-opus-4-1-20250805', True),
+        # DeepSeek
+        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', True),
+        ('DeepSeek-R1-0528', True),
+        # Negative cases: ensure we didn't unintentionally expand
+        ('o1-mini', False),
+        ('o1-preview', False),
+        ('gemini-1.0-pro', False),
+    ],
+)
+def test_reasoning_effort_parity_with_main(model, expected):
+    assert get_features(model).supports_reasoning_effort is expected
+
+
+def test_prompt_cache_haiku_variants():
+    assert get_features('claude-3-5-haiku-20241022').supports_prompt_cache is True
+    assert get_features('claude-3.5-haiku-20241022').supports_prompt_cache is True
+
+
+def test_stop_words_grok_provider_prefixed():
+    assert get_features('xai/grok-4-0709').supports_stop_words is False
+    assert get_features('grok-4-0709').supports_stop_words is False
+
+
+@pytest.mark.parametrize(
+    'model',
+    [
+        'o1-mini',
+        'o1-2024-12-17',
+        'xai/grok-4-0709',
+        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
+        'DeepSeek-R1-0528',
+    ],
+)
+def test_supports_stop_words_false_models(model):
+    features = get_features(model)
+    assert features.supports_stop_words is False