Mirror of https://github.com/OpenHands/OpenHands.git, synced 2025-12-26 05:48:36 +08:00
Centralize model feature checks (#10414)
Co-authored-by: OpenHands-GPT-5 <openhands@all-hands.dev>
This commit is contained in:
parent aa6b454772
commit bb0e24d23b
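A quick sketch of the pattern this commit applies at every call site, adapted from the hunks below; the helper name apply_reasoning_effort is hypothetical and only for illustration, not a line from the diff:

# Illustrative sketch only, adapted from the async_llm.py hunk below.
from openhands.llm.model_features import get_features

def apply_reasoning_effort(model: str, reasoning_effort: str | None, kwargs: dict) -> None:
    # Before this commit: `if model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:`
    # After: one centralized feature lookup shared by LLM, AsyncLLM and StreamingLLM.
    if get_features(model).supports_reasoning_effort:
        kwargs['reasoning_effort'] = reasoning_effort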
@@ -172,9 +172,6 @@ class LLMConfig(BaseModel):
# Set reasoning_effort to 'high' by default for non-Gemini models
# Gemini models use optimized thinking budget when reasoning_effort is None
logger.debug(
    f'Setting reasoning_effort for model {self.model} with reasoning_effort {self.reasoning_effort}'
)
if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
    self.reasoning_effort = 'high'
@@ -9,8 +9,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import (
    LLM,
    LLM_RETRY_EXCEPTIONS,
    REASONING_EFFORT_SUPPORTED_MODELS,
)
from openhands.llm.model_features import get_features
from openhands.utils.shutdown_listener import should_continue

@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
messages = kwargs['messages']

# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
    kwargs['reasoning_effort'] = self.config.reasoning_effort

# ensure we work with a list of messages
@@ -9,6 +9,7 @@ import httpx
from openhands.core.config import LLMConfig
from openhands.llm.metrics import Metrics
from openhands.llm.model_features import get_features

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
@@ -49,79 +50,6 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
    LLMNoResponseError,
)

# cache prompt supporting models
# remove this when we gemini and deepseek are supported
CACHE_PROMPT_SUPPORTED_MODELS = [
    'claude-3-7-sonnet-20250219',
    'claude-sonnet-3-7-latest',
    'claude-3.7-sonnet',
    'claude-3-5-sonnet-20241022',
    'claude-3-5-sonnet-20240620',
    'claude-3-5-haiku-20241022',
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
    'claude-opus-4-1-20250805',
]

# function calling supporting models
FUNCTION_CALLING_SUPPORTED_MODELS = [
    'claude-3-7-sonnet-20250219',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet',
    'claude-3-5-sonnet-20240620',
    'claude-3-5-sonnet-20241022',
    'claude-3.5-haiku',
    'claude-3-5-haiku-20241022',
    'claude-sonnet-4-20250514',
    'claude-sonnet-4',
    'claude-opus-4-20250514',
    'claude-opus-4-1-20250805',
    'gpt-4o-mini',
    'gpt-4o',
    'o1-2024-12-17',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o3',
    'o3-2025-04-16',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-pro',
    'gpt-4.1',
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'Qwen3-Coder-480B-A35B-Instruct',
    'qwen3-coder',  # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
    'gpt-5',
    'gpt-5-2025-08-07',
]

REASONING_EFFORT_SUPPORTED_MODELS = [
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gpt-5',
    'gpt-5-2025-08-07',
    'claude-opus-4-1-20250805',  # we need to remove top_p for opus 4.1
]

MODELS_WITHOUT_STOP_WORDS = [
    'o1-mini',
    'o1-preview',
    'o1',
    'o1-2024-12-17',
    'xai/grok-4-0709',
]


class LLM(RetryMixin, DebugMixin):
    """The LLM class represents a Language Model instance.
@@ -154,6 +82,7 @@ class LLM(RetryMixin, DebugMixin):
)

self.model_info: ModelInfo | None = None
self._function_calling_active: bool = False
self.retry_listener = retry_listener
if self.config.log_completions:
    if self.config.log_completions_folder is None:

@@ -202,10 +131,8 @@ class LLM(RetryMixin, DebugMixin):
    f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
)

if (
    self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
    or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
):
features = get_features(self.config.model)
if features.supports_reasoning_effort:
    # For Gemini models, only map 'low' to optimized thinking budget
    # Let other reasoning_effort values pass through to API as-is
    if 'gemini-2.5-pro' in self.config.model:

@@ -312,7 +239,7 @@ class LLM(RetryMixin, DebugMixin):

# add stop words if the model supports it and stop words are not disabled
if (
    self.config.model not in MODELS_WITHOUT_STOP_WORDS
    get_features(self.config.model).supports_stop_words
    and not self.config.disable_stop_word
):
    kwargs['stop'] = STOP_WORDS

@@ -556,17 +483,10 @@ class LLM(RetryMixin, DebugMixin):
):
    self.config.max_output_tokens = self.model_info['max_tokens']

# Initialize function calling capability
# Check if model name is in our supported list
model_name_supported = (
    self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
    or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
    or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
)

# Handle native_tool_calling user-defined configuration
# Initialize function calling using centralized model features
features = get_features(self.config.model)
if self.config.native_tool_calling is None:
    self._function_calling_active = model_name_supported
    self._function_calling_active = features.supports_function_calling
else:
    self._function_calling_active = self.config.native_tool_calling

@@ -601,14 +521,10 @@ class LLM(RetryMixin, DebugMixin):
Returns:
    boolean: True if prompt caching is supported and enabled for the given model.
"""
return (
    self.config.caching_prompt is True
    and (
        self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
        or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
    )
    # We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
)
if not self.config.caching_prompt:
    return False
# We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
return get_features(self.config.model).supports_prompt_cache

def is_function_calling_active(self) -> bool:
    """Returns whether function calling is supported and enabled for this LLM instance.
openhands/llm/model_features.py (new file, 139 lines)
@@ -0,0 +1,139 @@
from __future__ import annotations

from dataclasses import dataclass
from fnmatch import fnmatch


def normalize_model_name(model: str) -> str:
    """Normalize a model string to a canonical, comparable name.

    Strategy:
    - Trim whitespace
    - Lowercase
    - If there is a '/', keep only the basename after the last '/'
      (handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.)
      and treat ':' inside that basename as an Ollama-style variant tag to be removed
    - There is no provider:model form; providers, when present, use 'provider/model'
    - Drop a trailing "-gguf" suffix if present
    """
    raw = (model or '').strip().lower()
    if '/' in raw:
        name = raw.split('/')[-1]
        if ':' in name:
            # Drop Ollama-style variant tag in basename
            name = name.split(':', 1)[0]
    else:
        # No '/', keep the whole raw name (we do not support provider:model)
        name = raw
    if name.endswith('-gguf'):
        name = name[: -len('-gguf')]
    return name


def model_matches(model: str, patterns: list[str]) -> bool:
    """Return True if the model matches any of the glob patterns.

    If a pattern contains a '/', it is treated as provider-qualified and matched
    against the full, lowercased model string (including provider prefix).
    Otherwise, it is matched against the normalized basename.
    """
    raw = (model or '').strip().lower()
    name = normalize_model_name(model)
    for pat in patterns:
        pat_l = pat.lower()
        if '/' in pat_l:
            if fnmatch(raw, pat_l):
                return True
        else:
            if fnmatch(name, pat_l):
                return True
    return False


@dataclass(frozen=True)
class ModelFeatures:
    supports_function_calling: bool
    supports_reasoning_effort: bool
    supports_prompt_cache: bool
    supports_stop_words: bool


# Pattern tables capturing current behavior. Keep patterns lowercase.
FUNCTION_CALLING_PATTERNS: list[str] = [
    # Anthropic families
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3.5-haiku*',
    'claude-3-5-haiku*',
    'claude-sonnet-4*',
    'claude-opus-4*',
    # OpenAI families
    'gpt-4o*',
    'gpt-4.1',
    'gpt-5*',
    # o-series (keep exact o1 support per existing list)
    'o1-2024-12-17',
    'o3*',
    'o4-mini*',
    # Google Gemini
    'gemini-2.5-pro*',
    # Others
    'kimi-k2-0711-preview',
    'kimi-k2-instruct',
    'qwen3-coder*',
    'qwen3-coder-480b-a35b-instruct',
]

REASONING_EFFORT_PATTERNS: list[str] = [
    # Mirror main behavior exactly (no unintended expansion), plus DeepSeek support
    'o1-2024-12-17',
    'o1',
    'o3',
    'o3-2025-04-16',
    'o3-mini-2025-01-31',
    'o3-mini',
    'o4-mini',
    'o4-mini-2025-04-16',
    'gemini-2.5-flash',
    'gemini-2.5-pro',
    'gpt-5',
    'gpt-5-2025-08-07',
    'claude-opus-4-1-20250805',
    # DeepSeek reasoning family
    'deepseek-r1-0528*',
]

PROMPT_CACHE_PATTERNS: list[str] = [
    'claude-3-7-sonnet*',
    'claude-3.7-sonnet*',
    'claude-sonnet-3-7-latest',
    'claude-3-5-sonnet*',
    'claude-3-5-haiku*',
    'claude-3.5-haiku*',
    'claude-3-haiku-20240307',
    'claude-3-opus-20240229',
    'claude-sonnet-4*',
    'claude-opus-4*',
]

SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
    # o1 family doesn't support stop words
    'o1*',
    # grok-4 specific model name (basename)
    'grok-4-0709',
    # DeepSeek R1 family
    'deepseek-r1-0528*',
]


def get_features(model: str) -> ModelFeatures:
    return ModelFeatures(
        supports_function_calling=model_matches(model, FUNCTION_CALLING_PATTERNS),
        supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
        supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
        supports_stop_words=not model_matches(
            model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
        ),
    )
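A short usage sketch (not part of the diff) showing how the new helpers behave; the inputs mirror cases from the tests further down:

# Illustrative only: exercises the helpers defined in model_features.py above.
from openhands.llm.model_features import get_features, normalize_model_name

# Provider prefixes and Ollama-style variant tags are stripped before matching.
assert normalize_model_name('litellm_proxy/claude-3.7-sonnet') == 'claude-3.7-sonnet'
assert normalize_model_name('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL') == 'deepseek-r1-0528'

# Call sites ask for a ModelFeatures record instead of scanning hardcoded lists.
features = get_features('openrouter/o3-mini')
assert features.supports_reasoning_effort is True
assert features.supports_prompt_cache is False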
@@ -5,7 +5,7 @@ from typing import Any, Callable
from openhands.core.exceptions import UserCancelledError
from openhands.core.logger import openhands_logger as logger
from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
from openhands.llm.model_features import get_features


class StreamingLLM(AsyncLLM):

@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
)

# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
    kwargs['reasoning_effort'] = self.config.reasoning_effort

self.log_prompt(messages)
poetry.lock (generated, 8 lines changed)
@@ -11387,14 +11387,14 @@ test = ["pytest", "pytest-cov"]

[[package]]
name = "xlsxwriter"
version = "3.2.3"
version = "3.2.5"
description = "A Python module for creating Excel XLSX files."
optional = false
python-versions = ">=3.6"
python-versions = ">=3.8"
groups = ["main"]
files = [
    {file = "XlsxWriter-3.2.3-py3-none-any.whl", hash = "sha256:593f8296e8a91790c6d0378ab08b064f34a642b3feb787cf6738236bd0a4860d"},
    {file = "xlsxwriter-3.2.3.tar.gz", hash = "sha256:ad6fd41bdcf1b885876b1f6b7087560aecc9ae5a9cc2ba97dcac7ab2e210d3d5"},
    {file = "xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd"},
    {file = "xlsxwriter-3.2.5.tar.gz", hash = "sha256:7e88469d607cdc920151c0ab3ce9cf1a83992d4b7bc730c5ffdd1a12115a7dbe"},
]

[[package]]
@@ -12,8 +12,10 @@ from litellm.exceptions import (
from openhands.core.config import LLMConfig
from openhands.core.exceptions import LLMNoResponseError, OperationCancelled
from openhands.core.message import Message, TextContent
from openhands.llm.async_llm import AsyncLLM
from openhands.llm.llm import LLM
from openhands.llm.metrics import Metrics, TokenUsage
from openhands.llm.streaming_llm import StreamingLLM


@pytest.fixture(autouse=True)

@@ -252,7 +254,7 @@ def test_response_latency_tracking(mock_time, mock_litellm_completion):

@patch('openhands.llm.llm.litellm.get_model_info')
def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    default_config.model = 'openrouter:gpt-4o-mini'
    default_config.model = 'openrouter/gpt-4o-mini'
    mock_get_model_info.return_value = {
        'max_input_tokens': 7000,
        'max_output_tokens': 1500,

@@ -261,7 +263,7 @@ def test_llm_init_with_openrouter_model(mock_get_model_info, default_config):
    llm.init_model_info()
    assert llm.config.max_input_tokens == 7000
    assert llm.config.max_output_tokens == 1500
    mock_get_model_info.assert_called_once_with('openrouter:gpt-4o-mini')
    mock_get_model_info.assert_called_once_with('openrouter/gpt-4o-mini')


@patch('openhands.llm.llm.litellm_completion')

@@ -1201,6 +1203,92 @@ def test_gemini_medium_reasoning_effort_passes_through(mock_completion):
    assert call_kwargs.get('reasoning_effort') == 'medium'


@patch('openhands.llm.llm.litellm_completion')
def test_opus_41_reasoning_pops_temperature_top_p(mock_completion):
    mock_completion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='anthropic/claude-opus-4-1-20250805',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    llm = LLM(config, service_id='svc')
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_completion.call_args[1]
    assert 'temperature' not in call_kwargs
    assert 'top_p' not in call_kwargs


@patch('openhands.llm.llm.litellm_completion')
def test_opus_4_keeps_temperature_top_p(mock_completion):
    mock_completion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='anthropic/claude-opus-4-20250514',
        api_key='k',
        temperature=0.7,
        top_p=0.9,
    )
    llm = LLM(config, service_id='svc')
    llm.completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_completion.call_args[1]
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.llm.litellm.get_model_info')
def test_is_caching_prompt_active_anthropic_prefixed(mock_get_model_info):
    # Avoid external calls, but behavior shouldn't depend on model info
    mock_get_model_info.side_effect = Exception('skip')
    config = LLMConfig(
        model='anthropic/claude-3-7-sonnet', api_key='k', caching_prompt=True
    )
    llm = LLM(config, service_id='svc')
    assert llm.is_caching_prompt_active() is True


@patch('openhands.llm.llm.httpx.get')
@patch('openhands.llm.llm.litellm.get_model_info')
def test_openhands_provider_rewrite_and_caching_prompt(
    mock_get_model_info, mock_httpx_get
):
    # Mock LiteLLM proxy /v1/model/info response
    mock_httpx_get.return_value = type(
        'Resp',
        (),
        {
            'json': lambda self=None: {
                'data': [
                    {
                        'model_name': 'claude-3.7-sonnet',
                        'model_info': {
                            'max_input_tokens': 200000,
                            'max_output_tokens': 64000,
                            'supports_vision': True,
                        },
                    }
                ]
            }
        },
    )()
    mock_get_model_info.return_value = {
        'max_input_tokens': 200000,
        'max_output_tokens': 64000,
    }

    config = LLMConfig(
        model='openhands/claude-3.7-sonnet', api_key='k', caching_prompt=True
    )
    llm = LLM(config, service_id='svc')
    # Model should be rewritten to litellm_proxy/...
    assert llm.config.model.startswith('litellm_proxy/claude-3.7-sonnet')
    # Caching prompt should be active for Claude
    assert llm.is_caching_prompt_active() is True


@patch('openhands.llm.llm.litellm_completion')
def test_gemini_high_reasoning_effort_passes_through(mock_completion):
    """Test that Gemini with reasoning_effort='high' passes through to litellm."""

@@ -1239,10 +1327,61 @@ def test_non_gemini_uses_reasoning_effort(mock_completion):
    sample_messages = [{'role': 'user', 'content': 'Hello, how are you?'}]
    llm.completion(messages=sample_messages)

    # Verify that reasoning_effort was used and thinking budget was not set
    call_kwargs = mock_completion.call_args[1]


@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_reasoning_effort_passthrough(mock_acompletion):
    mock_acompletion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(
        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
    )
    llm = AsyncLLM(config, service_id='svc')
    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_acompletion.call_args[1]
    assert call_kwargs.get('reasoning_effort') == 'low'
    # Async path does not pop temperature/top_p (parity with main)
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.streaming_llm.AsyncLLM._call_acompletion')
@pytest.mark.asyncio
async def test_streaming_reasoning_effort_passthrough(mock_call):
    async def fake_stream(*args, **kwargs):
        class Dummy:
            async def __aiter__(self):
                yield {'choices': [{'delta': {'content': 'x'}}]}

        return Dummy()

    mock_call.side_effect = fake_stream
    config = LLMConfig(
        model='o3', api_key='k', temperature=0.7, top_p=0.9, reasoning_effort='low'
    )
    sllm = StreamingLLM(config, service_id='svc')
    async for _ in sllm.async_streaming_completion(
        messages=[{'role': 'user', 'content': 'hi'}]
    ):
        break
    call_kwargs = mock_call.call_args[1]
    assert call_kwargs.get('reasoning_effort') == 'low'
    assert call_kwargs.get('temperature') == 0.7
    assert call_kwargs.get('top_p') == 0.9


@patch('openhands.llm.async_llm.litellm_acompletion')
@pytest.mark.asyncio
async def test_async_streaming_no_thinking_for_gemini(mock_acompletion):
    mock_acompletion.return_value = {
        'choices': [{'message': {'content': 'ok'}}],
    }
    config = LLMConfig(model='gemini-2.5-pro', api_key='k', reasoning_effort='low')
    llm = AsyncLLM(config, service_id='svc')
    await llm.async_completion(messages=[{'role': 'user', 'content': 'hi'}])
    call_kwargs = mock_acompletion.call_args[1]
    assert 'thinking' not in call_kwargs
    assert call_kwargs.get('reasoning_effort') == 'high'


@patch('openhands.llm.llm.litellm_completion')
tests/unit/llm/test_model_features.py (new file, 293 lines)
@@ -0,0 +1,293 @@
import pytest

from openhands.llm.model_features import (
    ModelFeatures,
    get_features,
    model_matches,
    normalize_model_name,
)


@pytest.mark.parametrize(
    'raw,expected',
    [
        (' OPENAI/gpt-4o ', 'gpt-4o'),
        ('anthropic/claude-3-7-sonnet', 'claude-3-7-sonnet'),
        ('litellm_proxy/gemini-2.5-pro', 'gemini-2.5-pro'),
        ('qwen3-coder-480b-a35b-instruct', 'qwen3-coder-480b-a35b-instruct'),
        ('gpt-5', 'gpt-5'),
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', 'deepseek-r1-0528'),
        ('openai/GLM-4.5-GGUF', 'glm-4.5'),
        ('openrouter/gpt-4o-mini', 'gpt-4o-mini'),
        (
            'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0',
            'anthropic.claude-3-5-sonnet-20241022-v2',
        ),
        ('', ''),
        (None, ''),  # type: ignore[arg-type]
    ],
)
def test_normalize_model_name(raw, expected):
    assert normalize_model_name(raw) == expected


@pytest.mark.parametrize(
    'name,pattern,expected',
    [
        ('gpt-4o', 'gpt-4o*', True),
        ('openai/gpt-4o', 'gpt-4o*', True),
        ('litellm_proxy/gpt-4o-mini', 'gpt-4o*', True),
        ('claude-3-7-sonnet-20250219', 'claude-3-7-sonnet*', True),
        ('o1-2024-12-17', 'o1*', True),
        ('grok-4-0709', 'grok-4-0709', True),
        ('grok-4-0801', 'grok-4-0709', False),
    ],
)
def test_model_matches(name, pattern, expected):
    assert model_matches(name, [pattern]) is expected


@pytest.mark.parametrize(
    'name,pattern,expected',
    [
        ('openai/gpt-4o', 'openai/gpt-4o*', True),
        ('openrouter/gpt-4o', 'openai/gpt-4o*', False),
        ('litellm_proxy/gpt-4o-mini', 'litellm_proxy/gpt-4o*', True),
        (
            'gpt-4o',
            'openai/gpt-4o*',
            False,
        ),  # basename alone should not match provider-qualified
        ('unknown-model', 'gpt-5*', False),
    ],
)
def test_model_matches_provider_qualified(name, pattern, expected):
    assert model_matches(name, [pattern]) is expected


@pytest.mark.parametrize(
    'model,expect',
    [
        (
            'gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'gpt-5',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o3-mini',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'o1-2024-12-17',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'xai/grok-4-0709',
            ModelFeatures(
                supports_function_calling=False,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=False,
            ),
        ),
        (
            'anthropic/claude-3-7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'litellm_proxy/claude-3.7-sonnet',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=True,
                supports_stop_words=True,
            ),
        ),
        (
            'gemini-2.5-pro',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=True,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),
        (
            'openai/gpt-4o',
            ModelFeatures(
                supports_function_calling=True,
                supports_reasoning_effort=False,
                supports_prompt_cache=False,
                supports_stop_words=True,
            ),
        ),  # provider-qualified still matches basename patterns
    ],
)
def test_get_features(model, expect):
    features = get_features(model)
    assert features == expect


@pytest.mark.parametrize(
    'model',
    [
        # Anthropic families
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3.5-haiku',
        'claude-3-5-haiku-20241022',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
        # OpenAI families
        'gpt-4o',
        'gpt-4.1',
        'gpt-5',
        # o-series
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        # Google Gemini
        'gemini-2.5-pro',
        # Others
        'kimi-k2-0711-preview',
        'kimi-k2-instruct',
        'qwen3-coder',
        'qwen3-coder-480b-a35b-instruct',
    ],
)
def test_function_calling_models(model):
    features = get_features(model)
    assert features.supports_function_calling is True


@pytest.mark.parametrize(
    'model',
    [
        'o1-2024-12-17',
        'o3-mini',
        'o4-mini',
        'gemini-2.5-flash',
        'gemini-2.5-pro',
        'gpt-5',
        'claude-opus-4-1-20250805',
    ],
)
def test_reasoning_effort_models(model):
    features = get_features(model)
    assert features.supports_reasoning_effort is True


@pytest.mark.parametrize(
    'model',
    [
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_deepseek_reasoning_effort_models(model):
    features = get_features(model)
    assert features.supports_reasoning_effort is True


@pytest.mark.parametrize(
    'model',
    [
        'claude-3-7-sonnet-20250219',
        'claude-3.7-sonnet',
        'claude-sonnet-3-7-latest',
        'claude-3-5-sonnet',
        'claude-3-5-haiku-20241022',
        'claude-3-haiku-20240307',
        'claude-3-opus-20240229',
        'claude-sonnet-4-latest',
        'claude-opus-4-1-20250805',
    ],
)
def test_prompt_cache_models(model):
    features = get_features(model)
    assert features.supports_prompt_cache is True


@pytest.mark.parametrize(
    'model,expected',
    [
        # Positive cases: exactly those supported on main
        ('o1', True),
        ('o1-2024-12-17', True),
        ('o3', True),
        ('o3-2025-04-16', True),
        ('o3-mini', True),
        ('o3-mini-2025-01-31', True),
        ('o4-mini', True),
        ('o4-mini-2025-04-16', True),
        ('gemini-2.5-flash', True),
        ('gemini-2.5-pro', True),
        ('gpt-5', True),
        ('gpt-5-2025-08-07', True),
        ('claude-opus-4-1-20250805', True),
        # DeepSeek
        ('deepseek/DeepSeek-R1-0528:671b-Q4_K_XL', True),
        ('DeepSeek-R1-0528', True),
        # Negative cases: ensure we didn't unintentionally expand
        ('o1-mini', False),
        ('o1-preview', False),
        ('gemini-1.0-pro', False),
    ],
)
def test_reasoning_effort_parity_with_main(model, expected):
    assert get_features(model).supports_reasoning_effort is expected


def test_prompt_cache_haiku_variants():
    assert get_features('claude-3-5-haiku-20241022').supports_prompt_cache is True
    assert get_features('claude-3.5-haiku-20241022').supports_prompt_cache is True


def test_stop_words_grok_provider_prefixed():
    assert get_features('xai/grok-4-0709').supports_stop_words is False
    assert get_features('grok-4-0709').supports_stop_words is False


@pytest.mark.parametrize(
    'model',
    [
        'o1-mini',
        'o1-2024-12-17',
        'xai/grok-4-0709',
        'deepseek/DeepSeek-R1-0528:671b-Q4_K_XL',
        'DeepSeek-R1-0528',
    ],
)
def test_supports_stop_words_false_models(model):
    features = get_features(model)
    assert features.supports_stop_words is False