Centralize model feature checks (#10414)

Co-authored-by: OpenHands-GPT-5 <openhands@all-hands.dev>
Engel Nyst
2025-08-19 22:30:07 +02:00
committed by GitHub
parent aa6b454772
commit bb0e24d23b
8 changed files with 596 additions and 112 deletions

View File

@@ -9,8 +9,8 @@ from openhands.core.logger import openhands_logger as logger
from openhands.llm.llm import (
LLM,
LLM_RETRY_EXCEPTIONS,
REASONING_EFFORT_SUPPORTED_MODELS,
)
from openhands.llm.model_features import get_features
from openhands.utils.shutdown_listener import should_continue
@@ -63,7 +63,7 @@ class AsyncLLM(LLM):
messages = kwargs['messages']
# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
kwargs['reasoning_effort'] = self.config.reasoning_effort
# ensure we work with a list of messages
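
A minimal sketch of what the new check resolves to (model names are illustrative; the pattern tables live in openhands/llm/model_features.py, shown below):

from openhands.llm.model_features import get_features

assert get_features('o3-mini').supports_reasoning_effort
assert get_features('openrouter/gpt-5').supports_reasoning_effort  # provider prefix is stripped
assert not get_features('gpt-4o').supports_reasoning_effort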

View File

@@ -9,6 +9,7 @@ import httpx
from openhands.core.config import LLMConfig
from openhands.llm.metrics import Metrics
from openhands.llm.model_features import get_features
with warnings.catch_warnings():
warnings.simplefilter('ignore')
@@ -49,79 +50,6 @@ LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
LLMNoResponseError,
)
# cache prompt supporting models
# remove this when gemini and deepseek are supported
CACHE_PROMPT_SUPPORTED_MODELS = [
'claude-3-7-sonnet-20250219',
'claude-sonnet-3-7-latest',
'claude-3.7-sonnet',
'claude-3-5-sonnet-20241022',
'claude-3-5-sonnet-20240620',
'claude-3-5-haiku-20241022',
'claude-3-haiku-20240307',
'claude-3-opus-20240229',
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
]
# function calling supporting models
FUNCTION_CALLING_SUPPORTED_MODELS = [
'claude-3-7-sonnet-20250219',
'claude-sonnet-3-7-latest',
'claude-3-5-sonnet',
'claude-3-5-sonnet-20240620',
'claude-3-5-sonnet-20241022',
'claude-3.5-haiku',
'claude-3-5-haiku-20241022',
'claude-sonnet-4-20250514',
'claude-sonnet-4',
'claude-opus-4-20250514',
'claude-opus-4-1-20250805',
'gpt-4o-mini',
'gpt-4o',
'o1-2024-12-17',
'o3-mini-2025-01-31',
'o3-mini',
'o3',
'o3-2025-04-16',
'o4-mini',
'o4-mini-2025-04-16',
'gemini-2.5-pro',
'gpt-4.1',
'kimi-k2-0711-preview',
'kimi-k2-instruct',
'Qwen3-Coder-480B-A35B-Instruct',
'qwen3-coder', # this will match both qwen3-coder-480b (openhands provider) and qwen3-coder (for openrouter)
'gpt-5',
'gpt-5-2025-08-07',
]
REASONING_EFFORT_SUPPORTED_MODELS = [
'o1-2024-12-17',
'o1',
'o3',
'o3-2025-04-16',
'o3-mini-2025-01-31',
'o3-mini',
'o4-mini',
'o4-mini-2025-04-16',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gpt-5',
'gpt-5-2025-08-07',
'claude-opus-4-1-20250805', # we need to remove top_p for opus 4.1
]
MODELS_WITHOUT_STOP_WORDS = [
'o1-mini',
'o1-preview',
'o1',
'o1-2024-12-17',
'xai/grok-4-0709',
]
class LLM(RetryMixin, DebugMixin):
"""The LLM class represents a Language Model instance.
@@ -154,6 +82,7 @@ class LLM(RetryMixin, DebugMixin):
)
self.model_info: ModelInfo | None = None
self._function_calling_active: bool = False
self.retry_listener = retry_listener
if self.config.log_completions:
if self.config.log_completions_folder is None:
@@ -202,10 +131,8 @@ class LLM(RetryMixin, DebugMixin):
f'Rewrote openhands/{model_name} to {self.config.model} with base URL {self.config.base_url}'
)
if (
self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in REASONING_EFFORT_SUPPORTED_MODELS
):
features = get_features(self.config.model)
if features.supports_reasoning_effort:
# For Gemini models, only map 'low' to optimized thinking budget
# Let other reasoning_effort values pass through to API as-is
if 'gemini-2.5-pro' in self.config.model:
@@ -312,7 +239,7 @@ class LLM(RetryMixin, DebugMixin):
# add stop words if the model supports it and stop words are not disabled
if (
self.config.model not in MODELS_WITHOUT_STOP_WORDS
get_features(self.config.model).supports_stop_words
and not self.config.disable_stop_word
):
kwargs['stop'] = STOP_WORDS
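# Illustrative behavior of the new gate (not part of the diff):
#   get_features('o1-mini').supports_stop_words         -> False  (matches 'o1*')
#   get_features('xai/grok-4-0709').supports_stop_words -> False  (basename matches 'grok-4-0709')
#   get_features('claude-sonnet-4').supports_stop_words -> True   (not in the exclusion table)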
@@ -556,17 +483,10 @@ class LLM(RetryMixin, DebugMixin):
):
self.config.max_output_tokens = self.model_info['max_tokens']
# Initialize function calling capability
# Check if model name is in our supported list
model_name_supported = (
self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
)
# Handle native_tool_calling user-defined configuration
# Initialize function calling using centralized model features
features = get_features(self.config.model)
if self.config.native_tool_calling is None:
self._function_calling_active = model_name_supported
self._function_calling_active = features.supports_function_calling
else:
self._function_calling_active = self.config.native_tool_calling
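# Illustrative (not part of the diff): the feature table only supplies the default;
# an explicit native_tool_calling config value still takes precedence.
#   get_features('litellm_proxy/claude-sonnet-4-20250514').supports_function_calling -> True
#   get_features('unknown-model').supports_function_calling -> False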
@@ -601,14 +521,10 @@ class LLM(RetryMixin, DebugMixin):
Returns:
boolean: True if prompt caching is supported and enabled for the given model.
"""
return (
self.config.caching_prompt is True
and (
self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
or self.config.model.split('/')[-1] in CACHE_PROMPT_SUPPORTED_MODELS
)
# We don't need to look-up model_info, because only Anthropic models needs the explicit caching breakpoint
)
if not self.config.caching_prompt:
return False
# We don't need to look-up model_info, because only Anthropic models need explicit caching breakpoints
return get_features(self.config.model).supports_prompt_cache
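# Illustrative (not part of the diff):
#   get_features('anthropic/claude-3-5-sonnet-20241022').supports_prompt_cache -> True
#   get_features('gpt-4o').supports_prompt_cache -> False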
def is_function_calling_active(self) -> bool:
"""Returns whether function calling is supported and enabled for this LLM instance.

View File

@@ -0,0 +1,139 @@
from __future__ import annotations
from dataclasses import dataclass
from fnmatch import fnmatch
def normalize_model_name(model: str) -> str:
"""Normalize a model string to a canonical, comparable name.
Strategy:
- Trim whitespace
- Lowercase
- If there is a '/', keep only the basename after the last '/'
(handles prefixes like openrouter/, litellm_proxy/, anthropic/, etc.)
and treat ':' inside that basename as an Ollama-style variant tag to be removed
- There is no provider:model form; providers, when present, use 'provider/model'
- Drop a trailing "-gguf" suffix if present
"""
raw = (model or '').strip().lower()
if '/' in raw:
name = raw.split('/')[-1]
if ':' in name:
# Drop Ollama-style variant tag in basename
name = name.split(':', 1)[0]
else:
# No '/', keep the whole raw name (we do not support provider:model)
name = raw
if name.endswith('-gguf'):
name = name[: -len('-gguf')]
return name
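# Illustrative examples (not part of the new module):
#   normalize_model_name('litellm_proxy/anthropic/claude-3-5-sonnet-20241022')
#       -> 'claude-3-5-sonnet-20241022'
#   normalize_model_name('ollama/llama3:8b') -> 'llama3'                  (variant tag dropped)
#   normalize_model_name('TheBloke/CodeLlama-7B-GGUF') -> 'codellama-7b'  ('-gguf' dropped)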
def model_matches(model: str, patterns: list[str]) -> bool:
"""Return True if the model matches any of the glob patterns.
If a pattern contains a '/', it is treated as provider-qualified and matched
against the full, lowercased model string (including provider prefix).
Otherwise, it is matched against the normalized basename.
"""
raw = (model or '').strip().lower()
name = normalize_model_name(model)
for pat in patterns:
pat_l = pat.lower()
if '/' in pat_l:
if fnmatch(raw, pat_l):
return True
else:
if fnmatch(name, pat_l):
return True
return False
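# Illustrative examples (not part of the new module):
#   model_matches('openrouter/qwen/qwen3-coder', ['qwen3-coder*'])       -> True  (basename)
#   model_matches('anthropic/claude-sonnet-4-20250514', ['anthropic/*']) -> True  (provider-qualified)
#   model_matches('o1-preview', ['o3*'])                                 -> False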
@dataclass(frozen=True)
class ModelFeatures:
supports_function_calling: bool
supports_reasoning_effort: bool
supports_prompt_cache: bool
supports_stop_words: bool
# Pattern tables capturing current behavior. Keep patterns lowercase.
FUNCTION_CALLING_PATTERNS: list[str] = [
# Anthropic families
'claude-3-7-sonnet*',
'claude-3.7-sonnet*',
'claude-sonnet-3-7-latest',
'claude-3-5-sonnet*',
'claude-3.5-haiku*',
'claude-3-5-haiku*',
'claude-sonnet-4*',
'claude-opus-4*',
# OpenAI families
'gpt-4o*',
'gpt-4.1',
'gpt-5*',
# o-series (keep exact o1 support per existing list)
'o1-2024-12-17',
'o3*',
'o4-mini*',
# Google Gemini
'gemini-2.5-pro*',
# Others
'kimi-k2-0711-preview',
'kimi-k2-instruct',
'qwen3-coder*',
'qwen3-coder-480b-a35b-instruct',
]
REASONING_EFFORT_PATTERNS: list[str] = [
# Mirror main behavior exactly (no unintended expansion), plus DeepSeek support
'o1-2024-12-17',
'o1',
'o3',
'o3-2025-04-16',
'o3-mini-2025-01-31',
'o3-mini',
'o4-mini',
'o4-mini-2025-04-16',
'gemini-2.5-flash',
'gemini-2.5-pro',
'gpt-5',
'gpt-5-2025-08-07',
'claude-opus-4-1-20250805',
# DeepSeek reasoning family
'deepseek-r1-0528*',
]
PROMPT_CACHE_PATTERNS: list[str] = [
'claude-3-7-sonnet*',
'claude-3.7-sonnet*',
'claude-sonnet-3-7-latest',
'claude-3-5-sonnet*',
'claude-3-5-haiku*',
'claude-3.5-haiku*',
'claude-3-haiku-20240307',
'claude-3-opus-20240229',
'claude-sonnet-4*',
'claude-opus-4*',
]
SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [
# o1 family doesn't support stop words
'o1*',
# grok-4 specific model name (basename)
'grok-4-0709',
# DeepSeek R1 family
'deepseek-r1-0528*',
]
def get_features(model: str) -> ModelFeatures:
return ModelFeatures(
supports_function_calling=model_matches(model, FUNCTION_CALLING_PATTERNS),
supports_reasoning_effort=model_matches(model, REASONING_EFFORT_PATTERNS),
supports_prompt_cache=model_matches(model, PROMPT_CACHE_PATTERNS),
supports_stop_words=not model_matches(
model, SUPPORTS_STOP_WORDS_FALSE_PATTERNS
),
)
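
Taken together, the module replaces four hand-maintained model lists with a single pattern-driven lookup. A minimal usage sketch (the model name is illustrative):

from openhands.llm.model_features import get_features

features = get_features('openhands/claude-sonnet-4-20250514')
if features.supports_prompt_cache:
    pass  # add Anthropic cache-control breakpoints to the messages
if not features.supports_stop_words:
    pass  # omit the 'stop' kwarg from the completion call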

View File

@@ -5,7 +5,7 @@ from typing import Any, Callable
from openhands.core.exceptions import UserCancelledError
from openhands.core.logger import openhands_logger as logger
from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
from openhands.llm.llm import REASONING_EFFORT_SUPPORTED_MODELS
from openhands.llm.model_features import get_features
class StreamingLLM(AsyncLLM):
@@ -65,7 +65,7 @@ class StreamingLLM(AsyncLLM):
)
# Set reasoning effort for models that support it
if self.config.model.lower() in REASONING_EFFORT_SUPPORTED_MODELS:
if get_features(self.config.model).supports_reasoning_effort:
kwargs['reasoning_effort'] = self.config.reasoning_effort
self.log_prompt(messages)
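
With this change, AsyncLLM, StreamingLLM, and the base LLM all consult the same helper, so the reasoning-effort check no longer depends on how the caller spells the model name. A quick illustrative consistency check:

from openhands.llm.model_features import get_features

# Same answer regardless of casing or provider prefix:
assert get_features('O3-Mini').supports_reasoning_effort
assert get_features('litellm_proxy/o3-mini').supports_reasoning_effort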