fix(llm): remove default reasoning_effort; fix Gemini special case (#11567)

2025-12-26 05:48:36 +08:00 · 2025-11-05 17:30:46 -05:00 · 2025-11-05 17:30:46 -05:00 · 75e54e3552
commit 75e54e3552
parent 6b211f3b29
5 changed files with 23 additions and 13 deletions
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@ -179,10 +179,7 @@ class LLMConfig(BaseModel):
        if self.openrouter_app_name:
            os.environ['OR_APP_NAME'] = self.openrouter_app_name

-        # Set reasoning_effort to 'high' by default for non-Gemini models
-        # Gemini models use optimized thinking budget when reasoning_effort is None
-        if self.reasoning_effort is None and 'gemini-2.5-pro' not in self.model:
-            self.reasoning_effort = 'high'
+        # Do not set a default reasoning_effort. Leave as None unless user-configured.

        # Set an API version by default for Azure models
        # Required for newer models.
--- a/openhands/llm/async_llm.py
+++ b/openhands/llm/async_llm.py
@ -62,8 +62,11 @@ class AsyncLLM(LLM):
            elif 'messages' in kwargs:
                messages = kwargs['messages']

-            # Set reasoning effort for models that support it
-            if get_features(self.config.model).supports_reasoning_effort:
+            # Set reasoning effort for models that support it, only if explicitly provided
+            if (
+                get_features(self.config.model).supports_reasoning_effort
+                and self.config.reasoning_effort is not None
+            ):
                kwargs['reasoning_effort'] = self.config.reasoning_effort

            # ensure we work with a list of messages
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@ -155,7 +155,8 @@ class LLM(RetryMixin, DebugMixin):
                # don't send reasoning_effort to specific Claude Sonnet/Haiku 4.5 variants
                kwargs.pop('reasoning_effort', None)
            else:
-                kwargs['reasoning_effort'] = self.config.reasoning_effort
+                if self.config.reasoning_effort is not None:
+                    kwargs['reasoning_effort'] = self.config.reasoning_effort
            kwargs.pop(
                'temperature'
            )  # temperature is not supported for reasoning models
--- a/openhands/llm/streaming_llm.py
+++ b/openhands/llm/streaming_llm.py
@ -64,8 +64,11 @@ class StreamingLLM(AsyncLLM):
                    'The messages list is empty. At least one message is required.'
                )

-            # Set reasoning effort for models that support it
-            if get_features(self.config.model).supports_reasoning_effort:
+            # Set reasoning effort for models that support it, only if explicitly provided
+            if (
+                get_features(self.config.model).supports_reasoning_effort
+                and self.config.reasoning_effort is not None
+            ):
                kwargs['reasoning_effort'] = self.config.reasoning_effort

            self.log_prompt(messages)
--- a/tests/unit/llm/test_llm.py
+++ b/tests/unit/llm/test_llm.py
@ -1143,13 +1143,19 @@ def test_gemini_model_keeps_none_reasoning_effort():
    assert config.reasoning_effort is None


-def test_non_gemini_model_gets_high_reasoning_effort():
-    """Test that non-Gemini models get reasoning_effort='high' by default."""
-    config = LLMConfig(model='gpt-4o', api_key='test_key')
-    # Non-Gemini models should get reasoning_effort='high'
+def test_non_gemini_model_explicit_reasoning_effort():
+    """Test that non-Gemini models get reasoning_effort ONLY if explicitly set."""
+    config = LLMConfig(model='gpt-4o', api_key='test_key', reasoning_effort='high')
    assert config.reasoning_effort == 'high'


+def test_non_gemini_model_default_reasoning_effort_none():
+    """Test that non-Gemini models do NOT get reasoning_effort by default after PR."""
+    config = LLMConfig(model='gpt-4o', api_key='test_key')
+    # No default is applied: reasoning_effort stays None unless explicitly configured
+    assert config.reasoning_effort is None
+
+
 def test_explicit_reasoning_effort_preserved():
    """Test that explicitly set reasoning_effort is preserved."""
    config = LLMConfig(