From c567c1126745fe2fefc1c6e90be7a388d657d067 Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Fri, 3 Jan 2025 01:40:49 +0100
Subject: [PATCH] Enable/disable function calling by user configuration (#5992)

Co-authored-by: co <yc5@tju.edu.cn>
Co-authored-by: Cheng Yang <93481273+young010101@users.noreply.github.com>
---
 config.template.toml                |  6 ++++
 openhands/core/config/llm_config.py |  2 ++
 openhands/llm/llm.py                | 22 ++++++++++--
 tests/unit/test_config.py           | 54 +++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/config.template.toml b/config.template.toml
index 26291976fd..5890c5f301 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -180,6 +180,12 @@ model = "gpt-4o"
 # https://docs.litellm.ai/docs/completion/token_usage
 #custom_tokenizer = ""
 
+# Whether to use native tool calling if supported by the model. Can be true or false. When the key is omitted (the default, None), the model's default behavior based on the evaluation is used.
+# ATTENTION: Based on evaluation, enabling native function calling may lead to worse results
+# in some scenarios. Use with caution and consider testing with your specific use case.
+# https://github.com/All-Hands-AI/OpenHands/pull/4711
+#native_tool_calling = None
+
 [llm.gpt4o-mini]
 api_key = "your-api-key"
 model = "gpt-4o"
diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py
index 7db3731e72..1d7dfb8f17 100644
--- a/openhands/core/config/llm_config.py
+++ b/openhands/core/config/llm_config.py
@@ -45,6 +45,7 @@ class LLMConfig:
         log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
         draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
         custom_tokenizer: A custom tokenizer to use for token counting.
+        native_tool_calling: Whether to use native tool calling if supported by the model. Can be True, False, or not set.
     """
 
     model: str = 'claude-3-5-sonnet-20241022'
@@ -83,6 +84,7 @@ class LLMConfig:
     log_completions_folder: str = os.path.join(LOG_DIR, 'completions')
     draft_editor: Optional['LLMConfig'] = None
     custom_tokenizer: str | None = None
+    native_tool_calling: bool | None = None
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index 13d4dfc250..351743cdcf 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -440,13 +440,31 @@ class LLM(RetryMixin, DebugMixin):
         )
 
     def is_function_calling_active(self) -> bool:
-        # Check if model name is in supported list before checking model_info
+        # Match the configured model against our supported list: exact name, last '/' segment, or substring
         model_name_supported = (
             self.config.model in FUNCTION_CALLING_SUPPORTED_MODELS
             or self.config.model.split('/')[-1] in FUNCTION_CALLING_SUPPORTED_MODELS
             or any(m in self.config.model for m in FUNCTION_CALLING_SUPPORTED_MODELS)
         )
-        return model_name_supported
+
+        # Apply the user's native_tool_calling setting; None (unset) falls back to the supported-list check above
+        if self.config.native_tool_calling is None:
+            logger.debug(
+                f'Using default tool calling behavior based on model evaluation: {model_name_supported}'
+            )
+            return model_name_supported
+        elif self.config.native_tool_calling is False:
+            logger.debug('Function calling explicitly disabled via configuration')
+            return False
+        else:
+            # Any other value (i.e. True): requested by the user, but only active if litellm reports support for this model
+            supports_fn_call = litellm.supports_function_calling(
+                model=self.config.model
+            )
+            logger.debug(
+                f'Function calling explicitly enabled, litellm support: {supports_fn_call}'
+            )
+            return supports_fn_call
 
     def _post_completion(self, response: ModelResponse) -> float:
         """Post-process the completion response.
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index ceaa37450b..d1306ca631 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -157,6 +157,60 @@ default_agent = "TestAgent"
     assert default_config.workspace_mount_path == '/opt/files2/workspace'
 
 
+def test_llm_config_native_tool_calling(default_config, temp_toml_file, monkeypatch):
+    # default is None
+    assert default_config.get_llm_config().native_tool_calling is None
+
+    # without a `[core]` section the toml file is not loaded, so native_tool_calling stays None even for the named config
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write(
+            """
+[llm.gpt4o-mini]
+native_tool_calling = true
+"""
+        )
+
+    load_from_toml(default_config, temp_toml_file)
+    assert default_config.get_llm_config().native_tool_calling is None
+    assert default_config.get_llm_config('gpt4o-mini').native_tool_calling is None
+
+    # with a `[core]` section present, false is applied to the named config only; the default llm config stays None
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write(
+            """
+[core]
+
+[llm.gpt4o-mini]
+native_tool_calling = false
+"""
+        )
+    load_from_toml(default_config, temp_toml_file)
+    assert default_config.get_llm_config().native_tool_calling is None
+    assert default_config.get_llm_config('gpt4o-mini').native_tool_calling is False
+
+    # set to true via a TOML boolean in the named config
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write(
+            """
+[core]
+
+[llm.gpt4o-mini]
+native_tool_calling = true
+"""
+        )
+    load_from_toml(default_config, temp_toml_file)
+    assert default_config.get_llm_config('gpt4o-mini').native_tool_calling is True
+
+    # override to false by env
+    # see utils.set_attr_from_env
+    monkeypatch.setenv('LLM_NATIVE_TOOL_CALLING', 'false')
+    load_from_env(default_config, os.environ)
+    assert default_config.get_llm_config().native_tool_calling is False
+    assert (
+        default_config.get_llm_config('gpt4o-mini').native_tool_calling is True
+    )  # load_from_env didn't override the named config set in the toml file under [llm.gpt4o-mini]
+
+
 def test_compat_load_sandbox_from_toml(default_config: AppConfig, temp_toml_file: str):
     # test loading configuration from a new-style TOML file
     # uses a toml file with sandbox_vars instead of a sandbox section