Possibility to disable default tools (#7951)

2025-12-26 05:48:36 +08:00 · 2025-04-20 02:14:46 +02:00 · 2025-04-20 02:14:46 +02:00 · 5b3270be2d
commit 5b3270be2d
parent ae43744052
5 changed files with 92 additions and 122 deletions
--- a/config.template.toml
+++ b/config.template.toml
@ -221,9 +221,22 @@ enable_browsing = true
 # Whether the LLM draft editor is enabled
 enable_llm_editor = false

+# Whether the standard editor tool (str_replace_editor) is enabled
+# Only has an effect if enable_llm_editor is false
+enable_editor = true
+
 # Whether the IPython tool is enabled
 enable_jupyter = true

+# Whether the command tool is enabled
+enable_cmd = true
+
+# Whether the think tool is enabled
+enable_think = true
+
+# Whether the finish tool is enabled
+enable_finish = true
+
 # LLM config group to use
 #llm_config = 'your-llm-config-group'

--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@ -1,7 +1,19 @@
 import os
 from collections import deque

+from litellm import ChatCompletionToolParam
+
 import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
+from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
+from openhands.agenthub.codeact_agent.tools.browser import BrowserTool
+from openhands.agenthub.codeact_agent.tools.finish import FinishTool
+from openhands.agenthub.codeact_agent.tools.ipython import IPythonTool
+from openhands.agenthub.codeact_agent.tools.llm_based_edit import LLMBasedFileEditTool
+from openhands.agenthub.codeact_agent.tools.str_replace_editor import (
+    create_str_replace_editor_tool,
+)
+from openhands.agenthub.codeact_agent.tools.think import ThinkTool
+from openhands.agenthub.codeact_agent.tools.web_read import WebReadTool
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
@ -67,15 +79,7 @@ class CodeActAgent(Agent):
        super().__init__(llm, config)
        self.pending_actions: deque[Action] = deque()
        self.reset()
-
-        built_in_tools = codeact_function_calling.get_tools(
-            enable_browsing=self.config.enable_browsing,
-            enable_jupyter=self.config.enable_jupyter,
-            enable_llm_editor=self.config.enable_llm_editor,
-            llm=self.llm,
-        )
-
-        self.tools = built_in_tools
+        self.tools = self._get_tools()

        self.prompt_manager = PromptManager(
            prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
@ -89,6 +93,40 @@ class CodeActAgent(Agent):

        self.response_to_actions_fn = codeact_function_calling.response_to_actions

+    def _get_tools(self) -> list[ChatCompletionToolParam]:
+        SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1']
+
+        use_simplified_tool_desc = False
+        if self.llm is not None:
+            use_simplified_tool_desc = any(
+                model_substr in self.llm.config.model
+                for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS
+            )
+
+        tools = []
+        if self.config.enable_cmd:
+            tools.append(
+                create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc)
+            )
+        if self.config.enable_think:
+            tools.append(ThinkTool)
+        if self.config.enable_finish:
+            tools.append(FinishTool)
+        if self.config.enable_browsing:
+            tools.append(WebReadTool)
+            tools.append(BrowserTool)
+        if self.config.enable_jupyter:
+            tools.append(IPythonTool)
+        if self.config.enable_llm_editor:
+            tools.append(LLMBasedFileEditTool)
+        elif self.config.enable_editor:
+            tools.append(
+                create_str_replace_editor_tool(
+                    use_simplified_description=use_simplified_tool_desc
+                )
+            )
+        return tools
+
    def reset(self) -> None:
        """Resets the CodeAct Agent."""
        super().reset()
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@ -6,7 +6,6 @@ This is similar to the functionality of `CodeActResponseParser`.
 import json

 from litellm import (
-    ChatCompletionToolParam,
    ModelResponse,
 )

@ -41,7 +40,6 @@ from openhands.events.action import (
 from openhands.events.action.mcp import McpAction
 from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.tool import ToolCallMetadata
-from openhands.llm import LLM
 from openhands.mcp import MCPClientTool


@ -237,39 +235,3 @@ def response_to_actions(response: ModelResponse) -> list[Action]:

    assert len(actions) >= 1
    return actions
-
-
-def get_tools(
-    enable_browsing: bool = False,
-    enable_llm_editor: bool = False,
-    enable_jupyter: bool = False,
-    llm: LLM | None = None,
-) -> list[ChatCompletionToolParam]:
-    SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1']
-
-    use_simplified_tool_desc = False
-    if llm is not None:
-        use_simplified_tool_desc = any(
-            model_substr in llm.config.model
-            for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS
-        )
-
-    tools = [
-        create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc),
-        ThinkTool,
-        FinishTool,
-    ]
-    if enable_browsing:
-        tools.append(WebReadTool)
-        tools.append(BrowserTool)
-    if enable_jupyter:
-        tools.append(IPythonTool)
-    if enable_llm_editor:
-        tools.append(LLMBasedFileEditTool)
-    else:
-        tools.append(
-            create_str_replace_editor_tool(
-                use_simplified_description=use_simplified_tool_desc
-            )
-        )
-    return tools
--- a/openhands/core/config/agent_config.py
+++ b/openhands/core/config/agent_config.py
@ -7,28 +7,30 @@ from openhands.core.logger import openhands_logger as logger


 class AgentConfig(BaseModel):
-    """Configuration for the agent.
-
-    Attributes:
-        enable_browsing: Whether browsing delegate is enabled in the action space. Default is False. Only works with function calling.
-        enable_llm_editor: Whether LLM editor is enabled in the action space. Default is False. Only works with function calling.
-        enable_jupyter: Whether Jupyter is enabled in the action space. Default is False.
-        llm_config: The name of the llm config to use. If specified, this will override global llm config.
-        enable_prompt_extensions: Whether to use prompt extensions (e.g., microagents, inject runtime info). Default is True.
-        disabled_microagents: A list of microagents to disable (by name, without .py extension, e.g. ["github", "lint"]). Default is None.
-        condenser: Configuration for the memory condenser. Default is NoOpCondenserConfig.
-        enable_history_truncation: Whether history should be truncated to continue the session when hitting LLM context length limit.
-        enable_som_visual_browsing: Whether to enable SoM (Set of Marks) visual browsing. Default is False.
-    """
-
    llm_config: str | None = Field(default=None)
+    """The name of the llm config to use. If specified, this will override global llm config."""
    enable_browsing: bool = Field(default=True)
+    """Whether to enable browsing tool"""
    enable_llm_editor: bool = Field(default=False)
+    """Whether to enable LLM editor tool"""
+    enable_editor: bool = Field(default=True)
+    """Whether to enable the standard editor tool (str_replace_editor). Only has an effect if enable_llm_editor is False."""
    enable_jupyter: bool = Field(default=True)
+    """Whether to enable Jupyter tool"""
+    enable_cmd: bool = Field(default=True)
+    """Whether to enable bash tool"""
+    enable_think: bool = Field(default=True)
+    """Whether to enable think tool"""
+    enable_finish: bool = Field(default=True)
+    """Whether to enable finish tool"""
    enable_prompt_extensions: bool = Field(default=True)
+    """Whether to enable prompt extensions"""
    disabled_microagents: list[str] = Field(default_factory=list)
+    """A list of microagents to disable (by name, without .py extension, e.g. ["github", "lint"]). Default is an empty list."""
    enable_history_truncation: bool = Field(default=True)
+    """Whether history should be truncated to continue the session when hitting LLM context length limit."""
    enable_som_visual_browsing: bool = Field(default=True)
+    """Whether to enable SoM (Set of Marks) visual browsing."""
    condenser: CondenserConfig = Field(
        default_factory=lambda: NoOpCondenserConfig(type='noop')
    )
--- a/tests/unit/test_agents.py
+++ b/tests/unit/test_agents.py
@ -5,9 +5,6 @@ import pytest
 from litellm import ChatCompletionMessageToolCall

 from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
-from openhands.agenthub.codeact_agent.function_calling import (
-    get_tools as codeact_get_tools,
-)
 from openhands.agenthub.codeact_agent.function_calling import (
    response_to_actions as codeact_response_to_actions,
 )
@ -24,9 +21,6 @@ from openhands.agenthub.codeact_agent.tools.browser import (
    _BROWSER_DESCRIPTION,
    _BROWSER_TOOL_DESCRIPTION,
 )
-from openhands.agenthub.readonly_agent.function_calling import (
-    get_tools as readonly_get_tools,
-)
 from openhands.agenthub.readonly_agent.function_calling import (
    response_to_actions as readonly_response_to_actions,
 )
@ -72,6 +66,22 @@ def agent(agent_class) -> Union[CodeActAgent, ReadOnlyAgent]:
    return agent


+def test_agent_with_default_config_has_default_tools():
+    config = AgentConfig()
+    codeact_agent = CodeActAgent(llm=LLM(LLMConfig()), config=config)
+    assert len(codeact_agent.tools) > 0
+    default_tool_names = [tool['function']['name'] for tool in codeact_agent.tools]
+    assert {
+        'browser',
+        'execute_bash',
+        'execute_ipython_cell',
+        'finish',
+        'str_replace_editor',
+        'think',
+        'web_read',
+    }.issubset(default_tool_names)
+
+
@pytest.fixture
 def mock_state() -> State:
    state = Mock(spec=State)
@ -106,61 +116,6 @@ def test_step_with_pending_actions(agent):
    assert len(agent.pending_actions) == 0


-def test_codeact_get_tools_default():
-    tools = codeact_get_tools(
-        enable_jupyter=True,
-        enable_llm_editor=True,
-        enable_browsing=True,
-    )
-    assert len(tools) > 0
-
-    # Check required tools are present
-    tool_names = [tool['function']['name'] for tool in tools]
-    assert 'execute_bash' in tool_names
-    assert 'execute_ipython_cell' in tool_names
-    assert 'edit_file' in tool_names
-    assert 'web_read' in tool_names
-
-
-def test_readonly_get_tools_default():
-    tools = readonly_get_tools()
-    assert len(tools) > 0
-
-    # Check required tools are present
-    tool_names = [tool['function']['name'] for tool in tools]
-    assert 'execute_bash' not in tool_names
-    assert 'execute_ipython_cell' not in tool_names
-    assert 'edit_file' not in tool_names
-    assert 'web_read' in tool_names
-    assert 'grep' in tool_names
-    assert 'glob' in tool_names
-    assert 'think' in tool_names
-
-
-def test_codeact_get_tools_with_options():
-    # Test with all options enabled
-    tools = codeact_get_tools(
-        enable_browsing=True,
-        enable_jupyter=True,
-        enable_llm_editor=True,
-    )
-    tool_names = [tool['function']['name'] for tool in tools]
-    assert 'browser' in tool_names
-    assert 'execute_ipython_cell' in tool_names
-    assert 'edit_file' in tool_names
-
-    # Test with all options disabled
-    tools = codeact_get_tools(
-        enable_browsing=False,
-        enable_jupyter=False,
-        enable_llm_editor=False,
-    )
-    tool_names = [tool['function']['name'] for tool in tools]
-    assert 'browser' not in tool_names
-    assert 'execute_ipython_cell' not in tool_names
-    assert 'edit_file' not in tool_names
-
-
 def test_cmd_run_tool():
    CmdRunTool = create_cmd_run_tool()
    assert CmdRunTool['type'] == 'function'