Separate tool descriptions to support models with limited description length (#7258)

2025-12-26 05:48:36 +08:00 · 2025-03-16 15:48:13 +07:00 · 2025-03-16 15:48:13 +07:00 · 30109e8f20
commit 30109e8f20
parent cc45f5d9c3
6 changed files with 167 additions and 75 deletions
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@ -70,6 +70,7 @@ class CodeActAgent(Agent):
            codeact_enable_browsing=self.config.codeact_enable_browsing,
            codeact_enable_jupyter=self.config.codeact_enable_jupyter,
            codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
+            llm=self.llm,
        )
        logger.debug(
            f'TOOLS loaded for CodeActAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}'
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@ -12,13 +12,13 @@ from litellm import (

 from openhands.agenthub.codeact_agent.tools import (
    BrowserTool,
-    CmdRunTool,
    FinishTool,
    IPythonTool,
    LLMBasedFileEditTool,
-    StrReplaceEditorTool,
    ThinkTool,
    WebReadTool,
+    create_cmd_run_tool,
+    create_str_replace_editor_tool,
 )
 from openhands.core.exceptions import (
    FunctionCallNotExistsError,
@ -39,6 +39,7 @@ from openhands.events.action import (
 )
 from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.tool import ToolCallMetadata
+from openhands.llm import LLM


 def combine_thought(action: Action, thought: str) -> Action:
@ -80,7 +81,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
            # CmdRunTool (Bash)
            # ================================================

-            if tool_call.function.name == CmdRunTool['function']['name']:
+            if tool_call.function.name == create_cmd_run_tool()['function']['name']:
                if 'command' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "command" in tool call {tool_call.function.name}'
@ -131,7 +132,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                    start=arguments.get('start', 1),
                    end=arguments.get('end', -1),
                )
-            elif tool_call.function.name == StrReplaceEditorTool['function']['name']:
+            elif (
+                tool_call.function.name
+                == create_str_replace_editor_tool()['function']['name']
+            ):
                if 'command' not in arguments:
                    raise FunctionCallValidationError(
                        f'Missing required argument "command" in tool call {tool_call.function.name}'
@ -219,8 +223,22 @@ def get_tools(
    codeact_enable_browsing: bool = False,
    codeact_enable_llm_editor: bool = False,
    codeact_enable_jupyter: bool = False,
+    llm: LLM | None = None,
 ) -> list[ChatCompletionToolParam]:
-    tools = [CmdRunTool, ThinkTool, FinishTool]
+    SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1']
+
+    use_simplified_tool_desc = False
+    if llm is not None:
+        use_simplified_tool_desc = any(
+            model_substr in llm.config.model
+            for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS
+        )
+
+    tools = [
+        create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc),
+        ThinkTool,
+        FinishTool,
+    ]
    if codeact_enable_browsing:
        tools.append(WebReadTool)
        tools.append(BrowserTool)
@ -229,5 +247,9 @@ def get_tools(
    if codeact_enable_llm_editor:
        tools.append(LLMBasedFileEditTool)
    else:
-        tools.append(StrReplaceEditorTool)
+        tools.append(
+            create_str_replace_editor_tool(
+                use_simplified_description=use_simplified_tool_desc
+            )
+        )
    return tools
--- a/openhands/agenthub/codeact_agent/tools/init.py
+++ b/openhands/agenthub/codeact_agent/tools/init.py
@ -1,19 +1,19 @@
-from .bash import CmdRunTool
+from .bash import create_cmd_run_tool
 from .browser import BrowserTool
 from .finish import FinishTool
 from .ipython import IPythonTool
 from .llm_based_edit import LLMBasedFileEditTool
-from .str_replace_editor import StrReplaceEditorTool
+from .str_replace_editor import create_str_replace_editor_tool
 from .think import ThinkTool
 from .web_read import WebReadTool

 __all__ = [
    'BrowserTool',
-    'CmdRunTool',
+    'create_cmd_run_tool',
    'FinishTool',
    'IPythonTool',
    'LLMBasedFileEditTool',
-    'StrReplaceEditorTool',
+    'create_str_replace_editor_tool',
    'WebReadTool',
    'ThinkTool',
 ]
--- a/openhands/agenthub/codeact_agent/tools/bash.py
+++ b/openhands/agenthub/codeact_agent/tools/bash.py
@ -1,6 +1,6 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

-_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
+_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.

 ### Command Execution
 * One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together.
@ -22,25 +22,39 @@ _BASH_DESCRIPTION = """Execute a bash command in the terminal within a persisten
 * Output truncation: If the output exceeds a maximum length, it will be truncated before being returned.
 """

-CmdRunTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='execute_bash',
-        description=_BASH_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'type': 'string',
-                    'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
-                },
-                'is_input': {
-                    'type': 'string',
-                    'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
-                    'enum': ['true', 'false'],
+_SIMPLIFIED_BASH_DESCRIPTION = """Execute a bash command in the terminal.
+* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
+* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
+* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together."""
+
+
+def create_cmd_run_tool(
+    use_simplified_description: bool = False,
+) -> ChatCompletionToolParam:
+    description = (
+        _SIMPLIFIED_BASH_DESCRIPTION
+        if use_simplified_description
+        else _DETAILED_BASH_DESCRIPTION
+    )
+    return ChatCompletionToolParam(
+        type='function',
+        function=ChatCompletionToolParamFunctionChunk(
+            name='execute_bash',
+            description=description,
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'command': {
+                        'type': 'string',
+                        'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
+                    },
+                    'is_input': {
+                        'type': 'string',
+                        'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
+                        'enum': ['true', 'false'],
+                    },
                },
+                'required': ['command'],
            },
-            'required': ['command'],
-        },
-    ),
-)
+        ),
+    )
--- a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
+++ b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py
@ -1,6 +1,6 @@
 from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk

-_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
+_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
 * State is persistent across command calls and discussions with the user
 * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
 * The `create` command cannot be used if the specified `path` already exists as a file
@ -31,46 +31,73 @@ CRITICAL REQUIREMENTS FOR USING THIS TOOL:
 Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.
 """

-StrReplaceEditorTool = ChatCompletionToolParam(
-    type='function',
-    function=ChatCompletionToolParamFunctionChunk(
-        name='str_replace_editor',
-        description=_STR_REPLACE_EDITOR_DESCRIPTION,
-        parameters={
-            'type': 'object',
-            'properties': {
-                'command': {
-                    'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
-                    'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
-                    'type': 'string',
-                },
-                'path': {
-                    'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
-                    'type': 'string',
-                },
-                'file_text': {
-                    'description': 'Required parameter of `create` command, with the content of the file to be created.',
-                    'type': 'string',
-                },
-                'old_str': {
-                    'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
-                    'type': 'string',
-                },
-                'new_str': {
-                    'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
-                    'type': 'string',
-                },
-                'insert_line': {
-                    'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
-                    'type': 'integer',
-                },
-                'view_range': {
-                    'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
-                    'items': {'type': 'integer'},
-                    'type': 'array',
+_SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
+* State is persistent across command calls and discussions with the user
+* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
+* The `create` command cannot be used if the specified `path` already exists as a file
+* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
+* The `undo_edit` command will revert the last edit made to the file at `path`
+Notes for using the `str_replace` command:
+* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
+* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
+* The `new_str` parameter should contain the edited lines that should replace the `old_str`
+"""
+
+
+def create_str_replace_editor_tool(
+    use_simplified_description: bool = False,
+) -> ChatCompletionToolParam:
+    description = (
+        _SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION
+        if use_simplified_description
+        else _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION
+    )
+    return ChatCompletionToolParam(
+        type='function',
+        function=ChatCompletionToolParamFunctionChunk(
+            name='str_replace_editor',
+            description=description,
+            parameters={
+                'type': 'object',
+                'properties': {
+                    'command': {
+                        'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
+                        'enum': [
+                            'view',
+                            'create',
+                            'str_replace',
+                            'insert',
+                            'undo_edit',
+                        ],
+                        'type': 'string',
+                    },
+                    'path': {
+                        'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
+                        'type': 'string',
+                    },
+                    'file_text': {
+                        'description': 'Required parameter of `create` command, with the content of the file to be created.',
+                        'type': 'string',
+                    },
+                    'old_str': {
+                        'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
+                        'type': 'string',
+                    },
+                    'new_str': {
+                        'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
+                        'type': 'string',
+                    },
+                    'insert_line': {
+                        'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
+                        'type': 'integer',
+                    },
+                    'view_range': {
+                        'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
+                        'items': {'type': 'integer'},
+                        'type': 'array',
+                    },
                },
+                'required': ['command', 'path'],
            },
-            'required': ['command', 'path'],
-        },
-    ),
-)
+        ),
+    )
--- a/tests/unit/test_codeact_agent.py
+++ b/tests/unit/test_codeact_agent.py
@ -6,11 +6,11 @@ from litellm import ChatCompletionMessageToolCall
 from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
 from openhands.agenthub.codeact_agent.function_calling import (
    BrowserTool,
-    CmdRunTool,
    IPythonTool,
    LLMBasedFileEditTool,
-    StrReplaceEditorTool,
    WebReadTool,
+    create_cmd_run_tool,
+    create_str_replace_editor_tool,
    get_tools,
    response_to_actions,
 )
@ -119,6 +119,7 @@ def test_get_tools_with_options():


 def test_cmd_run_tool():
+    CmdRunTool = create_cmd_run_tool()
    assert CmdRunTool['type'] == 'function'
    assert CmdRunTool['function']['name'] == 'execute_bash'
    assert 'command' in CmdRunTool['function']['parameters']['properties']
@ -149,6 +150,7 @@ def test_llm_based_file_edit_tool():


 def test_str_replace_editor_tool():
+    StrReplaceEditorTool = create_str_replace_editor_tool()
    assert StrReplaceEditorTool['type'] == 'function'
    assert StrReplaceEditorTool['function']['name'] == 'str_replace_editor'

@ -236,7 +238,11 @@ def test_step_with_no_pending_actions(mock_state: State):
    mock_response.choices[0].message.content = 'Task completed'
    mock_response.choices[0].message.tool_calls = []

+    mock_config = Mock()
+    mock_config.model = 'mock_model'
+
    llm = Mock()
+    llm.config = mock_config
    llm.completion = Mock(return_value=mock_response)
    llm.is_function_calling_active = Mock(return_value=True)  # Enable function calling
    llm.is_caching_prompt_active = Mock(return_value=False)
@ -260,6 +266,28 @@ def test_step_with_no_pending_actions(mock_state: State):
    assert action.content == 'Task completed'


+def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
+    """Tests that the simplified tool descriptions are loaded for specific models."""
+    o3_mock_config = Mock()
+    o3_mock_config.model = 'mock_o3_model'
+
+    llm = Mock()
+    llm.config = o3_mock_config
+
+    agent = CodeActAgent(llm=llm, config=AgentConfig())
+    for tool in agent.tools:
+        # Assert all descriptions have less than 1024 characters
+        assert len(tool['function']['description']) < 1024
+
+    sonnet_mock_config = Mock()
+    sonnet_mock_config.model = 'mock_sonnet_model'
+
+    llm.config = sonnet_mock_config
+    agent = CodeActAgent(llm=llm, config=AgentConfig())
+    # Assert existence of the detailed tool descriptions that are longer than 1024 characters
+    assert any(len(tool['function']['description']) > 1024 for tool in agent.tools)
+
+
 def test_mismatched_tool_call_events(mock_state: State):
    """Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages."""
    agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())