mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Separate tool descriptions to support models with limited description length (#7258)
This commit is contained in:
parent
cc45f5d9c3
commit
30109e8f20
@ -70,6 +70,7 @@ class CodeActAgent(Agent):
|
||||
codeact_enable_browsing=self.config.codeact_enable_browsing,
|
||||
codeact_enable_jupyter=self.config.codeact_enable_jupyter,
|
||||
codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
|
||||
llm=self.llm,
|
||||
)
|
||||
logger.debug(
|
||||
f'TOOLS loaded for CodeActAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}'
|
||||
|
||||
@ -12,13 +12,13 @@ from litellm import (
|
||||
|
||||
from openhands.agenthub.codeact_agent.tools import (
|
||||
BrowserTool,
|
||||
CmdRunTool,
|
||||
FinishTool,
|
||||
IPythonTool,
|
||||
LLMBasedFileEditTool,
|
||||
StrReplaceEditorTool,
|
||||
ThinkTool,
|
||||
WebReadTool,
|
||||
create_cmd_run_tool,
|
||||
create_str_replace_editor_tool,
|
||||
)
|
||||
from openhands.core.exceptions import (
|
||||
FunctionCallNotExistsError,
|
||||
@ -39,6 +39,7 @@ from openhands.events.action import (
|
||||
)
|
||||
from openhands.events.event import FileEditSource, FileReadSource
|
||||
from openhands.events.tool import ToolCallMetadata
|
||||
from openhands.llm import LLM
|
||||
|
||||
|
||||
def combine_thought(action: Action, thought: str) -> Action:
|
||||
@ -80,7 +81,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
# CmdRunTool (Bash)
|
||||
# ================================================
|
||||
|
||||
if tool_call.function.name == CmdRunTool['function']['name']:
|
||||
if tool_call.function.name == create_cmd_run_tool()['function']['name']:
|
||||
if 'command' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "command" in tool call {tool_call.function.name}'
|
||||
@ -131,7 +132,10 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
start=arguments.get('start', 1),
|
||||
end=arguments.get('end', -1),
|
||||
)
|
||||
elif tool_call.function.name == StrReplaceEditorTool['function']['name']:
|
||||
elif (
|
||||
tool_call.function.name
|
||||
== create_str_replace_editor_tool()['function']['name']
|
||||
):
|
||||
if 'command' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "command" in tool call {tool_call.function.name}'
|
||||
@ -219,8 +223,22 @@ def get_tools(
|
||||
codeact_enable_browsing: bool = False,
|
||||
codeact_enable_llm_editor: bool = False,
|
||||
codeact_enable_jupyter: bool = False,
|
||||
llm: LLM | None = None,
|
||||
) -> list[ChatCompletionToolParam]:
|
||||
tools = [CmdRunTool, ThinkTool, FinishTool]
|
||||
SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1']
|
||||
|
||||
use_simplified_tool_desc = False
|
||||
if llm is not None:
|
||||
use_simplified_tool_desc = any(
|
||||
model_substr in llm.config.model
|
||||
for model_substr in SIMPLIFIED_TOOL_DESCRIPTION_LLM_SUBSTRS
|
||||
)
|
||||
|
||||
tools = [
|
||||
create_cmd_run_tool(use_simplified_description=use_simplified_tool_desc),
|
||||
ThinkTool,
|
||||
FinishTool,
|
||||
]
|
||||
if codeact_enable_browsing:
|
||||
tools.append(WebReadTool)
|
||||
tools.append(BrowserTool)
|
||||
@ -229,5 +247,9 @@ def get_tools(
|
||||
if codeact_enable_llm_editor:
|
||||
tools.append(LLMBasedFileEditTool)
|
||||
else:
|
||||
tools.append(StrReplaceEditorTool)
|
||||
tools.append(
|
||||
create_str_replace_editor_tool(
|
||||
use_simplified_description=use_simplified_tool_desc
|
||||
)
|
||||
)
|
||||
return tools
|
||||
|
||||
@ -1,19 +1,19 @@
|
||||
from .bash import CmdRunTool
|
||||
from .bash import create_cmd_run_tool
|
||||
from .browser import BrowserTool
|
||||
from .finish import FinishTool
|
||||
from .ipython import IPythonTool
|
||||
from .llm_based_edit import LLMBasedFileEditTool
|
||||
from .str_replace_editor import StrReplaceEditorTool
|
||||
from .str_replace_editor import create_str_replace_editor_tool
|
||||
from .think import ThinkTool
|
||||
from .web_read import WebReadTool
|
||||
|
||||
__all__ = [
|
||||
'BrowserTool',
|
||||
'CmdRunTool',
|
||||
'create_cmd_run_tool',
|
||||
'FinishTool',
|
||||
'IPythonTool',
|
||||
'LLMBasedFileEditTool',
|
||||
'StrReplaceEditorTool',
|
||||
'create_str_replace_editor_tool',
|
||||
'WebReadTool',
|
||||
'ThinkTool',
|
||||
]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
|
||||
_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
|
||||
|
||||
### Command Execution
|
||||
* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together.
|
||||
@ -22,25 +22,39 @@ _BASH_DESCRIPTION = """Execute a bash command in the terminal within a persisten
|
||||
* Output truncation: If the output exceeds a maximum length, it will be truncated before being returned.
|
||||
"""
|
||||
|
||||
CmdRunTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_bash',
|
||||
description=_BASH_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
|
||||
},
|
||||
'is_input': {
|
||||
'type': 'string',
|
||||
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
|
||||
'enum': ['true', 'false'],
|
||||
_SIMPLIFIED_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
|
||||
* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together."""
|
||||
|
||||
|
||||
def create_cmd_run_tool(
|
||||
use_simplified_description: bool = False,
|
||||
) -> ChatCompletionToolParam:
|
||||
description = (
|
||||
_SIMPLIFIED_BASH_DESCRIPTION
|
||||
if use_simplified_description
|
||||
else _DETAILED_BASH_DESCRIPTION
|
||||
)
|
||||
return ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_bash',
|
||||
description=description,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
|
||||
},
|
||||
'is_input': {
|
||||
'type': 'string',
|
||||
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
|
||||
'enum': ['true', 'false'],
|
||||
},
|
||||
},
|
||||
'required': ['command'],
|
||||
},
|
||||
'required': ['command'],
|
||||
},
|
||||
),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
* State is persistent across command calls and discussions with the user
|
||||
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
|
||||
* The `create` command cannot be used if the specified `path` already exists as a file
|
||||
@ -31,46 +31,73 @@ CRITICAL REQUIREMENTS FOR USING THIS TOOL:
|
||||
Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.
|
||||
"""
|
||||
|
||||
StrReplaceEditorTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='str_replace_editor',
|
||||
description=_STR_REPLACE_EDITOR_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
|
||||
'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
|
||||
'type': 'string',
|
||||
},
|
||||
'path': {
|
||||
'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
|
||||
'type': 'string',
|
||||
},
|
||||
'file_text': {
|
||||
'description': 'Required parameter of `create` command, with the content of the file to be created.',
|
||||
'type': 'string',
|
||||
},
|
||||
'old_str': {
|
||||
'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
|
||||
'type': 'string',
|
||||
},
|
||||
'new_str': {
|
||||
'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
|
||||
'type': 'string',
|
||||
},
|
||||
'insert_line': {
|
||||
'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
|
||||
'type': 'integer',
|
||||
},
|
||||
'view_range': {
|
||||
'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
|
||||
'items': {'type': 'integer'},
|
||||
'type': 'array',
|
||||
_SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
* State is persistent across command calls and discussions with the user
|
||||
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
|
||||
* The `create` command cannot be used if the specified `path` already exists as a file
|
||||
* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
|
||||
* The `undo_edit` command will revert the last edit made to the file at `path`
|
||||
Notes for using the `str_replace` command:
|
||||
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
|
||||
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
|
||||
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
|
||||
"""
|
||||
|
||||
|
||||
def create_str_replace_editor_tool(
|
||||
use_simplified_description: bool = False,
|
||||
) -> ChatCompletionToolParam:
|
||||
description = (
|
||||
_SIMPLIFIED_STR_REPLACE_EDITOR_DESCRIPTION
|
||||
if use_simplified_description
|
||||
else _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION
|
||||
)
|
||||
return ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='str_replace_editor',
|
||||
description=description,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
|
||||
'enum': [
|
||||
'view',
|
||||
'create',
|
||||
'str_replace',
|
||||
'insert',
|
||||
'undo_edit',
|
||||
],
|
||||
'type': 'string',
|
||||
},
|
||||
'path': {
|
||||
'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
|
||||
'type': 'string',
|
||||
},
|
||||
'file_text': {
|
||||
'description': 'Required parameter of `create` command, with the content of the file to be created.',
|
||||
'type': 'string',
|
||||
},
|
||||
'old_str': {
|
||||
'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
|
||||
'type': 'string',
|
||||
},
|
||||
'new_str': {
|
||||
'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
|
||||
'type': 'string',
|
||||
},
|
||||
'insert_line': {
|
||||
'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
|
||||
'type': 'integer',
|
||||
},
|
||||
'view_range': {
|
||||
'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
|
||||
'items': {'type': 'integer'},
|
||||
'type': 'array',
|
||||
},
|
||||
},
|
||||
'required': ['command', 'path'],
|
||||
},
|
||||
'required': ['command', 'path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
@ -6,11 +6,11 @@ from litellm import ChatCompletionMessageToolCall
|
||||
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
|
||||
from openhands.agenthub.codeact_agent.function_calling import (
|
||||
BrowserTool,
|
||||
CmdRunTool,
|
||||
IPythonTool,
|
||||
LLMBasedFileEditTool,
|
||||
StrReplaceEditorTool,
|
||||
WebReadTool,
|
||||
create_cmd_run_tool,
|
||||
create_str_replace_editor_tool,
|
||||
get_tools,
|
||||
response_to_actions,
|
||||
)
|
||||
@ -119,6 +119,7 @@ def test_get_tools_with_options():
|
||||
|
||||
|
||||
def test_cmd_run_tool():
|
||||
CmdRunTool = create_cmd_run_tool()
|
||||
assert CmdRunTool['type'] == 'function'
|
||||
assert CmdRunTool['function']['name'] == 'execute_bash'
|
||||
assert 'command' in CmdRunTool['function']['parameters']['properties']
|
||||
@ -149,6 +150,7 @@ def test_llm_based_file_edit_tool():
|
||||
|
||||
|
||||
def test_str_replace_editor_tool():
|
||||
StrReplaceEditorTool = create_str_replace_editor_tool()
|
||||
assert StrReplaceEditorTool['type'] == 'function'
|
||||
assert StrReplaceEditorTool['function']['name'] == 'str_replace_editor'
|
||||
|
||||
@ -236,7 +238,11 @@ def test_step_with_no_pending_actions(mock_state: State):
|
||||
mock_response.choices[0].message.content = 'Task completed'
|
||||
mock_response.choices[0].message.tool_calls = []
|
||||
|
||||
mock_config = Mock()
|
||||
mock_config.model = 'mock_model'
|
||||
|
||||
llm = Mock()
|
||||
llm.config = mock_config
|
||||
llm.completion = Mock(return_value=mock_response)
|
||||
llm.is_function_calling_active = Mock(return_value=True) # Enable function calling
|
||||
llm.is_caching_prompt_active = Mock(return_value=False)
|
||||
@ -260,6 +266,28 @@ def test_step_with_no_pending_actions(mock_state: State):
|
||||
assert action.content == 'Task completed'
|
||||
|
||||
|
||||
def test_correct_tool_description_loaded_based_on_model_name(mock_state: State):
|
||||
"""Tests that the simplified tool descriptions are loaded for specific models."""
|
||||
o3_mock_config = Mock()
|
||||
o3_mock_config.model = 'mock_o3_model'
|
||||
|
||||
llm = Mock()
|
||||
llm.config = o3_mock_config
|
||||
|
||||
agent = CodeActAgent(llm=llm, config=AgentConfig())
|
||||
for tool in agent.tools:
|
||||
# Assert all descriptions have less than 1024 characters
|
||||
assert len(tool['function']['description']) < 1024
|
||||
|
||||
sonnet_mock_config = Mock()
|
||||
sonnet_mock_config.model = 'mock_sonnet_model'
|
||||
|
||||
llm.config = sonnet_mock_config
|
||||
agent = CodeActAgent(llm=llm, config=AgentConfig())
|
||||
# Assert existence of the detailed tool descriptions that are longer than 1024 characters
|
||||
assert any(len(tool['function']['description']) > 1024 for tool in agent.tools)
|
||||
|
||||
|
||||
def test_mismatched_tool_call_events(mock_state: State):
|
||||
"""Tests that the agent can convert mismatched tool call events (i.e., an observation with no corresponding action) into messages."""
|
||||
agent = CodeActAgent(llm=LLM(LLMConfig()), config=AgentConfig())
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user