From c17b0ebfc6195fd37bea999abbc629da4335c0ad Mon Sep 17 00:00:00 2001 From: OpenHands Date: Sat, 17 May 2025 12:37:45 +0800 Subject: [PATCH] Fix issue #8304: [Bug]: Non-native tool use converter fails when builtin tools are disabled (#8310) Co-authored-by: Engel Nyst Co-authored-by: Engel Nyst Co-authored-by: Xingyao Wang --- openhands/agenthub/codeact_agent/README.md | 2 +- .../agenthub/codeact_agent/tools/bash.py | 4 +- .../agenthub/codeact_agent/tools/browser.py | 4 +- .../agenthub/codeact_agent/tools/finish.py | 4 +- .../codeact_agent/tools/str_replace_editor.py | 4 +- openhands/llm/fn_call_converter.py | 311 +++++++++++------- openhands/llm/tool_names.py | 6 + tests/unit/test_llm_fncall_converter.py | 262 ++++++++++++++- 8 files changed, 476 insertions(+), 121 deletions(-) create mode 100644 openhands/llm/tool_names.py diff --git a/openhands/agenthub/codeact_agent/README.md b/openhands/agenthub/codeact_agent/README.md index bace913075..9686845e50 100644 --- a/openhands/agenthub/codeact_agent/README.md +++ b/openhands/agenthub/codeact_agent/README.md @@ -10,7 +10,7 @@ The CodeAct agent operates through a function calling interface. At each turn, t 2. **CodeAct**: Execute actions through a set of well-defined tools: - Execute Linux `bash` commands with `execute_bash` - Run Python code in an [IPython](https://ipython.org/) environment with `execute_ipython_cell` - - Interact with web browsers using `browser` and `web_read` + - Interact with web browsers using `browser` and `fetch` - Edit files using `str_replace_editor` or `edit_file` ![image](https://github.com/All-Hands-AI/OpenHands/assets/38853559/92b622e3-72ad-4a61-8f41-8c040b6d5fb3) diff --git a/openhands/agenthub/codeact_agent/tools/bash.py b/openhands/agenthub/codeact_agent/tools/bash.py index 1152cf9418..5f890fc559 100644 --- a/openhands/agenthub/codeact_agent/tools/bash.py +++ b/openhands/agenthub/codeact_agent/tools/bash.py @@ -2,6 +2,8 @@ import sys from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk +from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME + _DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. ### Command Execution @@ -45,7 +47,7 @@ def create_cmd_run_tool( return ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( - name=refine_prompt('execute_bash'), + name=EXECUTE_BASH_TOOL_NAME, description=refine_prompt(description), parameters={ 'type': 'object', diff --git a/openhands/agenthub/codeact_agent/tools/browser.py b/openhands/agenthub/codeact_agent/tools/browser.py index 9b5003926c..71b809d140 100644 --- a/openhands/agenthub/codeact_agent/tools/browser.py +++ b/openhands/agenthub/codeact_agent/tools/browser.py @@ -1,6 +1,8 @@ from browsergym.core.action.highlevel import HighLevelActionSet from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk +from openhands.llm.tool_names import BROWSER_TOOL_NAME + # from browsergym/core/action/highlevel.py _browser_action_space = HighLevelActionSet( subsets=['bid', 'nav'], @@ -141,7 +143,7 @@ for _, action in _browser_action_space.action_set.items(): BrowserTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( - name='browser', + name=BROWSER_TOOL_NAME, description=_BROWSER_DESCRIPTION, parameters={ 'type': 'object', diff --git a/openhands/agenthub/codeact_agent/tools/finish.py b/openhands/agenthub/codeact_agent/tools/finish.py index dd3292f3ed..bc0018e3e7 100644 --- a/openhands/agenthub/codeact_agent/tools/finish.py +++ b/openhands/agenthub/codeact_agent/tools/finish.py @@ -1,5 +1,7 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk +from openhands.llm.tool_names import FINISH_TOOL_NAME + _FINISH_DESCRIPTION = """Signals the completion of the current task or conversation. Use this tool when: @@ -18,7 +20,7 @@ The task_completed field should be set to True if you believed you have complete FinishTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( - name='finish', + name=FINISH_TOOL_NAME, description=_FINISH_DESCRIPTION, parameters={ 'type': 'object', diff --git a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py index d55b0d21c8..f3a777ebd8 100644 --- a/openhands/agenthub/codeact_agent/tools/str_replace_editor.py +++ b/openhands/agenthub/codeact_agent/tools/str_replace_editor.py @@ -1,5 +1,7 @@ from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk +from openhands.llm.tool_names import STR_REPLACE_EDITOR_TOOL_NAME + _DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format * State is persistent across command calls and discussions with the user * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep @@ -55,7 +57,7 @@ def create_str_replace_editor_tool( return ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( - name='str_replace_editor', + name=STR_REPLACE_EDITOR_TOOL_NAME, description=description, parameters={ 'type': 'object', diff --git a/openhands/llm/fn_call_converter.py b/openhands/llm/fn_call_converter.py index 04ce6eb6f4..14166814f3 100644 --- a/openhands/llm/fn_call_converter.py +++ b/openhands/llm/fn_call_converter.py @@ -18,6 +18,12 @@ from openhands.core.exceptions import ( FunctionCallConversionError, FunctionCallValidationError, ) +from openhands.llm.tool_names import ( + BROWSER_TOOL_NAME, + EXECUTE_BASH_TOOL_NAME, + FINISH_TOOL_NAME, + STR_REPLACE_EDITOR_TOOL_NAME, +) # Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b SYSTEM_PROMPT_SUFFIX_TEMPLATE = """ @@ -55,14 +61,12 @@ def refine_prompt(prompt: str) -> str: return prompt -# NOTE: we need to make sure this example is always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py -IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = refine_prompt(""" -Here's a running example of how to perform a task with the provided tools. - ---------------------- START OF EXAMPLE --------------------- - -USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. +# NOTE: we need to make sure these examples are always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py +# Example snippets for each tool +TOOL_EXAMPLES = { + 'execute_bash': { + 'check_dir': """ ASSISTANT: Sure! Let me first check the current directory: @@ -73,29 +77,10 @@ pwd && ls USER: EXECUTION RESULT of [execute_bash]: /workspace openhands@runtime:~/workspace$ - -ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`: - -create -/workspace/app.py - -from flask import Flask -app = Flask(__name__) - -@app.route('/') -def index() -> str: - numbers = list(range(1, 11)) - return str(numbers) - -if __name__ == '__main__': - app.run(port=5000) - - - -USER: EXECUTION RESULT of [str_replace_editor]: -File created successfully at: /workspace/app.py - -ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: +""", + 'run_server': """ +ASSISTANT: +Let me run the Python file for you: python3 app.py > server.log 2>&1 & @@ -106,7 +91,8 @@ USER: EXECUTION RESULT of [execute_bash]: [1] 121 [1]+ Exit 1 python3 app.py > server.log 2>&1 -ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: +ASSISTANT: +Looks like the server was running with PID 121 then crashed. Let me check the server log: cat server.log @@ -119,7 +105,8 @@ Traceback (most recent call last): from flask import Flask ModuleNotFoundError: No module named 'flask' -ASSISTANT: Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you: +ASSISTANT: +Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you: pip3 install flask @@ -143,7 +130,8 @@ Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/pyt Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 -ASSISTANT: The `flask` module is installed successfully. Let me run the Python file again: +ASSISTANT: +The `flask` module is installed successfully. Let me run the Python file again: python3 app.py > server.log 2>&1 & @@ -170,7 +158,8 @@ Press CTRL+C to quit ASSISTANT: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! - +""", + 'kill_server': """ USER: Now kill the server, make it display the numbers in a table format. ASSISTANT: @@ -183,7 +172,48 @@ kill 124 USER: EXECUTION RESULT of [execute_bash]: [1]+ Terminated python3 app.py > server.log 2>&1 +""", + 'run_server_again': """ +ASSISTANT: +Running the updated file: + + +python3 app.py > server.log 2>&1 & + + +USER: EXECUTION RESULT of [execute_bash]: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. +""", + }, + 'str_replace_editor': { + 'create_file': """ +ASSISTANT: +There is no `app.py` file in the current directory. Let me create a Python file `app.py`: + +create +/workspace/app.py + +from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return str(numbers) + +if __name__ == '__main__': + app.run(port=5000) + + + +USER: EXECUTION RESULT of [str_replace_editor]: +File created successfully at: /workspace/app.py +""", + 'edit_file': """ ASSISTANT: Now let me display the numbers in a table format: @@ -205,28 +235,100 @@ The file /workspace/app.py has been edited. Here's the result of running `cat -n 10 app.run(port=5000) Review the changes and make sure they are as expected. Edit the file again if necessary. +""", + }, + 'browser': { + 'view_page': """ ASSISTANT: -Running the updated file: - - -python3 app.py > server.log 2>&1 & +Let me check how the page looks in the browser: + + +goto('http://127.0.0.1:5000') +noop(1000) # Wait for page to load -USER: EXECUTION RESULT of [execute_bash]: -[1] 126 - +USER: EXECUTION RESULT of [browser]: +[Browser shows the numbers in a table format] +""" + }, + 'finish': { + 'task_completed': """ ASSISTANT: The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! +The task has been completed. The web server is running and displaying numbers 1-10 in a table format at http://127.0.0.1:5000. +true +""" + }, +} + +def get_example_for_tools(tools: list[dict]) -> str: + """Generate an in-context learning example based on available tools.""" + available_tools = set() + for tool in tools: + if tool['type'] == 'function': + name = tool['function']['name'] + if name == EXECUTE_BASH_TOOL_NAME: + available_tools.add('execute_bash') + elif name == STR_REPLACE_EDITOR_TOOL_NAME: + available_tools.add('str_replace_editor') + elif name == BROWSER_TOOL_NAME: + available_tools.add('browser') + elif name == FINISH_TOOL_NAME: + available_tools.add('finish') + + if not available_tools: + return '' + + example = """Here's a running example of how to perform a task with the provided tools. + +--------------------- START OF EXAMPLE --------------------- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +""" + + # Build example based on available tools + if 'execute_bash' in available_tools: + example += TOOL_EXAMPLES['execute_bash']['check_dir'] + + if 'str_replace_editor' in available_tools: + example += TOOL_EXAMPLES['str_replace_editor']['create_file'] + + if 'execute_bash' in available_tools: + example += TOOL_EXAMPLES['execute_bash']['run_server'] + + if 'browser' in available_tools: + example += TOOL_EXAMPLES['browser']['view_page'] + + if 'execute_bash' in available_tools: + example += TOOL_EXAMPLES['execute_bash']['kill_server'] + + if 'str_replace_editor' in available_tools: + example += TOOL_EXAMPLES['str_replace_editor']['edit_file'] + + if 'execute_bash' in available_tools: + example += TOOL_EXAMPLES['execute_bash']['run_server_again'] + + if 'finish' in available_tools: + example += TOOL_EXAMPLES['finish']['task_completed'] + + example += """ --------------------- END OF EXAMPLE --------------------- Do NOT assume the environment is the same as in the example above. --------------------- NEW TASK DESCRIPTION --------------------- -""").lstrip() +""" + example = example.lstrip() + + return example + + +IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX = """ --------------------- END OF NEW TASK DESCRIPTION --------------------- @@ -352,74 +454,42 @@ def convert_fncall_messages_to_non_fncall_messages( # Add in-context learning example for the first user message if not first_user_message_encountered and add_in_context_learning_example: first_user_message_encountered = True - # Check tools - if not ( - tools - and len(tools) > 0 - and any( - ( - tool['type'] == 'function' - and tool['function']['name'] - == refine_prompt('execute_bash') - and 'command' - in tool['function']['parameters']['properties'] - ) - for tool in tools - ) - and any( - ( - tool['type'] == 'function' - and tool['function']['name'] == 'str_replace_editor' - and 'path' in tool['function']['parameters']['properties'] - and 'file_text' - in tool['function']['parameters']['properties'] - and 'old_str' - in tool['function']['parameters']['properties'] - and 'new_str' - in tool['function']['parameters']['properties'] - ) - for tool in tools - ) - ): - raise FunctionCallConversionError( - 'The currently provided tool set are NOT compatible with the in-context learning example for FnCall to Non-FnCall conversion. ' - 'Please update your tool set OR the in-context learning example in openhands/llm/fn_call_converter.py' - ) - # add in-context learning example - if isinstance(content, str): - content = ( - IN_CONTEXT_LEARNING_EXAMPLE_PREFIX - + content - + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX - ) - elif isinstance(content, list): - if content and content[0]['type'] == 'text': - content[0]['text'] = ( - IN_CONTEXT_LEARNING_EXAMPLE_PREFIX - + content[0]['text'] - + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX - ) + # Generate example based on available tools + example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools) + + # Add example if we have any tools + if example: + # add in-context learning example + if isinstance(content, str): + content = example + content + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX + elif isinstance(content, list): + if content and content[0]['type'] == 'text': + content[0]['text'] = ( + example + + content[0]['text'] + + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX + ) + else: + content = ( + [ + { + 'type': 'text', + 'text': example, + } + ] + + content + + [ + { + 'type': 'text', + 'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, + } + ] + ) else: - content = ( - [ - { - 'type': 'text', - 'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, - } - ] - + content - + [ - { - 'type': 'text', - 'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, - } - ] + raise FunctionCallConversionError( + f'Unexpected content type {type(content)}. Expected str or list. Content: {content}' ) - else: - raise FunctionCallConversionError( - f'Unexpected content type {type(content)}. Expected str or list. Content: {content}' - ) converted_messages.append( { 'role': 'user', @@ -604,17 +674,28 @@ def convert_non_fncall_messages_to_fncall_messages( if not first_user_message_encountered: first_user_message_encountered = True if isinstance(content, str): - content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, '') - content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '') + # Remove any existing example + if content.startswith(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)): + content = content.replace( + IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools), '', 1 + ) + if content.endswith(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX): + content = content.replace( + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1 + ) elif isinstance(content, list): for item in content: if item['type'] == 'text': - item['text'] = item['text'].replace( - IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, '' - ) - item['text'] = item['text'].replace( - IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '' - ) + # Remove any existing example + example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools) + if item['text'].startswith(example): + item['text'] = item['text'].replace(example, '', 1) + if item['text'].endswith( + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX + ): + item['text'] = item['text'].replace( + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1 + ) else: raise FunctionCallConversionError( f'Unexpected content type {type(content)}. Expected str or list. Content: {content}' diff --git a/openhands/llm/tool_names.py b/openhands/llm/tool_names.py new file mode 100644 index 0000000000..b61c8198ed --- /dev/null +++ b/openhands/llm/tool_names.py @@ -0,0 +1,6 @@ +"""Constants for tool names used in function calling.""" + +EXECUTE_BASH_TOOL_NAME = 'execute_bash' +STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor' +BROWSER_TOOL_NAME = 'browser' +FINISH_TOOL_NAME = 'finish' diff --git a/tests/unit/test_llm_fncall_converter.py b/tests/unit/test_llm_fncall_converter.py index 29a8e54754..29cfd80840 100644 --- a/tests/unit/test_llm_fncall_converter.py +++ b/tests/unit/test_llm_fncall_converter.py @@ -9,12 +9,14 @@ from litellm import ChatCompletionToolParam from openhands.llm.fn_call_converter import ( IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, + TOOL_EXAMPLES, FunctionCallConversionError, convert_fncall_messages_to_non_fncall_messages, convert_from_multiple_tool_calls_to_single_tool_call_messages, convert_non_fncall_messages_to_fncall_messages, convert_tool_call_to_string, convert_tools_to_description, + get_example_for_tools, ) FNCALL_TOOLS: list[ChatCompletionToolParam] = [ @@ -140,6 +142,264 @@ Allowed values: [`view`, `create`, `str_replace`, `insert`, `undo_edit`] ) +def test_get_example_for_tools_no_tools(): + """Test that get_example_for_tools returns empty string when no tools are available.""" + tools = [] + example = get_example_for_tools(tools) + assert example == '' + + +def test_get_example_for_tools_single_tool(): + """Test that get_example_for_tools generates correct example with a single tool.""" + tools = [ + { + 'type': 'function', + 'function': { + 'name': 'execute_bash', + 'description': 'Execute a bash command in the terminal.', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The bash command to execute.', + } + }, + 'required': ['command'], + }, + }, + } + ] + example = get_example_for_tools(tools) + assert example.startswith( + "Here's a running example of how to perform a task with the provided tools." + ) + assert ( + 'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.' + in example + ) + assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example + assert TOOL_EXAMPLES['execute_bash']['run_server'] in example + assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example + assert TOOL_EXAMPLES['str_replace_editor']['create_file'] not in example + assert TOOL_EXAMPLES['browser']['view_page'] not in example + assert TOOL_EXAMPLES['finish']['task_completed'] not in example + + +def test_get_example_for_tools_single_tool_is_finish(): + """Test get_example_for_tools with only the finish tool.""" + tools = [ + { + 'type': 'function', + 'function': { + 'name': 'finish', + 'description': 'Finish the interaction when the task is complete.', + }, + } + ] + example = get_example_for_tools(tools) + assert example.startswith( + "Here's a running example of how to perform a task with the provided tools." + ) + assert ( + 'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.' + in example + ) + assert TOOL_EXAMPLES['finish']['task_completed'] in example + assert TOOL_EXAMPLES['execute_bash']['check_dir'] not in example + assert TOOL_EXAMPLES['str_replace_editor']['create_file'] not in example + assert TOOL_EXAMPLES['browser']['view_page'] not in example + + +def test_get_example_for_tools_multiple_tools(): + """Test that get_example_for_tools generates correct example with multiple tools.""" + tools = [ + { + 'type': 'function', + 'function': { + 'name': 'execute_bash', + 'description': 'Execute a bash command in the terminal.', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The bash command to execute.', + } + }, + 'required': ['command'], + }, + }, + }, + { + 'type': 'function', + 'function': { + 'name': 'str_replace_editor', + 'description': 'Custom editing tool for viewing, creating and editing files.', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The commands to run.', + 'enum': [ + 'view', + 'create', + 'str_replace', + 'insert', + 'undo_edit', + ], + }, + 'path': { + 'type': 'string', + 'description': 'Absolute path to file or directory.', + }, + }, + 'required': ['command', 'path'], + }, + }, + }, + ] + example = get_example_for_tools(tools) + assert example.startswith( + "Here's a running example of how to perform a task with the provided tools." + ) + assert ( + 'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.' + in example + ) + assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example + assert TOOL_EXAMPLES['execute_bash']['run_server'] in example + assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example + assert TOOL_EXAMPLES['str_replace_editor']['create_file'] in example + assert TOOL_EXAMPLES['str_replace_editor']['edit_file'] in example + assert TOOL_EXAMPLES['browser']['view_page'] not in example + assert TOOL_EXAMPLES['finish']['task_completed'] not in example + + +def test_get_example_for_tools_multiple_tools_with_finish(): + """Test get_example_for_tools with multiple tools including finish.""" + # Uses execute_bash and finish tools + tools = [ + { + 'type': 'function', + 'function': { + 'name': 'execute_bash', + 'description': 'Execute a bash command in the terminal.', + 'parameters': { # Params added for completeness, not strictly needed by get_example_for_tools + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The bash command to execute.', + } + }, + 'required': ['command'], + }, + }, + }, + { + 'type': 'function', + 'function': { + 'name': 'str_replace_editor', + 'description': 'Custom editing tool for viewing, creating and editing files.', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': 'The commands to run.', + 'enum': [ + 'view', + 'create', + 'str_replace', + 'insert', + 'undo_edit', + ], + }, + 'path': { + 'type': 'string', + 'description': 'Absolute path to file or directory.', + }, + }, + 'required': ['command', 'path'], + }, + }, + }, + { + 'type': 'function', + 'function': { + 'name': 'browser', + 'description': 'Interact with the browser.', + 'parameters': { + 'type': 'object', + 'properties': { + 'code': { + 'type': 'string', + 'description': 'The Python code that interacts with the browser.', + } + }, + 'required': ['code'], + }, + }, + }, + { + 'type': 'function', + 'function': { + 'name': 'finish', + 'description': 'Finish the interaction.', + }, + }, + ] + example = get_example_for_tools(tools) + assert example.startswith( + "Here's a running example of how to perform a task with the provided tools." + ) + assert ( + 'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.' + in example + ) + + # Check for execute_bash parts (order matters for get_example_for_tools) + assert TOOL_EXAMPLES['execute_bash']['check_dir'].strip() in example + assert TOOL_EXAMPLES['execute_bash']['run_server'].strip() in example + assert TOOL_EXAMPLES['execute_bash']['kill_server'].strip() in example + assert TOOL_EXAMPLES['execute_bash']['run_server_again'].strip() in example + + # Check for str_replace_editor parts + assert TOOL_EXAMPLES['str_replace_editor']['create_file'] in example + assert TOOL_EXAMPLES['str_replace_editor']['edit_file'] in example + + # Check for browser part + assert TOOL_EXAMPLES['browser']['view_page'] in example + + # Check for finish part + assert TOOL_EXAMPLES['finish']['task_completed'] in example + + +def test_get_example_for_tools_all_tools(): + """Test that get_example_for_tools generates correct example with all tools.""" + tools = FNCALL_TOOLS # FNCALL_TOOLS already includes 'finish' + example = get_example_for_tools(tools) + assert example.startswith( + "Here's a running example of how to perform a task with the provided tools." + ) + assert ( + 'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.' + in example + ) + assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example + assert TOOL_EXAMPLES['execute_bash']['run_server'] in example + assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example + assert TOOL_EXAMPLES['str_replace_editor']['create_file'] in example + assert TOOL_EXAMPLES['str_replace_editor']['edit_file'] in example + assert TOOL_EXAMPLES['finish']['task_completed'] in example + + # These are not in global FNCALL_TOOLS + # assert TOOL_EXAMPLES['web_read']['read_docs'] not in example # web_read is removed + assert TOOL_EXAMPLES['browser']['view_page'] not in example + + FNCALL_MESSAGES = [ { 'content': [ @@ -270,7 +530,7 @@ NON_FNCALL_MESSAGES = [ 'content': [ { 'type': 'text', - 'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX + 'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(FNCALL_TOOLS) + "\n/workspace/astropy__astropy__5.1\n\nI've uploaded a python code repository in the directory astropy__astropy__5.1. LONG DESCRIPTION:\n\n" + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, }