mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Engel Nyst <engel.nyst@gmail.com> Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
This commit is contained in:
parent
1f390430e5
commit
c17b0ebfc6
@ -10,7 +10,7 @@ The CodeAct agent operates through a function calling interface. At each turn, t
|
||||
2. **CodeAct**: Execute actions through a set of well-defined tools:
|
||||
- Execute Linux `bash` commands with `execute_bash`
|
||||
- Run Python code in an [IPython](https://ipython.org/) environment with `execute_ipython_cell`
|
||||
- Interact with web browsers using `browser` and `web_read`
|
||||
- Interact with web browsers using `browser` and `fetch`
|
||||
- Edit files using `str_replace_editor` or `edit_file`
|
||||
|
||||

|
||||
|
||||
@ -2,6 +2,8 @@ import sys
|
||||
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
from openhands.llm.tool_names import EXECUTE_BASH_TOOL_NAME
|
||||
|
||||
_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session.
|
||||
|
||||
### Command Execution
|
||||
@ -45,7 +47,7 @@ def create_cmd_run_tool(
|
||||
return ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name=refine_prompt('execute_bash'),
|
||||
name=EXECUTE_BASH_TOOL_NAME,
|
||||
description=refine_prompt(description),
|
||||
parameters={
|
||||
'type': 'object',
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
from browsergym.core.action.highlevel import HighLevelActionSet
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
from openhands.llm.tool_names import BROWSER_TOOL_NAME
|
||||
|
||||
# from browsergym/core/action/highlevel.py
|
||||
_browser_action_space = HighLevelActionSet(
|
||||
subsets=['bid', 'nav'],
|
||||
@ -141,7 +143,7 @@ for _, action in _browser_action_space.action_set.items():
|
||||
BrowserTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='browser',
|
||||
name=BROWSER_TOOL_NAME,
|
||||
description=_BROWSER_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
from openhands.llm.tool_names import FINISH_TOOL_NAME
|
||||
|
||||
_FINISH_DESCRIPTION = """Signals the completion of the current task or conversation.
|
||||
|
||||
Use this tool when:
|
||||
@ -18,7 +20,7 @@ The task_completed field should be set to True if you believed you have complete
|
||||
FinishTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='finish',
|
||||
name=FINISH_TOOL_NAME,
|
||||
description=_FINISH_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
from openhands.llm.tool_names import STR_REPLACE_EDITOR_TOOL_NAME
|
||||
|
||||
_DETAILED_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
* State is persistent across command calls and discussions with the user
|
||||
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
|
||||
@ -55,7 +57,7 @@ def create_str_replace_editor_tool(
|
||||
return ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='str_replace_editor',
|
||||
name=STR_REPLACE_EDITOR_TOOL_NAME,
|
||||
description=description,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
|
||||
@ -18,6 +18,12 @@ from openhands.core.exceptions import (
|
||||
FunctionCallConversionError,
|
||||
FunctionCallValidationError,
|
||||
)
|
||||
from openhands.llm.tool_names import (
|
||||
BROWSER_TOOL_NAME,
|
||||
EXECUTE_BASH_TOOL_NAME,
|
||||
FINISH_TOOL_NAME,
|
||||
STR_REPLACE_EDITOR_TOOL_NAME,
|
||||
)
|
||||
|
||||
# Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
|
||||
SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
|
||||
@ -55,14 +61,12 @@ def refine_prompt(prompt: str) -> str:
|
||||
return prompt
|
||||
|
||||
|
||||
# NOTE: we need to make sure this example is always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = refine_prompt("""
|
||||
Here's a running example of how to perform a task with the provided tools.
|
||||
|
||||
--------------------- START OF EXAMPLE ---------------------
|
||||
|
||||
USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.
|
||||
# NOTE: we need to make sure these examples are always in-sync with the tool interface designed in openhands/agenthub/codeact_agent/function_calling.py
|
||||
|
||||
# Example snippets for each tool
|
||||
TOOL_EXAMPLES = {
|
||||
'execute_bash': {
|
||||
'check_dir': """
|
||||
ASSISTANT: Sure! Let me first check the current directory:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
@ -73,29 +77,10 @@ pwd && ls
|
||||
USER: EXECUTION RESULT of [execute_bash]:
|
||||
/workspace
|
||||
openhands@runtime:~/workspace$
|
||||
|
||||
ASSISTANT: There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
|
||||
<function=str_replace_editor>
|
||||
<parameter=command>create</parameter>
|
||||
<parameter=path>/workspace/app.py</parameter>
|
||||
<parameter=file_text>
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/')
|
||||
def index() -> str:
|
||||
numbers = list(range(1, 11))
|
||||
return str(numbers)
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(port=5000)
|
||||
</parameter>
|
||||
</function>
|
||||
|
||||
USER: EXECUTION RESULT of [str_replace_editor]:
|
||||
File created successfully at: /workspace/app.py
|
||||
|
||||
ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
|
||||
""",
|
||||
'run_server': """
|
||||
ASSISTANT:
|
||||
Let me run the Python file for you:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
python3 app.py > server.log 2>&1 &
|
||||
@ -106,7 +91,8 @@ USER: EXECUTION RESULT of [execute_bash]:
|
||||
[1] 121
|
||||
[1]+ Exit 1 python3 app.py > server.log 2>&1
|
||||
|
||||
ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
|
||||
ASSISTANT:
|
||||
Looks like the server was running with PID 121 then crashed. Let me check the server log:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
cat server.log
|
||||
@ -119,7 +105,8 @@ Traceback (most recent call last):
|
||||
from flask import Flask
|
||||
ModuleNotFoundError: No module named 'flask'
|
||||
|
||||
ASSISTANT: Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
|
||||
ASSISTANT:
|
||||
Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
pip3 install flask
|
||||
@ -143,7 +130,8 @@ Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/pyt
|
||||
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
|
||||
Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0
|
||||
|
||||
ASSISTANT: The `flask` module is installed successfully. Let me run the Python file again:
|
||||
ASSISTANT:
|
||||
The `flask` module is installed successfully. Let me run the Python file again:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
python3 app.py > server.log 2>&1 &
|
||||
@ -170,7 +158,8 @@ Press CTRL+C to quit
|
||||
|
||||
ASSISTANT:
|
||||
The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask!
|
||||
|
||||
""",
|
||||
'kill_server': """
|
||||
USER: Now kill the server, make it display the numbers in a table format.
|
||||
|
||||
ASSISTANT:
|
||||
@ -183,7 +172,48 @@ kill 124
|
||||
|
||||
USER: EXECUTION RESULT of [execute_bash]:
|
||||
[1]+ Terminated python3 app.py > server.log 2>&1
|
||||
""",
|
||||
'run_server_again': """
|
||||
ASSISTANT:
|
||||
Running the updated file:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
python3 app.py > server.log 2>&1 &
|
||||
</parameter>
|
||||
</function>
|
||||
|
||||
USER: EXECUTION RESULT of [execute_bash]:
|
||||
[1] 126
|
||||
|
||||
ASSISTANT:
|
||||
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000.
|
||||
""",
|
||||
},
|
||||
'str_replace_editor': {
|
||||
'create_file': """
|
||||
ASSISTANT:
|
||||
There is no `app.py` file in the current directory. Let me create a Python file `app.py`:
|
||||
<function=str_replace_editor>
|
||||
<parameter=command>create</parameter>
|
||||
<parameter=path>/workspace/app.py</parameter>
|
||||
<parameter=file_text>
|
||||
from flask import Flask
|
||||
app = Flask(__name__)
|
||||
|
||||
@app.route('/')
|
||||
def index():
|
||||
numbers = list(range(1, 11))
|
||||
return str(numbers)
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(port=5000)
|
||||
</parameter>
|
||||
</function>
|
||||
|
||||
USER: EXECUTION RESULT of [str_replace_editor]:
|
||||
File created successfully at: /workspace/app.py
|
||||
""",
|
||||
'edit_file': """
|
||||
ASSISTANT:
|
||||
Now let me display the numbers in a table format:
|
||||
<function=str_replace_editor>
|
||||
@ -205,28 +235,100 @@ The file /workspace/app.py has been edited. Here's the result of running `cat -n
|
||||
10 app.run(port=5000)
|
||||
Review the changes and make sure they are as expected. Edit the file again if necessary.
|
||||
|
||||
""",
|
||||
},
|
||||
'browser': {
|
||||
'view_page': """
|
||||
ASSISTANT:
|
||||
Running the updated file:
|
||||
<function=execute_bash>
|
||||
<parameter=command>
|
||||
python3 app.py > server.log 2>&1 &
|
||||
Let me check how the page looks in the browser:
|
||||
<function=browser>
|
||||
<parameter=code>
|
||||
goto('http://127.0.0.1:5000')
|
||||
noop(1000) # Wait for page to load
|
||||
</parameter>
|
||||
</function>
|
||||
|
||||
USER: EXECUTION RESULT of [execute_bash]:
|
||||
[1] 126
|
||||
|
||||
USER: EXECUTION RESULT of [browser]:
|
||||
[Browser shows the numbers in a table format]
|
||||
"""
|
||||
},
|
||||
'finish': {
|
||||
'task_completed': """
|
||||
ASSISTANT:
|
||||
The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests!
|
||||
<function=finish>
|
||||
<parameter=message>The task has been completed. The web server is running and displaying numbers 1-10 in a table format at http://127.0.0.1:5000.</parameter>
|
||||
<parameter=task_completed>true</parameter>
|
||||
</function>
|
||||
"""
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_example_for_tools(tools: list[dict]) -> str:
    """Generate an in-context learning example based on available tools.

    Only the tools named by the constants in ``openhands.llm.tool_names``
    (bash, string-replace editor, browser, finish) contribute snippets; any
    other tool is ignored.

    Args:
        tools: Tool definitions. Each entry must have a ``'type'`` key, and
            entries whose type is ``'function'`` must carry
            ``tool['function']['name']``.

    Returns:
        The assembled example (header, the snippets of the recognised tools in
        walkthrough order, footer) with leading whitespace stripped, or ``''``
        when none of the recognised tools is present.
    """
    # Map the registered tool names onto the keys used by TOOL_EXAMPLES.
    example_keys = {
        EXECUTE_BASH_TOOL_NAME: 'execute_bash',
        STR_REPLACE_EDITOR_TOOL_NAME: 'str_replace_editor',
        BROWSER_TOOL_NAME: 'browser',
        FINISH_TOOL_NAME: 'finish',
    }
    available_tools = {
        example_keys[tool['function']['name']]
        for tool in tools
        if tool['type'] == 'function' and tool['function']['name'] in example_keys
    }

    if not available_tools:
        return ''

    example = """Here's a running example of how to perform a task with the provided tools.

--------------------- START OF EXAMPLE ---------------------

USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.

"""

    # The walkthrough is a fixed story; a step is included only when the tool
    # it demonstrates is available, and the relative order is always kept.
    walkthrough = (
        ('execute_bash', 'check_dir'),
        ('str_replace_editor', 'create_file'),
        ('execute_bash', 'run_server'),
        ('browser', 'view_page'),
        ('execute_bash', 'kill_server'),
        ('str_replace_editor', 'edit_file'),
        ('execute_bash', 'run_server_again'),
        ('finish', 'task_completed'),
    )
    for tool_key, snippet_key in walkthrough:
        if tool_key in available_tools:
            example += TOOL_EXAMPLES[tool_key][snippet_key]

    example += """
--------------------- END OF EXAMPLE ---------------------

Do NOT assume the environment is the same as in the example above.

--------------------- NEW TASK DESCRIPTION ---------------------
"""
    return example.lstrip()
|
||||
|
||||
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX = get_example_for_tools
|
||||
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX = """
|
||||
--------------------- END OF NEW TASK DESCRIPTION ---------------------
|
||||
@ -352,74 +454,42 @@ def convert_fncall_messages_to_non_fncall_messages(
|
||||
# Add in-context learning example for the first user message
|
||||
if not first_user_message_encountered and add_in_context_learning_example:
|
||||
first_user_message_encountered = True
|
||||
# Check tools
|
||||
if not (
|
||||
tools
|
||||
and len(tools) > 0
|
||||
and any(
|
||||
(
|
||||
tool['type'] == 'function'
|
||||
and tool['function']['name']
|
||||
== refine_prompt('execute_bash')
|
||||
and 'command'
|
||||
in tool['function']['parameters']['properties']
|
||||
)
|
||||
for tool in tools
|
||||
)
|
||||
and any(
|
||||
(
|
||||
tool['type'] == 'function'
|
||||
and tool['function']['name'] == 'str_replace_editor'
|
||||
and 'path' in tool['function']['parameters']['properties']
|
||||
and 'file_text'
|
||||
in tool['function']['parameters']['properties']
|
||||
and 'old_str'
|
||||
in tool['function']['parameters']['properties']
|
||||
and 'new_str'
|
||||
in tool['function']['parameters']['properties']
|
||||
)
|
||||
for tool in tools
|
||||
)
|
||||
):
|
||||
raise FunctionCallConversionError(
|
||||
'The currently provided tool set are NOT compatible with the in-context learning example for FnCall to Non-FnCall conversion. '
|
||||
'Please update your tool set OR the in-context learning example in openhands/llm/fn_call_converter.py'
|
||||
)
|
||||
|
||||
# add in-context learning example
|
||||
if isinstance(content, str):
|
||||
content = (
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
|
||||
+ content
|
||||
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
|
||||
)
|
||||
elif isinstance(content, list):
|
||||
if content and content[0]['type'] == 'text':
|
||||
content[0]['text'] = (
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
|
||||
+ content[0]['text']
|
||||
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
|
||||
)
|
||||
# Generate example based on available tools
|
||||
example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
|
||||
|
||||
# Add example if we have any tools
|
||||
if example:
|
||||
# add in-context learning example
|
||||
if isinstance(content, str):
|
||||
content = example + content + IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
|
||||
elif isinstance(content, list):
|
||||
if content and content[0]['type'] == 'text':
|
||||
content[0]['text'] = (
|
||||
example
|
||||
+ content[0]['text']
|
||||
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
|
||||
)
|
||||
else:
|
||||
content = (
|
||||
[
|
||||
{
|
||||
'type': 'text',
|
||||
'text': example,
|
||||
}
|
||||
]
|
||||
+ content
|
||||
+ [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
|
||||
}
|
||||
]
|
||||
)
|
||||
else:
|
||||
content = (
|
||||
[
|
||||
{
|
||||
'type': 'text',
|
||||
'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX,
|
||||
}
|
||||
]
|
||||
+ content
|
||||
+ [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
|
||||
}
|
||||
]
|
||||
raise FunctionCallConversionError(
|
||||
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
|
||||
)
|
||||
else:
|
||||
raise FunctionCallConversionError(
|
||||
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
|
||||
)
|
||||
converted_messages.append(
|
||||
{
|
||||
'role': 'user',
|
||||
@ -604,17 +674,28 @@ def convert_non_fncall_messages_to_fncall_messages(
|
||||
if not first_user_message_encountered:
|
||||
first_user_message_encountered = True
|
||||
if isinstance(content, str):
|
||||
content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, '')
|
||||
content = content.replace(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '')
|
||||
# Remove any existing example
|
||||
if content.startswith(IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)):
|
||||
content = content.replace(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools), '', 1
|
||||
)
|
||||
if content.endswith(IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX):
|
||||
content = content.replace(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
|
||||
)
|
||||
elif isinstance(content, list):
|
||||
for item in content:
|
||||
if item['type'] == 'text':
|
||||
item['text'] = item['text'].replace(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX, ''
|
||||
)
|
||||
item['text'] = item['text'].replace(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, ''
|
||||
)
|
||||
# Remove any existing example
|
||||
example = IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(tools)
|
||||
if item['text'].startswith(example):
|
||||
item['text'] = item['text'].replace(example, '', 1)
|
||||
if item['text'].endswith(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX
|
||||
):
|
||||
item['text'] = item['text'].replace(
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX, '', 1
|
||||
)
|
||||
else:
|
||||
raise FunctionCallConversionError(
|
||||
f'Unexpected content type {type(content)}. Expected str or list. Content: {content}'
|
||||
|
||||
6
openhands/llm/tool_names.py
Normal file
6
openhands/llm/tool_names.py
Normal file
@ -0,0 +1,6 @@
|
||||
"""Constants for tool names used in function calling."""
|
||||
|
||||
# Names under which the built-in tools are exposed for function calling.
# Import these instead of repeating the string literals so that the tool
# definitions and the function-call converter cannot drift apart.
EXECUTE_BASH_TOOL_NAME = 'execute_bash'
STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
BROWSER_TOOL_NAME = 'browser'
FINISH_TOOL_NAME = 'finish'
|
||||
@ -9,12 +9,14 @@ from litellm import ChatCompletionToolParam
|
||||
from openhands.llm.fn_call_converter import (
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_PREFIX,
|
||||
IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
|
||||
TOOL_EXAMPLES,
|
||||
FunctionCallConversionError,
|
||||
convert_fncall_messages_to_non_fncall_messages,
|
||||
convert_from_multiple_tool_calls_to_single_tool_call_messages,
|
||||
convert_non_fncall_messages_to_fncall_messages,
|
||||
convert_tool_call_to_string,
|
||||
convert_tools_to_description,
|
||||
get_example_for_tools,
|
||||
)
|
||||
|
||||
FNCALL_TOOLS: list[ChatCompletionToolParam] = [
|
||||
@ -140,6 +142,264 @@ Allowed values: [`view`, `create`, `str_replace`, `insert`, `undo_edit`]
|
||||
)
|
||||
|
||||
|
||||
def test_get_example_for_tools_no_tools():
    """Test that get_example_for_tools returns empty string when no tools are available."""
    tools = []
    example = get_example_for_tools(tools)
    assert example == ''
|
||||
|
||||
|
||||
def test_get_example_for_tools_single_tool():
    """Test that get_example_for_tools generates correct example with a single tool.

    With only ``execute_bash`` available, every bash step of the walkthrough
    must be present and no snippet of any other tool may appear.
    """
    tools = [
        {
            'type': 'function',
            'function': {
                'name': 'execute_bash',
                'description': 'Execute a bash command in the terminal.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'command': {
                            'type': 'string',
                            'description': 'The bash command to execute.',
                        }
                    },
                    'required': ['command'],
                },
            },
        }
    ]
    example = get_example_for_tools(tools)
    assert example.startswith(
        "Here's a running example of how to perform a task with the provided tools."
    )
    assert (
        'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.'
        in example
    )
    assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server_again'] in example
    assert TOOL_EXAMPLES['str_replace_editor']['create_file'] not in example
    assert TOOL_EXAMPLES['browser']['view_page'] not in example
    assert TOOL_EXAMPLES['finish']['task_completed'] not in example
|
||||
|
||||
|
||||
def test_get_example_for_tools_single_tool_is_finish():
    """Test get_example_for_tools with only the finish tool.

    The tool definition deliberately has no ``parameters`` entry; only the
    tool name is needed to select the snippets.
    """
    tools = [
        {
            'type': 'function',
            'function': {
                'name': 'finish',
                'description': 'Finish the interaction when the task is complete.',
            },
        }
    ]
    example = get_example_for_tools(tools)
    assert example.startswith(
        "Here's a running example of how to perform a task with the provided tools."
    )
    assert (
        'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.'
        in example
    )
    assert TOOL_EXAMPLES['finish']['task_completed'] in example
    assert TOOL_EXAMPLES['execute_bash']['check_dir'] not in example
    assert TOOL_EXAMPLES['str_replace_editor']['create_file'] not in example
    assert TOOL_EXAMPLES['browser']['view_page'] not in example
|
||||
|
||||
|
||||
def test_get_example_for_tools_multiple_tools():
    """Test that get_example_for_tools generates correct example with multiple tools.

    ``execute_bash`` and ``str_replace_editor`` are available, so their
    snippets must be present while the browser and finish snippets are not.
    """
    tools = [
        {
            'type': 'function',
            'function': {
                'name': 'execute_bash',
                'description': 'Execute a bash command in the terminal.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'command': {
                            'type': 'string',
                            'description': 'The bash command to execute.',
                        }
                    },
                    'required': ['command'],
                },
            },
        },
        {
            'type': 'function',
            'function': {
                'name': 'str_replace_editor',
                'description': 'Custom editing tool for viewing, creating and editing files.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'command': {
                            'type': 'string',
                            'description': 'The commands to run.',
                            'enum': [
                                'view',
                                'create',
                                'str_replace',
                                'insert',
                                'undo_edit',
                            ],
                        },
                        'path': {
                            'type': 'string',
                            'description': 'Absolute path to file or directory.',
                        },
                    },
                    'required': ['command', 'path'],
                },
            },
        },
    ]
    example = get_example_for_tools(tools)
    assert example.startswith(
        "Here's a running example of how to perform a task with the provided tools."
    )
    assert (
        'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.'
        in example
    )
    assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server_again'] in example
    assert TOOL_EXAMPLES['str_replace_editor']['create_file'] in example
    assert TOOL_EXAMPLES['str_replace_editor']['edit_file'] in example
    assert TOOL_EXAMPLES['browser']['view_page'] not in example
    assert TOOL_EXAMPLES['finish']['task_completed'] not in example
|
||||
|
||||
|
||||
def test_get_example_for_tools_multiple_tools_with_finish():
    """Test get_example_for_tools with multiple tools including finish.

    All four recognised tools are supplied, so every snippet must be present
    and the snippets must appear in the walkthrough order.
    """
    # Uses the execute_bash, str_replace_editor, browser and finish tools
    tools = [
        {
            'type': 'function',
            'function': {
                'name': 'execute_bash',
                'description': 'Execute a bash command in the terminal.',
                'parameters': {  # Params added for completeness, not strictly needed by get_example_for_tools
                    'type': 'object',
                    'properties': {
                        'command': {
                            'type': 'string',
                            'description': 'The bash command to execute.',
                        }
                    },
                    'required': ['command'],
                },
            },
        },
        {
            'type': 'function',
            'function': {
                'name': 'str_replace_editor',
                'description': 'Custom editing tool for viewing, creating and editing files.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'command': {
                            'type': 'string',
                            'description': 'The commands to run.',
                            'enum': [
                                'view',
                                'create',
                                'str_replace',
                                'insert',
                                'undo_edit',
                            ],
                        },
                        'path': {
                            'type': 'string',
                            'description': 'Absolute path to file or directory.',
                        },
                    },
                    'required': ['command', 'path'],
                },
            },
        },
        {
            'type': 'function',
            'function': {
                'name': 'browser',
                'description': 'Interact with the browser.',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'code': {
                            'type': 'string',
                            'description': 'The Python code that interacts with the browser.',
                        }
                    },
                    'required': ['code'],
                },
            },
        },
        {
            'type': 'function',
            'function': {
                'name': 'finish',
                'description': 'Finish the interaction.',
            },
        },
    ]
    example = get_example_for_tools(tools)
    assert example.startswith(
        "Here's a running example of how to perform a task with the provided tools."
    )
    assert (
        'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.'
        in example
    )

    # Snippets listed in the order get_example_for_tools appends them
    expected_order = [
        TOOL_EXAMPLES['execute_bash']['check_dir'],
        TOOL_EXAMPLES['str_replace_editor']['create_file'],
        TOOL_EXAMPLES['execute_bash']['run_server'],
        TOOL_EXAMPLES['browser']['view_page'],
        TOOL_EXAMPLES['execute_bash']['kill_server'],
        TOOL_EXAMPLES['str_replace_editor']['edit_file'],
        TOOL_EXAMPLES['execute_bash']['run_server_again'],
        TOOL_EXAMPLES['finish']['task_completed'],
    ]

    # Every snippet is appended verbatim, so check exact containment
    for snippet in expected_order:
        assert snippet in example

    # Order matters: the walkthrough has to read as one coherent story
    positions = [example.index(snippet) for snippet in expected_order]
    assert positions == sorted(positions)
|
||||
|
||||
|
||||
def test_get_example_for_tools_all_tools():
    """Test that get_example_for_tools generates correct example with all tools.

    "All tools" means the module-level FNCALL_TOOLS fixture, which this test
    expects to provide bash, the editor and finish but no browser tool.
    """
    tools = FNCALL_TOOLS  # FNCALL_TOOLS already includes 'finish'
    example = get_example_for_tools(tools)
    assert example.startswith(
        "Here's a running example of how to perform a task with the provided tools."
    )
    assert (
        'USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000.'
        in example
    )
    assert TOOL_EXAMPLES['execute_bash']['check_dir'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['kill_server'] in example
    assert TOOL_EXAMPLES['execute_bash']['run_server_again'] in example
    assert TOOL_EXAMPLES['str_replace_editor']['create_file'] in example
    assert TOOL_EXAMPLES['str_replace_editor']['edit_file'] in example
    assert TOOL_EXAMPLES['finish']['task_completed'] in example

    # The browser tool is not in global FNCALL_TOOLS
    assert TOOL_EXAMPLES['browser']['view_page'] not in example
|
||||
|
||||
|
||||
FNCALL_MESSAGES = [
|
||||
{
|
||||
'content': [
|
||||
@ -270,7 +530,7 @@ NON_FNCALL_MESSAGES = [
|
||||
'content': [
|
||||
{
|
||||
'type': 'text',
|
||||
'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX
|
||||
'text': IN_CONTEXT_LEARNING_EXAMPLE_PREFIX(FNCALL_TOOLS)
|
||||
+ "<uploaded_files>\n/workspace/astropy__astropy__5.1\n</uploaded_files>\nI've uploaded a python code repository in the directory astropy__astropy__5.1. LONG DESCRIPTION:\n\n"
|
||||
+ IN_CONTEXT_LEARNING_EXAMPLE_SUFFIX,
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user