"""This file contains the function calling implementation for different actions.
This is similar to the functionality of `CodeActResponseParser`.
"""
import json
from litellm import (
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ModelResponse,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
CmdRunAction,
FileEditAction,
IPythonRunCellAction,
MessageAction,
)
from openhands.events.tool import ToolCallMetadata
SYSTEM_PROMPT = """You are a helpful assistant that can interact with a computer to solve tasks.
* If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it.
"""
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.
* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.
"""
CmdRunTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='execute_bash',
description=_BASH_DESCRIPTION,
parameters={
'type': 'object',
'properties': {
'command': {
'type': 'string',
'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
},
},
'required': ['command'],
},
),
)
_IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment.
* The assistant should define variables and import packages before using them.
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
"""
# We are not using agentskills's file_ops for viewing files now because StrReplaceEditorTool already supports viewing files
# """* Apart from the standard Python library, the assistant can also use the following functions (already imported):
# {AgentSkillsRequirement.documentation}"""
IPythonTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='execute_ipython_cell',
description=_IPYTHON_DESCRIPTION,
parameters={
'type': 'object',
'properties': {
'code': {
'type': 'string',
'description': 'The Python code to execute. Supports magic commands like %pip.',
},
},
'required': ['code'],
},
),
)
_FILE_EDIT_DESCRIPTION = """Edit a file.
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
* To append to a file, set both `start` and `end` to `-1`.
* If the file doesn't exist, a new file will be created with the provided content.
**Example 1: general edit for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the end of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5| self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)
The assistant wants to edit the file to look like this:
(this is the end of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5|
6|print(MyClass().y)
(this is the end of the file)
The assistant may produce an edit action like this:
path="/path/to/file.txt" start=1 end=-1
content=```
class MyClass:
def __init__(self):
# no changes before
self.y = 2
# self.z is removed
# MyClass().z is removed
print(MyClass().y)
```
**Example 2: append to file for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the end of the file)
1|class MyClass:
2| def __init__(self):
3| self.x = 1
4| self.y = 2
5| self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)
To append the following lines to the file:
```python
print(MyClass().y)
```
The assistant may produce an edit action like this:
path="/path/to/file.txt" start=-1 end=-1
content=```
print(MyClass().y)
```
**Example 3: edit for long files**
Given an existing file `/path/to/file.py` that looks like this:
(1000 more lines above)
1001|class MyClass:
1002| def __init__(self):
1003| self.x = 1
1004| self.y = 2
1005| self.z = 3
1006|
1007|print(MyClass().z)
1008|print(MyClass().x)
(2000 more lines below)
The assistant wants to edit the file to look like this:
(1000 more lines above)
1001|class MyClass:
1002| def __init__(self):
1003| self.x = 1
1004| self.y = 2
1005|
1006|print(MyClass().y)
(2000 more lines below)
The assistant may produce an edit action like this:
path="/path/to/file.txt" start=1001 end=1008
content=```
class MyClass:
def __init__(self):
# no changes before
self.y = 2
# self.z is removed
# MyClass().z is removed
print(MyClass().y)
```
"""
LLMBasedFileEditTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='edit_file',
description=_FILE_EDIT_DESCRIPTION,
parameters={
'type': 'object',
'properties': {
'path': {
'type': 'string',
'description': 'The absolute path to the file to be edited.',
},
'new_content_draft': {
'type': 'string',
'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
},
'start': {
'type': 'integer',
'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
},
'end': {
'type': 'integer',
'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
},
},
'required': ['path', 'content'],
},
),
)
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
* State is persistent across command calls and discussions with the user
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
* The `create` command cannot be used if the specified `path` already exists as a file
* If a `command` generates a long output, it will be truncated and marked with ``
* The `undo_edit` command will revert the last edit made to the file at `path`
Notes for using the `str_replace` command:
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
"""
StrReplaceEditorTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='str_replace_editor',
description=_STR_REPLACE_EDITOR_DESCRIPTION,
parameters={
'type': 'object',
'properties': {
'command': {
'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
'type': 'string',
},
'path': {
'description': 'Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.',
'type': 'string',
},
'file_text': {
'description': 'Required parameter of `create` command, with the content of the file to be created.',
'type': 'string',
},
'old_str': {
'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
'type': 'string',
},
'new_str': {
'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
'type': 'string',
},
'insert_line': {
'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
'type': 'integer',
},
'view_range': {
'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
'items': {'type': 'integer'},
'type': 'array',
},
},
'required': ['command', 'path'],
},
),
)
_BROWSER_DELEGATION = """Delegate the task to another browsing agent.
The assistant should delegate the task if it needs to browse the Internet.
"""
BrowserDelegationTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='delegate_to_browsing_agent',
description=_BROWSER_DELEGATION,
parameters={
'type': 'object',
'properties': {
'task': {
'type': 'string',
'description': 'The task for the browsing agent to execute. It should include all the necessary context and specify what information the browsing agent should return.',
},
},
'required': ['task'],
},
),
)
_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
FinishTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='finish',
description=_FINISH_DESCRIPTION,
),
)
def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
return action
if thought:
action.thought = thought
return action
def response_to_actions(response: ModelResponse) -> list[Action]:
actions: list[Action] = []
assert len(response.choices) == 1, 'Only one choice is supported for now'
assistant_msg = response.choices[0].message
if assistant_msg.tool_calls:
# Check if there's assistant_msg.content. If so, add it to the thought
thought = ''
if isinstance(assistant_msg.content, str):
thought = assistant_msg.content
elif isinstance(assistant_msg.content, list):
for msg in assistant_msg.content:
if msg['type'] == 'text':
thought += msg['text']
# Process each tool call to OpenHands action
for i, tool_call in enumerate(assistant_msg.tool_calls):
action: Action
try:
arguments = json.loads(tool_call.function.arguments)
except json.decoder.JSONDecodeError as e:
raise RuntimeError(
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
) from e
if tool_call.function.name == 'execute_bash':
action = CmdRunAction(**arguments)
elif tool_call.function.name == 'execute_ipython_cell':
action = IPythonRunCellAction(**arguments)
elif tool_call.function.name == 'delegate_to_browsing_agent':
action = AgentDelegateAction(
agent='BrowsingAgent',
inputs=arguments,
)
elif tool_call.function.name == 'finish':
action = AgentFinishAction()
elif tool_call.function.name == 'edit_file':
action = FileEditAction(**arguments)
elif tool_call.function.name == 'str_replace_editor':
# We implement this in agent_skills, which can be used via Jupyter
# convert tool_call.function.arguments to kwargs that can be passed to file_editor
code = f'print(file_editor(**{arguments}))'
logger.debug(
f'TOOL CALL: str_replace_editor -> file_editor with code: {code}'
)
action = IPythonRunCellAction(code=code, include_extra=False)
else:
raise RuntimeError(f'Unknown tool call: {tool_call.function.name}')
# We only add thought to the first action
if i == 0:
action = combine_thought(action, thought)
# Add metadata for tool calling
action.tool_call_metadata = ToolCallMetadata(
tool_call_id=tool_call.id,
function_name=tool_call.function.name,
model_response=response,
total_calls_in_response=len(assistant_msg.tool_calls),
)
actions.append(action)
else:
actions.append(
MessageAction(content=assistant_msg.content, wait_for_response=True)
)
assert len(actions) >= 1
return actions
def get_tools(
codeact_enable_browsing_delegate: bool = False,
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
) -> list[ChatCompletionToolParam]:
tools = [CmdRunTool, FinishTool]
if codeact_enable_browsing_delegate:
tools.append(BrowserDelegationTool)
if codeact_enable_jupyter:
tools.append(IPythonTool)
if codeact_enable_llm_editor:
tools.append(LLMBasedFileEditTool)
else:
tools.append(StrReplaceEditorTool)
return tools