"""This file contains the function calling implementation for different actions.
This is similar to the functionality of `CodeActResponseParser`.
"""
import json
from browsergym.core.action.highlevel import HighLevelActionSet
from litellm import (
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ModelResponse,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
FileEditAction,
IPythonRunCellAction,
MessageAction,
)
from openhands.events.tool import ToolCallMetadata
# System prompt text. It is two newline-terminated lines: a role statement and
# one bullet about how to treat user-supplied paths.
SYSTEM_PROMPT = (
    "You are a helpful assistant that can interact with a computer to solve tasks.\n"
    "* If user provides a path, you should NOT assume it's relative to the current working directory. "
    "Instead, you should explore the file system to find the file before working on it.\n"
)
# Description text passed as `description` for the `execute_bash` tool.
# One literal per prompt line: a summary followed by three bullets
# (long-running commands, interactive processes, timeouts).
_BASH_DESCRIPTION = (
    'Execute a bash command in the terminal.\n'
    '* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.\n'
    '* Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process.\n'
    '* Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background.\n'
)
# Argument schema for `execute_bash`: a single required string, `command`.
_CMD_RUN_TOOL_PARAMETERS = {
    'type': 'object',
    'properties': {
        'command': {
            'type': 'string',
            'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.',
        },
    },
    'required': ['command'],
}

# Function-calling tool definition that exposes bash command execution.
CmdRunTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='execute_bash',
        description=_BASH_DESCRIPTION,
        parameters=_CMD_RUN_TOOL_PARAMETERS,
    ),
)
# Description text passed as `description` for the `execute_ipython_cell` tool:
# a summary line followed by two usage bullets, each newline-terminated.
_IPYTHON_DESCRIPTION = (
    'Run a cell of Python code in an IPython environment.\n'
    '* The assistant should define variables and import packages before using them.\n'
    '* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).\n'
)
# We no longer use agentskills' file_ops for viewing files, because StrReplaceEditorTool already supports viewing files.
# """* Apart from the standard Python library, the assistant can also use the following functions (already imported):
# {AgentSkillsRequirement.documentation}"""
# Argument schema for `execute_ipython_cell`: a single required string, `code`.
_IPYTHON_TOOL_PARAMETERS = {
    'type': 'object',
    'properties': {
        'code': {
            'type': 'string',
            'description': 'The Python code to execute. Supports magic commands like %pip.',
        },
    },
    'required': ['code'],
}

# Function-calling tool definition that exposes IPython cell execution.
IPythonTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='execute_ipython_cell',
        description=_IPYTHON_DESCRIPTION,
        parameters=_IPYTHON_TOOL_PARAMETERS,
    ),
)
# Description text passed as `description` for the `edit_file` tool.
# It contains the usage rules followed by three worked examples (short-file
# edit, append, long-file range edit). File views in the examples open with a
# "beginning of the file" marker and close with an "end of the file" marker,
# every example edit action targets the same `/path/to/file.py` that the
# example introduces, and the Python snippets keep their indentation so the
# drafts shown are valid code.
_FILE_EDIT_DESCRIPTION = """Edit a file.
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
* To append to a file, set both `start` and `end` to `-1`.
* If the file doesn't exist, a new file will be created with the provided content.

**Example 1: general edit for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the beginning of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|        self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)

The assistant wants to edit the file to look like this:
(this is the beginning of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|
6|print(MyClass().y)
(this is the end of the file)

The assistant may produce an edit action like this:
path="/path/to/file.py" start=1 end=-1
content=```
class MyClass:
    def __init__(self):
        # no changes before
        self.y = 2
        # self.z is removed

# MyClass().z is removed
print(MyClass().y)
```

**Example 2: append to file for short files**
For example, given an existing file `/path/to/file.py` that looks like this:
(this is the beginning of the file)
1|class MyClass:
2|    def __init__(self):
3|        self.x = 1
4|        self.y = 2
5|        self.z = 3
6|
7|print(MyClass().z)
8|print(MyClass().x)
(this is the end of the file)

To append the following lines to the file:
```python
print(MyClass().y)
```

The assistant may produce an edit action like this:
path="/path/to/file.py" start=-1 end=-1
content=```
print(MyClass().y)
```

**Example 3: edit for long files**
Given an existing file `/path/to/file.py` that looks like this:
(1000 more lines above)
1001|class MyClass:
1002|    def __init__(self):
1003|        self.x = 1
1004|        self.y = 2
1005|        self.z = 3
1006|
1007|print(MyClass().z)
1008|print(MyClass().x)
(2000 more lines below)

The assistant wants to edit the file to look like this:
(1000 more lines above)
1001|class MyClass:
1002|    def __init__(self):
1003|        self.x = 1
1004|        self.y = 2
1005|
1006|print(MyClass().y)
(2000 more lines below)

The assistant may produce an edit action like this:
path="/path/to/file.py" start=1001 end=1008
content=```
class MyClass:
    def __init__(self):
        # no changes before
        self.y = 2
        # self.z is removed

# MyClass().z is removed
print(MyClass().y)
```
"""
# Function-calling tool definition for LLM-based file editing (`edit_file`).
# The draft-content argument is named `content`, so the schema agrees with its
# own `required` list and with the `content=` usage shown in
# _FILE_EDIT_DESCRIPTION. A `required` entry that names an undefined property
# gives the model no declared way to supply that argument.
LLMBasedFileEditTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='edit_file',
        description=_FILE_EDIT_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': {
                'path': {
                    'type': 'string',
                    'description': 'The absolute path to the file to be edited.',
                },
                'content': {
                    'type': 'string',
                    'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
                },
                'start': {
                    'type': 'integer',
                    'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
                },
                'end': {
                    'type': 'integer',
                    'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
                },
            },
            # `start` and `end` stay optional; their descriptions state the defaults.
            'required': ['path', 'content'],
        },
    ),
)
# Description text passed as `description` for the `str_replace_editor` tool:
# general behaviour of the commands, followed by notes specific to
# `str_replace`. The truncation bullet names the literal marker
# (`<response clipped>`) so the model can recognise clipped output.
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files
* State is persistent across command calls and discussions with the user
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
* The `create` command cannot be used if the specified `path` already exists as a file
* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
* The `undo_edit` command will revert the last edit made to the file at `path`
Notes for using the `str_replace` command:
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
"""
# Argument definitions for `str_replace_editor`, keyed by argument name.
# `command` selects the operation; each remaining argument's description
# states which command(s) it belongs to.
_STR_REPLACE_EDITOR_PROPERTIES = {
    'command': {
        'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
        'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
        'type': 'string',
    },
    'path': {
        'description': 'Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.',
        'type': 'string',
    },
    'file_text': {
        'description': 'Required parameter of `create` command, with the content of the file to be created.',
        'type': 'string',
    },
    'old_str': {
        'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
        'type': 'string',
    },
    'new_str': {
        'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
        'type': 'string',
    },
    'insert_line': {
        'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
        'type': 'integer',
    },
    'view_range': {
        'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
        'items': {'type': 'integer'},
        'type': 'array',
    },
}

# Function-calling tool definition for the string-replace file editor.
# Only `command` and `path` are required at the schema level.
StrReplaceEditorTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='str_replace_editor',
        description=_STR_REPLACE_EDITOR_DESCRIPTION,
        parameters={
            'type': 'object',
            'properties': _STR_REPLACE_EDITOR_PROPERTIES,
            'required': ['command', 'path'],
        },
    ),
)
# Browser action space, built from browsergym's HighLevelActionSet
# (see browsergym/core/action/highlevel.py) with the 'bid' and 'nav' subsets.
_browser_action_space = HighLevelActionSet(
    # allow the agent to take multiple actions at once
    multiaction=True,
    # be less strict when parsing the actions
    strict=False,
    subsets=['bid', 'nav'],
)
_BROWSER_DESCRIPTION = """Interact with the browser using Python code.
The following 15 functions are available. Nothing else is supported.
goto(url: str)
Description: Navigate to a url.
Examples:
goto('http://www.example.com')
go_back()
Description: Navigate to the previous page in history.
Examples:
go_back()
go_forward()
Description: Navigate to the next page in history.
Examples:
go_forward()
noop(wait_ms: float = 1000)
Description: Do nothing, and optionally wait for the given time (in milliseconds).
You can use this to get the current page content and/or wait for the page to load.
Examples:
noop()
noop(500)
scroll(delta_x: float, delta_y: float)
Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
Examples:
scroll(0, 200)
scroll(-50.2, -100.5)
fill(bid: str, value: str)
Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for ,