"""This file contains the function calling implementation for different actions. This is similar to the functionality of `CodeActResponseParser`. """ import json from litellm import ( ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk, ModelResponse, ) from openhands.core.logger import openhands_logger as logger from openhands.events.action import ( Action, AgentDelegateAction, AgentFinishAction, CmdRunAction, FileEditAction, IPythonRunCellAction, MessageAction, ) from openhands.events.tool import ToolCallMetadata SYSTEM_PROMPT = """You are a helpful assistant that can interact with a computer to solve tasks. * If user provides a path, you should NOT assume it's relative to the current working directory. Instead, you should explore the file system to find the file before working on it. """ _BASH_DESCRIPTION = """Execute a bash command in the terminal. * Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`. * Interactive: If a bash command returns exit code `-1`, this means the process is not yet finished. The assistant must then send a second call to terminal with an empty `command` (which will retrieve any additional logs), or it can send additional text (set `command` to the text) to STDIN of the running process, or it can send command=`ctrl+c` to interrupt the process. * Timeout: If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background. """ CmdRunTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='execute_bash', description=_BASH_DESCRIPTION, parameters={ 'type': 'object', 'properties': { 'command': { 'type': 'string', 'description': 'The bash command to execute. Can be empty to view additional logs when previous exit code is `-1`. Can be `ctrl+c` to interrupt the currently running process.', }, }, 'required': ['command'], }, ), ) _IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment. * The assistant should define variables and import packages before using them. * The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal). """ # We are not using agentskills's file_ops for viewing files now because StrReplaceEditorTool already supports viewing files # """* Apart from the standard Python library, the assistant can also use the following functions (already imported): # {AgentSkillsRequirement.documentation}""" IPythonTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='execute_ipython_cell', description=_IPYTHON_DESCRIPTION, parameters={ 'type': 'object', 'properties': { 'code': { 'type': 'string', 'description': 'The Python code to execute. Supports magic commands like %pip.', }, }, 'required': ['code'], }, ), ) _FILE_EDIT_DESCRIPTION = """Edit a file. * The assistant can edit files by specifying the file path and providing a draft of the new file content. * The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections. * IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines. * To append to a file, set both `start` and `end` to `-1`. * If the file doesn't exist, a new file will be created with the provided content. **Example 1: general edit for short files** For example, given an existing file `/path/to/file.py` that looks like this: (this is the end of the file) 1|class MyClass: 2| def __init__(self): 3| self.x = 1 4| self.y = 2 5| self.z = 3 6| 7|print(MyClass().z) 8|print(MyClass().x) (this is the end of the file) The assistant wants to edit the file to look like this: (this is the end of the file) 1|class MyClass: 2| def __init__(self): 3| self.x = 1 4| self.y = 2 5| 6|print(MyClass().y) (this is the end of the file) The assistant may produce an edit action like this: path="/path/to/file.txt" start=1 end=-1 content=``` class MyClass: def __init__(self): # no changes before self.y = 2 # self.z is removed # MyClass().z is removed print(MyClass().y) ``` **Example 2: append to file for short files** For example, given an existing file `/path/to/file.py` that looks like this: (this is the end of the file) 1|class MyClass: 2| def __init__(self): 3| self.x = 1 4| self.y = 2 5| self.z = 3 6| 7|print(MyClass().z) 8|print(MyClass().x) (this is the end of the file) To append the following lines to the file: ```python print(MyClass().y) ``` The assistant may produce an edit action like this: path="/path/to/file.txt" start=-1 end=-1 content=``` print(MyClass().y) ``` **Example 3: edit for long files** Given an existing file `/path/to/file.py` that looks like this: (1000 more lines above) 1001|class MyClass: 1002| def __init__(self): 1003| self.x = 1 1004| self.y = 2 1005| self.z = 3 1006| 1007|print(MyClass().z) 1008|print(MyClass().x) (2000 more lines below) The assistant wants to edit the file to look like this: (1000 more lines above) 1001|class MyClass: 1002| def __init__(self): 1003| self.x = 1 1004| self.y = 2 1005| 1006|print(MyClass().y) (2000 more lines below) The assistant may produce an edit action like this: path="/path/to/file.txt" start=1001 end=1008 content=``` class MyClass: def __init__(self): # no changes before self.y = 2 # self.z is removed # MyClass().z is removed print(MyClass().y) ``` """ LLMBasedFileEditTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='edit_file', description=_FILE_EDIT_DESCRIPTION, parameters={ 'type': 'object', 'properties': { 'path': { 'type': 'string', 'description': 'The absolute path to the file to be edited.', }, 'new_content_draft': { 'type': 'string', 'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.', }, 'start': { 'type': 'integer', 'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.', }, 'end': { 'type': 'integer', 'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).', }, }, 'required': ['path', 'content'], }, ), ) _STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files * State is persistent across command calls and discussions with the user * If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep * The `create` command cannot be used if the specified `path` already exists as a file * If a `command` generates a long output, it will be truncated and marked with `` * The `undo_edit` command will revert the last edit made to the file at `path` Notes for using the `str_replace` command: * The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces! * If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique * The `new_str` parameter should contain the edited lines that should replace the `old_str` """ StrReplaceEditorTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='str_replace_editor', description=_STR_REPLACE_EDITOR_DESCRIPTION, parameters={ 'type': 'object', 'properties': { 'command': { 'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.', 'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'], 'type': 'string', }, 'path': { 'description': 'Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.', 'type': 'string', }, 'file_text': { 'description': 'Required parameter of `create` command, with the content of the file to be created.', 'type': 'string', }, 'old_str': { 'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.', 'type': 'string', }, 'new_str': { 'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.', 'type': 'string', }, 'insert_line': { 'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.', 'type': 'integer', }, 'view_range': { 'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.', 'items': {'type': 'integer'}, 'type': 'array', }, }, 'required': ['command', 'path'], }, ), ) _BROWSER_DELEGATION = """Delegate the task to another browsing agent. The assistant should delegate the task if it needs to browse the Internet. """ BrowserDelegationTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='delegate_to_browsing_agent', description=_BROWSER_DELEGATION, parameters={ 'type': 'object', 'properties': { 'task': { 'type': 'string', 'description': 'The task for the browsing agent to execute. It should include all the necessary context and specify what information the browsing agent should return.', }, }, 'required': ['task'], }, ), ) _FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task.""" FinishTool = ChatCompletionToolParam( type='function', function=ChatCompletionToolParamFunctionChunk( name='finish', description=_FINISH_DESCRIPTION, ), ) def combine_thought(action: Action, thought: str) -> Action: if not hasattr(action, 'thought'): return action if thought: action.thought = thought return action def response_to_actions(response: ModelResponse) -> list[Action]: actions: list[Action] = [] assert len(response.choices) == 1, 'Only one choice is supported for now' assistant_msg = response.choices[0].message if assistant_msg.tool_calls: # Check if there's assistant_msg.content. If so, add it to the thought thought = '' if isinstance(assistant_msg.content, str): thought = assistant_msg.content elif isinstance(assistant_msg.content, list): for msg in assistant_msg.content: if msg['type'] == 'text': thought += msg['text'] # Process each tool call to OpenHands action for i, tool_call in enumerate(assistant_msg.tool_calls): action: Action try: arguments = json.loads(tool_call.function.arguments) except json.decoder.JSONDecodeError as e: raise RuntimeError( f'Failed to parse tool call arguments: {tool_call.function.arguments}' ) from e if tool_call.function.name == 'execute_bash': action = CmdRunAction(**arguments) elif tool_call.function.name == 'execute_ipython_cell': action = IPythonRunCellAction(**arguments) elif tool_call.function.name == 'delegate_to_browsing_agent': action = AgentDelegateAction( agent='BrowsingAgent', inputs=arguments, ) elif tool_call.function.name == 'finish': action = AgentFinishAction() elif tool_call.function.name == 'edit_file': action = FileEditAction(**arguments) elif tool_call.function.name == 'str_replace_editor': # We implement this in agent_skills, which can be used via Jupyter # convert tool_call.function.arguments to kwargs that can be passed to file_editor code = f'print(file_editor(**{arguments}))' logger.debug( f'TOOL CALL: str_replace_editor -> file_editor with code: {code}' ) action = IPythonRunCellAction(code=code, include_extra=False) else: raise RuntimeError(f'Unknown tool call: {tool_call.function.name}') # We only add thought to the first action if i == 0: action = combine_thought(action, thought) # Add metadata for tool calling action.tool_call_metadata = ToolCallMetadata( tool_call_id=tool_call.id, function_name=tool_call.function.name, model_response=response, total_calls_in_response=len(assistant_msg.tool_calls), ) actions.append(action) else: actions.append( MessageAction(content=assistant_msg.content, wait_for_response=True) ) assert len(actions) >= 1 return actions def get_tools( codeact_enable_browsing_delegate: bool = False, codeact_enable_llm_editor: bool = False, codeact_enable_jupyter: bool = False, ) -> list[ChatCompletionToolParam]: tools = [CmdRunTool, FinishTool] if codeact_enable_browsing_delegate: tools.append(BrowserDelegationTool) if codeact_enable_jupyter: tools.append(IPythonTool) if codeact_enable_llm_editor: tools.append(LLMBasedFileEditTool) else: tools.append(StrReplaceEditorTool) return tools