197 lines
7.8 KiB
Python

"""This file contains the function calling implementation for different actions.
This is similar to the functionality of `CodeActResponseParser`.
"""
import json
from litellm import (
ChatCompletionToolParam,
ModelResponse,
)
from openhands.agenthub.codeact_agent.tools import (
BrowserTool,
CmdRunTool,
FinishTool,
IPythonTool,
LLMBasedFileEditTool,
StrReplaceEditorTool,
WebReadTool,
)
from openhands.core.exceptions import (
FunctionCallNotExistsError,
FunctionCallValidationError,
)
from openhands.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
BrowseInteractiveAction,
BrowseURLAction,
CmdRunAction,
FileEditAction,
FileReadAction,
IPythonRunCellAction,
MessageAction,
)
from openhands.events.event import FileEditSource, FileReadSource
from openhands.events.tool import ToolCallMetadata
def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
return action
if thought:
action.thought = thought
return action
def response_to_actions(response: ModelResponse) -> list[Action]:
actions: list[Action] = []
assert len(response.choices) == 1, 'Only one choice is supported for now'
choice = response.choices[0]
assistant_msg = choice.message
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
# Check if there's assistant_msg.content. If so, add it to the thought
thought = ''
if isinstance(assistant_msg.content, str):
thought = assistant_msg.content
elif isinstance(assistant_msg.content, list):
for msg in assistant_msg.content:
if msg['type'] == 'text':
thought += msg['text']
# Process each tool call to OpenHands action
for i, tool_call in enumerate(assistant_msg.tool_calls):
action: Action
try:
arguments = json.loads(tool_call.function.arguments)
except json.decoder.JSONDecodeError as e:
raise RuntimeError(
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
) from e
if tool_call.function.name == CmdRunTool['function']['name']:
if 'command' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "command" in tool call {tool_call.function.name}'
)
# convert is_input to boolean
is_input = arguments.get('is_input', 'false') == 'true'
action = CmdRunAction(command=arguments['command'], is_input=is_input)
elif tool_call.function.name == IPythonTool['function']['name']:
if 'code' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "code" in tool call {tool_call.function.name}'
)
action = IPythonRunCellAction(code=arguments['code'])
elif tool_call.function.name == 'delegate_to_browsing_agent':
action = AgentDelegateAction(
agent='BrowsingAgent',
inputs=arguments,
)
elif tool_call.function.name == FinishTool['function']['name']:
action = AgentFinishAction()
elif tool_call.function.name == LLMBasedFileEditTool['function']['name']:
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
if 'content' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "content" in tool call {tool_call.function.name}'
)
action = FileEditAction(
path=arguments['path'],
content=arguments['content'],
start=arguments.get('start', 1),
end=arguments.get('end', -1),
)
elif tool_call.function.name == StrReplaceEditorTool['function']['name']:
if 'command' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "command" in tool call {tool_call.function.name}'
)
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
path = arguments['path']
command = arguments['command']
other_kwargs = {
k: v for k, v in arguments.items() if k not in ['command', 'path']
}
if command == 'view':
action = FileReadAction(
path=path,
impl_source=FileReadSource.OH_ACI,
view_range=other_kwargs.get('view_range', None),
)
else:
if 'view_range' in other_kwargs:
# Remove view_range from other_kwargs since it is not needed for FileEditAction
other_kwargs.pop('view_range')
action = FileEditAction(
path=path,
command=command,
impl_source=FileEditSource.OH_ACI,
**other_kwargs,
)
elif tool_call.function.name == BrowserTool['function']['name']:
if 'code' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "code" in tool call {tool_call.function.name}'
)
action = BrowseInteractiveAction(browser_actions=arguments['code'])
elif tool_call.function.name == WebReadTool['function']['name']:
if 'url' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "url" in tool call {tool_call.function.name}'
)
action = BrowseURLAction(url=arguments['url'])
else:
raise FunctionCallNotExistsError(
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
)
# We only add thought to the first action
if i == 0:
action = combine_thought(action, thought)
# Add metadata for tool calling
action.tool_call_metadata = ToolCallMetadata(
tool_call_id=tool_call.id,
function_name=tool_call.function.name,
model_response=response,
total_calls_in_response=len(assistant_msg.tool_calls),
)
actions.append(action)
else:
actions.append(
MessageAction(
content=str(assistant_msg.content) if assistant_msg.content else '',
wait_for_response=True,
)
)
assert len(actions) >= 1
return actions
def get_tools(
codeact_enable_browsing: bool = False,
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
) -> list[ChatCompletionToolParam]:
tools = [CmdRunTool, FinishTool]
if codeact_enable_browsing:
tools.append(WebReadTool)
tools.append(BrowserTool)
if codeact_enable_jupyter:
tools.append(IPythonTool)
if codeact_enable_llm_editor:
tools.append(LLMBasedFileEditTool)
else:
tools.append(StrReplaceEditorTool)
return tools