mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
refactor: codeact tools into separate files (#6978)
This commit is contained in:
parent
4b7cca9bdf
commit
c4ba54122e
@ -5,13 +5,20 @@ This is similar to the functionality of `CodeActResponseParser`.
|
||||
|
||||
import json
|
||||
|
||||
from browsergym.core.action.highlevel import HighLevelActionSet
|
||||
from litellm import (
|
||||
ChatCompletionToolParam,
|
||||
ChatCompletionToolParamFunctionChunk,
|
||||
ModelResponse,
|
||||
)
|
||||
|
||||
from openhands.agenthub.codeact_agent.tools import (
|
||||
BrowserTool,
|
||||
CmdRunTool,
|
||||
FinishTool,
|
||||
IPythonTool,
|
||||
LLMBasedFileEditTool,
|
||||
StrReplaceEditorTool,
|
||||
WebReadTool,
|
||||
)
|
||||
from openhands.core.exceptions import (
|
||||
FunctionCallNotExistsError,
|
||||
FunctionCallValidationError,
|
||||
@ -31,438 +38,6 @@ from openhands.events.action import (
|
||||
from openhands.events.event import FileEditSource, FileReadSource
|
||||
from openhands.events.tool import ToolCallMetadata
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
|
||||
* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.
|
||||
"""
|
||||
|
||||
CmdRunTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_bash',
|
||||
description=_BASH_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
|
||||
},
|
||||
'is_input': {
|
||||
'type': 'string',
|
||||
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
|
||||
'enum': ['true', 'false'],
|
||||
},
|
||||
},
|
||||
'required': ['command'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
_IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment.
|
||||
* The assistant should define variables and import packages before using them.
|
||||
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
|
||||
"""
|
||||
|
||||
IPythonTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_ipython_cell',
|
||||
description=_IPYTHON_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'code': {
|
||||
'type': 'string',
|
||||
'description': 'The Python code to execute. Supports magic commands like %pip.',
|
||||
},
|
||||
},
|
||||
'required': ['code'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
_FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format.
|
||||
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
|
||||
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
|
||||
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
|
||||
* To append to a file, set both `start` and `end` to `-1`.
|
||||
* If the file doesn't exist, a new file will be created with the provided content.
|
||||
|
||||
**Example 1: general edit for short files**
|
||||
For example, given an existing file `/path/to/file.py` that looks like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5| self.z = 3
|
||||
6|
|
||||
7|print(MyClass().z)
|
||||
8|print(MyClass().x)
|
||||
(this is the end of the file)
|
||||
|
||||
The assistant wants to edit the file to look like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5|
|
||||
6|print(MyClass().y)
|
||||
(this is the end of the file)
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=1 end=-1
|
||||
content=```
|
||||
class MyClass:
|
||||
def __init__(self):
|
||||
# no changes before
|
||||
self.y = 2
|
||||
# self.z is removed
|
||||
|
||||
# MyClass().z is removed
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
**Example 2: append to file for short files**
|
||||
For example, given an existing file `/path/to/file.py` that looks like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5| self.z = 3
|
||||
6|
|
||||
7|print(MyClass().z)
|
||||
8|print(MyClass().x)
|
||||
(this is the end of the file)
|
||||
|
||||
To append the following lines to the file:
|
||||
```python
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=-1 end=-1
|
||||
content=```
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
**Example 3: edit for long files**
|
||||
|
||||
Given an existing file `/path/to/file.py` that looks like this:
|
||||
(1000 more lines above)
|
||||
1001|class MyClass:
|
||||
1002| def __init__(self):
|
||||
1003| self.x = 1
|
||||
1004| self.y = 2
|
||||
1005| self.z = 3
|
||||
1006|
|
||||
1007|print(MyClass().z)
|
||||
1008|print(MyClass().x)
|
||||
(2000 more lines below)
|
||||
|
||||
The assistant wants to edit the file to look like this:
|
||||
|
||||
(1000 more lines above)
|
||||
1001|class MyClass:
|
||||
1002| def __init__(self):
|
||||
1003| self.x = 1
|
||||
1004| self.y = 2
|
||||
1005|
|
||||
1006|print(MyClass().y)
|
||||
(2000 more lines below)
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=1001 end=1008
|
||||
content=```
|
||||
class MyClass:
|
||||
def __init__(self):
|
||||
# no changes before
|
||||
self.y = 2
|
||||
# self.z is removed
|
||||
|
||||
# MyClass().z is removed
|
||||
print(MyClass().y)
|
||||
```
|
||||
"""
|
||||
|
||||
LLMBasedFileEditTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='edit_file',
|
||||
description=_FILE_EDIT_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'path': {
|
||||
'type': 'string',
|
||||
'description': 'The absolute path to the file to be edited.',
|
||||
},
|
||||
'content': {
|
||||
'type': 'string',
|
||||
'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
|
||||
},
|
||||
'start': {
|
||||
'type': 'integer',
|
||||
'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
|
||||
},
|
||||
'end': {
|
||||
'type': 'integer',
|
||||
'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
|
||||
},
|
||||
},
|
||||
'required': ['path', 'content'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
* State is persistent across command calls and discussions with the user
|
||||
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
|
||||
* The `create` command cannot be used if the specified `path` already exists as a file
|
||||
* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
|
||||
* The `undo_edit` command will revert the last edit made to the file at `path`
|
||||
|
||||
Notes for using the `str_replace` command:
|
||||
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
|
||||
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
|
||||
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
|
||||
"""
|
||||
|
||||
StrReplaceEditorTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='str_replace_editor',
|
||||
description=_STR_REPLACE_EDITOR_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
|
||||
'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
|
||||
'type': 'string',
|
||||
},
|
||||
'path': {
|
||||
'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
|
||||
'type': 'string',
|
||||
},
|
||||
'file_text': {
|
||||
'description': 'Required parameter of `create` command, with the content of the file to be created.',
|
||||
'type': 'string',
|
||||
},
|
||||
'old_str': {
|
||||
'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
|
||||
'type': 'string',
|
||||
},
|
||||
'new_str': {
|
||||
'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
|
||||
'type': 'string',
|
||||
},
|
||||
'insert_line': {
|
||||
'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
|
||||
'type': 'integer',
|
||||
},
|
||||
'view_range': {
|
||||
'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
|
||||
'items': {'type': 'integer'},
|
||||
'type': 'array',
|
||||
},
|
||||
},
|
||||
'required': ['command', 'path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `web_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.).
|
||||
|
||||
You may use the `web_read` tool to read content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
|
||||
"""
|
||||
|
||||
WebReadTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='web_read',
|
||||
description=_WEB_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'url': {
|
||||
'type': 'string',
|
||||
'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
|
||||
}
|
||||
},
|
||||
'required': ['url'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
# from browsergym/core/action/highlevel.py
|
||||
_browser_action_space = HighLevelActionSet(
|
||||
subsets=['bid', 'nav'],
|
||||
strict=False, # less strict on the parsing of the actions
|
||||
multiaction=True, # enable to agent to take multiple actions at once
|
||||
)
|
||||
|
||||
|
||||
_BROWSER_DESCRIPTION = """Interact with the browser using Python code. Use it ONLY when you need to interact with a webpage.
|
||||
|
||||
See the description of "code" parameter for more details.
|
||||
|
||||
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
|
||||
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
|
||||
fill('a12', 'example with "quotes"')
|
||||
click('a51')
|
||||
click('48', button='middle', modifiers=['Shift'])
|
||||
"""
|
||||
|
||||
_BROWSER_TOOL_DESCRIPTION = """
|
||||
The following 15 functions are available. Nothing else is supported.
|
||||
|
||||
goto(url: str)
|
||||
Description: Navigate to a url.
|
||||
Examples:
|
||||
goto('http://www.example.com')
|
||||
|
||||
go_back()
|
||||
Description: Navigate to the previous page in history.
|
||||
Examples:
|
||||
go_back()
|
||||
|
||||
go_forward()
|
||||
Description: Navigate to the next page in history.
|
||||
Examples:
|
||||
go_forward()
|
||||
|
||||
noop(wait_ms: float = 1000)
|
||||
Description: Do nothing, and optionally wait for the given time (in milliseconds).
|
||||
You can use this to get the current page content and/or wait for the page to load.
|
||||
Examples:
|
||||
noop()
|
||||
|
||||
noop(500)
|
||||
|
||||
scroll(delta_x: float, delta_y: float)
|
||||
Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
|
||||
Examples:
|
||||
scroll(0, 200)
|
||||
|
||||
scroll(-50.2, -100.5)
|
||||
|
||||
fill(bid: str, value: str)
|
||||
Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for <input>, <textarea> and [contenteditable] elements.
|
||||
Examples:
|
||||
fill('237', 'example value')
|
||||
|
||||
fill('45', 'multi-line\nexample')
|
||||
|
||||
fill('a12', 'example with "quotes"')
|
||||
|
||||
select_option(bid: str, options: str | list[str])
|
||||
Description: Select one or multiple options in a <select> element. You can specify option value or label to select. Multiple options can be selected.
|
||||
Examples:
|
||||
select_option('a48', 'blue')
|
||||
|
||||
select_option('c48', ['red', 'green', 'blue'])
|
||||
|
||||
click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
|
||||
Description: Click an element.
|
||||
Examples:
|
||||
click('a51')
|
||||
|
||||
click('b22', button='right')
|
||||
|
||||
click('48', button='middle', modifiers=['Shift'])
|
||||
|
||||
dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
|
||||
Description: Double click an element.
|
||||
Examples:
|
||||
dblclick('12')
|
||||
|
||||
dblclick('ca42', button='right')
|
||||
|
||||
dblclick('178', button='middle', modifiers=['Shift'])
|
||||
|
||||
hover(bid: str)
|
||||
Description: Hover over an element.
|
||||
Examples:
|
||||
hover('b8')
|
||||
|
||||
press(bid: str, key_comb: str)
|
||||
Description: Focus the matching element and press a combination of keys. It accepts the logical key names that are emitted in the keyboardEvent.key property of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can alternatively specify a single character you'd like to produce such as "a" or "#". Following modification shortcuts are also supported: Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on Windows and Linux and to Meta on macOS.
|
||||
Examples:
|
||||
press('88', 'Backspace')
|
||||
|
||||
press('a26', 'ControlOrMeta+a')
|
||||
|
||||
press('a61', 'Meta+Shift+t')
|
||||
|
||||
focus(bid: str)
|
||||
Description: Focus the matching element.
|
||||
Examples:
|
||||
focus('b455')
|
||||
|
||||
clear(bid: str)
|
||||
Description: Clear the input field.
|
||||
Examples:
|
||||
clear('996')
|
||||
|
||||
drag_and_drop(from_bid: str, to_bid: str)
|
||||
Description: Perform a drag & drop. Hover the element that will be dragged. Press left mouse button. Move mouse to the element that will receive the drop. Release left mouse button.
|
||||
Examples:
|
||||
drag_and_drop('56', '498')
|
||||
|
||||
upload_file(bid: str, file: str | list[str])
|
||||
Description: Click an element and wait for a "filechooser" event, then select one or multiple input files for upload. Relative file paths are resolved relative to the current working directory. An empty list clears the selected files.
|
||||
Examples:
|
||||
upload_file('572', '/home/user/my_receipt.pdf')
|
||||
|
||||
upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
|
||||
"""
|
||||
|
||||
|
||||
for _, action in _browser_action_space.action_set.items():
|
||||
assert (
|
||||
action.signature in _BROWSER_TOOL_DESCRIPTION
|
||||
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
|
||||
assert (
|
||||
action.description in _BROWSER_TOOL_DESCRIPTION
|
||||
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
|
||||
|
||||
BrowserTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='browser',
|
||||
description=_BROWSER_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'code': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The Python code that interacts with the browser.\n'
|
||||
+ _BROWSER_TOOL_DESCRIPTION
|
||||
),
|
||||
}
|
||||
},
|
||||
'required': ['code'],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
|
||||
|
||||
FinishTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='finish',
|
||||
description=_FINISH_DESCRIPTION,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def combine_thought(action: Action, thought: str) -> Action:
|
||||
if not hasattr(action, 'thought'):
|
||||
@ -496,7 +71,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
raise RuntimeError(
|
||||
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
|
||||
) from e
|
||||
if tool_call.function.name == 'execute_bash':
|
||||
if tool_call.function.name == CmdRunTool['function']['name']:
|
||||
if 'command' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "command" in tool call {tool_call.function.name}'
|
||||
@ -504,7 +79,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
# convert is_input to boolean
|
||||
is_input = arguments.get('is_input', 'false') == 'true'
|
||||
action = CmdRunAction(command=arguments['command'], is_input=is_input)
|
||||
elif tool_call.function.name == 'execute_ipython_cell':
|
||||
elif tool_call.function.name == IPythonTool['function']['name']:
|
||||
if 'code' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "code" in tool call {tool_call.function.name}'
|
||||
@ -515,9 +90,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
agent='BrowsingAgent',
|
||||
inputs=arguments,
|
||||
)
|
||||
elif tool_call.function.name == 'finish':
|
||||
elif tool_call.function.name == FinishTool['function']['name']:
|
||||
action = AgentFinishAction()
|
||||
elif tool_call.function.name == 'edit_file':
|
||||
elif tool_call.function.name == LLMBasedFileEditTool['function']['name']:
|
||||
if 'path' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "path" in tool call {tool_call.function.name}'
|
||||
@ -532,7 +107,7 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
start=arguments.get('start', 1),
|
||||
end=arguments.get('end', -1),
|
||||
)
|
||||
elif tool_call.function.name == 'str_replace_editor':
|
||||
elif tool_call.function.name == StrReplaceEditorTool['function']['name']:
|
||||
if 'command' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "command" in tool call {tool_call.function.name}'
|
||||
@ -563,13 +138,13 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
|
||||
impl_source=FileEditSource.OH_ACI,
|
||||
**other_kwargs,
|
||||
)
|
||||
elif tool_call.function.name == 'browser':
|
||||
elif tool_call.function.name == BrowserTool['function']['name']:
|
||||
if 'code' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "code" in tool call {tool_call.function.name}'
|
||||
)
|
||||
action = BrowseInteractiveAction(browser_actions=arguments['code'])
|
||||
elif tool_call.function.name == 'web_read':
|
||||
elif tool_call.function.name == WebReadTool['function']['name']:
|
||||
if 'url' not in arguments:
|
||||
raise FunctionCallValidationError(
|
||||
f'Missing required argument "url" in tool call {tool_call.function.name}'
|
||||
|
||||
17
openhands/agenthub/codeact_agent/tools/__init__.py
Normal file
17
openhands/agenthub/codeact_agent/tools/__init__.py
Normal file
@ -0,0 +1,17 @@
|
||||
from .bash import CmdRunTool
|
||||
from .browser import BrowserTool
|
||||
from .finish import FinishTool
|
||||
from .ipython import IPythonTool
|
||||
from .llm_based_edit import LLMBasedFileEditTool
|
||||
from .str_replace_editor import StrReplaceEditorTool
|
||||
from .web_read import WebReadTool
|
||||
|
||||
__all__ = [
|
||||
'BrowserTool',
|
||||
'CmdRunTool',
|
||||
'FinishTool',
|
||||
'IPythonTool',
|
||||
'LLMBasedFileEditTool',
|
||||
'StrReplaceEditorTool',
|
||||
'WebReadTool',
|
||||
]
|
||||
30
openhands/agenthub/codeact_agent/tools/bash.py
Normal file
30
openhands/agenthub/codeact_agent/tools/bash.py
Normal file
@ -0,0 +1,30 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_BASH_DESCRIPTION = """Execute a bash command in the terminal.
|
||||
* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`.
|
||||
* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process.
|
||||
* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.
|
||||
"""
|
||||
|
||||
CmdRunTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_bash',
|
||||
description=_BASH_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'type': 'string',
|
||||
'description': 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.',
|
||||
},
|
||||
'is_input': {
|
||||
'type': 'string',
|
||||
'description': 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.',
|
||||
'enum': ['true', 'false'],
|
||||
},
|
||||
},
|
||||
'required': ['command'],
|
||||
},
|
||||
),
|
||||
)
|
||||
155
openhands/agenthub/codeact_agent/tools/browser.py
Normal file
155
openhands/agenthub/codeact_agent/tools/browser.py
Normal file
@ -0,0 +1,155 @@
|
||||
from browsergym.core.action.highlevel import HighLevelActionSet
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
# from browsergym/core/action/highlevel.py
|
||||
_browser_action_space = HighLevelActionSet(
|
||||
subsets=['bid', 'nav'],
|
||||
strict=False, # less strict on the parsing of the actions
|
||||
multiaction=True, # enable to agent to take multiple actions at once
|
||||
)
|
||||
|
||||
|
||||
_BROWSER_DESCRIPTION = """Interact with the browser using Python code. Use it ONLY when you need to interact with a webpage.
|
||||
|
||||
See the description of "code" parameter for more details.
|
||||
|
||||
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
|
||||
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
|
||||
fill('a12', 'example with "quotes"')
|
||||
click('a51')
|
||||
click('48', button='middle', modifiers=['Shift'])
|
||||
"""
|
||||
|
||||
_BROWSER_TOOL_DESCRIPTION = """
|
||||
The following 15 functions are available. Nothing else is supported.
|
||||
|
||||
goto(url: str)
|
||||
Description: Navigate to a url.
|
||||
Examples:
|
||||
goto('http://www.example.com')
|
||||
|
||||
go_back()
|
||||
Description: Navigate to the previous page in history.
|
||||
Examples:
|
||||
go_back()
|
||||
|
||||
go_forward()
|
||||
Description: Navigate to the next page in history.
|
||||
Examples:
|
||||
go_forward()
|
||||
|
||||
noop(wait_ms: float = 1000)
|
||||
Description: Do nothing, and optionally wait for the given time (in milliseconds).
|
||||
You can use this to get the current page content and/or wait for the page to load.
|
||||
Examples:
|
||||
noop()
|
||||
|
||||
noop(500)
|
||||
|
||||
scroll(delta_x: float, delta_y: float)
|
||||
Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event.
|
||||
Examples:
|
||||
scroll(0, 200)
|
||||
|
||||
scroll(-50.2, -100.5)
|
||||
|
||||
fill(bid: str, value: str)
|
||||
Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for <input>, <textarea> and [contenteditable] elements.
|
||||
Examples:
|
||||
fill('237', 'example value')
|
||||
|
||||
fill('45', 'multi-line\nexample')
|
||||
|
||||
fill('a12', 'example with "quotes"')
|
||||
|
||||
select_option(bid: str, options: str | list[str])
|
||||
Description: Select one or multiple options in a <select> element. You can specify option value or label to select. Multiple options can be selected.
|
||||
Examples:
|
||||
select_option('a48', 'blue')
|
||||
|
||||
select_option('c48', ['red', 'green', 'blue'])
|
||||
|
||||
click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
|
||||
Description: Click an element.
|
||||
Examples:
|
||||
click('a51')
|
||||
|
||||
click('b22', button='right')
|
||||
|
||||
click('48', button='middle', modifiers=['Shift'])
|
||||
|
||||
dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
|
||||
Description: Double click an element.
|
||||
Examples:
|
||||
dblclick('12')
|
||||
|
||||
dblclick('ca42', button='right')
|
||||
|
||||
dblclick('178', button='middle', modifiers=['Shift'])
|
||||
|
||||
hover(bid: str)
|
||||
Description: Hover over an element.
|
||||
Examples:
|
||||
hover('b8')
|
||||
|
||||
press(bid: str, key_comb: str)
|
||||
Description: Focus the matching element and press a combination of keys. It accepts the logical key names that are emitted in the keyboardEvent.key property of the keyboard events: Backquote, Minus, Equal, Backslash, Backspace, Tab, Delete, Escape, ArrowDown, End, Enter, Home, Insert, PageDown, PageUp, ArrowRight, ArrowUp, F1 - F12, Digit0 - Digit9, KeyA - KeyZ, etc. You can alternatively specify a single character you'd like to produce such as "a" or "#". Following modification shortcuts are also supported: Shift, Control, Alt, Meta, ShiftLeft, ControlOrMeta. ControlOrMeta resolves to Control on Windows and Linux and to Meta on macOS.
|
||||
Examples:
|
||||
press('88', 'Backspace')
|
||||
|
||||
press('a26', 'ControlOrMeta+a')
|
||||
|
||||
press('a61', 'Meta+Shift+t')
|
||||
|
||||
focus(bid: str)
|
||||
Description: Focus the matching element.
|
||||
Examples:
|
||||
focus('b455')
|
||||
|
||||
clear(bid: str)
|
||||
Description: Clear the input field.
|
||||
Examples:
|
||||
clear('996')
|
||||
|
||||
drag_and_drop(from_bid: str, to_bid: str)
|
||||
Description: Perform a drag & drop. Hover the element that will be dragged. Press left mouse button. Move mouse to the element that will receive the drop. Release left mouse button.
|
||||
Examples:
|
||||
drag_and_drop('56', '498')
|
||||
|
||||
upload_file(bid: str, file: str | list[str])
|
||||
Description: Click an element and wait for a "filechooser" event, then select one or multiple input files for upload. Relative file paths are resolved relative to the current working directory. An empty list clears the selected files.
|
||||
Examples:
|
||||
upload_file('572', '/home/user/my_receipt.pdf')
|
||||
|
||||
upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
|
||||
"""
|
||||
|
||||
|
||||
for _, action in _browser_action_space.action_set.items():
|
||||
assert (
|
||||
action.signature in _BROWSER_TOOL_DESCRIPTION
|
||||
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
|
||||
assert (
|
||||
action.description in _BROWSER_TOOL_DESCRIPTION
|
||||
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
|
||||
|
||||
BrowserTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='browser',
|
||||
description=_BROWSER_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'code': {
|
||||
'type': 'string',
|
||||
'description': (
|
||||
'The Python code that interacts with the browser.\n'
|
||||
+ _BROWSER_TOOL_DESCRIPTION
|
||||
),
|
||||
}
|
||||
},
|
||||
'required': ['code'],
|
||||
},
|
||||
),
|
||||
)
|
||||
11
openhands/agenthub/codeact_agent/tools/finish.py
Normal file
11
openhands/agenthub/codeact_agent/tools/finish.py
Normal file
@ -0,0 +1,11 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
|
||||
|
||||
FinishTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='finish',
|
||||
description=_FINISH_DESCRIPTION,
|
||||
),
|
||||
)
|
||||
24
openhands/agenthub/codeact_agent/tools/ipython.py
Normal file
24
openhands/agenthub/codeact_agent/tools/ipython.py
Normal file
@ -0,0 +1,24 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_IPYTHON_DESCRIPTION = """Run a cell of Python code in an IPython environment.
|
||||
* The assistant should define variables and import packages before using them.
|
||||
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
|
||||
"""
|
||||
|
||||
IPythonTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='execute_ipython_cell',
|
||||
description=_IPYTHON_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'code': {
|
||||
'type': 'string',
|
||||
'description': 'The Python code to execute. Supports magic commands like %pip.',
|
||||
},
|
||||
},
|
||||
'required': ['code'],
|
||||
},
|
||||
),
|
||||
)
|
||||
137
openhands/agenthub/codeact_agent/tools/llm_based_edit.py
Normal file
137
openhands/agenthub/codeact_agent/tools/llm_based_edit.py
Normal file
@ -0,0 +1,137 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_FILE_EDIT_DESCRIPTION = """Edit a file in plain-text format.
|
||||
* The assistant can edit files by specifying the file path and providing a draft of the new file content.
|
||||
* The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections.
|
||||
* IMPORTANT: For large files (e.g., > 300 lines), specify the range of lines to edit using `start` and `end` (1-indexed, inclusive). The range should be smaller than 300 lines.
|
||||
* To append to a file, set both `start` and `end` to `-1`.
|
||||
* If the file doesn't exist, a new file will be created with the provided content.
|
||||
|
||||
**Example 1: general edit for short files**
|
||||
For example, given an existing file `/path/to/file.py` that looks like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5| self.z = 3
|
||||
6|
|
||||
7|print(MyClass().z)
|
||||
8|print(MyClass().x)
|
||||
(this is the end of the file)
|
||||
|
||||
The assistant wants to edit the file to look like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5|
|
||||
6|print(MyClass().y)
|
||||
(this is the end of the file)
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=1 end=-1
|
||||
content=```
|
||||
class MyClass:
|
||||
def __init__(self):
|
||||
# no changes before
|
||||
self.y = 2
|
||||
# self.z is removed
|
||||
|
||||
# MyClass().z is removed
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
**Example 2: append to file for short files**
|
||||
For example, given an existing file `/path/to/file.py` that looks like this:
|
||||
(this is the end of the file)
|
||||
1|class MyClass:
|
||||
2| def __init__(self):
|
||||
3| self.x = 1
|
||||
4| self.y = 2
|
||||
5| self.z = 3
|
||||
6|
|
||||
7|print(MyClass().z)
|
||||
8|print(MyClass().x)
|
||||
(this is the end of the file)
|
||||
|
||||
To append the following lines to the file:
|
||||
```python
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=-1 end=-1
|
||||
content=```
|
||||
print(MyClass().y)
|
||||
```
|
||||
|
||||
**Example 3: edit for long files**
|
||||
|
||||
Given an existing file `/path/to/file.py` that looks like this:
|
||||
(1000 more lines above)
|
||||
1001|class MyClass:
|
||||
1002| def __init__(self):
|
||||
1003| self.x = 1
|
||||
1004| self.y = 2
|
||||
1005| self.z = 3
|
||||
1006|
|
||||
1007|print(MyClass().z)
|
||||
1008|print(MyClass().x)
|
||||
(2000 more lines below)
|
||||
|
||||
The assistant wants to edit the file to look like this:
|
||||
|
||||
(1000 more lines above)
|
||||
1001|class MyClass:
|
||||
1002| def __init__(self):
|
||||
1003| self.x = 1
|
||||
1004| self.y = 2
|
||||
1005|
|
||||
1006|print(MyClass().y)
|
||||
(2000 more lines below)
|
||||
|
||||
The assistant may produce an edit action like this:
|
||||
path="/path/to/file.txt" start=1001 end=1008
|
||||
content=```
|
||||
class MyClass:
|
||||
def __init__(self):
|
||||
# no changes before
|
||||
self.y = 2
|
||||
# self.z is removed
|
||||
|
||||
# MyClass().z is removed
|
||||
print(MyClass().y)
|
||||
```
|
||||
"""
|
||||
|
||||
LLMBasedFileEditTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='edit_file',
|
||||
description=_FILE_EDIT_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'path': {
|
||||
'type': 'string',
|
||||
'description': 'The absolute path to the file to be edited.',
|
||||
},
|
||||
'content': {
|
||||
'type': 'string',
|
||||
'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
|
||||
},
|
||||
'start': {
|
||||
'type': 'integer',
|
||||
'description': 'The starting line number for the edit (1-indexed, inclusive). Default is 1.',
|
||||
},
|
||||
'end': {
|
||||
'type': 'integer',
|
||||
'description': 'The ending line number for the edit (1-indexed, inclusive). Default is -1 (end of file).',
|
||||
},
|
||||
},
|
||||
'required': ['path', 'content'],
|
||||
},
|
||||
),
|
||||
)
|
||||
58
openhands/agenthub/codeact_agent/tools/str_replace_editor.py
Normal file
58
openhands/agenthub/codeact_agent/tools/str_replace_editor.py
Normal file
@ -0,0 +1,58 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_STR_REPLACE_EDITOR_DESCRIPTION = """Custom editing tool for viewing, creating and editing files in plain-text format
|
||||
* State is persistent across command calls and discussions with the user
|
||||
* If `path` is a file, `view` displays the result of applying `cat -n`. If `path` is a directory, `view` lists non-hidden files and directories up to 2 levels deep
|
||||
* The `create` command cannot be used if the specified `path` already exists as a file
|
||||
* If a `command` generates a long output, it will be truncated and marked with `<response clipped>`
|
||||
* The `undo_edit` command will revert the last edit made to the file at `path`
|
||||
|
||||
Notes for using the `str_replace` command:
|
||||
* The `old_str` parameter should match EXACTLY one or more consecutive lines from the original file. Be mindful of whitespaces!
|
||||
* If the `old_str` parameter is not unique in the file, the replacement will not be performed. Make sure to include enough context in `old_str` to make it unique
|
||||
* The `new_str` parameter should contain the edited lines that should replace the `old_str`
|
||||
"""
|
||||
|
||||
StrReplaceEditorTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='str_replace_editor',
|
||||
description=_STR_REPLACE_EDITOR_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'command': {
|
||||
'description': 'The commands to run. Allowed options are: `view`, `create`, `str_replace`, `insert`, `undo_edit`.',
|
||||
'enum': ['view', 'create', 'str_replace', 'insert', 'undo_edit'],
|
||||
'type': 'string',
|
||||
},
|
||||
'path': {
|
||||
'description': 'Absolute path to file or directory, e.g. `/workspace/file.py` or `/workspace`.',
|
||||
'type': 'string',
|
||||
},
|
||||
'file_text': {
|
||||
'description': 'Required parameter of `create` command, with the content of the file to be created.',
|
||||
'type': 'string',
|
||||
},
|
||||
'old_str': {
|
||||
'description': 'Required parameter of `str_replace` command containing the string in `path` to replace.',
|
||||
'type': 'string',
|
||||
},
|
||||
'new_str': {
|
||||
'description': 'Optional parameter of `str_replace` command containing the new string (if not given, no string will be added). Required parameter of `insert` command containing the string to insert.',
|
||||
'type': 'string',
|
||||
},
|
||||
'insert_line': {
|
||||
'description': 'Required parameter of `insert` command. The `new_str` will be inserted AFTER the line `insert_line` of `path`.',
|
||||
'type': 'integer',
|
||||
},
|
||||
'view_range': {
|
||||
'description': 'Optional parameter of `view` command when `path` points to a file. If none is given, the full file is shown. If provided, the file will be shown in the indicated line number range, e.g. [11, 12] will show lines 11 and 12. Indexing at 1 to start. Setting `[start_line, -1]` shows all lines from `start_line` to the end of the file.',
|
||||
'items': {'type': 'integer'},
|
||||
'type': 'array',
|
||||
},
|
||||
},
|
||||
'required': ['command', 'path'],
|
||||
},
|
||||
),
|
||||
)
|
||||
24
openhands/agenthub/codeact_agent/tools/web_read.py
Normal file
24
openhands/agenthub/codeact_agent/tools/web_read.py
Normal file
@ -0,0 +1,24 @@
|
||||
from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk
|
||||
|
||||
_WEB_DESCRIPTION = """Read (convert to markdown) content from a webpage. You should prefer using the `web_read` tool over the `browser` tool, but do use the `browser` tool if you need to interact with a webpage (e.g., click a button, fill out a form, etc.).
|
||||
|
||||
You may use the `web_read` tool to read content from a webpage, and even search the webpage content using a Google search query (e.g., url=`https://www.google.com/search?q=YOUR_QUERY`).
|
||||
"""
|
||||
|
||||
WebReadTool = ChatCompletionToolParam(
|
||||
type='function',
|
||||
function=ChatCompletionToolParamFunctionChunk(
|
||||
name='web_read',
|
||||
description=_WEB_DESCRIPTION,
|
||||
parameters={
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'url': {
|
||||
'type': 'string',
|
||||
'description': 'The URL of the webpage to read. You can also use a Google search query here (e.g., `https://www.google.com/search?q=YOUR_QUERY`).',
|
||||
}
|
||||
},
|
||||
'required': ['url'],
|
||||
},
|
||||
),
|
||||
)
|
||||
@ -5,8 +5,6 @@ from litellm import ChatCompletionMessageToolCall
|
||||
|
||||
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
|
||||
from openhands.agenthub.codeact_agent.function_calling import (
|
||||
_BROWSER_DESCRIPTION,
|
||||
_BROWSER_TOOL_DESCRIPTION,
|
||||
BrowserTool,
|
||||
CmdRunTool,
|
||||
IPythonTool,
|
||||
@ -16,6 +14,10 @@ from openhands.agenthub.codeact_agent.function_calling import (
|
||||
get_tools,
|
||||
response_to_actions,
|
||||
)
|
||||
from openhands.agenthub.codeact_agent.tools.browser import (
|
||||
_BROWSER_DESCRIPTION,
|
||||
_BROWSER_TOOL_DESCRIPTION,
|
||||
)
|
||||
from openhands.controller.state.state import State
|
||||
from openhands.core.config import AgentConfig, LLMConfig
|
||||
from openhands.core.exceptions import FunctionCallNotExistsError
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user