fix(agent, browsing): too long tool description for openai (#4778)

This commit is contained in:
Xingyao Wang 2024-11-07 10:11:08 -06:00 committed by GitHub
parent c3991c870d
commit 8166bf768a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -284,6 +284,17 @@ _browser_action_space = HighLevelActionSet(
_BROWSER_DESCRIPTION = """Interact with the browser using Python code.
See the description of "code" parameter for more details.
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('a51')
click('48', button='middle', modifiers=['Shift'])
"""
_BROWSER_TOOL_DESCRIPTION = """
The following 15 functions are available. Nothing else is supported.
goto(url: str)
@ -385,20 +396,15 @@ upload_file(bid: str, file: str | list[str])
upload_file('572', '/home/user/my_receipt.pdf')
upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
More than 2-3 actions usually leads to failure or unexpected behavior. Example:
fill('a12', 'example with "quotes"')
click('a51')
click('48', button='middle', modifiers=['Shift'])
"""
for _, action in _browser_action_space.action_set.items():
assert (
action.signature in _BROWSER_DESCRIPTION
action.signature in _BROWSER_TOOL_DESCRIPTION
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
assert (
action.description in _BROWSER_DESCRIPTION
action.description in _BROWSER_TOOL_DESCRIPTION
), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
BrowserTool = ChatCompletionToolParam(
@ -411,7 +417,10 @@ BrowserTool = ChatCompletionToolParam(
'properties': {
'code': {
'type': 'string',
'description': 'The Python code that interacts with the browser.',
'description': (
'The Python code that interacts with the browser.\n'
+ _BROWSER_TOOL_DESCRIPTION
),
}
},
'required': ['code'],