Restore previous browsing agent behavior when evaluating on WebArena and miniwob++ only (#2341)

* restore eval mode

* fix
This commit is contained in:
Frank Xu 2024-06-09 04:10:02 -04:00 committed by GitHub
parent fab8c9003b
commit bd00f0f049
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -28,6 +28,11 @@ USE_CONCISE_ANSWER = (
os.environ.get('USE_CONCISE_ANSWER', 'false') == 'true'
) # only return concise answer when running webarena and miniwob benchmarks
# Evaluation mode for the WebArena and miniwob++ benchmarks: NAV actions are
# disabled and the agent only returns a concise answer.
EVAL_MODE = bool(USE_CONCISE_ANSWER and not USE_NAV)
class BrowsingAgent(Agent):
VERSION = '1.0'
@ -118,6 +123,12 @@ class BrowsingAgent(Agent):
last_obs = None
last_action = None
if EVAL_MODE and len(state.history) == 1:
# For WebArena and miniwob++ evaluation, the initial observation is already present in the browser env,
# so we initialize and retrieve that first observation by issuing a noop action.
# For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to the desired website.
return BrowseInteractiveAction(browser_actions='noop()')
for prev_action, obs in state.history:
if isinstance(prev_action, BrowseInteractiveAction):
prev_actions.append(prev_action.browser_actions)
@ -130,6 +141,9 @@ class BrowsingAgent(Agent):
# agent has responded, task finish.
return AgentFinishAction(outputs={'content': prev_action.content})
if EVAL_MODE:
prev_actions = prev_actions[1:] # remove the first noop action
prev_action_str = '\n'.join(prev_actions)
# If the final BrowseInteractiveAction executed BrowserGym's send_msg_to_user,
# we should also send a message back to the user in OpenDevin and call it a day.