mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Restore previous browsing agent behavior when evaluating on WebArena and miniwob++ only (#2341)
* restore eval mode * fix
This commit is contained in:
parent
fab8c9003b
commit
bd00f0f049
@ -28,6 +28,11 @@ USE_CONCISE_ANSWER = (
|
||||
os.environ.get('USE_CONCISE_ANSWER', 'false') == 'true'
|
||||
) # only return concise answer when running webarena and miniwob benchmarks
|
||||
|
||||
if not USE_NAV and USE_CONCISE_ANSWER:
|
||||
EVAL_MODE = True # NAV actions are disabled and only a concise answer is returned; used for the webarena and miniwob benchmarks
|
||||
else:
|
||||
EVAL_MODE = False
|
||||
|
||||
|
||||
class BrowsingAgent(Agent):
|
||||
VERSION = '1.0'
|
||||
@ -118,6 +123,12 @@ class BrowsingAgent(Agent):
|
||||
last_obs = None
|
||||
last_action = None
|
||||
|
||||
if EVAL_MODE and len(state.history) == 1:
|
||||
# for webarena and miniwob++ eval, we need to retrieve the initial observation already in browser env
|
||||
# initialize and retrieve the first observation by issuing a noop action
|
||||
# For non-benchmark browsing, the browser env starts with a blank page, and the agent is expected to first navigate to desired websites
|
||||
return BrowseInteractiveAction(browser_actions='noop()')
|
||||
|
||||
for prev_action, obs in state.history:
|
||||
if isinstance(prev_action, BrowseInteractiveAction):
|
||||
prev_actions.append(prev_action.browser_actions)
|
||||
@ -130,6 +141,9 @@ class BrowsingAgent(Agent):
|
||||
# agent has responded, task finish.
|
||||
return AgentFinishAction(outputs={'content': prev_action.content})
|
||||
|
||||
if EVAL_MODE:
|
||||
prev_actions = prev_actions[1:] # remove the first noop action
|
||||
|
||||
prev_action_str = '\n'.join(prev_actions)
|
||||
# if the final BrowseInteractiveAction executed BrowserGym's send_msg_to_user,
|
||||
# we should also send a message back to the user in OpenDevin and call it a day
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user