mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Graham Neubig <neubig@gmail.com>
41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
import argparse
|
|
import json
|
|
|
|
import browsergym.visualwebarena # noqa F401 register visualwebarena tasks as gym environments
|
|
import gymnasium as gym
|
|
|
|
# Command-line interface: one required positional argument naming the
# JSONL results file that the main section of this script reads.
parser = argparse.ArgumentParser(
    description='Calculate average reward.',
)
parser.add_argument(
    'output_path',
    type=str,
    help='path to output.jsonl',
)

# Parsed at module level; the main section below reads `args.output_path`.
args = parser.parse_args()
|
|
|
|
def summarize_results(lines, total_num):
    """Aggregate reward and cost statistics from JSONL result records.

    Args:
        lines: Iterable of strings, each holding one JSON object with the
            keys ``metrics.accumulated_cost`` and ``test_result.reward``.
            Blank lines are skipped.
        total_num: Denominator used for the success rate (the number of
            registered tasks, which may exceed the number of records).

    Returns:
        A dict with the keys ``total_reward``, ``total_cost``,
        ``actual_num``, ``avg_reward`` and ``avg_cost``. Both averages fall
        back to ``0.0`` when their denominator is zero instead of raising
        ``ZeroDivisionError``.
    """
    total_reward = 0
    total_cost = 0
    actual_num = 0
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        data = json.loads(line)
        # Cost is accumulated for every record, including those whose
        # reward is negative.
        total_cost += data['metrics']['accumulated_cost']
        reward = data['test_result']['reward']
        # A negative reward is excluded from both the reward sum and the
        # finished count (presumably a sentinel for a run that did not
        # complete -- confirm against whatever writes output.jsonl).
        if reward >= 0:
            total_reward += reward
            actual_num += 1
    return {
        'total_reward': total_reward,
        'total_cost': total_cost,
        'actual_num': actual_num,
        'avg_reward': total_reward / total_num if total_num else 0.0,
        'avg_cost': total_cost / actual_num if actual_num else 0.0,
    }


if __name__ == '__main__':
    # Every registered gym environment id under the visualwebarena prefix
    # counts as one task for the success-rate denominator.
    env_ids = [
        env_id
        for env_id in gym.envs.registry.keys()
        if env_id.startswith('browsergym/visualwebarena')
    ]
    total_num = len(env_ids)
    print('Total number of tasks: ', total_num)

    with open(args.output_path, 'r') as f:
        summary = summarize_results(f, total_num)

    print('Total reward: ', summary['total_reward'])
    print('Success Rate: ', summary['avg_reward'])

    print('Avg Cost: ', summary['avg_cost'])
    print('Total Cost: ', summary['total_cost'])
    print('Actual number of tasks finished: ', summary['actual_num'])
|