mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
* Add gaia test * Improve gaia prompts * Fix browser_env hang bug * Fix gaia bugs * add gaia to eval readme * Fix gaia bugs * minor fix * add run_infer.sh and update readme * set num eval worker to 1 * default to 2023 gaia level1 subset * default to level 1 * add prompt to instruct model enclose answer within <solution> tag * add missing break --------- Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk> Co-authored-by: yufansong <yufan@risingwave-labs.com> Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
29 lines
727 B
Python
29 lines
727 B
Python
import argparse
|
|
import json
|
|
|
|
|
|
def main():
    """Print the GAIA success rate recorded in an agent's output.jsonl file.

    The path is taken from the ``--file`` command-line option. Every
    non-blank line of the file is parsed as one JSON object. The script
    prints the ``metadata`` entry of the first record, then the number of
    records whose ``test_result['score']`` is truthy over the total number
    of records.

    If the file contains no records, a short notice is printed and no
    success rate is computed.

    Raises:
        SystemExit: Raised by argparse when ``--file`` is not supplied.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``metadata`` (first record only) or
            ``test_result``/``score``.
    """
    parser = argparse.ArgumentParser(description="Get agent's gaia score")
    # Required: without a path, open(None) would fail with an opaque TypeError.
    parser.add_argument(
        '--file',
        type=str,
        required=True,
        help="Path to the agent's output.jsonl",
    )
    args = parser.parse_args()
    this_log = args.file

    # Decode as UTF-8 rather than the platform's locale encoding, and skip
    # blank lines so stray empty lines do not break json.loads.
    with open(this_log, 'r', encoding='utf-8') as f:
        outs = [json.loads(line) for line in f if line.strip()]
    print(f'Reading {this_log}')

    # Guard the empty case: outs[0] and success/total are both undefined
    # when there are no records.
    if not outs:
        print('No records found; nothing to score.')
        return

    print(f'Metadata:\n {outs[0]["metadata"]}')

    total = len(outs)
    success = sum(1 for out in outs if out['test_result']['score'])
    print(f'Success rate: {success}/{total} = {success/total}')
|
|
|
|
|
|
# Script entry point: run the report only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
|