mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 13:52:43 +08:00
Allow Generalized SWE-Bench format for evaluation (#3752)
* allow generalized swe-bench format
* Update run_infer.py
* fix linter

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
This commit is contained in:
parent
57187417b7
commit
43c4a7fff4
@ -456,6 +456,12 @@ if __name__ == '__main__':
|
||||
# Results for this run are written to `output.jsonl` inside the evaluation
# output directory.
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

# Generalized SWE-Bench format: when the first PASS_TO_PASS entry is not
# already a string, rewrite both test-list columns as the string form of a
# list so every row has the same representation.
#
# The `len(instances) > 0` guard matters: `.index[0]` raises IndexError when
# there are no rows to evaluate (presumably possible when everything is already
# recorded in `output_file` -- confirm against `prepare_dataset`). An explicit
# length check is used because `instances` looks like a pandas DataFrame (TODO
# confirm), whose truth value is ambiguous.
if len(instances) > 0 and not isinstance(
    instances['PASS_TO_PASS'][instances['PASS_TO_PASS'].index[0]], str
):
    for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
        instances[col] = instances[col].apply(lambda x: str(list(x)))

run_evaluation(
    instances, metadata, output_file, args.eval_num_workers, process_instance
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user