Allow Generalized SWE-Bench format for evaluation (#3752)

* allow generalized swe-bench format

* Update run_infer.py

* fix linter

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
This commit is contained in:
Jiayi Pan 2024-09-06 06:05:00 -07:00 committed by GitHub
parent 57187417b7
commit 43c4a7fff4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -456,6 +456,12 @@ if __name__ == '__main__':
# Results file inside the evaluation output directory; the same path is handed
# to both prepare_dataset and run_evaluation below.
output_file = os.path.join(metadata.eval_output_dir, 'output.jsonl')
instances = prepare_dataset(swe_bench_tests, output_file, args.eval_n_limit)

# Datasets in a generalized SWE-Bench format may carry the test lists as real
# sequences rather than strings. When the first row is not already a string,
# rewrite both columns as the string form of a Python list so every row uses
# one representation.
# NOTE(review): presumably downstream code expects the stringified form used
# by the original SWE-Bench dataset -- confirm against process_instance.
#
# The length guard is required: probing the first row of an empty selection
# would raise IndexError. The probe is positional (.iloc[0]) so it always
# yields a single cell, even if index labels happen to repeat.
if len(instances) > 0 and not isinstance(
    instances['PASS_TO_PASS'].iloc[0], str
):
    for col in ['PASS_TO_PASS', 'FAIL_TO_PASS']:
        instances[col] = instances[col].apply(lambda x: str(list(x)))

run_evaluation(
    instances, metadata, output_file, args.eval_num_workers, process_instance
)