Qi Liu 3d22520992
[Feat] add multi-swe-bench (#8174)
Co-authored-by: ByteDance User <tiger@bytedance.localdomain>
2025-05-01 00:23:19 +00:00

31 lines
1005 B
Python

import json
# Placeholder paths: replace with the real JSONL source and destination.
input_file = 'XXX.jsonl'
output_file = 'YYY.jsonl'


def convert_item(item: dict) -> dict:
    """Map one source record onto the output record layout.

    Reads ``org``, ``repo``, ``number``, ``resolved_issues`` and ``base``
    from *item*. Missing or ``null`` values fall back to empty strings
    instead of raising, so one incomplete record does not abort a whole
    conversion run.

    Args:
        item: A decoded JSON object from one input line.

    Returns:
        A new dict with the keys ``repo``, ``instance_id``,
        ``problem_statement``, ``FAIL_TO_PASS``, ``PASS_TO_PASS``,
        ``base_commit`` and ``version``, in that order.
    """
    # Extract the raw fields.
    org = item.get("org", "")
    repo = item.get("repo", "")
    number = str(item.get("number", ""))

    # Only the first resolved issue is used. Guard against a missing or
    # empty list, and against explicit nulls: ``.get(key, "")`` returns
    # None when the key is present with a JSON null value.
    issues = item.get("resolved_issues") or [{}]
    first_issue = issues[0] or {}
    title = first_issue.get("title") or ""
    body = first_issue.get("body") or ""

    base = item.get("base") or {}

    return {
        "repo": f"{org}/{repo}",
        "instance_id": f"{org}__{repo}-{number}",
        "problem_statement": title + "\n" + body,
        "FAIL_TO_PASS": [],
        "PASS_TO_PASS": [],
        "base_commit": base.get("sha") or "",
        "version": "0.1",  # placeholder; the right value depends on the dataset
    }


def convert_file(src_path: str, dst_path: str) -> None:
    """Convert every non-blank JSONL line of *src_path* into *dst_path*.

    Each input line is parsed as JSON, passed through ``convert_item`` and
    written as one JSON line. Blank lines are skipped. Non-ASCII text is
    written unescaped (``ensure_ascii=False``).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(src_path, 'r', encoding='utf-8') as fin, \
            open(dst_path, 'w', encoding='utf-8') as fout:
        for line in fin:
            line = line.strip()
            if not line:
                continue
            record = convert_item(json.loads(line))
            fout.write(json.dumps(record, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    convert_file(input_file, output_file)