mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
import argparse
|
|
import json
|
|
|
|
|
|
def _convert_item(item):
    """Build the output record for one input instance.

    Args:
        item: A decoded JSON object from the input file. The fields read are
            ``org``, ``repo``, ``number``, ``resolved_issues`` and ``base``.

    Returns:
        A dict with the keys ``repo``, ``instance_id``, ``problem_statement``,
        ``FAIL_TO_PASS``, ``PASS_TO_PASS``, ``base_commit`` and ``version``,
        or ``None`` when the instance has no (or empty) ``resolved_issues``.
    """
    resolved_issues = item.get('resolved_issues')
    # A missing, null or empty list are all falsy; no separate len() test needed.
    if not resolved_issues:
        return None

    # Extract the raw fields.
    org = item.get('org', '')
    repo = item.get('repo', '')
    number = str(item.get('number', ''))

    # Only the first resolved issue contributes to the problem statement.
    first_issue = resolved_issues[0]
    # `or ''` also covers an explicit null title/body, not just a missing key.
    title = first_issue.get('title') or ''
    body = first_issue.get('body') or ''

    # A missing or null `base` falls back to an empty commit, like the other
    # fields do, instead of aborting the whole conversion half-way through.
    base = item.get('base') or {}

    return {
        'repo': f'{org}/{repo}',
        'instance_id': f'{org}__{repo}-{number}',
        'problem_statement': title + '\n' + body,
        'FAIL_TO_PASS': [],
        'PASS_TO_PASS': [],
        'base_commit': base.get('sha', ''),
        'version': '0.1',  # placeholder; marked "depends" by the original author
    }


def main(input_file, output_file):
    """Convert a JSONL file of instances into a JSONL file of output records.

    Each non-blank line of ``input_file`` is parsed as JSON and converted with
    ``_convert_item``. Instances without resolved issues are reported on
    stdout and skipped; every other instance is written to ``output_file`` as
    one JSON object per line (non-ASCII characters are kept as-is).

    Args:
        input_file: Path of the UTF-8 encoded input ``.jsonl`` file.
        output_file: Path of the ``.jsonl`` file to create or overwrite.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    with (
        open(input_file, 'r', encoding='utf-8') as fin,
        open(output_file, 'w', encoding='utf-8') as fout,
    ):
        for line in fin:
            line = line.strip()
            if not line:
                continue

            item = json.loads(line)
            new_item = _convert_item(item)

            # Skip instances that don't have resolved_issues or have empty resolved_issues
            if new_item is None:
                print(
                    f'Skipping instance {item.get("org", "")}/{item.get("repo", "")}-{item.get("number", "")} - no resolved_issues'
                )
                continue

            fout.write(json.dumps(new_item, ensure_ascii=False) + '\n')
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: both file paths are mandatory options.
    cli = argparse.ArgumentParser()
    for flag, help_text in (
        ('--input', 'Input .jsonl file path'),
        ('--output', 'Output .jsonl file path'),
    ):
        cli.add_argument(flag, required=True, help=help_text)

    options = cli.parse_args()
    main(options.input, options.output)
|