"""block completion"""
|
||
|
||
import copy
import gc
import json
import os
import time
from multiprocessing import Process

import tiktoken
import torch
from vllm import LLM, SamplingParams

# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
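# Note: the commented-out line above would limit visibility to two GPUs; the LLM() call
# below uses tensor_parallel_size=4, which assumes four GPUs are visible.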


def truncate_text(text, max_tokens):
    """Truncate `text` to at most `max_tokens` tokens using the cl100k_base tokenizer."""
    encoding = tiktoken.get_encoding('cl100k_base')
    disallowed_special = ()

    tokens = encoding.encode(text, disallowed_special=disallowed_special)
    print(len(tokens))  # log the original token count

    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]

    truncated_text = encoding.decode(tokens)

    return truncated_text
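
# truncate_text is not called in the pipeline below; if prompts ever exceed the model's
# context window, it could be applied before generation, e.g. (illustrative cap):
#     test_list = [truncate_text(p, 4096) for p in test_list]
# Token counts are approximate here, since cl100k_base is not the tokenizer of these models.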


# Local checkpoint paths of the models to evaluate.
model_list = ['/data2/base models/starcoder2-15b', '/data2/base models/CodeGemma-7B']


def run_inference(model_name, origin_data_list):
    """Generate block completions for every sample with one model and save the raw outputs."""
    temp_data_list = copy.deepcopy(origin_data_list)
    test_list = []
    for data in temp_data_list:
        version = data['dependency'] + data['version']  # package == x.x.x
        description = data['description']  # functional description of the target code

        instruction = build_prompt(version, description)
        test_list.append(instruction)

    sampling_params = SamplingParams(n=6, temperature=0.8, top_p=0.95, max_tokens=64)
    llm = LLM(
        model=model_name,
        tensor_parallel_size=4,
        gpu_memory_utilization=0.9,
        swap_space=20,
    )

    outputs = llm.generate(test_list, sampling_params)
    for output in outputs:
        # request_id is assigned in submission order, so it indexes back into temp_data_list
        requests_id = int(output.request_id)
        temp_ans_list = []
        output_list = output.outputs
        for o in output_list:
            text = o.text
            temp_ans_list.append(text)

        temp_data_list[requests_id]['model_output'] = str(temp_ans_list)

    save_folder_path = os.path.join(
        '../data/result_data/block_completion', model_name.split('/')[-1]
    )
    if not os.path.exists(save_folder_path):
        os.makedirs(save_folder_path)

    save_json_path = os.path.join(save_folder_path, json_path.split('/')[-1])

    with open(save_json_path, 'w', encoding='utf-8') as fw:
        json.dump(temp_data_list, fw, indent=4, ensure_ascii=False)

    gc.collect()
    torch.cuda.empty_cache()


def build_prompt(version, description) -> str:
    """Build the block-completion prompt.
    :param version: dependency package and version, e.g. "package==x.x.x"
    :param description: functional description of the code to generate
    :return: prompt string
    """
    prompt = f"""
You are a professional Python engineer, and I will provide functional descriptions and versions of specified dependency packages.
You need to write code in Python to implement this feature based on the functional description and using the dependency package and version I specified.
Please note that you only need to return the code that implements the function, and do not return any other content.
Please use <start> and <end> to enclose the generated code. Here is an example:
###Function Description:
The function of this code is to print the results predicted by calling the model using vllm.
###dependency and version:
vllm==0.3.3
###response:
<start>
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print("Prompt,Generated text")
<end>

###Function Description:
{description}
###dependency and version:
{version}
###response:
"""
    return prompt


json_path = '../data/test_data/VersiCode_block_completion.json'

if __name__ == '__main__':
    with open(json_path, 'r', encoding='utf-8') as fr:
        lodict = json.load(fr)

    origin_data_list = lodict

    # Run each model in its own subprocess so GPU memory is fully released before the
    # next model is loaded; pause between runs.
    for model_name in model_list:
        process = Process(target=run_inference, args=(model_name, origin_data_list))
        process.start()
        process.join()
        time.sleep(120)
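# Each saved record's 'model_output' field is a stringified list of the n=6 sampled
# completions, each expected to wrap its code in <start>...<end> per the prompt. A
# downstream evaluation step (not part of this script) could extract the enclosed code,
# e.g. with re.search(r'<start>(.*?)<end>', candidate, re.S).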