mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
add_versicode (#8221)
This commit is contained in:
parent
13c298d35f
commit
be62ba6b35
103
evaluation/benchmarks/versicode/README.md
Normal file
103
evaluation/benchmarks/versicode/README.md
Normal file
@ -0,0 +1,103 @@
|
||||
# VersiCode benchmark
|
||||
|
||||
This project is used to evaluate the performance of the model on VersiCode. It includes:
|
||||
|
||||
- data: the test data needed and the model outputs
|
||||
- inference_utils: inference scripts for our tasks and models
|
||||
- metric: scripts for calculating the various metrics
|
||||
- output_processing: process the model output to facilitate the calculation of model metrics
|
||||
|
||||
# Details
|
||||
|
||||
1. **Prepare the environment**
|
||||
|
||||
```shell
|
||||
#create conda environment
|
||||
conda create -n VersiCode python==3.12
|
||||
|
||||
#install requirements
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. **Experiment Data**
|
||||
|
||||
To obtain the experimental data, please visit the Hugging Face link: https://huggingface.co/datasets/AstoneNg/VersiCode.
|
||||
Locate the files `VersiCode_block_completion.json` and `VersiCode_migration.json` under the `experiment_data` directory, and place them in the `data/test_data` directory of this project.
|
||||
|
||||
|
||||
3. **Model inference**
|
||||
|
||||
```shell
|
||||
#cd inference_utils directory
|
||||
cd inference_utils
|
||||
|
||||
#The script file starting with 'test' is used to test the local model
|
||||
#The script files whose names start with 'api' are used to test models through API calls
|
||||
|
||||
#block level code completion
|
||||
#Modify the 10th and 12th lines of code to specify the base URL and model name
|
||||
python api_test_block_completion.py
|
||||
#Modify the 30th line of code to specify the local model path
|
||||
python test_block.py
|
||||
|
||||
# code migration (migration order is 'old_to_new')
|
||||
#Modify the 10th and 12th lines of code to specify the base URL and model name
|
||||
python api_code_migration.py
|
||||
#Modify the 30th line of code to specify the local model path
|
||||
python test_migration.py
|
||||
```
|
||||
|
||||
4. **Process output**
|
||||
Process the model output: remove redundant content and extract the specified content so that the metrics can be calculated easily.
|
||||
|
||||
```shell
|
||||
#cd output_processing
|
||||
cd output_processing
|
||||
|
||||
#Extract the content between <start> and <end>
|
||||
#Modify the 8th and 9th lines of code to specify the model and task granularity
|
||||
python clear_ans.py
|
||||
|
||||
#In the block completion and migration tasks, the cdc@k metric is computed on the core (key) lines, so these lines must be extracted first.
|
||||
#Modify lines 76 and 79 to specify the data path
|
||||
python choose_core_line_from_block_versicode.py
|
||||
python choose_core_line_from_migration_versicode.py
|
||||
```
|
||||
|
||||
5. **Metric**
|
||||
We have three metrics: pass@k, em@k and cdc@k. Because we cannot automatically build a dynamic evaluation environment, we do not provide pass@k.
|
||||
|
||||
```shell
|
||||
#cd metric
|
||||
cd metric
|
||||
|
||||
#Modify lines 137-140 in migration task (compute_migration_cdc_score.py) or 143-145 in block and line completion task (compute_versicode_cdc_score.py and compute_versicode_em_score.py) of the code to specify the data path and calculate the k-value of the metric
|
||||
python compute_migration_cdc_score.py
|
||||
python compute_versicode_cdc_score.py
|
||||
python compute_versicode_em_score.py
|
||||
|
||||
#Notes
|
||||
#We found limitations in the ISM@k and PM@k metrics for evaluating code generation, so they are used only as reference in our experiments.
|
||||
#Modify lines 261-265 in block and line completion task of the code to specify the data path and calculate the k-value of the metric
|
||||
python compute_ism_pm_score.py
|
||||
```
|
||||
|
||||
# Citation
|
||||
|
||||
```
|
||||
@article{versicode,
|
||||
author={Tongtong Wu and Weigang Wu and Xingyu Wang and Kang Xu and Suyu Ma and Bo Jiang and Ping Yang and Zhenchang Xing and Yuan-Fang Li and Gholamreza Haffari},
|
||||
title = {VersiCode: Towards Version-controllable Code Generation},
|
||||
journal = {CoRR},
|
||||
volume = {abs/2406.07411},
|
||||
year = {2024},
|
||||
url = {https://arxiv.org/abs/2406.07411},
|
||||
}
|
||||
```
|
||||
|
||||
**Github url**: https://github.com/wutong8023/VersiCode
|
||||
|
||||
# Contributor
|
||||
|
||||
[Tongtong Wu](https://scholar.google.com/citations?hl=zh-CN&user=u1Qp8lUAAAAJ&view_op=list_works&sortby=pubdate), [Weigang Wu](https://scholar.google.com/citations?hl=zh-CN&user=UneIZo8AAAAJ), [Xingyu Wang](https://scholar.google.com/citations?hl=zh-CN&user=wqPJcxcAAAAJ), [Kang Xu](https://scholar.google.com/citations?hl=zh-CN&user=N1UUDi0AAAAJ), [Suyu Ma](https://scholar.google.com/citations?hl=zh-CN&user=NJHR1ukAAAAJ), [Bo Jiang](https://wutong8023.site/VersiCode/), [Ping Yang](https://scholar.google.com/citations?view_op=list_works&hl=en&hl=en&user=hrogvxoAAAAJ), [Zhenchang Xing](https://scholar.google.com/citations?hl=zh-CN&user=0vCxuH4AAAAJ), [Yuan-Fang Li](https://scholar.google.com/citations?hl=zh-CN&user=wufXO1kAAAAJ), [Gholamreza Haffari](https://scholar.google.com/citations?hl=zh-CN&user=Perjx5EAAAAJ)
|
||||
|
||||
@ -0,0 +1,134 @@
|
||||
"""
|
||||
GPT performs code migration prediction and truncates overly long prompts
|
||||
"""
|
||||
import json
|
||||
import openai
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import tiktoken
|
||||
max_tokens = 127000  # prompt budget in tokens; gpt-3.5 has a 16k-token context, gpt-4o has 128k
model_name = ""  # model identifier sent to the API; also used as the result folder name

# Fill in the key here only when it is not already exported in the environment:
# setdefault() keeps an existing OPENAI_API_KEY instead of overwriting it with
# this placeholder value.
os.environ.setdefault("OPENAI_API_KEY", "")
client = OpenAI()
|
||||
|
||||
def truncate_text(text, max_tokens):
    """Clip ``text`` to at most ``max_tokens`` tokens of the ``cl100k_base`` encoding.

    The text is encoded with tiktoken (an empty ``disallowed_special`` is passed
    to the encoder), the token count is printed, and the possibly shortened
    token list is decoded back into a string.

    :param text: text to clip
    :param max_tokens: maximum number of tokens to keep
    :return: the text decoded from the kept tokens
    """
    codec = tiktoken.get_encoding("cl100k_base")
    token_ids = codec.encode(text, disallowed_special=())
    print(len(token_ids))

    # Only slice when the budget is exceeded; otherwise decode the tokens as they are
    kept_ids = token_ids[:max_tokens] if len(token_ids) > max_tokens else token_ids
    return codec.decode(kept_ids)
|
||||
|
||||
def predict(content, model_name):
    """Request six sampled chat completions for ``content`` from ``model_name``.

    Uses the module-level ``client``.

    :param content: text sent as the single user message
    :param model_name: model identifier passed to the API
    :return: ``str()`` of the list holding the message content of every returned choice
    """
    sampling_options = dict(
        frequency_penalty=0.1,
        max_tokens=128,
        logit_bias=None,
        logprobs=None,
        n=6,
        presence_penalty=0.0,
        seed=None,
        stop=None,
        stream=False,
        temperature=0.8,
        top_p=0.95,
    )
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": content}],
        **sampling_options,
    )
    # The answers are stored as the string form of a Python list
    return str([choice.message.content for choice in completion.choices])
|
||||
|
||||
def bulid_prompt(description, old_version, old_code, new_version) -> str:
    """Render the code-migration prompt for one sample.

    :param description: functionality description of the code
    :param old_version: dependency together with its old version
    :param old_code: code written against the old version
    :param new_version: dependency together with the target new version
    :return: the prompt text asking for the refactored code between <start> and <end>
    """
    return f"""
You are now a professional Python programming engineer. I will provide you with a code snippet and a description of its functionality,
including the dependencies and versions used in the code. Then, I will provide the same dependencies but with a specified new version.
Your task is to refactor the code using the methods provided by the specified new version and return the refactored code.
Please note that you only need to return the refactored code and enclose it with <start> and <end>:
###Functionality description of the code
{description}
###Dependency and old version
{old_version}
###Old version code
{old_code}
###Dependency and new version
{new_version}
###Refactored new code
"""
|
||||
|
||||
|
||||
json_path = '../data/test_data/VersiCode_migration.json'

with open(json_path, 'r', encoding='utf-8') as fr:
    lodict = json.load(fr)
data_dict = lodict
data_list = data_dict

# NOTE(review): the skip check below only fires when the input file already
# carries "model_output"; results are written to a different folder, so
# resuming presumably requires pointing json_path at a saved result file.
# enumerate() supplies the running position directly; list.index() rescans the
# list for every sample and reports the first equal entry, which is the wrong
# position when two samples compare equal.
for position, data in enumerate(data_list, start=1):
    if "model_output" in data:
        print(f"the {position} has already been predicted, skipping this data!")
        continue
    try:
        print(f"Predicting {position} ")
        # dependency and version strings are concatenated without a separator
        old_version = data['dependency'] + data['old_version']
        new_version = data['dependency'] + data['new_version']
        description = data['description']  # functionality description
        old_code = data['old_code']  # code written for the old version

        instruction = bulid_prompt(description, old_version, old_code, new_version)
        truncated_text = truncate_text(instruction, max_tokens)
        prediction = predict(truncated_text, model_name)

        data['model_output'] = prediction
    except Exception as e:
        # Stop at the first failure; everything predicted so far is saved below
        print(f"error:{e}")
        print("save current data")
        break

# Single save path shared by normal completion and the early exit above
save_folder_path = os.path.join('../data/result_data/code_migration', model_name)
os.makedirs(save_folder_path, exist_ok=True)
save_json_path = os.path.join(save_folder_path, json_path.split('/')[-1])

with open(save_json_path, 'w', encoding='utf-8') as fw:
    json.dump(data_dict, fw, indent=4, ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,141 @@
|
||||
"""
|
||||
GPT performs block-level code completion prediction and truncates overly long prompts
|
||||
"""
|
||||
import json
|
||||
import openai
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import tiktoken
|
||||
max_tokens = 127000  # prompt budget in tokens; gpt-3.5 has a 16k-token context, gpt-4o has 128k
model_name = ""  # model identifier sent to the API; also used as the result folder name

# Fill in the key here only when it is not already exported in the environment:
# setdefault() keeps an existing OPENAI_API_KEY instead of overwriting it with
# this placeholder value.
os.environ.setdefault("OPENAI_API_KEY", "")
client = OpenAI()
|
||||
|
||||
def truncate_text(text, max_tokens):
    """Clip ``text`` to at most ``max_tokens`` tokens of the ``cl100k_base`` encoding.

    The text is encoded with tiktoken (an empty ``disallowed_special`` is passed
    to the encoder), the token count is printed, and the possibly shortened
    token list is decoded back into a string.

    :param text: text to clip
    :param max_tokens: maximum number of tokens to keep
    :return: the text decoded from the kept tokens
    """
    codec = tiktoken.get_encoding("cl100k_base")
    token_ids = codec.encode(text, disallowed_special=())
    print(len(token_ids))

    # Only slice when the budget is exceeded; otherwise decode the tokens as they are
    kept_ids = token_ids[:max_tokens] if len(token_ids) > max_tokens else token_ids
    return codec.decode(kept_ids)
|
||||
|
||||
def predict(content, model_name):
    """Request six sampled chat completions for ``content`` from ``model_name``.

    Uses the module-level ``client``.

    :param content: text sent as the single user message
    :param model_name: model identifier passed to the API
    :return: ``str()`` of the list holding the message content of every returned choice
    """
    sampling_options = dict(
        frequency_penalty=0.1,
        max_tokens=128,
        logit_bias=None,
        logprobs=None,
        n=6,
        presence_penalty=0.0,
        seed=None,
        stop=None,
        stream=False,
        temperature=0.8,
        top_p=0.95,
    )
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": content}],
        **sampling_options,
    )
    # The answers are stored as the string form of a Python list
    return str([choice.message.content for choice in completion.choices])
|
||||
|
||||
def bulid_prompt(version, description) -> str:
    """Render the one-shot block-completion prompt for one sample.

    :param version: dependency together with its version
    :param description: functional description of the code to generate
    :return: the prompt text, containing one worked example followed by the sample
    """
    return f'''
You are a professional Python engineer, and I will provide functional descriptions and versions of specified dependency packages.
You need to write code in Python to implement this feature based on the functional description and using the dependency package and version I specified.
Please note that you only need to return the code that implements the function, and do not return any other content.
Please use <start> and <end> to enclose the generated code. Here is an example:
###Function Description:
The function of this code is to print the results predicted by calling the model using vllm.
###dependeny and version:
vllm==0.3.3
###response:
<start>
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
print("Prompt,Generated text")
<end>

###Function Description:
{description}
###dependeny and version:
{version}
###response:


'''
|
||||
|
||||
|
||||
json_path = '../data/test_data/VersiCode_block_completion.json'

with open(json_path, 'r', encoding='utf-8') as fr:
    lodict = json.load(fr)
data_dict = lodict
data_list = data_dict

# NOTE(review): the skip check below only fires when the input file already
# carries "model_output"; results are written to a different folder, so
# resuming presumably requires pointing json_path at a saved result file.
# enumerate() supplies the running position directly; list.index() rescans the
# list for every sample and reports the first equal entry, which is the wrong
# position when two samples compare equal.
for position, data in enumerate(data_list, start=1):
    if "model_output" in data:
        print(f"the {position} has already been predicted, skipping this data!")
        continue
    try:
        print(f"Predicting {position} ")
        # dependency and version strings are concatenated without a separator
        version = data['dependency'] + data['version']
        description = data['description']  # func description

        instruction = bulid_prompt(version, description)
        truncated_text = truncate_text(instruction, max_tokens)
        prediction = predict(truncated_text, model_name)

        data['model_output'] = prediction
    except Exception as e:
        # Stop at the first failure; everything predicted so far is saved below
        print(f"error:{e}")
        print("save current data")
        break

# Single save path shared by normal completion and the early exit above
save_folder_path = os.path.join('../data/result_data/block_completion', model_name)
os.makedirs(save_folder_path, exist_ok=True)
save_json_path = os.path.join(save_folder_path, json_path.split('/')[-1])

with open(save_json_path, 'w', encoding='utf-8') as fw:
    json.dump(data_dict, fw, indent=4, ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
118
evaluation/benchmarks/versicode/inference_utils/test_block.py
Normal file
118
evaluation/benchmarks/versicode/inference_utils/test_block.py
Normal file
@ -0,0 +1,118 @@
|
||||
"""
|
||||
block completion
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
from vllm import LLM, SamplingParams
|
||||
import tiktoken
|
||||
import time
|
||||
import gc
|
||||
import torch
|
||||
from multiprocessing import Process
|
||||
|
||||
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
|
||||
|
||||
def truncate_text(text, max_tokens):
    """Clip ``text`` to at most ``max_tokens`` tokens of the ``cl100k_base`` encoding.

    The text is encoded with tiktoken (an empty ``disallowed_special`` is passed
    to the encoder), the token count is printed, and the possibly shortened
    token list is decoded back into a string.

    :param text: text to clip
    :param max_tokens: maximum number of tokens to keep
    :return: the text decoded from the kept tokens
    """
    codec = tiktoken.get_encoding("cl100k_base")
    token_ids = codec.encode(text, disallowed_special=())
    print(len(token_ids))

    # Only slice when the budget is exceeded; otherwise decode the tokens as they are
    kept_ids = token_ids[:max_tokens] if len(token_ids) > max_tokens else token_ids
    return codec.decode(kept_ids)
|
||||
|
||||
# Local paths of the models to evaluate; edit for your environment
model_list = ['/data2/base models/starcoder2-15b', '/data2/base models/CodeGemma-7B']


def run_inference(model_name, origin_data_list):
    """Generate block completions for every sample with one local model and save them.

    Works on a deep copy of ``origin_data_list``, stores ``str()`` of the six
    sampled texts under ``'model_output'`` in each sample, and writes the list
    to ``../data/result_data/block_completion/<model folder name>/<input file name>``.
    Reads the module-level ``json_path`` for the output file name.

    :param model_name: local path of the model handed to vLLM
    :param origin_data_list: samples with 'dependency', 'version' and 'description' keys
    """
    temp_data_list = copy.deepcopy(origin_data_list)
    # dependency and version strings are concatenated without a separator
    test_list = [
        bulid_prompt(data['dependency'] + data['version'], data['description'])
        for data in temp_data_list
    ]

    sampling_params = SamplingParams(n=6, temperature=0.8, top_p=0.95, max_tokens=64)
    llm = LLM(model=model_name, tensor_parallel_size=4, gpu_memory_utilization=0.9, swap_space=20)

    outputs = llm.generate(test_list, sampling_params)
    # generate() returns one result per prompt in prompt order, so pair samples
    # and results by position rather than trusting request_id to be a 0-based
    # index, which only holds while the engine's request counter starts at zero.
    for data, output in zip(temp_data_list, outputs):
        data['model_output'] = str([candidate.text for candidate in output.outputs])

    save_folder_path = os.path.join('../data/result_data/block_completion', model_name.split('/')[-1])
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(save_folder_path, exist_ok=True)

    save_json_path = os.path.join(save_folder_path, json_path.split('/')[-1])

    with open(save_json_path, 'w', encoding='utf-8') as fw:
        json.dump(temp_data_list, fw, indent=4, ensure_ascii=False)

    gc.collect()
    torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def bulid_prompt(version, description) -> str:
    """Render the one-shot block-completion prompt for one sample.

    :param version: dependency together with its version
    :param description: functional description of the code to generate
    :return: the prompt text, containing one worked example followed by the sample
    """
    return f'''
You are a professional Python engineer, and I will provide functional descriptions and versions of specified dependency packages.
You need to write code in Python to implement this feature based on the functional description and using the dependency package and version I specified.
Please note that you only need to return the code that implements the function, and do not return any other content.
Please use <start> and <end> to enclose the generated code. Here is an example:
###Function Description:
The function of this code is to print the results predicted by calling the model using vllm.
###dependeny and version:
vllm==0.3.3
###response:
<start>
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
print("Prompt,Generated text")
<end>

###Function Description:
{description}
###dependeny and version:
{version}
###response:


'''
|
||||
|
||||
|
||||
# Kept at module level because run_inference() reads it for the output file name
json_path = '../data/test_data/VersiCode_block_completion.json'

# The guard keeps child processes from re-running this driver: under the
# "spawn"/"forkserver" start methods the child re-imports this module, and
# unguarded top-level code would launch inference again from inside the child.
if __name__ == '__main__':
    with open(json_path, 'r', encoding='utf-8') as fr:
        lodict = json.load(fr)

    origin_data_list = lodict

    # One model at a time, each in its own process, with a pause in between
    for model_name in model_list:
        process = Process(target=run_inference, args=(model_name, origin_data_list))
        process.start()
        process.join()
        time.sleep(120)
|
||||
|
||||
@ -0,0 +1,111 @@
|
||||
"""
|
||||
code migration
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
from vllm import LLM, SamplingParams
|
||||
import tiktoken
|
||||
import time
|
||||
import gc
|
||||
import torch
|
||||
from multiprocessing import Process
|
||||
|
||||
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
|
||||
|
||||
def truncate_text(text, max_tokens):
    """Clip ``text`` to at most ``max_tokens`` tokens of the ``cl100k_base`` encoding.

    The text is encoded with tiktoken (an empty ``disallowed_special`` is passed
    to the encoder), the token count is printed, and the possibly shortened
    token list is decoded back into a string.

    :param text: text to clip
    :param max_tokens: maximum number of tokens to keep
    :return: the text decoded from the kept tokens
    """
    codec = tiktoken.get_encoding("cl100k_base")
    token_ids = codec.encode(text, disallowed_special=())
    print(len(token_ids))

    # Only slice when the budget is exceeded; otherwise decode the tokens as they are
    kept_ids = token_ids[:max_tokens] if len(token_ids) > max_tokens else token_ids
    return codec.decode(kept_ids)
|
||||
|
||||
# Local paths of the models to evaluate; edit for your environment
model_list = ['/data2/base models/starcoder2-15b', '/data2/base models/CodeGemma-7B']


def run_inference(model_name, origin_data_list):
    """Generate migrated code for every sample with one local model and save it.

    Works on a deep copy of ``origin_data_list``, stores ``str()`` of the six
    sampled texts under ``'model_output'`` in each sample, and writes the list
    to ``../data/result_data/code_migration/<model folder name>/<input file name>``.
    Reads the module-level ``json_path`` for the output file name.

    :param model_name: local path of the model handed to vLLM
    :param origin_data_list: samples with 'dependency', 'old_version',
        'new_version', 'description' and 'old_code' keys
    """
    temp_data_list = copy.deepcopy(origin_data_list)
    test_list = []
    for data in temp_data_list:
        # dependency and version strings are concatenated without a separator
        old_version = data['dependency'] + data['old_version']
        new_version = data['dependency'] + data['new_version']
        description = data['description']  # functionality description
        old_code = data['old_code']  # code written for the old version

        test_list.append(bulid_prompt(description, old_version, old_code, new_version))

    sampling_params = SamplingParams(n=6, temperature=0.8, top_p=0.95, max_tokens=512)
    llm = LLM(model=model_name, tensor_parallel_size=4, gpu_memory_utilization=0.6, swap_space=40)

    outputs = llm.generate(test_list, sampling_params)
    # generate() returns one result per prompt in prompt order, so pair samples
    # and results by position rather than trusting request_id to be a 0-based
    # index, which only holds while the engine's request counter starts at zero.
    for data, output in zip(temp_data_list, outputs):
        data['model_output'] = str([candidate.text for candidate in output.outputs])

    save_folder_path = os.path.join('../data/result_data/code_migration', model_name.split('/')[-1])
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(save_folder_path, exist_ok=True)

    save_json_path = os.path.join(save_folder_path, json_path.split('/')[-1])

    with open(save_json_path, 'w', encoding='utf-8') as fw:
        json.dump(temp_data_list, fw, indent=4, ensure_ascii=False)

    gc.collect()
    torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def bulid_prompt(description, old_version, old_code, new_version) -> str:
    """Render the code-migration prompt for one sample.

    :param description: functionality description of the code
    :param old_version: dependency together with its old version
    :param old_code: code written against the old version
    :param new_version: dependency together with the target new version
    :return: the prompt text asking for the refactored code between <start> and <end>
    """
    return f"""
You are now a professional Python programming engineer. I will provide you with a code snippet and a description of its functionality,
including the dependencies and versions used in the code. Then, I will provide the same dependencies but with a specified new version.
Your task is to refactor the code using the methods provided by the specified new version and return the refactored code.
Please note that you only need to return the refactored code and enclose it with <start> and <end>:
###Functionality description of the code
{description}
###Dependency and old version
{old_version}
###Old version code
{old_code}
###Dependency and new version
{new_version}
###Refactored new code
"""
|
||||
|
||||
|
||||
# Kept at module level because run_inference() reads it for the output file name
json_path = '../data/test_data/VersiCode_migration.json'

# The guard keeps child processes from re-running this driver: under the
# "spawn"/"forkserver" start methods the child re-imports this module, and
# unguarded top-level code would launch inference again from inside the child.
if __name__ == '__main__':
    with open(json_path, 'r', encoding='utf-8') as fr:
        lodict = json.load(fr)

    origin_data_list = lodict

    # One model at a time, each in its own process, with a pause in between
    for model_name in model_list:
        process = Process(target=run_inference, args=(model_name, origin_data_list))
        process.start()
        process.join()
        time.sleep(120)
|
||||
|
||||
345
evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py
Normal file
345
evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py
Normal file
@ -0,0 +1,345 @@
|
||||
"""
|
||||
Evaluate block-level prediction ability
|
||||
1. Check whether the output contains the correct function name
|
||||
2. Check whether the output is valid code
|
||||
3. Compute ISM and PM
|
||||
"""
|
||||
import json
|
||||
import tokenize
|
||||
import io
|
||||
import math
|
||||
import ast
|
||||
import re
|
||||
import os
|
||||
|
||||
def is_code_valid(code):
    """Return True when ``code`` compiles as a Python module, False otherwise.

    :param code: source text to check
    :return: True if ``compile(code, '<string>', 'exec')`` succeeds, else False
    """
    try:
        compile(code, '<string>', 'exec')
        return True
    except Exception:
        # Any compilation failure means "not valid"; unlike a bare except this
        # still lets KeyboardInterrupt and SystemExit propagate.
        return False
|
||||
|
||||
|
||||
def longest_common_prefix_between_lists_with_elements(list1, list2):
    """Find the longest common prefix shared by any string of list1 and any string of list2.

    :param list1: first list of strings
    :param list2: second list of strings
    :return: (length of the longest common prefix, (str1, str2) pair that
        produced it); the pair is an empty tuple when no prefix is shared.
        The first pair reaching the maximum wins.
    """
    best_length = 0
    best_pair = ()
    for left in list1:
        for right in list2:
            shared = 0
            # zip stops at the shorter string, like the min-length bound
            for char_left, char_right in zip(left, right):
                if char_left != char_right:
                    break
                shared += 1
            if shared > best_length:
                best_length, best_pair = shared, (left, right)
    return best_length, best_pair
|
||||
|
||||
def _identifier_names(code: str) -> list:
    """Return the NAME token strings of ``code``, or its lines if it cannot be tokenized."""
    try:
        # tokenize.tokenize() is lazy: errors only surface while iterating, so
        # the iteration has to happen inside the try block.
        return [
            token.string
            for token in tokenize.tokenize(io.BytesIO(code.encode('utf-8')).readline)
            if token.type == tokenize.NAME
        ]
    except Exception:
        # Fall back to whole lines so the caller always receives a list
        # (previously an exhausted generator object could be returned).
        return code.splitlines()


def get_token(ans_code:str, output_code:str):
    """Lex both code strings and return their identifier lists.

    Each result holds the strings of the NAME tokens (identifiers and keywords)
    in source order. When a string cannot be tokenized, its list of lines is
    returned instead.

    :param ans_code: reference code
    :param output_code: model-generated code
    :return: (identifiers of ans_code, identifiers of output_code)
    """
    return _identifier_names(ans_code), _identifier_names(output_code)
|
||||
|
||||
|
||||
def get_token_per_line(code: str):
    """Lex the code line by line and record the identifiers of every line.

    Each line is tokenized on its own. When a line cannot be tokenized, it is
    split on single spaces instead.

    :param code: code string
    :return: list with one list of NAME token strings (or space-separated
        pieces) per line
    """
    lines = code.split('\n')  # split the code into lines
    identifiers_per_line = []  # one identifier list per line

    for line in lines:
        tokens = tokenize.tokenize(io.BytesIO(line.encode('utf-8')).readline)
        identifiers = []
        try:
            for token in tokens:
                if token.type == tokenize.NAME:
                    identifiers.append(token.string)
        except Exception:
            # A single line is often not a complete statement; unlike a bare
            # except this still lets KeyboardInterrupt/SystemExit propagate.
            identifiers = line.split(' ')
        identifiers_per_line.append(identifiers)

    return identifiers_per_line
|
||||
|
||||
|
||||
|
||||
def get_ISM(answer_code:str, model_output_list:list, asnwer_name:str)->list:
    """Compute the ISM score of every model output.

    An output scores 0 unless it contains ``asnwer_name`` as a whole word and
    compiles. Otherwise its score is the longest common prefix between any
    identifier of the answer and any identifier of the output, divided by the
    length of the longer of the two matched identifiers.

    :param answer_code: reference code
    :param model_output_list: generated code strings
    :param asnwer_name: name that must appear in a generated code string
    :return: scores sorted in descending order
    """
    scores = []
    for candidate in model_output_list:
        if '```python' in candidate:
            candidate = candidate.replace('```python', '').replace('```', '')

        mentions_name = re.search(rf'\b{re.escape(asnwer_name)}\b', candidate) is not None
        if not (mentions_name and is_code_valid(candidate)):
            scores.append(0)
            continue

        answer_ids, candidate_ids = get_token(answer_code, candidate)
        prefix_len, pair = longest_common_prefix_between_lists_with_elements(answer_ids, candidate_ids)
        if prefix_len == 0:
            scores.append(0)
        else:
            # normalise by the longer of the two matched identifiers
            scores.append(prefix_len / max(len(pair[0]), len(pair[1])))

    scores.sort(reverse=True)
    return scores
|
||||
|
||||
def get_ISM_without_verification(answer_code:str, model_output_list:list, asnwer_name:str)->list:
    """Compute the ISM score of every model output without the validity check.

    An output scores 0 when ``asnwer_name`` is not a substring of it. Otherwise
    its score is the longest common prefix between any identifier of the answer
    and any identifier of the output, divided by the length of the longer of
    the two matched identifiers.

    :param answer_code: reference code
    :param model_output_list: generated code strings
    :param asnwer_name: name that must appear in a generated code string
    :return: scores sorted in descending order
    """
    scores = []
    for candidate in model_output_list:
        if asnwer_name not in candidate:
            scores.append(0)
            continue

        answer_ids, candidate_ids = get_token(answer_code, candidate)
        prefix_len, pair = longest_common_prefix_between_lists_with_elements(answer_ids, candidate_ids)
        if prefix_len == 0:
            scores.append(0)
        else:
            # normalise by the longer of the two matched identifiers
            scores.append(prefix_len / max(len(pair[0]), len(pair[1])))

    scores.sort(reverse=True)
    return scores
|
||||
|
||||
def longest_common_prefix_with_lengths(list1, list2):
    """Find the longest element-wise common prefix between sublists of two 2-D lists.

    :param list1: first list of lists
    :param list2: second list of lists
    :return: (longest prefix length, length of the list1 sublist that produced
        it, length of the list2 sublist that produced it); all zeros when no
        pair shares a prefix. The first pair reaching the maximum wins.
    """
    best_length = 0
    best_len1 = 0
    best_len2 = 0
    for row1 in list1:
        for row2 in list2:
            shared = 0
            # zip stops at the shorter sublist, like the min-length bound
            for item1, item2 in zip(row1, row2):
                if item1 != item2:
                    break
                shared += 1
            if shared > best_length:
                best_length = shared
                best_len1, best_len2 = len(row1), len(row2)
    return best_length, best_len1, best_len2
|
||||
|
||||
|
||||
def get_PM(answer_code:str, model_output_list:list, asnwer_name:str)->list:
    """Compute the PM score of every model output.

    An output scores 0 unless it contains ``asnwer_name`` as a whole word and
    compiles. Otherwise both codes are tokenized line by line and the score is
    the longest common identifier prefix between any answer line and any output
    line, divided by the identifier count of the longer of the two matched lines.

    :param answer_code: reference code
    :param model_output_list: generated code strings
    :param asnwer_name: name that must appear in a generated code string
    :return: scores sorted in descending order
    """
    scores = []
    for candidate in model_output_list:
        if '```python' in candidate:
            candidate = candidate.replace('```python', '').replace('```', '')

        mentions_name = re.search(rf'\b{re.escape(asnwer_name)}\b', candidate) is not None
        if not (mentions_name and is_code_valid(candidate)):
            scores.append(0)
            continue

        answer_lines = get_token_per_line(answer_code)
        candidate_lines = get_token_per_line(candidate)
        prefix_len, answer_width, candidate_width = longest_common_prefix_with_lengths(answer_lines, candidate_lines)
        denominator = max(answer_width, candidate_width)

        # a zero denominator means no line pair shared any identifier
        scores.append(prefix_len / denominator if denominator != 0 else 0)

    scores.sort(reverse=True)
    return scores
|
||||
|
||||
def get_score(score_list:list, k):
    """Compute score@k from the n scores in ``score_list``.

    The i-th score (1-based) is weighted by C(n-i, k-1) and the weighted total
    is divided by C(n, k). For a list sorted in descending order this is the
    expected best score among k of the n samples.

    :param score_list: per-sample scores
    :param k: number of samples considered
    :return: the score@k value
    """
    n = len(score_list)
    weighted_total = 0
    # ranks beyond n-k+1 have zero weight, so the loop stops there
    for rank in range(1, n - k + 2):
        weighted_total += math.comb(n - rank, k - 1) * score_list[rank - 1]

    return weighted_total / math.comb(n, k)
|
||||
|
||||
|
||||
k = 1
task = 'block'  # block or line
# NOTE(review): the inference scripts save files named "VersiCode_..." (capital
# C); confirm the casing used here matches the files produced by the
# output_processing step.
json_name = f"Versicode_{task}_completion.json"

folder_path = f'../data/result_data/{task}_completion'
model_list = os.listdir(folder_path)

for model in model_list:
    model_json_path = os.path.join(folder_path, model, json_name)
    with open(model_json_path, 'r', encoding='utf-8') as fr:
        lodict = json.load(fr)
    data_dict = lodict
    data_list = data_dict
    data_len = len(data_list)
    sum_ISM = 0
    sum_PM = 0

    for data in data_list:
        # 'model_output_clear' holds the string form of a Python list of
        # strings. It originates from model-generated text, so it is parsed
        # with ast.literal_eval (literals only) instead of eval(), which would
        # execute arbitrary expressions. Only the first output is scored.
        model_output_list = ast.literal_eval(data['model_output_clear'])[:1]
        # strip Markdown code fences from every output
        model_output_list = [
            o.replace('```python', '').replace('```', '') for o in model_output_list
        ]
        answer_code = data['code']
        answer_name = data['core_token']
        # For code editing data use data['new_code'] / data['new_name'] instead,
        # or data['old_code'] / data['old_name'] for the new-to-old direction.

        ISM_score_list = get_ISM(answer_code, model_output_list, answer_name)
        PM_score_list = get_PM(answer_code, model_output_list, answer_name)

        ISM_score = get_score(ISM_score_list, k)
        PM_score = get_score(PM_score_list, k)

        sum_ISM += ISM_score
        sum_PM += PM_score

    print(f"{model}, {task} completion task, ISM@{k} score: {sum_ISM/data_len}")
    print(f"{model}, {task} completion task, PM@{k} score: {sum_PM/data_len}")
|
||||
|
||||
|
||||
|
||||
# def get_token(ans_code:str, output_code:str):
|
||||
# """
|
||||
# 对代码进行词法分析,分解成标识符,返回两个标识符列表
|
||||
# :param ans_code:
|
||||
# :param output_code:
|
||||
# :return:
|
||||
# """
|
||||
# tokens_ans = tokenize.tokenize(io.BytesIO(ans_code.encode('utf-8')).readline)
|
||||
# tokens_output = tokenize.tokenize(io.BytesIO(output_code.encode('utf-8')).readline)
|
||||
# identifiers_ans = []
|
||||
# identifiers_output = []
|
||||
# for token in tokens_ans:
|
||||
# if token.type == tokenize.NAME:
|
||||
# identifiers_ans.append(token.string)
|
||||
#
|
||||
# for to in tokens_output:
|
||||
# if to.type == tokenize.NAME:
|
||||
# identifiers_output.append(to.string)
|
||||
#
|
||||
# return identifiers_ans, identifiers_output
|
||||
@ -0,0 +1,165 @@
|
||||
"""
|
||||
Calculate the cdc score for migration
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import warnings
|
||||
# warnings.filterwarnings("ignore", category=SyntaxWarning)
|
||||
|
||||
def is_correct_parameter_count(function_name, correct_code, test_code):
    """Check that the model's call passes as many arguments as the reference call.

    The first ``function_name(...)`` occurrence in each snippet is located
    with a non-greedy regex and its argument text is split on commas, so
    nested calls and commas inside literals are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when both calls have the same number of non-empty
        comma-separated arguments. When ``test_code`` contains no call,
        True only if the reference expects zero arguments and the name
        still appears in ``test_code``.
    """
    # Escape the name so regex metacharacters in it (e.g. the dot in a
    # dotted API name) are matched literally rather than as wildcards.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    correct_match = re.search(pattern, correct_code)
    # No call in the reference means zero arguments are expected.
    expected_count = len(_split_args(correct_match)) if correct_match else 0

    test_match = re.search(pattern, test_code)
    if test_match:
        return len(_split_args(test_match)) == expected_count

    # No parenthesised call in the model output: accept only a bare mention
    # of the name when no arguments were expected.
    return expected_count == 0 and function_name in test_code
|
||||
|
||||
def check_keyword_parameters(function_name, correct_code, test_code):
    """Check that keyword arguments of the reference call are also passed by keyword.

    Both snippets are searched for the first ``function_name(...)`` call; the
    argument text is split on commas, so nested calls are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when every ``name=value`` argument of the reference call has
        a counterpart in the model's call that contains the same name and an
        ``=``. False when either snippet has no ``function_name(...)`` call or
        a keyword argument has no such counterpart.
    """
    # Escape the name so regex metacharacters in it are matched literally.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    correct_match = re.search(pattern, correct_code)
    test_match = re.search(pattern, test_code)
    if not correct_match or not test_match:
        return False

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    test_param_list = _split_args(test_match)
    for correct_param in _split_args(correct_match):
        if '=' not in correct_param:
            continue  # positional in the reference: nothing to enforce
        param_name = correct_param.split('=')[0].strip()
        if not any(param_name in test_param and '=' in test_param for test_param in test_param_list):
            return False  # the keyword argument is missing or passed positionally

    return True
|
||||
|
||||
def with_correct(answer_code:str, model_output:str)->bool:
    """Tell whether the reference and the model output agree on using ``with``.

    :param answer_code: reference line.
    :param model_output: line taken from the model output.
    :return: True when both lines start with ``with`` or neither does.
    """
    reference_uses_with = answer_code.startswith('with')
    output_uses_with = model_output.startswith('with')
    return reference_uses_with == output_uses_with
|
||||
|
||||
def compute_block_score_k(answer:str, model_output:list, k:int, model_filled_code, core_line_in_core_block, core_line_in_output_clear):
    """Compute the cdc@k score of one sample.

    cdc requires all five checks below to pass for an output to count as
    correct; em would only require the first one (the token match).

    :param answer: core token that must appear as a whole word in an output.
    :param model_output: list of generated code strings.
    :param k: the k of cdc@k.
    :param model_filled_code: per-output code that must compile.
    :param core_line_in_core_block: reference line containing the core token.
    :param core_line_in_output_clear: per-output line containing the core token.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)`` with ``n`` outputs and ``c`` correct ones.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = 0

    for position, generated in enumerate(model_output):
        # The checks run in this order and stop at the first failure.
        if not re.search(token_pattern, generated):
            continue
        if not is_code_valid(model_filled_code[position]):
            continue
        output_core_line = core_line_in_output_clear[position]
        if not is_correct_parameter_count(answer, core_line_in_core_block, output_core_line):
            continue
        if not with_correct(core_line_in_core_block, output_core_line):
            continue
        if not check_keyword_parameters(answer, core_line_in_core_block, output_core_line):
            continue
        passed += 1

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
|
||||
def is_code_valid(code):
    """Return True when ``code`` compiles as a Python module, False otherwise.

    The code is only compiled, never executed.

    :param code: source text to check.
    :return: True if ``compile`` succeeds; False if it raises any ordinary
        exception (syntax error, invalid source type, ...).
    """
    try:
        compile(code, '<string>', 'exec')
    except Exception:
        # Any compile failure means "not valid". KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return False
    return True
|
||||
|
||||
def compute_score_k(answer:str, model_output:list, k:int):
    """Compute a pass@k-style score from token match plus compilability.

    Outputs containing a ```python fence have their fences removed first. An
    output counts as correct when it contains ``answer`` as a whole word and
    compiles.

    :param answer: core token to look for.
    :param model_output: list of generated code strings.
    :param k: the k of the metric.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = 0

    for raw in model_output:
        cleaned = raw
        if '```python' in cleaned:
            cleaned = cleaned.replace('```python', '').replace('```', '')
        if re.search(token_pattern, cleaned) and is_code_valid(cleaned) == True:
            passed += 1

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
import ast  # used only by this script section to decode stringified lists

k = 1  # cdc@k
json_name = 'VersiCode_migration.json'
task = 'migration'
folder_path = '../data/result_data/code_migration'

# Every entry of folder_path is treated as one model's result directory.
model_list = os.listdir(folder_path)
for model in model_list:
    model_json_path = os.path.join(folder_path, model, json_name)
    with open(model_json_path, 'r', encoding='utf-8') as fr:
        data_list = json.load(fr)

    score_list = []
    for data in data_list:
        answer = data['new_name']  # old -> new
        model_output = data['model_output_clear']  # old -> new
        if isinstance(model_output, str):
            # The field may be stored as the repr of a list; iterating the raw
            # string would walk single characters. literal_eval decodes it
            # without executing anything.
            model_output = ast.literal_eval(model_output)

        # For migration the generated code itself is what must compile.
        model_filled_code = model_output
        core_line_in_core_block = data['core_line_in_code']  # old -> new
        core_line_in_output_clear = data['core_line_in_output_clear']  # old -> new

        score_list.append(compute_block_score_k(answer, model_output, k, model_filled_code, core_line_in_core_block, core_line_in_output_clear))

    if not score_list:
        # Avoid a ZeroDivisionError on an empty result file.
        print(f"{model}, {task} task: no samples found, skipping")
        continue

    final_score = sum(score_list) / len(score_list)
    print(f"{model}, {task} task, cdc@{k} score: {final_score}")
|
||||
@ -0,0 +1,175 @@
|
||||
"""
|
||||
Calculate the cdc score for line and block
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import warnings
|
||||
# warnings.filterwarnings("ignore", category=SyntaxWarning)
|
||||
|
||||
def is_code_valid(code):
    """Return True when ``code`` compiles as a Python module, False otherwise.

    The code is only compiled, never executed.

    :param code: source text to check.
    :return: True if ``compile`` succeeds; False if it raises any ordinary
        exception (syntax error, invalid source type, ...).
    """
    try:
        compile(code, '<string>', 'exec')
    except Exception:
        # Any compile failure means "not valid". KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return False
    return True
|
||||
|
||||
def is_correct_parameter_count(function_name, correct_code, test_code):
    """Check that the model's call passes as many arguments as the reference call.

    The first ``function_name(...)`` occurrence in each snippet is located
    with a non-greedy regex and its argument text is split on commas, so
    nested calls and commas inside literals are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when both calls have the same number of non-empty
        comma-separated arguments. When ``test_code`` contains no call,
        True only if the reference expects zero arguments and the name
        still appears in ``test_code``.
    """
    # Escape the name so regex metacharacters in it (e.g. the dot in a
    # dotted API name) are matched literally rather than as wildcards.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    correct_match = re.search(pattern, correct_code)
    # No call in the reference means zero arguments are expected.
    expected_count = len(_split_args(correct_match)) if correct_match else 0

    test_match = re.search(pattern, test_code)
    if test_match:
        return len(_split_args(test_match)) == expected_count

    # No parenthesised call in the model output: accept only a bare mention
    # of the name when no arguments were expected.
    return expected_count == 0 and function_name in test_code
|
||||
|
||||
def check_keyword_parameters(function_name, correct_code, test_code):
    """Check that keyword arguments of the reference call are also passed by keyword.

    Both snippets are searched for the first ``function_name(...)`` call; the
    argument text is split on commas, so nested calls are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when every ``name=value`` argument of the reference call has
        a counterpart in the model's call that contains the same name and an
        ``=``. False when either snippet has no ``function_name(...)`` call or
        a keyword argument has no such counterpart.
    """
    # Escape the name so regex metacharacters in it are matched literally.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    correct_match = re.search(pattern, correct_code)
    test_match = re.search(pattern, test_code)
    if not correct_match or not test_match:
        return False

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    test_param_list = _split_args(test_match)
    for correct_param in _split_args(correct_match):
        if '=' not in correct_param:
            continue  # positional in the reference: nothing to enforce
        param_name = correct_param.split('=')[0].strip()
        if not any(param_name in test_param and '=' in test_param for test_param in test_param_list):
            return False  # the keyword argument is missing or passed positionally

    return True
|
||||
|
||||
def with_correct(answer_code:str, model_output:str)->bool:
    """Tell whether the reference and the model output agree on using ``with``.

    :param answer_code: reference line.
    :param model_output: line taken from the model output.
    :return: True when both lines start with ``with`` or neither does.
    """
    reference_uses_with = answer_code.startswith('with')
    output_uses_with = model_output.startswith('with')
    return reference_uses_with == output_uses_with
|
||||
|
||||
def compute_line_score_k(answer:str, model_output:list, k:int, model_filled_code, core_line):
    """Compute the cdc@k score of one line-completion sample.

    An output counts as correct when it contains ``answer`` as a whole word,
    its filled-in code compiles, and its call agrees with ``core_line`` on
    argument count, ``with`` usage and keyword arguments.

    :param answer: core token to look for.
    :param model_output: list of generated lines.
    :param k: the k of cdc@k.
    :param model_filled_code: per-output full code with the line filled in.
    :param core_line: reference line.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = 0

    for position, generated in enumerate(model_output):
        # The checks run in this order and stop at the first failure.
        if not re.search(token_pattern, generated):
            continue
        if not (is_code_valid(model_filled_code[position]) == True):
            continue
        if not is_correct_parameter_count(answer, core_line, generated):
            continue
        if not with_correct(core_line, generated):
            continue
        if not check_keyword_parameters(answer, core_line, generated):
            continue
        passed += 1

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
def compute_block_score_k(answer:str, model_output:list, k:int, model_filled_code, core_line_in_core_block, core_line_in_output_clear):
    """Compute the cdc@k score of one block-completion sample.

    An output counts as correct when it contains ``answer`` as a whole word,
    its code compiles, and its core line agrees with the reference core line
    on argument count, ``with`` usage and keyword arguments.

    :param answer: core token to look for.
    :param model_output: list of generated code blocks.
    :param k: the k of cdc@k.
    :param model_filled_code: per-output code that must compile.
    :param core_line_in_core_block: reference line containing the core token.
    :param core_line_in_output_clear: per-output line containing the core token.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = 0

    for position, generated in enumerate(model_output):
        # The checks run in this order and stop at the first failure.
        if not re.search(token_pattern, generated):
            continue
        if not is_code_valid(model_filled_code[position]):
            continue
        output_core_line = core_line_in_output_clear[position]
        if not is_correct_parameter_count(answer, core_line_in_core_block, output_core_line):
            continue
        if not with_correct(core_line_in_core_block, output_core_line):
            continue
        if not check_keyword_parameters(answer, core_line_in_core_block, output_core_line):
            continue
        passed += 1

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
def compute_score_k(answer:str, model_output:list, k:int):
    """Compute a pass@k-style score from token match plus compilability.

    :param answer: core token that must appear as a whole word.
    :param model_output: list of generated code strings.
    :param k: the k of the metric.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = sum(
        1
        for generated in model_output
        if re.search(token_pattern, generated) and is_code_valid(generated)
    )

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
import ast  # used only by this script section to decode stringified lists

k = 3  # cdc@k
task = 'block'  # line or block
# Capital "C" to match the file name used by the output-processing scripts
# and the dataset; the lower-case spelling fails on case-sensitive filesystems.
json_name = f"VersiCode_{task}_completion.json"

folder_path = f'../data/result_data/{task}_completion'
# Every entry of folder_path is treated as one model's result directory.
model_list = os.listdir(folder_path)

for model in model_list:
    model_json_path = os.path.join(folder_path, model, json_name)
    with open(model_json_path, 'r', encoding='utf-8') as fr:
        data_list = json.load(fr)

    score_list = []
    for data in data_list:
        answer = data['core_token']
        # model_output_clear holds the repr of a list of strings; literal_eval
        # decodes it without executing model-generated text.
        model_output = ast.literal_eval(data['model_output_clear'])
        core_line = data['core_line']

        if task == 'line':
            # Put each generated line back into the masked code so the whole
            # snippet can be compile-checked.
            model_filled_code = [data['masked_code'].replace('<mask>', i) for i in model_output]
            score_list.append(compute_line_score_k(answer, model_output, k, model_filled_code, core_line))
        else:
            # For blocks the generated code itself is what must compile.
            model_filled_code = list(model_output)
            core_line_in_output_clear = data['core_line_in_output_clear']
            score_list.append(compute_block_score_k(answer, model_output, k, model_filled_code, core_line, core_line_in_output_clear))

    if not score_list:
        # Avoid a ZeroDivisionError on an empty result file.
        print(f"{model}, {task} completion task: no samples found, skipping")
        continue

    final_score = sum(score_list) / len(score_list)
    print(f"{model}, {task} completion task, cdc@{k} score: {final_score}")
|
||||
@ -0,0 +1,175 @@
|
||||
"""
|
||||
Calculate the em score for line and block
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import warnings
|
||||
# warnings.filterwarnings("ignore", category=SyntaxWarning)
|
||||
|
||||
def is_code_valid(code):
    """Return True when ``code`` compiles as a Python module, False otherwise.

    The code is only compiled, never executed.

    :param code: source text to check.
    :return: True if ``compile`` succeeds; False if it raises any ordinary
        exception (syntax error, invalid source type, ...).
    """
    try:
        compile(code, '<string>', 'exec')
    except Exception:
        # Any compile failure means "not valid". KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return False
    return True
|
||||
|
||||
def is_correct_parameter_count(function_name, correct_code, test_code):
    """Check that the model's call passes as many arguments as the reference call.

    The first ``function_name(...)`` occurrence in each snippet is located
    with a non-greedy regex and its argument text is split on commas, so
    nested calls and commas inside literals are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when both calls have the same number of non-empty
        comma-separated arguments. When ``test_code`` contains no call,
        True only if the reference expects zero arguments and the name
        still appears in ``test_code``.
    """
    # Escape the name so regex metacharacters in it (e.g. the dot in a
    # dotted API name) are matched literally rather than as wildcards.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    correct_match = re.search(pattern, correct_code)
    # No call in the reference means zero arguments are expected.
    expected_count = len(_split_args(correct_match)) if correct_match else 0

    test_match = re.search(pattern, test_code)
    if test_match:
        return len(_split_args(test_match)) == expected_count

    # No parenthesised call in the model output: accept only a bare mention
    # of the name when no arguments were expected.
    return expected_count == 0 and function_name in test_code
|
||||
|
||||
def check_keyword_parameters(function_name, correct_code, test_code):
    """Check that keyword arguments of the reference call are also passed by keyword.

    Both snippets are searched for the first ``function_name(...)`` call; the
    argument text is split on commas, so nested calls are only approximated.

    :param function_name: API name to look for; matched literally.
    :param correct_code: reference line containing the expected call.
    :param test_code: line taken from the model output.
    :return: True when every ``name=value`` argument of the reference call has
        a counterpart in the model's call that contains the same name and an
        ``=``. False when either snippet has no ``function_name(...)`` call or
        a keyword argument has no such counterpart.
    """
    # Escape the name so regex metacharacters in it are matched literally.
    pattern = rf'{re.escape(function_name)}\((.*?)\)'

    correct_match = re.search(pattern, correct_code)
    test_match = re.search(pattern, test_code)
    if not correct_match or not test_match:
        return False

    def _split_args(match):
        # Non-empty, stripped comma-separated pieces of the captured argument text.
        return [p.strip() for p in match.group(1).strip().split(',') if p.strip()]

    test_param_list = _split_args(test_match)
    for correct_param in _split_args(correct_match):
        if '=' not in correct_param:
            continue  # positional in the reference: nothing to enforce
        param_name = correct_param.split('=')[0].strip()
        if not any(param_name in test_param and '=' in test_param for test_param in test_param_list):
            return False  # the keyword argument is missing or passed positionally

    return True
|
||||
|
||||
def with_correct(answer_code:str, model_output:str)->bool:
    """Tell whether the reference and the model output agree on using ``with``.

    :param answer_code: reference line.
    :param model_output: line taken from the model output.
    :return: True when both lines start with ``with`` or neither does.
    """
    reference_uses_with = answer_code.startswith('with')
    output_uses_with = model_output.startswith('with')
    return reference_uses_with == output_uses_with
|
||||
|
||||
def compute_line_score_k(answer:str, model_output:list, k:int, model_filled_code, core_line):
    """Compute the em@k score of one line-completion sample.

    An output counts as correct when it contains ``answer`` as a whole word.
    ``model_filled_code`` and ``core_line`` are accepted but not used.

    :param answer: core token to look for.
    :param model_output: list of generated lines.
    :param k: the k of em@k.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = sum(1 for generated in model_output if re.search(token_pattern, generated))

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
def compute_block_score_k(answer:str, model_output:list, k:int, model_filled_code, core_line_in_core_block, core_line_in_output_clear):
    """Compute the em@k score of one block-completion sample.

    An output counts as correct when it contains ``answer`` as a whole word.
    The remaining parameters are accepted but not used.

    :param answer: core token to look for.
    :param model_output: list of generated code blocks.
    :param k: the k of em@k.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = sum(1 for generated in model_output if re.search(token_pattern, generated))

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
def compute_score_k(answer:str, model_output:list, k:int):
    """Compute a pass@k-style score from token match plus compilability.

    :param answer: core token that must appear as a whole word.
    :param model_output: list of generated code strings.
    :param k: the k of the metric.
    :return: 1.0 when fewer than ``k`` outputs are incorrect, otherwise
        ``1 - C(n - c, k) / C(n, k)``.
    """
    token_pattern = rf'\b{re.escape(answer)}\b'
    total = len(model_output)
    passed = sum(
        1
        for generated in model_output
        if re.search(token_pattern, generated) and is_code_valid(generated)
    )

    if total - passed < k:
        return 1.0

    return 1 - (math.comb(total - passed, k)) / (math.comb(total, k))
|
||||
|
||||
import ast  # used only by this script section to decode stringified lists

k = 3  # em@k
task = 'block'  # line or block
# Capital "C" to match the file name used by the output-processing scripts
# and the dataset; the lower-case spelling fails on case-sensitive filesystems.
json_name = f"VersiCode_{task}_completion.json"

folder_path = f'../data/result_data/{task}_completion'
# Every entry of folder_path is treated as one model's result directory.
model_list = os.listdir(folder_path)

for model in model_list:
    model_json_path = os.path.join(folder_path, model, json_name)
    with open(model_json_path, 'r', encoding='utf-8') as fr:
        data_list = json.load(fr)

    score_list = []
    for data in data_list:
        answer = data['core_token']
        # model_output_clear holds the repr of a list of strings; literal_eval
        # decodes it without executing model-generated text.
        model_output = ast.literal_eval(data['model_output_clear'])
        core_line = data['core_line']

        if task == 'line':
            model_filled_code = [data['masked_code'].replace('<mask>', i) for i in model_output]
            score_list.append(compute_line_score_k(answer, model_output, k, model_filled_code, core_line))
        else:
            model_filled_code = list(model_output)
            core_line_in_output_clear = data['core_line_in_output_clear']
            score_list.append(compute_block_score_k(answer, model_output, k, model_filled_code, core_line, core_line_in_output_clear))

    if not score_list:
        # Avoid a ZeroDivisionError on an empty result file.
        print(f"{model}, {task} completion task: no samples found, skipping")
        continue

    final_score = sum(score_list) / len(score_list)
    print(f"{model}, {task} completion task, em@{k} score: {final_score}")
|
||||
@ -0,0 +1,107 @@
|
||||
"""
|
||||
Find the core line (the line that uses the core token) in the reference code and in each model-generated block for the block completion task
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
|
||||
def process_line_mask(code_snippet, core_token):
    """Pick one line of ``code_snippet`` that uses ``core_token`` and mask it.

    Comment lines, lines holding a complete triple-quoted string, the inside
    of multi-line triple-quoted strings and the ``def task_function`` header
    are never candidates. A line qualifies when it contains ``core_token`` as
    a whole word that is not directly followed by ``=``. One qualifying line
    is chosen with ``random.choice``.

    :param code_snippet: source text to scan.
    :param core_token: token whose usage line is wanted.
    :return: ``(masked_code, masked_line)`` where the chosen line is replaced
        by ``<line_mask>`` (indentation kept) and ``masked_line`` is the
        stripped original line; ``(None, None)`` when ``core_token`` is empty
        or no line qualifies.
    """
    if not core_token:
        return None, None

    triple_quoted = r"'''(.*?)'''|\"\"\"(.*?)\"\"\""
    token_use = r'\b{}\b(?!\s*=)'.format(re.escape(core_token))

    lines = code_snippet.split("\n")
    candidates = []
    inside_docstring = False

    for idx, text in enumerate(lines):
        has_quote_marker = '"""' in text or "'''" in text
        closed_on_line = bool(re.findall(triple_quoted, text))

        if inside_docstring:
            # A lone triple quote closes the multi-line string.
            if has_quote_marker and not closed_on_line:
                inside_docstring = False
            continue
        if text.strip().startswith("#"):
            continue
        if closed_on_line:
            continue
        if has_quote_marker:
            # A lone triple quote opens a multi-line string.
            inside_docstring = True
            continue
        if re.search(r'\bdef\s+task_function\b', text):
            continue
        if re.search(token_use, text):
            candidates.append(idx)

    if not candidates:
        return None, None

    chosen = random.choice(candidates)
    original_line = lines[chosen]
    indent = re.match(r'^\s*', original_line).group(0)
    lines[chosen] = indent + "<line_mask>"
    return '\n'.join(lines), original_line.strip()
|
||||
|
||||
|
||||
def load_json(file_path):
    """Read the UTF-8 JSON file at ``file_path`` and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as source:
        return json.load(source)
|
||||
|
||||
|
||||
def save_json(file_path, data):
    """Write ``data`` to ``file_path`` as UTF-8 JSON, 4-space indented, non-ASCII kept as-is."""
    with open(file_path, 'w', encoding='utf-8') as target:
        json.dump(data, target, indent=4, ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import ast  # local import: only this entry point needs it

    # Every entry of the result folder is treated as one model's directory.
    model_list = os.listdir('../data/result_data/block_completion')
    for model in model_list:
        input_json_file = f'../data/result_data/block_completion/{model}/VersiCode_block_completion.json'
        output_json_file = input_json_file  # results are written back in place
        data = load_json(input_json_file)

        for item in data:
            core_token = item['core_token']
            code = item['code']

            # Core line of the reference code ("N/A" when none is found).
            _, core_line_in_code = process_line_mask(code, core_token)
            item['core_line_in_code'] = core_line_in_code if core_line_in_code else "N/A"

            # model_output_clear holds the repr of a list of strings;
            # literal_eval decodes it without executing model-generated text.
            model_output_clear = item['model_output_clear']
            if isinstance(model_output_clear, str):
                model_output_clear = ast.literal_eval(model_output_clear)

            core_line_in_output_list = []
            for entry in model_output_clear:
                _, core_line_in_output = process_line_mask(entry, core_token)
                core_line_in_output_list.append(core_line_in_output if core_line_in_output else "N/A")

            item['core_line_in_output_clear'] = core_line_in_output_list

        save_json(output_json_file, data)
        print("Done!")
|
||||
|
||||
@ -0,0 +1,108 @@
|
||||
"""
|
||||
Find the core line (the line that uses the target API name) in the old reference code and in each model-generated output for the code migration task
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
|
||||
def process_line_mask(code_snippet, core_token):
    """Pick one line of ``code_snippet`` that uses ``core_token`` and mask it.

    Comment lines, lines holding a complete triple-quoted string, the inside
    of multi-line triple-quoted strings and the ``def task_function`` header
    are never candidates. A line qualifies when it contains ``core_token`` as
    a whole word that is not directly followed by ``=``. One qualifying line
    is chosen with ``random.choice``.

    :param code_snippet: source text to scan.
    :param core_token: token whose usage line is wanted.
    :return: ``(masked_code, masked_line)`` where the chosen line is replaced
        by ``<line_mask>`` (indentation kept) and ``masked_line`` is the
        stripped original line; ``(None, None)`` when ``core_token`` is empty
        or no line qualifies.
    """
    if not core_token:
        return None, None

    triple_quoted = r"'''(.*?)'''|\"\"\"(.*?)\"\"\""
    token_use = r'\b{}\b(?!\s*=)'.format(re.escape(core_token))

    lines = code_snippet.split("\n")
    candidates = []
    inside_docstring = False

    for idx, text in enumerate(lines):
        has_quote_marker = '"""' in text or "'''" in text
        closed_on_line = bool(re.findall(triple_quoted, text))

        if inside_docstring:
            # A lone triple quote closes the multi-line string.
            if has_quote_marker and not closed_on_line:
                inside_docstring = False
            continue
        if text.strip().startswith("#"):
            continue
        if closed_on_line:
            continue
        if has_quote_marker:
            # A lone triple quote opens a multi-line string.
            inside_docstring = True
            continue
        if re.search(r'\bdef\s+task_function\b', text):
            continue
        if re.search(token_use, text):
            candidates.append(idx)

    if not candidates:
        return None, None

    chosen = random.choice(candidates)
    original_line = lines[chosen]
    indent = re.match(r'^\s*', original_line).group(0)
    lines[chosen] = indent + "<line_mask>"
    return '\n'.join(lines), original_line.strip()
|
||||
|
||||
|
||||
def load_json(file_path):
    """Read the UTF-8 JSON file at ``file_path`` and return the decoded object."""
    with open(file_path, 'r', encoding='utf-8') as source:
        return json.load(source)
|
||||
|
||||
|
||||
def save_json(file_path, data):
    """Write ``data`` to ``file_path`` as UTF-8 JSON, 4-space indented, non-ASCII kept as-is."""
    with open(file_path, 'w', encoding='utf-8') as target:
        json.dump(data, target, indent=4, ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import ast  # local import: only this entry point needs it

    # Every entry of the result folder is treated as one model's directory.
    model_list = os.listdir('../data/result_data/code_migration')
    for model in model_list:
        input_json_file = f'../data/result_data/code_migration/{model}/VersiCode_migration.json'
        output_json_file = input_json_file  # results are written back in place
        data = load_json(input_json_file)

        for item in data:
            # The reference core line is searched in the old code using the old name.
            core_token = item['old_name']
            code = item['old_code']

            _, core_line_in_code = process_line_mask(code, core_token)
            item['core_line_in_code'] = core_line_in_code if core_line_in_code else "N/A"

            # model_output_clear holds the repr of a list of strings;
            # literal_eval decodes it without executing model-generated text.
            model_output_clear = item['model_output_clear']
            if isinstance(model_output_clear, str):
                model_output_clear = ast.literal_eval(model_output_clear)

            # The model outputs are searched for the new name.
            core_token = item['new_name']
            core_line_in_output_list = []
            for entry in model_output_clear:
                _, core_line_in_output = process_line_mask(entry, core_token)
                core_line_in_output_list.append(core_line_in_output if core_line_in_output else "N/A")

            item['core_line_in_output_clear'] = core_line_in_output_list

        save_json(output_json_file, data)
        print("Done!")
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
"""
|
||||
Strip the <start> and <end> markers generated by the model during inference
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
import ast  # used only by this script section to decode stringified lists

model_name = ''  # set to the model's result directory name before running
task = 'block_completion'

result_path = f'../data/result_data/{task}/{model_name}/VersiCode_block_completion.json'  # Modify the file according to the task format

with open(result_path, 'r', encoding='utf-8') as fr:
    data_list = json.load(fr)

for data in data_list:
    # model_output holds the repr of a list of raw generations; literal_eval
    # decodes it without executing model-generated text.
    model_output_list = data['model_output']
    if isinstance(model_output_list, str):
        model_output_list = ast.literal_eval(model_output_list)

    temp_list = []
    for output in model_output_list:
        if "<start>" in output and "<end>" in output:
            # Keep the text between the first <start> and the first <end>,
            # dropping Markdown code fences.
            start_index = output.find("<start>") + len("<start>")
            end_index = output.find("<end>")
            content = output[start_index:end_index].replace('```python', '').replace('```', '')
        else:
            content = "no_answer"
        temp_list.append(content)

    # Stored as a stringified list; downstream scripts decode it again.
    data['model_output_clear'] = str(temp_list)

with open(result_path, 'w', encoding='utf-8') as fw:
    json.dump(data_list, fw, indent=4, ensure_ascii=False)
|
||||
146
evaluation/benchmarks/versicode/requirements.txt
Normal file
146
evaluation/benchmarks/versicode/requirements.txt
Normal file
@ -0,0 +1,146 @@
|
||||
aiohappyeyeballs==2.6.1
|
||||
aiohttp==3.11.18
|
||||
aiosignal==1.3.2
|
||||
airportsdata==20250224
|
||||
annotated-types==0.7.0
|
||||
anyio==4.9.0
|
||||
astor==0.8.1
|
||||
attrs==25.3.0
|
||||
blake3==1.0.4
|
||||
cachetools==5.5.2
|
||||
certifi==2025.1.31
|
||||
charset-normalizer==3.4.1
|
||||
click==8.1.8
|
||||
cloudpickle==3.1.1
|
||||
compressed-tensors==0.9.3
|
||||
cupy-cuda12x==13.4.1
|
||||
Deprecated==1.2.18
|
||||
depyf==0.18.0
|
||||
dill==0.4.0
|
||||
diskcache==5.6.3
|
||||
distro==1.9.0
|
||||
dnspython==2.7.0
|
||||
einops==0.8.1
|
||||
email_validator==2.2.0
|
||||
fastapi==0.115.12
|
||||
fastapi-cli==0.0.7
|
||||
fastrlock==0.8.3
|
||||
filelock==3.18.0
|
||||
frozenlist==1.6.0
|
||||
fsspec==2025.3.2
|
||||
gguf==0.16.2
|
||||
googleapis-common-protos==1.70.0
|
||||
grpcio==1.71.0
|
||||
h11==0.14.0
|
||||
hf-xet==1.0.3
|
||||
httpcore==1.0.8
|
||||
httptools==0.6.4
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.30.2
|
||||
idna==3.10
|
||||
importlib_metadata==8.0.0
|
||||
interegular==0.3.3
|
||||
Jinja2==3.1.6
|
||||
jiter==0.9.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-specifications==2024.10.1
|
||||
lark==1.2.2
|
||||
llguidance==0.7.16
|
||||
llvmlite==0.44.0
|
||||
lm-format-enforcer==0.10.11
|
||||
markdown-it-py==3.0.0
|
||||
MarkupSafe==3.0.2
|
||||
mdurl==0.1.2
|
||||
mistral_common==1.5.4
|
||||
mpmath==1.3.0
|
||||
msgpack==1.1.0
|
||||
msgspec==0.19.0
|
||||
multidict==6.4.3
|
||||
nest-asyncio==1.6.0
|
||||
networkx==3.4.2
|
||||
ninja==1.11.1.4
|
||||
numba==0.61.2
|
||||
numpy==2.2.5
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
nvidia-cuda-runtime-cu12==12.4.127
|
||||
nvidia-cudnn-cu12==9.1.0.70
|
||||
nvidia-cufft-cu12==11.2.1.3
|
||||
nvidia-curand-cu12==10.3.5.147
|
||||
nvidia-cusolver-cu12==11.6.1.9
|
||||
nvidia-cusparse-cu12==12.3.1.170
|
||||
nvidia-cusparselt-cu12==0.6.2
|
||||
nvidia-nccl-cu12==2.21.5
|
||||
nvidia-nvjitlink-cu12==12.4.127
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
openai==1.75.0
|
||||
opencv-python-headless==4.11.0.86
|
||||
opentelemetry-api==1.26.0
|
||||
opentelemetry-exporter-otlp==1.26.0
|
||||
opentelemetry-exporter-otlp-proto-common==1.26.0
|
||||
opentelemetry-exporter-otlp-proto-grpc==1.26.0
|
||||
opentelemetry-exporter-otlp-proto-http==1.26.0
|
||||
opentelemetry-proto==1.26.0
|
||||
opentelemetry-sdk==1.26.0
|
||||
opentelemetry-semantic-conventions==0.47b0
|
||||
opentelemetry-semantic-conventions-ai==0.4.3
|
||||
outlines==0.1.11
|
||||
outlines_core==0.1.26
|
||||
packaging==25.0
|
||||
partial-json-parser==0.2.1.1.post5
|
||||
pillow==11.2.1
|
||||
prometheus-fastapi-instrumentator==7.1.0
|
||||
prometheus_client==0.21.1
|
||||
propcache==0.3.1
|
||||
protobuf==4.25.6
|
||||
psutil==7.0.0
|
||||
py-cpuinfo==9.0.0
|
||||
pycountry==24.6.1
|
||||
pydantic==2.11.3
|
||||
pydantic_core==2.33.1
|
||||
Pygments==2.19.1
|
||||
python-dotenv==1.1.0
|
||||
python-json-logger==3.3.0
|
||||
python-multipart==0.0.20
|
||||
PyYAML==6.0.2
|
||||
pyzmq==26.4.0
|
||||
ray==2.43.0
|
||||
referencing==0.36.2
|
||||
regex==2024.11.6
|
||||
requests==2.32.3
|
||||
rich==14.0.0
|
||||
rich-toolkit==0.14.1
|
||||
rpds-py==0.24.0
|
||||
safetensors==0.5.3
|
||||
scipy==1.15.2
|
||||
sentencepiece==0.2.0
|
||||
setuptools==75.8.0
|
||||
shellingham==1.5.4
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
starlette==0.46.2
|
||||
sympy==1.13.1
|
||||
tiktoken==0.9.0
|
||||
tokenizers==0.21.1
|
||||
torch==2.6.0
|
||||
torchaudio==2.6.0
|
||||
torchvision==0.21.0
|
||||
tqdm==4.67.1
|
||||
transformers==4.51.3
|
||||
triton==3.2.0
|
||||
typer==0.15.2
|
||||
typing-inspection==0.4.0
|
||||
typing_extensions==4.13.2
|
||||
urllib3==2.4.0
|
||||
uvicorn==0.34.2
|
||||
uvloop==0.21.0
|
||||
vllm==0.8.4
|
||||
watchfiles==1.0.5
|
||||
websockets==15.0.1
|
||||
wheel==0.45.1
|
||||
wrapt==1.17.2
|
||||
xformers==0.0.29.post2
|
||||
xgrammar==0.1.18
|
||||
yarl==1.20.0
|
||||
zipp==3.21.0
|
||||
Loading…
x
Reference in New Issue
Block a user