mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Fix style issues with pre-commit (#7318)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
f1149defc9
commit
83458f5146
@ -1,8 +1,4 @@
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from tree_sitter import Language, Parser
|
||||
|
||||
|
||||
def total_byte_entropy_stats(python_code):
|
||||
@ -324,8 +320,8 @@ def compute_regression(results):
|
||||
def compute_readability(python_code):
|
||||
# Create parser and set up language
|
||||
import tree_sitter_python
|
||||
from tree_sitter import Parser, Language
|
||||
|
||||
from tree_sitter import Language, Parser
|
||||
|
||||
parser = Parser(Language(tree_sitter_python.language()))
|
||||
|
||||
results = code_stats(python_code)
|
||||
|
||||
@ -6,12 +6,11 @@ import numpy as np
|
||||
from fuzzywuzzy import fuzz
|
||||
from rouge import Rouge
|
||||
|
||||
|
||||
|
||||
# increase recursion depth to ensure ROUGE can be calculated for long sentences
|
||||
if sys.getrecursionlimit() < 10_000:
|
||||
sys.setrecursionlimit(10_000)
|
||||
|
||||
|
||||
def bleu(gold: List[str], pred: List[str]) -> float:
|
||||
"""
|
||||
Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
|
||||
@ -39,7 +38,7 @@ def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
|
||||
:return: list of BLEU scores
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
return [bleu(gold, pred) for gold, pred in zip(golds, preds)]
|
||||
|
||||
|
||||
@ -52,7 +51,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
|
||||
:return: corpus-level BLEU score
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
return 100.0 * nltk.translate.bleu_score.corpus_bleu(
|
||||
[[gold] for gold in golds],
|
||||
preds,
|
||||
@ -62,7 +61,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
|
||||
|
||||
|
||||
def edit_sim(
|
||||
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
|
||||
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
|
||||
) -> float:
|
||||
"""
|
||||
Calculate char-level edit similarity, in the range of 0~100.
|
||||
@ -84,7 +83,7 @@ def edit_sim(
|
||||
def batch_edit_sim(
|
||||
golds: List[Union[str, List[str]]],
|
||||
preds: List[Union[str, List[str]]],
|
||||
sep: str = " ",
|
||||
sep: str = ' ',
|
||||
) -> List[float]:
|
||||
"""
|
||||
Calculate char-level edit similarity for a batch of sentences.
|
||||
@ -95,11 +94,11 @@ def batch_edit_sim(
|
||||
:return: list of char-level edit similarity
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
return [edit_sim(gold, pred, sep) for gold, pred in zip(golds, preds)]
|
||||
|
||||
|
||||
T = TypeVar("T")
|
||||
T = TypeVar('T')
|
||||
|
||||
|
||||
def exact_match(gold: T, pred: T) -> float:
|
||||
@ -124,12 +123,12 @@ def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
|
||||
:return: list of exact match accuracy
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
return [exact_match(gold, pred) for gold, pred in zip(golds, preds)]
|
||||
|
||||
|
||||
def rouge_l(
|
||||
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
|
||||
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
|
||||
) -> Dict[str, float]:
|
||||
"""
|
||||
Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
|
||||
@ -139,7 +138,7 @@ def rouge_l(
|
||||
:return: {"p": precision, "r": recall, "f": F1}
|
||||
"""
|
||||
if len(pred) == 0 or len(gold) == 0:
|
||||
return {"p": 0.0, "r": 0.0, "f": 0.0}
|
||||
return {'p': 0.0, 'r': 0.0, 'f': 0.0}
|
||||
if isinstance(gold, list):
|
||||
gold = sep.join(gold)
|
||||
if isinstance(pred, list):
|
||||
@ -147,15 +146,15 @@ def rouge_l(
|
||||
try:
|
||||
rouge = Rouge()
|
||||
scores = rouge.get_scores(hyps=pred, refs=gold, avg=True)
|
||||
return {x: scores["rouge-l"][x] * 100.0 for x in ["p", "r", "f"]}
|
||||
return {x: scores['rouge-l'][x] * 100.0 for x in ['p', 'r', 'f']}
|
||||
except ValueError:
|
||||
return {"p": 0.0, "r": 0.0, "f": 0.0}
|
||||
return {'p': 0.0, 'r': 0.0, 'f': 0.0}
|
||||
|
||||
|
||||
def batch_rouge_l(
|
||||
golds: List[Union[str, List[str]]],
|
||||
preds: List[Union[str, List[str]]],
|
||||
sep: str = " ",
|
||||
sep: str = ' ',
|
||||
) -> Dict[str, List[float]]:
|
||||
"""
|
||||
Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
|
||||
@ -166,9 +165,9 @@ def batch_rouge_l(
|
||||
:return: list of {"p": precision, "r": recall, "f": F1}
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
scores = [rouge_l(gold, pred, sep) for gold, pred in zip(golds, preds)]
|
||||
return {x: [score[x] for score in scores] for x in ["p", "r", "f"]}
|
||||
return {x: [score[x] for score in scores] for x in ['p', 'r', 'f']}
|
||||
|
||||
|
||||
def accuracy(
|
||||
@ -220,7 +219,7 @@ def batch_accuracy(
|
||||
:return: list of accuracy
|
||||
"""
|
||||
if len(golds) != len(preds):
|
||||
raise ValueError("golds and preds must have the same length")
|
||||
raise ValueError('golds and preds must have the same length')
|
||||
return [accuracy(gold, pred, ignore) for gold, pred in zip(golds, preds)]
|
||||
|
||||
|
||||
@ -274,7 +273,7 @@ def self_bleu(samples: List[List[str]]) -> float:
|
||||
return np.mean(scores).item()
|
||||
|
||||
|
||||
def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
|
||||
def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
|
||||
"""
|
||||
Calculate self-edit-distance among the samples.
|
||||
:param samples: the chosen m samples
|
||||
@ -300,12 +299,11 @@ def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
|
||||
return np.mean(scores).item()
|
||||
|
||||
|
||||
|
||||
QUALITY_METRICS: Dict[str, Callable[[List[str], List[str]], float]] = {
|
||||
"bleu": bleu,
|
||||
"xmatch": exact_match,
|
||||
"edit-sim": edit_sim,
|
||||
"rouge-f": lambda g, p: rouge_l(g, p)["f"],
|
||||
"rouge-p": lambda g, p: rouge_l(g, p)["p"],
|
||||
"rouge-r": lambda g, p: rouge_l(g, p)["r"],
|
||||
}
|
||||
'bleu': bleu,
|
||||
'xmatch': exact_match,
|
||||
'edit-sim': edit_sim,
|
||||
'rouge-f': lambda g, p: rouge_l(g, p)['f'],
|
||||
'rouge-p': lambda g, p: rouge_l(g, p)['p'],
|
||||
'rouge-r': lambda g, p: rouge_l(g, p)['r'],
|
||||
}
|
||||
|
||||
@ -1,30 +1,41 @@
|
||||
import re
|
||||
|
||||
from pygments.lexers.python import PythonLexer
|
||||
|
||||
|
||||
def tokenize_code(code):
|
||||
lexer = PythonLexer()
|
||||
tokens = process_pygments_tokens(lexer.get_tokens(code))
|
||||
return tokens
|
||||
|
||||
|
||||
def process_pygments_tokens(tokens):
|
||||
new_tokens = []
|
||||
|
||||
for token in tokens:
|
||||
if str(token[0]) == "Token.Text" and re.match(r'\s+', token[1]) or str(token[0]) == "Token.Text.Whitespace":
|
||||
if (
|
||||
str(token[0]) == 'Token.Text'
|
||||
and re.match(r'\s+', token[1])
|
||||
or str(token[0]) == 'Token.Text.Whitespace'
|
||||
):
|
||||
continue
|
||||
new_tokens.append(token[1])
|
||||
|
||||
new_tokens_final = []
|
||||
i = 0
|
||||
while i < len(new_tokens)-2:
|
||||
if new_tokens[i] == '"' and new_tokens[i+1]=='STR' and new_tokens[i+2] == '"':
|
||||
new_tokens_final.append("\"STR\"")
|
||||
while i < len(new_tokens) - 2:
|
||||
if (
|
||||
new_tokens[i] == '"'
|
||||
and new_tokens[i + 1] == 'STR'
|
||||
and new_tokens[i + 2] == '"'
|
||||
):
|
||||
new_tokens_final.append('"STR"')
|
||||
i = i + 3
|
||||
else:
|
||||
new_tokens_final.append(new_tokens[i])
|
||||
i = i + 1
|
||||
|
||||
for i in range(len(new_tokens)-2, len(new_tokens)):
|
||||
|
||||
for i in range(len(new_tokens) - 2, len(new_tokens)):
|
||||
if i >= 0:
|
||||
new_tokens_final.append(new_tokens[i])
|
||||
|
||||
|
||||
@ -8,7 +8,6 @@ import os
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
from evaluation.testgeneval.eval_infer import process_test_suite
|
||||
from openhands.events.serialization import event_from_dict
|
||||
|
||||
tqdm.pandas()
|
||||
|
||||
@ -20,7 +20,8 @@ print(
|
||||
f'Downloading gold test suites from {args.dataset_name} (split: {args.split}) to {output_filepath}'
|
||||
)
|
||||
test_suites = [
|
||||
{'instance_id': row['instance_id'], 'test_suite': row['test_src']} for row in dataset
|
||||
{'instance_id': row['instance_id'], 'test_suite': row['test_src']}
|
||||
for row in dataset
|
||||
]
|
||||
print(f'{len(test_suites)} test suites loaded')
|
||||
pd.DataFrame(test_suites).to_json(output_filepath, lines=True, orient='records')
|
||||
|
||||
@ -90,9 +90,7 @@ if __name__ == '__main__':
|
||||
break
|
||||
|
||||
# print the error counter (with percentage)
|
||||
print(
|
||||
f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)'
|
||||
)
|
||||
print(f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)')
|
||||
print(
|
||||
f'Average mutation score for {num_lines} ({mutation_score / num_lines * 100:.2f}%)'
|
||||
)
|
||||
|
||||
@ -79,7 +79,7 @@ describe("Actions Service", () => {
|
||||
// Mock implementation to capture the message
|
||||
let capturedPartialMessage = "";
|
||||
(store.dispatch as any).mockImplementation((action: any) => {
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
action.payload.includes("believe that the task was **completed partially**")) {
|
||||
capturedPartialMessage = action.payload;
|
||||
}
|
||||
@ -87,7 +87,7 @@ describe("Actions Service", () => {
|
||||
|
||||
handleActionMessage(messagePartial);
|
||||
expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");
|
||||
|
||||
|
||||
// Test not completed
|
||||
const messageNotCompleted: ActionMessage = {
|
||||
id: 2,
|
||||
@ -106,7 +106,7 @@ describe("Actions Service", () => {
|
||||
// Mock implementation to capture the message
|
||||
let capturedNotCompletedMessage = "";
|
||||
(store.dispatch as any).mockImplementation((action: any) => {
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
action.payload.includes("believe that the task was **not completed**")) {
|
||||
capturedNotCompletedMessage = action.payload;
|
||||
}
|
||||
@ -114,7 +114,7 @@ describe("Actions Service", () => {
|
||||
|
||||
handleActionMessage(messageNotCompleted);
|
||||
expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");
|
||||
|
||||
|
||||
// Test completed successfully
|
||||
const messageCompleted: ActionMessage = {
|
||||
id: 3,
|
||||
@ -133,7 +133,7 @@ describe("Actions Service", () => {
|
||||
// Mock implementation to capture the message
|
||||
let capturedCompletedMessage = "";
|
||||
(store.dispatch as any).mockImplementation((action: any) => {
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
if (action.type === "chat/addAssistantMessage" &&
|
||||
action.payload.includes("believe that the task was **completed successfully**")) {
|
||||
capturedCompletedMessage = action.payload;
|
||||
}
|
||||
|
||||
@ -65,7 +65,9 @@ async def get_github_user(
|
||||
access_token: SecretStr | None = Depends(get_access_token),
|
||||
):
|
||||
if provider_tokens:
|
||||
client = ProviderHandler(provider_tokens=provider_tokens, external_auth_token=access_token)
|
||||
client = ProviderHandler(
|
||||
provider_tokens=provider_tokens, external_auth_token=access_token
|
||||
)
|
||||
|
||||
try:
|
||||
user: User = await client.get_user()
|
||||
@ -164,7 +166,7 @@ async def search_github_repositories(
|
||||
@app.get('/suggested-tasks', response_model=list[SuggestedTask])
|
||||
async def get_suggested_tasks(
|
||||
provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
|
||||
access_token: SecretStr | None = Depends(get_access_token)
|
||||
access_token: SecretStr | None = Depends(get_access_token),
|
||||
):
|
||||
"""Get suggested tasks for the authenticated user across their most recently pushed repositories.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user