Fix style issues with pre-commit (#7318)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Engel Nyst 2025-03-18 02:34:27 +01:00 committed by GitHub
parent f1149defc9
commit 83458f5146
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 55 additions and 50 deletions

View File

@ -1,8 +1,4 @@
import math
import os
from pathlib import Path
from tree_sitter import Language, Parser
def total_byte_entropy_stats(python_code):
@ -324,8 +320,8 @@ def compute_regression(results):
def compute_readability(python_code):
# Create parser and set up language
import tree_sitter_python
from tree_sitter import Parser, Language
from tree_sitter import Language, Parser
parser = Parser(Language(tree_sitter_python.language()))
results = code_stats(python_code)

View File

@ -6,12 +6,11 @@ import numpy as np
from fuzzywuzzy import fuzz
from rouge import Rouge
# increase recursion depth to ensure ROUGE can be calculated for long sentences
if sys.getrecursionlimit() < 10_000:
sys.setrecursionlimit(10_000)
def bleu(gold: List[str], pred: List[str]) -> float:
"""
Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
@ -39,7 +38,7 @@ def batch_bleu(golds: List[List[str]], preds: List[List[str]]) -> List[float]:
:return: list of BLEU scores
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
return [bleu(gold, pred) for gold, pred in zip(golds, preds)]
@ -52,7 +51,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
:return: corpus-level BLEU score
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
return 100.0 * nltk.translate.bleu_score.corpus_bleu(
[[gold] for gold in golds],
preds,
@ -62,7 +61,7 @@ def corpus_bleu(golds: List[List[str]], preds: List[List[str]]) -> float:
def edit_sim(
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
) -> float:
"""
Calculate char-level edit similarity, in the range of 0~100.
@ -84,7 +83,7 @@ def edit_sim(
def batch_edit_sim(
golds: List[Union[str, List[str]]],
preds: List[Union[str, List[str]]],
sep: str = " ",
sep: str = ' ',
) -> List[float]:
"""
Calculate char-level edit similarity for a batch of sentences.
@ -95,11 +94,11 @@ def batch_edit_sim(
:return: list of char-level edit similarity
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
return [edit_sim(gold, pred, sep) for gold, pred in zip(golds, preds)]
T = TypeVar("T")
T = TypeVar('T')
def exact_match(gold: T, pred: T) -> float:
@ -124,12 +123,12 @@ def batch_exact_match(golds: List[T], preds: List[T]) -> List[float]:
:return: list of exact match accuracy
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
return [exact_match(gold, pred) for gold, pred in zip(golds, preds)]
def rouge_l(
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = " "
gold: Union[str, List[str]], pred: Union[str, List[str]], sep: str = ' '
) -> Dict[str, float]:
"""
Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
@ -139,7 +138,7 @@ def rouge_l(
:return: {"p": precision, "r": recall, "f": F1}
"""
if len(pred) == 0 or len(gold) == 0:
return {"p": 0.0, "r": 0.0, "f": 0.0}
return {'p': 0.0, 'r': 0.0, 'f': 0.0}
if isinstance(gold, list):
gold = sep.join(gold)
if isinstance(pred, list):
@ -147,15 +146,15 @@ def rouge_l(
try:
rouge = Rouge()
scores = rouge.get_scores(hyps=pred, refs=gold, avg=True)
return {x: scores["rouge-l"][x] * 100.0 for x in ["p", "r", "f"]}
return {x: scores['rouge-l'][x] * 100.0 for x in ['p', 'r', 'f']}
except ValueError:
return {"p": 0.0, "r": 0.0, "f": 0.0}
return {'p': 0.0, 'r': 0.0, 'f': 0.0}
def batch_rouge_l(
golds: List[Union[str, List[str]]],
preds: List[Union[str, List[str]]],
sep: str = " ",
sep: str = ' ',
) -> Dict[str, List[float]]:
"""
Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
@ -166,9 +165,9 @@ def batch_rouge_l(
:return: list of {"p": precision, "r": recall, "f": F1}
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
scores = [rouge_l(gold, pred, sep) for gold, pred in zip(golds, preds)]
return {x: [score[x] for score in scores] for x in ["p", "r", "f"]}
return {x: [score[x] for score in scores] for x in ['p', 'r', 'f']}
def accuracy(
@ -220,7 +219,7 @@ def batch_accuracy(
:return: list of accuracy
"""
if len(golds) != len(preds):
raise ValueError("golds and preds must have the same length")
raise ValueError('golds and preds must have the same length')
return [accuracy(gold, pred, ignore) for gold, pred in zip(golds, preds)]
@ -274,7 +273,7 @@ def self_bleu(samples: List[List[str]]) -> float:
return np.mean(scores).item()
def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
def self_edit_distance(samples: List[Union[str, List[str]]], sep=' ') -> float:
"""
Calculate self-edit-distance among the samples.
:param samples: the chosen m samples
@ -300,12 +299,11 @@ def self_edit_distance(samples: List[Union[str, List[str]]], sep=" ") -> float:
return np.mean(scores).item()
QUALITY_METRICS: Dict[str, Callable[[List[str], List[str]], float]] = {
"bleu": bleu,
"xmatch": exact_match,
"edit-sim": edit_sim,
"rouge-f": lambda g, p: rouge_l(g, p)["f"],
"rouge-p": lambda g, p: rouge_l(g, p)["p"],
"rouge-r": lambda g, p: rouge_l(g, p)["r"],
}
'bleu': bleu,
'xmatch': exact_match,
'edit-sim': edit_sim,
'rouge-f': lambda g, p: rouge_l(g, p)['f'],
'rouge-p': lambda g, p: rouge_l(g, p)['p'],
'rouge-r': lambda g, p: rouge_l(g, p)['r'],
}

View File

@ -1,30 +1,41 @@
import re
from pygments.lexers.python import PythonLexer
def tokenize_code(code):
    """Tokenize ``code`` with Pygments' ``PythonLexer``.

    The raw lexer output is handed to ``process_pygments_tokens`` and the
    result of that call is returned as-is.
    """
    raw_tokens = PythonLexer().get_tokens(code)
    return process_pygments_tokens(raw_tokens)
def process_pygments_tokens(tokens):
new_tokens = []
for token in tokens:
if str(token[0]) == "Token.Text" and re.match(r'\s+', token[1]) or str(token[0]) == "Token.Text.Whitespace":
if (
str(token[0]) == 'Token.Text'
and re.match(r'\s+', token[1])
or str(token[0]) == 'Token.Text.Whitespace'
):
continue
new_tokens.append(token[1])
new_tokens_final = []
i = 0
while i < len(new_tokens)-2:
if new_tokens[i] == '"' and new_tokens[i+1]=='STR' and new_tokens[i+2] == '"':
new_tokens_final.append("\"STR\"")
while i < len(new_tokens) - 2:
if (
new_tokens[i] == '"'
and new_tokens[i + 1] == 'STR'
and new_tokens[i + 2] == '"'
):
new_tokens_final.append('"STR"')
i = i + 3
else:
new_tokens_final.append(new_tokens[i])
i = i + 1
for i in range(len(new_tokens)-2, len(new_tokens)):
for i in range(len(new_tokens) - 2, len(new_tokens)):
if i >= 0:
new_tokens_final.append(new_tokens[i])

View File

@ -8,7 +8,6 @@ import os
import pandas as pd
from tqdm import tqdm
from evaluation.testgeneval.eval_infer import process_test_suite
from openhands.events.serialization import event_from_dict
tqdm.pandas()

View File

@ -20,7 +20,8 @@ print(
f'Downloading gold test suites from {args.dataset_name} (split: {args.split}) to {output_filepath}'
)
test_suites = [
{'instance_id': row['instance_id'], 'test_suite': row['test_src']} for row in dataset
{'instance_id': row['instance_id'], 'test_suite': row['test_src']}
for row in dataset
]
print(f'{len(test_suites)} test suites loaded')
pd.DataFrame(test_suites).to_json(output_filepath, lines=True, orient='records')

View File

@ -90,9 +90,7 @@ if __name__ == '__main__':
break
# print the error counter (with percentage)
print(
f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)'
)
print(f'Average coverage for {num_lines} ({coverage / num_lines * 100:.2f}%)')
print(
f'Average mutation score for {num_lines} ({mutation_score / num_lines * 100:.2f}%)'
)

View File

@ -79,7 +79,7 @@ describe("Actions Service", () => {
// Mock implementation to capture the message
let capturedPartialMessage = "";
(store.dispatch as any).mockImplementation((action: any) => {
if (action.type === "chat/addAssistantMessage" &&
if (action.type === "chat/addAssistantMessage" &&
action.payload.includes("believe that the task was **completed partially**")) {
capturedPartialMessage = action.payload;
}
@ -87,7 +87,7 @@ describe("Actions Service", () => {
handleActionMessage(messagePartial);
expect(capturedPartialMessage).toContain("I believe that the task was **completed partially**");
// Test not completed
const messageNotCompleted: ActionMessage = {
id: 2,
@ -106,7 +106,7 @@ describe("Actions Service", () => {
// Mock implementation to capture the message
let capturedNotCompletedMessage = "";
(store.dispatch as any).mockImplementation((action: any) => {
if (action.type === "chat/addAssistantMessage" &&
if (action.type === "chat/addAssistantMessage" &&
action.payload.includes("believe that the task was **not completed**")) {
capturedNotCompletedMessage = action.payload;
}
@ -114,7 +114,7 @@ describe("Actions Service", () => {
handleActionMessage(messageNotCompleted);
expect(capturedNotCompletedMessage).toContain("I believe that the task was **not completed**");
// Test completed successfully
const messageCompleted: ActionMessage = {
id: 3,
@ -133,7 +133,7 @@ describe("Actions Service", () => {
// Mock implementation to capture the message
let capturedCompletedMessage = "";
(store.dispatch as any).mockImplementation((action: any) => {
if (action.type === "chat/addAssistantMessage" &&
if (action.type === "chat/addAssistantMessage" &&
action.payload.includes("believe that the task was **completed successfully**")) {
capturedCompletedMessage = action.payload;
}

View File

@ -65,7 +65,9 @@ async def get_github_user(
access_token: SecretStr | None = Depends(get_access_token),
):
if provider_tokens:
client = ProviderHandler(provider_tokens=provider_tokens, external_auth_token=access_token)
client = ProviderHandler(
provider_tokens=provider_tokens, external_auth_token=access_token
)
try:
user: User = await client.get_user()
@ -164,7 +166,7 @@ async def search_github_repositories(
@app.get('/suggested-tasks', response_model=list[SuggestedTask])
async def get_suggested_tasks(
provider_tokens: PROVIDER_TOKEN_TYPE | None = Depends(get_provider_tokens),
access_token: SecretStr | None = Depends(get_access_token)
access_token: SecretStr | None = Depends(get_access_token),
):
"""Get suggested tasks for the authenticated user across their most recently pushed repositories.