Mirror of https://github.com/OpenHands/OpenHands.git (synced 2025-12-25 21:36:52 +08:00)
chore(lint): Apply comprehensive linting and formatting fixes (#10287)
Co-authored-by: openhands <openhands@all-hands.dev>
parent e39bf80239
commit c2f46200c0
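Nearly every hunk below applies the same docstring normalization: the summary is moved up so it starts immediately after the opening triple quotes, and one-line docstrings are closed on the same line. A minimal before/after sketch of the pattern (the function name here is hypothetical, not taken from the commit):

    # Before: summary on the line after the opening quotes
    def load_config(path: str) -> dict:
        """
        Load a configuration file.
        """
        ...

    # After: summary starts on the first line of the docstring
    def load_config(path: str) -> dict:
        """Load a configuration file."""
        ...

Multi-line docstrings keep their remaining lines; only the opening (and, for one-liners, the closing) quotes move.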
@@ -93,8 +93,7 @@ def build_vscode_extension():
def build(setup_kwargs):
"""
This function is called by Poetry during the build process.
"""This function is called by Poetry during the build process.
`setup_kwargs` is a dictionary that will be passed to `setuptools.setup()`.
"""
print('--- Running custom Poetry build script (build_vscode.py) ---')

@@ -506,7 +506,6 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame:
Returns:
Filtered dataset based on split type
"""
filtered_dataset = pd.concat(
[
dataset[dataset['repo'].str.split('/').str[1] == repo]

@@ -89,8 +89,7 @@ def get_config(
def get_dv_query_for_real(
datasets, question, domain_knowledge=None, workflow_tags=None
):
"""
Prepare a structured query for the agent to execute on the specified datasets.
"""Prepare a structured query for the agent to execute on the specified datasets.
This function constructs a query by compiling metadata from the provided datasets, along with any relevant domain knowledge and workflow tags.

@@ -104,7 +103,6 @@ def get_dv_query_for_real(
query_to_dv: Query to be run on the dataset
dataset_meta: Metadata of the dataset
"""
dataset_meta = ''
for dataset_metadata in datasets:
dataset_meta += 'Dataset name: ' + dataset_metadata['name']

@@ -140,8 +138,7 @@ def get_dv_query_for_real(
def initialize_runtime(runtime: Runtime, data_files: list[str]):
"""
Initialize the runtime for the agent.
"""Initialize the runtime for the agent.
This function is called before the runtime is used to run the agent.
"""

@@ -231,8 +228,7 @@ def process_instance(
metadata: EvalMetadata,
reset_logger: bool = True,
):
"""
Process and evaluate a single instance of the dataset.
"""Process and evaluate a single instance of the dataset.
This function executes the OpenHands agent
for a specific instance of the dataset. It retrieves

@@ -247,7 +243,6 @@ def process_instance(
Returns:
output: EvalOutput object
"""
config = get_config(metadata)
# Setup the logger properly, so you can run

@@ -356,8 +351,7 @@ def list_csv_files(list_of_datasets):
def create_dataset(repo_location: str, split: str = 'test'):
"""
Create a dataset from the discoverybench repository
"""Create a dataset from the discoverybench repository
by walking through the repository and extracting metadata
from the metadata_{}.json files

@@ -368,7 +362,6 @@ def create_dataset(repo_location: str, split: str = 'test'):
Returns:
df: DataFrame containing the dataset instances
"""
data_dict = {}
data_location = os.path.join(repo_location, 'discoverybench', 'real', split)

@@ -105,8 +105,7 @@ def process_instance(
log_dir: str | None = None,
runtime_failure_count: int = 0,
) -> EvalOutput:
"""
Evaluate agent performance on a SWE-bench problem instance.
"""Evaluate agent performance on a SWE-bench problem instance.
Note that this signature differs from the expected input to `run_evaluation`. Use
`functools.partial` to provide optional arguments before passing to the evaluation harness.
@@ -1,11 +1,8 @@
"""
Utilities for handling binary files and patch generation in SWE-bench evaluation.
"""
"""Utilities for handling binary files and patch generation in SWE-bench evaluation."""
def remove_binary_diffs(patch_text):
"""
Remove binary file diffs from a git patch.
"""Remove binary file diffs from a git patch.
Args:
patch_text (str): The git patch text

@@ -36,8 +33,7 @@ def remove_binary_diffs(patch_text):
def remove_binary_files_from_git():
"""
Generate a bash command to remove binary files from git staging.
"""Generate a bash command to remove binary files from git staging.
Returns:
str: A bash command that removes binary files from git staging

@@ -111,8 +111,7 @@ def process_instance(
runtime_failure_count: int = 0,
conditional_imports: ConditionalImports | None = None,
) -> EvalOutput:
"""
Evaluate agent performance on a SWE-bench problem instance.
"""Evaluate agent performance on a SWE-bench problem instance.
Note that this signature differs from the expected input to `run_evaluation`. Use
`functools.partial` to provide optional arguments before passing to the evaluation harness.

@@ -16,8 +16,7 @@ from openhands.core.logger import openhands_logger as logger
class LocEvaluator:
def __init__(self, args):
"""
Localization evaluation.
"""Localization evaluation.
Args:
args: all main arguments

@@ -76,8 +75,7 @@ class LocEvaluator:
self.task_resolved = False
def _init_dir(self, directory_path):
"""
Check if a directory exists and create it if it doesn't.
"""Check if a directory exists and create it if it doesn't.
Args:
directory_path (str): Path to the directory to check/create

@@ -207,8 +205,7 @@ class LocEvaluator:
self._compute_avg_over_all()
def _write_to_json(self, data, file_name):
"""
Writes the current object data to a JSON file.
"""Writes the current object data to a JSON file.
Returns:
bool: True if writing was successful, False otherwise.

@@ -225,8 +222,7 @@ class LocEvaluator:
return False
def read_from_json(self, file_path):
"""
Reads data from a JSON file and loads it into the current object.
"""Reads data from a JSON file and loads it into the current object.
Returns:
dict: The loaded JSON data, or an empty dict if the file doesn't exist

@@ -253,8 +249,7 @@ class LocEvaluator:
return {}
def read_from_jsonl(self, file_path):
"""
Reads data from a JSON file and loads it into the current object.
"""Reads data from a JSON file and loads it into the current object.
Returns:
dict: The loaded JSON data, or an empty dict if the file doesn't exist

@@ -294,8 +289,7 @@ class LocEvaluator:
history_idx += 1
def _parse_string_to_dict(self, dict_string) -> dict:
"""
Convert a string representation of a dictionary to an actual dictionary.
"""Convert a string representation of a dictionary to an actual dictionary.
Args:
dict_string (str): String representation of a dictionary

@@ -328,8 +322,7 @@ class LocEvaluator:
return None
def _parse_value_from_args(self, argument_str: str, key: str) -> str:
"""
Parse a specific key's value from argument string.
"""Parse a specific key's value from argument string.
Args:
argument_str (str): The argument string containing key-value pairs

@@ -407,8 +400,7 @@ class LocEvaluator:
return ''
def _parse_path_from_args(self, argument_str: str) -> str:
"""
Parse path from argument string.
"""Parse path from argument string.
Args:
argument_str (str): The argument string containing path information

@@ -419,8 +411,7 @@ class LocEvaluator:
return self._parse_value_from_args(argument_str, 'path')
def _parse_func_names_from_str(self, code_patch) -> list:
"""
Parse function names from the new_str code patch.
"""Parse function names from the new_str code patch.
Args:
code_patch: Either a string (argument string) or already extracted new_str code
@@ -801,8 +792,7 @@ class LocEvaluator:
def swe_data_loader(args):
"""
Loading SWE-Bench data.
"""Loading SWE-Bench data.
Args:
args: Main arguments.

@@ -834,8 +824,7 @@ def swe_data_loader(args):
def infer_data_loader(args):
"""
Load instance IDs.
"""Load instance IDs.
Args:
args: Main arguments.

@@ -868,8 +857,7 @@ def infer_data_loader(args):
def infer_cost_calculator(args):
"""
Calculate total and average costs from metric JSON files with detailed output.
"""Calculate total and average costs from metric JSON files with detailed output.
Args:
args: Main arguments.

@@ -28,8 +28,7 @@ class LocalizationInfo:
hunks_per_file: dict[str, int] # File -> number of hunks
def to_dict(self) -> dict[str, Any]:
"""
Convert LocalizationInfo to a dictionary for JSON serialization.
"""Convert LocalizationInfo to a dictionary for JSON serialization.
Returns:
Dictionary representation of the localization information

@@ -58,8 +57,7 @@ class LocalizationInfo:
@classmethod
def from_dict(cls, data: dict[str, Any]) -> 'LocalizationInfo':
"""
Create LocalizationInfo from a dictionary (for loading from JSON).
"""Create LocalizationInfo from a dictionary (for loading from JSON).
Args:
data: Dictionary containing localization information

@@ -91,8 +89,7 @@ class LocalizationInfo:
class LocMeta:
"""
SWE-Bench dataset loader and ground-truth localization parser.
"""SWE-Bench dataset loader and ground-truth localization parser.
This class handles loading SWE-Bench datasets and extracting ground-truth
localization information from patches for code localization evaluation.

@@ -104,8 +101,7 @@ class LocMeta:
dataset_name: str = 'princeton-nlp/SWE-bench_Verified',
split: str = 'test',
):
"""
Initialize LocMeta with a SWE-Bench dataset.
"""Initialize LocMeta with a SWE-Bench dataset.
Args:
dataset_name: HuggingFace dataset name (e.g., "princeton-nlp/SWE-bench_Verified")

@@ -124,8 +120,7 @@ class LocMeta:
self._init_swe_dataset()
def _init_swe_dataset(self) -> None:
"""
Load and initialize the SWE-Bench dataset from HuggingFace.
"""Load and initialize the SWE-Bench dataset from HuggingFace.
Converts to pandas DataFrame for easy manipulation.
"""
try:

@@ -150,8 +145,7 @@ class LocMeta:
raise
def get_instance_by_id(self, instance_id: str) -> pd.Series:
"""
Retrieve a specific instance by its ID.
"""Retrieve a specific instance by its ID.
Args:
instance_id: The instance identifier

@@ -169,8 +163,7 @@ class LocMeta:
return self.df.iloc[idx]
def parse_instance_loc(self, instance: Union[pd.Series, str]) -> LocalizationInfo:
"""
Parse ground-truth localization information from a SWE-Bench instance.
"""Parse ground-truth localization information from a SWE-Bench instance.
Args:
instance: Either a pandas Series with instance data or an instance_id string

@@ -218,8 +211,7 @@ class LocMeta:
def _parse_file_patch_lines(
self, file_patch: str
) -> tuple[list[tuple[int, int]], int, int]:
"""
Parse line ranges and count changes from a single file patch.
"""Parse line ranges and count changes from a single file patch.
Args:
file_patch: Patch content for a single file

@@ -253,8 +245,7 @@ class LocMeta:
def _parse_code_structures_from_patch(
self, file_patch: str, file_path: str
) -> tuple[list[str], list[str]]:
"""
Extract function and class names from patch context (fallback method).
"""Extract function and class names from patch context (fallback method).
Args:
file_patch: Patch content for a single file

@@ -311,8 +302,7 @@ class LocMeta:
def _parse_patch_localization(
self, patch_content: str, instance_id: str
) -> LocalizationInfo:
"""
Parse localization information from a git patch (improved method).
"""Parse localization information from a git patch (improved method).
Args:
patch_content: The git patch content

@@ -390,8 +380,7 @@ class LocMeta:
def _extract_code_structures_from_patch(
self, file_patch: str, file_path: str
) -> tuple[list[str], list[str]]:
"""
Extract function and class names from patch context and content.
"""Extract function and class names from patch context and content.
Args:
file_patch: Patch content for a single file

@@ -519,8 +508,7 @@ class LocMeta:
def _parse_patch_localization_with_runtime(
self, patch_content: str, instance_id: str, runtime: Runtime
) -> LocalizationInfo:
"""
Parse localization information from a git patch using OpenHands runtime.
"""Parse localization information from a git patch using OpenHands runtime.
This is the superior method when runtime is available.
Args:

@@ -596,8 +584,7 @@ class LocMeta:
def parse_instance_loc_with_runtime(
self, instance: Union[pd.Series, str], runtime: Runtime = None
) -> LocalizationInfo:
"""
Parse ground-truth localization information using OpenHands runtime.
"""Parse ground-truth localization information using OpenHands runtime.
Args:
instance: Either a pandas Series with instance data or an instance_id string

@@ -634,8 +621,7 @@ class LocMeta:
def _analyze_source_code_with_runtime(
self, runtime: Runtime, file_path: str, affected_lines: list[int]
) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]:
"""
Analyze source code using OpenHands runtime to find functions and classes.
"""Analyze source code using OpenHands runtime to find functions and classes.
Args:
runtime: OpenHands runtime object

@@ -695,8 +681,7 @@ class LocMeta:
def _parse_cython_content_with_line_mapping(
self, content: str, affected_lines: list[int]
) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]:
"""
Parse Cython content to extract functions and classes with line mapping.
"""Parse Cython content to extract functions and classes with line mapping.
Since Cython files can't be parsed with Python's AST, we use regex-based parsing.
Args:

@@ -828,8 +813,7 @@ class LocMeta:
def _parse_python_content_with_line_mapping(
self, content: str, affected_lines: list[int]
) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]:
"""
Parse Python content to extract functions and classes with accurate line mapping.
"""Parse Python content to extract functions and classes with accurate line mapping.
Args:
content: Python source code content

@@ -914,8 +898,7 @@ class LocMeta:
def _parse_python_content(
self, content: str, affected_lines: list[int]
) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]:
"""
Parse Python content to extract functions and classes.
"""Parse Python content to extract functions and classes.
Args:
content: Python source code content

@@ -989,8 +972,7 @@ class LocMeta:
return [], [], {}, {}
def _split_patch_by_files(self, patch_content: str) -> dict[str, str]:
"""
Split a multi-file patch into individual file patches.
"""Split a multi-file patch into individual file patches.
Args:
patch_content: Complete patch content

@@ -1049,8 +1031,7 @@ class LocMeta:
def _empty_localization_info(
self, instance_id: str = 'unknown'
) -> LocalizationInfo:
"""
Return an empty LocalizationInfo object.
"""Return an empty LocalizationInfo object.
Args:
instance_id: Instance identifier

@@ -1072,8 +1053,7 @@ class LocMeta:
)
def get_dataset_statistics(self) -> dict[str, Any]:
"""
Get statistics about the loaded dataset.
"""Get statistics about the loaded dataset.
Returns:
Dictionary containing dataset statistics

@@ -1095,8 +1075,7 @@ class LocMeta:
return stats
def get_instances_by_repo(self, repo_name: str) -> pd.DataFrame:
"""
Get all instances for a specific repository.
"""Get all instances for a specific repository.
Args:
repo_name: Repository name (e.g., "django/django")

@@ -6,8 +6,7 @@ from openhands.core.logger import openhands_logger as logger
def verify_instance_costs(row: pd.Series) -> float:
"""
Verifies that the accumulated_cost matches the sum of individual costs in metrics.
"""Verifies that the accumulated_cost matches the sum of individual costs in metrics.
Also checks for duplicate consecutive costs which might indicate buggy counting.
If the consecutive costs are identical, the file is affected by this bug:
https://github.com/All-Hands-AI/OpenHands/issues/5383
@@ -181,9 +181,7 @@ def distinct_methods_stats(tree, num_lines):
def loops_stats(tree, num_lines):
"""
Calculate the average number of loops.
"""
"""Calculate the average number of loops."""
total_loops = 0
def traverse(node):

@@ -199,9 +197,7 @@ def loops_stats(tree, num_lines):
def branches_stats(tree, num_lines):
"""
Calculate the average number of branches (conditional statements).
"""
"""Calculate the average number of branches (conditional statements)."""
total_branches = 0
def traverse(node):

@@ -192,8 +192,7 @@ def run_mutation_testing(
def grade_test_output(
test_suite: str, instance: pd.Series, test_output: str, test_spec: TestSpec, runtime
):
"""
Two-pass test grading with short-circuiting:
"""Two-pass test grading with short-circuiting:
1. Run all tests to identify passing/failing tests
2. If no failing tests, evaluate coverage immediately
3. Otherwise, run only passing tests for coverage analysis

@@ -280,8 +279,7 @@ def process_instance(
reset_logger: bool = True,
log_dir: str | None = None,
) -> EvalOutput:
"""
Evaluate agent performance on a TestGenEval problem instance.
"""Evaluate agent performance on a TestGenEval problem instance.
Note that this signature differs from the expected input to `run_evaluation`. Use
`functools.partial` to provide optional arguments before passing to the evaluation harness.

@@ -453,8 +451,7 @@ def process_instance(
def count_and_log_fields(evaluated_predictions, fields, key):
"""
Count and log the sum of specified fields in the evaluated predictions,
"""Count and log the sum of specified fields in the evaluated predictions,
ignoring fields with a value of -1. If all values for a field are -1,
return -1.

@@ -4,8 +4,7 @@ from evaluation.benchmarks.testgeneval.constants import TestStatus
def parse_log_pytest(log: str) -> dict[str, str]:
"""
Parser for test logs generated with PyTest framework
"""Parser for test logs generated with PyTest framework
Args:
log (str): log content

@@ -26,8 +25,7 @@ def parse_log_pytest(log: str) -> dict[str, str]:
def parse_log_pytest_options(log: str) -> dict[str, str]:
"""
Parser for test logs generated with PyTest framework with options
"""Parser for test logs generated with PyTest framework with options
Args:
log (str): log content

@@ -61,8 +59,7 @@ def parse_log_pytest_options(log: str) -> dict[str, str]:
def parse_log_django(log: str) -> dict[str, str]:
"""
Parser for test logs generated with Django tester framework
"""Parser for test logs generated with Django tester framework
Args:
log (str): log content

@@ -141,8 +138,7 @@ def parse_log_django(log: str) -> dict[str, str]:
def parse_log_pytest_v2(log: str) -> dict[str, str]:
"""
Parser for test logs generated with PyTest framework (Later Version)
"""Parser for test logs generated with PyTest framework (Later Version)
Args:
log (str): log content

@@ -170,8 +166,7 @@ def parse_log_pytest_v2(log: str) -> dict[str, str]:
def parse_log_seaborn(log: str) -> dict[str, str]:
"""
Parser for test logs generated with seaborn testing framework
"""Parser for test logs generated with seaborn testing framework
Args:
log (str): log content

@@ -196,8 +191,7 @@ def parse_log_seaborn(log: str) -> dict[str, str]:
def parse_log_sympy(log: str) -> dict[str, str]:
"""
Parser for test logs generated with Sympy framework
"""Parser for test logs generated with Sympy framework
Args:
log (str): log content

@@ -229,8 +223,7 @@ def parse_log_sympy(log: str) -> dict[str, str]:
def parse_log_matplotlib(log: str) -> dict[str, str]:
"""
Parser for test logs generated with PyTest framework
"""Parser for test logs generated with PyTest framework
Args:
log (str): log content
@@ -12,8 +12,7 @@ if sys.getrecursionlimit() < 10_000:
def bleu(gold: list[str], pred: list[str]) -> float:
"""
Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
"""Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100.
:param gold: list of gold tokens
:param pred: list of predicted tokens

@@ -30,8 +29,7 @@ def bleu(gold: list[str], pred: list[str]) -> float:
def batch_bleu(golds: list[list[str]], preds: list[list[str]]) -> list[float]:
"""
Calculate BLEU score for a batch of sentences.
"""Calculate BLEU score for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -43,8 +41,7 @@ def batch_bleu(golds: list[list[str]], preds: list[list[str]]) -> list[float]:
def corpus_bleu(golds: list[list[str]], preds: list[list[str]]) -> float:
"""
Calculate corpus-level BLEU score for a batch of sentences.
"""Calculate corpus-level BLEU score for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -63,8 +60,7 @@ def corpus_bleu(golds: list[list[str]], preds: list[list[str]]) -> float:
def edit_sim(
gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' '
) -> float:
"""
Calculate char-level edit similarity, in the range of 0~100.
"""Calculate char-level edit similarity, in the range of 0~100.
:param gold: gold sentence or list of gold tokens
:param pred: predicted sentence or list of predicted tokens

@@ -85,8 +81,7 @@ def batch_edit_sim(
preds: list[Union[str, list[str]]],
sep: str = ' ',
) -> list[float]:
"""
Calculate char-level edit similarity for a batch of sentences.
"""Calculate char-level edit similarity for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -102,8 +97,7 @@ T = TypeVar('T')
def exact_match(gold: T, pred: T) -> float:
"""
Calculate exact match accuracy, in the range of {0, 100}.
"""Calculate exact match accuracy, in the range of {0, 100}.
:param gold: gold sentence or list of gold tokens
:param pred: predicted sentence or list of predicted tokens

@@ -115,8 +109,7 @@ def exact_match(gold: T, pred: T) -> float:
def batch_exact_match(golds: list[T], preds: list[T]) -> list[float]:
"""
Calculate exact match accuracy for a batch of sentences.
"""Calculate exact match accuracy for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -130,8 +123,7 @@ def batch_exact_match(golds: list[T], preds: list[T]) -> list[float]:
def rouge_l(
gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' '
) -> dict[str, float]:
"""
Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
"""Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100.
:param gold: gold sentence or list of gold tokens
:param pred: predicted sentence or list of predicted tokens

@@ -156,8 +148,7 @@ def batch_rouge_l(
preds: list[Union[str, list[str]]],
sep: str = ' ',
) -> dict[str, list[float]]:
"""
Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
"""Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -175,8 +166,7 @@ def accuracy(
pred: list[str],
ignore: Optional[Sequence[str]] = None,
) -> float:
"""
Calculate token-level accuracy, in the range of 0~100.
"""Calculate token-level accuracy, in the range of 0~100.
If gold and pred are not the same length, the longer one would be truncated.
:param gold: list of gold tokens

@@ -210,8 +200,7 @@ def batch_accuracy(
preds: list[list[str]],
ignore: Optional[Sequence[str]] = None,
) -> list[float]:
"""
Calculate token-level accuracy for a batch of sentences.
"""Calculate token-level accuracy for a batch of sentences.
:param golds: list of gold sentences
:param preds: list of predicted sentences

@@ -226,8 +215,7 @@ def batch_accuracy(
def first_match_to_topk(
first_match_list: list[int], k_values: list[int]
) -> dict[int, list[float]]:
"""
Calculate top-k accuracy with the first match ranks (1-indexed).
"""Calculate top-k accuracy with the first match ranks (1-indexed).
:param first_match: first match ranks (1-indexed)
:param k_values: k values to consider

@@ -237,8 +225,7 @@ def first_match_to_topk(
def pass_at_k(n: int, c: int, k: int) -> float:
"""
Sample pass@k metric according to the Codex paper, but in the scale of 0~100.
"""Sample pass@k metric according to the Codex paper, but in the scale of 0~100.
:param n: total number of samples
:param c: number of correct samples
:param k: k in pass@$k$

@@ -251,8 +238,7 @@ def pass_at_k(n: int, c: int, k: int) -> float:
def self_bleu(samples: list[list[str]]) -> float:
"""
Calculate self-BLEU among the samples.
"""Calculate self-BLEU among the samples.
:param samples: the chosen m samples
:return: self-BLEU
"""

@@ -274,8 +260,7 @@ def self_bleu(samples: list[list[str]]) -> float:
def self_edit_distance(samples: list[Union[str, list[str]]], sep=' ') -> float:
"""
Calculate self-edit-distance among the samples.
"""Calculate self-edit-distance among the samples.
:param samples: the chosen m samples
:param sep: the separator between tokens
:return: self-edit-distance

@@ -30,8 +30,7 @@ def check_mutation(mutation_output):
def count_methods(code_str):
"""
Counts the number of methods/functions in a given string of code.
"""Counts the number of methods/functions in a given string of code.
Args:
code_str (str): A string containing code.

@@ -46,8 +45,7 @@ def count_methods(code_str):
def get_lines_of_code(code_str):
"""
Extracts lines of code from a given string.
"""Extracts lines of code from a given string.
Args:
code_str (str): A string containing code.

@@ -7,8 +7,7 @@ import traceback
def insert_line_in_string(input_string, new_str, insert_line):
"""
Inserts a new line into a string at the specified line number.
"""Inserts a new line into a string at the specified line number.
:param input_string: The original string.
:param new_str: The string to insert.

@@ -29,8 +28,7 @@ def insert_line_in_string(input_string, new_str, insert_line):
def print_string_diff(original, modified):
"""
Prints the differences between two strings line by line.
"""Prints the differences between two strings line by line.
:param original: The original string.
:param modified: The modified string.
@@ -37,8 +37,7 @@ def extract_preamble_classes_and_functions(code):
current_position = 0
def extract_class_body(code: str, start_index: int) -> tuple[str, int]:
"""
Extracts the body of a class from the given code starting from the specified index.
"""Extracts the body of a class from the given code starting from the specified index.
Returns the class body and the end index of the class body.
"""
if not code or start_index < 0 or start_index >= len(code):

@@ -168,8 +167,8 @@ def extract_preamble_classes_and_functions(code):
def filter_passing_tests(
test_content: str, test_output: str, repo: str
) -> tuple[str, list[str], list[str]]:
"""
Filter tests based on their execution results.
"""Filter tests based on their execution results.
Returns:
Tuple containing:
- Modified test content with only passing tests

@@ -246,8 +245,7 @@ def filter_passing_tests(
def filter_tests(
test_content: str, test_output: str, repo: str
) -> tuple[str, list[str], list[str]]:
"""
Filter tests using AST parsing to remove failing test functions from the test file.
"""Filter tests using AST parsing to remove failing test functions from the test file.
Non-test functions (e.g. setup or helper methods) and classes (even if all test methods are failing)
are preserved.

@@ -20,9 +20,7 @@ DIFF_MODIFIED_FILE_REGEX = r'--- a/(.*)'
@dataclass
class TestSpec:
"""
A dataclass that represents a test specification for a single instance of SWE-bench.
"""
"""A dataclass that represents a test specification for a single instance of SWE-bench."""
instance_id: str
id: str

@@ -86,10 +84,7 @@ def make_test_setup(specs, env_name, repo_directory, includes_tox=False):
def make_test_script_list(test_cmd, specs, env_name, repo_directory):
"""
Runs the tests.
"""
"""Runs the tests."""
includes_tox = 'tox' in test_cmd
eval_commands = make_test_setup(specs, env_name, repo_directory, includes_tox)
eval_commands += [

@@ -104,10 +99,7 @@ def make_test_script_list(test_cmd, specs, env_name, repo_directory):
def make_mutation_script_list(specs, env_name, repo_directory, mutation_timeout):
"""
Runs the tests.
"""
"""Runs the tests."""
eval_commands = make_test_setup(specs, env_name, repo_directory)
eval_commands += [
'cosmic-ray init mutation.toml mutation.sqlite',

@@ -11,8 +11,7 @@ from evaluation.benchmarks.testgeneval.constants import (
def get_test_directives(instance: TestGenEvalInstance) -> list:
"""
Get test directives from the test_patch of a task instance
"""Get test directives from the test_patch of a task instance
Args:
instance (dict): task instance

@@ -43,9 +42,7 @@ def get_test_directives(instance: TestGenEvalInstance) -> list:
def load_testgeneval_dataset(
name='kjain14/testgeneval', split='test', ids=None
) -> list[TestGenEvalInstance]:
"""
Load SWE-bench dataset from Hugging Face Datasets or local .json/.jsonl file
"""
"""Load SWE-bench dataset from Hugging Face Datasets or local .json/.jsonl file"""
# check that all instance IDs are in the dataset
if ids:
ids = set(ids)

@@ -24,9 +24,7 @@ class ActionType(Enum):
@dataclass
class Selector:
"""
Represents either a direct anchor ID or a descriptive selector
"""
"""Represents either a direct anchor ID or a descriptive selector"""
value: str
is_anchor: bool = False

@@ -149,8 +147,7 @@ def find_matching_anchor(content: str, selector: str) -> str | None:
def resolve_action(action: BrowserAction, content: str) -> BrowserAction:
"""
Resolve any descriptive selectors in the action to anchor IDs based on the content.
"""Resolve any descriptive selectors in the action to anchor IDs based on the content.
Returns a new action with resolved selectors.
"""
if isinstance(action, (InputAction, ClickAction)):

@@ -174,8 +171,7 @@ def pre_login(
save_screenshots=True,
screenshots_dir='screenshots',
):
"""
Logs in to all the websites that are needed for the evaluation.
"""Logs in to all the websites that are needed for the evaluation.
Once logged in, the sessions would be cached in the browser, so OpenHands
agent doesn't need to log in to these websites again.
"""

@@ -68,8 +68,7 @@ def get_config(
def load_dependencies(runtime: Runtime) -> list[str]:
"""
Every task has a dependencies.yml file, which lists all the services that the
"""Every task has a dependencies.yml file, which lists all the services that the
task depends on. This function loads the file and returns all dependent service names.
"""
command = 'cat /utils/dependencies.yml'

@@ -11,9 +11,7 @@ import sys
def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
"""
Calculate the cost of the model call.
"""
"""Calculate the cost of the model call."""
if 'claude-3-5-sonnet' in model.lower():
# https://www.anthropic.com/pricing#anthropic-api, accessed 12/11/2024
return 0.000003 * prompt_tokens + 0.000015 * completion_tokens

@@ -60,8 +58,7 @@ def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> fl
def analyze_eval_json_file(filepath: str) -> tuple[int, int]:
"""
Analyze a single eval JSON file and extract the total and result from final_score.
"""Analyze a single eval JSON file and extract the total and result from final_score.
Args:
filepath: Path to the JSON file

@@ -84,8 +81,7 @@ def analyze_eval_json_file(filepath: str) -> tuple[int, int]:
def analyze_traj_json_file(filepath: str) -> tuple[int, float]:
"""
Analyze a single trajectory JSON file and extract the steps and tokens
"""Analyze a single trajectory JSON file and extract the steps and tokens
for each step. Then estimate the cost based on the tokens and the model type.
Note: this is assuming there's no prompt caching at all.
"""

@@ -115,8 +111,7 @@ def analyze_traj_json_file(filepath: str) -> tuple[int, float]:
def analyze_folder(
folder_path: str,
) -> tuple[dict[str, tuple[int, int]], dict[str, tuple[int, float]]]:
"""
Analyze all eval_*.json & traj_*.json files in the specified folder.
"""Analyze all eval_*.json & traj_*.json files in the specified folder.
Args:
folder_path: Path to the folder containing JSON files

@@ -148,9 +143,7 @@ def analyze_folder(
def get_task_nature_category(task_name: str) -> str:
"""
Get the nature category of the task.
"""
"""Get the nature category of the task."""
task_nature = task_name.split('-')[0]
if task_nature.lower() in ['sde', 'pm', 'ds', 'admin', 'hr', 'finance']:
return task_nature

@@ -159,8 +152,7 @@ def get_task_nature_category(task_name: str) -> str:
def calculate_score(total: int, result: int) -> float:
"""
Calculate the score as a number between 0 and 1.
"""Calculate the score as a number between 0 and 1.
Formula: score = (result / total) * 0.5 + (result // total) * 0.5
Explanation:

@@ -178,8 +170,7 @@ def calculate_score(total: int, result: int) -> float:
def is_perfect_completion(total: int, result: int) -> bool:
"""
Check if the task achieved perfect completion.
"""Check if the task achieved perfect completion.
Args:
total: Total possible points
@@ -1,6 +1,4 @@
"""
GPT performs line level generation prediction and truncates overly long tokens
"""
"""GPT performs line level generation prediction and truncates overly long tokens"""
import json
import os

@@ -56,8 +54,7 @@ def predict(content, model_name):
def bulid_prompt(description, old_version, old_code, new_version) -> str:
"""
build prompt
"""Build prompt
:param version:
:param description:
:param masked_code:

@@ -1,6 +1,4 @@
"""
GPT performs line level generation prediction and truncates overly long tokens
"""
"""GPT performs line level generation prediction and truncates overly long tokens"""
import json
import os

@@ -56,8 +54,7 @@ def predict(content, model_name):
def bulid_prompt(version, description) -> str:
"""
build prompt
"""Build prompt
:param version:
:param description:
:param masked_code:

@@ -1,6 +1,4 @@
"""
block completion
"""
"""block completion"""
import copy
import gc

@@ -79,8 +77,7 @@ def run_inference(model_name, origin_data_list):
def bulid_prompt(version, description) -> str:
"""
build prompt
"""Build prompt
:param version:
:param description:
:param masked_code:

@@ -1,6 +1,4 @@
"""
code migration
"""
"""code migration"""
import copy
import gc

@@ -81,8 +79,7 @@ def run_inference(model_name, origin_data_list):
def bulid_prompt(description, old_version, old_code, new_version) -> str:
"""
build prompt
"""Build prompt
:param version:
:param description:
:param masked_code:

@@ -1,5 +1,4 @@
"""
评测block的预测能力
"""评测block的预测能力
1、判断是否包含正确的函数名
2、判断是否合法
3、计算ISM,和PM

@@ -22,8 +21,7 @@ def is_code_valid(code):
def longest_common_prefix_between_lists_with_elements(list1, list2):
"""
计算两个字符串列表中元素的最长前缀匹配长度
"""计算两个字符串列表中元素的最长前缀匹配长度
:param list1:
:param list2:
:return:

@@ -46,8 +44,7 @@ def longest_common_prefix_between_lists_with_elements(list1, list2):
def get_token(ans_code: str, output_code: str):
"""
对代码进行词法分析,分解成标识符,返回两个标识符列表
"""对代码进行词法分析,分解成标识符,返回两个标识符列表
:param ans_code:
:param output_code:
:return:

@@ -94,8 +91,7 @@ def get_token(ans_code: str, output_code: str):
def get_token_per_line(code: str):
"""
对每一行代码进行词法分析,记录每一行的标识符
"""对每一行代码进行词法分析,记录每一行的标识符
:param code: 代码字符串
:return: 每一行的标识符列表组成的列表
"""

@@ -117,8 +113,7 @@ def get_token_per_line(code: str):
def get_ISM(answer_code: str, model_output_list: list, answer_name: str) -> list:
"""
计算ISM,返回一个有序的得分列表
"""计算ISM,返回一个有序的得分列表
:return:
"""
score_list = []

@@ -157,8 +152,7 @@ def get_ISM(answer_code: str, model_output_list: list, answer_name: str) -> list
def get_ISM_without_verification(
answer_code: str, model_output_list: list, answer_name: str
) -> list:
"""
计算ISM,返回一个有序的得分列表
"""计算ISM,返回一个有序的得分列表
:return:
"""
score_list = []

@@ -190,8 +184,7 @@ def get_ISM_without_verification(
def longest_common_prefix_with_lengths(list1, list2):
"""
计算两个二维列表中每个子列表的最长前缀匹配长度,并记录拥有最长前缀匹配长度的两个子列表的长度
"""计算两个二维列表中每个子列表的最长前缀匹配长度,并记录拥有最长前缀匹配长度的两个子列表的长度
:param list1: 第一个二维列表
:param list2: 第二个二维列表
:return: 最长前缀匹配长度以及拥有最长前缀匹配长度的两个子列表的长度

@@ -216,8 +209,7 @@ def longest_common_prefix_with_lengths(list1, list2):
def get_PM(answer_code: str, model_output_list: list, answer_name: str) -> list:
"""
计算PM,返回一个有序的得分列表
"""计算PM,返回一个有序的得分列表
:return:
"""
score_list = []

@@ -254,8 +246,7 @@ def get_PM(answer_code: str, model_output_list: list, answer_name: str) -> list:
def get_score(score_list: list, k):
"""
计算score@n,k
"""计算score@n,k
:param score_list:
:param k:
:return:

@@ -1,6 +1,4 @@
"""
Calculate the cdc score for migration
"""
"""Calculate the cdc score for migration"""
import json
import math

@@ -11,8 +9,7 @@ import re
def is_correct_parameter_count(function_name, correct_code, test_code):
"""
判断参数数量是否一致
"""判断参数数量是否一致
:param function_name:
:param correct_code:
:param test_code:

@@ -43,8 +40,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code):
def check_keyword_parameters(function_name, correct_code, test_code):
"""
判断关键词参数赋值是否正确使用
"""判断关键词参数赋值是否正确使用
:param function_name:
:param correct_code:
:param test_code:

@@ -82,8 +78,7 @@ def check_keyword_parameters(function_name, correct_code, test_code):
def with_correct(answer_code: str, model_output: str) -> bool:
"""
当answer是with结构时,判断模型生成的是不是with结构
"""当answer是with结构时,判断模型生成的是不是with结构
:param answer_code:
:param model_output:
:return:

@@ -105,9 +100,7 @@ def compute_block_score_k(
core_line_in_core_block,
core_line_in_output_clear,
):
"""
cdc需要满足五个条件,em只需要满足第一个条件
"""
"""cdc需要满足五个条件,em只需要满足第一个条件"""
c = 0
n = len(model_output)
for index, code in enumerate(model_output):
@@ -1,6 +1,4 @@
"""
Calculate the cdc score for line and block
"""
"""Calculate the cdc score for line and block"""
import json
import math

@@ -19,8 +17,7 @@ def is_code_valid(code):
def is_correct_parameter_count(function_name, correct_code, test_code):
"""
判断参数数量是否一致
"""判断参数数量是否一致
:param function_name:
:param correct_code:
:param test_code:

@@ -51,8 +48,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code):
def check_keyword_parameters(function_name, correct_code, test_code):
"""
判断关键词参数赋值是否正确使用
"""判断关键词参数赋值是否正确使用
:param function_name:
:param correct_code:
:param test_code:

@@ -90,8 +86,7 @@ def check_keyword_parameters(function_name, correct_code, test_code):
def with_correct(answer_code: str, model_output: str) -> bool:
"""
当answer是with结构时,判断模型生成的是不是with结构
"""当answer是with结构时,判断模型生成的是不是with结构
:param answer_code:
:param model_output:
:return:

@@ -1,6 +1,4 @@
"""
Calculate the cdc score for line and block
"""
"""Calculate the cdc score for line and block"""
import json
import math

@@ -19,8 +17,7 @@ def is_code_valid(code):
def is_correct_parameter_count(function_name, correct_code, test_code):
"""
判断参数数量是否一致
"""判断参数数量是否一致
:param function_name:
:param correct_code:
:param test_code:

@@ -51,8 +48,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code):
def check_keyword_parameters(function_name, correct_code, test_code):
"""
判断关键词参数赋值是否正确使用
"""判断关键词参数赋值是否正确使用
:param function_name:
:param correct_code:
:param test_code:

@@ -90,8 +86,7 @@ def check_keyword_parameters(function_name, correct_code, test_code):
def with_correct(answer_code: str, model_output: str) -> bool:
"""
当answer是with结构时,判断模型生成的是不是with结构
"""当answer是with结构时,判断模型生成的是不是with结构
:param answer_code:
:param model_output:
:return:

@@ -1,6 +1,4 @@
"""
Find the line of code generated by the model using the block in the version code
"""
"""Find the line of code generated by the model using the block in the version code"""
import json
import os

@@ -1,6 +1,4 @@
"""
Find the line of code generated by the model using the block in the version code
"""
"""Find the line of code generated by the model using the block in the version code"""
import json
import os

@@ -1,6 +1,4 @@
"""
Clear the<start>and<end>generated by the model in inference
"""
"""Clear the<start>and<end>generated by the model in inference"""
import json

@@ -622,8 +622,7 @@ def compatibility_for_eval_history_pairs(
def is_fatal_evaluation_error(error: str | None) -> bool:
"""
The AgentController class overrides last error for certain exceptions
"""The AgentController class overrides last error for certain exceptions
We want to ensure those exeption do not overlap with fatal exceptions defined here
This is because we do a comparisino against the stringified error
"""

@@ -3,8 +3,7 @@ import sys
def refine_prompt(prompt: str):
"""
Refines the prompt based on the platform.
"""Refines the prompt based on the platform.
On Windows systems, replaces 'bash' with 'powershell' and 'execute_bash' with 'execute_powershell'
to ensure commands work correctly on the Windows platform.

@@ -1,6 +1,4 @@
"""
ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only tools.
"""
"""ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only tools."""
import os
from typing import TYPE_CHECKING

@@ -5,7 +5,6 @@ import warnings
def suppress_cli_warnings():
"""Suppress common warnings that appear during CLI usage."""
# Suppress pydub warning about ffmpeg/avconv
warnings.filterwarnings(
'ignore',

@@ -239,8 +239,7 @@ def display_mcp_errors() -> None:
# Prompt output display functions
def display_thought_if_new(thought: str, is_agent_message: bool = False) -> None:
"""
Display a thought only if it hasn't been displayed recently.
"""Display a thought only if it hasn't been displayed recently.
Args:
thought: The thought to display

@@ -301,8 +300,7 @@ def display_event(event: Event, config: OpenHandsConfig) -> None:
def display_message(message: str, is_agent_message: bool = False) -> None:
"""
Display a message in the terminal with markdown rendering.
"""Display a message in the terminal with markdown rendering.
Args:
message: The message to display

@@ -338,8 +336,7 @@ def display_message(message: str, is_agent_message: bool = False) -> None:
def convert_markdown_to_html(text: str) -> str:
"""
Convert markdown to HTML for prompt_toolkit's HTML renderer using the markdown library.
"""Convert markdown to HTML for prompt_toolkit's HTML renderer using the markdown library.
Args:
text: Markdown text to convert

@@ -56,8 +56,7 @@ def download_latest_vsix_from_github() -> str | None:
def attempt_vscode_extension_install():
"""
Checks if running in a supported editor and attempts to install the OpenHands companion extension.
"""Checks if running in a supported editor and attempts to install the OpenHands companion extension.
This is a best-effort, one-time attempt.
"""
# 1. Check if we are in a supported editor environment

@@ -132,8 +131,7 @@ def attempt_vscode_extension_install():
def _mark_installation_successful(flag_file: pathlib.Path, editor_name: str) -> None:
"""
Mark the extension installation as successful by creating the flag file.
"""Mark the extension installation as successful by creating the flag file.
Args:
flag_file: Path to the flag file to create

@@ -147,8 +145,7 @@ def _mark_installation_successful(flag_file: pathlib.Path, editor_name: str) ->
def _is_extension_installed(editor_command: str, extension_id: str) -> bool:
"""
Check if the OpenHands extension is already installed.
"""Check if the OpenHands extension is already installed.
Args:
editor_command: The command to run the editor (e.g., 'code', 'windsurf')

@@ -174,8 +171,7 @@ def _is_extension_installed(editor_command: str, extension_id: str) -> bool:
def _attempt_github_install(editor_command: str, editor_name: str) -> bool:
"""
Attempt to install the extension from GitHub Releases.
"""Attempt to install the extension from GitHub Releases.
Downloads the latest VSIX file from GitHub releases and attempts to install it.
Ensures proper cleanup of temporary files.

@@ -227,8 +223,7 @@ def _attempt_github_install(editor_command: str, editor_name: str) -> bool:
def _attempt_bundled_install(editor_command: str, editor_name: str) -> bool:
"""
Attempt to install the extension from the bundled VSIX file.
"""Attempt to install the extension from the bundled VSIX file.
Uses the VSIX file packaged with the OpenHands installation.

@@ -280,8 +275,7 @@ def _attempt_bundled_install(editor_command: str, editor_name: str) -> bool:
def _attempt_marketplace_install(
editor_command: str, editor_name: str, extension_id: str
) -> bool:
"""
Attempt to install the extension from the marketplace.
"""Attempt to install the extension from the marketplace.
This method is currently unused as the OpenHands extension is not yet published
to the VS Code/Windsurf marketplace. It's kept here for future use when the
@ -55,8 +55,7 @@ class Agent(ABC):
|
||||
return self._prompt_manager
|
||||
|
||||
def get_system_message(self) -> 'SystemMessageAction | None':
"""
Returns a SystemMessageAction containing the system message and tools.
"""Returns a SystemMessageAction containing the system message and tools.
This will be added to the event stream as the first message.

Returns:

@@ -142,7 +142,6 @@ class AgentController:
status_callback: Optional callback function to handle status updates.
replay_events: A list of logs to replay.
"""

self.id = sid or event_stream.sid
self.user_id = user_id
self.file_store = file_store

@@ -57,8 +57,7 @@ class ReplayManager:
)

def should_replay(self) -> bool:
"""
Whether the controller is in trajectory replay mode, and the replay
"""Whether the controller is in trajectory replay mode, and the replay
hasn't finished. Note: after the replay is finished, the user and
the agent could continue to message/act.

@@ -46,8 +46,7 @@ class TrafficControlState(str, Enum):

@dataclass
class State:
"""
Represents the running state of an agent in the OpenHands system, saving data of its operation and memory.
"""Represents the running state of an agent in the OpenHands system, saving data of its operation and memory.

- Multi-agent/delegate state:
- store the task (conversation between the agent and the user)
@@ -143,10 +142,7 @@ class State:
def restore_from_session(
sid: str, file_store: FileStore, user_id: str | None = None
) -> 'State':
"""
Restores the state from the previously saved session.
"""

"""Restores the state from the previously saved session."""
state: State
try:
encoded = file_store.read(

@@ -242,41 +242,33 @@ class StateTracker:
self.state.budget_flag.increase_limit(headless_mode)

def get_metrics_snapshot(self):
"""
Deep copy of metrics
"""Deep copy of metrics
This serves as a snapshot for the parent's metrics at the time a delegate is created
It will be stored and used to compute local metrics for the delegate
(since delegates now accumulate metrics from where its parent left off)
"""

return self.state.metrics.copy()

def save_state(self):
"""
Save's current state to persistent store
"""
"""Save's current state to persistent store"""
if self.sid and self.file_store:
self.state.save_to_session(self.sid, self.file_store, self.user_id)

def run_control_flags(self):
"""
Performs one step of the control flags
"""
"""Performs one step of the control flags"""
self.state.iteration_flag.step()
if self.state.budget_flag:
self.state.budget_flag.step()

def sync_budget_flag_with_metrics(self):
"""
Ensures that budget flag is up to date with accumulated costs from llm completions
"""Ensures that budget flag is up to date with accumulated costs from llm completions
Budget flag will monitor for when budget is exceeded
"""
if self.state.budget_flag:
self.state.budget_flag.current_value = self.state.metrics.accumulated_cost

def merge_metrics(self, metrics: Metrics):
"""
Merges metrics with the state metrics
"""Merges metrics with the state metrics

NOTE: this should be refactored in the future. We should have services (draft llm, title autocomplete, condenser, etc)
use their own LLMs, but the metrics object should be shared. This way we have one source of truth for accumulated costs from

@@ -66,8 +66,7 @@ class KubernetesConfig(BaseModel):

@classmethod
def from_toml_section(cls, data: dict) -> dict[str, 'KubernetesConfig']:
"""
Create a mapping of KubernetesConfig instances from a toml dictionary representing the [kubernetes] section.
"""Create a mapping of KubernetesConfig instances from a toml dictionary representing the [kubernetes] section.

The configuration is built from all keys in data.

@@ -97,8 +97,7 @@ class LLMConfig(BaseModel):

@classmethod
def from_toml_section(cls, data: dict) -> dict[str, LLMConfig]:
"""
Create a mapping of LLMConfig instances from a toml dictionary representing the [llm] section.
"""Create a mapping of LLMConfig instances from a toml dictionary representing the [llm] section.

The default configuration is built from all non-dict keys in data.
Then, each key with a dict value (e.g. [llm.random_name]) is treated as a custom LLM configuration,
@@ -117,7 +116,6 @@ class LLMConfig(BaseModel):
dict[str, LLMConfig]: A mapping where the key "llm" corresponds to the default configuration
and additional keys represent custom configurations.
"""

# Initialize the result mapping
llm_mapping: dict[str, LLMConfig] = {}

@@ -345,7 +345,6 @@ class OpenHandsMCPConfig:
Returns:
tuple[MCPSHTTPServerConfig | None, list[MCPStdioServerConfig]]: A tuple containing the default SHTTP server configuration (or None) and a list of MCP stdio server configurations
"""

stdio_servers = []
search_engine_stdio_server = OpenHandsMCPConfig.add_search_engine(config)
if search_engine_stdio_server:

@@ -93,8 +93,7 @@ class SandboxConfig(BaseModel):

@classmethod
def from_toml_section(cls, data: dict) -> dict[str, 'SandboxConfig']:
"""
Create a mapping of SandboxConfig instances from a toml dictionary representing the [sandbox] section.
"""Create a mapping of SandboxConfig instances from a toml dictionary representing the [sandbox] section.

The configuration is built from all keys in data.

@@ -16,15 +16,13 @@ class SecurityConfig(BaseModel):

@classmethod
def from_toml_section(cls, data: dict) -> dict[str, 'SecurityConfig']:
"""
Create a mapping of SecurityConfig instances from a toml dictionary representing the [security] section.
"""Create a mapping of SecurityConfig instances from a toml dictionary representing the [security] section.

The configuration is built from all keys in data.

Returns:
dict[str, SecurityConfig]: A mapping where the key "security" corresponds to the [security] configuration
"""

# Initialize the result mapping
security_mapping: dict[str, SecurityConfig] = {}

@ -322,10 +322,7 @@ def json_log_handler(
|
||||
level: int = logging.INFO,
|
||||
_out: TextIO = sys.stdout,
|
||||
) -> logging.Handler:
|
||||
"""
|
||||
Configure logger instance for structured logging as json lines.
|
||||
"""
|
||||
|
||||
"""Configure logger instance for structured logging as json lines."""
|
||||
handler = logging.StreamHandler(_out)
|
||||
handler.setLevel(level)
|
||||
handler.setFormatter(json_formatter())
|
||||
@ -496,8 +493,7 @@ class OpenHandsLoggerAdapter(logging.LoggerAdapter):
|
||||
def process(
|
||||
self, msg: str, kwargs: MutableMapping[str, Any]
|
||||
) -> tuple[str, MutableMapping[str, Any]]:
|
||||
"""
|
||||
If 'extra' is supplied in kwargs, merge it with the adapters 'extra' dict
|
||||
"""If 'extra' is supplied in kwargs, merge it with the adapters 'extra' dict
|
||||
Starting in Python 3.13, LoggerAdapter's merge_extra option will do this.
|
||||
"""
|
||||
if 'extra' in kwargs and isinstance(kwargs['extra'], dict):
|
||||
|
||||
@ -14,8 +14,7 @@ async def run_agent_until_done(
|
||||
memory: Memory,
|
||||
end_states: list[AgentState],
|
||||
) -> None:
|
||||
"""
|
||||
run_agent_until_done takes a controller and a runtime, and will run
|
||||
"""run_agent_until_done takes a controller and a runtime, and will run
|
||||
the agent until it reaches a terminal state.
|
||||
Note that runtime must be connected before being passed in here.
|
||||
"""
|
||||
|
||||
@ -257,8 +257,7 @@ def auto_continue_response(
|
||||
|
||||
|
||||
def load_replay_log(trajectory_path: str) -> tuple[list[Event] | None, Action]:
|
||||
"""
|
||||
Load trajectory from given path, serialize it to a list of events, and return
|
||||
"""Load trajectory from given path, serialize it to a list of events, and return
|
||||
two things:
|
||||
1) A list of events except the first action
|
||||
2) First action (user message, a.k.a. initial task)
|
||||
|
||||
@ -3,8 +3,7 @@ from openhands.llm.metrics import Metrics, TokenUsage
|
||||
|
||||
|
||||
def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None:
|
||||
"""
|
||||
Returns at most one token usage record for either:
|
||||
"""Returns at most one token usage record for either:
|
||||
- `tool_call_metadata.model_response.id`, if possible
|
||||
- otherwise event.response_id, if set
|
||||
|
||||
@ -34,8 +33,7 @@ def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | No
|
||||
def get_token_usage_for_event_id(
|
||||
events: list[Event], event_id: int, metrics: Metrics
|
||||
) -> TokenUsage | None:
|
||||
"""
|
||||
Starting from the event with .id == event_id and moving backwards in `events`,
|
||||
"""Starting from the event with .id == event_id and moving backwards in `events`,
|
||||
find the first TokenUsage record (if any) associated either with:
|
||||
- tool_call_metadata.model_response.id, or
|
||||
- event.response_id
|
||||
|
||||
@ -94,8 +94,7 @@ def create_runtime(
|
||||
|
||||
|
||||
def get_provider_tokens():
|
||||
"""
|
||||
Retrieve provider tokens from environment variables and return them as a dictionary.
|
||||
"""Retrieve provider tokens from environment variables and return them as a dictionary.
|
||||
|
||||
Returns:
|
||||
A dictionary mapping ProviderType to ProviderToken if tokens are found, otherwise None.
|
||||
@ -126,8 +125,7 @@ def initialize_repository_for_runtime(
|
||||
immutable_provider_tokens: PROVIDER_TOKEN_TYPE | None = None,
|
||||
selected_repository: str | None = None,
|
||||
) -> str | None:
|
||||
"""
|
||||
Initialize the repository for the runtime by cloning or initializing it,
|
||||
"""Initialize the repository for the runtime by cloning or initializing it,
|
||||
running setup scripts, and setting up git hooks if present.
|
||||
|
||||
Args:
|
||||
|
||||
@ -6,25 +6,19 @@ from openhands.events import Event
|
||||
|
||||
|
||||
class CriticResult(BaseModel):
|
||||
"""
|
||||
A critic result is a score and a message.
|
||||
"""
|
||||
"""A critic result is a score and a message."""
|
||||
|
||||
score: float
|
||||
message: str
|
||||
|
||||
@property
|
||||
def success(self) -> bool:
|
||||
"""
|
||||
Whether the agent is successful.
|
||||
"""
|
||||
"""Whether the agent is successful."""
|
||||
return self.score >= 0.5
|
||||
|
||||
|
||||
class BaseCritic(abc.ABC):
|
||||
"""
|
||||
A critic is a function that takes in a list of events, optional git patch, and returns a score about the quality of those events.
|
||||
"""
|
||||
"""A critic is a function that takes in a list of events, optional git patch, and returns a score about the quality of those events."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def evaluate(
|
||||
|
||||
@ -42,8 +42,7 @@ class MessageAction(Action):
|
||||
|
||||
@dataclass
|
||||
class SystemMessageAction(Action):
|
||||
"""
|
||||
Action that represents a system message for an agent, including the system prompt
|
||||
"""Action that represents a system message for an agent, including the system prompt
|
||||
and available tools. This should be the first message in the event stream.
|
||||
"""
|
||||
|
||||
|
||||
@ -42,9 +42,7 @@ _DUMMY_PAGE = _CachePage(None, 1, -1)
|
||||
|
||||
@dataclass
|
||||
class EventStore(EventStoreABC):
|
||||
"""
|
||||
A stored list of events backing a conversation
|
||||
"""
|
||||
"""A stored list of events backing a conversation"""
|
||||
|
||||
sid: str
|
||||
file_store: FileStore
|
||||
@ -92,8 +90,7 @@ class EventStore(EventStoreABC):
|
||||
filter: EventFilter | None = None,
|
||||
limit: int | None = None,
|
||||
) -> Iterable[Event]:
|
||||
"""
|
||||
Retrieve events from the event stream, optionally filtering out events of a given type
|
||||
"""Retrieve events from the event stream, optionally filtering out events of a given type
|
||||
and events marked as hidden.
|
||||
|
||||
Args:
|
||||
@ -105,7 +102,6 @@ class EventStore(EventStoreABC):
|
||||
Yields:
|
||||
Events from the stream that match the criteria.
|
||||
"""
|
||||
|
||||
if end_id is None:
|
||||
end_id = self.cur_id
|
||||
else:
|
||||
|
||||
@ -9,9 +9,7 @@ from openhands.events.event_filter import EventFilter
|
||||
|
||||
|
||||
class EventStoreABC:
|
||||
"""
|
||||
A stored list of events backing a conversation
|
||||
"""
|
||||
"""A stored list of events backing a conversation"""
|
||||
|
||||
sid: str
|
||||
user_id: str | None
|
||||
@ -25,8 +23,7 @@ class EventStoreABC:
|
||||
filter: EventFilter | None = None,
|
||||
limit: int | None = None,
|
||||
) -> Iterable[Event]:
|
||||
"""
|
||||
Retrieve events from the event stream, optionally excluding events using a filter
|
||||
"""Retrieve events from the event stream, optionally excluding events using a filter
|
||||
|
||||
Args:
|
||||
start_id: The ID of the first event to retrieve. Defaults to 0.
|
||||
|
||||
@ -13,9 +13,7 @@ from openhands.events.serialization.event import event_from_dict
|
||||
|
||||
@dataclass
|
||||
class NestedEventStore(EventStoreABC):
|
||||
"""
|
||||
A stored list of events backing a conversation
|
||||
"""
|
||||
"""A stored list of events backing a conversation"""
|
||||
|
||||
base_url: str
|
||||
sid: str
|
||||
|
||||
@ -46,8 +46,7 @@ class AgentThinkObservation(Observation):
|
||||
|
||||
@dataclass
|
||||
class MicroagentKnowledge:
|
||||
"""
|
||||
Represents knowledge from a triggered microagent.
|
||||
"""Represents knowledge from a triggered microagent.
|
||||
|
||||
Attributes:
|
||||
name: The name of the microagent that was triggered
|
||||
|
||||
@ -146,7 +146,6 @@ class CmdOutputObservation(Observation):
|
||||
Returns:
|
||||
Original content if not too large, or truncated content otherwise
|
||||
"""
|
||||
|
||||
if len(content) <= max_size:
|
||||
return content
|
||||
|
||||
|
||||
@ -12,7 +12,8 @@ from openhands.events.observation import (
|
||||
def get_pairs_from_events(events: list[Event]) -> list[tuple[Action, Observation]]:
|
||||
"""Return the history as a list of tuples (action, observation).
|
||||
|
||||
This function is a compatibility function for evals reading and visualization working with old histories."""
|
||||
This function is a compatibility function for evals reading and visualization working with old histories.
|
||||
"""
|
||||
tuples: list[tuple[Action, Observation]] = []
|
||||
action_map: dict[int, Action] = {}
|
||||
observation_map: dict[int, Observation] = {}
|
||||
|
||||
@ -191,8 +191,7 @@ class BitBucketService(BaseGitService, GitService, InstallationsService):
|
||||
def _parse_repository(
|
||||
self, repo: dict, link_header: str | None = None
|
||||
) -> Repository:
|
||||
"""
|
||||
Parse a Bitbucket API repository response into a Repository object.
|
||||
"""Parse a Bitbucket API repository response into a Repository object.
|
||||
|
||||
Args:
|
||||
repo: Repository data from Bitbucket API
|
||||
@ -201,7 +200,6 @@ class BitBucketService(BaseGitService, GitService, InstallationsService):
|
||||
Returns:
|
||||
Repository object
|
||||
"""
|
||||
|
||||
repo_id = repo.get('uuid', '')
|
||||
|
||||
workspace_slug = repo.get('workspace', {}).get('slug', '')
|
||||
@ -292,8 +290,7 @@ class BitBucketService(BaseGitService, GitService, InstallationsService):
|
||||
async def _fetch_paginated_data(
|
||||
self, url: str, params: dict, max_items: int
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Fetch data with pagination support for Bitbucket API.
|
||||
"""Fetch data with pagination support for Bitbucket API.
|
||||
|
||||
Args:
|
||||
url: The API endpoint URL
|
||||
|
||||
@ -186,8 +186,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
async def _fetch_paginated_repos(
|
||||
self, url: str, params: dict, max_repos: int, extract_key: str | None = None
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Fetch repositories with pagination support.
|
||||
"""Fetch repositories with pagination support.
|
||||
|
||||
Args:
|
||||
url: The API endpoint URL
|
||||
@ -228,8 +227,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
def _parse_repository(
|
||||
self, repo: dict, link_header: str | None = None
|
||||
) -> Repository:
|
||||
"""
|
||||
Parse a GitHub API repository response into a Repository object.
|
||||
"""Parse a GitHub API repository response into a Repository object.
|
||||
|
||||
Args:
|
||||
repo: Repository data from GitHub API
|
||||
@ -550,8 +548,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
draft: bool = True,
|
||||
labels: list[str] | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Creates a PR using user credentials
|
||||
"""Creates a PR using user credentials
|
||||
|
||||
Args:
|
||||
repo_name: The full name of the repository (owner/repo)
|
||||
@ -566,7 +563,6 @@ class GitHubService(BaseGitService, GitService, InstallationsService):
|
||||
- PR URL when successful
|
||||
- Error message when unsuccessful
|
||||
"""
|
||||
|
||||
url = f'{self.BASE_URL}/repos/{repo_name}/pulls'
|
||||
|
||||
# Set default body if none provided
|
||||
|
||||
@ -71,9 +71,7 @@ class GitLabService(BaseGitService, GitService):
|
||||
return ProviderType.GITLAB.value
|
||||
|
||||
async def _get_gitlab_headers(self) -> dict[str, Any]:
|
||||
"""
|
||||
Retrieve the GitLab Token to construct the headers
|
||||
"""
|
||||
"""Retrieve the GitLab Token to construct the headers"""
|
||||
if not self.token:
|
||||
latest_token = await self.get_latest_token()
|
||||
if latest_token:
|
||||
@ -173,8 +171,7 @@ class GitLabService(BaseGitService, GitService):
|
||||
async def execute_graphql_query(
|
||||
self, query: str, variables: dict[str, Any] | None = None
|
||||
) -> Any:
|
||||
"""
|
||||
Execute a GraphQL query against the GitLab GraphQL API
|
||||
"""Execute a GraphQL query against the GitLab GraphQL API
|
||||
|
||||
Args:
|
||||
query: The GraphQL query string
|
||||
@ -244,8 +241,7 @@ class GitLabService(BaseGitService, GitService):
|
||||
def _parse_repository(
|
||||
self, repo: dict, link_header: str | None = None
|
||||
) -> Repository:
|
||||
"""
|
||||
Parse a GitLab API project response into a Repository object.
|
||||
"""Parse a GitLab API project response into a Repository object.
|
||||
|
||||
Args:
|
||||
repo: Project data from GitLab API
|
||||
@ -269,8 +265,7 @@ class GitLabService(BaseGitService, GitService):
|
||||
)
|
||||
|
||||
def _parse_gitlab_url(self, url: str) -> str | None:
|
||||
"""
|
||||
Parse a GitLab URL to extract the repository path.
|
||||
"""Parse a GitLab URL to extract the repository path.
|
||||
|
||||
Expected format: https://{domain}/{group}/{possibly_subgroup}/{repo}
|
||||
Returns the full path from group onwards (e.g., 'group/subgroup/repo' or 'group/repo')
|
||||
@ -588,8 +583,7 @@ class GitLabService(BaseGitService, GitService):
|
||||
description: str | None = None,
|
||||
labels: list[str] | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Creates a merge request in GitLab
|
||||
"""Creates a merge request in GitLab
|
||||
|
||||
Args:
|
||||
id: The ID or URL-encoded path of the project
|
||||
@ -603,7 +597,6 @@ class GitLabService(BaseGitService, GitService):
|
||||
- MR URL when successful
|
||||
- Error message when unsuccessful
|
||||
"""
|
||||
|
||||
# Convert string ID to URL-encoded path if needed
|
||||
project_id = str(id).replace('/', '%2F') if isinstance(id, str) else id
|
||||
url = f'{self.BASE_URL}/projects/{project_id}/merge_requests'
|
||||
|
||||
@@ -191,10 +191,7 @@ class ProviderHandler:
per_page: int | None,
installation_id: str | None,
) -> list[Repository]:
"""
Get repositories from providers
"""
"""Get repositories from providers"""
@ -226,9 +223,7 @@ class ProviderHandler:
|
||||
return all_repos
|
||||
|
||||
async def get_suggested_tasks(self) -> list[SuggestedTask]:
|
||||
"""
|
||||
Get suggested tasks from providers
|
||||
"""
|
||||
"""Get suggested tasks from providers"""
|
||||
tasks: list[SuggestedTask] = []
|
||||
for provider in self.provider_tokens:
|
||||
try:
|
||||
@ -303,8 +298,7 @@ class ProviderHandler:
|
||||
event_stream: EventStream,
|
||||
env_vars: dict[ProviderType, SecretStr] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
This ensures that the latest provider tokens are masked from the event stream
|
||||
"""This ensures that the latest provider tokens are masked from the event stream
|
||||
It is called when the provider tokens are first initialized in the runtime or when tokens are re-exported with the latest working ones
|
||||
|
||||
Args:
|
||||
@ -320,8 +314,7 @@ class ProviderHandler:
|
||||
def expose_env_vars(
|
||||
self, env_secrets: dict[ProviderType, SecretStr]
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
Return string values instead of typed values for environment secrets
|
||||
"""Return string values instead of typed values for environment secrets
|
||||
Called just before exporting secrets to runtime, or setting secrets in the event stream
|
||||
"""
|
||||
exposed_envs = {}
|
||||
@ -353,8 +346,7 @@ class ProviderHandler:
|
||||
providers: list[ProviderType] | None = None,
|
||||
get_latest: bool = False,
|
||||
) -> dict[ProviderType, SecretStr] | dict[str, str]:
|
||||
"""
|
||||
Retrieves the provider tokens from ProviderHandler object
|
||||
"""Retrieves the provider tokens from ProviderHandler object
|
||||
This is used when initializing/exporting new provider tokens in the runtime
|
||||
|
||||
Args:
|
||||
@ -362,7 +354,6 @@ class ProviderHandler:
|
||||
providers: Return provider tokens for the list passed in, otherwise return all available providers
|
||||
get_latest: Get the latest working token for the providers if True, otherwise get the existing ones
|
||||
"""
|
||||
|
||||
if not self.provider_tokens:
|
||||
return {}
|
||||
|
||||
@ -393,11 +384,9 @@ class ProviderHandler:
|
||||
def check_cmd_action_for_provider_token_ref(
|
||||
cls, event: Action
|
||||
) -> list[ProviderType]:
|
||||
"""
|
||||
Detect if agent run action is using a provider token (e.g $GITHUB_TOKEN)
|
||||
"""Detect if agent run action is using a provider token (e.g $GITHUB_TOKEN)
|
||||
Returns a list of providers which are called by the agent
|
||||
"""
|
||||
|
||||
if not isinstance(event, CmdRunAction):
|
||||
return []
|
||||
|
||||
@ -410,9 +399,7 @@ class ProviderHandler:
|
||||
|
||||
@classmethod
|
||||
def get_provider_env_key(cls, provider: ProviderType) -> str:
|
||||
"""
|
||||
Map ProviderType value to the environment variable name in the runtime
|
||||
"""
|
||||
"""Map ProviderType value to the environment variable name in the runtime"""
|
||||
return f'{provider.value}_token'.lower()
|
||||
|
||||
async def verify_repo_provider(
|
||||
@ -443,8 +430,7 @@ class ProviderHandler:
|
||||
async def get_branches(
|
||||
self, repository: str, specified_provider: ProviderType | None = None
|
||||
) -> list[Branch]:
|
||||
"""
|
||||
Get branches for a repository
|
||||
"""Get branches for a repository
|
||||
|
||||
Args:
|
||||
repository: The repository name
|
||||
|
||||
@ -10,8 +10,7 @@ from openhands.integrations.provider import ProviderType
|
||||
async def validate_provider_token(
|
||||
token: SecretStr, base_domain: str | None = None
|
||||
) -> ProviderType | None:
|
||||
"""
|
||||
Determine whether a token is for GitHub, GitLab, or Bitbucket by attempting to get user info
|
||||
"""Determine whether a token is for GitHub, GitLab, or Bitbucket by attempting to get user info
|
||||
from the services.
|
||||
|
||||
Args:
|
||||
|
||||
@ -24,10 +24,7 @@ def read_task_from_file(file_path: str) -> str:
|
||||
|
||||
|
||||
def read_task(args: argparse.Namespace, cli_multiline_input: bool) -> str:
|
||||
"""
|
||||
Read the task from the CLI args, file, or stdin.
|
||||
"""
|
||||
|
||||
"""Read the task from the CLI args, file, or stdin."""
|
||||
# Determine the task
|
||||
task_str = ''
|
||||
if args.file:
|
||||
|
||||
@ -703,6 +703,7 @@ class LLM(RetryMixin, DebugMixin):
|
||||
|
||||
Args:
|
||||
messages (list): A list of messages, either as a list of dicts or as a list of Message objects.
|
||||
|
||||
Returns:
|
||||
int: The number of tokens.
|
||||
"""
|
||||
|
||||
@ -150,7 +150,6 @@ class Metrics:
|
||||
response_id: str,
|
||||
) -> None:
|
||||
"""Add a single usage record."""
|
||||
|
||||
# Token each turn for calculating context usage.
|
||||
per_turn_token = prompt_tokens + completion_tokens
|
||||
|
||||
|
||||
@ -16,8 +16,7 @@ class RetryMixin:
|
||||
"""Mixin class for retry logic."""
|
||||
|
||||
def retry_decorator(self, **kwargs: Any) -> Callable:
|
||||
"""
|
||||
Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes.
|
||||
"""Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes.
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments to override default retry behavior.
|
||||
|
||||
@ -21,9 +21,7 @@ from openhands.mcp.tool import MCPClientTool
|
||||
|
||||
|
||||
class MCPClient(BaseModel):
|
||||
"""
|
||||
A collection of tools that connects to an MCP server and manages available tools through the Model Context Protocol.
|
||||
"""
|
||||
"""A collection of tools that connects to an MCP server and manages available tools through the Model Context Protocol."""
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
|
||||
@ -3,8 +3,7 @@ from pydantic import ConfigDict
|
||||
|
||||
|
||||
class MCPClientTool(Tool):
|
||||
"""
|
||||
Represents a tool proxy that can be called on the MCP server from the client side.
|
||||
"""Represents a tool proxy that can be called on the MCP server from the client side.
|
||||
|
||||
This version doesn't store a session reference, as sessions are created on-demand
|
||||
by the MCPClient for each operation.
|
||||
|
||||
@ -26,8 +26,7 @@ from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
|
||||
|
||||
|
||||
def convert_mcp_clients_to_tools(mcp_clients: list[MCPClient] | None) -> list[dict]:
|
||||
"""
|
||||
Converts a list of MCPClient instances to ChatCompletionToolParam format
|
||||
"""Converts a list of MCPClient instances to ChatCompletionToolParam format
|
||||
that can be used by CodeActAgent.
|
||||
|
||||
Args:
|
||||
@ -152,8 +151,7 @@ async def create_mcp_clients(
|
||||
async def fetch_mcp_tools_from_config(
|
||||
mcp_config: MCPConfig, conversation_id: str | None = None, use_stdio: bool = False
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Retrieves the list of MCP tools from the MCP clients.
|
||||
"""Retrieves the list of MCP tools from the MCP clients.
|
||||
|
||||
Args:
|
||||
mcp_config: The MCP configuration
|
||||
@ -206,8 +204,7 @@ async def fetch_mcp_tools_from_config(
|
||||
|
||||
|
||||
async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Observation:
|
||||
"""
|
||||
Call a tool on an MCP server and return the observation.
|
||||
"""Call a tool on an MCP server and return the observation.
|
||||
|
||||
Args:
|
||||
mcp_clients: The list of MCP clients to execute the action on
|
||||
@ -270,9 +267,7 @@ async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Obse
|
||||
async def add_mcp_tools_to_agent(
|
||||
agent: 'Agent', runtime: Runtime, memory: 'Memory'
|
||||
) -> MCPConfig:
|
||||
"""
|
||||
Add MCP tools to an agent.
|
||||
"""
|
||||
"""Add MCP tools to an agent."""
|
||||
import sys
|
||||
|
||||
# Skip MCP tools on Windows
|
||||
|
||||
@ -87,7 +87,6 @@ class ConversationMemory:
|
||||
vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included.
|
||||
initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly.
|
||||
"""
|
||||
|
||||
events = condensed_history
|
||||
|
||||
# Ensure the event list starts with SystemMessageAction, then MessageAction(source='user')
|
||||
|
||||
@ -39,8 +39,7 @@ USER_MICROAGENTS_DIR = Path.home() / '.openhands' / 'microagents'
|
||||
|
||||
|
||||
class Memory:
|
||||
"""
|
||||
Memory is a component that listens to the EventStream for information retrieval actions
|
||||
"""Memory is a component that listens to the EventStream for information retrieval actions
|
||||
(a RecallAction) and publishes observations with the content (such as RecallObservation).
|
||||
"""
|
||||
|
||||
@ -145,7 +144,6 @@ class Memory:
|
||||
This method collects information from all available repo microagents and concatenates their contents.
|
||||
Multiple repo microagents are supported, and their contents will be concatenated with newlines between them.
|
||||
"""
|
||||
|
||||
# Create WORKSPACE_CONTEXT info:
|
||||
# - repository_info
|
||||
# - runtime_info
|
||||
@ -211,7 +209,6 @@ class Memory:
|
||||
event: RecallAction,
|
||||
) -> RecallObservation | None:
|
||||
"""When a microagent action triggers microagents, create a RecallObservation with structured data."""
|
||||
|
||||
# Find any matched microagents based on the query
|
||||
microagent_knowledge = self._find_microagent_knowledge(event.query)
|
||||
|
||||
@ -257,8 +254,7 @@ class Memory:
|
||||
def load_user_workspace_microagents(
|
||||
self, user_microagents: list[BaseMicroagent]
|
||||
) -> None:
|
||||
"""
|
||||
This method loads microagents from a user's cloned repo or workspace directory.
|
||||
"""This method loads microagents from a user's cloned repo or workspace directory.
|
||||
|
||||
This is typically called from agent_session or setup once the workspace is cloned.
|
||||
"""
|
||||
@ -272,9 +268,7 @@ class Memory:
|
||||
self.repo_microagents[user_microagent.name] = user_microagent
|
||||
|
||||
def _load_global_microagents(self) -> None:
|
||||
"""
|
||||
Loads microagents from the global microagents_dir
|
||||
"""
|
||||
"""Loads microagents from the global microagents_dir"""
|
||||
repo_agents, knowledge_agents = load_microagents_from_dir(
|
||||
GLOBAL_MICROAGENTS_DIR
|
||||
)
|
||||
@ -284,8 +278,7 @@ class Memory:
|
||||
self.repo_microagents[name] = agent_repo
|
||||
|
||||
def _load_user_microagents(self) -> None:
|
||||
"""
|
||||
Loads microagents from the user's home directory (~/.openhands/microagents/)
|
||||
"""Loads microagents from the user's home directory (~/.openhands/microagents/)
|
||||
Creates the directory if it doesn't exist.
|
||||
"""
|
||||
try:
|
||||
@ -307,8 +300,7 @@ class Memory:
|
||||
)
|
||||
|
||||
def get_microagent_mcp_tools(self) -> list[MCPConfig]:
|
||||
"""
|
||||
Get MCP tools from all repo microagents (always active)
|
||||
"""Get MCP tools from all repo microagents (always active)
|
||||
|
||||
Returns:
|
||||
A list of MCP tools configurations from microagents
|
||||
@ -365,8 +357,7 @@ class Memory:
|
||||
def set_conversation_instructions(
|
||||
self, conversation_instructions: str | None
|
||||
) -> None:
|
||||
"""
|
||||
Set contextual information for conversation
|
||||
"""Set contextual information for conversation
|
||||
This is information the agent may require
|
||||
"""
|
||||
self.conversation_instructions = ConversationInstructions(
|
||||
|
||||
@ -92,7 +92,6 @@ class GithubIssueHandler(IssueHandlerInterface):
|
||||
Returns:
|
||||
List of Github issues.
|
||||
"""
|
||||
|
||||
if not issue_numbers:
|
||||
raise ValueError('Unspecified issue number')
|
||||
|
||||
|
||||
@ -86,7 +86,6 @@ class GitlabIssueHandler(IssueHandlerInterface):
|
||||
Returns:
|
||||
List of Gitlab issues.
|
||||
"""
|
||||
|
||||
if not issue_numbers:
|
||||
raise ValueError('Unspecified issue number')
|
||||
|
||||
|
||||
@ -70,7 +70,6 @@ class IssueResolver:
|
||||
comment_id: Optional ID of a specific comment to focus on.
|
||||
base_domain: The base domain for the git server.
|
||||
"""
|
||||
|
||||
parts = args.selected_repo.rsplit('/', 1)
|
||||
if len(parts) < 2:
|
||||
raise ValueError('Invalid repository format. Expected owner/repo')
|
||||
@ -540,7 +539,6 @@ class IssueResolver:
|
||||
Args:
|
||||
reset_logger: Whether to reset the logger for multiprocessing.
|
||||
"""
|
||||
|
||||
issue = self.extract_issue()
|
||||
|
||||
if self.comment_id is not None:
|
||||
|
||||
@ -16,8 +16,7 @@ from openhands.integrations.utils import validate_provider_token
|
||||
|
||||
|
||||
async def identify_token(token: str, base_domain: str | None) -> ProviderType:
|
||||
"""
|
||||
Identifies whether a token belongs to GitHub, GitLab, or Bitbucket.
|
||||
"""Identifies whether a token belongs to GitHub, GitLab, or Bitbucket.
|
||||
Parameters:
|
||||
token (str): The personal access token to check.
|
||||
base_domain (str): Custom base domain for provider (e.g GitHub Enterprise)
|
||||
|
||||
@ -88,8 +88,7 @@ _ALL_RUNTIME_CLASSES = {**_DEFAULT_RUNTIME_CLASSES, **_THIRD_PARTY_RUNTIME_CLASS
|
||||
|
||||
|
||||
def get_runtime_cls(name: str) -> type[Runtime]:
|
||||
"""
|
||||
If name is one of the predefined runtime names (e.g. 'docker'), return its class.
|
||||
"""If name is one of the predefined runtime names (e.g. 'docker'), return its class.
|
||||
Otherwise attempt to resolve name as subclass of Runtime and return it.
|
||||
Raise on invalid selections.
|
||||
"""
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
"""
|
||||
This is the main file for the runtime client.
|
||||
"""This is the main file for the runtime client.
|
||||
It is responsible for executing actions received from OpenHands backend and producing observations.
|
||||
|
||||
NOTE: this will be executed inside the docker sandbox.
|
||||
|
||||
@ -196,8 +196,7 @@ class Runtime(FileEditRuntimeMixin):
|
||||
self.add_env_vars(self.config.sandbox.runtime_startup_env_vars)
|
||||
|
||||
def close(self) -> None:
|
||||
"""
|
||||
This should only be called by conversation manager or closing the session.
|
||||
"""This should only be called by conversation manager or closing the session.
|
||||
If called for instance by error handling, it could prevent recovery.
|
||||
"""
|
||||
pass
|
||||
@ -300,9 +299,7 @@ class Runtime(FileEditRuntimeMixin):
|
||||
asyncio.get_event_loop().run_until_complete(self._handle_action(event))
|
||||
|
||||
async def _export_latest_git_provider_tokens(self, event: Action) -> None:
|
||||
"""
|
||||
Refresh runtime provider tokens when agent attemps to run action with provider token
|
||||
"""
|
||||
"""Refresh runtime provider tokens when agent attemps to run action with provider token"""
|
||||
if not self.user_id:
|
||||
return
|
||||
|
||||
@ -1001,9 +998,7 @@ fi
|
||||
def _execute_shell_fn_git_handler(
|
||||
self, command: str, cwd: str | None
|
||||
) -> CommandResult:
|
||||
"""
|
||||
This function is used by the GitHandler to execute shell commands.
|
||||
"""
|
||||
"""This function is used by the GitHandler to execute shell commands."""
|
||||
obs = self.run(CmdRunAction(command=command, is_static=True, cwd=cwd))
|
||||
exit_code = 0
|
||||
content = ''
|
||||
@ -1019,9 +1014,7 @@ fi
|
||||
return CommandResult(content=content, exit_code=exit_code)
|
||||
|
||||
def _create_file_fn_git_handler(self, path: str, content: str) -> int:
|
||||
"""
|
||||
This function is used by the GitHandler to execute shell commands.
|
||||
"""
|
||||
"""This function is used by the GitHandler to execute shell commands."""
|
||||
obs = self.write(FileWriteAction(path=path, content=content))
|
||||
if isinstance(obs, ErrorObservation):
|
||||
return -1
|
||||
@ -1043,8 +1036,7 @@ fi
|
||||
def subscribe_to_shell_stream(
|
||||
self, callback: Callable[[str], None] | None = None
|
||||
) -> bool:
|
||||
"""
|
||||
Subscribe to shell command output stream.
|
||||
"""Subscribe to shell command output stream.
|
||||
This method is meant to be overridden by runtime implementations
|
||||
that want to stream shell command output to external consumers.
|
||||
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
"""
|
||||
A tiny, isolated server that provides only the /view endpoint from the action execution server.
|
||||
"""A tiny, isolated server that provides only the /view endpoint from the action execution server.
|
||||
This server has no authentication and only listens to localhost traffic.
|
||||
"""
|
||||
|
||||
@ -83,7 +82,6 @@ def start_file_viewer_server(port: int) -> tuple[str, threading.Thread]:
|
||||
Returns:
|
||||
Tuple[str, threading.Thread]: The server URL and the thread object.
|
||||
"""
|
||||
|
||||
# Save the server URL to a file
|
||||
server_url = f'http://localhost:{port}'
|
||||
port_path = '/tmp/oh-server-url'
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
"""
|
||||
Runtime implementations for OpenHands.
|
||||
"""
|
||||
"""Runtime implementations for OpenHands."""
|
||||
|
||||
from openhands.runtime.impl.action_execution.action_execution_client import (
|
||||
ActionExecutionClient,
|
||||
|
||||
@ -138,7 +138,6 @@ class ActionExecutionClient(Runtime):
|
||||
|
||||
If path is None, list files in the sandbox's initial working directory (e.g., /workspace).
|
||||
"""
|
||||
|
||||
try:
|
||||
data = {}
|
||||
if path is not None:
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
"""
|
||||
CLI Runtime implementation for OpenHands.
|
||||
"""
|
||||
"""CLI Runtime implementation for OpenHands."""
|
||||
|
||||
from openhands.runtime.impl.cli.cli_runtime import CLIRuntime
|
||||
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
"""
|
||||
This runtime runs commands locally using subprocess and performs file operations using Python's standard library.
|
||||
"""This runtime runs commands locally using subprocess and performs file operations using Python's standard library.
|
||||
It does not implement browser functionality.
|
||||
"""
|
||||
|
||||
@ -88,8 +87,7 @@ After installing .NET SDK, restart your terminal and try again.
|
||||
|
||||
|
||||
class CLIRuntime(Runtime):
|
||||
"""
|
||||
A runtime implementation that runs commands locally using subprocess and performs
|
||||
"""A runtime implementation that runs commands locally using subprocess and performs
|
||||
file operations using Python's standard library. It does not implement browser functionality.
|
||||
|
||||
Args:
|
||||
@ -191,8 +189,7 @@ class CLIRuntime(Runtime):
|
||||
logger.info(f'CLIRuntime initialized with workspace at {self._workspace_path}')
|
||||
|
||||
def add_env_vars(self, env_vars: dict[str, Any]) -> None:
|
||||
"""
|
||||
Adds environment variables to the current runtime environment.
|
||||
"""Adds environment variables to the current runtime environment.
|
||||
For CLIRuntime, this means updating os.environ for the current process,
|
||||
so that subsequent commands inherit these variables.
|
||||
This overrides the BaseRuntime behavior which tries to run shell commands
|
||||
@ -218,8 +215,7 @@ class CLIRuntime(Runtime):
|
||||
# during initialization before self._runtime_initialized is True.
|
||||
|
||||
def _safe_terminate_process(self, process_obj, signal_to_send=signal.SIGTERM):
|
||||
"""
|
||||
Safely attempts to terminate/kill a process group or a single process.
|
||||
"""Safely attempts to terminate/kill a process group or a single process.
|
||||
|
||||
Args:
|
||||
process_obj: the subprocess.Popen object started with start_new_session=True
|
||||
@ -292,8 +288,8 @@ class CLIRuntime(Runtime):
|
||||
def _execute_powershell_command(
|
||||
self, command: str, timeout: float
|
||||
) -> CmdOutputObservation | ErrorObservation:
|
||||
"""
|
||||
Execute a command using PowerShell session on Windows.
|
||||
"""Execute a command using PowerShell session on Windows.
|
||||
|
||||
Args:
|
||||
command: The command to execute
|
||||
timeout: Timeout in seconds for the command
|
||||
@ -326,8 +322,8 @@ class CLIRuntime(Runtime):
|
||||
def _execute_shell_command(
|
||||
self, command: str, timeout: float
|
||||
) -> CmdOutputObservation:
|
||||
"""
|
||||
Execute a shell command and stream its output to a callback function.
|
||||
"""Execute a shell command and stream its output to a callback function.
|
||||
|
||||
Args:
|
||||
command: The shell command to execute
|
||||
timeout: Timeout in seconds for the command
|
||||
@ -965,8 +961,7 @@ class CLIRuntime(Runtime):
|
||||
def subscribe_to_shell_stream(
|
||||
self, callback: Callable[[str], None] | None = None
|
||||
) -> bool:
|
||||
"""
|
||||
Subscribe to shell command output stream.
|
||||
"""Subscribe to shell command output stream.
|
||||
|
||||
Args:
|
||||
callback: A function that will be called with each line of output from shell commands.
|
||||
|
||||
@ -58,8 +58,7 @@ POD_LABEL = 'openhands-runtime'
|
||||
|
||||
|
||||
class KubernetesRuntime(ActionExecutionClient):
|
||||
"""
|
||||
A Kubernetes runtime for OpenHands that works with Kind.
|
||||
"""A Kubernetes runtime for OpenHands that works with Kind.
|
||||
|
||||
This runtime creates pods in a Kubernetes cluster to run the agent code.
|
||||
It uses the Kubernetes Python client to create and manage the pods.
|
||||
@ -411,7 +410,6 @@ class KubernetesRuntime(ActionExecutionClient):
|
||||
|
||||
def _get_vscode_service_manifest(self):
|
||||
"""Create a service manifest for the VSCode server."""
|
||||
|
||||
vscode_service_spec = V1ServiceSpec(
|
||||
selector={'app': POD_LABEL, 'session': self.sid},
|
||||
type='ClusterIP',
|
||||
@ -567,7 +565,6 @@ class KubernetesRuntime(ActionExecutionClient):
|
||||
|
||||
def _get_vscode_ingress_manifest(self):
|
||||
"""Create an ingress manifest for the VSCode server."""
|
||||
|
||||
tls = []
|
||||
if self._k8s_config.ingress_tls_secret:
|
||||
runtime_tls = V1IngressTLS(
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
"""
|
||||
MCP Proxy module for OpenHands.
|
||||
"""
|
||||
"""MCP Proxy module for OpenHands."""
|
||||
|
||||
from openhands.runtime.mcp.proxy.manager import MCPProxyManager
|
||||
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
"""
|
||||
MCP Proxy Manager for OpenHands.
|
||||
"""MCP Proxy Manager for OpenHands.
|
||||
|
||||
This module provides a manager class for handling FastMCP proxy instances,
|
||||
including initialization, configuration, and mounting to FastAPI applications.
|
||||
@ -20,8 +19,7 @@ fastmcp_logger = fastmcp_get_logger('fastmcp')
|
||||
|
||||
|
||||
class MCPProxyManager:
|
||||
"""
|
||||
Manager for FastMCP proxy instances.
|
||||
"""Manager for FastMCP proxy instances.
|
||||
|
||||
This class encapsulates all the functionality related to creating, configuring,
|
||||
and managing FastMCP proxy instances, including mounting them to FastAPI applications.
|
||||
@ -33,8 +31,7 @@ class MCPProxyManager:
|
||||
api_key: Optional[str] = None,
|
||||
logger_level: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the MCP Proxy Manager.
|
||||
"""Initialize the MCP Proxy Manager.
|
||||
|
||||
Args:
|
||||
name: Name of the proxy server
|
||||
@ -55,9 +52,7 @@ class MCPProxyManager:
|
||||
fastmcp_logger.setLevel(logger_level)
|
||||
|
||||
def initialize(self) -> None:
|
||||
"""
|
||||
Initialize the FastMCP proxy with the current configuration.
|
||||
"""
|
||||
"""Initialize the FastMCP proxy with the current configuration."""
|
||||
if len(self.config['mcpServers']) == 0:
|
||||
logger.info(
|
||||
'No MCP servers configured for FastMCP Proxy, skipping initialization.'
|
||||
@ -76,8 +71,7 @@ class MCPProxyManager:
|
||||
async def mount_to_app(
|
||||
self, app: FastAPI, allow_origins: Optional[list[str]] = None
|
||||
) -> None:
|
||||
"""
|
||||
Mount the SSE server app to a FastAPI application.
|
||||
"""Mount the SSE server app to a FastAPI application.
|
||||
|
||||
Args:
|
||||
app: FastAPI application to mount to
|
||||
@ -128,8 +122,7 @@ class MCPProxyManager:
|
||||
stdio_servers: list[MCPStdioServerConfig],
|
||||
allow_origins: Optional[list[str]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Update the tools configuration and remount the proxy to the app.
|
||||
"""Update the tools configuration and remount the proxy to the app.
|
||||
|
||||
This is a convenience method that combines updating the tools,
|
||||
shutting down the existing proxy, initializing a new one, and
|
||||
|
||||
@ -95,8 +95,7 @@ class VSCodePlugin(Plugin):
|
||||
)
|
||||
|
||||
def _setup_vscode_settings(self) -> None:
|
||||
"""
|
||||
Set up VSCode settings by creating the .vscode directory in the workspace
|
||||
"""Set up VSCode settings by creating the .vscode directory in the workspace
|
||||
and copying the settings.json file there.
|
||||
"""
|
||||
# Get the path to the settings.json file in the plugin directory
|
||||
|
||||
@ -79,8 +79,7 @@ def split_bash_commands(commands: str) -> list[str]:
|
||||
|
||||
|
||||
def escape_bash_special_chars(command: str) -> str:
|
||||
r"""
|
||||
Escapes characters that have different interpretations in bash vs python.
|
||||
r"""Escapes characters that have different interpretations in bash vs python.
|
||||
Specifically handles escape sequences like \;, \|, \&, etc.
|
||||
"""
|
||||
if command.strip() == '':
|
||||
@ -446,6 +445,7 @@ class BashSession:
|
||||
ps1_matches: List of regex matches for PS1 prompts
|
||||
get_content_before_last_match: when there's only one PS1 match, whether to get
|
||||
the content before the last PS1 prompt (True) or after the last PS1 prompt (False)
|
||||
|
||||
Returns:
|
||||
Combined string of all outputs between matches
|
||||
"""
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
"""
|
||||
Utility module for generating file viewer HTML content.
|
||||
"""
|
||||
"""Utility module for generating file viewer HTML content."""
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
@ -8,8 +6,7 @@ import os
|
||||
|
||||
|
||||
def generate_file_viewer_html(file_path: str) -> str:
|
||||
"""
|
||||
Generate HTML content for viewing different file types.
|
||||
"""Generate HTML content for viewing different file types.
|
||||
|
||||
Args:
|
||||
file_path: The absolute path to the file
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Get git changes in the current working directory relative to the remote origin if possible.
|
||||
"""Get git changes in the current working directory relative to the remote origin if possible.
|
||||
NOTE: Since this is run as a script, there should be no imports from project files!
|
||||
"""
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Get git diff in a single git file for the closest git repo in the file system
|
||||
"""Get git diff in a single git file for the closest git repo in the file system
|
||||
NOTE: Since this is run as a script, there should be no imports from project files!
|
||||
"""
|
||||
|
||||
|
||||
@ -14,8 +14,7 @@ GIT_DIFF_CMD = (
|
||||
|
||||
@dataclass
|
||||
class CommandResult:
|
||||
"""
|
||||
Represents the result of a shell command execution.
|
||||
"""Represents the result of a shell command execution.
|
||||
|
||||
Attributes:
|
||||
content (str): The output content of the command.
|
||||
@ -27,9 +26,7 @@ class CommandResult:
|
||||
|
||||
|
||||
class GitHandler:
|
||||
"""
|
||||
A handler for executing Git-related operations via shell commands.
|
||||
"""
|
||||
"""A handler for executing Git-related operations via shell commands."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@ -43,8 +40,7 @@ class GitHandler:
|
||||
self.git_diff_cmd = GIT_DIFF_CMD
|
||||
|
||||
def set_cwd(self, cwd: str) -> None:
|
||||
"""
|
||||
Sets the current working directory for Git operations.
|
||||
"""Sets the current working directory for Git operations.
|
||||
|
||||
Args:
|
||||
cwd (str): The directory path.
|
||||
@ -60,8 +56,7 @@ class GitHandler:
|
||||
return script_file
|
||||
|
||||
def get_git_changes(self) -> list[dict[str, str]] | None:
|
||||
"""
|
||||
Retrieves the list of changed files in Git repositories.
|
||||
"""Retrieves the list of changed files in Git repositories.
|
||||
Examines each direct subdirectory of the workspace directory looking for git repositories
|
||||
and returns the changes for each of these directories.
|
||||
Optimized to use a single git command per repository for maximum performance.
|
||||
@ -100,8 +95,7 @@ class GitHandler:
|
||||
return self.get_git_changes()
|
||||
|
||||
def get_git_diff(self, file_path: str) -> dict[str, str]:
|
||||
"""
|
||||
Retrieves the original and modified content of a file in the repository.
|
||||
"""Retrieves the original and modified content of a file in the repository.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the file.
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
"""
|
||||
This module provides a Windows-specific implementation for running commands
|
||||
"""This module provides a Windows-specific implementation for running commands
|
||||
in a PowerShell session using the pythonnet library to interact with the .NET
|
||||
PowerShell SDK directly. This aims to provide a more robust and integrated
|
||||
way to manage PowerShell processes compared to using temporary script files.
|
||||
@ -95,8 +94,7 @@ except Exception as e:
|
||||
|
||||
|
||||
class WindowsPowershellSession:
|
||||
"""
|
||||
Manages a persistent PowerShell session using the .NET SDK via pythonnet.
|
||||
"""Manages a persistent PowerShell session using the .NET SDK via pythonnet.
|
||||
|
||||
Allows executing commands within a single runspace, preserving state
|
||||
(variables, current directory) between calls.
|
||||
@ -110,8 +108,7 @@ class WindowsPowershellSession:
|
||||
no_change_timeout_seconds: int = 30,
|
||||
max_memory_mb: int | None = None,
|
||||
):
|
||||
"""
|
||||
Initializes the PowerShell session.
|
||||
"""Initializes the PowerShell session.
|
||||
|
||||
Args:
|
||||
work_dir: The starting working directory for the session.
|
||||
@ -388,9 +385,7 @@ class WindowsPowershellSession:
|
||||
def _check_active_job(
|
||||
self, timeout_seconds: int
|
||||
) -> CmdOutputObservation | ErrorObservation:
|
||||
"""
|
||||
Checks the active job for new output and status, waiting up to timeout_seconds.
|
||||
"""
|
||||
"""Checks the active job for new output and status, waiting up to timeout_seconds."""
|
||||
with self._job_lock:
|
||||
if not self.active_job:
|
||||
return ErrorObservation(
|
||||
@ -649,8 +644,7 @@ class WindowsPowershellSession:
|
||||
return self._cwd
|
||||
|
||||
def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
|
||||
"""
|
||||
Executes a command, potentially as a PowerShell background job for long-running tasks.
|
||||
"""Executes a command, potentially as a PowerShell background job for long-running tasks.
|
||||
Aligned with bash.py behavior regarding command execution and messages.
|
||||
|
||||
Args:
|
||||
|
||||
@ -1,11 +1,8 @@
|
||||
"""
|
||||
Custom exceptions for Windows-specific runtime issues.
|
||||
"""
|
||||
"""Custom exceptions for Windows-specific runtime issues."""
|
||||
|
||||
|
||||
class DotNetMissingError(Exception):
|
||||
"""
|
||||
Exception raised when .NET SDK or CoreCLR is missing or cannot be loaded.
|
||||
"""Exception raised when .NET SDK or CoreCLR is missing or cannot be loaded.
|
||||
This is used to provide a cleaner error message to users without a full stack trace.
|
||||
"""
|
||||
|
||||
|
||||
@@ -7,9 +7,7 @@ from openhands.storage.data_models.conversation_status import ConversationStatus

@dataclass
class AgentLoopInfo:
"""
Information about an agent loop - the URL on which to locate it and the event store
"""
"""Information about an agent loop - the URL on which to locate it and the event store"""

conversation_id: str
url: str | None

@@ -9,8 +9,7 @@ from openhands.storage.data_models.conversation_status import ConversationStatus

@dataclass
class ConversationInfo:
"""
Information about a conversation. This combines conversation metadata with
"""Information about a conversation. This combines conversation metadata with
information on whether a conversation is currently running
"""
Some files were not shown because too many files have changed in this diff.