From c2f46200c0d26a2bb3ef33417d88c3cfa0ab418c Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Wed, 13 Aug 2025 15:13:19 -0400 Subject: [PATCH] chore(lint): Apply comprehensive linting and formatting fixes (#10287) Co-authored-by: openhands --- build_vscode.py | 3 +- evaluation/benchmarks/commit0/run_infer.py | 1 - .../benchmarks/discoverybench/run_infer.py | 15 +-- .../benchmarks/multi_swe_bench/eval_infer.py | 3 +- .../swe_bench/binary_patch_utils.py | 10 +- evaluation/benchmarks/swe_bench/eval_infer.py | 3 +- .../swe_bench/loc_eval/loc_evaluator.py | 36 +++---- .../swe_bench/loc_eval/loc_utils.py | 63 ++++------- .../swe_bench/scripts/eval/verify_costs.py | 3 +- .../testgeneval/compute_readability.py | 8 +- .../benchmarks/testgeneval/eval_infer.py | 9 +- .../benchmarks/testgeneval/log_parsers.py | 21 ++-- evaluation/benchmarks/testgeneval/metrics.py | 45 +++----- .../benchmarks/testgeneval/report_utils.py | 6 +- .../scripts/eval/build_outputs_ablation.py | 6 +- .../benchmarks/testgeneval/test_filter.py | 10 +- .../benchmarks/testgeneval/test_spec.py | 14 +-- evaluation/benchmarks/testgeneval/utils.py | 7 +- .../benchmarks/the_agent_company/browsing.py | 10 +- .../benchmarks/the_agent_company/run_infer.py | 3 +- .../scripts/summarise_results.py | 23 ++-- .../inference_utils/api_code_migration.py | 7 +- .../api_test_block_completion.py | 7 +- .../versicode/inference_utils/test_block.py | 7 +- .../inference_utils/test_migration.py | 7 +- .../versicode/metric/compute_ism_pm_score.py | 27 ++--- .../metric/compute_migration_cdc_score.py | 17 +-- .../metric/compute_versicode_cdc_score.py | 13 +-- .../metric/compute_versicode_em_score.py | 13 +-- .../choose_core_line_from_block_versicode.py | 4 +- ...oose_core_line_from_migration_versicode.py | 4 +- .../versicode/output_processing/clear_ans.py | 4 +- evaluation/utils/shared.py | 3 +- .../agenthub/codeact_agent/tools/prompt.py | 3 +- .../agenthub/readonly_agent/readonly_agent.py | 4 +- openhands/cli/suppress_warnings.py | 1 - openhands/cli/tui.py | 9 +- openhands/cli/vscode_extension.py | 18 ++-- openhands/controller/agent.py | 3 +- openhands/controller/agent_controller.py | 1 - openhands/controller/replay.py | 3 +- openhands/controller/state/state.py | 8 +- openhands/controller/state/state_tracker.py | 18 +--- openhands/core/config/kubernetes_config.py | 3 +- openhands/core/config/llm_config.py | 4 +- openhands/core/config/mcp_config.py | 1 - openhands/core/config/sandbox_config.py | 3 +- openhands/core/config/security_config.py | 4 +- openhands/core/logger.py | 8 +- openhands/core/loop.py | 3 +- openhands/core/main.py | 3 +- openhands/core/message_utils.py | 6 +- openhands/core/setup.py | 6 +- openhands/critic/base.py | 12 +-- openhands/events/action/message.py | 3 +- openhands/events/event_store.py | 8 +- openhands/events/event_store_abc.py | 7 +- openhands/events/nested_event_store.py | 4 +- openhands/events/observation/agent.py | 3 +- openhands/events/observation/commands.py | 1 - openhands/events/utils.py | 3 +- .../bitbucket/bitbucket_service.py | 7 +- .../integrations/github/github_service.py | 10 +- .../integrations/gitlab/gitlab_service.py | 17 +-- openhands/integrations/provider.py | 30 ++---- openhands/integrations/utils.py | 3 +- openhands/io/io.py | 5 +- openhands/llm/llm.py | 1 + openhands/llm/metrics.py | 1 - openhands/llm/retry_mixin.py | 3 +- openhands/mcp/client.py | 4 +- openhands/mcp/tool.py | 3 +- openhands/mcp/utils.py | 13 +-- openhands/memory/conversation_memory.py | 1 - openhands/memory/memory.py | 21 ++-- openhands/resolver/interfaces/github.py | 1 - openhands/resolver/interfaces/gitlab.py | 1 - openhands/resolver/issue_resolver.py | 2 - openhands/resolver/utils.py | 3 +- openhands/runtime/__init__.py | 3 +- openhands/runtime/action_execution_server.py | 3 +- openhands/runtime/base.py | 18 +--- openhands/runtime/file_viewer_server.py | 4 +- openhands/runtime/impl/__init__.py | 4 +- .../action_execution_client.py | 1 - openhands/runtime/impl/cli/__init__.py | 4 +- openhands/runtime/impl/cli/cli_runtime.py | 23 ++-- .../impl/kubernetes/kubernetes_runtime.py | 5 +- openhands/runtime/mcp/proxy/__init__.py | 4 +- openhands/runtime/mcp/proxy/manager.py | 19 ++-- openhands/runtime/plugins/vscode/__init__.py | 3 +- openhands/runtime/utils/bash.py | 4 +- openhands/runtime/utils/file_viewer.py | 7 +- openhands/runtime/utils/git_changes.py | 3 +- openhands/runtime/utils/git_diff.py | 3 +- openhands/runtime/utils/git_handler.py | 16 +-- openhands/runtime/utils/windows_bash.py | 16 +-- openhands/runtime/utils/windows_exceptions.py | 7 +- .../server/data_models/agent_loop_info.py | 4 +- .../server/data_models/conversation_info.py | 3 +- openhands/server/dependencies.py | 3 +- openhands/server/files.py | 4 +- openhands/server/middleware.py | 7 +- openhands/server/monitoring.py | 10 +- openhands/server/routes/conversation.py | 2 + .../server/routes/manage_conversations.py | 1 - openhands/server/routes/mcp.py | 8 +- openhands/server/routes/secrets.py | 4 +- openhands/server/routes/settings.py | 4 +- .../server/services/conversation_service.py | 1 - openhands/server/session/agent_session.py | 7 +- .../server/session/conversation_init_data.py | 4 +- openhands/server/settings.py | 24 ++--- openhands/storage/batched_web_hook.py | 33 ++---- openhands/storage/data_models/settings.py | 5 +- openhands/storage/data_models/user_secrets.py | 4 +- openhands/storage/web_hook.py | 24 ++--- openhands/utils/async_utils.py | 13 +-- openhands/utils/conversation_summary.py | 6 +- openhands/utils/http_session.py | 3 +- openhands/utils/prompt.py | 10 +- openhands/utils/shutdown_listener.py | 3 +- tests/e2e/test_local_runtime.py | 3 +- tests/runtime/test_replay.py | 12 +-- tests/unit/core/config/test_config_utils.py | 21 ++-- .../unit/core/config/test_llm_draft_config.py | 15 +-- tests/unit/llm/test_llm.py | 18 ++-- .../test_issue_handler_error_handling.py | 5 +- .../resolver/github/test_resolve_issues.py | 1 - ...est_gitlab_issue_handler_error_handling.py | 5 +- tests/unit/test_agent_controller.py | 2 - tests/unit/test_agent_delegation.py | 16 ++- tests/unit/test_agent_session.py | 4 - tests/unit/test_agents.py | 1 - tests/unit/test_bitbucket.py | 19 +--- tests/unit/test_cli_thought_order.py | 3 +- tests/unit/test_config_precedence.py | 3 - tests/unit/test_contextual_events.py | 20 ++-- tests/unit/test_conversation_memory.py | 6 +- .../test_conversation_window_condenser.py | 3 +- tests/unit/test_empty_image_url_fix_v2.py | 3 - tests/unit/test_git_handler.py | 14 +-- tests/unit/test_image_content_validation.py | 3 - tests/unit/test_mcp_integration.py | 3 - tests/unit/test_message_utils.py | 10 +- tests/unit/test_nested_event_store.py | 3 +- tests/unit/test_provider_immutability.py | 2 - tests/unit/test_runtime_import_robustness.py | 9 +- tests/unit/test_secrets_api.py | 7 -- tests/unit/test_settings_api.py | 1 - tests/unit/test_settings_store_functions.py | 2 - tests/unit/test_windows_bash.py | 3 +- third_party/__init__.py | 4 +- third_party/runtime/__init__.py | 2 +- third_party/runtime/impl/__init__.py | 2 +- third_party/runtime/impl/daytona/__init__.py | 2 +- .../runtime/impl/daytona/daytona_runtime.py | 100 ++++++++++-------- third_party/runtime/impl/e2b/__init__.py | 2 +- third_party/runtime/impl/e2b/e2b_runtime.py | 20 ++-- third_party/runtime/impl/e2b/sandbox.py | 46 ++++---- third_party/runtime/impl/modal/__init__.py | 2 +- .../runtime/impl/modal/modal_runtime.py | 76 ++++++------- third_party/runtime/impl/runloop/__init__.py | 2 +- .../runtime/impl/runloop/runloop_runtime.py | 52 ++++----- 164 files changed, 526 insertions(+), 1023 deletions(-) diff --git a/build_vscode.py b/build_vscode.py index f9f1cf0cd3..703c5eed63 100644 --- a/build_vscode.py +++ b/build_vscode.py @@ -93,8 +93,7 @@ def build_vscode_extension(): def build(setup_kwargs): - """ - This function is called by Poetry during the build process. + """This function is called by Poetry during the build process. `setup_kwargs` is a dictionary that will be passed to `setuptools.setup()`. """ print('--- Running custom Poetry build script (build_vscode.py) ---') diff --git a/evaluation/benchmarks/commit0/run_infer.py b/evaluation/benchmarks/commit0/run_infer.py index 176d8f7233..11dcb274b4 100644 --- a/evaluation/benchmarks/commit0/run_infer.py +++ b/evaluation/benchmarks/commit0/run_infer.py @@ -506,7 +506,6 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame: Returns: Filtered dataset based on split type """ - filtered_dataset = pd.concat( [ dataset[dataset['repo'].str.split('/').str[1] == repo] diff --git a/evaluation/benchmarks/discoverybench/run_infer.py b/evaluation/benchmarks/discoverybench/run_infer.py index b1a6b1a842..e783268e98 100644 --- a/evaluation/benchmarks/discoverybench/run_infer.py +++ b/evaluation/benchmarks/discoverybench/run_infer.py @@ -89,8 +89,7 @@ def get_config( def get_dv_query_for_real( datasets, question, domain_knowledge=None, workflow_tags=None ): - """ - Prepare a structured query for the agent to execute on the specified datasets. + """Prepare a structured query for the agent to execute on the specified datasets. This function constructs a query by compiling metadata from the provided datasets, along with any relevant domain knowledge and workflow tags. @@ -104,7 +103,6 @@ def get_dv_query_for_real( query_to_dv: Query to be run on the dataset dataset_meta: Metadata of the dataset """ - dataset_meta = '' for dataset_metadata in datasets: dataset_meta += 'Dataset name: ' + dataset_metadata['name'] @@ -140,8 +138,7 @@ def get_dv_query_for_real( def initialize_runtime(runtime: Runtime, data_files: list[str]): - """ - Initialize the runtime for the agent. + """Initialize the runtime for the agent. This function is called before the runtime is used to run the agent. """ @@ -231,8 +228,7 @@ def process_instance( metadata: EvalMetadata, reset_logger: bool = True, ): - """ - Process and evaluate a single instance of the dataset. + """Process and evaluate a single instance of the dataset. This function executes the OpenHands agent for a specific instance of the dataset. It retrieves @@ -247,7 +243,6 @@ def process_instance( Returns: output: EvalOutput object """ - config = get_config(metadata) # Setup the logger properly, so you can run @@ -356,8 +351,7 @@ def list_csv_files(list_of_datasets): def create_dataset(repo_location: str, split: str = 'test'): - """ - Create a dataset from the discoverybench repository + """Create a dataset from the discoverybench repository by walking through the repository and extracting metadata from the metadata_{}.json files @@ -368,7 +362,6 @@ def create_dataset(repo_location: str, split: str = 'test'): Returns: df: DataFrame containing the dataset instances """ - data_dict = {} data_location = os.path.join(repo_location, 'discoverybench', 'real', split) diff --git a/evaluation/benchmarks/multi_swe_bench/eval_infer.py b/evaluation/benchmarks/multi_swe_bench/eval_infer.py index ae259cb597..74364f2fe5 100644 --- a/evaluation/benchmarks/multi_swe_bench/eval_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/eval_infer.py @@ -105,8 +105,7 @@ def process_instance( log_dir: str | None = None, runtime_failure_count: int = 0, ) -> EvalOutput: - """ - Evaluate agent performance on a SWE-bench problem instance. + """Evaluate agent performance on a SWE-bench problem instance. Note that this signature differs from the expected input to `run_evaluation`. Use `functools.partial` to provide optional arguments before passing to the evaluation harness. diff --git a/evaluation/benchmarks/swe_bench/binary_patch_utils.py b/evaluation/benchmarks/swe_bench/binary_patch_utils.py index 9cf0dbd714..4487c4a304 100644 --- a/evaluation/benchmarks/swe_bench/binary_patch_utils.py +++ b/evaluation/benchmarks/swe_bench/binary_patch_utils.py @@ -1,11 +1,8 @@ -""" -Utilities for handling binary files and patch generation in SWE-bench evaluation. -""" +"""Utilities for handling binary files and patch generation in SWE-bench evaluation.""" def remove_binary_diffs(patch_text): - """ - Remove binary file diffs from a git patch. + """Remove binary file diffs from a git patch. Args: patch_text (str): The git patch text @@ -36,8 +33,7 @@ def remove_binary_diffs(patch_text): def remove_binary_files_from_git(): - """ - Generate a bash command to remove binary files from git staging. + """Generate a bash command to remove binary files from git staging. Returns: str: A bash command that removes binary files from git staging diff --git a/evaluation/benchmarks/swe_bench/eval_infer.py b/evaluation/benchmarks/swe_bench/eval_infer.py index 81b7decd4c..2fdc3e2e2f 100644 --- a/evaluation/benchmarks/swe_bench/eval_infer.py +++ b/evaluation/benchmarks/swe_bench/eval_infer.py @@ -111,8 +111,7 @@ def process_instance( runtime_failure_count: int = 0, conditional_imports: ConditionalImports | None = None, ) -> EvalOutput: - """ - Evaluate agent performance on a SWE-bench problem instance. + """Evaluate agent performance on a SWE-bench problem instance. Note that this signature differs from the expected input to `run_evaluation`. Use `functools.partial` to provide optional arguments before passing to the evaluation harness. diff --git a/evaluation/benchmarks/swe_bench/loc_eval/loc_evaluator.py b/evaluation/benchmarks/swe_bench/loc_eval/loc_evaluator.py index 403ae446cb..f7bc22a403 100644 --- a/evaluation/benchmarks/swe_bench/loc_eval/loc_evaluator.py +++ b/evaluation/benchmarks/swe_bench/loc_eval/loc_evaluator.py @@ -16,8 +16,7 @@ from openhands.core.logger import openhands_logger as logger class LocEvaluator: def __init__(self, args): - """ - Localization evaluation. + """Localization evaluation. Args: args: all main arguments @@ -76,8 +75,7 @@ class LocEvaluator: self.task_resolved = False def _init_dir(self, directory_path): - """ - Check if a directory exists and create it if it doesn't. + """Check if a directory exists and create it if it doesn't. Args: directory_path (str): Path to the directory to check/create @@ -207,8 +205,7 @@ class LocEvaluator: self._compute_avg_over_all() def _write_to_json(self, data, file_name): - """ - Writes the current object data to a JSON file. + """Writes the current object data to a JSON file. Returns: bool: True if writing was successful, False otherwise. @@ -225,8 +222,7 @@ class LocEvaluator: return False def read_from_json(self, file_path): - """ - Reads data from a JSON file and loads it into the current object. + """Reads data from a JSON file and loads it into the current object. Returns: dict: The loaded JSON data, or an empty dict if the file doesn't exist @@ -253,8 +249,7 @@ class LocEvaluator: return {} def read_from_jsonl(self, file_path): - """ - Reads data from a JSON file and loads it into the current object. + """Reads data from a JSON file and loads it into the current object. Returns: dict: The loaded JSON data, or an empty dict if the file doesn't exist @@ -294,8 +289,7 @@ class LocEvaluator: history_idx += 1 def _parse_string_to_dict(self, dict_string) -> dict: - """ - Convert a string representation of a dictionary to an actual dictionary. + """Convert a string representation of a dictionary to an actual dictionary. Args: dict_string (str): String representation of a dictionary @@ -328,8 +322,7 @@ class LocEvaluator: return None def _parse_value_from_args(self, argument_str: str, key: str) -> str: - """ - Parse a specific key's value from argument string. + """Parse a specific key's value from argument string. Args: argument_str (str): The argument string containing key-value pairs @@ -407,8 +400,7 @@ class LocEvaluator: return '' def _parse_path_from_args(self, argument_str: str) -> str: - """ - Parse path from argument string. + """Parse path from argument string. Args: argument_str (str): The argument string containing path information @@ -419,8 +411,7 @@ class LocEvaluator: return self._parse_value_from_args(argument_str, 'path') def _parse_func_names_from_str(self, code_patch) -> list: - """ - Parse function names from the new_str code patch. + """Parse function names from the new_str code patch. Args: code_patch: Either a string (argument string) or already extracted new_str code @@ -801,8 +792,7 @@ class LocEvaluator: def swe_data_loader(args): - """ - Loading SWE-Bench data. + """Loading SWE-Bench data. Args: args: Main arguments. @@ -834,8 +824,7 @@ def swe_data_loader(args): def infer_data_loader(args): - """ - Load instance IDs. + """Load instance IDs. Args: args: Main arguments. @@ -868,8 +857,7 @@ def infer_data_loader(args): def infer_cost_calculator(args): - """ - Calculate total and average costs from metric JSON files with detailed output. + """Calculate total and average costs from metric JSON files with detailed output. Args: args: Main arguments. diff --git a/evaluation/benchmarks/swe_bench/loc_eval/loc_utils.py b/evaluation/benchmarks/swe_bench/loc_eval/loc_utils.py index e290354d9d..6edd10db55 100644 --- a/evaluation/benchmarks/swe_bench/loc_eval/loc_utils.py +++ b/evaluation/benchmarks/swe_bench/loc_eval/loc_utils.py @@ -28,8 +28,7 @@ class LocalizationInfo: hunks_per_file: dict[str, int] # File -> number of hunks def to_dict(self) -> dict[str, Any]: - """ - Convert LocalizationInfo to a dictionary for JSON serialization. + """Convert LocalizationInfo to a dictionary for JSON serialization. Returns: Dictionary representation of the localization information @@ -58,8 +57,7 @@ class LocalizationInfo: @classmethod def from_dict(cls, data: dict[str, Any]) -> 'LocalizationInfo': - """ - Create LocalizationInfo from a dictionary (for loading from JSON). + """Create LocalizationInfo from a dictionary (for loading from JSON). Args: data: Dictionary containing localization information @@ -91,8 +89,7 @@ class LocalizationInfo: class LocMeta: - """ - SWE-Bench dataset loader and ground-truth localization parser. + """SWE-Bench dataset loader and ground-truth localization parser. This class handles loading SWE-Bench datasets and extracting ground-truth localization information from patches for code localization evaluation. @@ -104,8 +101,7 @@ class LocMeta: dataset_name: str = 'princeton-nlp/SWE-bench_Verified', split: str = 'test', ): - """ - Initialize LocMeta with a SWE-Bench dataset. + """Initialize LocMeta with a SWE-Bench dataset. Args: dataset_name: HuggingFace dataset name (e.g., "princeton-nlp/SWE-bench_Verified") @@ -124,8 +120,7 @@ class LocMeta: self._init_swe_dataset() def _init_swe_dataset(self) -> None: - """ - Load and initialize the SWE-Bench dataset from HuggingFace. + """Load and initialize the SWE-Bench dataset from HuggingFace. Converts to pandas DataFrame for easy manipulation. """ try: @@ -150,8 +145,7 @@ class LocMeta: raise def get_instance_by_id(self, instance_id: str) -> pd.Series: - """ - Retrieve a specific instance by its ID. + """Retrieve a specific instance by its ID. Args: instance_id: The instance identifier @@ -169,8 +163,7 @@ class LocMeta: return self.df.iloc[idx] def parse_instance_loc(self, instance: Union[pd.Series, str]) -> LocalizationInfo: - """ - Parse ground-truth localization information from a SWE-Bench instance. + """Parse ground-truth localization information from a SWE-Bench instance. Args: instance: Either a pandas Series with instance data or an instance_id string @@ -218,8 +211,7 @@ class LocMeta: def _parse_file_patch_lines( self, file_patch: str ) -> tuple[list[tuple[int, int]], int, int]: - """ - Parse line ranges and count changes from a single file patch. + """Parse line ranges and count changes from a single file patch. Args: file_patch: Patch content for a single file @@ -253,8 +245,7 @@ class LocMeta: def _parse_code_structures_from_patch( self, file_patch: str, file_path: str ) -> tuple[list[str], list[str]]: - """ - Extract function and class names from patch context (fallback method). + """Extract function and class names from patch context (fallback method). Args: file_patch: Patch content for a single file @@ -311,8 +302,7 @@ class LocMeta: def _parse_patch_localization( self, patch_content: str, instance_id: str ) -> LocalizationInfo: - """ - Parse localization information from a git patch (improved method). + """Parse localization information from a git patch (improved method). Args: patch_content: The git patch content @@ -390,8 +380,7 @@ class LocMeta: def _extract_code_structures_from_patch( self, file_patch: str, file_path: str ) -> tuple[list[str], list[str]]: - """ - Extract function and class names from patch context and content. + """Extract function and class names from patch context and content. Args: file_patch: Patch content for a single file @@ -519,8 +508,7 @@ class LocMeta: def _parse_patch_localization_with_runtime( self, patch_content: str, instance_id: str, runtime: Runtime ) -> LocalizationInfo: - """ - Parse localization information from a git patch using OpenHands runtime. + """Parse localization information from a git patch using OpenHands runtime. This is the superior method when runtime is available. Args: @@ -596,8 +584,7 @@ class LocMeta: def parse_instance_loc_with_runtime( self, instance: Union[pd.Series, str], runtime: Runtime = None ) -> LocalizationInfo: - """ - Parse ground-truth localization information using OpenHands runtime. + """Parse ground-truth localization information using OpenHands runtime. Args: instance: Either a pandas Series with instance data or an instance_id string @@ -634,8 +621,7 @@ class LocMeta: def _analyze_source_code_with_runtime( self, runtime: Runtime, file_path: str, affected_lines: list[int] ) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]: - """ - Analyze source code using OpenHands runtime to find functions and classes. + """Analyze source code using OpenHands runtime to find functions and classes. Args: runtime: OpenHands runtime object @@ -695,8 +681,7 @@ class LocMeta: def _parse_cython_content_with_line_mapping( self, content: str, affected_lines: list[int] ) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]: - """ - Parse Cython content to extract functions and classes with line mapping. + """Parse Cython content to extract functions and classes with line mapping. Since Cython files can't be parsed with Python's AST, we use regex-based parsing. Args: @@ -828,8 +813,7 @@ class LocMeta: def _parse_python_content_with_line_mapping( self, content: str, affected_lines: list[int] ) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]: - """ - Parse Python content to extract functions and classes with accurate line mapping. + """Parse Python content to extract functions and classes with accurate line mapping. Args: content: Python source code content @@ -914,8 +898,7 @@ class LocMeta: def _parse_python_content( self, content: str, affected_lines: list[int] ) -> tuple[list[str], list[str], dict[int, str], dict[int, str]]: - """ - Parse Python content to extract functions and classes. + """Parse Python content to extract functions and classes. Args: content: Python source code content @@ -989,8 +972,7 @@ class LocMeta: return [], [], {}, {} def _split_patch_by_files(self, patch_content: str) -> dict[str, str]: - """ - Split a multi-file patch into individual file patches. + """Split a multi-file patch into individual file patches. Args: patch_content: Complete patch content @@ -1049,8 +1031,7 @@ class LocMeta: def _empty_localization_info( self, instance_id: str = 'unknown' ) -> LocalizationInfo: - """ - Return an empty LocalizationInfo object. + """Return an empty LocalizationInfo object. Args: instance_id: Instance identifier @@ -1072,8 +1053,7 @@ class LocMeta: ) def get_dataset_statistics(self) -> dict[str, Any]: - """ - Get statistics about the loaded dataset. + """Get statistics about the loaded dataset. Returns: Dictionary containing dataset statistics @@ -1095,8 +1075,7 @@ class LocMeta: return stats def get_instances_by_repo(self, repo_name: str) -> pd.DataFrame: - """ - Get all instances for a specific repository. + """Get all instances for a specific repository. Args: repo_name: Repository name (e.g., "django/django") diff --git a/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py b/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py index 628ecb4fb5..4d7ac30895 100644 --- a/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py +++ b/evaluation/benchmarks/swe_bench/scripts/eval/verify_costs.py @@ -6,8 +6,7 @@ from openhands.core.logger import openhands_logger as logger def verify_instance_costs(row: pd.Series) -> float: - """ - Verifies that the accumulated_cost matches the sum of individual costs in metrics. + """Verifies that the accumulated_cost matches the sum of individual costs in metrics. Also checks for duplicate consecutive costs which might indicate buggy counting. If the consecutive costs are identical, the file is affected by this bug: https://github.com/All-Hands-AI/OpenHands/issues/5383 diff --git a/evaluation/benchmarks/testgeneval/compute_readability.py b/evaluation/benchmarks/testgeneval/compute_readability.py index a2f2f9dd7f..1112779bfc 100644 --- a/evaluation/benchmarks/testgeneval/compute_readability.py +++ b/evaluation/benchmarks/testgeneval/compute_readability.py @@ -181,9 +181,7 @@ def distinct_methods_stats(tree, num_lines): def loops_stats(tree, num_lines): - """ - Calculate the average number of loops. - """ + """Calculate the average number of loops.""" total_loops = 0 def traverse(node): @@ -199,9 +197,7 @@ def loops_stats(tree, num_lines): def branches_stats(tree, num_lines): - """ - Calculate the average number of branches (conditional statements). - """ + """Calculate the average number of branches (conditional statements).""" total_branches = 0 def traverse(node): diff --git a/evaluation/benchmarks/testgeneval/eval_infer.py b/evaluation/benchmarks/testgeneval/eval_infer.py index 99eea4ba43..27aaaee1ac 100644 --- a/evaluation/benchmarks/testgeneval/eval_infer.py +++ b/evaluation/benchmarks/testgeneval/eval_infer.py @@ -192,8 +192,7 @@ def run_mutation_testing( def grade_test_output( test_suite: str, instance: pd.Series, test_output: str, test_spec: TestSpec, runtime ): - """ - Two-pass test grading with short-circuiting: + """Two-pass test grading with short-circuiting: 1. Run all tests to identify passing/failing tests 2. If no failing tests, evaluate coverage immediately 3. Otherwise, run only passing tests for coverage analysis @@ -280,8 +279,7 @@ def process_instance( reset_logger: bool = True, log_dir: str | None = None, ) -> EvalOutput: - """ - Evaluate agent performance on a TestGenEval problem instance. + """Evaluate agent performance on a TestGenEval problem instance. Note that this signature differs from the expected input to `run_evaluation`. Use `functools.partial` to provide optional arguments before passing to the evaluation harness. @@ -453,8 +451,7 @@ def process_instance( def count_and_log_fields(evaluated_predictions, fields, key): - """ - Count and log the sum of specified fields in the evaluated predictions, + """Count and log the sum of specified fields in the evaluated predictions, ignoring fields with a value of -1. If all values for a field are -1, return -1. diff --git a/evaluation/benchmarks/testgeneval/log_parsers.py b/evaluation/benchmarks/testgeneval/log_parsers.py index 80d8246487..2692703eed 100644 --- a/evaluation/benchmarks/testgeneval/log_parsers.py +++ b/evaluation/benchmarks/testgeneval/log_parsers.py @@ -4,8 +4,7 @@ from evaluation.benchmarks.testgeneval.constants import TestStatus def parse_log_pytest(log: str) -> dict[str, str]: - """ - Parser for test logs generated with PyTest framework + """Parser for test logs generated with PyTest framework Args: log (str): log content @@ -26,8 +25,7 @@ def parse_log_pytest(log: str) -> dict[str, str]: def parse_log_pytest_options(log: str) -> dict[str, str]: - """ - Parser for test logs generated with PyTest framework with options + """Parser for test logs generated with PyTest framework with options Args: log (str): log content @@ -61,8 +59,7 @@ def parse_log_pytest_options(log: str) -> dict[str, str]: def parse_log_django(log: str) -> dict[str, str]: - """ - Parser for test logs generated with Django tester framework + """Parser for test logs generated with Django tester framework Args: log (str): log content @@ -141,8 +138,7 @@ def parse_log_django(log: str) -> dict[str, str]: def parse_log_pytest_v2(log: str) -> dict[str, str]: - """ - Parser for test logs generated with PyTest framework (Later Version) + """Parser for test logs generated with PyTest framework (Later Version) Args: log (str): log content @@ -170,8 +166,7 @@ def parse_log_pytest_v2(log: str) -> dict[str, str]: def parse_log_seaborn(log: str) -> dict[str, str]: - """ - Parser for test logs generated with seaborn testing framework + """Parser for test logs generated with seaborn testing framework Args: log (str): log content @@ -196,8 +191,7 @@ def parse_log_seaborn(log: str) -> dict[str, str]: def parse_log_sympy(log: str) -> dict[str, str]: - """ - Parser for test logs generated with Sympy framework + """Parser for test logs generated with Sympy framework Args: log (str): log content @@ -229,8 +223,7 @@ def parse_log_sympy(log: str) -> dict[str, str]: def parse_log_matplotlib(log: str) -> dict[str, str]: - """ - Parser for test logs generated with PyTest framework + """Parser for test logs generated with PyTest framework Args: log (str): log content diff --git a/evaluation/benchmarks/testgeneval/metrics.py b/evaluation/benchmarks/testgeneval/metrics.py index 01a21444c9..2d2f7c208c 100644 --- a/evaluation/benchmarks/testgeneval/metrics.py +++ b/evaluation/benchmarks/testgeneval/metrics.py @@ -12,8 +12,7 @@ if sys.getrecursionlimit() < 10_000: def bleu(gold: list[str], pred: list[str]) -> float: - """ - Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100. + """Calculate BLEU score, using smoothing method 2 with auto reweighting, in the range of 0~100. :param gold: list of gold tokens :param pred: list of predicted tokens @@ -30,8 +29,7 @@ def bleu(gold: list[str], pred: list[str]) -> float: def batch_bleu(golds: list[list[str]], preds: list[list[str]]) -> list[float]: - """ - Calculate BLEU score for a batch of sentences. + """Calculate BLEU score for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -43,8 +41,7 @@ def batch_bleu(golds: list[list[str]], preds: list[list[str]]) -> list[float]: def corpus_bleu(golds: list[list[str]], preds: list[list[str]]) -> float: - """ - Calculate corpus-level BLEU score for a batch of sentences. + """Calculate corpus-level BLEU score for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -63,8 +60,7 @@ def corpus_bleu(golds: list[list[str]], preds: list[list[str]]) -> float: def edit_sim( gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' ' ) -> float: - """ - Calculate char-level edit similarity, in the range of 0~100. + """Calculate char-level edit similarity, in the range of 0~100. :param gold: gold sentence or list of gold tokens :param pred: predicted sentence or list of predicted tokens @@ -85,8 +81,7 @@ def batch_edit_sim( preds: list[Union[str, list[str]]], sep: str = ' ', ) -> list[float]: - """ - Calculate char-level edit similarity for a batch of sentences. + """Calculate char-level edit similarity for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -102,8 +97,7 @@ T = TypeVar('T') def exact_match(gold: T, pred: T) -> float: - """ - Calculate exact match accuracy, in the range of {0, 100}. + """Calculate exact match accuracy, in the range of {0, 100}. :param gold: gold sentence or list of gold tokens :param pred: predicted sentence or list of predicted tokens @@ -115,8 +109,7 @@ def exact_match(gold: T, pred: T) -> float: def batch_exact_match(golds: list[T], preds: list[T]) -> list[float]: - """ - Calculate exact match accuracy for a batch of sentences. + """Calculate exact match accuracy for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -130,8 +123,7 @@ def batch_exact_match(golds: list[T], preds: list[T]) -> list[float]: def rouge_l( gold: Union[str, list[str]], pred: Union[str, list[str]], sep: str = ' ' ) -> dict[str, float]: - """ - Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100. + """Calculate ROUGE-L F1, precision, and recall scores, in the range of 0~100. :param gold: gold sentence or list of gold tokens :param pred: predicted sentence or list of predicted tokens @@ -156,8 +148,7 @@ def batch_rouge_l( preds: list[Union[str, list[str]]], sep: str = ' ', ) -> dict[str, list[float]]: - """ - Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences. + """Calculate ROUGE-L F1, precision, and recall scores for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -175,8 +166,7 @@ def accuracy( pred: list[str], ignore: Optional[Sequence[str]] = None, ) -> float: - """ - Calculate token-level accuracy, in the range of 0~100. + """Calculate token-level accuracy, in the range of 0~100. If gold and pred are not the same length, the longer one would be truncated. :param gold: list of gold tokens @@ -210,8 +200,7 @@ def batch_accuracy( preds: list[list[str]], ignore: Optional[Sequence[str]] = None, ) -> list[float]: - """ - Calculate token-level accuracy for a batch of sentences. + """Calculate token-level accuracy for a batch of sentences. :param golds: list of gold sentences :param preds: list of predicted sentences @@ -226,8 +215,7 @@ def batch_accuracy( def first_match_to_topk( first_match_list: list[int], k_values: list[int] ) -> dict[int, list[float]]: - """ - Calculate top-k accuracy with the first match ranks (1-indexed). + """Calculate top-k accuracy with the first match ranks (1-indexed). :param first_match: first match ranks (1-indexed) :param k_values: k values to consider @@ -237,8 +225,7 @@ def first_match_to_topk( def pass_at_k(n: int, c: int, k: int) -> float: - """ - Sample pass@k metric according to the Codex paper, but in the scale of 0~100. + """Sample pass@k metric according to the Codex paper, but in the scale of 0~100. :param n: total number of samples :param c: number of correct samples :param k: k in pass@$k$ @@ -251,8 +238,7 @@ def pass_at_k(n: int, c: int, k: int) -> float: def self_bleu(samples: list[list[str]]) -> float: - """ - Calculate self-BLEU among the samples. + """Calculate self-BLEU among the samples. :param samples: the chosen m samples :return: self-BLEU """ @@ -274,8 +260,7 @@ def self_bleu(samples: list[list[str]]) -> float: def self_edit_distance(samples: list[Union[str, list[str]]], sep=' ') -> float: - """ - Calculate self-edit-distance among the samples. + """Calculate self-edit-distance among the samples. :param samples: the chosen m samples :param sep: the separator between tokens :return: self-edit-distance diff --git a/evaluation/benchmarks/testgeneval/report_utils.py b/evaluation/benchmarks/testgeneval/report_utils.py index f5a8401314..3ba76fd894 100644 --- a/evaluation/benchmarks/testgeneval/report_utils.py +++ b/evaluation/benchmarks/testgeneval/report_utils.py @@ -30,8 +30,7 @@ def check_mutation(mutation_output): def count_methods(code_str): - """ - Counts the number of methods/functions in a given string of code. + """Counts the number of methods/functions in a given string of code. Args: code_str (str): A string containing code. @@ -46,8 +45,7 @@ def count_methods(code_str): def get_lines_of_code(code_str): - """ - Extracts lines of code from a given string. + """Extracts lines of code from a given string. Args: code_str (str): A string containing code. diff --git a/evaluation/benchmarks/testgeneval/scripts/eval/build_outputs_ablation.py b/evaluation/benchmarks/testgeneval/scripts/eval/build_outputs_ablation.py index 87860849ce..431d618f27 100644 --- a/evaluation/benchmarks/testgeneval/scripts/eval/build_outputs_ablation.py +++ b/evaluation/benchmarks/testgeneval/scripts/eval/build_outputs_ablation.py @@ -7,8 +7,7 @@ import traceback def insert_line_in_string(input_string, new_str, insert_line): - """ - Inserts a new line into a string at the specified line number. + """Inserts a new line into a string at the specified line number. :param input_string: The original string. :param new_str: The string to insert. @@ -29,8 +28,7 @@ def insert_line_in_string(input_string, new_str, insert_line): def print_string_diff(original, modified): - """ - Prints the differences between two strings line by line. + """Prints the differences between two strings line by line. :param original: The original string. :param modified: The modified string. diff --git a/evaluation/benchmarks/testgeneval/test_filter.py b/evaluation/benchmarks/testgeneval/test_filter.py index 5d42548765..0167bc5e76 100644 --- a/evaluation/benchmarks/testgeneval/test_filter.py +++ b/evaluation/benchmarks/testgeneval/test_filter.py @@ -37,8 +37,7 @@ def extract_preamble_classes_and_functions(code): current_position = 0 def extract_class_body(code: str, start_index: int) -> tuple[str, int]: - """ - Extracts the body of a class from the given code starting from the specified index. + """Extracts the body of a class from the given code starting from the specified index. Returns the class body and the end index of the class body. """ if not code or start_index < 0 or start_index >= len(code): @@ -168,8 +167,8 @@ def extract_preamble_classes_and_functions(code): def filter_passing_tests( test_content: str, test_output: str, repo: str ) -> tuple[str, list[str], list[str]]: - """ - Filter tests based on their execution results. + """Filter tests based on their execution results. + Returns: Tuple containing: - Modified test content with only passing tests @@ -246,8 +245,7 @@ def filter_passing_tests( def filter_tests( test_content: str, test_output: str, repo: str ) -> tuple[str, list[str], list[str]]: - """ - Filter tests using AST parsing to remove failing test functions from the test file. + """Filter tests using AST parsing to remove failing test functions from the test file. Non-test functions (e.g. setup or helper methods) and classes (even if all test methods are failing) are preserved. diff --git a/evaluation/benchmarks/testgeneval/test_spec.py b/evaluation/benchmarks/testgeneval/test_spec.py index b51ac79179..71ec2a0ebf 100644 --- a/evaluation/benchmarks/testgeneval/test_spec.py +++ b/evaluation/benchmarks/testgeneval/test_spec.py @@ -20,9 +20,7 @@ DIFF_MODIFIED_FILE_REGEX = r'--- a/(.*)' @dataclass class TestSpec: - """ - A dataclass that represents a test specification for a single instance of SWE-bench. - """ + """A dataclass that represents a test specification for a single instance of SWE-bench.""" instance_id: str id: str @@ -86,10 +84,7 @@ def make_test_setup(specs, env_name, repo_directory, includes_tox=False): def make_test_script_list(test_cmd, specs, env_name, repo_directory): - """ - Runs the tests. - """ - + """Runs the tests.""" includes_tox = 'tox' in test_cmd eval_commands = make_test_setup(specs, env_name, repo_directory, includes_tox) eval_commands += [ @@ -104,10 +99,7 @@ def make_test_script_list(test_cmd, specs, env_name, repo_directory): def make_mutation_script_list(specs, env_name, repo_directory, mutation_timeout): - """ - Runs the tests. - """ - + """Runs the tests.""" eval_commands = make_test_setup(specs, env_name, repo_directory) eval_commands += [ 'cosmic-ray init mutation.toml mutation.sqlite', diff --git a/evaluation/benchmarks/testgeneval/utils.py b/evaluation/benchmarks/testgeneval/utils.py index 8cd81b0139..6a3959bb3f 100644 --- a/evaluation/benchmarks/testgeneval/utils.py +++ b/evaluation/benchmarks/testgeneval/utils.py @@ -11,8 +11,7 @@ from evaluation.benchmarks.testgeneval.constants import ( def get_test_directives(instance: TestGenEvalInstance) -> list: - """ - Get test directives from the test_patch of a task instance + """Get test directives from the test_patch of a task instance Args: instance (dict): task instance @@ -43,9 +42,7 @@ def get_test_directives(instance: TestGenEvalInstance) -> list: def load_testgeneval_dataset( name='kjain14/testgeneval', split='test', ids=None ) -> list[TestGenEvalInstance]: - """ - Load SWE-bench dataset from Hugging Face Datasets or local .json/.jsonl file - """ + """Load SWE-bench dataset from Hugging Face Datasets or local .json/.jsonl file""" # check that all instance IDs are in the dataset if ids: ids = set(ids) diff --git a/evaluation/benchmarks/the_agent_company/browsing.py b/evaluation/benchmarks/the_agent_company/browsing.py index acf8901e96..8641425616 100644 --- a/evaluation/benchmarks/the_agent_company/browsing.py +++ b/evaluation/benchmarks/the_agent_company/browsing.py @@ -24,9 +24,7 @@ class ActionType(Enum): @dataclass class Selector: - """ - Represents either a direct anchor ID or a descriptive selector - """ + """Represents either a direct anchor ID or a descriptive selector""" value: str is_anchor: bool = False @@ -149,8 +147,7 @@ def find_matching_anchor(content: str, selector: str) -> str | None: def resolve_action(action: BrowserAction, content: str) -> BrowserAction: - """ - Resolve any descriptive selectors in the action to anchor IDs based on the content. + """Resolve any descriptive selectors in the action to anchor IDs based on the content. Returns a new action with resolved selectors. """ if isinstance(action, (InputAction, ClickAction)): @@ -174,8 +171,7 @@ def pre_login( save_screenshots=True, screenshots_dir='screenshots', ): - """ - Logs in to all the websites that are needed for the evaluation. + """Logs in to all the websites that are needed for the evaluation. Once logged in, the sessions would be cached in the browser, so OpenHands agent doesn't need to log in to these websites again. """ diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py index 9f5780d559..513b02d612 100644 --- a/evaluation/benchmarks/the_agent_company/run_infer.py +++ b/evaluation/benchmarks/the_agent_company/run_infer.py @@ -68,8 +68,7 @@ def get_config( def load_dependencies(runtime: Runtime) -> list[str]: - """ - Every task has a dependencies.yml file, which lists all the services that the + """Every task has a dependencies.yml file, which lists all the services that the task depends on. This function loads the file and returns all dependent service names. """ command = 'cat /utils/dependencies.yml' diff --git a/evaluation/benchmarks/the_agent_company/scripts/summarise_results.py b/evaluation/benchmarks/the_agent_company/scripts/summarise_results.py index 175894ac91..8b92edc2c8 100644 --- a/evaluation/benchmarks/the_agent_company/scripts/summarise_results.py +++ b/evaluation/benchmarks/the_agent_company/scripts/summarise_results.py @@ -11,9 +11,7 @@ import sys def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float: - """ - Calculate the cost of the model call. - """ + """Calculate the cost of the model call.""" if 'claude-3-5-sonnet' in model.lower(): # https://www.anthropic.com/pricing#anthropic-api, accessed 12/11/2024 return 0.000003 * prompt_tokens + 0.000015 * completion_tokens @@ -60,8 +58,7 @@ def calculate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> fl def analyze_eval_json_file(filepath: str) -> tuple[int, int]: - """ - Analyze a single eval JSON file and extract the total and result from final_score. + """Analyze a single eval JSON file and extract the total and result from final_score. Args: filepath: Path to the JSON file @@ -84,8 +81,7 @@ def analyze_eval_json_file(filepath: str) -> tuple[int, int]: def analyze_traj_json_file(filepath: str) -> tuple[int, float]: - """ - Analyze a single trajectory JSON file and extract the steps and tokens + """Analyze a single trajectory JSON file and extract the steps and tokens for each step. Then estimate the cost based on the tokens and the model type. Note: this is assuming there's no prompt caching at all. """ @@ -115,8 +111,7 @@ def analyze_traj_json_file(filepath: str) -> tuple[int, float]: def analyze_folder( folder_path: str, ) -> tuple[dict[str, tuple[int, int]], dict[str, tuple[int, float]]]: - """ - Analyze all eval_*.json & traj_*.json files in the specified folder. + """Analyze all eval_*.json & traj_*.json files in the specified folder. Args: folder_path: Path to the folder containing JSON files @@ -148,9 +143,7 @@ def analyze_folder( def get_task_nature_category(task_name: str) -> str: - """ - Get the nature category of the task. - """ + """Get the nature category of the task.""" task_nature = task_name.split('-')[0] if task_nature.lower() in ['sde', 'pm', 'ds', 'admin', 'hr', 'finance']: return task_nature @@ -159,8 +152,7 @@ def get_task_nature_category(task_name: str) -> str: def calculate_score(total: int, result: int) -> float: - """ - Calculate the score as a number between 0 and 1. + """Calculate the score as a number between 0 and 1. Formula: score = (result / total) * 0.5 + (result // total) * 0.5 Explanation: @@ -178,8 +170,7 @@ def calculate_score(total: int, result: int) -> float: def is_perfect_completion(total: int, result: int) -> bool: - """ - Check if the task achieved perfect completion. + """Check if the task achieved perfect completion. Args: total: Total possible points diff --git a/evaluation/benchmarks/versicode/inference_utils/api_code_migration.py b/evaluation/benchmarks/versicode/inference_utils/api_code_migration.py index 9adba1f05f..6025a8af1d 100644 --- a/evaluation/benchmarks/versicode/inference_utils/api_code_migration.py +++ b/evaluation/benchmarks/versicode/inference_utils/api_code_migration.py @@ -1,6 +1,4 @@ -""" -GPT performs line level generation prediction and truncates overly long tokens -""" +"""GPT performs line level generation prediction and truncates overly long tokens""" import json import os @@ -56,8 +54,7 @@ def predict(content, model_name): def bulid_prompt(description, old_version, old_code, new_version) -> str: - """ - build prompt + """Build prompt :param version: :param description: :param masked_code: diff --git a/evaluation/benchmarks/versicode/inference_utils/api_test_block_completion.py b/evaluation/benchmarks/versicode/inference_utils/api_test_block_completion.py index b9bedab6e0..51abc91c31 100644 --- a/evaluation/benchmarks/versicode/inference_utils/api_test_block_completion.py +++ b/evaluation/benchmarks/versicode/inference_utils/api_test_block_completion.py @@ -1,6 +1,4 @@ -""" -GPT performs line level generation prediction and truncates overly long tokens -""" +"""GPT performs line level generation prediction and truncates overly long tokens""" import json import os @@ -56,8 +54,7 @@ def predict(content, model_name): def bulid_prompt(version, description) -> str: - """ - build prompt + """Build prompt :param version: :param description: :param masked_code: diff --git a/evaluation/benchmarks/versicode/inference_utils/test_block.py b/evaluation/benchmarks/versicode/inference_utils/test_block.py index 1c0e7f2b15..20fe5297d6 100644 --- a/evaluation/benchmarks/versicode/inference_utils/test_block.py +++ b/evaluation/benchmarks/versicode/inference_utils/test_block.py @@ -1,6 +1,4 @@ -""" -block completion -""" +"""block completion""" import copy import gc @@ -79,8 +77,7 @@ def run_inference(model_name, origin_data_list): def bulid_prompt(version, description) -> str: - """ - build prompt + """Build prompt :param version: :param description: :param masked_code: diff --git a/evaluation/benchmarks/versicode/inference_utils/test_migration.py b/evaluation/benchmarks/versicode/inference_utils/test_migration.py index cf4ec66e0e..fc7df2a849 100644 --- a/evaluation/benchmarks/versicode/inference_utils/test_migration.py +++ b/evaluation/benchmarks/versicode/inference_utils/test_migration.py @@ -1,6 +1,4 @@ -""" -code migration -""" +"""code migration""" import copy import gc @@ -81,8 +79,7 @@ def run_inference(model_name, origin_data_list): def bulid_prompt(description, old_version, old_code, new_version) -> str: - """ - build prompt + """Build prompt :param version: :param description: :param masked_code: diff --git a/evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py b/evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py index f6577f30c9..2e63b4970b 100644 --- a/evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py +++ b/evaluation/benchmarks/versicode/metric/compute_ism_pm_score.py @@ -1,5 +1,4 @@ -""" -评测block的预测能力 +"""评测block的预测能力 1、判断是否包含正确的函数名 2、判断是否合法 3、计算ISM,和PM @@ -22,8 +21,7 @@ def is_code_valid(code): def longest_common_prefix_between_lists_with_elements(list1, list2): - """ - 计算两个字符串列表中元素的最长前缀匹配长度 + """计算两个字符串列表中元素的最长前缀匹配长度 :param list1: :param list2: :return: @@ -46,8 +44,7 @@ def longest_common_prefix_between_lists_with_elements(list1, list2): def get_token(ans_code: str, output_code: str): - """ - 对代码进行词法分析,分解成标识符,返回两个标识符列表 + """对代码进行词法分析,分解成标识符,返回两个标识符列表 :param ans_code: :param output_code: :return: @@ -94,8 +91,7 @@ def get_token(ans_code: str, output_code: str): def get_token_per_line(code: str): - """ - 对每一行代码进行词法分析,记录每一行的标识符 + """对每一行代码进行词法分析,记录每一行的标识符 :param code: 代码字符串 :return: 每一行的标识符列表组成的列表 """ @@ -117,8 +113,7 @@ def get_token_per_line(code: str): def get_ISM(answer_code: str, model_output_list: list, answer_name: str) -> list: - """ - 计算ISM,返回一个有序的得分列表 + """计算ISM,返回一个有序的得分列表 :return: """ score_list = [] @@ -157,8 +152,7 @@ def get_ISM(answer_code: str, model_output_list: list, answer_name: str) -> list def get_ISM_without_verification( answer_code: str, model_output_list: list, answer_name: str ) -> list: - """ - 计算ISM,返回一个有序的得分列表 + """计算ISM,返回一个有序的得分列表 :return: """ score_list = [] @@ -190,8 +184,7 @@ def get_ISM_without_verification( def longest_common_prefix_with_lengths(list1, list2): - """ - 计算两个二维列表中每个子列表的最长前缀匹配长度,并记录拥有最长前缀匹配长度的两个子列表的长度 + """计算两个二维列表中每个子列表的最长前缀匹配长度,并记录拥有最长前缀匹配长度的两个子列表的长度 :param list1: 第一个二维列表 :param list2: 第二个二维列表 :return: 最长前缀匹配长度以及拥有最长前缀匹配长度的两个子列表的长度 @@ -216,8 +209,7 @@ def longest_common_prefix_with_lengths(list1, list2): def get_PM(answer_code: str, model_output_list: list, answer_name: str) -> list: - """ - 计算PM,返回一个有序的得分列表 + """计算PM,返回一个有序的得分列表 :return: """ score_list = [] @@ -254,8 +246,7 @@ def get_PM(answer_code: str, model_output_list: list, answer_name: str) -> list: def get_score(score_list: list, k): - """ - 计算score@n,k + """计算score@n,k :param score_list: :param k: :return: diff --git a/evaluation/benchmarks/versicode/metric/compute_migration_cdc_score.py b/evaluation/benchmarks/versicode/metric/compute_migration_cdc_score.py index ab51ffbe1a..4702e10150 100644 --- a/evaluation/benchmarks/versicode/metric/compute_migration_cdc_score.py +++ b/evaluation/benchmarks/versicode/metric/compute_migration_cdc_score.py @@ -1,6 +1,4 @@ -""" -Calculate the cdc score for migration -""" +"""Calculate the cdc score for migration""" import json import math @@ -11,8 +9,7 @@ import re def is_correct_parameter_count(function_name, correct_code, test_code): - """ - 判断参数数量是否一致 + """判断参数数量是否一致 :param function_name: :param correct_code: :param test_code: @@ -43,8 +40,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code): def check_keyword_parameters(function_name, correct_code, test_code): - """ - 判断关键词参数赋值是否正确使用 + """判断关键词参数赋值是否正确使用 :param function_name: :param correct_code: :param test_code: @@ -82,8 +78,7 @@ def check_keyword_parameters(function_name, correct_code, test_code): def with_correct(answer_code: str, model_output: str) -> bool: - """ - 当answer是with结构时,判断模型生成的是不是with结构 + """当answer是with结构时,判断模型生成的是不是with结构 :param answer_code: :param model_output: :return: @@ -105,9 +100,7 @@ def compute_block_score_k( core_line_in_core_block, core_line_in_output_clear, ): - """ - cdc需要满足五个条件,em只需要满足第一个条件 - """ + """cdc需要满足五个条件,em只需要满足第一个条件""" c = 0 n = len(model_output) for index, code in enumerate(model_output): diff --git a/evaluation/benchmarks/versicode/metric/compute_versicode_cdc_score.py b/evaluation/benchmarks/versicode/metric/compute_versicode_cdc_score.py index c9d6389a64..4831bb2fc9 100644 --- a/evaluation/benchmarks/versicode/metric/compute_versicode_cdc_score.py +++ b/evaluation/benchmarks/versicode/metric/compute_versicode_cdc_score.py @@ -1,6 +1,4 @@ -""" -Calculate the cdc score for line and block -""" +"""Calculate the cdc score for line and block""" import json import math @@ -19,8 +17,7 @@ def is_code_valid(code): def is_correct_parameter_count(function_name, correct_code, test_code): - """ - 判断参数数量是否一致 + """判断参数数量是否一致 :param function_name: :param correct_code: :param test_code: @@ -51,8 +48,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code): def check_keyword_parameters(function_name, correct_code, test_code): - """ - 判断关键词参数赋值是否正确使用 + """判断关键词参数赋值是否正确使用 :param function_name: :param correct_code: :param test_code: @@ -90,8 +86,7 @@ def check_keyword_parameters(function_name, correct_code, test_code): def with_correct(answer_code: str, model_output: str) -> bool: - """ - 当answer是with结构时,判断模型生成的是不是with结构 + """当answer是with结构时,判断模型生成的是不是with结构 :param answer_code: :param model_output: :return: diff --git a/evaluation/benchmarks/versicode/metric/compute_versicode_em_score.py b/evaluation/benchmarks/versicode/metric/compute_versicode_em_score.py index 255c7e7714..9a02722416 100644 --- a/evaluation/benchmarks/versicode/metric/compute_versicode_em_score.py +++ b/evaluation/benchmarks/versicode/metric/compute_versicode_em_score.py @@ -1,6 +1,4 @@ -""" -Calculate the cdc score for line and block -""" +"""Calculate the cdc score for line and block""" import json import math @@ -19,8 +17,7 @@ def is_code_valid(code): def is_correct_parameter_count(function_name, correct_code, test_code): - """ - 判断参数数量是否一致 + """判断参数数量是否一致 :param function_name: :param correct_code: :param test_code: @@ -51,8 +48,7 @@ def is_correct_parameter_count(function_name, correct_code, test_code): def check_keyword_parameters(function_name, correct_code, test_code): - """ - 判断关键词参数赋值是否正确使用 + """判断关键词参数赋值是否正确使用 :param function_name: :param correct_code: :param test_code: @@ -90,8 +86,7 @@ def check_keyword_parameters(function_name, correct_code, test_code): def with_correct(answer_code: str, model_output: str) -> bool: - """ - 当answer是with结构时,判断模型生成的是不是with结构 + """当answer是with结构时,判断模型生成的是不是with结构 :param answer_code: :param model_output: :return: diff --git a/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_block_versicode.py b/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_block_versicode.py index 75cb42b631..7d3c1092db 100644 --- a/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_block_versicode.py +++ b/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_block_versicode.py @@ -1,6 +1,4 @@ -""" -Find the line of code generated by the model using the block in the version code -""" +"""Find the line of code generated by the model using the block in the version code""" import json import os diff --git a/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_migration_versicode.py b/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_migration_versicode.py index cc2f65f1c7..eb01988a43 100644 --- a/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_migration_versicode.py +++ b/evaluation/benchmarks/versicode/output_processing/choose_core_line_from_migration_versicode.py @@ -1,6 +1,4 @@ -""" -Find the line of code generated by the model using the block in the version code -""" +"""Find the line of code generated by the model using the block in the version code""" import json import os diff --git a/evaluation/benchmarks/versicode/output_processing/clear_ans.py b/evaluation/benchmarks/versicode/output_processing/clear_ans.py index 01060c80e7..d071276f0f 100644 --- a/evaluation/benchmarks/versicode/output_processing/clear_ans.py +++ b/evaluation/benchmarks/versicode/output_processing/clear_ans.py @@ -1,6 +1,4 @@ -""" -Clear theandgenerated by the model in inference -""" +"""Clear theandgenerated by the model in inference""" import json diff --git a/evaluation/utils/shared.py b/evaluation/utils/shared.py index 12bc3540eb..e2102d16b1 100644 --- a/evaluation/utils/shared.py +++ b/evaluation/utils/shared.py @@ -622,8 +622,7 @@ def compatibility_for_eval_history_pairs( def is_fatal_evaluation_error(error: str | None) -> bool: - """ - The AgentController class overrides last error for certain exceptions + """The AgentController class overrides last error for certain exceptions We want to ensure those exeption do not overlap with fatal exceptions defined here This is because we do a comparisino against the stringified error """ diff --git a/openhands/agenthub/codeact_agent/tools/prompt.py b/openhands/agenthub/codeact_agent/tools/prompt.py index 6c1548a2c8..9a9ee8ab8a 100644 --- a/openhands/agenthub/codeact_agent/tools/prompt.py +++ b/openhands/agenthub/codeact_agent/tools/prompt.py @@ -3,8 +3,7 @@ import sys def refine_prompt(prompt: str): - """ - Refines the prompt based on the platform. + """Refines the prompt based on the platform. On Windows systems, replaces 'bash' with 'powershell' and 'execute_bash' with 'execute_powershell' to ensure commands work correctly on the Windows platform. diff --git a/openhands/agenthub/readonly_agent/readonly_agent.py b/openhands/agenthub/readonly_agent/readonly_agent.py index d30fe6163c..efccdbf43c 100644 --- a/openhands/agenthub/readonly_agent/readonly_agent.py +++ b/openhands/agenthub/readonly_agent/readonly_agent.py @@ -1,6 +1,4 @@ -""" -ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only tools. -""" +"""ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only tools.""" import os from typing import TYPE_CHECKING diff --git a/openhands/cli/suppress_warnings.py b/openhands/cli/suppress_warnings.py index b57bb0b45b..01042cc443 100644 --- a/openhands/cli/suppress_warnings.py +++ b/openhands/cli/suppress_warnings.py @@ -5,7 +5,6 @@ import warnings def suppress_cli_warnings(): """Suppress common warnings that appear during CLI usage.""" - # Suppress pydub warning about ffmpeg/avconv warnings.filterwarnings( 'ignore', diff --git a/openhands/cli/tui.py b/openhands/cli/tui.py index 0e75cd88a5..c164193dbd 100644 --- a/openhands/cli/tui.py +++ b/openhands/cli/tui.py @@ -239,8 +239,7 @@ def display_mcp_errors() -> None: # Prompt output display functions def display_thought_if_new(thought: str, is_agent_message: bool = False) -> None: - """ - Display a thought only if it hasn't been displayed recently. + """Display a thought only if it hasn't been displayed recently. Args: thought: The thought to display @@ -301,8 +300,7 @@ def display_event(event: Event, config: OpenHandsConfig) -> None: def display_message(message: str, is_agent_message: bool = False) -> None: - """ - Display a message in the terminal with markdown rendering. + """Display a message in the terminal with markdown rendering. Args: message: The message to display @@ -338,8 +336,7 @@ def display_message(message: str, is_agent_message: bool = False) -> None: def convert_markdown_to_html(text: str) -> str: - """ - Convert markdown to HTML for prompt_toolkit's HTML renderer using the markdown library. + """Convert markdown to HTML for prompt_toolkit's HTML renderer using the markdown library. Args: text: Markdown text to convert diff --git a/openhands/cli/vscode_extension.py b/openhands/cli/vscode_extension.py index 72de8d51dc..b458d3db14 100644 --- a/openhands/cli/vscode_extension.py +++ b/openhands/cli/vscode_extension.py @@ -56,8 +56,7 @@ def download_latest_vsix_from_github() -> str | None: def attempt_vscode_extension_install(): - """ - Checks if running in a supported editor and attempts to install the OpenHands companion extension. + """Checks if running in a supported editor and attempts to install the OpenHands companion extension. This is a best-effort, one-time attempt. """ # 1. Check if we are in a supported editor environment @@ -132,8 +131,7 @@ def attempt_vscode_extension_install(): def _mark_installation_successful(flag_file: pathlib.Path, editor_name: str) -> None: - """ - Mark the extension installation as successful by creating the flag file. + """Mark the extension installation as successful by creating the flag file. Args: flag_file: Path to the flag file to create @@ -147,8 +145,7 @@ def _mark_installation_successful(flag_file: pathlib.Path, editor_name: str) -> def _is_extension_installed(editor_command: str, extension_id: str) -> bool: - """ - Check if the OpenHands extension is already installed. + """Check if the OpenHands extension is already installed. Args: editor_command: The command to run the editor (e.g., 'code', 'windsurf') @@ -174,8 +171,7 @@ def _is_extension_installed(editor_command: str, extension_id: str) -> bool: def _attempt_github_install(editor_command: str, editor_name: str) -> bool: - """ - Attempt to install the extension from GitHub Releases. + """Attempt to install the extension from GitHub Releases. Downloads the latest VSIX file from GitHub releases and attempts to install it. Ensures proper cleanup of temporary files. @@ -227,8 +223,7 @@ def _attempt_github_install(editor_command: str, editor_name: str) -> bool: def _attempt_bundled_install(editor_command: str, editor_name: str) -> bool: - """ - Attempt to install the extension from the bundled VSIX file. + """Attempt to install the extension from the bundled VSIX file. Uses the VSIX file packaged with the OpenHands installation. @@ -280,8 +275,7 @@ def _attempt_bundled_install(editor_command: str, editor_name: str) -> bool: def _attempt_marketplace_install( editor_command: str, editor_name: str, extension_id: str ) -> bool: - """ - Attempt to install the extension from the marketplace. + """Attempt to install the extension from the marketplace. This method is currently unused as the OpenHands extension is not yet published to the VS Code/Windsurf marketplace. It's kept here for future use when the diff --git a/openhands/controller/agent.py b/openhands/controller/agent.py index 7613b1ce23..6c49b58b32 100644 --- a/openhands/controller/agent.py +++ b/openhands/controller/agent.py @@ -55,8 +55,7 @@ class Agent(ABC): return self._prompt_manager def get_system_message(self) -> 'SystemMessageAction | None': - """ - Returns a SystemMessageAction containing the system message and tools. + """Returns a SystemMessageAction containing the system message and tools. This will be added to the event stream as the first message. Returns: diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py index 357f3f852e..2952cfcb1c 100644 --- a/openhands/controller/agent_controller.py +++ b/openhands/controller/agent_controller.py @@ -142,7 +142,6 @@ class AgentController: status_callback: Optional callback function to handle status updates. replay_events: A list of logs to replay. """ - self.id = sid or event_stream.sid self.user_id = user_id self.file_store = file_store diff --git a/openhands/controller/replay.py b/openhands/controller/replay.py index 0eb8009435..4cfec767f4 100644 --- a/openhands/controller/replay.py +++ b/openhands/controller/replay.py @@ -57,8 +57,7 @@ class ReplayManager: ) def should_replay(self) -> bool: - """ - Whether the controller is in trajectory replay mode, and the replay + """Whether the controller is in trajectory replay mode, and the replay hasn't finished. Note: after the replay is finished, the user and the agent could continue to message/act. diff --git a/openhands/controller/state/state.py b/openhands/controller/state/state.py index 3bb17d670e..3af407d896 100644 --- a/openhands/controller/state/state.py +++ b/openhands/controller/state/state.py @@ -46,8 +46,7 @@ class TrafficControlState(str, Enum): @dataclass class State: - """ - Represents the running state of an agent in the OpenHands system, saving data of its operation and memory. + """Represents the running state of an agent in the OpenHands system, saving data of its operation and memory. - Multi-agent/delegate state: - store the task (conversation between the agent and the user) @@ -143,10 +142,7 @@ class State: def restore_from_session( sid: str, file_store: FileStore, user_id: str | None = None ) -> 'State': - """ - Restores the state from the previously saved session. - """ - + """Restores the state from the previously saved session.""" state: State try: encoded = file_store.read( diff --git a/openhands/controller/state/state_tracker.py b/openhands/controller/state/state_tracker.py index 9b27eb90e5..aab0a2b07f 100644 --- a/openhands/controller/state/state_tracker.py +++ b/openhands/controller/state/state_tracker.py @@ -242,41 +242,33 @@ class StateTracker: self.state.budget_flag.increase_limit(headless_mode) def get_metrics_snapshot(self): - """ - Deep copy of metrics + """Deep copy of metrics This serves as a snapshot for the parent's metrics at the time a delegate is created It will be stored and used to compute local metrics for the delegate (since delegates now accumulate metrics from where its parent left off) """ - return self.state.metrics.copy() def save_state(self): - """ - Save's current state to persistent store - """ + """Save's current state to persistent store""" if self.sid and self.file_store: self.state.save_to_session(self.sid, self.file_store, self.user_id) def run_control_flags(self): - """ - Performs one step of the control flags - """ + """Performs one step of the control flags""" self.state.iteration_flag.step() if self.state.budget_flag: self.state.budget_flag.step() def sync_budget_flag_with_metrics(self): - """ - Ensures that budget flag is up to date with accumulated costs from llm completions + """Ensures that budget flag is up to date with accumulated costs from llm completions Budget flag will monitor for when budget is exceeded """ if self.state.budget_flag: self.state.budget_flag.current_value = self.state.metrics.accumulated_cost def merge_metrics(self, metrics: Metrics): - """ - Merges metrics with the state metrics + """Merges metrics with the state metrics NOTE: this should be refactored in the future. We should have services (draft llm, title autocomplete, condenser, etc) use their own LLMs, but the metrics object should be shared. This way we have one source of truth for accumulated costs from diff --git a/openhands/core/config/kubernetes_config.py b/openhands/core/config/kubernetes_config.py index 69528744bc..706a51a29f 100644 --- a/openhands/core/config/kubernetes_config.py +++ b/openhands/core/config/kubernetes_config.py @@ -66,8 +66,7 @@ class KubernetesConfig(BaseModel): @classmethod def from_toml_section(cls, data: dict) -> dict[str, 'KubernetesConfig']: - """ - Create a mapping of KubernetesConfig instances from a toml dictionary representing the [kubernetes] section. + """Create a mapping of KubernetesConfig instances from a toml dictionary representing the [kubernetes] section. The configuration is built from all keys in data. diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index eaa381b835..c8b20c96a0 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -97,8 +97,7 @@ class LLMConfig(BaseModel): @classmethod def from_toml_section(cls, data: dict) -> dict[str, LLMConfig]: - """ - Create a mapping of LLMConfig instances from a toml dictionary representing the [llm] section. + """Create a mapping of LLMConfig instances from a toml dictionary representing the [llm] section. The default configuration is built from all non-dict keys in data. Then, each key with a dict value (e.g. [llm.random_name]) is treated as a custom LLM configuration, @@ -117,7 +116,6 @@ class LLMConfig(BaseModel): dict[str, LLMConfig]: A mapping where the key "llm" corresponds to the default configuration and additional keys represent custom configurations. """ - # Initialize the result mapping llm_mapping: dict[str, LLMConfig] = {} diff --git a/openhands/core/config/mcp_config.py b/openhands/core/config/mcp_config.py index 4fa2d9673f..d2f7328909 100644 --- a/openhands/core/config/mcp_config.py +++ b/openhands/core/config/mcp_config.py @@ -345,7 +345,6 @@ class OpenHandsMCPConfig: Returns: tuple[MCPSHTTPServerConfig | None, list[MCPStdioServerConfig]]: A tuple containing the default SHTTP server configuration (or None) and a list of MCP stdio server configurations """ - stdio_servers = [] search_engine_stdio_server = OpenHandsMCPConfig.add_search_engine(config) if search_engine_stdio_server: diff --git a/openhands/core/config/sandbox_config.py b/openhands/core/config/sandbox_config.py index 41b5016864..002d90f92f 100644 --- a/openhands/core/config/sandbox_config.py +++ b/openhands/core/config/sandbox_config.py @@ -93,8 +93,7 @@ class SandboxConfig(BaseModel): @classmethod def from_toml_section(cls, data: dict) -> dict[str, 'SandboxConfig']: - """ - Create a mapping of SandboxConfig instances from a toml dictionary representing the [sandbox] section. + """Create a mapping of SandboxConfig instances from a toml dictionary representing the [sandbox] section. The configuration is built from all keys in data. diff --git a/openhands/core/config/security_config.py b/openhands/core/config/security_config.py index 774e8ad4d9..63067cf912 100644 --- a/openhands/core/config/security_config.py +++ b/openhands/core/config/security_config.py @@ -16,15 +16,13 @@ class SecurityConfig(BaseModel): @classmethod def from_toml_section(cls, data: dict) -> dict[str, 'SecurityConfig']: - """ - Create a mapping of SecurityConfig instances from a toml dictionary representing the [security] section. + """Create a mapping of SecurityConfig instances from a toml dictionary representing the [security] section. The configuration is built from all keys in data. Returns: dict[str, SecurityConfig]: A mapping where the key "security" corresponds to the [security] configuration """ - # Initialize the result mapping security_mapping: dict[str, SecurityConfig] = {} diff --git a/openhands/core/logger.py b/openhands/core/logger.py index 126ba33c02..a939ff707e 100644 --- a/openhands/core/logger.py +++ b/openhands/core/logger.py @@ -322,10 +322,7 @@ def json_log_handler( level: int = logging.INFO, _out: TextIO = sys.stdout, ) -> logging.Handler: - """ - Configure logger instance for structured logging as json lines. - """ - + """Configure logger instance for structured logging as json lines.""" handler = logging.StreamHandler(_out) handler.setLevel(level) handler.setFormatter(json_formatter()) @@ -496,8 +493,7 @@ class OpenHandsLoggerAdapter(logging.LoggerAdapter): def process( self, msg: str, kwargs: MutableMapping[str, Any] ) -> tuple[str, MutableMapping[str, Any]]: - """ - If 'extra' is supplied in kwargs, merge it with the adapters 'extra' dict + """If 'extra' is supplied in kwargs, merge it with the adapters 'extra' dict Starting in Python 3.13, LoggerAdapter's merge_extra option will do this. """ if 'extra' in kwargs and isinstance(kwargs['extra'], dict): diff --git a/openhands/core/loop.py b/openhands/core/loop.py index 7bf44bd68e..3e3a0655a5 100644 --- a/openhands/core/loop.py +++ b/openhands/core/loop.py @@ -14,8 +14,7 @@ async def run_agent_until_done( memory: Memory, end_states: list[AgentState], ) -> None: - """ - run_agent_until_done takes a controller and a runtime, and will run + """run_agent_until_done takes a controller and a runtime, and will run the agent until it reaches a terminal state. Note that runtime must be connected before being passed in here. """ diff --git a/openhands/core/main.py b/openhands/core/main.py index 1dd58fb117..3adb8b6ce1 100644 --- a/openhands/core/main.py +++ b/openhands/core/main.py @@ -257,8 +257,7 @@ def auto_continue_response( def load_replay_log(trajectory_path: str) -> tuple[list[Event] | None, Action]: - """ - Load trajectory from given path, serialize it to a list of events, and return + """Load trajectory from given path, serialize it to a list of events, and return two things: 1) A list of events except the first action 2) First action (user message, a.k.a. initial task) diff --git a/openhands/core/message_utils.py b/openhands/core/message_utils.py index 96289b90a7..1235a58ae3 100644 --- a/openhands/core/message_utils.py +++ b/openhands/core/message_utils.py @@ -3,8 +3,7 @@ from openhands.llm.metrics import Metrics, TokenUsage def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | None: - """ - Returns at most one token usage record for either: + """Returns at most one token usage record for either: - `tool_call_metadata.model_response.id`, if possible - otherwise event.response_id, if set @@ -34,8 +33,7 @@ def get_token_usage_for_event(event: Event, metrics: Metrics) -> TokenUsage | No def get_token_usage_for_event_id( events: list[Event], event_id: int, metrics: Metrics ) -> TokenUsage | None: - """ - Starting from the event with .id == event_id and moving backwards in `events`, + """Starting from the event with .id == event_id and moving backwards in `events`, find the first TokenUsage record (if any) associated either with: - tool_call_metadata.model_response.id, or - event.response_id diff --git a/openhands/core/setup.py b/openhands/core/setup.py index 80efb9cef9..34c066fb95 100644 --- a/openhands/core/setup.py +++ b/openhands/core/setup.py @@ -94,8 +94,7 @@ def create_runtime( def get_provider_tokens(): - """ - Retrieve provider tokens from environment variables and return them as a dictionary. + """Retrieve provider tokens from environment variables and return them as a dictionary. Returns: A dictionary mapping ProviderType to ProviderToken if tokens are found, otherwise None. @@ -126,8 +125,7 @@ def initialize_repository_for_runtime( immutable_provider_tokens: PROVIDER_TOKEN_TYPE | None = None, selected_repository: str | None = None, ) -> str | None: - """ - Initialize the repository for the runtime by cloning or initializing it, + """Initialize the repository for the runtime by cloning or initializing it, running setup scripts, and setting up git hooks if present. Args: diff --git a/openhands/critic/base.py b/openhands/critic/base.py index bfde3827d8..3a7def922f 100644 --- a/openhands/critic/base.py +++ b/openhands/critic/base.py @@ -6,25 +6,19 @@ from openhands.events import Event class CriticResult(BaseModel): - """ - A critic result is a score and a message. - """ + """A critic result is a score and a message.""" score: float message: str @property def success(self) -> bool: - """ - Whether the agent is successful. - """ + """Whether the agent is successful.""" return self.score >= 0.5 class BaseCritic(abc.ABC): - """ - A critic is a function that takes in a list of events, optional git patch, and returns a score about the quality of those events. - """ + """A critic is a function that takes in a list of events, optional git patch, and returns a score about the quality of those events.""" @abc.abstractmethod def evaluate( diff --git a/openhands/events/action/message.py b/openhands/events/action/message.py index 767494e324..eb082cc1cc 100644 --- a/openhands/events/action/message.py +++ b/openhands/events/action/message.py @@ -42,8 +42,7 @@ class MessageAction(Action): @dataclass class SystemMessageAction(Action): - """ - Action that represents a system message for an agent, including the system prompt + """Action that represents a system message for an agent, including the system prompt and available tools. This should be the first message in the event stream. """ diff --git a/openhands/events/event_store.py b/openhands/events/event_store.py index fccde45d40..07ea385efa 100644 --- a/openhands/events/event_store.py +++ b/openhands/events/event_store.py @@ -42,9 +42,7 @@ _DUMMY_PAGE = _CachePage(None, 1, -1) @dataclass class EventStore(EventStoreABC): - """ - A stored list of events backing a conversation - """ + """A stored list of events backing a conversation""" sid: str file_store: FileStore @@ -92,8 +90,7 @@ class EventStore(EventStoreABC): filter: EventFilter | None = None, limit: int | None = None, ) -> Iterable[Event]: - """ - Retrieve events from the event stream, optionally filtering out events of a given type + """Retrieve events from the event stream, optionally filtering out events of a given type and events marked as hidden. Args: @@ -105,7 +102,6 @@ class EventStore(EventStoreABC): Yields: Events from the stream that match the criteria. """ - if end_id is None: end_id = self.cur_id else: diff --git a/openhands/events/event_store_abc.py b/openhands/events/event_store_abc.py index 355df3807b..840410ce34 100644 --- a/openhands/events/event_store_abc.py +++ b/openhands/events/event_store_abc.py @@ -9,9 +9,7 @@ from openhands.events.event_filter import EventFilter class EventStoreABC: - """ - A stored list of events backing a conversation - """ + """A stored list of events backing a conversation""" sid: str user_id: str | None @@ -25,8 +23,7 @@ class EventStoreABC: filter: EventFilter | None = None, limit: int | None = None, ) -> Iterable[Event]: - """ - Retrieve events from the event stream, optionally excluding events using a filter + """Retrieve events from the event stream, optionally excluding events using a filter Args: start_id: The ID of the first event to retrieve. Defaults to 0. diff --git a/openhands/events/nested_event_store.py b/openhands/events/nested_event_store.py index a42130d667..775475394e 100644 --- a/openhands/events/nested_event_store.py +++ b/openhands/events/nested_event_store.py @@ -13,9 +13,7 @@ from openhands.events.serialization.event import event_from_dict @dataclass class NestedEventStore(EventStoreABC): - """ - A stored list of events backing a conversation - """ + """A stored list of events backing a conversation""" base_url: str sid: str diff --git a/openhands/events/observation/agent.py b/openhands/events/observation/agent.py index fc668e9df2..6c015d98b2 100644 --- a/openhands/events/observation/agent.py +++ b/openhands/events/observation/agent.py @@ -46,8 +46,7 @@ class AgentThinkObservation(Observation): @dataclass class MicroagentKnowledge: - """ - Represents knowledge from a triggered microagent. + """Represents knowledge from a triggered microagent. Attributes: name: The name of the microagent that was triggered diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index 849c415c19..bcc1bcd4a0 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -146,7 +146,6 @@ class CmdOutputObservation(Observation): Returns: Original content if not too large, or truncated content otherwise """ - if len(content) <= max_size: return content diff --git a/openhands/events/utils.py b/openhands/events/utils.py index cfc2dd804c..6d83563dc6 100644 --- a/openhands/events/utils.py +++ b/openhands/events/utils.py @@ -12,7 +12,8 @@ from openhands.events.observation import ( def get_pairs_from_events(events: list[Event]) -> list[tuple[Action, Observation]]: """Return the history as a list of tuples (action, observation). - This function is a compatibility function for evals reading and visualization working with old histories.""" + This function is a compatibility function for evals reading and visualization working with old histories. + """ tuples: list[tuple[Action, Observation]] = [] action_map: dict[int, Action] = {} observation_map: dict[int, Observation] = {} diff --git a/openhands/integrations/bitbucket/bitbucket_service.py b/openhands/integrations/bitbucket/bitbucket_service.py index 38db7bb606..80e2f77096 100644 --- a/openhands/integrations/bitbucket/bitbucket_service.py +++ b/openhands/integrations/bitbucket/bitbucket_service.py @@ -191,8 +191,7 @@ class BitBucketService(BaseGitService, GitService, InstallationsService): def _parse_repository( self, repo: dict, link_header: str | None = None ) -> Repository: - """ - Parse a Bitbucket API repository response into a Repository object. + """Parse a Bitbucket API repository response into a Repository object. Args: repo: Repository data from Bitbucket API @@ -201,7 +200,6 @@ class BitBucketService(BaseGitService, GitService, InstallationsService): Returns: Repository object """ - repo_id = repo.get('uuid', '') workspace_slug = repo.get('workspace', {}).get('slug', '') @@ -292,8 +290,7 @@ class BitBucketService(BaseGitService, GitService, InstallationsService): async def _fetch_paginated_data( self, url: str, params: dict, max_items: int ) -> list[dict]: - """ - Fetch data with pagination support for Bitbucket API. + """Fetch data with pagination support for Bitbucket API. Args: url: The API endpoint URL diff --git a/openhands/integrations/github/github_service.py b/openhands/integrations/github/github_service.py index 9c4a322d32..de9bde9826 100644 --- a/openhands/integrations/github/github_service.py +++ b/openhands/integrations/github/github_service.py @@ -186,8 +186,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService): async def _fetch_paginated_repos( self, url: str, params: dict, max_repos: int, extract_key: str | None = None ) -> list[dict]: - """ - Fetch repositories with pagination support. + """Fetch repositories with pagination support. Args: url: The API endpoint URL @@ -228,8 +227,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService): def _parse_repository( self, repo: dict, link_header: str | None = None ) -> Repository: - """ - Parse a GitHub API repository response into a Repository object. + """Parse a GitHub API repository response into a Repository object. Args: repo: Repository data from GitHub API @@ -550,8 +548,7 @@ class GitHubService(BaseGitService, GitService, InstallationsService): draft: bool = True, labels: list[str] | None = None, ) -> str: - """ - Creates a PR using user credentials + """Creates a PR using user credentials Args: repo_name: The full name of the repository (owner/repo) @@ -566,7 +563,6 @@ class GitHubService(BaseGitService, GitService, InstallationsService): - PR URL when successful - Error message when unsuccessful """ - url = f'{self.BASE_URL}/repos/{repo_name}/pulls' # Set default body if none provided diff --git a/openhands/integrations/gitlab/gitlab_service.py b/openhands/integrations/gitlab/gitlab_service.py index b23eff2a11..c075359286 100644 --- a/openhands/integrations/gitlab/gitlab_service.py +++ b/openhands/integrations/gitlab/gitlab_service.py @@ -71,9 +71,7 @@ class GitLabService(BaseGitService, GitService): return ProviderType.GITLAB.value async def _get_gitlab_headers(self) -> dict[str, Any]: - """ - Retrieve the GitLab Token to construct the headers - """ + """Retrieve the GitLab Token to construct the headers""" if not self.token: latest_token = await self.get_latest_token() if latest_token: @@ -173,8 +171,7 @@ class GitLabService(BaseGitService, GitService): async def execute_graphql_query( self, query: str, variables: dict[str, Any] | None = None ) -> Any: - """ - Execute a GraphQL query against the GitLab GraphQL API + """Execute a GraphQL query against the GitLab GraphQL API Args: query: The GraphQL query string @@ -244,8 +241,7 @@ class GitLabService(BaseGitService, GitService): def _parse_repository( self, repo: dict, link_header: str | None = None ) -> Repository: - """ - Parse a GitLab API project response into a Repository object. + """Parse a GitLab API project response into a Repository object. Args: repo: Project data from GitLab API @@ -269,8 +265,7 @@ class GitLabService(BaseGitService, GitService): ) def _parse_gitlab_url(self, url: str) -> str | None: - """ - Parse a GitLab URL to extract the repository path. + """Parse a GitLab URL to extract the repository path. Expected format: https://{domain}/{group}/{possibly_subgroup}/{repo} Returns the full path from group onwards (e.g., 'group/subgroup/repo' or 'group/repo') @@ -588,8 +583,7 @@ class GitLabService(BaseGitService, GitService): description: str | None = None, labels: list[str] | None = None, ) -> str: - """ - Creates a merge request in GitLab + """Creates a merge request in GitLab Args: id: The ID or URL-encoded path of the project @@ -603,7 +597,6 @@ class GitLabService(BaseGitService, GitService): - MR URL when successful - Error message when unsuccessful """ - # Convert string ID to URL-encoded path if needed project_id = str(id).replace('/', '%2F') if isinstance(id, str) else id url = f'{self.BASE_URL}/projects/{project_id}/merge_requests' diff --git a/openhands/integrations/provider.py b/openhands/integrations/provider.py index 51e74d300d..7e7a54f0ae 100644 --- a/openhands/integrations/provider.py +++ b/openhands/integrations/provider.py @@ -191,10 +191,7 @@ class ProviderHandler: per_page: int | None, installation_id: str | None, ) -> list[Repository]: - """ - Get repositories from providers - """ - + """Get repositories from providers""" """ Get repositories from providers """ @@ -226,9 +223,7 @@ class ProviderHandler: return all_repos async def get_suggested_tasks(self) -> list[SuggestedTask]: - """ - Get suggested tasks from providers - """ + """Get suggested tasks from providers""" tasks: list[SuggestedTask] = [] for provider in self.provider_tokens: try: @@ -303,8 +298,7 @@ class ProviderHandler: event_stream: EventStream, env_vars: dict[ProviderType, SecretStr] | None = None, ) -> None: - """ - This ensures that the latest provider tokens are masked from the event stream + """This ensures that the latest provider tokens are masked from the event stream It is called when the provider tokens are first initialized in the runtime or when tokens are re-exported with the latest working ones Args: @@ -320,8 +314,7 @@ class ProviderHandler: def expose_env_vars( self, env_secrets: dict[ProviderType, SecretStr] ) -> dict[str, str]: - """ - Return string values instead of typed values for environment secrets + """Return string values instead of typed values for environment secrets Called just before exporting secrets to runtime, or setting secrets in the event stream """ exposed_envs = {} @@ -353,8 +346,7 @@ class ProviderHandler: providers: list[ProviderType] | None = None, get_latest: bool = False, ) -> dict[ProviderType, SecretStr] | dict[str, str]: - """ - Retrieves the provider tokens from ProviderHandler object + """Retrieves the provider tokens from ProviderHandler object This is used when initializing/exporting new provider tokens in the runtime Args: @@ -362,7 +354,6 @@ class ProviderHandler: providers: Return provider tokens for the list passed in, otherwise return all available providers get_latest: Get the latest working token for the providers if True, otherwise get the existing ones """ - if not self.provider_tokens: return {} @@ -393,11 +384,9 @@ class ProviderHandler: def check_cmd_action_for_provider_token_ref( cls, event: Action ) -> list[ProviderType]: - """ - Detect if agent run action is using a provider token (e.g $GITHUB_TOKEN) + """Detect if agent run action is using a provider token (e.g $GITHUB_TOKEN) Returns a list of providers which are called by the agent """ - if not isinstance(event, CmdRunAction): return [] @@ -410,9 +399,7 @@ class ProviderHandler: @classmethod def get_provider_env_key(cls, provider: ProviderType) -> str: - """ - Map ProviderType value to the environment variable name in the runtime - """ + """Map ProviderType value to the environment variable name in the runtime""" return f'{provider.value}_token'.lower() async def verify_repo_provider( @@ -443,8 +430,7 @@ class ProviderHandler: async def get_branches( self, repository: str, specified_provider: ProviderType | None = None ) -> list[Branch]: - """ - Get branches for a repository + """Get branches for a repository Args: repository: The repository name diff --git a/openhands/integrations/utils.py b/openhands/integrations/utils.py index 9b8c1d430f..eb4114b049 100644 --- a/openhands/integrations/utils.py +++ b/openhands/integrations/utils.py @@ -10,8 +10,7 @@ from openhands.integrations.provider import ProviderType async def validate_provider_token( token: SecretStr, base_domain: str | None = None ) -> ProviderType | None: - """ - Determine whether a token is for GitHub, GitLab, or Bitbucket by attempting to get user info + """Determine whether a token is for GitHub, GitLab, or Bitbucket by attempting to get user info from the services. Args: diff --git a/openhands/io/io.py b/openhands/io/io.py index 2e42df912b..e2991ec479 100644 --- a/openhands/io/io.py +++ b/openhands/io/io.py @@ -24,10 +24,7 @@ def read_task_from_file(file_path: str) -> str: def read_task(args: argparse.Namespace, cli_multiline_input: bool) -> str: - """ - Read the task from the CLI args, file, or stdin. - """ - + """Read the task from the CLI args, file, or stdin.""" # Determine the task task_str = '' if args.file: diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index 08d9308be6..a3a775ca69 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -703,6 +703,7 @@ class LLM(RetryMixin, DebugMixin): Args: messages (list): A list of messages, either as a list of dicts or as a list of Message objects. + Returns: int: The number of tokens. """ diff --git a/openhands/llm/metrics.py b/openhands/llm/metrics.py index 2dbe90f824..bacad0d0e5 100644 --- a/openhands/llm/metrics.py +++ b/openhands/llm/metrics.py @@ -150,7 +150,6 @@ class Metrics: response_id: str, ) -> None: """Add a single usage record.""" - # Token each turn for calculating context usage. per_turn_token = prompt_tokens + completion_tokens diff --git a/openhands/llm/retry_mixin.py b/openhands/llm/retry_mixin.py index 5841a5c9ed..3169d1313d 100644 --- a/openhands/llm/retry_mixin.py +++ b/openhands/llm/retry_mixin.py @@ -16,8 +16,7 @@ class RetryMixin: """Mixin class for retry logic.""" def retry_decorator(self, **kwargs: Any) -> Callable: - """ - Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes. + """Create a LLM retry decorator with customizable parameters. This is used for 429 errors, and a few other exceptions in LLM classes. Args: **kwargs: Keyword arguments to override default retry behavior. diff --git a/openhands/mcp/client.py b/openhands/mcp/client.py index 9bf8fb7005..197ed3c390 100644 --- a/openhands/mcp/client.py +++ b/openhands/mcp/client.py @@ -21,9 +21,7 @@ from openhands.mcp.tool import MCPClientTool class MCPClient(BaseModel): - """ - A collection of tools that connects to an MCP server and manages available tools through the Model Context Protocol. - """ + """A collection of tools that connects to an MCP server and manages available tools through the Model Context Protocol.""" model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/openhands/mcp/tool.py b/openhands/mcp/tool.py index 7bbf332a58..895dabb8d8 100644 --- a/openhands/mcp/tool.py +++ b/openhands/mcp/tool.py @@ -3,8 +3,7 @@ from pydantic import ConfigDict class MCPClientTool(Tool): - """ - Represents a tool proxy that can be called on the MCP server from the client side. + """Represents a tool proxy that can be called on the MCP server from the client side. This version doesn't store a session reference, as sessions are created on-demand by the MCPClient for each operation. diff --git a/openhands/mcp/utils.py b/openhands/mcp/utils.py index fd633e5301..83e88cfa2c 100644 --- a/openhands/mcp/utils.py +++ b/openhands/mcp/utils.py @@ -26,8 +26,7 @@ from openhands.runtime.impl.cli.cli_runtime import CLIRuntime def convert_mcp_clients_to_tools(mcp_clients: list[MCPClient] | None) -> list[dict]: - """ - Converts a list of MCPClient instances to ChatCompletionToolParam format + """Converts a list of MCPClient instances to ChatCompletionToolParam format that can be used by CodeActAgent. Args: @@ -152,8 +151,7 @@ async def create_mcp_clients( async def fetch_mcp_tools_from_config( mcp_config: MCPConfig, conversation_id: str | None = None, use_stdio: bool = False ) -> list[dict]: - """ - Retrieves the list of MCP tools from the MCP clients. + """Retrieves the list of MCP tools from the MCP clients. Args: mcp_config: The MCP configuration @@ -206,8 +204,7 @@ async def fetch_mcp_tools_from_config( async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Observation: - """ - Call a tool on an MCP server and return the observation. + """Call a tool on an MCP server and return the observation. Args: mcp_clients: The list of MCP clients to execute the action on @@ -270,9 +267,7 @@ async def call_tool_mcp(mcp_clients: list[MCPClient], action: MCPAction) -> Obse async def add_mcp_tools_to_agent( agent: 'Agent', runtime: Runtime, memory: 'Memory' ) -> MCPConfig: - """ - Add MCP tools to an agent. - """ + """Add MCP tools to an agent.""" import sys # Skip MCP tools on Windows diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py index 3a767a433e..fde2b8caa3 100644 --- a/openhands/memory/conversation_memory.py +++ b/openhands/memory/conversation_memory.py @@ -87,7 +87,6 @@ class ConversationMemory: vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included. initial_user_action: The initial user message action, if available. Used to ensure the conversation starts correctly. """ - events = condensed_history # Ensure the event list starts with SystemMessageAction, then MessageAction(source='user') diff --git a/openhands/memory/memory.py b/openhands/memory/memory.py index de1818d11a..fc1fa22339 100644 --- a/openhands/memory/memory.py +++ b/openhands/memory/memory.py @@ -39,8 +39,7 @@ USER_MICROAGENTS_DIR = Path.home() / '.openhands' / 'microagents' class Memory: - """ - Memory is a component that listens to the EventStream for information retrieval actions + """Memory is a component that listens to the EventStream for information retrieval actions (a RecallAction) and publishes observations with the content (such as RecallObservation). """ @@ -145,7 +144,6 @@ class Memory: This method collects information from all available repo microagents and concatenates their contents. Multiple repo microagents are supported, and their contents will be concatenated with newlines between them. """ - # Create WORKSPACE_CONTEXT info: # - repository_info # - runtime_info @@ -211,7 +209,6 @@ class Memory: event: RecallAction, ) -> RecallObservation | None: """When a microagent action triggers microagents, create a RecallObservation with structured data.""" - # Find any matched microagents based on the query microagent_knowledge = self._find_microagent_knowledge(event.query) @@ -257,8 +254,7 @@ class Memory: def load_user_workspace_microagents( self, user_microagents: list[BaseMicroagent] ) -> None: - """ - This method loads microagents from a user's cloned repo or workspace directory. + """This method loads microagents from a user's cloned repo or workspace directory. This is typically called from agent_session or setup once the workspace is cloned. """ @@ -272,9 +268,7 @@ class Memory: self.repo_microagents[user_microagent.name] = user_microagent def _load_global_microagents(self) -> None: - """ - Loads microagents from the global microagents_dir - """ + """Loads microagents from the global microagents_dir""" repo_agents, knowledge_agents = load_microagents_from_dir( GLOBAL_MICROAGENTS_DIR ) @@ -284,8 +278,7 @@ class Memory: self.repo_microagents[name] = agent_repo def _load_user_microagents(self) -> None: - """ - Loads microagents from the user's home directory (~/.openhands/microagents/) + """Loads microagents from the user's home directory (~/.openhands/microagents/) Creates the directory if it doesn't exist. """ try: @@ -307,8 +300,7 @@ class Memory: ) def get_microagent_mcp_tools(self) -> list[MCPConfig]: - """ - Get MCP tools from all repo microagents (always active) + """Get MCP tools from all repo microagents (always active) Returns: A list of MCP tools configurations from microagents @@ -365,8 +357,7 @@ class Memory: def set_conversation_instructions( self, conversation_instructions: str | None ) -> None: - """ - Set contextual information for conversation + """Set contextual information for conversation This is information the agent may require """ self.conversation_instructions = ConversationInstructions( diff --git a/openhands/resolver/interfaces/github.py b/openhands/resolver/interfaces/github.py index c213648029..74add2f99f 100644 --- a/openhands/resolver/interfaces/github.py +++ b/openhands/resolver/interfaces/github.py @@ -92,7 +92,6 @@ class GithubIssueHandler(IssueHandlerInterface): Returns: List of Github issues. """ - if not issue_numbers: raise ValueError('Unspecified issue number') diff --git a/openhands/resolver/interfaces/gitlab.py b/openhands/resolver/interfaces/gitlab.py index eb211e563a..26130fcb44 100644 --- a/openhands/resolver/interfaces/gitlab.py +++ b/openhands/resolver/interfaces/gitlab.py @@ -86,7 +86,6 @@ class GitlabIssueHandler(IssueHandlerInterface): Returns: List of Gitlab issues. """ - if not issue_numbers: raise ValueError('Unspecified issue number') diff --git a/openhands/resolver/issue_resolver.py b/openhands/resolver/issue_resolver.py index 1c93ea0ffd..474096baa6 100644 --- a/openhands/resolver/issue_resolver.py +++ b/openhands/resolver/issue_resolver.py @@ -70,7 +70,6 @@ class IssueResolver: comment_id: Optional ID of a specific comment to focus on. base_domain: The base domain for the git server. """ - parts = args.selected_repo.rsplit('/', 1) if len(parts) < 2: raise ValueError('Invalid repository format. Expected owner/repo') @@ -540,7 +539,6 @@ class IssueResolver: Args: reset_logger: Whether to reset the logger for multiprocessing. """ - issue = self.extract_issue() if self.comment_id is not None: diff --git a/openhands/resolver/utils.py b/openhands/resolver/utils.py index 4af40be24a..527727fba3 100644 --- a/openhands/resolver/utils.py +++ b/openhands/resolver/utils.py @@ -16,8 +16,7 @@ from openhands.integrations.utils import validate_provider_token async def identify_token(token: str, base_domain: str | None) -> ProviderType: - """ - Identifies whether a token belongs to GitHub, GitLab, or Bitbucket. + """Identifies whether a token belongs to GitHub, GitLab, or Bitbucket. Parameters: token (str): The personal access token to check. base_domain (str): Custom base domain for provider (e.g GitHub Enterprise) diff --git a/openhands/runtime/__init__.py b/openhands/runtime/__init__.py index eedb9b0af5..eabb1c0ee3 100644 --- a/openhands/runtime/__init__.py +++ b/openhands/runtime/__init__.py @@ -88,8 +88,7 @@ _ALL_RUNTIME_CLASSES = {**_DEFAULT_RUNTIME_CLASSES, **_THIRD_PARTY_RUNTIME_CLASS def get_runtime_cls(name: str) -> type[Runtime]: - """ - If name is one of the predefined runtime names (e.g. 'docker'), return its class. + """If name is one of the predefined runtime names (e.g. 'docker'), return its class. Otherwise attempt to resolve name as subclass of Runtime and return it. Raise on invalid selections. """ diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py index b88a9a46cf..b62268b4a5 100644 --- a/openhands/runtime/action_execution_server.py +++ b/openhands/runtime/action_execution_server.py @@ -1,5 +1,4 @@ -""" -This is the main file for the runtime client. +"""This is the main file for the runtime client. It is responsible for executing actions received from OpenHands backend and producing observations. NOTE: this will be executed inside the docker sandbox. diff --git a/openhands/runtime/base.py b/openhands/runtime/base.py index 0000d40133..23e58b19c3 100644 --- a/openhands/runtime/base.py +++ b/openhands/runtime/base.py @@ -196,8 +196,7 @@ class Runtime(FileEditRuntimeMixin): self.add_env_vars(self.config.sandbox.runtime_startup_env_vars) def close(self) -> None: - """ - This should only be called by conversation manager or closing the session. + """This should only be called by conversation manager or closing the session. If called for instance by error handling, it could prevent recovery. """ pass @@ -300,9 +299,7 @@ class Runtime(FileEditRuntimeMixin): asyncio.get_event_loop().run_until_complete(self._handle_action(event)) async def _export_latest_git_provider_tokens(self, event: Action) -> None: - """ - Refresh runtime provider tokens when agent attemps to run action with provider token - """ + """Refresh runtime provider tokens when agent attemps to run action with provider token""" if not self.user_id: return @@ -1001,9 +998,7 @@ fi def _execute_shell_fn_git_handler( self, command: str, cwd: str | None ) -> CommandResult: - """ - This function is used by the GitHandler to execute shell commands. - """ + """This function is used by the GitHandler to execute shell commands.""" obs = self.run(CmdRunAction(command=command, is_static=True, cwd=cwd)) exit_code = 0 content = '' @@ -1019,9 +1014,7 @@ fi return CommandResult(content=content, exit_code=exit_code) def _create_file_fn_git_handler(self, path: str, content: str) -> int: - """ - This function is used by the GitHandler to execute shell commands. - """ + """This function is used by the GitHandler to execute shell commands.""" obs = self.write(FileWriteAction(path=path, content=content)) if isinstance(obs, ErrorObservation): return -1 @@ -1043,8 +1036,7 @@ fi def subscribe_to_shell_stream( self, callback: Callable[[str], None] | None = None ) -> bool: - """ - Subscribe to shell command output stream. + """Subscribe to shell command output stream. This method is meant to be overridden by runtime implementations that want to stream shell command output to external consumers. diff --git a/openhands/runtime/file_viewer_server.py b/openhands/runtime/file_viewer_server.py index d0d3c88e0d..dc86e03750 100644 --- a/openhands/runtime/file_viewer_server.py +++ b/openhands/runtime/file_viewer_server.py @@ -1,5 +1,4 @@ -""" -A tiny, isolated server that provides only the /view endpoint from the action execution server. +"""A tiny, isolated server that provides only the /view endpoint from the action execution server. This server has no authentication and only listens to localhost traffic. """ @@ -83,7 +82,6 @@ def start_file_viewer_server(port: int) -> tuple[str, threading.Thread]: Returns: Tuple[str, threading.Thread]: The server URL and the thread object. """ - # Save the server URL to a file server_url = f'http://localhost:{port}' port_path = '/tmp/oh-server-url' diff --git a/openhands/runtime/impl/__init__.py b/openhands/runtime/impl/__init__.py index a4e9701ed4..4398d70a3e 100644 --- a/openhands/runtime/impl/__init__.py +++ b/openhands/runtime/impl/__init__.py @@ -1,6 +1,4 @@ -""" -Runtime implementations for OpenHands. -""" +"""Runtime implementations for OpenHands.""" from openhands.runtime.impl.action_execution.action_execution_client import ( ActionExecutionClient, diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py index 773a806aa5..b7beec5700 100644 --- a/openhands/runtime/impl/action_execution/action_execution_client.py +++ b/openhands/runtime/impl/action_execution/action_execution_client.py @@ -138,7 +138,6 @@ class ActionExecutionClient(Runtime): If path is None, list files in the sandbox's initial working directory (e.g., /workspace). """ - try: data = {} if path is not None: diff --git a/openhands/runtime/impl/cli/__init__.py b/openhands/runtime/impl/cli/__init__.py index 95e2746d22..ae3165ebfb 100644 --- a/openhands/runtime/impl/cli/__init__.py +++ b/openhands/runtime/impl/cli/__init__.py @@ -1,6 +1,4 @@ -""" -CLI Runtime implementation for OpenHands. -""" +"""CLI Runtime implementation for OpenHands.""" from openhands.runtime.impl.cli.cli_runtime import CLIRuntime diff --git a/openhands/runtime/impl/cli/cli_runtime.py b/openhands/runtime/impl/cli/cli_runtime.py index aab98a3db8..bd5b12e284 100644 --- a/openhands/runtime/impl/cli/cli_runtime.py +++ b/openhands/runtime/impl/cli/cli_runtime.py @@ -1,5 +1,4 @@ -""" -This runtime runs commands locally using subprocess and performs file operations using Python's standard library. +"""This runtime runs commands locally using subprocess and performs file operations using Python's standard library. It does not implement browser functionality. """ @@ -88,8 +87,7 @@ After installing .NET SDK, restart your terminal and try again. class CLIRuntime(Runtime): - """ - A runtime implementation that runs commands locally using subprocess and performs + """A runtime implementation that runs commands locally using subprocess and performs file operations using Python's standard library. It does not implement browser functionality. Args: @@ -191,8 +189,7 @@ class CLIRuntime(Runtime): logger.info(f'CLIRuntime initialized with workspace at {self._workspace_path}') def add_env_vars(self, env_vars: dict[str, Any]) -> None: - """ - Adds environment variables to the current runtime environment. + """Adds environment variables to the current runtime environment. For CLIRuntime, this means updating os.environ for the current process, so that subsequent commands inherit these variables. This overrides the BaseRuntime behavior which tries to run shell commands @@ -218,8 +215,7 @@ class CLIRuntime(Runtime): # during initialization before self._runtime_initialized is True. def _safe_terminate_process(self, process_obj, signal_to_send=signal.SIGTERM): - """ - Safely attempts to terminate/kill a process group or a single process. + """Safely attempts to terminate/kill a process group or a single process. Args: process_obj: the subprocess.Popen object started with start_new_session=True @@ -292,8 +288,8 @@ class CLIRuntime(Runtime): def _execute_powershell_command( self, command: str, timeout: float ) -> CmdOutputObservation | ErrorObservation: - """ - Execute a command using PowerShell session on Windows. + """Execute a command using PowerShell session on Windows. + Args: command: The command to execute timeout: Timeout in seconds for the command @@ -326,8 +322,8 @@ class CLIRuntime(Runtime): def _execute_shell_command( self, command: str, timeout: float ) -> CmdOutputObservation: - """ - Execute a shell command and stream its output to a callback function. + """Execute a shell command and stream its output to a callback function. + Args: command: The shell command to execute timeout: Timeout in seconds for the command @@ -965,8 +961,7 @@ class CLIRuntime(Runtime): def subscribe_to_shell_stream( self, callback: Callable[[str], None] | None = None ) -> bool: - """ - Subscribe to shell command output stream. + """Subscribe to shell command output stream. Args: callback: A function that will be called with each line of output from shell commands. diff --git a/openhands/runtime/impl/kubernetes/kubernetes_runtime.py b/openhands/runtime/impl/kubernetes/kubernetes_runtime.py index b7cb3aeec4..825d346463 100644 --- a/openhands/runtime/impl/kubernetes/kubernetes_runtime.py +++ b/openhands/runtime/impl/kubernetes/kubernetes_runtime.py @@ -58,8 +58,7 @@ POD_LABEL = 'openhands-runtime' class KubernetesRuntime(ActionExecutionClient): - """ - A Kubernetes runtime for OpenHands that works with Kind. + """A Kubernetes runtime for OpenHands that works with Kind. This runtime creates pods in a Kubernetes cluster to run the agent code. It uses the Kubernetes Python client to create and manage the pods. @@ -411,7 +410,6 @@ class KubernetesRuntime(ActionExecutionClient): def _get_vscode_service_manifest(self): """Create a service manifest for the VSCode server.""" - vscode_service_spec = V1ServiceSpec( selector={'app': POD_LABEL, 'session': self.sid}, type='ClusterIP', @@ -567,7 +565,6 @@ class KubernetesRuntime(ActionExecutionClient): def _get_vscode_ingress_manifest(self): """Create an ingress manifest for the VSCode server.""" - tls = [] if self._k8s_config.ingress_tls_secret: runtime_tls = V1IngressTLS( diff --git a/openhands/runtime/mcp/proxy/__init__.py b/openhands/runtime/mcp/proxy/__init__.py index cd91f07fba..141a22e4ee 100644 --- a/openhands/runtime/mcp/proxy/__init__.py +++ b/openhands/runtime/mcp/proxy/__init__.py @@ -1,6 +1,4 @@ -""" -MCP Proxy module for OpenHands. -""" +"""MCP Proxy module for OpenHands.""" from openhands.runtime.mcp.proxy.manager import MCPProxyManager diff --git a/openhands/runtime/mcp/proxy/manager.py b/openhands/runtime/mcp/proxy/manager.py index 8cb793bad0..e3c0c675ca 100644 --- a/openhands/runtime/mcp/proxy/manager.py +++ b/openhands/runtime/mcp/proxy/manager.py @@ -1,5 +1,4 @@ -""" -MCP Proxy Manager for OpenHands. +"""MCP Proxy Manager for OpenHands. This module provides a manager class for handling FastMCP proxy instances, including initialization, configuration, and mounting to FastAPI applications. @@ -20,8 +19,7 @@ fastmcp_logger = fastmcp_get_logger('fastmcp') class MCPProxyManager: - """ - Manager for FastMCP proxy instances. + """Manager for FastMCP proxy instances. This class encapsulates all the functionality related to creating, configuring, and managing FastMCP proxy instances, including mounting them to FastAPI applications. @@ -33,8 +31,7 @@ class MCPProxyManager: api_key: Optional[str] = None, logger_level: Optional[int] = None, ): - """ - Initialize the MCP Proxy Manager. + """Initialize the MCP Proxy Manager. Args: name: Name of the proxy server @@ -55,9 +52,7 @@ class MCPProxyManager: fastmcp_logger.setLevel(logger_level) def initialize(self) -> None: - """ - Initialize the FastMCP proxy with the current configuration. - """ + """Initialize the FastMCP proxy with the current configuration.""" if len(self.config['mcpServers']) == 0: logger.info( 'No MCP servers configured for FastMCP Proxy, skipping initialization.' @@ -76,8 +71,7 @@ class MCPProxyManager: async def mount_to_app( self, app: FastAPI, allow_origins: Optional[list[str]] = None ) -> None: - """ - Mount the SSE server app to a FastAPI application. + """Mount the SSE server app to a FastAPI application. Args: app: FastAPI application to mount to @@ -128,8 +122,7 @@ class MCPProxyManager: stdio_servers: list[MCPStdioServerConfig], allow_origins: Optional[list[str]] = None, ) -> None: - """ - Update the tools configuration and remount the proxy to the app. + """Update the tools configuration and remount the proxy to the app. This is a convenience method that combines updating the tools, shutting down the existing proxy, initializing a new one, and diff --git a/openhands/runtime/plugins/vscode/__init__.py b/openhands/runtime/plugins/vscode/__init__.py index 2ad548380c..a4f76a2815 100644 --- a/openhands/runtime/plugins/vscode/__init__.py +++ b/openhands/runtime/plugins/vscode/__init__.py @@ -95,8 +95,7 @@ class VSCodePlugin(Plugin): ) def _setup_vscode_settings(self) -> None: - """ - Set up VSCode settings by creating the .vscode directory in the workspace + """Set up VSCode settings by creating the .vscode directory in the workspace and copying the settings.json file there. """ # Get the path to the settings.json file in the plugin directory diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py index 4b0a63741d..68064f842d 100644 --- a/openhands/runtime/utils/bash.py +++ b/openhands/runtime/utils/bash.py @@ -79,8 +79,7 @@ def split_bash_commands(commands: str) -> list[str]: def escape_bash_special_chars(command: str) -> str: - r""" - Escapes characters that have different interpretations in bash vs python. + r"""Escapes characters that have different interpretations in bash vs python. Specifically handles escape sequences like \;, \|, \&, etc. """ if command.strip() == '': @@ -446,6 +445,7 @@ class BashSession: ps1_matches: List of regex matches for PS1 prompts get_content_before_last_match: when there's only one PS1 match, whether to get the content before the last PS1 prompt (True) or after the last PS1 prompt (False) + Returns: Combined string of all outputs between matches """ diff --git a/openhands/runtime/utils/file_viewer.py b/openhands/runtime/utils/file_viewer.py index f2bae769f6..181e78ddd9 100644 --- a/openhands/runtime/utils/file_viewer.py +++ b/openhands/runtime/utils/file_viewer.py @@ -1,6 +1,4 @@ -""" -Utility module for generating file viewer HTML content. -""" +"""Utility module for generating file viewer HTML content.""" import base64 import mimetypes @@ -8,8 +6,7 @@ import os def generate_file_viewer_html(file_path: str) -> str: - """ - Generate HTML content for viewing different file types. + """Generate HTML content for viewing different file types. Args: file_path: The absolute path to the file diff --git a/openhands/runtime/utils/git_changes.py b/openhands/runtime/utils/git_changes.py index acc84a0e8f..d53a9e8de4 100644 --- a/openhands/runtime/utils/git_changes.py +++ b/openhands/runtime/utils/git_changes.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -""" -Get git changes in the current working directory relative to the remote origin if possible. +"""Get git changes in the current working directory relative to the remote origin if possible. NOTE: Since this is run as a script, there should be no imports from project files! """ diff --git a/openhands/runtime/utils/git_diff.py b/openhands/runtime/utils/git_diff.py index 9f71222b19..eb9cfdf990 100644 --- a/openhands/runtime/utils/git_diff.py +++ b/openhands/runtime/utils/git_diff.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -""" -Get git diff in a single git file for the closest git repo in the file system +"""Get git diff in a single git file for the closest git repo in the file system NOTE: Since this is run as a script, there should be no imports from project files! """ diff --git a/openhands/runtime/utils/git_handler.py b/openhands/runtime/utils/git_handler.py index 5b5ead5f4f..1bfed7c46b 100644 --- a/openhands/runtime/utils/git_handler.py +++ b/openhands/runtime/utils/git_handler.py @@ -14,8 +14,7 @@ GIT_DIFF_CMD = ( @dataclass class CommandResult: - """ - Represents the result of a shell command execution. + """Represents the result of a shell command execution. Attributes: content (str): The output content of the command. @@ -27,9 +26,7 @@ class CommandResult: class GitHandler: - """ - A handler for executing Git-related operations via shell commands. - """ + """A handler for executing Git-related operations via shell commands.""" def __init__( self, @@ -43,8 +40,7 @@ class GitHandler: self.git_diff_cmd = GIT_DIFF_CMD def set_cwd(self, cwd: str) -> None: - """ - Sets the current working directory for Git operations. + """Sets the current working directory for Git operations. Args: cwd (str): The directory path. @@ -60,8 +56,7 @@ class GitHandler: return script_file def get_git_changes(self) -> list[dict[str, str]] | None: - """ - Retrieves the list of changed files in Git repositories. + """Retrieves the list of changed files in Git repositories. Examines each direct subdirectory of the workspace directory looking for git repositories and returns the changes for each of these directories. Optimized to use a single git command per repository for maximum performance. @@ -100,8 +95,7 @@ class GitHandler: return self.get_git_changes() def get_git_diff(self, file_path: str) -> dict[str, str]: - """ - Retrieves the original and modified content of a file in the repository. + """Retrieves the original and modified content of a file in the repository. Args: file_path (str): Path to the file. diff --git a/openhands/runtime/utils/windows_bash.py b/openhands/runtime/utils/windows_bash.py index 76c7ff039b..db1fb20935 100644 --- a/openhands/runtime/utils/windows_bash.py +++ b/openhands/runtime/utils/windows_bash.py @@ -1,5 +1,4 @@ -""" -This module provides a Windows-specific implementation for running commands +"""This module provides a Windows-specific implementation for running commands in a PowerShell session using the pythonnet library to interact with the .NET PowerShell SDK directly. This aims to provide a more robust and integrated way to manage PowerShell processes compared to using temporary script files. @@ -95,8 +94,7 @@ except Exception as e: class WindowsPowershellSession: - """ - Manages a persistent PowerShell session using the .NET SDK via pythonnet. + """Manages a persistent PowerShell session using the .NET SDK via pythonnet. Allows executing commands within a single runspace, preserving state (variables, current directory) between calls. @@ -110,8 +108,7 @@ class WindowsPowershellSession: no_change_timeout_seconds: int = 30, max_memory_mb: int | None = None, ): - """ - Initializes the PowerShell session. + """Initializes the PowerShell session. Args: work_dir: The starting working directory for the session. @@ -388,9 +385,7 @@ class WindowsPowershellSession: def _check_active_job( self, timeout_seconds: int ) -> CmdOutputObservation | ErrorObservation: - """ - Checks the active job for new output and status, waiting up to timeout_seconds. - """ + """Checks the active job for new output and status, waiting up to timeout_seconds.""" with self._job_lock: if not self.active_job: return ErrorObservation( @@ -649,8 +644,7 @@ class WindowsPowershellSession: return self._cwd def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation: - """ - Executes a command, potentially as a PowerShell background job for long-running tasks. + """Executes a command, potentially as a PowerShell background job for long-running tasks. Aligned with bash.py behavior regarding command execution and messages. Args: diff --git a/openhands/runtime/utils/windows_exceptions.py b/openhands/runtime/utils/windows_exceptions.py index bb1917abff..3863af6901 100644 --- a/openhands/runtime/utils/windows_exceptions.py +++ b/openhands/runtime/utils/windows_exceptions.py @@ -1,11 +1,8 @@ -""" -Custom exceptions for Windows-specific runtime issues. -""" +"""Custom exceptions for Windows-specific runtime issues.""" class DotNetMissingError(Exception): - """ - Exception raised when .NET SDK or CoreCLR is missing or cannot be loaded. + """Exception raised when .NET SDK or CoreCLR is missing or cannot be loaded. This is used to provide a cleaner error message to users without a full stack trace. """ diff --git a/openhands/server/data_models/agent_loop_info.py b/openhands/server/data_models/agent_loop_info.py index 1d5fc01e95..c094c9dcfc 100644 --- a/openhands/server/data_models/agent_loop_info.py +++ b/openhands/server/data_models/agent_loop_info.py @@ -7,9 +7,7 @@ from openhands.storage.data_models.conversation_status import ConversationStatus @dataclass class AgentLoopInfo: - """ - Information about an agent loop - the URL on which to locate it and the event store - """ + """Information about an agent loop - the URL on which to locate it and the event store""" conversation_id: str url: str | None diff --git a/openhands/server/data_models/conversation_info.py b/openhands/server/data_models/conversation_info.py index f74c707d1b..c16c2a4186 100644 --- a/openhands/server/data_models/conversation_info.py +++ b/openhands/server/data_models/conversation_info.py @@ -9,8 +9,7 @@ from openhands.storage.data_models.conversation_status import ConversationStatus @dataclass class ConversationInfo: - """ - Information about a conversation. This combines conversation metadata with + """Information about a conversation. This combines conversation metadata with information on whether a conversation is currently running """ diff --git a/openhands/server/dependencies.py b/openhands/server/dependencies.py index 73ab3c0cd4..0c1f6b1ec7 100644 --- a/openhands/server/dependencies.py +++ b/openhands/server/dependencies.py @@ -10,8 +10,7 @@ _SESSION_API_KEY_HEADER = APIKeyHeader(name='X-Session-API-Key', auto_error=Fals def check_session_api_key( session_api_key: str | None = Depends(_SESSION_API_KEY_HEADER), ): - """ - Check the session API key and throw an exception if incorrect. Having this as a dependency + """Check the session API key and throw an exception if incorrect. Having this as a dependency means it appears in OpenAPI Docs """ if session_api_key != _SESSION_API_KEY: diff --git a/openhands/server/files.py b/openhands/server/files.py index 7d296ba107..b0f6069f03 100644 --- a/openhands/server/files.py +++ b/openhands/server/files.py @@ -4,9 +4,7 @@ from pydantic import ( class POSTUploadFilesModel(BaseModel): - """ - Upload files response model - """ + """Upload files response model""" file_urls: list[str] skipped_files: list[str] diff --git a/openhands/server/middleware.py b/openhands/server/middleware.py index cc7f09b024..e2bae700a7 100644 --- a/openhands/server/middleware.py +++ b/openhands/server/middleware.py @@ -14,8 +14,7 @@ from starlette.types import ASGIApp class LocalhostCORSMiddleware(CORSMiddleware): - """ - Custom CORS middleware that allows any request from localhost/127.0.0.1 domains, + """Custom CORS middleware that allows any request from localhost/127.0.0.1 domains, while using standard CORS rules for other origins. """ @@ -50,9 +49,7 @@ class LocalhostCORSMiddleware(CORSMiddleware): class CacheControlMiddleware(BaseHTTPMiddleware): - """ - Middleware to disable caching for all routes by adding appropriate headers - """ + """Middleware to disable caching for all routes by adding appropriate headers""" async def dispatch( self, request: Request, call_next: RequestResponseEndpoint diff --git a/openhands/server/monitoring.py b/openhands/server/monitoring.py index 89c99fe83e..2ff30744a8 100644 --- a/openhands/server/monitoring.py +++ b/openhands/server/monitoring.py @@ -17,22 +17,18 @@ class MonitoringListener: """ def on_session_event(self, event: Event) -> None: - """ - Track metrics about events being added to a Session's EventStream. - """ + """Track metrics about events being added to a Session's EventStream.""" pass def on_agent_session_start(self, success: bool, duration: float) -> None: - """ - Track an agent session start. + """Track an agent session start. Success is true if startup completed without error. Duration is start time in seconds observed by AgentSession. """ pass def on_create_conversation(self) -> None: - """ - Track the beginning of conversation creation. + """Track the beginning of conversation creation. Does not currently capture whether it succeed. """ pass diff --git a/openhands/server/routes/conversation.py b/openhands/server/routes/conversation.py index 0271bb271a..1de94c5ba1 100644 --- a/openhands/server/routes/conversation.py +++ b/openhands/server/routes/conversation.py @@ -114,6 +114,7 @@ async def search_events( user_id: str | None = Depends(get_user_id), ): """Search through the event stream with filtering and pagination. + Args: conversation_id: The conversation ID start_id: Starting ID in the event stream. Defaults to 0 @@ -123,6 +124,7 @@ async def search_events( limit: Maximum number of events to return. Must be between 1 and 100. Defaults to 20 metadata: Conversation metadata (injected by dependency) user_id: User ID (injected by dependency) + Returns: dict: Dictionary containing: - events: List of matching events diff --git a/openhands/server/routes/manage_conversations.py b/openhands/server/routes/manage_conversations.py index 4da6b7e310..cef6278873 100644 --- a/openhands/server/routes/manage_conversations.py +++ b/openhands/server/routes/manage_conversations.py @@ -626,7 +626,6 @@ async def update_conversation( Raises: HTTPException: If conversation is not found or user lacks permission """ - logger.info( f'Updating conversation {conversation_id} with title: {data.title}', extra={'session_id': conversation_id, 'user_id': user_id}, diff --git a/openhands/server/routes/mcp.py b/openhands/server/routes/mcp.py index f858593ea6..5fe3bc1689 100644 --- a/openhands/server/routes/mcp.py +++ b/openhands/server/routes/mcp.py @@ -32,10 +32,7 @@ CONVO_URL = HOST + '/conversations/{}' async def get_convo_link(service: GitService, conversation_id: str, body: str) -> str: - """ - Appends a followup link, in the PR body, to the OpenHands conversation that opened the PR - """ - + """Appends a followup link, in the PR body, to the OpenHands conversation that opened the PR""" if server_config.app_mode != AppMode.SAAS: return body @@ -94,7 +91,6 @@ async def create_pr( ] = None, ) -> str: """Open a PR in GitHub""" - logger.info('Calling OpenHands MCP create_pr') request = get_http_request() @@ -165,7 +161,6 @@ async def create_mr( ] = None, ) -> str: """Open a MR in GitLab""" - logger.info('Calling OpenHands MCP create_mr') request = get_http_request() @@ -233,7 +228,6 @@ async def create_bitbucket_pr( description: Annotated[str | None, Field(description='PR description')], ) -> str: """Open a PR in Bitbucket""" - logger.info('Calling OpenHands MCP create_bitbucket_pr') request = get_http_request() diff --git a/openhands/server/routes/secrets.py b/openhands/server/routes/secrets.py index eec3d23d26..cf808e17d4 100644 --- a/openhands/server/routes/secrets.py +++ b/openhands/server/routes/secrets.py @@ -33,12 +33,10 @@ app = APIRouter(prefix='/api', dependencies=get_dependencies()) async def invalidate_legacy_secrets_store( settings: Settings, settings_store: SettingsStore, secrets_store: SecretsStore ) -> UserSecrets | None: - """ - We are moving `secrets_store` (a field from `Settings` object) to its own dedicated store + """We are moving `secrets_store` (a field from `Settings` object) to its own dedicated store This function moves the values from Settings to UserSecrets, and deletes the values in Settings While this function in called multiple times, the migration only ever happens once """ - if len(settings.secrets_store.provider_tokens.items()) > 0: user_secrets = UserSecrets( provider_tokens=settings.secrets_store.provider_tokens diff --git a/openhands/server/routes/settings.py b/openhands/server/routes/settings.py index 83ec75d524..4e94c64765 100644 --- a/openhands/server/routes/settings.py +++ b/openhands/server/routes/settings.py @@ -97,9 +97,7 @@ async def load_settings( }, ) async def reset_settings() -> JSONResponse: - """ - Resets user settings. (Deprecated) - """ + """Resets user settings. (Deprecated)""" logger.warning('Deprecated endpoint /api/reset-settings called by user') return JSONResponse( status_code=status.HTTP_410_GONE, diff --git a/openhands/server/services/conversation_service.py b/openhands/server/services/conversation_service.py index 7e8b68ec51..8ed0a755cd 100644 --- a/openhands/server/services/conversation_service.py +++ b/openhands/server/services/conversation_service.py @@ -164,7 +164,6 @@ async def setup_init_convo_settings( ) -> ConversationInitData: """Set up conversation initialization data with provider tokens. - Args: user_id: The user ID conversation_id: The conversation ID diff --git a/openhands/server/session/agent_session.py b/openhands/server/session/agent_session.py index df2d0c15ab..8581656b19 100644 --- a/openhands/server/session/agent_session.py +++ b/openhands/server/session/agent_session.py @@ -72,7 +72,6 @@ class AgentSession: - sid: The session ID - file_store: Instance of the FileStore """ - self.sid = sid self.event_stream = EventStream(sid, file_store, user_id) self.file_store = file_store @@ -253,8 +252,7 @@ class AgentSession: agent_to_llm_config: dict[str, LLMConfig] | None, agent_configs: dict[str, AgentConfig] | None, ) -> MessageAction: - """ - Replays a trajectory from a JSON file. Note that once the replay session + """Replays a trajectory from a JSON file. Note that once the replay session finishes, the controller will continue to run with further user instructions, so we still need to pass llm configs, budget, etc., even though the replay itself does not call LLM or cost money. @@ -279,7 +277,6 @@ class AgentSession: Parameters: - security_analyzer: The name of the security analyzer to use """ - if security_analyzer: self.logger.debug(f'Using security analyzer: {security_analyzer}') self.security_analyzer = options.SecurityAnalyzers.get( @@ -325,7 +322,6 @@ class AgentSession: Return True on successfully connected, False if could not connect. Raises if already created, possibly in other situations. """ - if self.runtime is not None: raise RuntimeError('Runtime already created') @@ -422,7 +418,6 @@ class AgentSession: Returns: Agent Controller and a bool indicating if state was restored from a previous conversation """ - if self.controller is not None: raise RuntimeError('Controller already created') if self.runtime is None: diff --git a/openhands/server/session/conversation_init_data.py b/openhands/server/session/conversation_init_data.py index 4f459d8857..cdf76db977 100644 --- a/openhands/server/session/conversation_init_data.py +++ b/openhands/server/session/conversation_init_data.py @@ -6,9 +6,7 @@ from openhands.storage.data_models.settings import Settings class ConversationInitData(Settings): - """ - Session initialization data for the web environment - a deep copy of the global config is made and then overridden with this data. - """ + """Session initialization data for the web environment - a deep copy of the global config is made and then overridden with this data.""" git_provider_tokens: PROVIDER_TOKEN_TYPE | None = Field(default=None, frozen=True) custom_secrets: CUSTOM_SECRETS_TYPE | None = Field(default=None, frozen=True) diff --git a/openhands/server/settings.py b/openhands/server/settings.py index 7399d619c1..ebcadec932 100644 --- a/openhands/server/settings.py +++ b/openhands/server/settings.py @@ -13,26 +13,20 @@ from openhands.storage.data_models.settings import Settings class POSTProviderModel(BaseModel): - """ - Settings for POST requests - """ + """Settings for POST requests""" mcp_config: MCPConfig | None = None provider_tokens: dict[ProviderType, ProviderToken] = {} class POSTCustomSecrets(BaseModel): - """ - Adding new custom secret - """ + """Adding new custom secret""" custom_secrets: dict[str, CustomSecret] = {} class GETSettingsModel(Settings): - """ - Settings with additional token data for the frontend - """ + """Settings with additional token data for the frontend""" provider_tokens_set: dict[ProviderType, str | None] | None = ( None # provider + base_domain key-value pair @@ -44,25 +38,19 @@ class GETSettingsModel(Settings): class CustomSecretWithoutValueModel(BaseModel): - """ - Custom secret model without value - """ + """Custom secret model without value""" name: str description: str | None = None class CustomSecretModel(CustomSecretWithoutValueModel): - """ - Custom secret model with value - """ + """Custom secret model with value""" value: SecretStr class GETCustomSecrets(BaseModel): - """ - Custom secrets names - """ + """Custom secrets names""" custom_secrets: list[CustomSecretWithoutValueModel] | None = None diff --git a/openhands/storage/batched_web_hook.py b/openhands/storage/batched_web_hook.py index 057b7f0a96..9c6220b65f 100644 --- a/openhands/storage/batched_web_hook.py +++ b/openhands/storage/batched_web_hook.py @@ -13,8 +13,7 @@ WEBHOOK_BATCH_SIZE_LIMIT_BYTES = 1048576 # 1MB class BatchedWebHookFileStore(FileStore): - """ - File store which batches updates before sending them to a webhook. + """File store which batches updates before sending them to a webhook. This class wraps another FileStore implementation and sends HTTP requests to a specified URL when files are written or deleted. Updates are batched @@ -51,8 +50,7 @@ class BatchedWebHookFileStore(FileStore): batch_timeout_seconds: Optional[float] = None, batch_size_limit_bytes: Optional[int] = None, ): - """ - Initialize a BatchedWebHookFileStore. + """Initialize a BatchedWebHookFileStore. Args: file_store: The underlying FileStore implementation @@ -84,8 +82,7 @@ class BatchedWebHookFileStore(FileStore): self._batch_size = 0 def write(self, path: str, contents: Union[str, bytes]) -> None: - """ - Write contents to a file and queue a webhook update. + """Write contents to a file and queue a webhook update. Args: path: The path to write to @@ -95,8 +92,7 @@ class BatchedWebHookFileStore(FileStore): self._queue_update(path, 'write', contents) def read(self, path: str) -> str: - """ - Read contents from a file. + """Read contents from a file. Args: path: The path to read from @@ -107,8 +103,7 @@ class BatchedWebHookFileStore(FileStore): return self.file_store.read(path) def list(self, path: str) -> list[str]: - """ - List files in a directory. + """List files in a directory. Args: path: The directory path to list @@ -119,8 +114,7 @@ class BatchedWebHookFileStore(FileStore): return self.file_store.list(path) def delete(self, path: str) -> None: - """ - Delete a file and queue a webhook update. + """Delete a file and queue a webhook update. Args: path: The path to delete @@ -131,8 +125,7 @@ class BatchedWebHookFileStore(FileStore): def _queue_update( self, path: str, operation: str, contents: Optional[Union[str, bytes]] ) -> None: - """ - Queue an update to be sent to the webhook. + """Queue an update to be sent to the webhook. Args: path: The path that was modified @@ -183,15 +176,13 @@ class BatchedWebHookFileStore(FileStore): self._batch_timer = timer def _send_batch_from_timer(self) -> None: - """ - Send the batch from the timer thread. + """Send the batch from the timer thread. This method is called by the timer and submits the actual sending to the executor. """ EXECUTOR.submit(self._send_batch) def _send_batch(self) -> None: - """ - Send the current batch of updates to the webhook as a single request. + """Send the current batch of updates to the webhook as a single request. This method acquires the batch lock and processes all pending updates in one batch. """ batch_to_send: dict[str, tuple[str, Optional[Union[str, bytes]]]] = {} @@ -225,8 +216,7 @@ class BatchedWebHookFileStore(FileStore): def _send_batch_request( self, batch: dict[str, tuple[str, Optional[Union[str, bytes]]]] ) -> None: - """ - Send a single batch request to the webhook URL with all updates. + """Send a single batch request to the webhook URL with all updates. This method is retried up to 3 times with a 1-second delay between attempts. @@ -267,8 +257,7 @@ class BatchedWebHookFileStore(FileStore): response.raise_for_status() def flush(self) -> None: - """ - Immediately send any pending updates to the webhook. + """Immediately send any pending updates to the webhook. This can be called to ensure all updates are sent before shutting down. """ self._send_batch() diff --git a/openhands/storage/data_models/settings.py b/openhands/storage/data_models/settings.py index 29370d1d04..8d67387356 100644 --- a/openhands/storage/data_models/settings.py +++ b/openhands/storage/data_models/settings.py @@ -18,9 +18,7 @@ from openhands.storage.data_models.user_secrets import UserSecrets class Settings(BaseModel): - """ - Persisted settings for OpenHands sessions - """ + """Persisted settings for OpenHands sessions""" language: str | None = None agent: str | None = None @@ -107,7 +105,6 @@ class Settings(BaseModel): @field_serializer('secrets_store') def secrets_store_serializer(self, secrets: UserSecrets, info: SerializationInfo): """Custom serializer for secrets store.""" - """Force invalidate secret store""" return {'provider_tokens': {}} diff --git a/openhands/storage/data_models/user_secrets.py b/openhands/storage/data_models/user_secrets.py index 43db861e21..36af6f336f 100644 --- a/openhands/storage/data_models/user_secrets.py +++ b/openhands/storage/data_models/user_secrets.py @@ -140,12 +140,10 @@ class UserSecrets(BaseModel): return new_data def set_event_stream_secrets(self, event_stream: EventStream) -> None: - """ - This ensures that provider tokens and custom secrets masked from the event stream + """This ensures that provider tokens and custom secrets masked from the event stream Args: event_stream: Agent session's event stream """ - secrets = self.get_env_vars() event_stream.set_secrets(secrets) diff --git a/openhands/storage/web_hook.py b/openhands/storage/web_hook.py index b52f206bba..71f7c73edd 100644 --- a/openhands/storage/web_hook.py +++ b/openhands/storage/web_hook.py @@ -6,8 +6,7 @@ from openhands.utils.async_utils import EXECUTOR class WebHookFileStore(FileStore): - """ - File store which includes a web hook to be invoked after any changes occur. + """File store which includes a web hook to be invoked after any changes occur. This class wraps another FileStore implementation and sends HTTP requests to a specified URL whenever files are written or deleted. @@ -25,8 +24,7 @@ class WebHookFileStore(FileStore): def __init__( self, file_store: FileStore, base_url: str, client: httpx.Client | None = None ): - """ - Initialize a WebHookFileStore. + """Initialize a WebHookFileStore. Args: file_store: The underlying FileStore implementation @@ -40,8 +38,7 @@ class WebHookFileStore(FileStore): self.client = client def write(self, path: str, contents: str | bytes) -> None: - """ - Write contents to a file and trigger a webhook. + """Write contents to a file and trigger a webhook. Args: path: The path to write to @@ -51,8 +48,7 @@ class WebHookFileStore(FileStore): EXECUTOR.submit(self._on_write, path, contents) def read(self, path: str) -> str: - """ - Read contents from a file. + """Read contents from a file. Args: path: The path to read from @@ -63,8 +59,7 @@ class WebHookFileStore(FileStore): return self.file_store.read(path) def list(self, path: str) -> list[str]: - """ - List files in a directory. + """List files in a directory. Args: path: The directory path to list @@ -75,8 +70,7 @@ class WebHookFileStore(FileStore): return self.file_store.list(path) def delete(self, path: str) -> None: - """ - Delete a file and trigger a webhook. + """Delete a file and trigger a webhook. Args: path: The path to delete @@ -89,8 +83,7 @@ class WebHookFileStore(FileStore): stop=tenacity.stop_after_attempt(3), ) def _on_write(self, path: str, contents: str | bytes) -> None: - """ - Send a POST request to the webhook URL when a file is written. + """Send a POST request to the webhook URL when a file is written. This method is retried up to 3 times with a 1-second delay between attempts. @@ -110,8 +103,7 @@ class WebHookFileStore(FileStore): stop=tenacity.stop_after_attempt(3), ) def _on_delete(self, path: str) -> None: - """ - Send a DELETE request to the webhook URL when a file is deleted. + """Send a DELETE request to the webhook URL when a file is deleted. This method is retried up to 3 times with a 1-second delay between attempts. diff --git a/openhands/utils/async_utils.py b/openhands/utils/async_utils.py index f3a09b4f80..678f486c23 100644 --- a/openhands/utils/async_utils.py +++ b/openhands/utils/async_utils.py @@ -8,8 +8,7 @@ EXECUTOR = ThreadPoolExecutor() async def call_sync_from_async(fn: Callable, *args, **kwargs): - """ - Shorthand for running a function in the default background thread pool executor + """Shorthand for running a function in the default background thread pool executor and awaiting the result. The nature of synchronous code is that the future returned by this function is not cancellable """ @@ -22,11 +21,9 @@ async def call_sync_from_async(fn: Callable, *args, **kwargs): def call_async_from_sync( corofn: Callable, timeout: float = GENERAL_TIMEOUT, *args, **kwargs ): - """ - Shorthand for running a coroutine in the default background thread pool executor + """Shorthand for running a coroutine in the default background thread pool executor and awaiting the result """ - if corofn is None: raise ValueError('corofn is None') if not asyncio.iscoroutinefunction(corofn): @@ -65,8 +62,7 @@ async def call_coro_in_bg_thread( async def wait_all( iterable: Iterable[Coroutine], timeout: int = GENERAL_TIMEOUT ) -> list: - """ - Shorthand for waiting for all the coroutines in the iterable given in parallel. Creates + """Shorthand for waiting for all the coroutines in the iterable given in parallel. Creates a task for each coroutine. Returns a list of results in the original order. If any single task raised an exception, this is raised. If multiple tasks raised exceptions, an AsyncException is raised containing all exceptions. @@ -104,8 +100,7 @@ class AsyncException(Exception): async def run_in_loop( coro: Coroutine, loop: asyncio.AbstractEventLoop, timeout: float = GENERAL_TIMEOUT ): - """ - Mitigate the dreaded "coroutine was created in a different event loop" error. + """Mitigate the dreaded "coroutine was created in a different event loop" error. Pass the coroutine to a different event loop if needed. """ running_loop = asyncio.get_running_loop() diff --git a/openhands/utils/conversation_summary.py b/openhands/utils/conversation_summary.py index f00792a5c5..11fa030f6a 100644 --- a/openhands/utils/conversation_summary.py +++ b/openhands/utils/conversation_summary.py @@ -63,8 +63,7 @@ async def generate_conversation_title( def get_default_conversation_title(conversation_id: str) -> str: - """ - Generate a default title for a conversation based on its ID. + """Generate a default title for a conversation based on its ID. Args: conversation_id: The ID of the conversation @@ -78,8 +77,7 @@ def get_default_conversation_title(conversation_id: str) -> str: async def auto_generate_title( conversation_id: str, user_id: str | None, file_store: FileStore, settings: Settings ) -> str: - """ - Auto-generate a title for a conversation based on the first user message. + """Auto-generate a title for a conversation based on the first user message. Uses LLM-based title generation if available, otherwise falls back to a simple truncation. Args: diff --git a/openhands/utils/http_session.py b/openhands/utils/http_session.py index f5bb33a749..2244e45fb0 100644 --- a/openhands/utils/http_session.py +++ b/openhands/utils/http_session.py @@ -10,8 +10,7 @@ CLIENT = httpx.Client() @dataclass class HttpSession: - """ - request.Session is reusable after it has been closed. This behavior makes it + """request.Session is reusable after it has been closed. This behavior makes it likely to leak file descriptors (Especially when combined with tenacity). We wrap the session to make it unusable after being closed """ diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py index 798b727611..9c06549e3b 100644 --- a/openhands/utils/prompt.py +++ b/openhands/utils/prompt.py @@ -29,8 +29,7 @@ class RepositoryInfo: @dataclass class ConversationInstructions: - """ - Optional instructions the agent must follow throughout the conversation while addressing the user's initial task + """Optional instructions the agent must follow throughout the conversation while addressing the user's initial task Examples include @@ -42,8 +41,7 @@ class ConversationInstructions: class PromptManager: - """ - Manages prompt templates and includes information from the user's workspace micro-agents and global micro-agents. + """Manages prompt templates and includes information from the user's workspace micro-agents and global micro-agents. This class is dedicated to loading and rendering prompts (system prompt, user prompt). @@ -71,8 +69,7 @@ class PromptManager: ) def _load_template(self, template_name: str) -> Template: - """ - Load a template from the prompt directory. + """Load a template from the prompt directory. Args: template_name: Full filename of the template to load, including the .j2 extension. @@ -105,7 +102,6 @@ class PromptManager: These additional context will convert the current generic agent into a more specialized agent that is tailored to the user's task. """ - return self.user_template.render().strip() def build_workspace_context( diff --git a/openhands/utils/shutdown_listener.py b/openhands/utils/shutdown_listener.py index ac99094ce6..b444c2c2bc 100644 --- a/openhands/utils/shutdown_listener.py +++ b/openhands/utils/shutdown_listener.py @@ -1,5 +1,4 @@ -""" -This module monitors the app for shutdown signals. This exists because the atexit module +"""This module monitors the app for shutdown signals. This exists because the atexit module does not play nocely with stareltte / uvicorn shutdown signals. """ diff --git a/tests/e2e/test_local_runtime.py b/tests/e2e/test_local_runtime.py index 437c2f0060..1a74fa08e5 100644 --- a/tests/e2e/test_local_runtime.py +++ b/tests/e2e/test_local_runtime.py @@ -5,8 +5,7 @@ import tempfile def test_headless_mode_with_dummy_agent_no_browser(): - """ - E2E test: build a docker image from python:3.13, install openhands from source, + """E2E test: build a docker image from python:3.13, install openhands from source, and run a local runtime task in headless mode. """ repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) diff --git a/tests/runtime/test_replay.py b/tests/runtime/test_replay.py index 85e3bbf0a0..3719c43fcf 100644 --- a/tests/runtime/test_replay.py +++ b/tests/runtime/test_replay.py @@ -30,8 +30,7 @@ def _get_config(trajectory_name: str, agent: str = OH_DEFAULT_AGENT): def test_simple_replay(temp_dir, runtime_cls, run_as_openhands): - """ - A simple replay test that involves simple terminal operations and edits + """A simple replay test that involves simple terminal operations and edits (creating a simple 2048 game), using the default agent """ runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) @@ -54,8 +53,7 @@ def test_simple_replay(temp_dir, runtime_cls, run_as_openhands): def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands): - """ - A simple replay test that involves simple terminal operations and edits + """A simple replay test that involves simple terminal operations and edits (writing a Vue.js App), using the default agent Note: @@ -85,8 +83,7 @@ def test_simple_gui_replay(temp_dir, runtime_cls, run_as_openhands): def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands): - """ - Replay requires a consistent initial state to start with, otherwise it might + """Replay requires a consistent initial state to start with, otherwise it might be producing garbage. The trajectory used in this test assumes existence of a file named 'game_2048.py', which doesn't exist when we replay the trajectory (so called inconsistent initial states). This test demonstrates how this would @@ -121,8 +118,7 @@ def test_replay_wrong_initial_state(temp_dir, runtime_cls, run_as_openhands): def test_replay_basic_interactions(temp_dir, runtime_cls, run_as_openhands): - """ - Replay a trajectory that involves interactions, i.e. with user messages + """Replay a trajectory that involves interactions, i.e. with user messages in the middle. This tests two things: 1) The controller should be able to replay all actions without human interference (no asking for user input). diff --git a/tests/unit/core/config/test_config_utils.py b/tests/unit/core/config/test_config_utils.py index 16f4f6e307..c317b16304 100644 --- a/tests/unit/core/config/test_config_utils.py +++ b/tests/unit/core/config/test_config_utils.py @@ -9,8 +9,7 @@ DEFAULT_AGENT_NAME = 'CodeActAgent' def test_finalize_config_cli_disables_jupyter_and_browsing_when_true(): - """ - Test that finalize_config sets enable_jupyter and enable_browsing to False + """Test that finalize_config sets enable_jupyter and enable_browsing to False when runtime is 'cli' and they were initially True. """ app_config = OpenHandsConfig() @@ -30,8 +29,7 @@ def test_finalize_config_cli_disables_jupyter_and_browsing_when_true(): def test_finalize_config_cli_keeps_jupyter_and_browsing_false_when_false(): - """ - Test that finalize_config keeps enable_jupyter and enable_browsing as False + """Test that finalize_config keeps enable_jupyter and enable_browsing as False when runtime is 'cli' and they were initially False. """ app_config = OpenHandsConfig() @@ -51,8 +49,7 @@ def test_finalize_config_cli_keeps_jupyter_and_browsing_false_when_false(): def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_true_by_default(): - """ - Test that finalize_config keeps enable_jupyter and enable_browsing as True (default) + """Test that finalize_config keeps enable_jupyter and enable_browsing as True (default) when runtime is not 'cli'. """ app_config = OpenHandsConfig() @@ -73,8 +70,7 @@ def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_true_by_defaul def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_false_if_set(): - """ - Test that finalize_config keeps enable_jupyter and enable_browsing as False + """Test that finalize_config keeps enable_jupyter and enable_browsing as False when runtime is not 'cli' but they were explicitly set to False. """ app_config = OpenHandsConfig() @@ -94,8 +90,7 @@ def test_finalize_config_other_runtime_keeps_jupyter_and_browsing_false_if_set() def test_finalize_config_no_agents_defined(): - """ - Test that finalize_config runs without error if no agents are defined in the config, + """Test that finalize_config runs without error if no agents are defined in the config, even when runtime is 'cli'. """ app_config = OpenHandsConfig() @@ -109,8 +104,7 @@ def test_finalize_config_no_agents_defined(): def test_finalize_config_multiple_agents_cli_runtime(): - """ - Test that finalize_config correctly disables jupyter and browsing for multiple agents + """Test that finalize_config correctly disables jupyter and browsing for multiple agents when runtime is 'cli'. """ app_config = OpenHandsConfig() @@ -138,8 +132,7 @@ def test_finalize_config_multiple_agents_cli_runtime(): def test_finalize_config_multiple_agents_other_runtime(): - """ - Test that finalize_config correctly keeps jupyter and browsing enabled (or as set) + """Test that finalize_config correctly keeps jupyter and browsing enabled (or as set) for multiple agents when runtime is not 'cli'. """ app_config = OpenHandsConfig() diff --git a/tests/unit/core/config/test_llm_draft_config.py b/tests/unit/core/config/test_llm_draft_config.py index 302cc2b37a..b3070d02f6 100644 --- a/tests/unit/core/config/test_llm_draft_config.py +++ b/tests/unit/core/config/test_llm_draft_config.py @@ -8,8 +8,7 @@ from openhands.core.config.utils import load_from_toml @pytest.fixture def config_toml_without_draft_editor(tmp_path: pathlib.Path) -> str: - """ - This fixture provides a TOML config that DOES NOT contain [llm.draft_editor]. + """This fixture provides a TOML config that DOES NOT contain [llm.draft_editor]. We'll use it to verify that the draft_editor LLM is not present in the config. """ toml_content = """ @@ -31,8 +30,7 @@ api_key = "custom-api-key-1" @pytest.fixture def config_toml_with_draft_editor(tmp_path: pathlib.Path) -> str: - """ - This fixture provides a TOML config that DOES contain [llm.draft_editor]. + """This fixture provides a TOML config that DOES contain [llm.draft_editor]. We'll use it to verify that the draft_editor LLM is loaded as any other custom LLM. """ toml_content = """ @@ -58,8 +56,7 @@ api_key = "custom-api-key-2" def test_no_draft_editor_in_config(config_toml_without_draft_editor): - """ - Test that draft_editor is simply not present if not declared in the TOML. + """Test that draft_editor is simply not present if not declared in the TOML. Previously, we tested fallback behavior. Now, it's simplified to not exist at all. This docstring remains to illustrate that the old fallback logic is removed. """ @@ -73,8 +70,7 @@ def test_no_draft_editor_in_config(config_toml_without_draft_editor): def test_draft_editor_as_named_llm(config_toml_with_draft_editor): - """ - Test that draft_editor is loaded if declared in the TOML under [llm.draft_editor]. + """Test that draft_editor is loaded if declared in the TOML under [llm.draft_editor]. This docstring references the simpler approach: if it exists, it's just another named LLM. """ config = OpenHandsConfig() @@ -90,8 +86,7 @@ def test_draft_editor_as_named_llm(config_toml_with_draft_editor): def test_draft_editor_fallback(config_toml_with_draft_editor): - """ - Test that the draft_editor config does pick up fallbacks + """Test that the draft_editor config does pick up fallbacks normally set in LLMConfig class and from generic LLM. We expect the 'draft_editor' LLM to behave just like any custom LLM would. diff --git a/tests/unit/llm/test_llm.py b/tests/unit/llm/test_llm.py index 0a644fdc8f..9b6ccea9d2 100644 --- a/tests/unit/llm/test_llm.py +++ b/tests/unit/llm/test_llm.py @@ -445,8 +445,7 @@ def test_completion_keyboard_interrupt_handler(mock_litellm_completion, default_ def test_completion_retry_with_llm_no_response_error_zero_temp( mock_litellm_completion, default_config ): - """ - Test that the retry decorator properly handles LLMNoResponseError by: + """Test that the retry decorator properly handles LLMNoResponseError by: 1. First call to llm_completion uses temperature=0 and throws LLMNoResponseError 2. Second call should have temperature=0.2 and return a successful response """ @@ -498,8 +497,7 @@ def test_completion_retry_with_llm_no_response_error_zero_temp( def test_completion_retry_with_llm_no_response_error_nonzero_temp( mock_litellm_completion, default_config ): - """ - Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, + """Test that the retry decorator works for LLMNoResponseError when initial temperature is non-zero, and keeps the original temperature on retry. This test verifies that when LLMNoResponseError is raised with a non-zero temperature: @@ -530,8 +528,7 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp( @patch('openhands.llm.llm.litellm.get_model_info') @patch('openhands.llm.llm.httpx.get') def test_gemini_25_pro_function_calling(mock_httpx_get, mock_get_model_info): - """ - Test that Gemini 2.5 Pro models have function calling enabled by default. + """Test that Gemini 2.5 Pro models have function calling enabled by default. This includes testing various model name formats with different prefixes. """ # Mock the model info response @@ -589,8 +586,7 @@ def test_gemini_25_pro_function_calling(mock_httpx_get, mock_get_model_info): def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_retry( mock_litellm_completion, default_config ): - """ - Test that the retry decorator works for LLMNoResponseError with non-zero temperature + """Test that the retry decorator works for LLMNoResponseError with non-zero temperature and successfully retries while preserving the original temperature. This test verifies that: @@ -650,8 +646,7 @@ def test_completion_retry_with_llm_no_response_error_nonzero_temp_successful_ret def test_completion_retry_with_llm_no_response_error_successful_retry( mock_litellm_completion, default_config ): - """ - Test that the retry decorator works for LLMNoResponseError with zero temperature + """Test that the retry decorator works for LLMNoResponseError with zero temperature and successfully retries with temperature=0.2. This test verifies that: @@ -734,8 +729,7 @@ def test_completion_with_litellm_mock(mock_litellm_completion, default_config): @patch('openhands.llm.llm.litellm_completion') def test_llm_gemini_thinking_parameter(mock_litellm_completion, default_config): - """ - Test that the 'thinking' parameter is correctly passed to litellm_completion + """Test that the 'thinking' parameter is correctly passed to litellm_completion when a Gemini model is used with 'low' reasoning_effort. """ # Configure for Gemini model with low reasoning effort diff --git a/tests/unit/resolver/github/test_issue_handler_error_handling.py b/tests/unit/resolver/github/test_issue_handler_error_handling.py index cb8da4f739..63a79b3558 100644 --- a/tests/unit/resolver/github/test_issue_handler_error_handling.py +++ b/tests/unit/resolver/github/test_issue_handler_error_handling.py @@ -153,9 +153,7 @@ class MockLLMResponse: class DotDict(dict): - """ - A dictionary that supports dot notation access. - """ + """A dictionary that supports dot notation access.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -190,7 +188,6 @@ class DotDict(dict): @patch('openhands.llm.llm.litellm_completion') def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config): """Test that the retry mechanism in guess_success respects wait time between retries.""" - with patch('time.sleep') as mock_sleep: # Simulate a rate limit error followed by a successful response mock_litellm_completion.side_effect = [ diff --git a/tests/unit/resolver/github/test_resolve_issues.py b/tests/unit/resolver/github/test_resolve_issues.py index 7444a46909..6bda19ef05 100644 --- a/tests/unit/resolver/github/test_resolve_issues.py +++ b/tests/unit/resolver/github/test_resolve_issues.py @@ -431,7 +431,6 @@ async def test_process_issue( test_case, ): """Test the process_issue method with different scenarios.""" - # Set up test data issue = Issue( owner='test_owner', diff --git a/tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py b/tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py index 68735d95f9..c3be6277ce 100644 --- a/tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py +++ b/tests/unit/resolver/gitlab/test_gitlab_issue_handler_error_handling.py @@ -155,9 +155,7 @@ class MockLLMResponse: class DotDict(dict): - """ - A dictionary that supports dot notation access. - """ + """A dictionary that supports dot notation access.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -192,7 +190,6 @@ class DotDict(dict): @patch('openhands.llm.llm.litellm_completion') def test_guess_success_rate_limit_wait_time(mock_litellm_completion, default_config): """Test that the retry mechanism in guess_success respects wait time between retries.""" - with patch('time.sleep') as mock_sleep: # Simulate a rate limit error followed by a successful response mock_litellm_completion.side_effect = [ diff --git a/tests/unit/test_agent_controller.py b/tests/unit/test_agent_controller.py index cac91b84ad..c0bfe21573 100644 --- a/tests/unit/test_agent_controller.py +++ b/tests/unit/test_agent_controller.py @@ -506,7 +506,6 @@ async def test_budget_reset_on_continue(mock_agent, mock_event_stream): 2. LLM budget does not reset when user continues 3. Budget is extended by adding the initial budget cap to the current accumulated cost """ - # Create a real Metrics instance shared between controller state and llm metrics = Metrics() metrics.accumulated_cost = 6.0 @@ -1414,7 +1413,6 @@ async def test_action_metrics_copy(mock_agent): @pytest.mark.asyncio async def test_condenser_metrics_included(mock_agent, test_event_stream): """Test that metrics from the condenser's LLM are included in the action metrics.""" - # Set up agent metrics agent_metrics = Metrics(model_name='agent-model') agent_metrics.accumulated_cost = 0.05 diff --git a/tests/unit/test_agent_delegation.py b/tests/unit/test_agent_delegation.py index 98c1920a92..1c5bc6f545 100644 --- a/tests/unit/test_agent_delegation.py +++ b/tests/unit/test_agent_delegation.py @@ -80,11 +80,10 @@ def mock_child_agent(): @pytest.mark.asyncio async def test_delegation_flow(mock_parent_agent, mock_child_agent, mock_event_stream): - """ - Test that when the parent agent delegates to a child - 1. the parent's delegate is set, and once the child finishes, the parent is cleaned up properly. - 2. metrics are accumulated globally (delegate is adding to the parents metrics) - 3. local metrics for the delegate are still accessible + """Test that when the parent agent delegates to a child + 1. the parent's delegate is set, and once the child finishes, the parent is cleaned up properly. + 2. metrics are accumulated globally (delegate is adding to the parents metrics) + 3. local metrics for the delegate are still accessible """ # Mock the agent class resolution so that AgentController can instantiate mock_child_agent Agent.get_cls = Mock(return_value=lambda llm, config: mock_child_agent) @@ -258,8 +257,7 @@ async def test_delegate_step_different_states( mock_delegate.close = AsyncMock() async def call_on_event_with_new_loop(): - """ - In this thread, create and set a fresh event loop, so that the run_until_complete() + """In this thread, create and set a fresh event loop, so that the run_until_complete() calls inside controller.on_event(...) find a valid loop. """ loop_in_thread = asyncio.new_event_loop() @@ -296,9 +294,7 @@ async def test_delegate_step_different_states( async def test_delegate_hits_global_limits( mock_child_agent, mock_event_stream, mock_parent_agent ): - """ - Global limits from control flags should apply to delegates - """ + """Global limits from control flags should apply to delegates""" # Mock the agent class resolution so that AgentController can instantiate mock_child_agent Agent.get_cls = Mock(return_value=lambda llm, config: mock_child_agent) diff --git a/tests/unit/test_agent_session.py b/tests/unit/test_agent_session.py index 673add1da4..2697380f4a 100644 --- a/tests/unit/test_agent_session.py +++ b/tests/unit/test_agent_session.py @@ -55,7 +55,6 @@ def mock_agent(): @pytest.mark.asyncio async def test_agent_session_start_with_no_state(mock_agent): """Test that AgentSession.start() works correctly when there's no state to restore""" - # Setup file_store = InMemoryFileStore({}) session = AgentSession( @@ -143,7 +142,6 @@ async def test_agent_session_start_with_no_state(mock_agent): @pytest.mark.asyncio async def test_agent_session_start_with_restored_state(mock_agent): """Test that AgentSession.start() works correctly when there's a state to restore""" - # Setup file_store = InMemoryFileStore({}) session = AgentSession( @@ -234,7 +232,6 @@ async def test_agent_session_start_with_restored_state(mock_agent): @pytest.mark.asyncio async def test_metrics_centralization_and_sharing(mock_agent): """Test that metrics are centralized and shared between controller and agent.""" - # Setup file_store = InMemoryFileStore({}) session = AgentSession( @@ -322,7 +319,6 @@ async def test_metrics_centralization_and_sharing(mock_agent): @pytest.mark.asyncio async def test_budget_control_flag_syncs_with_metrics(mock_agent): """Test that BudgetControlFlag's current value matches the accumulated costs.""" - # Setup file_store = InMemoryFileStore({}) session = AgentSession( diff --git a/tests/unit/test_agents.py b/tests/unit/test_agents.py index 32ad833612..4e6b4913e0 100644 --- a/tests/unit/test_agents.py +++ b/tests/unit/test_agents.py @@ -322,7 +322,6 @@ def test_mismatched_tool_call_events_and_auto_add_system_message( This also tests that the system message is automatically added to the event stream if SystemMessageAction is not present. """ - tool_call_metadata = Mock( spec=ToolCallMetadata, model_response=Mock( diff --git a/tests/unit/test_bitbucket.py b/tests/unit/test_bitbucket.py index effc01ed29..513ce3f2b7 100644 --- a/tests/unit/test_bitbucket.py +++ b/tests/unit/test_bitbucket.py @@ -353,8 +353,7 @@ class TestBitbucketProviderDomain(unittest.TestCase): # Provider Token Validation Tests @pytest.mark.asyncio async def test_validate_provider_token_with_bitbucket_token(): - """ - Test that validate_provider_token correctly identifies a Bitbucket token + """Test that validate_provider_token correctly identifies a Bitbucket token and doesn't try to validate it as GitHub or GitLab. """ # Mock the service classes to avoid actual API calls @@ -393,8 +392,7 @@ async def test_validate_provider_token_with_bitbucket_token(): @pytest.mark.asyncio async def test_check_provider_tokens_with_only_bitbucket(): - """ - Test that check_provider_tokens doesn't try to validate GitHub or GitLab tokens + """Test that check_provider_tokens doesn't try to validate GitHub or GitLab tokens when only a Bitbucket token is provided. """ # Create a mock validate_provider_token function @@ -432,10 +430,7 @@ async def test_check_provider_tokens_with_only_bitbucket(): @pytest.mark.asyncio async def test_bitbucket_sort_parameter_mapping(): - """ - Test that the Bitbucket service correctly maps sort parameters. - """ - + """Test that the Bitbucket service correctly maps sort parameters.""" # Create a service instance service = BitBucketService(token=SecretStr('test-token')) @@ -466,9 +461,7 @@ async def test_bitbucket_sort_parameter_mapping(): @pytest.mark.asyncio async def test_bitbucket_pagination(): - """ - Test that the Bitbucket service correctly handles pagination for repositories. - """ + """Test that the Bitbucket service correctly handles pagination for repositories.""" # Create a service instance service = BitBucketService(token=SecretStr('test-token')) @@ -540,9 +533,7 @@ async def test_bitbucket_pagination(): @pytest.mark.asyncio async def test_validate_provider_token_with_empty_tokens(): - """ - Test that validate_provider_token handles empty tokens correctly. - """ + """Test that validate_provider_token handles empty tokens correctly.""" # Create a mock for each service with ( patch('openhands.integrations.utils.GitHubService') as mock_github_service, diff --git a/tests/unit/test_cli_thought_order.py b/tests/unit/test_cli_thought_order.py index c77eff7a14..46d6506138 100644 --- a/tests/unit/test_cli_thought_order.py +++ b/tests/unit/test_cli_thought_order.py @@ -1,5 +1,4 @@ -""" -Tests for CLI thought display order fix. +"""Tests for CLI thought display order fix. This ensures that agent thoughts are displayed before commands, not after. """ diff --git a/tests/unit/test_config_precedence.py b/tests/unit/test_config_precedence.py index 3881472ea4..c56ee7a240 100644 --- a/tests/unit/test_config_precedence.py +++ b/tests/unit/test_config_precedence.py @@ -314,7 +314,6 @@ def test_cli_settings_json_not_override_config_toml( def test_default_values_applied_when_none(): """Test that default values are applied when config values are None.""" - # Create mock args with None values for agent_cls and max_iterations mock_args = MagicMock() mock_args.config_file = None @@ -336,7 +335,6 @@ def test_default_values_applied_when_none(): def test_cli_args_override_defaults(): """Test that CLI arguments override default values.""" - # Create mock args with custom values mock_args = MagicMock() mock_args.config_file = None @@ -358,7 +356,6 @@ def test_cli_args_override_defaults(): def test_cli_args_none_uses_config_toml_values(): """Test that when CLI args agent_cls and max_iterations are None, config.toml values are used.""" - # Create mock args with None values for agent_cls and max_iterations mock_args = MagicMock() mock_args.config_file = None diff --git a/tests/unit/test_contextual_events.py b/tests/unit/test_contextual_events.py index 6c11f97ad1..6f8fc9b31c 100644 --- a/tests/unit/test_contextual_events.py +++ b/tests/unit/test_contextual_events.py @@ -79,8 +79,7 @@ def create_test_events(event_specs: list[dict]) -> list[Event]: def test_get_contextual_events_basic_retrieval(): - """ - Tests basic retrieval of events, ensuring correct count, order, and string formatting. + """Tests basic retrieval of events, ensuring correct count, order, and string formatting. All events in this test are of types that are NOT filtered out by default. """ mock_event_stream = MagicMock(spec=EventStream) @@ -196,9 +195,7 @@ def test_get_contextual_events_basic_retrieval(): def test_get_contextual_events_filtering(): - """ - Tests that specified event types and hidden events are filtered out. - """ + """Tests that specified event types and hidden events are filtered out.""" mock_event_stream = MagicMock(spec=EventStream) target_event_id = 3 # Target a non-filtered event @@ -320,8 +317,7 @@ def test_get_contextual_events_filtering(): def test_get_contextual_events_target_at_beginning(): - """ - Tests behavior when the target event_id is at the beginning of the stream, + """Tests behavior when the target event_id is at the beginning of the stream, resulting in fewer than context_size events before it. """ mock_event_stream = MagicMock(spec=EventStream) @@ -391,8 +387,7 @@ def test_get_contextual_events_target_at_beginning(): def test_get_contextual_events_target_at_end(): - """ - Tests behavior when the target event_id is at the end of the stream, + """Tests behavior when the target event_id is at the end of the stream, resulting in fewer than context_size + 1 events after it. """ mock_event_stream = MagicMock(spec=EventStream) @@ -472,9 +467,7 @@ def test_get_contextual_events_target_at_end(): def test_get_contextual_events_empty_search_results(): - """ - Tests behavior when search_events returns empty lists for before and after. - """ + """Tests behavior when search_events returns empty lists for before and after.""" mock_event_stream = MagicMock(spec=EventStream) target_event_id = 10 context_size = 4 @@ -505,8 +498,7 @@ def test_get_contextual_events_empty_search_results(): def test_get_contextual_events_all_events_filtered(): - """ - Tests behavior when all events in the context window are of types + """Tests behavior when all events in the context window are of types that should be filtered out. """ mock_event_stream = MagicMock(spec=EventStream) diff --git a/tests/unit/test_conversation_memory.py b/tests/unit/test_conversation_memory.py index 7ae0122f38..154021b3d5 100644 --- a/tests/unit/test_conversation_memory.py +++ b/tests/unit/test_conversation_memory.py @@ -231,8 +231,7 @@ def test_ensure_initial_user_message_different_user_msg_at_index_1( def test_ensure_initial_user_message_different_user_msg_at_index_1_and_orphaned_obs( conversation_memory, initial_user_action ): - """ - Test process_events when an incorrect user message is at index 1 AND + """Test process_events when an incorrect user message is at index 1 AND an orphaned observation (with tool_call_metadata but no matching action) exists. Expect: System msg, CORRECT initial user msg, the incorrect user msg (shifted). The orphaned observation should be filtered out. @@ -1397,8 +1396,7 @@ def _create_mock_tool_call_metadata( def test_process_events_partial_history(conversation_memory): - """ - Tests process_events with full and partial histories to verify + """Tests process_events with full and partial histories to verify _ensure_system_message, _ensure_initial_user_message, and tool call matching logic. """ # --- Define Common Events --- diff --git a/tests/unit/test_conversation_window_condenser.py b/tests/unit/test_conversation_window_condenser.py index 491c730242..5a17345997 100644 --- a/tests/unit/test_conversation_window_condenser.py +++ b/tests/unit/test_conversation_window_condenser.py @@ -1,5 +1,4 @@ -""" -Unit tests for ConversationWindowCondenser. +"""Unit tests for ConversationWindowCondenser. These tests mirror the tests for `_apply_conversation_window` in the AgentController, but adapted to test the condenser implementation. The ConversationWindowCondenser diff --git a/tests/unit/test_empty_image_url_fix_v2.py b/tests/unit/test_empty_image_url_fix_v2.py index acb32f9c96..07a90c122f 100644 --- a/tests/unit/test_empty_image_url_fix_v2.py +++ b/tests/unit/test_empty_image_url_fix_v2.py @@ -9,7 +9,6 @@ from openhands.utils.prompt import PromptManager def test_empty_image_url_handling(): """Test that empty image URLs are properly filtered out and notification text is added.""" - # Create a browser observation with empty screenshot and set_of_marks browser_obs = BrowserOutputObservation( url='https://example.com', @@ -70,7 +69,6 @@ def test_empty_image_url_handling(): def test_valid_image_url_handling(): """Test that valid image URLs are properly handled.""" - # Create a browser observation with valid base64 image data valid_base64_image = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==' @@ -119,7 +117,6 @@ def test_valid_image_url_handling(): def test_mixed_image_url_handling(): """Test handling of mixed valid and invalid image URLs.""" - # Create a browser observation with one empty and one valid image valid_base64_image = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==' diff --git a/tests/unit/test_git_handler.py b/tests/unit/test_git_handler.py index 1c7908ed3d..b26be80348 100644 --- a/tests/unit/test_git_handler.py +++ b/tests/unit/test_git_handler.py @@ -160,9 +160,7 @@ class TestGitHandler(unittest.TestCase): self.write_file(nested_2, 'unstaged_add.txt') def test_get_git_changes(self): - """ - Test with unpushed commits, staged commits, and unstaged commits - """ + """Test with unpushed commits, staged commits, and unstaged commits""" changes = self.git_handler.get_git_changes() expected_changes = [ @@ -180,9 +178,7 @@ class TestGitHandler(unittest.TestCase): assert changes == expected_changes def test_get_git_changes_after_push(self): - """ - Test with staged commits, and unstaged commits - """ + """Test with staged commits, and unstaged commits""" self.run_command('git push -u origin feature-branch', self.local_dir) changes = self.git_handler.get_git_changes() @@ -198,9 +194,7 @@ class TestGitHandler(unittest.TestCase): assert changes == expected_changes def test_get_git_changes_nested_repos(self): - """ - Test with staged commits, and unstaged commits - """ + """Test with staged commits, and unstaged commits""" self.setup_nested() changes = self.git_handler.get_git_changes() @@ -261,7 +255,6 @@ class TestGitHandler(unittest.TestCase): def test_get_git_changes_fallback(self): """Test that get_git_changes falls back to creating a script file when needed.""" - # Break the git changes command with patch( 'openhands.runtime.utils.git_handler.GIT_CHANGES_CMD', @@ -287,7 +280,6 @@ class TestGitHandler(unittest.TestCase): def test_get_git_diff_fallback(self): """Test that get_git_diff delegates to the git_diff module.""" - # Break the git diff command with patch( 'openhands.runtime.utils.git_handler.GIT_DIFF_CMD', 'non-existant-command' diff --git a/tests/unit/test_image_content_validation.py b/tests/unit/test_image_content_validation.py index f281e9fa2a..56a2800b7c 100644 --- a/tests/unit/test_image_content_validation.py +++ b/tests/unit/test_image_content_validation.py @@ -10,7 +10,6 @@ from openhands.core.message import ImageContent def test_image_content_serializes_all_urls(): """Test that ImageContent serializes all URLs it's given, including empty ones.""" - # Create ImageContent with mixed valid and invalid URLs image_content = ImageContent( image_urls=[ @@ -45,7 +44,6 @@ def test_image_content_serializes_all_urls(): def test_image_content_serializes_empty_urls(): """Test that ImageContent serializes empty URLs (filtering happens upstream).""" - # Create ImageContent with only empty URLs image_content = ImageContent(image_urls=['', ' ']) @@ -60,7 +58,6 @@ def test_image_content_serializes_empty_urls(): def test_image_content_all_valid_urls(): """Test that ImageContent preserves all valid URLs.""" - valid_urls = [ 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==', 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAv/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwA/wA==', diff --git a/tests/unit/test_mcp_integration.py b/tests/unit/test_mcp_integration.py index 45502902ce..b52f9043f6 100644 --- a/tests/unit/test_mcp_integration.py +++ b/tests/unit/test_mcp_integration.py @@ -13,7 +13,6 @@ from openhands.storage.settings.file_settings_store import FileSettingsStore @pytest.mark.asyncio async def test_user_auth_mcp_merging_integration(): """Test that MCP merging works in the user auth flow.""" - # Mock config.toml settings config_settings = Settings( mcp_config=MCPConfig( @@ -57,7 +56,6 @@ async def test_user_auth_mcp_merging_integration(): @pytest.mark.asyncio async def test_user_auth_caching_behavior(): """Test that user auth caches the merged settings correctly.""" - config_settings = Settings( mcp_config=MCPConfig( sse_servers=[MCPSSEServerConfig(url='http://config-server.com')] @@ -102,7 +100,6 @@ async def test_user_auth_caching_behavior(): @pytest.mark.asyncio async def test_user_auth_no_stored_settings(): """Test behavior when no settings are stored (first time user).""" - user_auth = DefaultUserAuth() # Mock settings store to return None (no stored settings) diff --git a/tests/unit/test_message_utils.py b/tests/unit/test_message_utils.py index 2cc5ad92f1..c107b43e16 100644 --- a/tests/unit/test_message_utils.py +++ b/tests/unit/test_message_utils.py @@ -57,8 +57,7 @@ def test_get_token_usage_for_event(): def test_get_token_usage_for_event_id(): - """ - Test that we search backward from the event with the given id, + """Test that we search backward from the event with the given id, finding the first usage record that matches a response_id in that or previous events. """ metrics = Metrics(model_name='test-model') @@ -119,8 +118,7 @@ def test_get_token_usage_for_event_id(): def test_get_token_usage_for_event_fallback(): - """ - Verify that if tool_call_metadata.model_response.id is missing or mismatched, + """Verify that if tool_call_metadata.model_response.id is missing or mismatched, but event.response_id is set to a valid usage ID, we find the usage record via fallback. """ metrics = Metrics(model_name='fallback-test') @@ -159,11 +157,9 @@ def test_get_token_usage_for_event_fallback(): def test_get_token_usage_for_event_id_fallback(): - """ - Verify that get_token_usage_for_event_id also falls back to event.response_id + """Verify that get_token_usage_for_event_id also falls back to event.response_id if tool_call_metadata.model_response.id is missing or mismatched. """ - # NOTE: this should never happen (tm), but there is a hint in the code that it might: # message_utils.py: 166 ("(overwrites any previous message with the same response_id)") # so we'll handle it gracefully. diff --git a/tests/unit/test_nested_event_store.py b/tests/unit/test_nested_event_store.py index 4792f0de01..e6dda119c5 100644 --- a/tests/unit/test_nested_event_store.py +++ b/tests/unit/test_nested_event_store.py @@ -1,5 +1,4 @@ -""" -Unit tests for the NestedEventStore class. +"""Unit tests for the NestedEventStore class. These tests focus on the search_events method, which retrieves events from a remote API and applies filtering based on various criteria. diff --git a/tests/unit/test_provider_immutability.py b/tests/unit/test_provider_immutability.py index 5e7302d074..b820a588f3 100644 --- a/tests/unit/test_provider_immutability.py +++ b/tests/unit/test_provider_immutability.py @@ -116,7 +116,6 @@ def test_settings_immutability(): def test_provider_handler_immutability(): """Test that ProviderHandler maintains token immutability""" - # Create initial tokens tokens = MappingProxyType( {ProviderType.GITHUB: ProviderToken(token=SecretStr('test'))} @@ -309,7 +308,6 @@ async def test_set_event_stream_secrets(event_stream): def test_check_cmd_action_for_provider_token_ref(): """Test detection of provider tokens in command actions""" - # Test command with GitHub token cmd = CmdRunAction(command='echo $GITHUB_TOKEN') providers = ProviderHandler.check_cmd_action_for_provider_token_ref(cmd) diff --git a/tests/unit/test_runtime_import_robustness.py b/tests/unit/test_runtime_import_robustness.py index 85e9c736f7..00907056e1 100644 --- a/tests/unit/test_runtime_import_robustness.py +++ b/tests/unit/test_runtime_import_robustness.py @@ -1,5 +1,4 @@ -""" -Test that the runtime import system is robust against broken third-party dependencies. +"""Test that the runtime import system is robust against broken third-party dependencies. This test specifically addresses the issue where broken third-party runtime dependencies (like runloop-api-client with incompatible httpx_aiohttp versions) would break the entire @@ -14,7 +13,6 @@ import pytest def test_cli_import_with_broken_third_party_runtime(): """Test that CLI can be imported even with broken third-party runtime dependencies.""" - # Clear any cached modules to ensure fresh import modules_to_clear = [ k for k in sys.modules.keys() if 'openhands' in k or 'third_party' in k @@ -33,7 +31,6 @@ def test_cli_import_with_broken_third_party_runtime(): def test_runtime_import_robustness(): """Test that runtime import system is robust against broken dependencies.""" - # Clear any cached runtime modules modules_to_clear = [k for k in sys.modules.keys() if 'openhands.runtime' in k] for module in modules_to_clear: @@ -50,7 +47,6 @@ def test_runtime_import_robustness(): def test_get_runtime_cls_works(): """Test that get_runtime_cls works even when third-party runtimes are broken.""" - # Import the runtime module import openhands.runtime @@ -68,7 +64,6 @@ def test_get_runtime_cls_works(): def test_runtime_exception_handling(): """Test that the runtime discovery code properly handles exceptions.""" - # This test verifies that the fix in openhands/runtime/__init__.py # properly catches all exceptions (not just ImportError) during # third-party runtime discovery @@ -84,7 +79,6 @@ def test_runtime_exception_handling(): def test_runtime_import_exception_handling_behavior(): """Test that runtime import handles ImportError silently but logs other exceptions.""" - # Test the exception handling logic by simulating the exact code from runtime init from io import StringIO @@ -139,7 +133,6 @@ def test_runtime_import_exception_handling_behavior(): def test_import_error_handled_silently(caplog): """Test that ImportError is handled silently (no logging) as it means library is not installed.""" - # Simulate the exact code path for ImportError logging.getLogger('openhands.runtime') diff --git a/tests/unit/test_secrets_api.py b/tests/unit/test_secrets_api.py index cae9a34c0c..36bddbb6b0 100644 --- a/tests/unit/test_secrets_api.py +++ b/tests/unit/test_secrets_api.py @@ -52,7 +52,6 @@ def file_secrets_store(temp_dir): @pytest.mark.asyncio async def test_load_custom_secrets_names(test_client, file_secrets_store): """Test loading custom secrets names.""" - # Create initial settings with custom secrets custom_secrets = { 'API_KEY': CustomSecret(secret=SecretStr('api-key-value')), @@ -118,7 +117,6 @@ async def test_load_custom_secrets_names_empty(test_client, file_secrets_store): @pytest.mark.asyncio async def test_add_custom_secret(test_client, file_secrets_store): """Test adding a new custom secret.""" - # Create initial settings with provider tokens but no custom secrets provider_tokens = { ProviderType.GITHUB: ProviderToken(token=SecretStr('github-token')) @@ -149,7 +147,6 @@ async def test_create_custom_secret_with_no_existing_secrets( test_client, file_secrets_store ): """Test creating a custom secret when there are no existing secrets at all.""" - # Don't store any initial settings - this simulates a completely new user # or a situation where the secrets store is empty @@ -180,7 +177,6 @@ async def test_create_custom_secret_with_no_existing_secrets( @pytest.mark.asyncio async def test_update_existing_custom_secret(test_client, file_secrets_store): """Test updating an existing custom secret's name and description (cannot change value once set).""" - # Create initial settings with a custom secret custom_secrets = {'API_KEY': CustomSecret(secret=SecretStr('old-api-key'))} provider_tokens = { @@ -218,7 +214,6 @@ async def test_update_existing_custom_secret(test_client, file_secrets_store): @pytest.mark.asyncio async def test_add_multiple_custom_secrets(test_client, file_secrets_store): """Test adding multiple custom secrets at once.""" - # Create initial settings with one custom secret custom_secrets = { 'EXISTING_SECRET': CustomSecret(secret=SecretStr('existing-value')) @@ -280,7 +275,6 @@ async def test_add_multiple_custom_secrets(test_client, file_secrets_store): @pytest.mark.asyncio async def test_delete_custom_secret(test_client, file_secrets_store): """Test deleting a custom secret.""" - # Create initial settings with multiple custom secrets custom_secrets = { 'API_KEY': CustomSecret(secret=SecretStr('api-key-value')), @@ -320,7 +314,6 @@ async def test_delete_custom_secret(test_client, file_secrets_store): @pytest.mark.asyncio async def test_delete_nonexistent_custom_secret(test_client, file_secrets_store): """Test deleting a custom secret that doesn't exist.""" - # Create initial settings with a custom secret custom_secrets = { 'API_KEY': CustomSecret(secret=SecretStr('api-key-value'), description='') diff --git a/tests/unit/test_settings_api.py b/tests/unit/test_settings_api.py index 2923563a92..d2978d941e 100644 --- a/tests/unit/test_settings_api.py +++ b/tests/unit/test_settings_api.py @@ -73,7 +73,6 @@ def test_client(): @pytest.mark.asyncio async def test_settings_api_endpoints(test_client): """Test that the settings API endpoints work with the new auth system""" - # Test data with remote_runtime_resource_factor settings_data = { 'language': 'en', diff --git a/tests/unit/test_settings_store_functions.py b/tests/unit/test_settings_store_functions.py index 0a66f4ba66..688a02d75a 100644 --- a/tests/unit/test_settings_store_functions.py +++ b/tests/unit/test_settings_store_functions.py @@ -238,7 +238,6 @@ async def test_store_provider_tokens_new_tokens(test_client, file_secrets_store) @pytest.mark.asyncio async def test_store_provider_tokens_update_existing(test_client, file_secrets_store): """Test store_provider_tokens updates existing tokens.""" - # Create existing settings with a GitHub token github_token = ProviderToken(token=SecretStr('old-token')) provider_tokens = {ProviderType.GITHUB: github_token} @@ -266,7 +265,6 @@ async def test_store_provider_tokens_update_existing(test_client, file_secrets_s @pytest.mark.asyncio async def test_store_provider_tokens_keep_existing(test_client, file_secrets_store): """Test store_provider_tokens keeps existing tokens when empty string provided.""" - # Create existing secrets with a GitHub token github_token = ProviderToken(token=SecretStr('existing-token')) provider_tokens = {ProviderType.GITHUB: github_token} diff --git a/tests/unit/test_windows_bash.py b/tests/unit/test_windows_bash.py index ae4e58488f..856985ab66 100644 --- a/tests/unit/test_windows_bash.py +++ b/tests/unit/test_windows_bash.py @@ -214,7 +214,8 @@ def test_long_running_command(windows_bash_session): def test_multiple_commands_rejected_and_individual_execution(windows_bash_session): """Test that executing multiple commands separated by newline is rejected, - but individual commands (including multiline) execute correctly.""" + but individual commands (including multiline) execute correctly. + """ # Define a list of commands, including multiline and special characters cmds = [ 'Get-ChildItem', diff --git a/third_party/__init__.py b/third_party/__init__.py index fb4b6e3669..e55d3a2758 100644 --- a/third_party/__init__.py +++ b/third_party/__init__.py @@ -8,7 +8,7 @@ To use third-party runtimes, install OpenHands with the third_party_runtimes ext Available third-party runtimes: - daytona: Daytona cloud development environment -- e2b: E2B secure sandbox environment +- e2b: E2B secure sandbox environment - modal: Modal cloud compute platform - runloop: Runloop AI sandbox environment -""" \ No newline at end of file +""" diff --git a/third_party/runtime/__init__.py b/third_party/runtime/__init__.py index c5cdfaaa5c..342b8eb41b 100644 --- a/third_party/runtime/__init__.py +++ b/third_party/runtime/__init__.py @@ -1 +1 @@ -"""Third-party runtime implementations.""" \ No newline at end of file +"""Third-party runtime implementations.""" diff --git a/third_party/runtime/impl/__init__.py b/third_party/runtime/impl/__init__.py index 27f1695b76..e23e398b65 100644 --- a/third_party/runtime/impl/__init__.py +++ b/third_party/runtime/impl/__init__.py @@ -1 +1 @@ -"""Third-party runtime implementation modules.""" \ No newline at end of file +"""Third-party runtime implementation modules.""" diff --git a/third_party/runtime/impl/daytona/__init__.py b/third_party/runtime/impl/daytona/__init__.py index 467302bdbb..ae61d8f7ef 100644 --- a/third_party/runtime/impl/daytona/__init__.py +++ b/third_party/runtime/impl/daytona/__init__.py @@ -4,4 +4,4 @@ This runtime reads configuration directly from environment variables: - DAYTONA_API_KEY: API key for Daytona authentication - DAYTONA_API_URL: Daytona API URL endpoint (defaults to https://app.daytona.io/api) - DAYTONA_TARGET: Daytona target region (defaults to 'eu') -""" \ No newline at end of file +""" diff --git a/third_party/runtime/impl/daytona/daytona_runtime.py b/third_party/runtime/impl/daytona/daytona_runtime.py index 020b0458cc..d922cb4c77 100644 --- a/third_party/runtime/impl/daytona/daytona_runtime.py +++ b/third_party/runtime/impl/daytona/daytona_runtime.py @@ -24,7 +24,7 @@ from openhands.runtime.utils.request import RequestHTTPError from openhands.utils.async_utils import call_sync_from_async from openhands.utils.tenacity_stop import stop_if_should_exit -OPENHANDS_SID_LABEL = 'OpenHands_SID' +OPENHANDS_SID_LABEL = "OpenHands_SID" class DaytonaRuntime(ActionExecutionClient): @@ -37,7 +37,7 @@ class DaytonaRuntime(ActionExecutionClient): self, config: OpenHandsConfig, event_stream: EventStream, - sid: str = 'default', + sid: str = "default", plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, status_callback: Callable | None = None, @@ -47,11 +47,13 @@ class DaytonaRuntime(ActionExecutionClient): git_provider_tokens: PROVIDER_TOKEN_TYPE | None = None, ): # Read Daytona configuration from environment variables - daytona_api_key = os.getenv('DAYTONA_API_KEY') + daytona_api_key = os.getenv("DAYTONA_API_KEY") if not daytona_api_key: - raise ValueError('DAYTONA_API_KEY environment variable is required for Daytona runtime') - daytona_api_url = os.getenv('DAYTONA_API_URL', 'https://app.daytona.io/api') - daytona_target = os.getenv('DAYTONA_TARGET', 'eu') + raise ValueError( + "DAYTONA_API_KEY environment variable is required for Daytona runtime" + ) + daytona_api_url = os.getenv("DAYTONA_API_URL", "https://app.daytona.io/api") + daytona_target = os.getenv("DAYTONA_TARGET", "eu") self.config = config self.sid = sid @@ -68,8 +70,8 @@ class DaytonaRuntime(ActionExecutionClient): # workspace_base cannot be used because we can't bind mount into a workspace. if self.config.workspace_base is not None: self.log( - 'warning', - 'Workspace mounting is not supported in the Daytona runtime.', + "warning", + "Workspace mounting is not supported in the Daytona runtime.", ) super().__init__( @@ -90,15 +92,15 @@ class DaytonaRuntime(ActionExecutionClient): sandboxes = self.daytona.list({OPENHANDS_SID_LABEL: self.sid}) if len(sandboxes) == 0: return None - assert len(sandboxes) == 1, 'Multiple sandboxes found for SID' + assert len(sandboxes) == 1, "Multiple sandboxes found for SID" sandbox = sandboxes[0] - self.log('info', f'Attached to existing sandbox with id: {self.sid}') + self.log("info", f"Attached to existing sandbox with id: {self.sid}") except Exception: self.log( - 'warning', - f'Failed to attach to existing sandbox with id: {self.sid}', + "warning", + f"Failed to attach to existing sandbox with id: {self.sid}", ) sandbox = None @@ -106,23 +108,25 @@ class DaytonaRuntime(ActionExecutionClient): def _get_creation_env_vars(self) -> dict[str, str]: env_vars: dict[str, str] = { - 'port': str(self._sandbox_port), - 'PYTHONUNBUFFERED': '1', - 'VSCODE_PORT': str(self._vscode_port), + "port": str(self._sandbox_port), + "PYTHONUNBUFFERED": "1", + "VSCODE_PORT": str(self._vscode_port), } if self.config.debug: - env_vars['DEBUG'] = 'true' + env_vars["DEBUG"] = "true" return env_vars def _create_sandbox(self) -> Sandbox: # Check if auto-stop should be disabled - otherwise have it trigger after 60 minutes - disable_auto_stop = os.getenv('DAYTONA_DISABLE_AUTO_STOP', 'false').lower() == 'true' + disable_auto_stop = ( + os.getenv("DAYTONA_DISABLE_AUTO_STOP", "false").lower() == "true" + ) auto_stop_interval = 0 if disable_auto_stop else 60 sandbox_params = CreateSandboxFromSnapshotParams( - language='python', + language="python", snapshot=self.config.sandbox.runtime_container_image, public=True, env_vars=self._get_creation_env_vars(), @@ -132,7 +136,7 @@ class DaytonaRuntime(ActionExecutionClient): return self.daytona.create(sandbox_params) def _construct_api_url(self, port: int) -> str: - assert self.sandbox is not None, 'Sandbox is not initialized' + assert self.sandbox is not None, "Sandbox is not initialized" return self.sandbox.get_preview_link(port).url @property @@ -140,26 +144,26 @@ class DaytonaRuntime(ActionExecutionClient): return self.api_url def _start_action_execution_server(self) -> None: - assert self.sandbox is not None, 'Sandbox is not initialized' + assert self.sandbox is not None, "Sandbox is not initialized" start_command: list[str] = get_action_execution_server_startup_command( server_port=self._sandbox_port, plugins=self.plugins, app_config=self.config, override_user_id=1000, - override_username='openhands', + override_username="openhands", ) start_command_str: str = ( - f'mkdir -p {self.config.workspace_mount_path_in_sandbox} && cd /openhands/code && ' - + ' '.join(start_command) + f"mkdir -p {self.config.workspace_mount_path_in_sandbox} && cd /openhands/code && " + + " ".join(start_command) ) self.log( - 'debug', - f'Starting action execution server with command: {start_command_str}', + "debug", + f"Starting action execution server with command: {start_command_str}", ) - exec_session_id = 'action-execution-server' + exec_session_id = "action-execution-server" self.sandbox.process.create_session(exec_session_id) exec_command = self.sandbox.process.execute_session_command( @@ -167,7 +171,7 @@ class DaytonaRuntime(ActionExecutionClient): SessionExecuteRequest(command=start_command_str, var_async=True), ) - self.log('debug', f'exec_command_id: {exec_command.cmd_id}') + self.log("debug", f"exec_command_id: {exec_command.cmd_id}") @tenacity.retry( stop=tenacity.stop_after_delay(120) | stop_if_should_exit(), @@ -189,30 +193,30 @@ class DaytonaRuntime(ActionExecutionClient): if self.sandbox is None: self.set_runtime_status(RuntimeStatus.BUILDING_RUNTIME) self.sandbox = await call_sync_from_async(self._create_sandbox) - self.log('info', f'Created a new sandbox with id: {self.sid}') + self.log("info", f"Created a new sandbox with id: {self.sid}") self.api_url = self._construct_api_url(self._sandbox_port) state = self.sandbox.state - if state == 'stopping': - self.log('info', 'Waiting for the Daytona sandbox to stop...') + if state == "stopping": + self.log("info", "Waiting for the Daytona sandbox to stop...") await call_sync_from_async(self.sandbox.wait_for_sandbox_stop) - state = 'stopped' + state = "stopped" - if state == 'stopped': - self.log('info', 'Starting the Daytona sandbox...') + if state == "stopped": + self.log("info", "Starting the Daytona sandbox...") await call_sync_from_async(self.sandbox.start) should_start_action_execution_server = True if should_start_action_execution_server: await call_sync_from_async(self._start_action_execution_server) self.log( - 'info', - f'Container started. Action execution server url: {self.api_url}', + "info", + f"Container started. Action execution server url: {self.api_url}", ) - self.log('info', 'Waiting for client to become ready...') + self.log("info", "Waiting for client to become ready...") self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME) await call_sync_from_async(self._wait_until_alive) @@ -220,8 +224,8 @@ class DaytonaRuntime(ActionExecutionClient): await call_sync_from_async(self.setup_initial_env) self.log( - 'info', - f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}', + "info", + f"Container initialized with plugins: {[plugin.name for plugin in self.plugins]}", ) if should_start_action_execution_server: @@ -233,7 +237,7 @@ class DaytonaRuntime(ActionExecutionClient): lambda e: ( isinstance(e, httpx.HTTPError) or isinstance(e, RequestHTTPError) ) - and hasattr(e, 'response') + and hasattr(e, "response") and e.response.status_code == 502 ), stop=tenacity.stop_after_delay(120) | stop_if_should_exit(), @@ -250,13 +254,15 @@ class DaytonaRuntime(ActionExecutionClient): return if self.sandbox: - delete_on_close = os.getenv('DAYTONA_DELETE_ON_CLOSE', 'false').lower() == 'true' + delete_on_close = ( + os.getenv("DAYTONA_DELETE_ON_CLOSE", "false").lower() == "true" + ) if delete_on_close: self.sandbox.delete() else: # Only stop if sandbox is currently started - if self._get_sandbox().state == 'started': + if self._get_sandbox().state == "started": self.sandbox.stop() @property @@ -266,26 +272,26 @@ class DaytonaRuntime(ActionExecutionClient): token = super().get_vscode_token() if not token: self.log( - 'warning', 'Failed to get VSCode token while trying to get VSCode URL' + "warning", "Failed to get VSCode token while trying to get VSCode URL" ) return None if not self.sandbox: self.log( - 'warning', 'Sandbox is not initialized while trying to get VSCode URL' + "warning", "Sandbox is not initialized while trying to get VSCode URL" ) return None self._vscode_url = ( self._construct_api_url(self._vscode_port) - + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}' + + f"/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}" ) self.log( - 'debug', - f'VSCode URL: {self._vscode_url}', + "debug", + f"VSCode URL: {self._vscode_url}", ) return self._vscode_url @property def additional_agent_instructions(self) -> str: - return f'When showing endpoints to access applications for any port, e.g. port 3000, instead of localhost:3000, use this format: {self._construct_api_url(3000)}.' + return f"When showing endpoints to access applications for any port, e.g. port 3000, instead of localhost:3000, use this format: {self._construct_api_url(3000)}." diff --git a/third_party/runtime/impl/e2b/__init__.py b/third_party/runtime/impl/e2b/__init__.py index 2cbb9a612e..89ec333926 100644 --- a/third_party/runtime/impl/e2b/__init__.py +++ b/third_party/runtime/impl/e2b/__init__.py @@ -2,4 +2,4 @@ This runtime reads configuration directly from environment variables: - E2B_API_KEY: API key for E2B authentication -""" \ No newline at end of file +""" diff --git a/third_party/runtime/impl/e2b/e2b_runtime.py b/third_party/runtime/impl/e2b/e2b_runtime.py index d32f47ee10..bd43219da1 100644 --- a/third_party/runtime/impl/e2b/e2b_runtime.py +++ b/third_party/runtime/impl/e2b/e2b_runtime.py @@ -27,7 +27,7 @@ class E2BRuntime(ActionExecutionClient): self, config: OpenHandsConfig, event_stream: EventStream, - sid: str = 'default', + sid: str = "default", plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, status_callback: Callable | None = None, @@ -52,27 +52,27 @@ class E2BRuntime(ActionExecutionClient): if sandbox is None: self.sandbox = E2BSandbox(config.sandbox) if not isinstance(self.sandbox, E2BSandbox): - raise ValueError('E2BRuntime requires an E2BSandbox') + raise ValueError("E2BRuntime requires an E2BSandbox") self.file_store = E2BFileStore(self.sandbox.filesystem) def read(self, action: FileReadAction) -> Observation: content = self.file_store.read(action.path) - lines = read_lines(content.split('\n'), action.start, action.end) - code_view = ''.join(lines) + lines = read_lines(content.split("\n"), action.start, action.end) + code_view = "".join(lines) return FileReadObservation(code_view, path=action.path) def write(self, action: FileWriteAction) -> Observation: if action.start == 0 and action.end == -1: self.file_store.write(action.path, action.content) - return FileWriteObservation(content='', path=action.path) + return FileWriteObservation(content="", path=action.path) files = self.file_store.list(action.path) if action.path in files: - all_lines = self.file_store.read(action.path).split('\n') + all_lines = self.file_store.read(action.path).split("\n") new_file = insert_lines( - action.content.split('\n'), all_lines, action.start, action.end + action.content.split("\n"), all_lines, action.start, action.end ) - self.file_store.write(action.path, ''.join(new_file)) - return FileWriteObservation('', path=action.path) + self.file_store.write(action.path, "".join(new_file)) + return FileWriteObservation("", path=action.path) else: # FIXME: we should create a new file here - return ErrorObservation(f'File not found: {action.path}') + return ErrorObservation(f"File not found: {action.path}") diff --git a/third_party/runtime/impl/e2b/sandbox.py b/third_party/runtime/impl/e2b/sandbox.py index d48529243c..842dfeb091 100644 --- a/third_party/runtime/impl/e2b/sandbox.py +++ b/third_party/runtime/impl/e2b/sandbox.py @@ -12,29 +12,31 @@ from openhands.core.logger import openhands_logger as logger class E2BBox: closed = False - _cwd: str = '/home/user' + _cwd: str = "/home/user" _env: dict[str, str] = {} is_initial_session: bool = True def __init__( self, config: SandboxConfig, - template: str = 'openhands', + template: str = "openhands", ): self.config = copy.deepcopy(config) self.initialize_plugins: bool = config.initialize_plugins - + # Read API key from environment variable - e2b_api_key = os.getenv('E2B_API_KEY') + e2b_api_key = os.getenv("E2B_API_KEY") if not e2b_api_key: - raise ValueError('E2B_API_KEY environment variable is required for E2B runtime') - + raise ValueError( + "E2B_API_KEY environment variable is required for E2B runtime" + ) + self.sandbox = E2BSandbox( api_key=e2b_api_key, template=template, # It's possible to stream stdout and stderr from sandbox and from each process - on_stderr=lambda x: logger.debug(f'E2B sandbox stderr: {x}'), - on_stdout=lambda x: logger.debug(f'E2B sandbox stdout: {x}'), + on_stderr=lambda x: logger.debug(f"E2B sandbox stderr: {x}"), + on_stdout=lambda x: logger.debug(f"E2B sandbox stdout: {x}"), cwd=self._cwd, # Default workdir inside sandbox ) logger.debug(f'Started E2B sandbox with ID "{self.sandbox.id}"') @@ -46,23 +48,23 @@ class E2BBox: def _archive(self, host_src: str, recursive: bool = False): if recursive: assert os.path.isdir(host_src), ( - 'Source must be a directory when recursive is True' + "Source must be a directory when recursive is True" ) - files = glob(host_src + '/**/*', recursive=True) + files = glob(host_src + "/**/*", recursive=True) srcname = os.path.basename(host_src) - tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar') - with tarfile.open(tar_filename, mode='w') as tar: + tar_filename = os.path.join(os.path.dirname(host_src), srcname + ".tar") + with tarfile.open(tar_filename, mode="w") as tar: for file in files: tar.add( file, arcname=os.path.relpath(file, os.path.dirname(host_src)) ) else: assert os.path.isfile(host_src), ( - 'Source must be a file when recursive is False' + "Source must be a file when recursive is False" ) srcname = os.path.basename(host_src) - tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar') - with tarfile.open(tar_filename, mode='w') as tar: + tar_filename = os.path.join(os.path.dirname(host_src), srcname + ".tar") + with tarfile.open(tar_filename, mode="w") as tar: tar.add(host_src, arcname=srcname) return tar_filename @@ -72,12 +74,12 @@ class E2BBox: try: process_output = process.wait(timeout=timeout) except TimeoutException: - logger.debug('Command timed out, killing process...') + logger.debug("Command timed out, killing process...") process.kill() return -1, f'Command: "{cmd}" timed out' logs = [m.line for m in process_output.messages] - logs_str = '\n'.join(logs) + logs_str = "\n".join(logs) if process.exit_code is None: return -1, logs_str @@ -89,24 +91,24 @@ class E2BBox: tar_filename = self._archive(host_src, recursive) # Prepend the sandbox destination with our sandbox cwd - sandbox_dest = os.path.join(self._cwd, sandbox_dest.removeprefix('/')) + sandbox_dest = os.path.join(self._cwd, sandbox_dest.removeprefix("/")) - with open(tar_filename, 'rb') as tar_file: + with open(tar_filename, "rb") as tar_file: # Upload the archive to /home/user (default destination that always exists) uploaded_path = self.sandbox.upload_file(tar_file) # Check if sandbox_dest exists. If not, create it. - process = self.sandbox.process.start_and_wait(f'test -d {sandbox_dest}') + process = self.sandbox.process.start_and_wait(f"test -d {sandbox_dest}") if process.exit_code != 0: self.sandbox.filesystem.make_dir(sandbox_dest) # Extract the archive into the destination and delete the archive process = self.sandbox.process.start_and_wait( - f'sudo tar -xf {uploaded_path} -C {sandbox_dest} && sudo rm {uploaded_path}' + f"sudo tar -xf {uploaded_path} -C {sandbox_dest} && sudo rm {uploaded_path}" ) if process.exit_code != 0: raise Exception( - f'Failed to extract {uploaded_path} to {sandbox_dest}: {process.stderr}' + f"Failed to extract {uploaded_path} to {sandbox_dest}: {process.stderr}" ) # Delete the local archive diff --git a/third_party/runtime/impl/modal/__init__.py b/third_party/runtime/impl/modal/__init__.py index 8ab3aa7fac..45906cd24b 100644 --- a/third_party/runtime/impl/modal/__init__.py +++ b/third_party/runtime/impl/modal/__init__.py @@ -3,4 +3,4 @@ This runtime reads configuration directly from environment variables: - MODAL_TOKEN_ID: Modal API token ID for authentication - MODAL_TOKEN_SECRET: Modal API token secret for authentication -""" \ No newline at end of file +""" diff --git a/third_party/runtime/impl/modal/modal_runtime.py b/third_party/runtime/impl/modal/modal_runtime.py index aa1811654d..4e8664c3b7 100644 --- a/third_party/runtime/impl/modal/modal_runtime.py +++ b/third_party/runtime/impl/modal/modal_runtime.py @@ -40,7 +40,7 @@ class ModalRuntime(ActionExecutionClient): env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None. """ - container_name_prefix = 'openhands-sandbox-' + container_name_prefix = "openhands-sandbox-" sandbox: modal.Sandbox | None sid: str @@ -48,7 +48,7 @@ class ModalRuntime(ActionExecutionClient): self, config: OpenHandsConfig, event_stream: EventStream, - sid: str = 'default', + sid: str = "default", plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, status_callback: Callable | None = None, @@ -58,13 +58,17 @@ class ModalRuntime(ActionExecutionClient): git_provider_tokens: PROVIDER_TOKEN_TYPE | None = None, ): # Read Modal API credentials from environment variables - modal_token_id = os.getenv('MODAL_TOKEN_ID') - modal_token_secret = os.getenv('MODAL_TOKEN_SECRET') + modal_token_id = os.getenv("MODAL_TOKEN_ID") + modal_token_secret = os.getenv("MODAL_TOKEN_SECRET") if not modal_token_id: - raise ValueError('MODAL_TOKEN_ID environment variable is required for Modal runtime') + raise ValueError( + "MODAL_TOKEN_ID environment variable is required for Modal runtime" + ) if not modal_token_secret: - raise ValueError('MODAL_TOKEN_SECRET environment variable is required for Modal runtime') + raise ValueError( + "MODAL_TOKEN_SECRET environment variable is required for Modal runtime" + ) self.config = config self.sandbox = None @@ -75,14 +79,14 @@ class ModalRuntime(ActionExecutionClient): modal_token_secret, ) self.app = modal.App.lookup( - 'openhands', create_if_missing=True, client=self.modal_client + "openhands", create_if_missing=True, client=self.modal_client ) # workspace_base cannot be used because we can't bind mount into a sandbox. if self.config.workspace_base is not None: self.log( - 'warning', - 'Setting workspace_base is not supported in the modal runtime.', + "warning", + "Setting workspace_base is not supported in the modal runtime.", ) # This value is arbitrary as it's private to the container @@ -96,8 +100,8 @@ class ModalRuntime(ActionExecutionClient): if self.config.sandbox.runtime_extra_deps: self.log( - 'debug', - f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}', + "debug", + f"Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}", ) super().__init__( @@ -116,7 +120,7 @@ class ModalRuntime(ActionExecutionClient): async def connect(self): self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME) - self.log('debug', f'ModalRuntime `{self.sid}`') + self.log("debug", f"ModalRuntime `{self.sid}`") self.image = self._get_image_definition( self.base_container_image_id, @@ -127,7 +131,7 @@ class ModalRuntime(ActionExecutionClient): if self.attach_to_existing: if self.sid in MODAL_RUNTIME_IDS: sandbox_id = MODAL_RUNTIME_IDS[self.sid] - self.log('debug', f'Attaching to existing Modal sandbox: {sandbox_id}') + self.log("debug", f"Attaching to existing Modal sandbox: {sandbox_id}") self.sandbox = modal.Sandbox.from_id( sandbox_id, client=self.modal_client ) @@ -142,13 +146,13 @@ class ModalRuntime(ActionExecutionClient): self.set_runtime_status(RuntimeStatus.RUNTIME_STARTED) if self.sandbox is None: - raise Exception('Sandbox not initialized') + raise Exception("Sandbox not initialized") tunnel = self.sandbox.tunnels()[self.container_port] self.api_url = tunnel.url - self.log('debug', f'Container started. Server url: {self.api_url}') + self.log("debug", f"Container started. Server url: {self.api_url}") if not self.attach_to_existing: - self.log('debug', 'Waiting for client to become ready...') + self.log("debug", "Waiting for client to become ready...") self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME) self._wait_until_alive() @@ -190,15 +194,15 @@ class ModalRuntime(ActionExecutionClient): ) base_runtime_image = modal.Image.from_dockerfile( - path=os.path.join(build_folder, 'Dockerfile'), + path=os.path.join(build_folder, "Dockerfile"), context_mount=modal.Mount.from_local_dir( local_path=build_folder, - remote_path='.', # to current WORKDIR + remote_path=".", # to current WORKDIR ), ) else: raise ValueError( - 'Neither runtime container image nor base container image is set' + "Neither runtime container image nor base container image is set" ) return base_runtime_image.run_commands( @@ -220,29 +224,29 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc plugins: list[PluginRequirement] | None = None, ): try: - self.log('debug', 'Preparing to start container...') + self.log("debug", "Preparing to start container...") # Combine environment variables environment: dict[str, str | None] = { - 'port': str(self.container_port), - 'PYTHONUNBUFFERED': '1', - 'VSCODE_PORT': str(self._vscode_port), + "port": str(self.container_port), + "PYTHONUNBUFFERED": "1", + "VSCODE_PORT": str(self._vscode_port), } if self.config.debug: - environment['DEBUG'] = 'true' + environment["DEBUG"] = "true" env_secret = modal.Secret.from_dict(environment) - self.log('debug', f'Sandbox workspace: {sandbox_workspace_dir}') + self.log("debug", f"Sandbox workspace: {sandbox_workspace_dir}") sandbox_start_cmd = get_action_execution_server_startup_command( server_port=self.container_port, plugins=self.plugins, app_config=self.config, ) - self.log('debug', f'Starting container with command: {sandbox_start_cmd}') + self.log("debug", f"Starting container with command: {sandbox_start_cmd}") self.sandbox = modal.Sandbox.create( *sandbox_start_cmd, secrets=[env_secret], - workdir='/openhands/code', + workdir="/openhands/code", encrypted_ports=[self.container_port, self._vscode_port], image=self.image, app=self.app, @@ -250,13 +254,13 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc timeout=60 * 60, ) MODAL_RUNTIME_IDS[self.sid] = self.sandbox.object_id - self.log('debug', 'Container started') + self.log("debug", "Container started") except Exception as e: self.log( - 'error', f'Error: Instance {self.sid} FAILED to start container!\n' + "error", f"Error: Instance {self.sid} FAILED to start container!\n" ) - self.log('error', str(e)) + self.log("error", str(e)) self.close() raise e @@ -270,26 +274,26 @@ echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc @property def vscode_url(self) -> str | None: if self._vscode_url is not None: # cached value - self.log('debug', f'VSCode URL: {self._vscode_url}') + self.log("debug", f"VSCode URL: {self._vscode_url}") return self._vscode_url token = super().get_vscode_token() if not token: - self.log('error', 'VSCode token not found') + self.log("error", "VSCode token not found") return None if not self.sandbox: - self.log('error', 'Sandbox not initialized') + self.log("error", "Sandbox not initialized") return None tunnel = self.sandbox.tunnels()[self._vscode_port] tunnel_url = tunnel.url self._vscode_url = ( tunnel_url - + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}' + + f"/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}" ) self.log( - 'debug', - f'VSCode URL: {self._vscode_url}', + "debug", + f"VSCode URL: {self._vscode_url}", ) return self._vscode_url diff --git a/third_party/runtime/impl/runloop/__init__.py b/third_party/runtime/impl/runloop/__init__.py index a775d72587..2f4becbf24 100644 --- a/third_party/runtime/impl/runloop/__init__.py +++ b/third_party/runtime/impl/runloop/__init__.py @@ -2,4 +2,4 @@ This runtime reads configuration directly from environment variables: - RUNLOOP_API_KEY: API key for Runloop authentication -""" \ No newline at end of file +""" diff --git a/third_party/runtime/impl/runloop/runloop_runtime.py b/third_party/runtime/impl/runloop/runloop_runtime.py index b3e37b143d..4fb70c3904 100644 --- a/third_party/runtime/impl/runloop/runloop_runtime.py +++ b/third_party/runtime/impl/runloop/runloop_runtime.py @@ -19,7 +19,7 @@ from openhands.runtime.runtime_status import RuntimeStatus from openhands.runtime.utils.command import get_action_execution_server_startup_command from openhands.utils.tenacity_stop import stop_if_should_exit -CONTAINER_NAME_PREFIX = 'openhands-runtime-' +CONTAINER_NAME_PREFIX = "openhands-runtime-" class RunloopRuntime(ActionExecutionClient): @@ -32,7 +32,7 @@ class RunloopRuntime(ActionExecutionClient): self, config: OpenHandsConfig, event_stream: EventStream, - sid: str = 'default', + sid: str = "default", plugins: list[PluginRequirement] | None = None, env_vars: dict[str, str] | None = None, status_callback: Callable | None = None, @@ -42,10 +42,12 @@ class RunloopRuntime(ActionExecutionClient): git_provider_tokens: PROVIDER_TOKEN_TYPE | None = None, ): # Read Runloop API key from environment variable - runloop_api_key = os.getenv('RUNLOOP_API_KEY') + runloop_api_key = os.getenv("RUNLOOP_API_KEY") if not runloop_api_key: - raise ValueError('RUNLOOP_API_KEY environment variable is required for Runloop runtime') - + raise ValueError( + "RUNLOOP_API_KEY environment variable is required for Runloop runtime" + ) + self.devbox: DevboxView | None = None self.config = config self.runloop_api_client = Runloop( @@ -77,15 +79,15 @@ class RunloopRuntime(ActionExecutionClient): ) def _wait_for_devbox(self, devbox: DevboxView) -> DevboxView: """Pull devbox status until it is running""" - if devbox == 'running': + if devbox == "running": return devbox devbox = self.runloop_api_client.devboxes.retrieve(id=devbox.id) - if devbox.status != 'running': - raise ConnectionRefusedError('Devbox is not running') + if devbox.status != "running": + raise ConnectionRefusedError("Devbox is not running") # Devbox is connected and running - logging.debug(f'devbox.id={devbox.id} is running') + logging.debug(f"devbox.id={devbox.id} is running") return devbox def _create_new_devbox(self) -> DevboxView: @@ -101,26 +103,26 @@ class RunloopRuntime(ActionExecutionClient): # (ie browser) to be installed as root # Convert start_command list to a single command string with additional setup start_command_str = ( - 'export MAMBA_ROOT_PREFIX=/openhands/micromamba && ' - 'cd /openhands/code && ' - '/openhands/micromamba/bin/micromamba run -n openhands poetry config virtualenvs.path /openhands/poetry && ' - + ' '.join(start_command) + "export MAMBA_ROOT_PREFIX=/openhands/micromamba && " + "cd /openhands/code && " + "/openhands/micromamba/bin/micromamba run -n openhands poetry config virtualenvs.path /openhands/poetry && " + + " ".join(start_command) ) entrypoint = f"sudo bash -c '{start_command_str}'" devbox = self.runloop_api_client.devboxes.create( entrypoint=entrypoint, name=self.sid, - environment_variables={'DEBUG': 'true'} if self.config.debug else {}, - prebuilt='openhands', + environment_variables={"DEBUG": "true"} if self.config.debug else {}, + prebuilt="openhands", launch_parameters=LaunchParameters( available_ports=[self._sandbox_port, self._vscode_port], - resource_size_request='LARGE', + resource_size_request="LARGE", launch_commands=[ - f'mkdir -p {self.config.workspace_mount_path_in_sandbox}' + f"mkdir -p {self.config.workspace_mount_path_in_sandbox}" ], ), - metadata={'container-name': self.container_name}, + metadata={"container-name": self.container_name}, ) return self._wait_for_devbox(devbox) @@ -129,7 +131,7 @@ class RunloopRuntime(ActionExecutionClient): if self.attach_to_existing: active_devboxes = self.runloop_api_client.devboxes.list( - status='running' + status="running" ).devboxes self.devbox = next( (devbox for devbox in active_devboxes if devbox.name == self.sid), None @@ -145,11 +147,11 @@ class RunloopRuntime(ActionExecutionClient): ) self.api_url = tunnel.url - logger.info(f'Container started. Server url: {self.api_url}') + logger.info(f"Container started. Server url: {self.api_url}") # End Runloop connect # NOTE: Copied from DockerRuntime - logger.info('Waiting for client to become ready...') + logger.info("Waiting for client to become ready...") self.set_runtime_status(RuntimeStatus.STARTING_RUNTIME) self._wait_until_alive() @@ -157,7 +159,7 @@ class RunloopRuntime(ActionExecutionClient): self.setup_initial_env() logger.info( - f'Container initialized with plugins: {[plugin.name for plugin in self.plugins]}' + f"Container initialized with plugins: {[plugin.name for plugin in self.plugins]}" ) self.set_runtime_status(RuntimeStatus.READY) @@ -192,12 +194,12 @@ class RunloopRuntime(ActionExecutionClient): id=self.devbox.id, port=self._vscode_port, ).url - + f'/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}' + + f"/?tkn={token}&folder={self.config.workspace_mount_path_in_sandbox}" ) self.log( - 'debug', - f'VSCode URL: {self._vscode_url}', + "debug", + f"VSCode URL: {self._vscode_url}", ) return self._vscode_url