diff --git a/.github/scripts/update_pr_description.sh b/.github/scripts/update_pr_description.sh index 04fe71109a..cca991e2d4 100755 --- a/.github/scripts/update_pr_description.sh +++ b/.github/scripts/update_pr_description.sh @@ -1,33 +1,53 @@ #!/bin/bash +set -euxo pipefail + # This script updates the PR description with commands to run the PR locally # It adds both Docker and uvx commands # Get the branch name for the PR -BRANCH_NAME=$(gh pr view $PR_NUMBER --json headRefName --jq .headRefName) +BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName) # Define the Docker command DOCKER_RUN_COMMAND="docker run -it --rm \ -p 3000:3000 \ -v /var/run/docker.sock:/var/run/docker.sock \ --add-host host.docker.internal:host-gateway \ - -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \ - --name openhands-app-$SHORT_SHA \ - docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA" + -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \ + --name openhands-app-${SHORT_SHA} \ + docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}" # Define the uvx command -UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@$BRANCH_NAME openhands" +UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands" # Get the current PR body -PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body) +PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body) # Prepare the new PR body with both commands if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then - # For existing PR descriptions, replace the command section - NEW_PR_BODY=$(echo "$PR_BODY" | sed "s|To run this PR locally, use the following command:.*\`\`\`|To run this PR locally, use the following command:\n\nGUI with Docker:\n\`\`\`\n$DOCKER_RUN_COMMAND\n\`\`\`\n\nCLI with uvx:\n\`\`\`\n$UVX_RUN_COMMAND\n\`\`\`|s") + # For existing PR descriptions, use a more robust approach + # Split the PR body at the "To run this PR locally" section and replace everything after it + BEFORE_SECTION=$(echo "$PR_BODY" | sed '/To run this PR locally, use the following command:/,$d') + NEW_PR_BODY=$(cat < +**Prerequisites**: You need to have the [OpenHands CLI installed](/usage/how-to/cli-mode) first, OR have `uv` installed and run `uvx --python 3.12 --from openhands-ai openhands serve`. Otherwise, you'll need to use Docker directly (see the [Docker section](#using-docker-directly) below). + + +```bash +openhands serve +``` + +This command will: +- Check that Docker is installed and running +- Pull the required Docker images +- Launch the OpenHands GUI server at http://localhost:3000 +- Use the same configuration directory (`~/.openhands`) as the CLI mode + +#### Mounting Your Current Directory + +To mount your current working directory into the GUI server container, use the `--mount-cwd` flag: + +```bash +openhands serve --mount-cwd +``` + +This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container. + +#### Using GPU Support + +If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag: + +```bash +openhands serve --gpu +``` + +This will enable GPU support via nvidia-docker, mounting all available GPUs into the container. 
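+
+If you're unsure whether the NVIDIA Container Toolkit is working, a quick smoke test is to run `nvidia-smi` inside a CUDA container first (the image tag below is only an example):
+
+```bash
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+```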
You can combine this with other flags: + +```bash +openhands serve --gpu --mount-cwd +``` + +**Prerequisites for GPU support:** +- NVIDIA GPU drivers must be installed on your host system +- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured + +#### Requirements + +Before using the `openhands serve` command, ensure that: +- Docker is installed and running on your system +- You have internet access to pull the required Docker images +- Port 3000 is available on your system + +The CLI will automatically check these requirements and provide helpful error messages if anything is missing. + +### Using Docker Directly + +Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions. + ## Overview ### Initial Setup diff --git a/docs/usage/local-setup.mdx b/docs/usage/local-setup.mdx index 86217b01ae..67bf7900f0 100644 --- a/docs/usage/local-setup.mdx +++ b/docs/usage/local-setup.mdx @@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to ### Start the App +#### Option 1: Using the CLI Launcher (Recommended) + +If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience: + +```bash +# Install OpenHands +pip install openhands-ai + +# Launch the GUI server +openhands serve + +# Or with GPU support (requires nvidia-docker) +openhands serve --gpu + +# Or with current directory mounted +openhands serve --mount-cwd +``` + +Alternatively, use `uvx --python 3.12 --from openhands-ai openhands serve` if you have [uv](https://docs.astral.sh/uv/) installed. + +This will automatically check the Docker requirements, pull the required images, and launch the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
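+
+For example, to open the GUI against a project you already have checked out (the path below is just a placeholder):
+
+```bash
+cd ~/projects/my-app
+openhands serve --mount-cwd
+```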
+ +#### Option 2: Using Docker Directly + ```bash docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik diff --git a/evaluation/benchmarks/EDA/run_infer.py b/evaluation/benchmarks/EDA/run_infer.py index a80b745ce7..649b72b11e 100644 --- a/evaluation/benchmarks/EDA/run_infer.py +++ b/evaluation/benchmarks/EDA/run_infer.py @@ -18,8 +18,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -172,7 +172,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model' ) diff --git a/evaluation/benchmarks/commit0/run_infer.py b/evaluation/benchmarks/commit0/run_infer.py index 99c5b4a43d..176d8f7233 100644 --- a/evaluation/benchmarks/commit0/run_infer.py +++ b/evaluation/benchmarks/commit0/run_infer.py @@ -26,8 +26,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/gaia/run_infer.py b/evaluation/benchmarks/gaia/run_infer.py index 82b656e52c..cc21cfc25d 100644 --- a/evaluation/benchmarks/gaia/run_infer.py +++ b/evaluation/benchmarks/gaia/run_infer.py @@ -31,8 +31,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_from_toml, ) from openhands.core.config.utils import get_agent_config_arg @@ -294,7 +294,7 @@ Here is the task: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--level', type=str, diff --git a/evaluation/benchmarks/gorilla/run_infer.py b/evaluation/benchmarks/gorilla/run_infer.py index 652e774503..79e5fffdc6 100644 --- a/evaluation/benchmarks/gorilla/run_infer.py +++ b/evaluation/benchmarks/gorilla/run_infer.py @@ -20,8 +20,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -134,7 +134,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--hubs', type=str, diff --git a/evaluation/benchmarks/gpqa/run_infer.py b/evaluation/benchmarks/gpqa/run_infer.py index 3bc981378b..cb1bbd68b1 100644 --- a/evaluation/benchmarks/gpqa/run_infer.py +++ b/evaluation/benchmarks/gpqa/run_infer.py @@ -38,8 +38,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - 
get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck! if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() # data split must be one of 'gpqa_main', 'gqpa_diamond', 'gpqa_experts', 'gpqa_extended' parser.add_argument( '--data-split', diff --git a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py index 40846e1e61..2aad6fb1b5 100644 --- a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py +++ b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py @@ -21,7 +21,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -167,7 +167,7 @@ def process_predictions(predictions_path: str): if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py index 0fe5e79e3b..1dba49413d 100644 --- a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py +++ b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py @@ -30,8 +30,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. 
It's fine if your think if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/logic_reasoning/run_infer.py b/evaluation/benchmarks/logic_reasoning/run_infer.py index 39743cd674..eb4342f44a 100644 --- a/evaluation/benchmarks/logic_reasoning/run_infer.py +++ b/evaluation/benchmarks/logic_reasoning/run_infer.py @@ -18,8 +18,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -267,7 +267,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/mint/run_infer.py b/evaluation/benchmarks/mint/run_infer.py index d0cec13a2f..b7fac0e44d 100644 --- a/evaluation/benchmarks/mint/run_infer.py +++ b/evaluation/benchmarks/mint/run_infer.py @@ -23,8 +23,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -229,7 +229,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() SUBSETS = [ # Eurus subset: https://arxiv.org/abs/2404.02078 diff --git a/evaluation/benchmarks/ml_bench/run_analysis.py b/evaluation/benchmarks/ml_bench/run_analysis.py index 9c0958060f..8baddcffb1 100644 --- a/evaluation/benchmarks/ml_bench/run_analysis.py +++ b/evaluation/benchmarks/ml_bench/run_analysis.py @@ -4,7 +4,11 @@ import pprint import tqdm -from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config +from openhands.core.config import ( + get_evaluation_parser, + get_llm_config_arg, + load_openhands_config, +) from openhands.core.logger import openhands_logger as logger from openhands.llm.llm import LLM @@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--json_file_path', type=str, diff --git a/evaluation/benchmarks/ml_bench/run_infer.py b/evaluation/benchmarks/ml_bench/run_infer.py index 40874cf9a4..1b4b094520 100644 --- a/evaluation/benchmarks/ml_bench/run_infer.py +++ b/evaluation/benchmarks/ml_bench/run_infer.py @@ -34,8 +34,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/multi_swe_bench/eval_infer.py b/evaluation/benchmarks/multi_swe_bench/eval_infer.py index c895bb0b62..ae259cb597 100644 --- a/evaluation/benchmarks/multi_swe_bench/eval_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/eval_infer.py @@ -30,7 
+30,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime @@ -323,7 +323,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, diff --git a/evaluation/benchmarks/multi_swe_bench/run_infer.py b/evaluation/benchmarks/multi_swe_bench/run_infer.py index 4f12677dc2..ca33f65298 100644 --- a/evaluation/benchmarks/multi_swe_bench/run_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/run_infer.py @@ -32,8 +32,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame: if __name__ == '__main__': # pdb.set_trace() - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/scienceagentbench/run_infer.py b/evaluation/benchmarks/scienceagentbench/run_infer.py index 8a83a7e200..c346ac1da1 100644 --- a/evaluation/benchmarks/scienceagentbench/run_infer.py +++ b/evaluation/benchmarks/scienceagentbench/run_infer.py @@ -21,8 +21,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--use-knowledge', type=str, diff --git a/evaluation/benchmarks/swe_bench/eval_infer.py b/evaluation/benchmarks/swe_bench/eval_infer.py index 48170be8b5..81b7decd4c 100644 --- a/evaluation/benchmarks/swe_bench/eval_infer.py +++ b/evaluation/benchmarks/swe_bench/eval_infer.py @@ -26,7 +26,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime @@ -353,7 +353,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 52ca9a9b81..7873b115a1 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -43,8 +43,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.condenser_config import NoOpCondenserConfig from openhands.core.config.utils import get_condenser_config_arg @@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame: if __name__ == '__main__': - parser = get_parser() + parser = 
get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_infer_interact.py b/evaluation/benchmarks/swe_bench/run_infer_interact.py index 1ed4cc4e2f..c97a2d6b3f 100755 --- a/evaluation/benchmarks/swe_bench/run_infer_interact.py +++ b/evaluation/benchmarks/swe_bench/run_infer_interact.py @@ -28,8 +28,8 @@ from evaluation.utils.shared import ( ) from openhands.controller.state.state import State from openhands.core.config import ( + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.condenser_config import NoOpCondenserConfig from openhands.core.config.utils import get_condenser_config_arg @@ -201,7 +201,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_localize.py b/evaluation/benchmarks/swe_bench/run_localize.py index f17d40b87c..0c34991577 100644 --- a/evaluation/benchmarks/swe_bench/run_localize.py +++ b/evaluation/benchmarks/swe_bench/run_localize.py @@ -31,8 +31,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [ ] if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/testgeneval/eval_infer.py b/evaluation/benchmarks/testgeneval/eval_infer.py index 6312ff66e5..99eea4ba43 100644 --- a/evaluation/benchmarks/testgeneval/eval_infer.py +++ b/evaluation/benchmarks/testgeneval/eval_infer.py @@ -41,7 +41,7 @@ from evaluation.utils.shared import ( reset_logger_for_multiprocessing, run_evaluation, ) -from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser +from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime from openhands.events.action import CmdRunAction @@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key): if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, required=True, help='Path to input predictions file' ) diff --git a/evaluation/benchmarks/testgeneval/run_infer.py b/evaluation/benchmarks/testgeneval/run_infer.py index 5338914369..39288ff537 100644 --- a/evaluation/benchmarks/testgeneval/run_infer.py +++ b/evaluation/benchmarks/testgeneval/run_infer.py @@ -37,8 +37,8 @@ from openhands.core.config import ( AgentConfig, OpenHandsConfig, SandboxConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py index e71550d0d9..9f5780d559 100644 --- a/evaluation/benchmarks/the_agent_company/run_infer.py +++ 
b/evaluation/benchmarks/the_agent_company/run_infer.py @@ -18,8 +18,8 @@ from openhands.core.config import ( LLMConfig, OpenHandsConfig, get_agent_config_arg, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.agent_config import AgentConfig from openhands.core.logger import openhands_logger as logger @@ -197,7 +197,7 @@ def run_evaluator( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--task-image-name', type=str, diff --git a/evaluation/benchmarks/toolqa/run_infer.py b/evaluation/benchmarks/toolqa/run_infer.py index 29e65d944f..4db988efa1 100644 --- a/evaluation/benchmarks/toolqa/run_infer.py +++ b/evaluation/benchmarks/toolqa/run_infer.py @@ -19,8 +19,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/visual_swe_bench/run_infer.py b/evaluation/benchmarks/visual_swe_bench/run_infer.py index d07e885b1d..215f7933b1 100644 --- a/evaluation/benchmarks/visual_swe_bench/run_infer.py +++ b/evaluation/benchmarks/visual_swe_bench/run_infer.py @@ -31,8 +31,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [ ] if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/openhands/cli/__init__.py b/openhands/cli/__init__.py new file mode 100644 index 0000000000..9315930b74 --- /dev/null +++ b/openhands/cli/__init__.py @@ -0,0 +1 @@ +"""OpenHands CLI module.""" diff --git a/openhands/cli/entry.py b/openhands/cli/entry.py new file mode 100644 index 0000000000..1fcb6d4b63 --- /dev/null +++ b/openhands/cli/entry.py @@ -0,0 +1,54 @@ +"""Main entry point for OpenHands CLI with subcommand support.""" + +import sys + +import openhands +import openhands.cli.suppress_warnings # noqa: F401 +from openhands.cli.gui_launcher import launch_gui_server +from openhands.cli.main import run_cli_command +from openhands.core.config import get_cli_parser +from openhands.core.config.arg_utils import get_subparser + + +def main(): + """Main entry point with subcommand support and backward compatibility.""" + parser = get_cli_parser() + + # If user only asks for --help or -h without a subcommand + if len(sys.argv) == 2 and sys.argv[1] in ('--help', '-h'): + # Print top-level help + print(parser.format_help()) + + # Also print help for `cli` subcommand + print('\n' + '=' * 80) + print('CLI command help:\n') + + cli_parser = get_subparser(parser, 'cli') + print(cli_parser.format_help()) + + sys.exit(0) + + # Special case: no subcommand provided, simulate "openhands cli" + if len(sys.argv) == 1 or ( + len(sys.argv) > 1 and sys.argv[1] not in ['cli', 'serve'] + ): + # Inject 'cli' as default command + sys.argv.insert(1, 
'cli') + + args = parser.parse_args() + + if hasattr(args, 'version') and args.version: + print(f'OpenHands CLI version: {openhands.get_version()}') + sys.exit(0) + + if args.command == 'serve': + launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu) + elif args.command == 'cli' or args.command is None: + run_cli_command(args) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/openhands/cli/gui_launcher.py b/openhands/cli/gui_launcher.py new file mode 100644 index 0000000000..fb3b6a6ff8 --- /dev/null +++ b/openhands/cli/gui_launcher.py @@ -0,0 +1,219 @@ +"""GUI launcher for OpenHands CLI.""" + +import os +import shutil +import subprocess +import sys +from pathlib import Path + +from prompt_toolkit import print_formatted_text +from prompt_toolkit.formatted_text import HTML + +from openhands import __version__ + + +def _format_docker_command_for_logging(cmd: list[str]) -> str: + """Format a Docker command for logging with grey color. + + Args: + cmd (list[str]): The Docker command as a list of strings + + Returns: + str: The formatted command string in grey HTML color + """ + cmd_str = ' '.join(cmd) + return f'<grey>Running Docker command: {cmd_str}</grey>' + + +def check_docker_requirements() -> bool: + """Check if Docker is installed and running. + + Returns: + bool: True if Docker is available and running, False otherwise. + """ + # Check if Docker is installed + if not shutil.which('docker'): + print_formatted_text( + HTML('❌ Docker is not installed or not in PATH.') + ) + print_formatted_text( + HTML( + 'Please install Docker first: https://docs.docker.com/get-docker/' + ) + ) + return False + + # Check if Docker daemon is running + try: + result = subprocess.run( + ['docker', 'info'], capture_output=True, text=True, timeout=10 + ) + if result.returncode != 0: + print_formatted_text( + HTML('❌ Docker daemon is not running.') + ) + print_formatted_text( + HTML('Please start Docker and try again.') + ) + return False + except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e: + print_formatted_text( + HTML('❌ Failed to check Docker status.') + ) + print_formatted_text(HTML(f'Error: {e}')) + return False + + return True + + +def ensure_config_dir_exists() -> Path: + """Ensure the OpenHands configuration directory exists and return its path.""" + config_dir = Path.home() / '.openhands' + config_dir.mkdir(exist_ok=True) + return config_dir + + +def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None: + """Launch the OpenHands GUI server using Docker. + + Args: + mount_cwd: If True, mount the current working directory into the container. + gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
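+
+    Example (illustrative call; assumes Docker is installed and running):
+        launch_gui_server(mount_cwd=True, gpu=False)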
+ """ + print_formatted_text( + HTML('🚀 Launching OpenHands GUI server...') + ) + print_formatted_text('') + + # Check Docker requirements + if not check_docker_requirements(): + sys.exit(1) + + # Ensure config directory exists + config_dir = ensure_config_dir_exists() + + # Get the current version for the Docker image + version = __version__ + runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik' + app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}' + + print_formatted_text(HTML('Pulling required Docker images...')) + + # Pull the runtime image first + pull_cmd = ['docker', 'pull', runtime_image] + print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd))) + try: + subprocess.run( + pull_cmd, + check=True, + timeout=300, # 5 minutes timeout + ) + except subprocess.CalledProcessError: + print_formatted_text( + HTML('❌ Failed to pull runtime image.') + ) + sys.exit(1) + except subprocess.TimeoutExpired: + print_formatted_text( + HTML('❌ Timeout while pulling runtime image.') + ) + sys.exit(1) + + print_formatted_text('') + print_formatted_text( + HTML('✅ Starting OpenHands GUI server...') + ) + print_formatted_text( + HTML('The server will be available at: http://localhost:3000') + ) + print_formatted_text(HTML('Press Ctrl+C to stop the server.')) + print_formatted_text('') + + # Build the Docker command + docker_cmd = [ + 'docker', + 'run', + '-it', + '--rm', + '--pull=always', + '-e', + f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}', + '-e', + 'LOG_ALL_EVENTS=true', + '-v', + '/var/run/docker.sock:/var/run/docker.sock', + '-v', + f'{config_dir}:/.openhands', + ] + + # Add GPU support if requested + if gpu: + print_formatted_text( + HTML('🖥️ Enabling GPU support via nvidia-docker...') + ) + # Add the --gpus all flag to enable all GPUs + docker_cmd.insert(2, '--gpus') + docker_cmd.insert(3, 'all') + # Add environment variable to pass GPU support to sandbox containers + docker_cmd.extend( + [ + '-e', + 'SANDBOX_ENABLE_GPU=true', + ] + ) + + # Add current working directory mount if requested + if mount_cwd: + cwd = Path.cwd() + # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem + docker_cmd.extend( + [ + '-e', + f'SANDBOX_VOLUMES={cwd}:/workspace:rw', + ] + ) + + # Set user ID for Unix-like systems only + if os.name != 'nt': # Not Windows + try: + user_id = subprocess.check_output(['id', '-u'], text=True).strip() + docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}']) + except (subprocess.CalledProcessError, FileNotFoundError): + # If 'id' command fails or doesn't exist, skip setting user ID + pass + # Print the folder that will be mounted to inform the user + print_formatted_text( + HTML( + f'📂 Mounting current directory: {cwd} to /workspace' + ) + ) + + docker_cmd.extend( + [ + '-p', + '3000:3000', + '--add-host', + 'host.docker.internal:host-gateway', + '--name', + 'openhands-app', + app_image, + ] + ) + + try: + # Log and run the Docker command + print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd))) + subprocess.run(docker_cmd, check=True) + except subprocess.CalledProcessError as e: + print_formatted_text('') + print_formatted_text( + HTML('❌ Failed to start OpenHands GUI server.') + ) + print_formatted_text(HTML(f'Error: {e}')) + sys.exit(1) + except KeyboardInterrupt: + print_formatted_text('') + print_formatted_text( + HTML('✓ OpenHands GUI server stopped successfully.') + ) + sys.exit(0) diff --git a/openhands/cli/main.py 
b/openhands/cli/main.py index fd5f8b18cc..17aaaad895 100644 --- a/openhands/cli/main.py +++ b/openhands/cli/main.py @@ -45,7 +45,6 @@ from openhands.controller import AgentController from openhands.controller.agent import Agent from openhands.core.config import ( OpenHandsConfig, - parse_arguments, setup_config_from_args, ) from openhands.core.config.condenser_config import NoOpCondenserConfig @@ -524,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None: print_formatted_text('') -async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None: +async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None: """Runs the agent in CLI mode.""" - args = parse_arguments() - # Set log level from command line argument if provided if args.log_level and isinstance(args.log_level, str): log_level = getattr(logging, str(args.log_level).upper()) @@ -575,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None: # Use settings from settings store if available and override with command line arguments if settings: - # Handle agent configuration - if args.agent_cls: - config.default_agent = str(args.agent_cls) - else: - # settings.agent is not None because we check for it in setup_config_from_args - assert settings.agent is not None - config.default_agent = settings.agent + # settings.agent is not None because we check for it in setup_config_from_args + assert settings.agent is not None + config.default_agent = settings.agent # Handle LLM configuration with proper precedence: # 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args) @@ -719,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it get_runtime_cls(config.runtime).teardown(config) -def main(): +def run_cli_command(args): + """Run the CLI command with proper error handling and cleanup.""" loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(main_with_loop(loop)) + loop.run_until_complete(main_with_loop(loop, args)) except KeyboardInterrupt: print_formatted_text('⚠️ Session was interrupted: interrupted\n') except ConnectionRefusedError as e: - print(f'Connection refused: {e}') + print_formatted_text(f'Connection refused: {e}') sys.exit(1) except Exception as e: - print(f'An error occurred: {e}') + print_formatted_text(f'An error occurred: {e}') sys.exit(1) finally: try: @@ -743,9 +737,5 @@ def main(): loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) loop.close() except Exception as e: - print(f'Error during cleanup: {e}') + print_formatted_text(f'Error during cleanup: {e}') sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py index cf78955711..97d71bd5f1 100644 --- a/openhands/core/config/__init__.py +++ b/openhands/core/config/__init__.py @@ -1,4 +1,9 @@ from openhands.core.config.agent_config import AgentConfig +from openhands.core.config.arg_utils import ( + get_cli_parser, + get_evaluation_parser, + get_headless_parser, +) from openhands.core.config.cli_config import CLIConfig from openhands.core.config.config_utils import ( OH_DEFAULT_AGENT, @@ -15,7 +20,6 @@ from openhands.core.config.utils import ( finalize_config, get_agent_config_arg, get_llm_config_arg, - get_parser, load_from_env, load_from_toml, load_openhands_config, @@ -41,7 +45,9 @@ __all__ = [ 'get_agent_config_arg', 'get_llm_config_arg', 'get_field_info', - 'get_parser', + 'get_cli_parser', + 
'get_headless_parser', + 'get_evaluation_parser', 'parse_arguments', 'setup_config_from_args', ] diff --git a/openhands/core/config/arg_utils.py b/openhands/core/config/arg_utils.py new file mode 100644 index 0000000000..d39a4856e0 --- /dev/null +++ b/openhands/core/config/arg_utils.py @@ -0,0 +1,224 @@ +"""Centralized command line argument configuration for OpenHands CLI and headless modes.""" + +import argparse +from argparse import ArgumentParser, _SubParsersAction + + +def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser: + for action in parser._actions: + if isinstance(action, _SubParsersAction): + if name in action.choices: + return action.choices[name] + raise ValueError(f"Subparser '{name}' not found") + + +def add_common_arguments(parser: argparse.ArgumentParser) -> None: + """Add common arguments shared between CLI and headless modes.""" + parser.add_argument( + '--config-file', + type=str, + default='config.toml', + help='Path to the config file (default: config.toml in the current directory)', + ) + parser.add_argument( + '-t', + '--task', + type=str, + default='', + help='The task for the agent to perform', + ) + parser.add_argument( + '-f', + '--file', + type=str, + help='Path to a file containing the task. Overrides -t if both are provided.', + ) + parser.add_argument( + '-n', + '--name', + help='Session name', + type=str, + default='', + ) + parser.add_argument( + '--log-level', + help='Set the log level', + type=str, + default=None, + ) + parser.add_argument( + '-l', + '--llm-config', + default=None, + type=str, + help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml', + ) + parser.add_argument( + '--agent-config', + default=None, + type=str, + help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. 
"CodeAct" for [agent.CodeAct] section in config.toml', + ) + parser.add_argument( + '-v', '--version', action='store_true', help='Show version information' + ) + + +def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None: + """Add arguments specific to evaluation mode.""" + # Evaluation-specific arguments + parser.add_argument( + '--eval-output-dir', + default='evaluation/evaluation_outputs/outputs', + type=str, + help='The directory to save evaluation output', + ) + parser.add_argument( + '--eval-n-limit', + default=None, + type=int, + help='The number of instances to evaluate', + ) + parser.add_argument( + '--eval-num-workers', + default=4, + type=int, + help='The number of workers to use for evaluation', + ) + parser.add_argument( + '--eval-note', + default=None, + type=str, + help='The note to add to the evaluation directory', + ) + parser.add_argument( + '--eval-ids', + default=None, + type=str, + help='The comma-separated list (in quotes) of IDs of the instances to evaluate', + ) + + +def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None: + """Add arguments specific to headless mode (full evaluation suite).""" + parser.add_argument( + '-d', + '--directory', + type=str, + help='The working directory for the agent', + ) + parser.add_argument( + '-c', + '--agent-cls', + default=None, + type=str, + help='Name of the default agent to use', + ) + parser.add_argument( + '-i', + '--max-iterations', + default=None, + type=int, + help='The maximum number of iterations to run the agent', + ) + parser.add_argument( + '-b', + '--max-budget-per-task', + type=float, + help='The maximum budget allowed per task, beyond which the agent will stop.', + ) + # Additional headless-specific arguments + parser.add_argument( + '--no-auto-continue', + help='Disable auto-continue responses in headless mode (i.e. 
headless will read from stdin instead of auto-continuing)', + action='store_true', + default=False, + ) + parser.add_argument( + '--selected-repo', + help='GitHub repository to clone (format: owner/repo)', + type=str, + default=None, + ) + + +def get_cli_parser() -> argparse.ArgumentParser: + """Create argument parser for CLI mode with simplified argument set.""" + # Create a description with welcome message explaining available commands + description = ( + 'Welcome to OpenHands: Code Less, Make More\n\n' + 'OpenHands supports two main commands:\n' + ' serve - Launch the OpenHands GUI server (web interface)\n' + ' cli - Run OpenHands in CLI mode (terminal interface)\n\n' + 'Running "openhands" without a command is the same as "openhands cli"' + ) + + parser = argparse.ArgumentParser( + description=description, + prog='openhands', + formatter_class=argparse.RawDescriptionHelpFormatter, # Preserve formatting in description + epilog='For more information about a command, run: openhands COMMAND --help', + ) + + # Create subparsers + subparsers = parser.add_subparsers( + dest='command', + title='commands', + description='OpenHands supports two main commands:', + metavar='COMMAND', + ) + + # Add 'serve' subcommand + serve_parser = subparsers.add_parser( + 'serve', help='Launch the OpenHands GUI server using Docker (web interface)' + ) + serve_parser.add_argument( + '--mount-cwd', + help='Mount the current working directory into the GUI server container', + action='store_true', + default=False, + ) + serve_parser.add_argument( + '--gpu', + help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker', + action='store_true', + default=False, + ) + + # Add 'cli' subcommand - import all the existing CLI arguments + cli_parser = subparsers.add_parser( + 'cli', help='Run OpenHands in CLI mode (terminal interface)' + ) + add_common_arguments(cli_parser) + + cli_parser.add_argument( + '--override-cli-mode', + help='Override the default settings for CLI mode', + type=bool, + default=False, + ) + cli_parser.add_argument( + '--conversation', + help='The conversation id to continue', + type=str, + default=None, + ) + + return parser + + +def get_headless_parser() -> argparse.ArgumentParser: + """Create argument parser for headless mode with full argument set.""" + parser = argparse.ArgumentParser(description='Run the agent via CLI') + add_common_arguments(parser) + add_headless_specific_arguments(parser) + return parser + + +def get_evaluation_parser() -> argparse.ArgumentParser: + """Create argument parser for evaluation mode.""" + parser = argparse.ArgumentParser(description='Run OpenHands in evaluation mode') + add_common_arguments(parser) + add_headless_specific_arguments(parser) + add_evaluation_arguments(parser) + return parser diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index 81ac040a5f..cf95e75d7d 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, SecretStr, ValidationError from openhands import __version__ from openhands.core import logger from openhands.core.config.agent_config import AgentConfig +from openhands.core.config.arg_utils import get_headless_parser from openhands.core.config.condenser_config import ( CondenserConfig, condenser_config_from_toml_section, @@ -670,148 +671,9 @@ def get_condenser_config_arg( return None - - -# Command line arguments -def get_parser() -> argparse.ArgumentParser: - """Get the argument parser.""" - parser =
argparse.ArgumentParser(description='Run the agent via CLI') - - # Add version argument - parser.add_argument( - '-v', '--version', action='store_true', help='Show version information' - ) - - parser.add_argument( - '--config-file', - type=str, - default='config.toml', - help='Path to the config file (default: config.toml in the current directory)', - ) - parser.add_argument( - '-d', - '--directory', - type=str, - help='The working directory for the agent', - ) - parser.add_argument( - '-t', - '--task', - type=str, - default='', - help='The task for the agent to perform', - ) - parser.add_argument( - '-f', - '--file', - type=str, - help='Path to a file containing the task. Overrides -t if both are provided.', - ) - parser.add_argument( - '-c', - '--agent-cls', - default=None, - type=str, - help='Name of the default agent to use', - ) - parser.add_argument( - '-i', - '--max-iterations', - default=None, - type=int, - help='The maximum number of iterations to run the agent', - ) - parser.add_argument( - '-b', - '--max-budget-per-task', - type=float, - help='The maximum budget allowed per task, beyond which the agent will stop.', - ) - # --eval configs are for evaluations only - parser.add_argument( - '--eval-output-dir', - default='evaluation/evaluation_outputs/outputs', - type=str, - help='The directory to save evaluation output', - ) - parser.add_argument( - '--eval-n-limit', - default=None, - type=int, - help='The number of instances to evaluate', - ) - parser.add_argument( - '--eval-num-workers', - default=4, - type=int, - help='The number of workers to use for evaluation', - ) - parser.add_argument( - '--eval-note', - default=None, - type=str, - help='The note to add to the evaluation directory', - ) - parser.add_argument( - '-l', - '--llm-config', - default=None, - type=str, - help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml', - ) - parser.add_argument( - '--agent-config', - default=None, - type=str, - help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml', - ) - parser.add_argument( - '-n', - '--name', - help='Session name', - type=str, - default='', - ) - parser.add_argument( - '--conversation', - help='The conversation id to continue', - type=str, - default=None, - ) - parser.add_argument( - '--eval-ids', - default=None, - type=str, - help='The comma-separated list (in quotes) of IDs of the instances to evaluate', - ) - parser.add_argument( - '--no-auto-continue', - help='Disable auto-continue responses in headless mode (i.e. 
headless will read from stdin instead of auto-continuing)', - action='store_true', - default=False, - ) - parser.add_argument( - '--selected-repo', - help='GitHub repository to clone (format: owner/repo)', - type=str, - default=None, - ) - parser.add_argument( - '--override-cli-mode', - help='Override the default settings for CLI mode', - type=bool, - default=False, - ) - parser.add_argument( - '--log-level', - help='Set the log level', - type=str, - default=None, - ) - return parser - - def parse_arguments() -> argparse.Namespace: """Parse command line arguments.""" - parser = get_parser() + parser = get_headless_parser() args = parser.parse_args() if args.version: @@ -916,17 +778,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig: ) # Override default agent if provided - if args.agent_cls: + if hasattr(args, 'agent_cls') and args.agent_cls: config.default_agent = args.agent_cls # Set max iterations and max budget per task if provided, otherwise fall back to config values - if args.max_iterations is not None: + if hasattr(args, 'max_iterations') and args.max_iterations is not None: config.max_iterations = args.max_iterations - if args.max_budget_per_task is not None: + if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None: config.max_budget_per_task = args.max_budget_per_task # Read selected repository in config for use by CLI and main.py - if args.selected_repo is not None: + if hasattr(args, 'selected_repo') and args.selected_repo is not None: config.sandbox.selected_repo = args.selected_repo return config diff --git a/openhands/utils/term_color.py b/openhands/utils/term_color.py index 6938369da3..c1c062faa3 100644 --- a/openhands/utils/term_color.py +++ b/openhands/utils/term_color.py @@ -10,6 +10,7 @@ class TermColor(Enum): SUCCESS = 'green' ERROR = 'red' INFO = 'blue' + GREY = 'dark_grey' def colorize(text: str, color: TermColor = TermColor.WARNING) -> str: diff --git a/pyproject.toml b/pyproject.toml index c18b3d20a4..721973ce04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -166,7 +166,7 @@ joblib = "*" swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" } [tool.poetry.scripts] -openhands = "openhands.cli.main:main" +openhands = "openhands.cli.entry:main" [tool.poetry.group.testgeneval.dependencies] fuzzywuzzy = "^0.18.0" diff --git a/tests/unit/test_arg_parser.py b/tests/unit/test_arg_parser.py index 619b24b63f..c1a7bc84c7 100644 --- a/tests/unit/test_arg_parser.py +++ b/tests/unit/test_arg_parser.py @@ -1,10 +1,29 @@ import pytest -from openhands.core.config import get_parser +from openhands.core.config import ( + get_evaluation_parser, + get_headless_parser, +) -def test_parser_default_values(): - parser = get_parser() +def test_headless_parser_default_values(): + parser = get_headless_parser() + args = parser.parse_args([]) + + assert args.directory is None + assert args.task == '' + assert args.file is None + assert args.agent_cls is None + assert args.max_iterations is None + assert args.max_budget_per_task is None + assert args.llm_config is None + assert args.name == '' + assert not args.no_auto_continue + assert args.selected_repo is None + + +def test_evaluation_parser_default_values(): + parser = get_evaluation_parser() args = parser.parse_args([]) assert args.directory is None @@ -23,8 +42,8 @@ def test_parser_default_values(): assert args.selected_repo is None -def test_parser_custom_values(): - parser = get_parser() +def test_evaluation_parser_custom_values(): + 
parser = get_evaluation_parser() args = parser.parse_args( [ '-v', @@ -76,7 +95,7 @@ def test_parser_custom_values(): def test_parser_file_overrides_task(): - parser = get_parser() + parser = get_headless_parser() args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt']) assert args.task == 'task from command' @@ -84,31 +103,31 @@ def test_parser_file_overrides_task(): def test_parser_invalid_max_iterations(): - parser = get_parser() + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['-i', 'not_a_number']) def test_parser_invalid_max_budget(): - parser = get_parser() + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['-b', 'not_a_number']) -def test_parser_invalid_eval_n_limit(): - parser = get_parser() +def test_evaluation_parser_invalid_eval_n_limit(): + parser = get_evaluation_parser() with pytest.raises(SystemExit): parser.parse_args(['--eval-n-limit', 'not_a_number']) -def test_parser_invalid_eval_num_workers(): - parser = get_parser() +def test_evaluation_parser_invalid_eval_num_workers(): + parser = get_evaluation_parser() with pytest.raises(SystemExit): parser.parse_args(['--eval-num-workers', 'not_a_number']) -def test_help_message(capsys): - parser = get_parser() +def test_headless_parser_help_message(capsys): + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['--help']) captured = capsys.readouterr() @@ -126,6 +145,41 @@ def test_help_message(capsys): '-c AGENT_CLS, --agent-cls AGENT_CLS', '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS', '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK', + '-l LLM_CONFIG, --llm-config LLM_CONFIG', + '--agent-config AGENT_CONFIG', + '-n NAME, --name NAME', + '--config-file CONFIG_FILE', + '--no-auto-continue', + '--selected-repo SELECTED_REPO', + '--log-level LOG_LEVEL', + ] + + for element in expected_elements: + assert element in help_output, f"Expected '{element}' to be in the help message" + + option_count = help_output.count(' -') + assert option_count == 15, f'Expected 15 options, found {option_count}' + + +def test_evaluation_parser_help_message(capsys): + parser = get_evaluation_parser() + with pytest.raises(SystemExit): + parser.parse_args(['--help']) + captured = capsys.readouterr() + help_output = captured.out + print(help_output) + expected_elements = [ + 'usage:', + 'Run OpenHands in evaluation mode', + 'options:', + '-v, --version', + '-h, --help', + '-d DIRECTORY, --directory DIRECTORY', + '-t TASK, --task TASK', + '-f FILE, --file FILE', + '-c AGENT_CLS, --agent-cls AGENT_CLS', + '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS', + '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK', '--eval-output-dir EVAL_OUTPUT_DIR', '--eval-n-limit EVAL_N_LIMIT', '--eval-num-workers EVAL_NUM_WORKERS', @@ -137,20 +191,18 @@ def test_help_message(capsys): '--config-file CONFIG_FILE', '--no-auto-continue', '--selected-repo SELECTED_REPO', - '--override-cli-mode OVERRIDE_CLI_MODE', '--log-level LOG_LEVEL', - '--conversation CONVERSATION', ] for element in expected_elements: assert element in help_output, f"Expected '{element}' to be in the help message" option_count = help_output.count(' -') - assert option_count == 22, f'Expected 22 options, found {option_count}' + assert option_count == 20, f'Expected 20 options, found {option_count}' def test_selected_repo_format(): """Test that the selected-repo argument accepts owner/repo format.""" - parser = get_parser() + parser = 
get_headless_parser() args = parser.parse_args(['--selected-repo', 'owner/repo']) assert args.selected_repo == 'owner/repo' diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 2eb5df7492..80b53e8869 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -325,7 +325,6 @@ async def test_run_session_with_initial_action( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -345,7 +344,6 @@ async def test_main_without_task( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function without a task.""" loop = asyncio.get_running_loop() @@ -360,7 +358,9 @@ async def test_main_without_task( mock_args.name = None mock_args.file = None mock_args.conversation = None - mock_parse_args.return_value = mock_args + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -394,10 +394,9 @@ async def test_main_without_task( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -418,7 +417,6 @@ async def test_main_without_task( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -438,7 +436,6 @@ async def test_main_with_task( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a task.""" loop = asyncio.get_running_loop() @@ -451,7 +448,11 @@ async def test_main_with_task( mock_args.agent_cls = 'custom-agent' mock_args.llm_config = 'custom-config' mock_args.file = None - mock_parse_args.return_value = mock_args + mock_args.name = None + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -486,10 +487,9 @@ async def test_main_with_task( mock_run_session.side_effect = [True, False] # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -520,7 +520,6 @@ async def test_main_with_task( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -540,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a session name passes it to run_session.""" loop = asyncio.get_running_loop() @@ -556,7 +554,9 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_args.name = test_session_name # Set the session name mock_args.file = None mock_args.conversation = 
None - mock_parse_args.return_value = mock_args + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -590,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -713,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -733,7 +731,6 @@ async def test_main_security_check_fails( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function when security check fails.""" loop = asyncio.get_running_loop() @@ -743,7 +740,14 @@ async def test_main_security_check_fails( # Mock arguments mock_args = MagicMock() - mock_parse_args.return_value = mock_args + mock_args.agent_cls = None + mock_args.llm_config = None + mock_args.name = None + mock_args.file = None + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -765,10 +769,9 @@ async def test_main_security_check_fails( mock_check_security.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -779,7 +782,6 @@ async def test_main_security_check_fails( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -799,7 +801,6 @@ async def test_config_loading_order( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test the order of configuration loading in the main function. 
@@ -820,7 +821,10 @@ async def test_config_loading_order( # Add a file property to avoid file I/O errors mock_args.file = None mock_args.log_level = 'INFO' - mock_parse_args.return_value = mock_args + mock_args.name = None + mock_args.conversation = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock read_task to return a dummy task mock_read_task.return_value = 'Test task' @@ -863,10 +867,9 @@ async def test_config_loading_order( mock_run_session.return_value = False # No new session requested # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions for argument parsing and config setup - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -896,7 +899,6 @@ async def test_config_loading_order( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -918,7 +920,6 @@ async def test_main_with_file_option( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a file option.""" loop = asyncio.get_running_loop() @@ -933,7 +934,10 @@ async def test_main_with_file_option( mock_args.name = None mock_args.file = '/path/to/test/file.txt' mock_args.task = None - mock_parse_args.return_value = mock_args + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -969,10 +973,9 @@ async def test_main_with_file_option( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once()
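
Reviewer note: a minimal sketch (not part of the diff) of how the split parsers introduced here behave, assuming this branch is installed locally; the flags and defaults mirror `arg_utils.py` and `tests/unit/test_arg_parser.py` above.

```python
from openhands.core.config import get_evaluation_parser, get_headless_parser

# The headless parser keeps the common and headless-specific flags,
# but no longer carries the --eval-* options.
headless = get_headless_parser()
args = headless.parse_args(['-t', 'write a haiku'])
assert args.task == 'write a haiku'
assert not hasattr(args, 'eval_n_limit')

# The evaluation parser is a superset that also accepts the --eval-* options,
# which is why every benchmark script now calls get_evaluation_parser().
evaluation = get_evaluation_parser()
args = evaluation.parse_args(['--eval-n-limit', '5', '--eval-num-workers', '2'])
assert args.eval_n_limit == 5
assert args.eval_num_workers == 2
```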