feat(cli): Use CLI to launch OpenHands UI server via Docker (#9783)

Co-authored-by: openhands <openhands@all-hands.dev>
2026-03-22 13:47:19 +08:00 · 2025-08-08 14:04:07 -04:00
parent 81ef363658
commit 04ff4a025b
37 changed files with 798 additions and 275 deletions
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -1,10 +1,29 @@
 import pytest

-from openhands.core.config import get_parser
+from openhands.core.config import (
+    get_evaluation_parser,
+    get_headless_parser,
+)


-def test_parser_default_values():
-    parser = get_parser()
+def test_headless_parser_default_values():
+    parser = get_headless_parser()
+    args = parser.parse_args([])
+
+    assert args.directory is None
+    assert args.task == ''
+    assert args.file is None
+    assert args.agent_cls is None
+    assert args.max_iterations is None
+    assert args.max_budget_per_task is None
+    assert args.llm_config is None
+    assert args.name == ''
+    assert not args.no_auto_continue
+    assert args.selected_repo is None
+
+
+def test_evaluation_parser_default_values():
+    parser = get_evaluation_parser()
    args = parser.parse_args([])

    assert args.directory is None
@@ -23,8 +42,8 @@ def test_parser_default_values():
    assert args.selected_repo is None


-def test_parser_custom_values():
-    parser = get_parser()
+def test_evaluation_parser_custom_values():
+    parser = get_evaluation_parser()
    args = parser.parse_args(
        [
            '-v',
@@ -76,7 +95,7 @@ def test_parser_custom_values():


 def test_parser_file_overrides_task():
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt'])

    assert args.task == 'task from command'
@@ -84,31 +103,31 @@ def test_parser_file_overrides_task():


 def test_parser_invalid_max_iterations():
-    parser = get_parser()
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['-i', 'not_a_number'])


 def test_parser_invalid_max_budget():
-    parser = get_parser()
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['-b', 'not_a_number'])


-def test_parser_invalid_eval_n_limit():
-    parser = get_parser()
+def test_evaluation_parser_invalid_eval_n_limit():
+    parser = get_evaluation_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--eval-n-limit', 'not_a_number'])


-def test_parser_invalid_eval_num_workers():
-    parser = get_parser()
+def test_evaluation_parser_invalid_eval_num_workers():
+    parser = get_evaluation_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--eval-num-workers', 'not_a_number'])


-def test_help_message(capsys):
-    parser = get_parser()
+def test_headless_parser_help_message(capsys):
+    parser = get_headless_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(['--help'])
    captured = capsys.readouterr()
@@ -126,6 +145,41 @@ def test_help_message(capsys):
        '-c AGENT_CLS, --agent-cls AGENT_CLS',
        '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
        '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
+        '-l LLM_CONFIG, --llm-config LLM_CONFIG',
+        '--agent-config AGENT_CONFIG',
+        '-n NAME, --name NAME',
+        '--config-file CONFIG_FILE',
+        '--no-auto-continue',
+        '--selected-repo SELECTED_REPO',
+        '--log-level LOG_LEVEL',
+    ]
+
+    for element in expected_elements:
+        assert element in help_output, f"Expected '{element}' to be in the help message"
+
+    option_count = help_output.count('  -')
+    assert option_count == 15, f'Expected 15 options, found {option_count}'
+
+
+def test_evaluation_parser_help_message(capsys):
+    parser = get_evaluation_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['--help'])
+    captured = capsys.readouterr()
+    help_output = captured.out
+    print(help_output)
+    expected_elements = [
+        'usage:',
+        'Run OpenHands in evaluation mode',
+        'options:',
+        '-v, --version',
+        '-h, --help',
+        '-d DIRECTORY, --directory DIRECTORY',
+        '-t TASK, --task TASK',
+        '-f FILE, --file FILE',
+        '-c AGENT_CLS, --agent-cls AGENT_CLS',
+        '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
+        '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
        '--eval-output-dir EVAL_OUTPUT_DIR',
        '--eval-n-limit EVAL_N_LIMIT',
        '--eval-num-workers EVAL_NUM_WORKERS',
@@ -137,20 +191,18 @@ def test_help_message(capsys):
        '--config-file CONFIG_FILE',
        '--no-auto-continue',
        '--selected-repo SELECTED_REPO',
-        '--override-cli-mode OVERRIDE_CLI_MODE',
        '--log-level LOG_LEVEL',
-        '--conversation CONVERSATION',
    ]

    for element in expected_elements:
        assert element in help_output, f"Expected '{element}' to be in the help message"

    option_count = help_output.count('  -')
-    assert option_count == 22, f'Expected 22 options, found {option_count}'
+    assert option_count == 20, f'Expected 20 options, found {option_count}'


 def test_selected_repo_format():
    """Test that the selected-repo argument accepts owner/repo format."""
-    parser = get_parser()
+    parser = get_headless_parser()
    args = parser.parse_args(['--selected-repo', 'owner/repo'])
    assert args.selected_repo == 'owner/repo'
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -325,7 +325,6 @@ async def test_run_session_with_initial_action(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -345,7 +344,6 @@ async def test_main_without_task(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function without a task."""
    loop = asyncio.get_running_loop()
@@ -360,7 +358,9 @@ async def test_main_without_task(
    mock_args.name = None
    mock_args.file = None
    mock_args.conversation = None
-    mock_parse_args.return_value = mock_args
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -394,10 +394,9 @@ async def test_main_without_task(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -418,7 +417,6 @@ async def test_main_without_task(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -438,7 +436,6 @@ async def test_main_with_task(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a task."""
    loop = asyncio.get_running_loop()
@@ -451,7 +448,11 @@ async def test_main_with_task(
    mock_args.agent_cls = 'custom-agent'
    mock_args.llm_config = 'custom-config'
    mock_args.file = None
-    mock_parse_args.return_value = mock_args
+    mock_args.name = None
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -486,10 +487,9 @@ async def test_main_with_task(
    mock_run_session.side_effect = [True, False]

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -520,7 +520,6 @@ async def test_main_with_task(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -540,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a session name passes it to run_session."""
    loop = asyncio.get_running_loop()
@@ -556,7 +554,9 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_args.name = test_session_name  # Set the session name
    mock_args.file = None
    mock_args.conversation = None
-    mock_parse_args.return_value = mock_args
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -590,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -713,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -733,7 +731,6 @@ async def test_main_security_check_fails(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function when security check fails."""
    loop = asyncio.get_running_loop()
@@ -743,7 +740,14 @@ async def test_main_security_check_fails(

    # Mock arguments
    mock_args = MagicMock()
-    mock_parse_args.return_value = mock_args
+    mock_args.agent_cls = None
+    mock_args.llm_config = None
+    mock_args.name = None
+    mock_args.file = None
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -765,10 +769,9 @@ async def test_main_security_check_fails(
    mock_check_security.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -779,7 +782,6 @@ async def test_main_security_check_fails(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -799,7 +801,6 @@ async def test_config_loading_order(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test the order of configuration loading in the main function.

@@ -820,7 +821,10 @@ async def test_config_loading_order(
    # Add a file property to avoid file I/O errors
    mock_args.file = None
    mock_args.log_level = 'INFO'
-    mock_parse_args.return_value = mock_args
+    mock_args.name = None
+    mock_args.conversation = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock read_task to return a dummy task
    mock_read_task.return_value = 'Test task'
@@ -863,10 +867,9 @@ async def test_config_loading_order(
    mock_run_session.return_value = False  # No new session requested

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions for argument parsing and config setup
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()
@@ -896,7 +899,6 @@ async def test_config_loading_order(


@pytest.mark.asyncio
-@patch('openhands.cli.main.parse_arguments')
@patch('openhands.cli.main.setup_config_from_args')
@patch('openhands.cli.main.FileSettingsStore.get_instance')
@patch('openhands.cli.main.check_folder_security_agreement')
@@ -918,7 +920,6 @@ async def test_main_with_file_option(
    mock_check_security,
    mock_get_settings_store,
    mock_setup_config,
-    mock_parse_args,
 ):
    """Test main function with a file option."""
    loop = asyncio.get_running_loop()
@@ -933,7 +934,10 @@ async def test_main_with_file_option(
    mock_args.name = None
    mock_args.file = '/path/to/test/file.txt'
    mock_args.task = None
-    mock_parse_args.return_value = mock_args
+    mock_args.conversation = None
+    mock_args.log_level = None
+    mock_args.config_file = 'config.toml'
+    mock_args.override_cli_mode = None

    # Mock config
    mock_config = MagicMock()
@@ -969,10 +973,9 @@ async def test_main_with_file_option(
    mock_run_session.return_value = False

    # Run the function
-    await cli.main_with_loop(loop)
+    await cli.main_with_loop(loop, mock_args)

    # Assertions
-    mock_parse_args.assert_called_once()
    mock_setup_config.assert_called_once_with(mock_args)
    mock_get_settings_store.assert_called_once()
    mock_settings_store.load.assert_called_once()