Command line args fixes (#5990)

2025-12-26 05:48:36 +08:00 · 2025-01-05 03:58:26 +01:00 · 2025-01-05 03:58:26 +01:00 · 3d2138d9ce
commit 3d2138d9ce
parent e4cf2eee2d
9 changed files with 142 additions and 68 deletions
--- a/config.template.toml
+++ b/config.template.toml
@@ -198,6 +198,16 @@ model = "gpt-4o"
 # agent.CodeActAgent
 ##############################################################################
 [agent]
+
+# whether the browsing tool is enabled
+codeact_enable_browsing = true
+
+# whether the LLM draft editor is enabled
+codeact_enable_llm_editor = false
+
+# whether the IPython tool is enabled
+codeact_enable_jupyter = true
+
 # Name of the micro agent to use for this agent
 #micro_agent_name = ""

@@ -210,6 +220,12 @@ model = "gpt-4o"
 # LLM config group to use
 #llm_config = 'your-llm-config-group'

+# Whether to use microagents at all
+#use_microagents = true
+
+# List of microagents to disable
+#disabled_microagents = []
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -6,11 +6,10 @@ from uuid import uuid4
 from termcolor import colored

 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
-from openhands import __version__
 from openhands.core.config import (
    AppConfig,
-    get_parser,
-    load_app_config,
+    parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -84,27 +83,30 @@ def display_event(event: Event, config: AppConfig):
        display_confirmation(event.confirmation_state)


-async def main(loop):
+def read_input(config: AppConfig) -> str:
+    """Read one user message via input(): a single line, or several lines ended by '/exit' when config.cli_multiline_input is set."""
+    if config.cli_multiline_input:
+        print('Enter your message (enter "/exit" on a new line to finish):')
+        lines = []
+        while True:
+            line = input('>> ').rstrip()  # trailing whitespace is stripped before the terminator check
+            if line == '/exit':  # terminator line: ends the message and is not part of it
+                break
+            lines.append(line)
+        return '\n'.join(lines)
+    else:
+        return input('>> ').rstrip()
+
+
+async def main(loop: asyncio.AbstractEventLoop):
    """Runs the agent in CLI mode"""

-    parser = get_parser()
-    # Add the version argument
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='version',
-        version=f'{__version__}',
-        help='Show the version number and exit',
-        default=None,
-    )
-    args = parser.parse_args()
-
-    if args.version:
-        print(f'OpenHands version: {__version__}')
-        return
+    args = parse_arguments()

    logger.setLevel(logging.WARNING)
-    config = load_app_config(config_file=args.config_file)
+
+    config = setup_config_from_args(args)
+
    sid = str(uuid4())

    runtime = create_runtime(config, sid=sid, headless_mode=True)
@@ -116,9 +118,7 @@ async def main(loop):

    async def prompt_for_next_task():
        # Run input() in a thread pool to avoid blocking the event loop
-        next_message = await loop.run_in_executor(
-            None, lambda: input('How can I help? >> ')
-        )
+        next_message = await loop.run_in_executor(None, read_input, config)
        if not next_message.strip():
            await prompt_for_next_task()
        if next_message == 'exit':
--- a/openhands/core/config/__init__.py
+++ b/openhands/core/config/__init__.py
@@ -16,6 +16,7 @@ from openhands.core.config.utils import (
    load_from_env,
    load_from_toml,
    parse_arguments,
+    setup_config_from_args,
 )

 __all__ = [
@@ -34,4 +35,5 @@ __all__ = [
    'get_field_info',
    'get_parser',
    'parse_arguments',
+    'setup_config_from_args',
 ]
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -42,6 +42,8 @@ class AppConfig:
        file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited.
        file_uploads_restrict_file_types: Whether to restrict upload file types.
        file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
+        cli_multiline_input: Whether to enable multiline input in CLI. When disabled,
+            input is read line by line. When enabled, input continues until /exit command.
    """

    llms: dict[str, LLMConfig] = field(default_factory=dict)
@@ -71,6 +73,7 @@ class AppConfig:
    file_uploads_restrict_file_types: bool = False
    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
    runloop_api_key: str | None = None
+    cli_multiline_input: bool = False

    defaults_dict: ClassVar[dict] = {}

--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -2,6 +2,7 @@ import argparse
 import os
 import pathlib
 import platform
+import sys
 from dataclasses import is_dataclass
 from types import UnionType
 from typing import Any, MutableMapping, get_args, get_origin
@@ -311,8 +312,14 @@ def get_llm_config_arg(

 # Command line arguments
 def get_parser() -> argparse.ArgumentParser:
-    """Get the parser for the command line arguments."""
-    parser = argparse.ArgumentParser(description='Run an agent with a specific task')
+    """Get the argument parser."""
+    parser = argparse.ArgumentParser(description='Run the agent via CLI')
+
+    # Add version argument
+    parser.add_argument(
+        '-v', '--version', action='store_true', help='Show version information'
+    )
+
    parser.add_argument(
        '--config-file',
        type=str,
@@ -406,16 +413,23 @@ def get_parser() -> argparse.ArgumentParser:
    parser.add_argument(
        '--no-auto-continue',
        action='store_true',
-        help='Disable automatic "continue" responses. Will read from stdin instead.',
+        help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
    )
    return parser


 def parse_arguments() -> argparse.Namespace:
-    """Parse the command line arguments."""
+    """Parse command line arguments."""
    parser = get_parser()
-    parsed_args, _ = parser.parse_known_args()
-    return parsed_args
+    args = parser.parse_args()
+
+    if args.version:
+        from openhands import __version__
+
+        print(f'OpenHands version: {__version__}')
+        sys.exit(0)
+
+    return args


 def load_app_config(
@@ -435,3 +449,31 @@ def load_app_config(
        logger.DEBUG = config.debug
        logger.DISABLE_COLOR_PRINTING = config.disable_color
    return config
+
+
+def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
+    """Load config from toml and override with command line arguments.
+
+    Common setup used by both CLI and main.py entry points. Raises ValueError when args.llm_config is given but cannot be resolved.
+    """
+    # Load base config from toml and env vars
+    config = load_app_config(config_file=args.config_file)
+
+    # Override the LLM config with the one named by args.llm_config, if provided
+    if args.llm_config:
+        llm_config = get_llm_config_arg(args.llm_config)
+        if llm_config is None:
+            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
+        config.set_llm_config(llm_config)
+
+    # Override default agent only if provided; otherwise the loaded default_agent is kept
+    if args.agent_cls:
+        config.default_agent = args.agent_cls
+
+    # Compared with `is not None` so an explicit falsy value (e.g. 0) still overrides; otherwise the config values stand
+    if args.max_iterations is not None:
+        config.max_iterations = args.max_iterations
+    if args.max_budget_per_task is not None:
+        config.max_budget_per_task = args.max_budget_per_task
+
+    return config
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -9,9 +9,8 @@ from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import (
    AppConfig,
-    get_llm_config_arg,
-    load_app_config,
    parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -51,6 +50,21 @@ def read_task_from_stdin() -> str:
    return sys.stdin.read()


+def read_input(config: AppConfig) -> str:
+    """Read one user message via input(): a single line, or several lines ended by '/exit' when config.cli_multiline_input is set."""
+    if config.cli_multiline_input:
+        print('Enter your message (enter "/exit" on a new line to finish):')
+        lines = []
+        while True:
+            line = input('>> ').rstrip()  # trailing whitespace is stripped before the terminator check
+            if line == '/exit':  # terminator line: ends the message and is not part of it
+                break
+            lines.append(line)
+        return '\n'.join(lines)
+    else:
+        return input('>> ').rstrip()
+
+
 async def run_controller(
    config: AppConfig,
    initial_user_action: Action,
@@ -120,9 +134,7 @@ async def run_controller(
                if exit_on_message:
                    message = '/exit'
                elif fake_user_response_fn is None:
-                    # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
-                    print('Request user input (press Ctrl+D/Z when done) >> ')
-                    message = sys.stdin.read().rstrip()
+                    message = read_input(config)
                else:
                    message = fake_user_response_fn(controller.get_state())
                action = MessageAction(content=message)
@@ -195,31 +207,13 @@ if __name__ == '__main__':
    else:
        raise ValueError('No task provided. Please specify a task through -t, -f.')
    initial_user_action: MessageAction = MessageAction(content=task_str)
-    # Load the app config
-    # this will load config from config.toml in the current directory
-    # as well as from the environment variables
-    config = load_app_config(config_file=args.config_file)

-    # Override default LLM configs ([llm] section in config.toml)
-    if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
-        if llm_config is None:
-            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
-        config.set_llm_config(llm_config)
-
-    # Set default agent
-    config.default_agent = args.agent_cls
+    config = setup_config_from_args(args)

    # Set session name
    session_name = args.name
    sid = generate_sid(config, session_name)

-    # if max budget per task is not sent on the command line, use the config value
-    if args.max_budget_per_task is not None:
-        config.max_budget_per_task = args.max_budget_per_task
-    if args.max_iterations is not None:
-        config.max_iterations = args.max_iterations
-
    asyncio.run(
        run_controller(
            config=config,
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -122,12 +122,6 @@ class LLM(RetryMixin, DebugMixin):
        if self.is_function_calling_active():
            logger.debug('LLM: model supports function calling')

-        # Compatibility flag: use string serializer for DeepSeek models
-        # See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818
-        self._use_string_serializer = False
-        if 'deepseek' in self.config.model:
-            self._use_string_serializer = True
-
        # if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm
        if self.config.custom_tokenizer is not None:
            self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer)
@@ -449,21 +443,14 @@ class LLM(RetryMixin, DebugMixin):

        # Handle native_tool_calling user-defined configuration
        if self.config.native_tool_calling is None:
-            logger.debug(
-                f'Using default tool calling behavior based on model evaluation: {model_name_supported}'
-            )
            return model_name_supported
        elif self.config.native_tool_calling is False:
-            logger.debug('Function calling explicitly disabled via configuration')
            return False
        else:
            # try to enable native tool calling if supported by the model
            supports_fn_call = litellm.supports_function_calling(
                model=self.config.model
            )
-            logger.debug(
-                f'Function calling explicitly enabled, litellm support: {supports_fn_call}'
-            )
            return supports_fn_call

    def _post_completion(self, response: ModelResponse) -> float:
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -26,6 +26,7 @@ def test_parser_custom_values():
    parser = get_parser()
    args = parser.parse_args(
        [
+            '-v',
            '-d',
            '/path/to/dir',
            '-t',
@@ -67,6 +68,7 @@ def test_parser_custom_values():
    assert args.llm_config == 'gpt4'
    assert args.name == 'test_session'
    assert args.no_auto_continue
+    assert args.version


 def test_parser_file_overrides_task():
@@ -110,8 +112,9 @@ def test_help_message(capsys):
    print(help_output)
    expected_elements = [
        'usage:',
-        'Run an agent with a specific task',
+        'Run the agent via CLI',
        'options:',
+        '-v, --version',
        '-h, --help',
        '-d DIRECTORY, --directory DIRECTORY',
        '-t TASK, --task TASK',
@@ -134,4 +137,4 @@ def test_help_message(capsys):
        assert element in help_output, f"Expected '{element}' to be in the help message"

    option_count = help_output.count('  -')
-    assert option_count == 16, f'Expected 16 options, found {option_count}'
+    assert option_count == 17, f'Expected 17 options, found {option_count}'
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+from openhands.core.cli import read_input
+from openhands.core.config import AppConfig
+
+
+def test_single_line_input():
+    """read_input returns the one line supplied by input() when cli_multiline_input is False"""
+    config = AppConfig()
+    config.cli_multiline_input = False
+
+    with patch('builtins.input', return_value='hello world'):
+        result = read_input(config)
+        assert result == 'hello world'
+
+
+def test_multiline_input():
+    """read_input joins the entered lines with newlines and stops at '/exit' when cli_multiline_input is True"""
+    config = AppConfig()
+    config.cli_multiline_input = True
+
+    # Simulate multiple lines of input followed by /exit; the terminator itself must not appear in the result
+    mock_inputs = ['line 1', 'line 2', 'line 3', '/exit']
+
+    with patch('builtins.input', side_effect=mock_inputs):
+        result = read_input(config)
+        assert result == 'line 1\nline 2\nline 3'