From 3d2138d9ce8703aec87b4fe3d0c84b29238ae8fd Mon Sep 17 00:00:00 2001
From: Engel Nyst <enyst@users.noreply.github.com>
Date: Sun, 5 Jan 2025 03:58:26 +0100
Subject: [PATCH] Command line args fixes (#5990)

---
 config.template.toml                | 16 +++++++++
 openhands/core/cli.py               | 46 ++++++++++++------------
 openhands/core/config/__init__.py   |  2 ++
 openhands/core/config/app_config.py |  3 ++
 openhands/core/config/utils.py      | 54 +++++++++++++++++++++++++----
 openhands/core/main.py              | 42 ++++++++++------------
 openhands/llm/llm.py                | 13 -------
 tests/unit/test_arg_parser.py       |  7 ++--
 tests/unit/test_cli.py              | 27 +++++++++++++++
 9 files changed, 142 insertions(+), 68 deletions(-)
 create mode 100644 tests/unit/test_cli.py

diff --git a/config.template.toml b/config.template.toml
index 5890c5f301..de0ebf3a57 100644
--- a/config.template.toml
+++ b/config.template.toml
@@ -198,6 +198,16 @@ model = "gpt-4o"
 # agent.CodeActAgent
 ##############################################################################
 [agent]
+
+# whether the browsing tool is enabled
+codeact_enable_browsing = true
+
+# whether the LLM draft editor is enabled
+codeact_enable_llm_editor = false
+
+# whether the IPython tool is enabled
+codeact_enable_jupyter = true
+
 # Name of the micro agent to use for this agent
 #micro_agent_name = ""
 
@@ -210,6 +220,12 @@ model = "gpt-4o"
 # LLM config group to use
 #llm_config = 'your-llm-config-group'
 
+# Whether to use microagents at all
+#use_microagents = true
+
+# List of microagents to disable
+#disabled_microagents = []
+
 [agent.RepoExplorerAgent]
 # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
 # useful when an agent doesn't demand high quality but uses a lot of tokens
diff --git a/openhands/core/cli.py b/openhands/core/cli.py
index 7363a8af5a..f1f687fed5 100644
--- a/openhands/core/cli.py
+++ b/openhands/core/cli.py
@@ -6,11 +6,10 @@ from uuid import uuid4
 from termcolor import colored
 
 import openhands.agenthub  # noqa F401 (we import this to get the agents registered)
-from openhands import __version__
 from openhands.core.config import (
     AppConfig,
-    get_parser,
-    load_app_config,
+    parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -84,27 +83,30 @@ def display_event(event: Event, config: AppConfig):
         display_confirmation(event.confirmation_state)
 
 
-async def main(loop):
+def read_input(config: AppConfig) -> str:
+    """Prompt with '>> ' and return one user message, trailing whitespace stripped.
+
+    With ``config.cli_multiline_input`` set, lines are gathered until one equals
+    "/exit" (after stripping) and are returned joined by newlines.
+    """
+    if not config.cli_multiline_input:
+        return input('>> ').rstrip()
+
+    print('Enter your message (enter "/exit" on a new line to finish):')
+    # Two-argument iter() keeps prompting until the reader returns the sentinel.
+    entries = iter(lambda: input('>> ').rstrip(), '/exit')
+    return '\n'.join(entries)
+
+
+async def main(loop: asyncio.AbstractEventLoop):
     """Runs the agent in CLI mode"""
 
-    parser = get_parser()
-    # Add the version argument
-    parser.add_argument(
-        '-v',
-        '--version',
-        action='version',
-        version=f'{__version__}',
-        help='Show the version number and exit',
-        default=None,
-    )
-    args = parser.parse_args()
-
-    if args.version:
-        print(f'OpenHands version: {__version__}')
-        return
+    args = parse_arguments()
 
     logger.setLevel(logging.WARNING)
-    config = load_app_config(config_file=args.config_file)
+
+    config = setup_config_from_args(args)
+
     sid = str(uuid4())
 
     runtime = create_runtime(config, sid=sid, headless_mode=True)
@@ -116,9 +118,7 @@ async def main(loop):
 
     async def prompt_for_next_task():
         # Run input() in a thread pool to avoid blocking the event loop
-        next_message = await loop.run_in_executor(
-            None, lambda: input('How can I help? >> ')
-        )
+        next_message = await loop.run_in_executor(None, read_input, config)
         if not next_message.strip():
             await prompt_for_next_task()
         if next_message == 'exit':
diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py
index b8fefb715c..2e0f87e321 100644
--- a/openhands/core/config/__init__.py
+++ b/openhands/core/config/__init__.py
@@ -16,6 +16,7 @@ from openhands.core.config.utils import (
     load_from_env,
     load_from_toml,
     parse_arguments,
+    setup_config_from_args,
 )
 
 __all__ = [
@@ -34,4 +35,5 @@ __all__ = [
     'get_field_info',
     'get_parser',
     'parse_arguments',
+    'setup_config_from_args',
 ]
diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py
index ccdd445d77..2dbb4aeaa8 100644
--- a/openhands/core/config/app_config.py
+++ b/openhands/core/config/app_config.py
@@ -42,6 +42,8 @@ class AppConfig:
         file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited.
         file_uploads_restrict_file_types: Whether to restrict upload file types.
         file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
+        cli_multiline_input: Whether to enable multiline input in CLI. When disabled, each
+            entered line is one message. When enabled, lines are collected until a `/exit` line.
     """
 
     llms: dict[str, LLMConfig] = field(default_factory=dict)
@@ -71,6 +73,7 @@ class AppConfig:
     file_uploads_restrict_file_types: bool = False
     file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
     runloop_api_key: str | None = None
+    cli_multiline_input: bool = False
 
     defaults_dict: ClassVar[dict] = {}
 
diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py
index c375d2d553..7719ce0d59 100644
--- a/openhands/core/config/utils.py
+++ b/openhands/core/config/utils.py
@@ -2,6 +2,7 @@ import argparse
 import os
 import pathlib
 import platform
+import sys
 from dataclasses import is_dataclass
 from types import UnionType
 from typing import Any, MutableMapping, get_args, get_origin
@@ -311,8 +312,14 @@ def get_llm_config_arg(
 
 # Command line arguments
 def get_parser() -> argparse.ArgumentParser:
-    """Get the parser for the command line arguments."""
-    parser = argparse.ArgumentParser(description='Run an agent with a specific task')
+    """Get the argument parser."""
+    parser = argparse.ArgumentParser(description='Run the agent via CLI')
+
+    # Add version argument
+    parser.add_argument(
+        '-v', '--version', action='store_true', help='Show version information'
+    )
+
     parser.add_argument(
         '--config-file',
         type=str,
@@ -406,16 +413,23 @@ def get_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         '--no-auto-continue',
         action='store_true',
-        help='Disable automatic "continue" responses. Will read from stdin instead.',
+        help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
     )
     return parser
 
 
 def parse_arguments() -> argparse.Namespace:
-    """Parse the command line arguments."""
+    """Parse command line arguments; with -v/--version, print it and exit."""
     parser = get_parser()
-    parsed_args, _ = parser.parse_known_args()
-    return parsed_args
+    args = parser.parse_args()
+
+    if args.version:
+        from openhands import __version__  # imported only on the --version path
+
+        print(f'OpenHands version: {__version__}')
+        sys.exit(0)
+
+    return args
 
 
 def load_app_config(
@@ -435,3 +449,31 @@ def load_app_config(
         logger.DEBUG = config.debug
         logger.DISABLE_COLOR_PRINTING = config.disable_color
     return config
+
+
+def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
+    """Build the AppConfig for a run, letting command line values win.
+
+    Shared by the CLI and main.py entry points. Raises ValueError when the
+    LLM config named by ``args.llm_config`` cannot be read.
+    """
+    app_config = load_app_config(config_file=args.config_file)
+
+    # A named LLM config, when given, overrides the loaded LLM settings.
+    llm_group = args.llm_config
+    if llm_group:
+        selected_llm = get_llm_config_arg(llm_group)
+        if selected_llm is None:
+            raise ValueError(f'Invalid toml file, cannot read {llm_group}')
+        app_config.set_llm_config(selected_llm)
+
+    if args.agent_cls:
+        app_config.default_agent = args.agent_cls
+
+    # Limits: only an explicitly passed value replaces the loaded one.
+    for limit in ('max_iterations', 'max_budget_per_task'):
+        cli_value = getattr(args, limit)
+        if cli_value is not None:
+            setattr(app_config, limit, cli_value)
+
+    return app_config
diff --git a/openhands/core/main.py b/openhands/core/main.py
index d0436f20e4..65e3286483 100644
--- a/openhands/core/main.py
+++ b/openhands/core/main.py
@@ -9,9 +9,8 @@ from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import (
     AppConfig,
-    get_llm_config_arg,
-    load_app_config,
     parse_arguments,
+    setup_config_from_args,
 )
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.loop import run_agent_until_done
@@ -51,6 +50,21 @@ def read_task_from_stdin() -> str:
     return sys.stdin.read()
 
 
+def read_input(config: AppConfig) -> str:
+    """Prompt with '>> ' and return one user message, trailing whitespace stripped.
+
+    With ``config.cli_multiline_input`` set, lines are gathered until one equals
+    "/exit" (after stripping) and are returned joined by newlines.
+    """
+    if not config.cli_multiline_input:
+        return input('>> ').rstrip()
+
+    print('Enter your message (enter "/exit" on a new line to finish):')
+    # Two-argument iter() keeps prompting until the reader returns the sentinel.
+    entries = iter(lambda: input('>> ').rstrip(), '/exit')
+    return '\n'.join(entries)
+
+
 async def run_controller(
     config: AppConfig,
     initial_user_action: Action,
@@ -120,9 +134,7 @@ async def run_controller(
                 if exit_on_message:
                     message = '/exit'
                 elif fake_user_response_fn is None:
-                    # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
-                    print('Request user input (press Ctrl+D/Z when done) >> ')
-                    message = sys.stdin.read().rstrip()
+                    message = read_input(config)
                 else:
                     message = fake_user_response_fn(controller.get_state())
                 action = MessageAction(content=message)
@@ -195,31 +207,13 @@ if __name__ == '__main__':
     else:
         raise ValueError('No task provided. Please specify a task through -t, -f.')
     initial_user_action: MessageAction = MessageAction(content=task_str)
-    # Load the app config
-    # this will load config from config.toml in the current directory
-    # as well as from the environment variables
-    config = load_app_config(config_file=args.config_file)
 
-    # Override default LLM configs ([llm] section in config.toml)
-    if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
-        if llm_config is None:
-            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
-        config.set_llm_config(llm_config)
-
-    # Set default agent
-    config.default_agent = args.agent_cls
+    config = setup_config_from_args(args)
 
     # Set session name
     session_name = args.name
     sid = generate_sid(config, session_name)
 
-    # if max budget per task is not sent on the command line, use the config value
-    if args.max_budget_per_task is not None:
-        config.max_budget_per_task = args.max_budget_per_task
-    if args.max_iterations is not None:
-        config.max_iterations = args.max_iterations
-
     asyncio.run(
         run_controller(
             config=config,
diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py
index cb0778765e..743d6535ba 100644
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -122,12 +122,6 @@ class LLM(RetryMixin, DebugMixin):
         if self.is_function_calling_active():
             logger.debug('LLM: model supports function calling')
 
-        # Compatibility flag: use string serializer for DeepSeek models
-        # See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818
-        self._use_string_serializer = False
-        if 'deepseek' in self.config.model:
-            self._use_string_serializer = True
-
         # if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm
         if self.config.custom_tokenizer is not None:
             self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer)
@@ -449,21 +443,14 @@ class LLM(RetryMixin, DebugMixin):
 
         # Handle native_tool_calling user-defined configuration
         if self.config.native_tool_calling is None:
-            logger.debug(
-                f'Using default tool calling behavior based on model evaluation: {model_name_supported}'
-            )
             return model_name_supported
         elif self.config.native_tool_calling is False:
-            logger.debug('Function calling explicitly disabled via configuration')
             return False
         else:
             # try to enable native tool calling if supported by the model
             supports_fn_call = litellm.supports_function_calling(
                 model=self.config.model
             )
-            logger.debug(
-                f'Function calling explicitly enabled, litellm support: {supports_fn_call}'
-            )
             return supports_fn_call
 
     def _post_completion(self, response: ModelResponse) -> float:
diff --git a/tests/unit/test_arg_parser.py b/tests/unit/test_arg_parser.py
index ebfa629a5f..51c736f19c 100644
--- a/tests/unit/test_arg_parser.py
+++ b/tests/unit/test_arg_parser.py
@@ -26,6 +26,7 @@ def test_parser_custom_values():
     parser = get_parser()
     args = parser.parse_args(
         [
+            '-v',
             '-d',
             '/path/to/dir',
             '-t',
@@ -67,6 +68,7 @@ def test_parser_custom_values():
     assert args.llm_config == 'gpt4'
     assert args.name == 'test_session'
     assert args.no_auto_continue
+    assert args.version
 
 
 def test_parser_file_overrides_task():
@@ -110,8 +112,9 @@ def test_help_message(capsys):
     print(help_output)
     expected_elements = [
         'usage:',
-        'Run an agent with a specific task',
+        'Run the agent via CLI',
         'options:',
+        '-v, --version',
         '-h, --help',
         '-d DIRECTORY, --directory DIRECTORY',
         '-t TASK, --task TASK',
@@ -134,4 +137,4 @@ def test_help_message(capsys):
         assert element in help_output, f"Expected '{element}' to be in the help message"
 
     option_count = help_output.count('  -')
-    assert option_count == 16, f'Expected 16 options, found {option_count}'
+    assert option_count == 17, f'Expected 17 options, found {option_count}'
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
new file mode 100644
index 0000000000..520d85d2aa
--- /dev/null
+++ b/tests/unit/test_cli.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+from openhands.core.cli import read_input
+from openhands.core.config import AppConfig
+
+
+def test_single_line_input():
+    """With cli_multiline_input off, read_input returns the single line entered."""
+    app_config = AppConfig()
+    app_config.cli_multiline_input = False
+
+    with patch('builtins.input', return_value='hello world'):
+        message = read_input(app_config)
+    assert message == 'hello world'
+
+
+def test_multiline_input():
+    """With cli_multiline_input on, lines before "/exit" come back newline-joined."""
+    app_config = AppConfig()
+    app_config.cli_multiline_input = True
+
+    # Three content lines, then the "/exit" terminator that must not be returned.
+    typed = ['line 1', 'line 2', 'line 3', '/exit']
+
+    with patch('builtins.input', side_effect=typed):
+        message = read_input(app_config)
+    assert message == 'line 1\nline 2\nline 3'