From 3d2138d9ce8703aec87b4fe3d0c84b29238ae8fd Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Sun, 5 Jan 2025 03:58:26 +0100 Subject: [PATCH] Command line args fixes (#5990) --- config.template.toml | 16 +++++++++ openhands/core/cli.py | 46 ++++++++++++------------ openhands/core/config/__init__.py | 2 ++ openhands/core/config/app_config.py | 3 ++ openhands/core/config/utils.py | 54 +++++++++++++++++++++++++---- openhands/core/main.py | 42 ++++++++++------------ openhands/llm/llm.py | 13 ------- tests/unit/test_arg_parser.py | 7 ++-- tests/unit/test_cli.py | 27 +++++++++++++++ 9 files changed, 142 insertions(+), 68 deletions(-) create mode 100644 tests/unit/test_cli.py diff --git a/config.template.toml b/config.template.toml index 5890c5f301..de0ebf3a57 100644 --- a/config.template.toml +++ b/config.template.toml @@ -198,6 +198,16 @@ model = "gpt-4o" # agent.CodeActAgent ############################################################################## [agent] + +# whether the browsing tool is enabled +codeact_enable_browsing = true + +# whether the LLM draft editor is enabled +codeact_enable_llm_editor = false + +# whether the IPython tool is enabled +codeact_enable_jupyter = true + # Name of the micro agent to use for this agent #micro_agent_name = "" @@ -210,6 +220,12 @@ model = "gpt-4o" # LLM config group to use #llm_config = 'your-llm-config-group' +# Whether to use microagents at all +#use_microagents = true + +# List of microagents to disable +#disabled_microagents = [] + [agent.RepoExplorerAgent] # Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially # useful when an agent doesn't demand high quality but uses a lot of tokens diff --git a/openhands/core/cli.py b/openhands/core/cli.py index 7363a8af5a..f1f687fed5 100644 --- a/openhands/core/cli.py +++ b/openhands/core/cli.py @@ -6,11 +6,10 @@ from uuid import uuid4 from termcolor import colored import openhands.agenthub # noqa F401 (we import this to get the agents registered) -from openhands import __version__ from openhands.core.config import ( AppConfig, - get_parser, - load_app_config, + parse_arguments, + setup_config_from_args, ) from openhands.core.logger import openhands_logger as logger from openhands.core.loop import run_agent_until_done @@ -84,27 +83,30 @@ def display_event(event: Event, config: AppConfig): display_confirmation(event.confirmation_state) -async def main(loop): +def read_input(config: AppConfig) -> str: + """Read input from user based on config settings.""" + if config.cli_multiline_input: + print('Enter your message (enter "/exit" on a new line to finish):') + lines = [] + while True: + line = input('>> ').rstrip() + if line == '/exit': # finish input + break + lines.append(line) + return '\n'.join(lines) + else: + return input('>> ').rstrip() + + +async def main(loop: asyncio.AbstractEventLoop): """Runs the agent in CLI mode""" - parser = get_parser() - # Add the version argument - parser.add_argument( - '-v', - '--version', - action='version', - version=f'{__version__}', - help='Show the version number and exit', - default=None, - ) - args = parser.parse_args() - - if args.version: - print(f'OpenHands version: {__version__}') - return + args = parse_arguments() logger.setLevel(logging.WARNING) - config = load_app_config(config_file=args.config_file) + + config = setup_config_from_args(args) + sid = str(uuid4()) runtime = create_runtime(config, sid=sid, headless_mode=True) @@ -116,9 +118,7 @@ async def main(loop): async def prompt_for_next_task(): # Run input() in a thread pool to avoid blocking the event loop - next_message = await loop.run_in_executor( - None, lambda: input('How can I help? >> ') - ) + next_message = await loop.run_in_executor(None, read_input, config) if not next_message.strip(): await prompt_for_next_task() if next_message == 'exit': diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py index b8fefb715c..2e0f87e321 100644 --- a/openhands/core/config/__init__.py +++ b/openhands/core/config/__init__.py @@ -16,6 +16,7 @@ from openhands.core.config.utils import ( load_from_env, load_from_toml, parse_arguments, + setup_config_from_args, ) __all__ = [ @@ -34,4 +35,5 @@ __all__ = [ 'get_field_info', 'get_parser', 'parse_arguments', + 'setup_config_from_args', ] diff --git a/openhands/core/config/app_config.py b/openhands/core/config/app_config.py index ccdd445d77..2dbb4aeaa8 100644 --- a/openhands/core/config/app_config.py +++ b/openhands/core/config/app_config.py @@ -42,6 +42,8 @@ class AppConfig: file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited. file_uploads_restrict_file_types: Whether to restrict upload file types. file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all. + cli_multiline_input: Whether to enable multiline input in CLI. When disabled, + input is read line by line. When enabled, input continues until /exit command. """ llms: dict[str, LLMConfig] = field(default_factory=dict) @@ -71,6 +73,7 @@ class AppConfig: file_uploads_restrict_file_types: bool = False file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*']) runloop_api_key: str | None = None + cli_multiline_input: bool = False defaults_dict: ClassVar[dict] = {} diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index c375d2d553..7719ce0d59 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -2,6 +2,7 @@ import argparse import os import pathlib import platform +import sys from dataclasses import is_dataclass from types import UnionType from typing import Any, MutableMapping, get_args, get_origin @@ -311,8 +312,14 @@ def get_llm_config_arg( # Command line arguments def get_parser() -> argparse.ArgumentParser: - """Get the parser for the command line arguments.""" - parser = argparse.ArgumentParser(description='Run an agent with a specific task') + """Get the argument parser.""" + parser = argparse.ArgumentParser(description='Run the agent via CLI') + + # Add version argument + parser.add_argument( + '-v', '--version', action='store_true', help='Show version information' + ) + parser.add_argument( '--config-file', type=str, @@ -406,16 +413,23 @@ def get_parser() -> argparse.ArgumentParser: parser.add_argument( '--no-auto-continue', action='store_true', - help='Disable automatic "continue" responses. Will read from stdin instead.', + help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.', ) return parser def parse_arguments() -> argparse.Namespace: - """Parse the command line arguments.""" + """Parse command line arguments.""" parser = get_parser() - parsed_args, _ = parser.parse_known_args() - return parsed_args + args = parser.parse_args() + + if args.version: + from openhands import __version__ + + print(f'OpenHands version: {__version__}') + sys.exit(0) + + return args def load_app_config( @@ -435,3 +449,31 @@ def load_app_config( logger.DEBUG = config.debug logger.DISABLE_COLOR_PRINTING = config.disable_color return config + + +def setup_config_from_args(args: argparse.Namespace) -> AppConfig: + """Load config from toml and override with command line arguments. + + Common setup used by both CLI and main.py entry points. + """ + # Load base config from toml and env vars + config = load_app_config(config_file=args.config_file) + + # Override with command line arguments if provided + if args.llm_config: + llm_config = get_llm_config_arg(args.llm_config) + if llm_config is None: + raise ValueError(f'Invalid toml file, cannot read {args.llm_config}') + config.set_llm_config(llm_config) + + # Override default agent if provided + if args.agent_cls: + config.default_agent = args.agent_cls + + # Set max iterations and max budget per task if provided, otherwise fall back to config values + if args.max_iterations is not None: + config.max_iterations = args.max_iterations + if args.max_budget_per_task is not None: + config.max_budget_per_task = args.max_budget_per_task + + return config diff --git a/openhands/core/main.py b/openhands/core/main.py index d0436f20e4..65e3286483 100644 --- a/openhands/core/main.py +++ b/openhands/core/main.py @@ -9,9 +9,8 @@ from openhands.controller.agent import Agent from openhands.controller.state.state import State from openhands.core.config import ( AppConfig, - get_llm_config_arg, - load_app_config, parse_arguments, + setup_config_from_args, ) from openhands.core.logger import openhands_logger as logger from openhands.core.loop import run_agent_until_done @@ -51,6 +50,21 @@ def read_task_from_stdin() -> str: return sys.stdin.read() +def read_input(config: AppConfig) -> str: + """Read input from user based on config settings.""" + if config.cli_multiline_input: + print('Enter your message (enter "/exit" on a new line to finish):') + lines = [] + while True: + line = input('>> ').rstrip() + if line == '/exit': # finish input + break + lines.append(line) + return '\n'.join(lines) + else: + return input('>> ').rstrip() + + async def run_controller( config: AppConfig, initial_user_action: Action, @@ -120,9 +134,7 @@ async def run_controller( if exit_on_message: message = '/exit' elif fake_user_response_fn is None: - # read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows) - print('Request user input (press Ctrl+D/Z when done) >> ') - message = sys.stdin.read().rstrip() + message = read_input(config) else: message = fake_user_response_fn(controller.get_state()) action = MessageAction(content=message) @@ -195,31 +207,13 @@ if __name__ == '__main__': else: raise ValueError('No task provided. Please specify a task through -t, -f.') initial_user_action: MessageAction = MessageAction(content=task_str) - # Load the app config - # this will load config from config.toml in the current directory - # as well as from the environment variables - config = load_app_config(config_file=args.config_file) - # Override default LLM configs ([llm] section in config.toml) - if args.llm_config: - llm_config = get_llm_config_arg(args.llm_config) - if llm_config is None: - raise ValueError(f'Invalid toml file, cannot read {args.llm_config}') - config.set_llm_config(llm_config) - - # Set default agent - config.default_agent = args.agent_cls + config = setup_config_from_args(args) # Set session name session_name = args.name sid = generate_sid(config, session_name) - # if max budget per task is not sent on the command line, use the config value - if args.max_budget_per_task is not None: - config.max_budget_per_task = args.max_budget_per_task - if args.max_iterations is not None: - config.max_iterations = args.max_iterations - asyncio.run( run_controller( config=config, diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index cb0778765e..743d6535ba 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -122,12 +122,6 @@ class LLM(RetryMixin, DebugMixin): if self.is_function_calling_active(): logger.debug('LLM: model supports function calling') - # Compatibility flag: use string serializer for DeepSeek models - # See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818 - self._use_string_serializer = False - if 'deepseek' in self.config.model: - self._use_string_serializer = True - # if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm if self.config.custom_tokenizer is not None: self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer) @@ -449,21 +443,14 @@ class LLM(RetryMixin, DebugMixin): # Handle native_tool_calling user-defined configuration if self.config.native_tool_calling is None: - logger.debug( - f'Using default tool calling behavior based on model evaluation: {model_name_supported}' - ) return model_name_supported elif self.config.native_tool_calling is False: - logger.debug('Function calling explicitly disabled via configuration') return False else: # try to enable native tool calling if supported by the model supports_fn_call = litellm.supports_function_calling( model=self.config.model ) - logger.debug( - f'Function calling explicitly enabled, litellm support: {supports_fn_call}' - ) return supports_fn_call def _post_completion(self, response: ModelResponse) -> float: diff --git a/tests/unit/test_arg_parser.py b/tests/unit/test_arg_parser.py index ebfa629a5f..51c736f19c 100644 --- a/tests/unit/test_arg_parser.py +++ b/tests/unit/test_arg_parser.py @@ -26,6 +26,7 @@ def test_parser_custom_values(): parser = get_parser() args = parser.parse_args( [ + '-v', '-d', '/path/to/dir', '-t', @@ -67,6 +68,7 @@ def test_parser_custom_values(): assert args.llm_config == 'gpt4' assert args.name == 'test_session' assert args.no_auto_continue + assert args.version def test_parser_file_overrides_task(): @@ -110,8 +112,9 @@ def test_help_message(capsys): print(help_output) expected_elements = [ 'usage:', - 'Run an agent with a specific task', + 'Run the agent via CLI', 'options:', + '-v, --version', '-h, --help', '-d DIRECTORY, --directory DIRECTORY', '-t TASK, --task TASK', @@ -134,4 +137,4 @@ def test_help_message(capsys): assert element in help_output, f"Expected '{element}' to be in the help message" option_count = help_output.count(' -') - assert option_count == 16, f'Expected 16 options, found {option_count}' + assert option_count == 17, f'Expected 17 options, found {option_count}' diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py new file mode 100644 index 0000000000..520d85d2aa --- /dev/null +++ b/tests/unit/test_cli.py @@ -0,0 +1,27 @@ +from unittest.mock import patch + +from openhands.core.cli import read_input +from openhands.core.config import AppConfig + + +def test_single_line_input(): + """Test that single line input works when cli_multiline_input is False""" + config = AppConfig() + config.cli_multiline_input = False + + with patch('builtins.input', return_value='hello world'): + result = read_input(config) + assert result == 'hello world' + + +def test_multiline_input(): + """Test that multiline input works when cli_multiline_input is True""" + config = AppConfig() + config.cli_multiline_input = True + + # Simulate multiple lines of input followed by /exit + mock_inputs = ['line 1', 'line 2', 'line 3', '/exit'] + + with patch('builtins.input', side_effect=mock_inputs): + result = read_input(config) + assert result == 'line 1\nline 2\nline 3'