Command line args fixes (#5990)

This commit is contained in:
Engel Nyst 2025-01-05 03:58:26 +01:00 committed by GitHub
parent e4cf2eee2d
commit 3d2138d9ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 142 additions and 68 deletions

View File

@ -198,6 +198,16 @@ model = "gpt-4o"
# agent.CodeActAgent
##############################################################################
[agent]
# whether the browsing tool is enabled
codeact_enable_browsing = true
# whether the LLM draft editor is enabled
codeact_enable_llm_editor = false
# whether the IPython tool is enabled
codeact_enable_jupyter = true
# Name of the micro agent to use for this agent
#micro_agent_name = ""
@ -210,6 +220,12 @@ model = "gpt-4o"
# LLM config group to use
#llm_config = 'your-llm-config-group'
# Whether to use microagents at all
#use_microagents = true
# List of microagents to disable
#disabled_microagents = []
[agent.RepoExplorerAgent]
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
# useful when an agent doesn't demand high quality but uses a lot of tokens

View File

@ -6,11 +6,10 @@ from uuid import uuid4
from termcolor import colored
import openhands.agenthub # noqa F401 (we import this to get the agents registered)
from openhands import __version__
from openhands.core.config import (
AppConfig,
get_parser,
load_app_config,
parse_arguments,
setup_config_from_args,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.loop import run_agent_until_done
@ -84,27 +83,30 @@ def display_event(event: Event, config: AppConfig):
display_confirmation(event.confirmation_state)
async def main(loop):
def read_input(config: AppConfig) -> str:
    """Read one user message from stdin according to the config.

    When ``config.cli_multiline_input`` is false, a single line is read.
    When it is true, lines are collected until a line consisting solely of
    ``/exit`` is entered, and the collected lines are joined with newlines.

    End-of-input (Ctrl+D / Ctrl+Z, or a closed pipe) in the middle of a
    multiline message finishes the message and returns what was entered so
    far instead of discarding it.

    Args:
        config: Application config; only ``cli_multiline_input`` is read.

    Returns:
        The message, with trailing whitespace stripped from every line.

    Raises:
        EOFError: If stdin is exhausted before any line could be read.
    """
    if not config.cli_multiline_input:
        return input('>> ').rstrip()

    print('Enter your message (enter "/exit" on a new line to finish):')
    lines: list[str] = []
    while True:
        try:
            line = input('>> ').rstrip()
        except EOFError:
            if not lines:
                # Nothing was entered at all: surface end-of-input to the
                # caller rather than returning an empty message.
                raise
            # Keep the lines already typed; EOF acts like "/exit".
            break
        if line == '/exit':  # finish input
            break
        lines.append(line)
    return '\n'.join(lines)
async def main(loop: asyncio.AbstractEventLoop):
"""Runs the agent in CLI mode"""
parser = get_parser()
# Add the version argument
parser.add_argument(
'-v',
'--version',
action='version',
version=f'{__version__}',
help='Show the version number and exit',
default=None,
)
args = parser.parse_args()
if args.version:
print(f'OpenHands version: {__version__}')
return
args = parse_arguments()
logger.setLevel(logging.WARNING)
config = load_app_config(config_file=args.config_file)
config = setup_config_from_args(args)
sid = str(uuid4())
runtime = create_runtime(config, sid=sid, headless_mode=True)
@ -116,9 +118,7 @@ async def main(loop):
async def prompt_for_next_task():
# Run input() in a thread pool to avoid blocking the event loop
next_message = await loop.run_in_executor(
None, lambda: input('How can I help? >> ')
)
next_message = await loop.run_in_executor(None, read_input, config)
if not next_message.strip():
await prompt_for_next_task()
if next_message == 'exit':

View File

@ -16,6 +16,7 @@ from openhands.core.config.utils import (
load_from_env,
load_from_toml,
parse_arguments,
setup_config_from_args,
)
__all__ = [
@ -34,4 +35,5 @@ __all__ = [
'get_field_info',
'get_parser',
'parse_arguments',
'setup_config_from_args',
]

View File

@ -42,6 +42,8 @@ class AppConfig:
file_uploads_max_file_size_mb: Maximum file upload size in MB. `0` means unlimited.
file_uploads_restrict_file_types: Whether to restrict upload file types.
file_uploads_allowed_extensions: Allowed file extensions. `['.*']` allows all.
cli_multiline_input: Whether to enable multiline input in CLI. When disabled,
each message is a single line. When enabled, a message may span several lines
and ends when a line containing only `/exit` is entered.
"""
llms: dict[str, LLMConfig] = field(default_factory=dict)
@ -71,6 +73,7 @@ class AppConfig:
file_uploads_restrict_file_types: bool = False
file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
runloop_api_key: str | None = None
cli_multiline_input: bool = False
defaults_dict: ClassVar[dict] = {}

View File

@ -2,6 +2,7 @@ import argparse
import os
import pathlib
import platform
import sys
from dataclasses import is_dataclass
from types import UnionType
from typing import Any, MutableMapping, get_args, get_origin
@ -311,8 +312,14 @@ def get_llm_config_arg(
# Command line arguments
def get_parser() -> argparse.ArgumentParser:
"""Get the parser for the command line arguments."""
parser = argparse.ArgumentParser(description='Run an agent with a specific task')
"""Get the argument parser."""
parser = argparse.ArgumentParser(description='Run the agent via CLI')
# Add version argument
parser.add_argument(
'-v', '--version', action='store_true', help='Show version information'
)
parser.add_argument(
'--config-file',
type=str,
@ -406,16 +413,23 @@ def get_parser() -> argparse.ArgumentParser:
parser.add_argument(
'--no-auto-continue',
action='store_true',
help='Disable automatic "continue" responses. Will read from stdin instead.',
help='Disable automatic "continue" responses in headless mode. Will read from stdin instead.',
)
return parser
def parse_arguments() -> argparse.Namespace:
"""Parse the command line arguments."""
"""Parse command line arguments."""
parser = get_parser()
parsed_args, _ = parser.parse_known_args()
return parsed_args
args = parser.parse_args()
if args.version:
from openhands import __version__
print(f'OpenHands version: {__version__}')
sys.exit(0)
return args
def load_app_config(
@ -435,3 +449,31 @@ def load_app_config(
logger.DEBUG = config.debug
logger.DISABLE_COLOR_PRINTING = config.disable_color
return config
def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
    """Build the application config and apply command line overrides.

    Shared by the CLI and main.py entry points.

    Args:
        args: Parsed command line arguments. The attributes read are
            ``config_file``, ``llm_config``, ``agent_cls``,
            ``max_iterations`` and ``max_budget_per_task``.

    Returns:
        The config returned by ``load_app_config`` with any provided
        command line values applied on top.

    Raises:
        ValueError: If ``args.llm_config`` is given but
            ``get_llm_config_arg`` returns ``None`` for it.
    """
    # Base configuration (toml file plus environment variables)
    app_config = load_app_config(config_file=args.config_file)

    # An LLM config group named on the command line replaces the default one
    if args.llm_config:
        selected_llm = get_llm_config_arg(args.llm_config)
        if selected_llm is None:
            raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
        app_config.set_llm_config(selected_llm)

    # The agent class is only overridden when one was actually passed
    if args.agent_cls:
        app_config.default_agent = args.agent_cls

    # Numeric limits: ``None`` means "not given", so the config value stays
    for option in ('max_iterations', 'max_budget_per_task'):
        cli_value = getattr(args, option)
        if cli_value is not None:
            setattr(app_config, option, cli_value)

    return app_config

View File

@ -9,9 +9,8 @@ from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import (
AppConfig,
get_llm_config_arg,
load_app_config,
parse_arguments,
setup_config_from_args,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.loop import run_agent_until_done
@ -51,6 +50,21 @@ def read_task_from_stdin() -> str:
return sys.stdin.read()
def read_input(config: AppConfig) -> str:
    """Read one user message from stdin according to the config.

    When ``config.cli_multiline_input`` is false, a single line is read.
    When it is true, lines are collected until a line consisting solely of
    ``/exit`` is entered, and the collected lines are joined with newlines.

    End-of-input (Ctrl+D / Ctrl+Z, or a closed pipe) in the middle of a
    multiline message finishes the message and returns what was entered so
    far instead of discarding it.

    Args:
        config: Application config; only ``cli_multiline_input`` is read.

    Returns:
        The message, with trailing whitespace stripped from every line.

    Raises:
        EOFError: If stdin is exhausted before any line could be read.
    """
    if not config.cli_multiline_input:
        return input('>> ').rstrip()

    print('Enter your message (enter "/exit" on a new line to finish):')
    lines: list[str] = []
    while True:
        try:
            line = input('>> ').rstrip()
        except EOFError:
            if not lines:
                # Nothing was entered at all: surface end-of-input to the
                # caller rather than returning an empty message.
                raise
            # Keep the lines already typed; EOF acts like "/exit".
            break
        if line == '/exit':  # finish input
            break
        lines.append(line)
    return '\n'.join(lines)
async def run_controller(
config: AppConfig,
initial_user_action: Action,
@ -120,9 +134,7 @@ async def run_controller(
if exit_on_message:
message = '/exit'
elif fake_user_response_fn is None:
# read until EOF (Ctrl+D on Unix, Ctrl+Z on Windows)
print('Request user input (press Ctrl+D/Z when done) >> ')
message = sys.stdin.read().rstrip()
message = read_input(config)
else:
message = fake_user_response_fn(controller.get_state())
action = MessageAction(content=message)
@ -195,31 +207,13 @@ if __name__ == '__main__':
else:
raise ValueError('No task provided. Please specify a task through -t, -f.')
initial_user_action: MessageAction = MessageAction(content=task_str)
# Load the app config
# this will load config from config.toml in the current directory
# as well as from the environment variables
config = load_app_config(config_file=args.config_file)
# Override default LLM configs ([llm] section in config.toml)
if args.llm_config:
llm_config = get_llm_config_arg(args.llm_config)
if llm_config is None:
raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
config.set_llm_config(llm_config)
# Set default agent
config.default_agent = args.agent_cls
config = setup_config_from_args(args)
# Set session name
session_name = args.name
sid = generate_sid(config, session_name)
# if max budget per task is not sent on the command line, use the config value
if args.max_budget_per_task is not None:
config.max_budget_per_task = args.max_budget_per_task
if args.max_iterations is not None:
config.max_iterations = args.max_iterations
asyncio.run(
run_controller(
config=config,

View File

@ -122,12 +122,6 @@ class LLM(RetryMixin, DebugMixin):
if self.is_function_calling_active():
logger.debug('LLM: model supports function calling')
# Compatibility flag: use string serializer for DeepSeek models
# See this issue: https://github.com/All-Hands-AI/OpenHands/issues/5818
self._use_string_serializer = False
if 'deepseek' in self.config.model:
self._use_string_serializer = True
# if using a custom tokenizer, make sure it's loaded and accessible in the format expected by litellm
if self.config.custom_tokenizer is not None:
self.tokenizer = create_pretrained_tokenizer(self.config.custom_tokenizer)
@ -449,21 +443,14 @@ class LLM(RetryMixin, DebugMixin):
# Handle native_tool_calling user-defined configuration
if self.config.native_tool_calling is None:
logger.debug(
f'Using default tool calling behavior based on model evaluation: {model_name_supported}'
)
return model_name_supported
elif self.config.native_tool_calling is False:
logger.debug('Function calling explicitly disabled via configuration')
return False
else:
# try to enable native tool calling if supported by the model
supports_fn_call = litellm.supports_function_calling(
model=self.config.model
)
logger.debug(
f'Function calling explicitly enabled, litellm support: {supports_fn_call}'
)
return supports_fn_call
def _post_completion(self, response: ModelResponse) -> float:

View File

@ -26,6 +26,7 @@ def test_parser_custom_values():
parser = get_parser()
args = parser.parse_args(
[
'-v',
'-d',
'/path/to/dir',
'-t',
@ -67,6 +68,7 @@ def test_parser_custom_values():
assert args.llm_config == 'gpt4'
assert args.name == 'test_session'
assert args.no_auto_continue
assert args.version
def test_parser_file_overrides_task():
@ -110,8 +112,9 @@ def test_help_message(capsys):
print(help_output)
expected_elements = [
'usage:',
'Run an agent with a specific task',
'Run the agent via CLI',
'options:',
'-v, --version',
'-h, --help',
'-d DIRECTORY, --directory DIRECTORY',
'-t TASK, --task TASK',
@ -134,4 +137,4 @@ def test_help_message(capsys):
assert element in help_output, f"Expected '{element}' to be in the help message"
option_count = help_output.count(' -')
assert option_count == 16, f'Expected 16 options, found {option_count}'
assert option_count == 17, f'Expected 17 options, found {option_count}'

27
tests/unit/test_cli.py Normal file
View File

@ -0,0 +1,27 @@
from unittest.mock import patch
from openhands.core.cli import read_input
from openhands.core.config import AppConfig
def test_single_line_input():
    """With cli_multiline_input off, read_input returns the single line typed."""
    app_config = AppConfig()
    app_config.cli_multiline_input = False

    # input() is patched to hand back exactly one line
    with patch('builtins.input', return_value='hello world'):
        assert read_input(app_config) == 'hello world'
def test_multiline_input():
    """With cli_multiline_input on, lines are joined until /exit is entered."""
    app_config = AppConfig()
    app_config.cli_multiline_input = True

    # Three content lines, then the /exit terminator that ends the message
    typed_lines = ['line 1', 'line 2', 'line 3']
    with patch('builtins.input', side_effect=[*typed_lines, '/exit']):
        message = read_input(app_config)

    assert message == '\n'.join(typed_lines)