Merge branch 'main' into fix-temperature-top-p-reasoning-models

This commit is contained in:
Graham Neubig 2025-10-07 23:54:01 -04:00 committed by GitHub
commit beed3371f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 998 additions and 56 deletions

View File

@ -67,7 +67,7 @@
"@playwright/test": "^1.55.1",
"@react-router/dev": "^7.9.3",
"@tailwindcss/typography": "^0.5.19",
"@tanstack/eslint-plugin-query": "^5.90.1",
"@tanstack/eslint-plugin-query": "^5.91.0",
"@testing-library/dom": "^10.4.1",
"@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0",
@ -5954,12 +5954,12 @@
}
},
"node_modules/@tanstack/eslint-plugin-query": {
"version": "5.90.1",
"resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.90.1.tgz",
"integrity": "sha512-Ki4hl+8ZtnMFZ3amZbQl6sSMUq6L8oSJ14vmi3j5t1/SqXclL5SI/1kcuH36iIk05B/bN5pEOS1PTO3Ut/FbVA==",
"version": "5.91.0",
"resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.91.0.tgz",
"integrity": "sha512-Kn6yWyRe3dIPf7NqyDMhcsTBz2Oh8jPSOpBdlnLQhGBJ6iTMBFYA4B1UreGJ/WdfzQskSMh5imcyWF+wqa/Q5g==",
"dev": true,
"dependencies": {
"@typescript-eslint/utils": "^8.44.0"
"@typescript-eslint/utils": "^8.44.1"
},
"funding": {
"type": "github",

View File

@ -98,7 +98,7 @@
"@playwright/test": "^1.55.1",
"@react-router/dev": "^7.9.3",
"@tailwindcss/typography": "^0.5.19",
"@tanstack/eslint-plugin-query": "^5.90.1",
"@tanstack/eslint-plugin-query": "^5.91.0",
"@testing-library/dom": "^10.4.1",
"@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^16.3.0",

View File

@ -5,19 +5,22 @@ Provides a conversation interface with an AI agent using OpenHands patterns.
"""
import sys
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from datetime import datetime
from openhands.sdk import (
BaseConversation,
Message,
TextContent,
)
from openhands.sdk.conversation.state import AgentExecutionStatus
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands_cli.runner import ConversationRunner
from openhands_cli.setup import MissingAgentSpec, setup_conversation
from openhands_cli.tui.settings.mcp_screen import MCPScreen
from openhands_cli.tui.settings.settings_screen import SettingsScreen
from openhands_cli.tui.status import display_status
from openhands_cli.tui.tui import (
display_help,
display_welcome,
@ -26,6 +29,32 @@ from openhands_cli.user_actions import UserConfirmation, exit_session_confirmati
from openhands_cli.user_actions.utils import get_session_prompter
def _start_fresh_conversation(resume_conversation_id: str | None = None) -> BaseConversation:
    """Create a conversation, prompting for settings until setup succeeds.

    Each time ``setup_conversation`` raises ``MissingAgentSpec`` the basic
    settings screen is shown (non-escapable) and setup is attempted again.

    Args:
        resume_conversation_id: Optional conversation ID passed through to
            ``setup_conversation``.

    Returns:
        BaseConversation: The first truthy value returned by
        ``setup_conversation``.
    """
    screen = SettingsScreen()
    while True:
        try:
            candidate = setup_conversation(resume_conversation_id)
        except MissingAgentSpec:
            # No agent configured yet: collect settings, then retry setup.
            screen.handle_basic_settings(escapable=False)
            continue
        if candidate:
            return candidate
def _restore_tty() -> None:
"""
Ensure terminal modes are reset in case prompt_toolkit cleanup didn't run.
@ -62,17 +91,12 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
EOFError: If EOF is encountered
"""
conversation = None
settings_screen = SettingsScreen()
while not conversation:
try:
conversation = setup_conversation(resume_conversation_id)
except MissingAgentSpec:
settings_screen.handle_basic_settings(escapable=False)
conversation = _start_fresh_conversation(resume_conversation_id)
display_welcome(conversation.id, bool(resume_conversation_id))
# Track session start time for uptime calculation
session_start_time = datetime.now()
# Create conversation runner to handle state machine logic
runner = ConversationRunner(conversation)
session = get_session_prompter()
@ -118,21 +142,28 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
display_welcome(conversation.id)
continue
elif command == '/new':
try:
# Start a fresh conversation (no resume ID = new conversation)
conversation = _start_fresh_conversation()
runner = ConversationRunner(conversation)
display_welcome(conversation.id, resume=False)
print_formatted_text(
HTML('<green>✓ Started fresh conversation</green>')
)
continue
except Exception as e:
print_formatted_text(
HTML(f'<red>Error starting fresh conversation: {e}</red>')
)
continue
elif command == '/help':
display_help()
continue
elif command == '/status':
print_formatted_text(
HTML(f'<grey>Conversation ID: {conversation.id}</grey>')
)
print_formatted_text(HTML('<grey>Status: Active</grey>'))
confirmation_status = (
'enabled' if conversation.state.confirmation_mode else 'disabled'
)
print_formatted_text(
HTML(f'<grey>Confirmation mode: {confirmation_status}</grey>')
)
display_status(conversation, session_start_time=session_start_time)
continue
elif command == '/confirm':

View File

@ -0,0 +1,56 @@
"""Main argument parser for OpenHands CLI."""
import argparse
def create_main_parser() -> argparse.ArgumentParser:
    """Build the top-level parser: CLI mode by default, ``serve`` as a subcommand.

    The top level accepts ``--resume``; the ``serve`` subcommand accepts the
    boolean flags ``--mount-cwd`` and ``--gpu``. The chosen subcommand name is
    stored under ``command``.

    Returns:
        The configured argument parser.
    """
    usage_notes = """
By default, OpenHands runs in CLI mode (terminal interface).
Use 'serve' subcommand to launch the GUI server instead.
Examples:
openhands # Start CLI mode
openhands --resume conversation-id # Resume a conversation in CLI mode
openhands serve # Launch GUI server
openhands serve --gpu # Launch GUI server with GPU support
"""
    root = argparse.ArgumentParser(
        description='OpenHands CLI - Terminal User Interface for OpenHands AI Agent',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )

    # Options for the default (CLI) mode live on the root parser.
    root.add_argument('--resume', type=str, help='Conversation ID to resume')

    # 'serve' is the only subcommand.
    commands = root.add_subparsers(dest='command', help='Additional commands')
    serve = commands.add_parser(
        'serve',
        help='Launch the OpenHands GUI server using Docker (web interface)',
    )
    serve_flags = (
        ('--mount-cwd', 'Mount the current working directory in the Docker container'),
        ('--gpu', 'Enable GPU support in the Docker container'),
    )
    for flag, description in serve_flags:
        serve.add_argument(flag, action='store_true', help=description)

    return root

View File

@ -0,0 +1,31 @@
"""Argument parser for serve subcommand."""
import argparse
def add_serve_parser(subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser:
    """Register the ``serve`` subcommand on an existing subparsers action.

    Args:
        subparsers: The subparsers object that receives the ``serve`` parser.

    Returns:
        The newly created ``serve`` argument parser, with the boolean flags
        ``--mount-cwd`` and ``--gpu`` (both defaulting to ``False``).
    """
    serve = subparsers.add_parser(
        'serve',
        help='Launch the OpenHands GUI server using Docker (web interface)',
    )
    flag_help = {
        '--mount-cwd': 'Mount the current working directory into the GUI server container',
        '--gpu': 'Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
    }
    for flag, description in flag_help.items():
        serve.add_argument(flag, help=description, action='store_true', default=False)
    return serve

View File

@ -0,0 +1,229 @@
"""GUI launcher for OpenHands CLI."""
import os
import shutil
import subprocess
import sys
from pathlib import Path
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands_cli.locations import PERSISTENCE_DIR
def _format_docker_command_for_logging(cmd: list[str]) -> str:
    """Format a Docker command for logging with grey color.

    The returned string is markup meant to be wrapped in prompt_toolkit's
    ``HTML``. Command arguments (image names, mounted paths, ...) are
    escaped so that characters such as ``&`` or ``<`` are shown literally
    instead of producing malformed markup.

    Args:
        cmd (list[str]): The Docker command as a list of strings

    Returns:
        str: The formatted command string in grey HTML color
    """
    # Function-scope import so this block does not depend on file-level imports.
    from html import escape

    # quote=False: quotes are harmless in element text, only &, < and > matter.
    cmd_str = escape(' '.join(cmd), quote=False)
    return f'<grey>Running Docker command: {cmd_str}</grey>'
def check_docker_requirements() -> bool:
    """Report whether Docker can be used from this machine.

    Looks for a ``docker`` executable on PATH, then runs ``docker info``
    (10 second timeout) to see whether the daemon responds. A short
    explanation is printed for every failure.

    Returns:
        bool: True if Docker is available and running, False otherwise.
    """
    # Step 1: the binary has to be discoverable.
    docker_path = shutil.which('docker')
    if not docker_path:
        print_formatted_text(
            HTML('<ansired>❌ Docker is not installed or not in PATH.</ansired>')
        )
        print_formatted_text(
            HTML(
                '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>'
            )
        )
        return False

    # Step 2: the daemon has to answer `docker info`.
    try:
        probe = subprocess.run(
            ['docker', 'info'], capture_output=True, text=True, timeout=10
        )
    except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
        print_formatted_text(
            HTML('<ansired>❌ Failed to check Docker status.</ansired>')
        )
        print_formatted_text(HTML(f'<grey>Error: {e}</grey>'))
        return False

    if probe.returncode != 0:
        print_formatted_text(
            HTML('<ansired>❌ Docker daemon is not running.</ansired>')
        )
        print_formatted_text(
            HTML('<grey>Please start Docker and try again.</grey>')
        )
        return False

    return True
def ensure_config_dir_exists() -> Path:
    """Create ``PERSISTENCE_DIR`` (and missing parents) if needed and return it as a Path."""
    config_dir = Path(PERSISTENCE_DIR)
    config_dir.mkdir(parents=True, exist_ok=True)
    return config_dir
def get_openhands_version() -> str:
    """Return the version tag to use for the OpenHands Docker images.

    The value comes from the ``OPENHANDS_VERSION`` environment variable and
    falls back to ``'latest'`` when that variable is not set.

    Returns:
        str: The version string to use for Docker images
    """
    configured = os.getenv('OPENHANDS_VERSION')
    return 'latest' if configured is None else configured
def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
    """Launch the OpenHands GUI server using Docker.

    Verifies Docker is usable, pulls the runtime image, then runs the app
    image in the foreground with port 3000 published. Exits the process with
    status 1 when Docker is unavailable, the pull fails or times out, or
    ``docker run`` returns a non-zero status; exits with status 0 when the
    run is interrupted with Ctrl+C.

    Args:
        mount_cwd: If True, mount the current working directory into the container.
        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
    """
    # Dynamic text (paths, exception messages) must be escaped before it is
    # embedded in HTML markup; an unescaped '&' or '<' makes the markup
    # unparseable and would crash the launcher instead of printing.
    from html import escape as escape_markup

    print_formatted_text(
        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
    )
    print_formatted_text('')

    # Check Docker requirements
    if not check_docker_requirements():
        sys.exit(1)

    # Ensure config directory exists
    config_dir = ensure_config_dir_exists()

    # Get the current version for the Docker image
    version = get_openhands_version()
    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'

    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))

    # Pull the runtime image first
    pull_cmd = ['docker', 'pull', runtime_image]
    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
    try:
        subprocess.run(
            pull_cmd,
            check=True,
            timeout=300,  # 5 minutes timeout
        )
    except subprocess.CalledProcessError:
        print_formatted_text(
            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
        )
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print_formatted_text(
            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
        )
        sys.exit(1)

    print_formatted_text('')
    print_formatted_text(
        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
    )
    print_formatted_text(
        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
    )
    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
    print_formatted_text('')

    # Build the Docker command
    docker_cmd = [
        'docker',
        'run',
        '-it',
        '--rm',
        '--pull=always',
        '-e',
        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
        '-e',
        'LOG_ALL_EVENTS=true',
        '-v',
        '/var/run/docker.sock:/var/run/docker.sock',
        '-v',
        f'{config_dir}:/.openhands',
    ]

    # Add GPU support if requested
    if gpu:
        print_formatted_text(
            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
        )
        # Add the --gpus all flag right after 'docker run' to enable all GPUs
        docker_cmd.insert(2, '--gpus')
        docker_cmd.insert(3, 'all')
        # Add environment variable to pass GPU support to sandbox containers
        docker_cmd.extend(
            [
                '-e',
                'SANDBOX_ENABLE_GPU=true',
            ]
        )

    # Add current working directory mount if requested
    if mount_cwd:
        cwd = Path.cwd()
        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
        docker_cmd.extend(
            [
                '-e',
                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
            ]
        )

        # Set user ID for Unix-like systems only
        if os.name != 'nt':  # Not Windows
            try:
                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
            except (subprocess.CalledProcessError, FileNotFoundError):
                # If 'id' command fails or doesn't exist, skip setting user ID
                pass

        # Print the folder that will be mounted to inform the user
        # (escaped: directory names may contain markup characters).
        shown_cwd = escape_markup(str(cwd), quote=False)
        print_formatted_text(
            HTML(
                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{shown_cwd}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
            )
        )

    docker_cmd.extend(
        [
            '-p',
            '3000:3000',
            '--add-host',
            'host.docker.internal:host-gateway',
            '--name',
            'openhands-app',
            app_image,
        ]
    )

    try:
        # Log and run the Docker command
        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
        subprocess.run(docker_cmd, check=True)
    except subprocess.CalledProcessError as e:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
        )
        # The exception text repeats the full command line, so escape it too.
        shown_error = escape_markup(str(e), quote=False)
        print_formatted_text(HTML(f'<grey>Error: {shown_error}</grey>'))
        sys.exit(1)
    except KeyboardInterrupt:
        print_formatted_text('')
        print_formatted_text(
            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
        )
        sys.exit(0)

View File

@ -4,9 +4,9 @@ Simple main entry point for OpenHands CLI.
This is a simplified version that demonstrates the TUI functionality.
"""
import argparse
import logging
import os
import sys
import warnings
debug_env = os.getenv('DEBUG', 'false').lower()
@ -17,7 +17,7 @@ if debug_env != '1' and debug_env != 'true':
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from openhands_cli.agent_chat import run_cli_entry
from openhands_cli.argparsers.main_parser import create_main_parser
def main() -> None:
@ -27,35 +27,28 @@ def main() -> None:
ImportError: If agent chat dependencies are missing
Exception: On other error conditions
"""
parser = argparse.ArgumentParser(
description='OpenHands CLI - Terminal User Interface for OpenHands AI Agent'
)
parser.add_argument(
'--resume',
type=str,
help='Conversation ID to use for the session. If not provided, a random UUID will be generated.',
)
parser = create_main_parser()
args = parser.parse_args()
try:
# Start agent chat
run_cli_entry(resume_conversation_id=args.resume)
if args.command == 'serve':
# Import gui_launcher only when needed
from openhands_cli.gui_launcher import launch_gui_server
except ImportError as e:
print_formatted_text(
HTML(f'<red>Error: Agent chat requires additional dependencies: {e}</red>')
)
print_formatted_text(
HTML('<yellow>Please ensure the agent SDK is properly installed.</yellow>')
)
raise
launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
else:
# Default CLI behavior - no subcommand needed
# Import agent_chat only when needed
from openhands_cli.agent_chat import run_cli_entry
# Start agent chat
run_cli_entry(resume_conversation_id=args.resume)
except KeyboardInterrupt:
print_formatted_text(HTML('\n<yellow>Goodbye! 👋</yellow>'))
except EOFError:
print_formatted_text(HTML('\n<yellow>Goodbye! 👋</yellow>'))
except Exception as e:
print_formatted_text(HTML(f'<red>Error starting agent chat: {e}</red>'))
print_formatted_text(HTML(f'<red>Error: {e}</red>'))
import traceback
traceback.print_exc()

View File

@ -0,0 +1,109 @@
"""Status display components for OpenHands CLI TUI."""
from datetime import datetime
from openhands.sdk import BaseConversation
from prompt_toolkit import print_formatted_text
from prompt_toolkit.formatted_text import HTML
from prompt_toolkit.shortcuts import print_container
from prompt_toolkit.widgets import Frame, TextArea
def display_status(
    conversation: BaseConversation,
    session_start_time: datetime,
) -> None:
    """Print the conversation ID, session uptime and a usage-metrics box.

    Args:
        conversation: The conversation whose ID and combined metrics are shown
        session_start_time: Moment the session started; uptime is measured
            from it to ``datetime.now()``
    """
    # Combined metrics for the conversation
    metrics = conversation.conversation_stats.get_combined_metrics()

    # Uptime, split into whole hours / minutes / seconds
    elapsed = datetime.now() - session_start_time
    elapsed_seconds = int(elapsed.total_seconds())
    hours, leftover = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(leftover, 60)
    uptime_str = f"{hours}h {minutes}m {seconds}s"

    # Header lines: conversation ID and uptime
    print_formatted_text(HTML(f'<grey>Conversation ID: {conversation.id}</grey>'))
    print_formatted_text(HTML(f'<grey>Uptime: {uptime_str}</grey>'))
    print_formatted_text('')

    # Token counters; all zero when no usage has been recorded
    usage = metrics.accumulated_token_usage
    if usage:
        input_tokens = usage.prompt_tokens
        output_tokens = usage.completion_tokens
        cache_reads = usage.cache_read_tokens
        cache_writes = usage.cache_write_tokens
    else:
        input_tokens = output_tokens = cache_reads = cache_writes = 0
    combined_tokens = input_tokens + output_tokens
    cost = metrics.accumulated_cost

    # Render the numbers inside a framed box
    _display_usage_metrics_container(
        cost,
        input_tokens,
        output_tokens,
        cache_reads,
        cache_writes,
        combined_tokens,
    )
def _display_usage_metrics_container(
    total_cost: float,
    total_input_tokens: int,
    total_output_tokens: int,
    cache_hits: int,
    cache_writes: int,
    total_tokens: int
) -> None:
    """Print cost and token counters as an aligned table in a 'Usage Metrics' frame.

    The cost is shown with six decimals and a leading ``$``; token counts use
    thousands separators. Empty rows act as visual separators.
    """
    rows = [
        (' Total Cost (USD):', f'${total_cost:.6f}'),
        ('', ''),
        (' Total Input Tokens:', f'{total_input_tokens:,}'),
        (' Cache Hits:', f'{cache_hits:,}'),
        (' Cache Writes:', f'{cache_writes:,}'),
        (' Total Output Tokens:', f'{total_output_tokens:,}'),
        ('', ''),
        (' Total Tokens:', f'{total_tokens:,}'),
    ]

    # Pad both columns to their widest entry so the values line up
    label_width = max(len(row[0]) for row in rows)
    value_width = max(len(row[1]) for row in rows)
    body = '\n'.join(
        label.ljust(label_width) + ' ' + value.ljust(value_width)
        for label, value in rows
    )

    metrics_area = TextArea(
        text=body,
        read_only=True,
        wrap_lines=True,
    )
    print_container(Frame(metrics_area, title='Usage Metrics'))

View File

@ -17,6 +17,7 @@ COMMANDS = {
'/exit': 'Exit the application',
'/help': 'Display available commands',
'/clear': 'Clear the screen',
'/new': 'Start a fresh conversation',
'/status': 'Display conversation details',
'/confirm': 'Toggle confirmation mode on/off',
'/resume': 'Resume a paused conversation',

View File

@ -0,0 +1,201 @@
"""Tests for GUI launcher functionality."""
import os
import subprocess
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from openhands_cli.gui_launcher import (
_format_docker_command_for_logging,
check_docker_requirements,
get_openhands_version,
launch_gui_server,
)
class TestFormatDockerCommand:
    """Checks for the grey-markup Docker command formatter."""

    @pytest.mark.parametrize(
        "cmd,expected",
        [
            (
                ['docker', 'run', 'hello-world'],
                '<grey>Running Docker command: docker run hello-world</grey>',
            ),
            (
                ['docker', 'run', '-it', '--rm', '-p', '3000:3000', 'openhands:latest'],
                '<grey>Running Docker command: docker run -it --rm -p 3000:3000 openhands:latest</grey>',
            ),
            ([], '<grey>Running Docker command: </grey>'),
        ],
    )
    def test_format_docker_command(self, cmd, expected):
        """The command parts are space-joined inside the grey markup."""
        assert _format_docker_command_for_logging(cmd) == expected
class TestCheckDockerRequirements:
    """Checks for the Docker availability probe."""

    @pytest.mark.parametrize(
        "which_return,run_side_effect,expected_result,expected_print_count",
        [
            # Docker not installed
            (None, None, False, 2),
            # Docker daemon not running
            ('/usr/bin/docker', MagicMock(returncode=1), False, 2),
            # Docker timeout
            ('/usr/bin/docker', subprocess.TimeoutExpired('docker info', 10), False, 2),
            # Docker available
            ('/usr/bin/docker', MagicMock(returncode=0), True, 0),
        ],
    )
    @patch('shutil.which')
    @patch('subprocess.run')
    def test_docker_requirements(
        self, mock_run, mock_which, which_return, run_side_effect, expected_result, expected_print_count
    ):
        """Each scenario yields the expected verdict and number of printed lines."""
        mock_which.return_value = which_return

        # An exception instance is raised by the mock; anything else is returned.
        if isinstance(run_side_effect, Exception):
            mock_run.side_effect = run_side_effect
        elif run_side_effect is not None:
            mock_run.return_value = run_side_effect

        with patch('openhands_cli.gui_launcher.print_formatted_text') as mock_print:
            outcome = check_docker_requirements()

        assert outcome is expected_result
        assert mock_print.call_count == expected_print_count
class TestGetOpenHandsVersion:
    """Test version retrieval."""

    @pytest.mark.parametrize(
        "env_value,expected",
        [
            (None, 'latest'),  # No environment variable set
            ('1.2.3', '1.2.3'),  # Environment variable set
        ],
    )
    def test_version_retrieval(self, monkeypatch, env_value, expected):
        """Test version retrieval from environment.

        ``monkeypatch`` restores ``OPENHANDS_VERSION`` after the test, so the
        value cannot leak into other tests, and the "unset" case really runs
        without the variable even if the surrounding environment defines it.
        """
        if env_value is None:
            monkeypatch.delenv('OPENHANDS_VERSION', raising=False)
        else:
            monkeypatch.setenv('OPENHANDS_VERSION', env_value)

        result = get_openhands_version()

        assert result == expected
class TestLaunchGuiServer:
    """Checks for launching the GUI server through Docker."""

    @patch('openhands_cli.gui_launcher.check_docker_requirements')
    @patch('openhands_cli.gui_launcher.print_formatted_text')
    def test_launch_gui_server_docker_not_available(self, mock_print, mock_check_docker):
        """The launcher exits with status 1 when the Docker check fails."""
        mock_check_docker.return_value = False

        with pytest.raises(SystemExit) as exc_info:
            launch_gui_server()

        assert exc_info.value.code == 1

    @pytest.mark.parametrize(
        "pull_side_effect,run_side_effect,expected_exit_code,mount_cwd,gpu",
        [
            # Docker pull failure
            (subprocess.CalledProcessError(1, 'docker pull'), None, 1, False, False),
            # Docker pull timeout
            (subprocess.TimeoutExpired('docker pull', 300), None, 1, False, False),
            # Docker run failure
            (MagicMock(returncode=0), subprocess.CalledProcessError(1, 'docker run'), 1, False, False),
            # KeyboardInterrupt during run
            (MagicMock(returncode=0), KeyboardInterrupt(), 0, False, False),
            # Success with mount_cwd
            (MagicMock(returncode=0), MagicMock(returncode=0), None, True, False),
            # Success with GPU
            (MagicMock(returncode=0), MagicMock(returncode=0), None, False, True),
        ],
    )
    @patch('openhands_cli.gui_launcher.check_docker_requirements')
    @patch('openhands_cli.gui_launcher.ensure_config_dir_exists')
    @patch('openhands_cli.gui_launcher.get_openhands_version')
    @patch('subprocess.run')
    @patch('subprocess.check_output')
    @patch('pathlib.Path.cwd')
    @patch('openhands_cli.gui_launcher.print_formatted_text')
    def test_launch_gui_server_scenarios(
        self,
        mock_print,
        mock_cwd,
        mock_check_output,
        mock_run,
        mock_version,
        mock_config_dir,
        mock_check_docker,
        pull_side_effect,
        run_side_effect,
        expected_exit_code,
        mount_cwd,
        gpu,
    ):
        """Pull/run failures exit with the expected code; successes build the expected command."""
        # Fixed environment for every scenario
        mock_check_docker.return_value = True
        mock_config_dir.return_value = Path('/home/user/.openhands')
        mock_version.return_value = 'latest'
        mock_check_output.return_value = '1000\n'
        mock_cwd.return_value = Path('/current/dir')

        # subprocess.run is called for the pull first, then for the run;
        # exception instances in the list are raised, other items returned.
        mock_run.side_effect = [
            outcome
            for outcome in (pull_side_effect, run_side_effect)
            if outcome is not None
        ]

        if expected_exit_code is not None:
            with pytest.raises(SystemExit) as exc_info:
                launch_gui_server(mount_cwd=mount_cwd, gpu=gpu)
            assert exc_info.value.code == expected_exit_code
            return

        # Successful cases must not raise SystemExit
        launch_gui_server(mount_cwd=mount_cwd, gpu=gpu)

        # One call for the pull, one for the run
        assert mock_run.call_count == 2
        pull_call, run_call = mock_run.call_args_list[0], mock_run.call_args_list[1]

        # Pull command
        pull_cmd = pull_call[0][0]
        assert pull_cmd[0:3] == ['docker', 'pull', 'docker.all-hands.dev/all-hands-ai/runtime:latest-nikolaik']

        # Run command
        run_cmd = run_call[0][0]
        assert run_cmd[0:2] == ['docker', 'run']
        run_line = ' '.join(run_cmd)

        if mount_cwd:
            assert 'SANDBOX_VOLUMES=/current/dir:/workspace:rw' in run_line
            assert 'SANDBOX_USER_ID=1000' in run_line

        if gpu:
            assert '--gpus' in run_cmd
            assert 'all' in run_cmd
            assert 'SANDBOX_ENABLE_GPU=true' in run_line

View File

@ -1,15 +1,19 @@
"""Tests for main entry point functionality."""
import sys
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from openhands_cli import simple_main
from openhands_cli.simple_main import main
class TestMainEntryPoint:
"""Test the main entry point behavior."""
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands'])
def test_main_starts_agent_chat_directly(
self, mock_run_agent_chat: MagicMock
@ -24,7 +28,7 @@ class TestMainEntryPoint:
# Should call run_cli_entry with no resume conversation ID
mock_run_agent_chat.assert_called_once_with(resume_conversation_id=None)
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands'])
def test_main_handles_import_error(self, mock_run_agent_chat: MagicMock) -> None:
"""Test that main() handles ImportError gracefully."""
@ -36,7 +40,7 @@ class TestMainEntryPoint:
assert str(exc_info.value) == 'Missing dependency'
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands'])
def test_main_handles_keyboard_interrupt(
self, mock_run_agent_chat: MagicMock
@ -48,7 +52,7 @@ class TestMainEntryPoint:
# Should complete without raising an exception (graceful exit)
simple_main.main()
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands'])
def test_main_handles_eof_error(self, mock_run_agent_chat: MagicMock) -> None:
"""Test that main() handles EOFError gracefully."""
@ -58,7 +62,7 @@ class TestMainEntryPoint:
# Should complete without raising an exception (graceful exit)
simple_main.main()
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands'])
def test_main_handles_general_exception(
self, mock_run_agent_chat: MagicMock
@ -72,7 +76,7 @@ class TestMainEntryPoint:
assert str(exc_info.value) == 'Unexpected error'
@patch('openhands_cli.simple_main.run_cli_entry')
@patch('openhands_cli.agent_chat.run_cli_entry')
@patch('sys.argv', ['openhands', '--resume', 'test-conversation-id'])
def test_main_with_resume_argument(self, mock_run_agent_chat: MagicMock) -> None:
"""Test that main() passes resume conversation ID when provided."""
@ -86,3 +90,65 @@ class TestMainEntryPoint:
mock_run_agent_chat.assert_called_once_with(
resume_conversation_id='test-conversation-id'
)
@pytest.mark.parametrize(
    "argv,expected_kwargs",
    [
        (['openhands'], {"resume_conversation_id": None}),
        (['openhands', '--resume', 'test-id'], {"resume_conversation_id": 'test-id'}),
    ],
)
def test_main_cli_calls_run_cli_entry(monkeypatch, argv, expected_kwargs):
    """Without a subcommand, main() forwards --resume to run_cli_entry."""
    # main() takes no parameters, so the command line is injected via sys.argv
    monkeypatch.setattr(sys, "argv", argv, raising=False)

    recorded = {}

    def _record_call(**kwargs):
        return recorded.setdefault("kwargs", kwargs)

    # Stand-in module for the import main() performs lazily
    stub_agent_chat = SimpleNamespace(run_cli_entry=_record_call)
    monkeypatch.setitem(sys.modules, "openhands_cli.agent_chat", stub_agent_chat)

    # No SystemExit is expected on success
    main()

    assert recorded["kwargs"] == expected_kwargs
@pytest.mark.parametrize(
    "argv,expected_kwargs",
    [
        (['openhands', 'serve'], {"mount_cwd": False, "gpu": False}),
        (['openhands', 'serve', '--mount-cwd'], {"mount_cwd": True, "gpu": False}),
        (['openhands', 'serve', '--gpu'], {"mount_cwd": False, "gpu": True}),
        (['openhands', 'serve', '--mount-cwd', '--gpu'], {"mount_cwd": True, "gpu": True}),
    ],
)
def test_main_serve_calls_launch_gui_server(monkeypatch, argv, expected_kwargs):
    """The serve subcommand forwards its flags to launch_gui_server."""
    monkeypatch.setattr(sys, "argv", argv, raising=False)

    recorded = {}

    def _record_call(**kwargs):
        return recorded.setdefault("kwargs", kwargs)

    # Stand-in module for the import main() performs lazily
    stub_gui = SimpleNamespace(launch_gui_server=_record_call)
    monkeypatch.setitem(sys.modules, "openhands_cli.gui_launcher", stub_gui)

    main()

    assert recorded["kwargs"] == expected_kwargs
@pytest.mark.parametrize(
    "argv,expected_exit_code",
    [
        (['openhands', 'invalid-command'], 2),  # argparse error
        (['openhands', '--help'], 0),  # top-level help
        (['openhands', 'serve', '--help'], 0),  # subcommand help
    ],
)
def test_help_and_invalid(monkeypatch, argv, expected_exit_code):
    """Help requests exit with 0 and unknown commands exit with 2."""
    monkeypatch.setattr(sys, "argv", argv, raising=False)

    with pytest.raises(SystemExit) as exit_info:
        main()

    assert exit_info.value.code == expected_exit_code

View File

@ -0,0 +1,100 @@
"""Tests for the /new command functionality."""
from unittest.mock import MagicMock, patch
from uuid import UUID
from openhands_cli.agent_chat import _start_fresh_conversation
from unittest.mock import MagicMock, patch
from prompt_toolkit.input.defaults import create_pipe_input
from prompt_toolkit.output.defaults import DummyOutput
from openhands_cli.setup import MissingAgentSpec
from openhands_cli.user_actions import UserConfirmation
@patch('openhands_cli.agent_chat.setup_conversation')
def test_start_fresh_conversation_success(mock_setup_conversation):
    """_start_fresh_conversation returns what setup_conversation produced."""
    # Conversation stand-in handed back by the patched setup
    fake_conversation = MagicMock()
    fake_conversation.id = UUID('12345678-1234-5678-9abc-123456789abc')
    mock_setup_conversation.return_value = fake_conversation

    created = _start_fresh_conversation()

    # Setup ran exactly once, with no conversation ID to resume
    assert created == fake_conversation
    mock_setup_conversation.assert_called_once_with(None)
@patch('openhands_cli.agent_chat.SettingsScreen')
@patch('openhands_cli.agent_chat.setup_conversation')
def test_start_fresh_conversation_missing_agent_spec(
    mock_setup_conversation,
    mock_settings_screen_class
):
    """A MissingAgentSpec triggers the settings screen, then setup is retried."""
    # Instance produced when the code under test builds a SettingsScreen
    screen = MagicMock()
    mock_settings_screen_class.return_value = screen

    # First setup attempt fails, the second one succeeds
    fake_conversation = MagicMock()
    fake_conversation.id = UUID('12345678-1234-5678-9abc-123456789abc')
    mock_setup_conversation.side_effect = [
        MissingAgentSpec("Agent spec missing"),
        fake_conversation
    ]

    created = _start_fresh_conversation()

    assert created == fake_conversation
    # Two attempts: the failing one and the successful retry
    assert mock_setup_conversation.call_count == 2
    # The settings screen was shown exactly once, without an escape option
    screen.handle_basic_settings.assert_called_once_with(escapable=False)
@patch('openhands_cli.agent_chat.exit_session_confirmation')
@patch('openhands_cli.agent_chat.get_session_prompter')
@patch('openhands_cli.agent_chat.setup_conversation')
@patch('openhands_cli.agent_chat.ConversationRunner')
def test_new_command_resets_confirmation_mode(
    mock_runner_cls,
    mock_setup_conversation,
    mock_get_session_prompter,
    mock_exit_confirm,
):
    """Typing /new makes the CLI build a second runner for the new conversation."""
    # Auto-accept the exit prompt to avoid interactive UI and EOFError
    mock_exit_confirm.return_value = UserConfirmation.ACCEPT

    # Two conversations: the initial one and the one created by /new
    first_conv = MagicMock()
    first_conv.id = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
    second_conv = MagicMock()
    second_conv.id = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
    mock_setup_conversation.side_effect = [first_conv, second_conv]

    # A distinct runner instance per conversation
    first_runner = MagicMock()
    first_runner.is_confirmation_mode_enabled = True
    second_runner = MagicMock()
    second_runner.is_confirmation_mode_enabled = False
    mock_runner_cls.side_effect = [first_runner, second_runner]

    # Real prompt session fed by a pipe instead of an interactive terminal
    from openhands_cli.user_actions.utils import get_session_prompter as real_get_session_prompter

    with create_pipe_input() as pipe:
        sink = DummyOutput()
        mock_get_session_prompter.return_value = real_get_session_prompter(
            input=pipe, output=sink
        )

        from openhands_cli.agent_chat import run_cli_entry

        # Type /new, then /exit (the exit confirmation is auto-accepted)
        for ch in "/new\r/exit\r":
            pipe.send_text(ch)

        run_cli_entry(None)

    # A runner was built for each conversation, in order
    assert mock_runner_cls.call_count == 2
    assert mock_runner_cls.call_args_list[0].args[0] is first_conv
    assert mock_runner_cls.call_args_list[1].args[0] is second_conv

View File

@ -0,0 +1,124 @@
"""Simplified tests for the /status command functionality."""
from datetime import datetime, timedelta
from uuid import uuid4
from unittest.mock import Mock, patch
import pytest
from openhands_cli.tui.status import display_status
from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage
# ---------- Fixtures & helpers ----------
@pytest.fixture
def conversation():
    """Provide a bare-bones mock conversation for display_status tests.

    The mock carries a random UUID as ``id``, a ``state`` whose ``events``
    list is empty, and a ``conversation_stats`` mock that each test
    configures with the metrics it needs.
    """
    return Mock(
        id=uuid4(),
        state=Mock(events=[]),
        conversation_stats=Mock(),
    )
def make_metrics(cost=None, usage=None) -> Metrics:
    """Build a Metrics object for the status tests.

    Args:
        cost: Value for ``accumulated_cost``; when None the attribute keeps
            whatever ``Metrics()`` initialised it to.
        usage: Value for ``accumulated_token_usage``; always assigned, so
            passing None explicitly stores None.

    Returns:
        The configured Metrics instance.
    """
    metrics = Metrics()
    # Token usage is overwritten unconditionally, None included.
    metrics.accumulated_token_usage = usage
    # Cost is only overridden when the caller supplied one.
    if cost is not None:
        metrics.accumulated_cost = cost
    return metrics
def call_display_status(conversation, session_start):
    """Run display_status with both print helpers patched out.

    Args:
        conversation: Conversation object forwarded to display_status.
        session_start: Forwarded as ``session_start_time``.

    Returns:
        A tuple ``(pf, pc, text)``: the print_formatted_text mock, the
        print_container mock, and the stringified ``body.text`` of the first
        container passed to print_container (empty string when the body has
        no ``text`` attribute).
    """
    with patch('openhands_cli.tui.status.print_formatted_text') as pf:
        with patch('openhands_cli.tui.status.print_container') as pc:
            display_status(conversation, session_start_time=session_start)

    # The first positional argument of the first print_container call is the
    # container whose body holds the rendered text.
    first_container = pc.call_args_list[0].args[0]
    body_text = getattr(first_container.body, "text", "")
    return pf, pc, str(body_text)
# ---------- Tests ----------
def test_display_status_box_title(conversation):
    """The first container printed by display_status is titled "Usage Metrics"."""
    conversation.conversation_stats.get_combined_metrics.return_value = make_metrics()
    started_at = datetime.now()

    with patch('openhands_cli.tui.status.print_formatted_text') as pf:
        with patch('openhands_cli.tui.status.print_container') as pc:
            display_status(conversation, session_start_time=started_at)

    # Both print helpers must have been used
    assert pf.called
    assert pc.called

    # The first container handed to print_container carries the title
    first_container = pc.call_args_list[0].args[0]
    assert hasattr(first_container, "title")
    assert "Usage Metrics" in first_container.title
@pytest.mark.parametrize(
    ("delta", "expected"),
    [
        (timedelta(seconds=0), "0h 0m"),
        (timedelta(minutes=5, seconds=30), "5m"),
        (timedelta(hours=1, minutes=30, seconds=45), "1h 30m"),
        (timedelta(hours=2, minutes=15, seconds=30), "2h 15m"),
    ],
)
def test_display_status_uptime(conversation, delta, expected):
    """display_status prints the conversation id first and the uptime second.

    The session start is placed ``delta`` in the past and ``expected`` must
    appear as a substring of the second print_formatted_text call.
    """
    conversation.conversation_stats.get_combined_metrics.return_value = make_metrics()
    started_at = datetime.now() - delta

    with patch('openhands_cli.tui.status.print_formatted_text') as pf:
        with patch('openhands_cli.tui.status.print_container'):
            display_status(conversation, session_start_time=started_at)

    printed = pf.call_args_list

    # uptime is printed in the 2nd print_formatted_text call
    assert expected in str(printed[1])

    # conversation id appears in the first print call
    assert str(conversation.id) in str(printed[0])
@pytest.mark.parametrize(
    ("cost", "usage", "expecteds"),
    [
        # Empty/zero case
        (None, None, ["$0.000000", "0", "0", "0", "0", "0"]),
        # Only cost, usage=None
        (0.05, None, ["$0.050000", "0", "0", "0", "0", "0"]),
        # Full metrics
        (
            0.123456,
            TokenUsage(
                prompt_tokens=1500,
                completion_tokens=800,
                cache_read_tokens=200,
                cache_write_tokens=100,
            ),
            ["$0.123456", "1,500", "800", "200", "100", "2,300"],
        ),
        # Larger numbers (comprehensive)
        (
            1.234567,
            TokenUsage(
                prompt_tokens=5000,
                completion_tokens=3000,
                cache_read_tokens=500,
                cache_write_tokens=250,
            ),
            ["$1.234567", "5,000", "3,000", "500", "250", "8,000"],
        ),
    ],
)
def test_display_status_metrics(conversation, cost, usage, expecteds):
    """Every expected fragment must appear in the rendered metrics text.

    ``expecteds`` lists substrings looked up in the body text of the first
    printed container. The final entry in the populated cases equals
    prompt_tokens + completion_tokens (presumably the displayed total).
    """
    conversation.conversation_stats.get_combined_metrics.return_value = make_metrics(cost, usage)

    pf, pc, text = call_display_status(conversation, datetime.now())

    assert pf.called
    assert pc.called

    # NOTE: these are plain substring checks, so a bare "0" is already
    # satisfied by any zero digit in the text (e.g. inside "$0.000000").
    missing = [fragment for fragment in expecteds if fragment not in text]
    assert not missing

View File

@ -77,6 +77,7 @@ def test_commands_dict() -> None:
'/exit',
'/help',
'/clear',
'/new',
'/status',
'/confirm',
'/resume',