Merge branch 'main' into fix-temperature-top-p-reasoning-models

2025-12-26 13:52:43 +08:00 · 2025-10-07 23:54:01 -04:00 · 2025-10-07 23:54:01 -04:00 · beed3371f3
commit beed3371f3
parent 572517ec07 c0221e5468
14 changed files with 998 additions and 56 deletions
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@ -67,7 +67,7 @@
        "@playwright/test": "^1.55.1",
        "@react-router/dev": "^7.9.3",
        "@tailwindcss/typography": "^0.5.19",
-        "@tanstack/eslint-plugin-query": "^5.90.1",
+        "@tanstack/eslint-plugin-query": "^5.91.0",
        "@testing-library/dom": "^10.4.1",
        "@testing-library/jest-dom": "^6.8.0",
        "@testing-library/react": "^16.3.0",
@ -5954,12 +5954,12 @@
      }
    },
    "node_modules/@tanstack/eslint-plugin-query": {
-      "version": "5.90.1",
-      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.90.1.tgz",
-      "integrity": "sha512-Ki4hl+8ZtnMFZ3amZbQl6sSMUq6L8oSJ14vmi3j5t1/SqXclL5SI/1kcuH36iIk05B/bN5pEOS1PTO3Ut/FbVA==",
+      "version": "5.91.0",
+      "resolved": "https://registry.npmjs.org/@tanstack/eslint-plugin-query/-/eslint-plugin-query-5.91.0.tgz",
+      "integrity": "sha512-Kn6yWyRe3dIPf7NqyDMhcsTBz2Oh8jPSOpBdlnLQhGBJ6iTMBFYA4B1UreGJ/WdfzQskSMh5imcyWF+wqa/Q5g==",
      "dev": true,
      "dependencies": {
-        "@typescript-eslint/utils": "^8.44.0"
+        "@typescript-eslint/utils": "^8.44.1"
      },
      "funding": {
        "type": "github",
--- a/frontend/package.json
+++ b/frontend/package.json
@ -98,7 +98,7 @@
    "@playwright/test": "^1.55.1",
    "@react-router/dev": "^7.9.3",
    "@tailwindcss/typography": "^0.5.19",
-    "@tanstack/eslint-plugin-query": "^5.90.1",
+    "@tanstack/eslint-plugin-query": "^5.91.0",
    "@testing-library/dom": "^10.4.1",
    "@testing-library/jest-dom": "^6.8.0",
    "@testing-library/react": "^16.3.0",
--- a/openhands-cli/openhands_cli/agent_chat.py
+++ b/openhands-cli/openhands_cli/agent_chat.py
@ -5,19 +5,22 @@ Provides a conversation interface with an AI agent using OpenHands patterns.
 """

 import sys
-
-from prompt_toolkit import print_formatted_text
-from prompt_toolkit.formatted_text import HTML
+from datetime import datetime

 from openhands.sdk import (
+    BaseConversation,
    Message,
    TextContent,
 )
 from openhands.sdk.conversation.state import AgentExecutionStatus
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+
 from openhands_cli.runner import ConversationRunner
 from openhands_cli.setup import MissingAgentSpec, setup_conversation
 from openhands_cli.tui.settings.mcp_screen import MCPScreen
 from openhands_cli.tui.settings.settings_screen import SettingsScreen
+from openhands_cli.tui.status import display_status
 from openhands_cli.tui.tui import (
    display_help,
    display_welcome,
@ -26,6 +29,32 @@ from openhands_cli.user_actions import UserConfirmation, exit_session_confirmati
 from openhands_cli.user_actions.utils import get_session_prompter


+
+
+def _start_fresh_conversation(resume_conversation_id: str | None = None) -> BaseConversation:
+    """Create a conversation, prompting for settings until setup succeeds.
+
+    ``setup_conversation`` is retried in a loop: whenever it raises
+    ``MissingAgentSpec`` the basic settings screen is shown (non-escapable)
+    and setup is attempted again.
+
+    Args:
+        resume_conversation_id: Optional conversation ID to resume
+
+    Returns:
+        BaseConversation: The conversation produced by ``setup_conversation``
+    """
+    settings_screen = SettingsScreen()
+
+    while True:
+        try:
+            candidate = setup_conversation(resume_conversation_id)
+        except MissingAgentSpec:
+            # No agent configured yet: collect settings, then retry.
+            settings_screen.handle_basic_settings(escapable=False)
+            continue
+        if candidate:
+            return candidate
+
+
 def _restore_tty() -> None:
    """
    Ensure terminal modes are reset in case prompt_toolkit cleanup didn't run.
@ -62,17 +91,12 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
        EOFError: If EOF is encountered
    """

-    conversation = None
-    settings_screen = SettingsScreen()
-
-    while not conversation:
-        try:
-            conversation = setup_conversation(resume_conversation_id)
-        except MissingAgentSpec:
-            settings_screen.handle_basic_settings(escapable=False)
-
+    conversation = _start_fresh_conversation(resume_conversation_id)
    display_welcome(conversation.id, bool(resume_conversation_id))

+    # Track session start time for uptime calculation
+    session_start_time = datetime.now()
+
    # Create conversation runner to handle state machine logic
    runner = ConversationRunner(conversation)
    session = get_session_prompter()
@ -118,21 +142,28 @@ def run_cli_entry(resume_conversation_id: str | None = None) -> None:
                display_welcome(conversation.id)
                continue

+            elif command == '/new':
+                try:
+                    # Start a fresh conversation (no resume ID = new conversation)
+                    conversation = _start_fresh_conversation()
+                    runner = ConversationRunner(conversation)
+                    display_welcome(conversation.id, resume=False)
+                    print_formatted_text(
+                        HTML('<green>✓ Started fresh conversation</green>')
+                    )
+                    continue
+                except Exception as e:
+                    print_formatted_text(
+                        HTML(f'<red>Error starting fresh conversation: {e}</red>')
+                    )
+                    continue
+
            elif command == '/help':
                display_help()
                continue

            elif command == '/status':
-                print_formatted_text(
-                    HTML(f'<grey>Conversation ID: {conversation.id}</grey>')
-                )
-                print_formatted_text(HTML('<grey>Status: Active</grey>'))
-                confirmation_status = (
-                    'enabled' if conversation.state.confirmation_mode else 'disabled'
-                )
-                print_formatted_text(
-                    HTML(f'<grey>Confirmation mode: {confirmation_status}</grey>')
-                )
+                display_status(conversation, session_start_time=session_start_time)
                continue

            elif command == '/confirm':
--- a/openhands-cli/openhands_cli/argparsers/main_parser.py
+++ b/openhands-cli/openhands_cli/argparsers/main_parser.py
@ -0,0 +1,56 @@
+"""Main argument parser for OpenHands CLI."""
+
+import argparse
+
+
+def create_main_parser() -> argparse.ArgumentParser:
+    """Build the top-level parser: CLI mode by default, ``serve`` as a subcommand.
+
+    Top-level options belong to the default CLI mode. The only subcommand
+    is ``serve``; the chosen subcommand name is stored on the parsed
+    namespace under ``command``.
+
+    NOTE: the ``serve`` flags are also declared by ``add_serve_parser`` in
+    ``argparsers/serve_parser.py``; keep the two definitions in sync.
+
+    Returns:
+        The configured argument parser
+    """
+    usage_epilog = """
+By default, OpenHands runs in CLI mode (terminal interface).
+Use 'serve' subcommand to launch the GUI server instead.
+
+Examples:
+  openhands                           # Start CLI mode
+  openhands --resume conversation-id  # Resume a conversation in CLI mode
+  openhands serve                     # Launch GUI server
+  openhands serve --gpu               # Launch GUI server with GPU support
+"""
+    main_parser = argparse.ArgumentParser(
+        description='OpenHands CLI - Terminal User Interface for OpenHands AI Agent',
+        # Raw formatter keeps the hand-aligned examples in the epilog intact.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_epilog,
+    )
+
+    # Default (CLI) mode options live on the top-level parser.
+    main_parser.add_argument('--resume', type=str, help='Conversation ID to resume')
+
+    commands = main_parser.add_subparsers(dest='command', help='Additional commands')
+
+    # 'serve' is the only subcommand; both of its options are boolean switches.
+    serve = commands.add_parser(
+        'serve',
+        help='Launch the OpenHands GUI server using Docker (web interface)',
+    )
+    serve_flags = (
+        ('--mount-cwd', 'Mount the current working directory in the Docker container'),
+        ('--gpu', 'Enable GPU support in the Docker container'),
+    )
+    for flag, description in serve_flags:
+        serve.add_argument(flag, action='store_true', help=description)
+
+    return main_parser
--- a/openhands-cli/openhands_cli/argparsers/serve_parser.py
+++ b/openhands-cli/openhands_cli/argparsers/serve_parser.py
@ -0,0 +1,31 @@
+"""Argument parser for serve subcommand."""
+
+import argparse
+
+
+def add_serve_parser(subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser:
+    """Register the ``serve`` subcommand on an existing subparsers action.
+
+    Args:
+        subparsers: The subparsers object to add the serve parser to
+
+    Returns:
+        The serve argument parser
+    """
+    parser = subparsers.add_parser(
+        'serve',
+        help='Launch the OpenHands GUI server using Docker (web interface)',
+    )
+    # Both options are opt-in boolean switches that default to False.
+    switches = {
+        '--mount-cwd': 'Mount the current working directory into the GUI server container',
+        '--gpu': 'Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker',
+    }
+    for flag, help_text in switches.items():
+        parser.add_argument(flag, help=help_text, action='store_true', default=False)
+    return parser
--- a/openhands-cli/openhands_cli/gui_launcher.py
+++ b/openhands-cli/openhands_cli/gui_launcher.py
@ -0,0 +1,229 @@
+"""GUI launcher for OpenHands CLI."""
+
+import os
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+from openhands_cli.locations import PERSISTENCE_DIR
+
+
+def _format_docker_command_for_logging(cmd: list[str]) -> str:
+    """Format a Docker command for logging with grey color.
+
+    The arguments are joined with single spaces and wrapped in ``<grey>``
+    markup. Because the result is meant to be passed to ``HTML(...)``, the
+    characters ``&``, ``<`` and ``>`` in the arguments are escaped so that a
+    path such as ``/home/me/R&D`` cannot break the markup.
+
+    Args:
+        cmd (list[str]): The Docker command as a list of strings
+
+    Returns:
+        str: The formatted command string in grey HTML color
+    """
+    # Local import keeps this fix self-contained within the function.
+    from html import escape
+
+    cmd_str = escape(' '.join(cmd), quote=False)
+    return f'<grey>Running Docker command: {cmd_str}</grey>'
+
+
+def check_docker_requirements() -> bool:
+    """Check if Docker is installed and running.
+
+    Looks for a ``docker`` executable on PATH, then runs ``docker info``
+    (10 second timeout) to confirm the daemon answers. Every failure path
+    prints a two-line explanation before returning ``False``.
+
+    Returns:
+        bool: True if Docker is available and running, False otherwise.
+    """
+    # Local import: used to escape error text embedded in HTML markup.
+    from html import escape
+
+    # Check if Docker is installed
+    if not shutil.which('docker'):
+        print_formatted_text(
+            HTML('<ansired>❌ Docker is not installed or not in PATH.</ansired>')
+        )
+        print_formatted_text(
+            HTML(
+                '<grey>Please install Docker first: https://docs.docker.com/get-docker/</grey>'
+            )
+        )
+        return False
+
+    # Check if Docker daemon is running
+    try:
+        result = subprocess.run(
+            ['docker', 'info'], capture_output=True, text=True, timeout=10
+        )
+    except (subprocess.SubprocessError, OSError) as e:
+        # SubprocessError already covers TimeoutExpired; OSError covers a
+        # binary that vanished or is not executable after the PATH lookup.
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to check Docker status.</ansired>')
+        )
+        print_formatted_text(
+            HTML(f'<grey>Error: {escape(str(e), quote=False)}</grey>')
+        )
+        return False
+
+    if result.returncode != 0:
+        print_formatted_text(
+            HTML('<ansired>❌ Docker daemon is not running.</ansired>')
+        )
+        print_formatted_text(
+            HTML('<grey>Please start Docker and try again.</grey>')
+        )
+        return False
+
+    return True
+
+
+def ensure_config_dir_exists() -> Path:
+    """Create the persistence directory (and any missing parents) if needed.
+
+    Returns:
+        Path: ``PERSISTENCE_DIR`` as a ``Path`` object.
+    """
+    config_dir = Path(PERSISTENCE_DIR)
+    config_dir.mkdir(parents=True, exist_ok=True)
+    return config_dir
+
+
+def get_openhands_version() -> str:
+    """Get the OpenHands version for Docker images.
+
+    Reads the ``OPENHANDS_VERSION`` environment variable. When the variable
+    is unset, empty, or only whitespace, ``'latest'`` is used so that image
+    references built from this value never end up with an empty tag.
+
+    Returns:
+        str: The version string to use for Docker images
+    """
+    # In the future, this could also be read from a version file.
+    version = os.environ.get('OPENHANDS_VERSION', '').strip()
+    return version or 'latest'
+
+
+def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None:
+    """Launch the OpenHands GUI server using Docker.
+
+    Steps: verify Docker is usable, make sure the config directory exists,
+    pull the runtime image, then run the app image in the foreground until
+    it exits or the user presses Ctrl+C. The process exits with status 1 on
+    any Docker failure and with status 0 after a keyboard interrupt.
+
+    Args:
+        mount_cwd: If True, mount the current working directory into the container.
+        gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
+    """
+    # Local import: used to escape dynamic text embedded in HTML markup.
+    from html import escape
+
+    print_formatted_text(
+        HTML('<ansiblue>🚀 Launching OpenHands GUI server...</ansiblue>')
+    )
+    print_formatted_text('')
+
+    # Check Docker requirements
+    if not check_docker_requirements():
+        sys.exit(1)
+
+    # Ensure config directory exists
+    config_dir = ensure_config_dir_exists()
+
+    # Get the current version for the Docker image
+    version = get_openhands_version()
+    runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik'
+    app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}'
+
+    print_formatted_text(HTML('<grey>Pulling required Docker images...</grey>'))
+
+    # Pull the runtime image first
+    pull_cmd = ['docker', 'pull', runtime_image]
+    print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd)))
+    try:
+        subprocess.run(
+            pull_cmd,
+            check=True,
+            timeout=300,  # 5 minutes timeout
+        )
+    except subprocess.CalledProcessError:
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to pull runtime image.</ansired>')
+        )
+        sys.exit(1)
+    except subprocess.TimeoutExpired:
+        print_formatted_text(
+            HTML('<ansired>❌ Timeout while pulling runtime image.</ansired>')
+        )
+        sys.exit(1)
+
+    print_formatted_text('')
+    print_formatted_text(
+        HTML('<ansigreen>✅ Starting OpenHands GUI server...</ansigreen>')
+    )
+    print_formatted_text(
+        HTML('<grey>The server will be available at: http://localhost:3000</grey>')
+    )
+    print_formatted_text(HTML('<grey>Press Ctrl+C to stop the server.</grey>'))
+    print_formatted_text('')
+
+    # Build the Docker command
+    docker_cmd = [
+        'docker',
+        'run',
+        '-it',
+        '--rm',
+        '--pull=always',
+        '-e',
+        f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}',
+        '-e',
+        'LOG_ALL_EVENTS=true',
+        '-v',
+        '/var/run/docker.sock:/var/run/docker.sock',
+        '-v',
+        f'{config_dir}:/.openhands',
+    ]
+
+    # Add GPU support if requested
+    if gpu:
+        print_formatted_text(
+            HTML('<ansigreen>🖥️ Enabling GPU support via nvidia-docker...</ansigreen>')
+        )
+        # Add the --gpus all flag right after 'docker run' to enable all GPUs
+        docker_cmd.insert(2, '--gpus')
+        docker_cmd.insert(3, 'all')
+        # Add environment variable to pass GPU support to sandbox containers
+        docker_cmd.extend(
+            [
+                '-e',
+                'SANDBOX_ENABLE_GPU=true',
+            ]
+        )
+
+    # Add current working directory mount if requested
+    if mount_cwd:
+        cwd = Path.cwd()
+        # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem
+        docker_cmd.extend(
+            [
+                '-e',
+                f'SANDBOX_VOLUMES={cwd}:/workspace:rw',
+            ]
+        )
+
+        # Set user ID for Unix-like systems only
+        if os.name != 'nt':  # Not Windows
+            try:
+                user_id = subprocess.check_output(['id', '-u'], text=True).strip()
+                docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}'])
+            except (subprocess.CalledProcessError, FileNotFoundError):
+                # If 'id' command fails or doesn't exist, skip setting user ID
+                pass
+        # Print the folder that will be mounted to inform the user.
+        # The path is escaped because it is embedded in HTML markup and may
+        # legitimately contain characters such as '&' or '<'.
+        safe_cwd = escape(str(cwd), quote=False)
+        print_formatted_text(
+            HTML(
+                f'<ansigreen>📂 Mounting current directory:</ansigreen> <ansiyellow>{safe_cwd}</ansiyellow> <ansigreen>to</ansigreen> <ansiyellow>/workspace</ansiyellow>'
+            )
+        )
+
+    docker_cmd.extend(
+        [
+            '-p',
+            '3000:3000',
+            '--add-host',
+            'host.docker.internal:host-gateway',
+            '--name',
+            'openhands-app',
+            app_image,
+        ]
+    )
+
+    try:
+        # Log and run the Docker command
+        print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd)))
+        subprocess.run(docker_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansired>❌ Failed to start OpenHands GUI server.</ansired>')
+        )
+        # The exception text repeats the full command (including mounted
+        # paths), so it is escaped before being placed inside markup.
+        print_formatted_text(
+            HTML(f'<grey>Error: {escape(str(e), quote=False)}</grey>')
+        )
+        sys.exit(1)
+    except KeyboardInterrupt:
+        print_formatted_text('')
+        print_formatted_text(
+            HTML('<ansigreen>✓ OpenHands GUI server stopped successfully.</ansigreen>')
+        )
+        sys.exit(0)
--- a/openhands-cli/openhands_cli/simple_main.py
+++ b/openhands-cli/openhands_cli/simple_main.py
@ -4,9 +4,9 @@ Simple main entry point for OpenHands CLI.
 This is a simplified version that demonstrates the TUI functionality.
 """

-import argparse
 import logging
 import os
+import sys
 import warnings

 debug_env = os.getenv('DEBUG', 'false').lower()
@ -17,7 +17,7 @@ if debug_env != '1' and debug_env != 'true':
 from prompt_toolkit import print_formatted_text
 from prompt_toolkit.formatted_text import HTML

-from openhands_cli.agent_chat import run_cli_entry
+from openhands_cli.argparsers.main_parser import create_main_parser


 def main() -> None:
@ -27,35 +27,28 @@ def main() -> None:
        ImportError: If agent chat dependencies are missing
        Exception: On other error conditions
    """
-    parser = argparse.ArgumentParser(
-        description='OpenHands CLI - Terminal User Interface for OpenHands AI Agent'
-    )
-    parser.add_argument(
-        '--resume',
-        type=str,
-        help='Conversation ID to use for the session. If not provided, a random UUID will be generated.',
-    )
-
+    parser = create_main_parser()
    args = parser.parse_args()

    try:
-        # Start agent chat
-        run_cli_entry(resume_conversation_id=args.resume)
+        if args.command == 'serve':
+            # Import gui_launcher only when needed
+            from openhands_cli.gui_launcher import launch_gui_server

-    except ImportError as e:
-        print_formatted_text(
-            HTML(f'<red>Error: Agent chat requires additional dependencies: {e}</red>')
-        )
-        print_formatted_text(
-            HTML('<yellow>Please ensure the agent SDK is properly installed.</yellow>')
-        )
-        raise
+            launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu)
+        else:
+            # Default CLI behavior - no subcommand needed
+            # Import agent_chat only when needed
+            from openhands_cli.agent_chat import run_cli_entry
+
+            # Start agent chat
+            run_cli_entry(resume_conversation_id=args.resume)
    except KeyboardInterrupt:
        print_formatted_text(HTML('\n<yellow>Goodbye! 👋</yellow>'))
    except EOFError:
        print_formatted_text(HTML('\n<yellow>Goodbye! 👋</yellow>'))
    except Exception as e:
-        print_formatted_text(HTML(f'<red>Error starting agent chat: {e}</red>'))
+        print_formatted_text(HTML(f'<red>Error: {e}</red>'))
        import traceback

        traceback.print_exc()
--- a/openhands-cli/openhands_cli/tui/status.py
+++ b/openhands-cli/openhands_cli/tui/status.py
@ -0,0 +1,109 @@
+"""Status display components for OpenHands CLI TUI."""
+
+from datetime import datetime
+
+from openhands.sdk import BaseConversation
+from prompt_toolkit import print_formatted_text
+from prompt_toolkit.formatted_text import HTML
+from prompt_toolkit.shortcuts import print_container
+from prompt_toolkit.widgets import Frame, TextArea
+
+
+def display_status(
+    conversation: BaseConversation,
+    session_start_time: datetime,
+) -> None:
+    """Print the conversation ID, session uptime and a usage-metrics box.
+
+    Args:
+        conversation: The conversation to display status for
+        session_start_time: The session start time for uptime calculation
+    """
+    # Combined metrics are fetched first, before anything is printed.
+    metrics = conversation.conversation_stats.get_combined_metrics()
+
+    # Uptime = whole seconds since session start, split into h/m/s.
+    elapsed = int((datetime.now() - session_start_time).total_seconds())
+    hours, remainder = divmod(elapsed, 3600)
+    minutes, seconds = divmod(remainder, 60)
+    uptime_str = f'{hours}h {minutes}m {seconds}s'
+
+    print_formatted_text(HTML(f'<grey>Conversation ID: {conversation.id}</grey>'))
+    print_formatted_text(HTML(f'<grey>Uptime:          {uptime_str}</grey>'))
+    print_formatted_text('')
+
+    # Token counters fall back to zero when no usage object is available.
+    usage = metrics.accumulated_token_usage
+    if usage:
+        prompt_tokens = usage.prompt_tokens
+        completion_tokens = usage.completion_tokens
+        cache_reads = usage.cache_read_tokens
+        cache_writes = usage.cache_write_tokens
+    else:
+        prompt_tokens = completion_tokens = cache_reads = cache_writes = 0
+    combined_tokens = prompt_tokens + completion_tokens
+    cost = metrics.accumulated_cost
+
+    # Rendering of the framed table is delegated to the helper.
+    _display_usage_metrics_container(
+        cost,
+        prompt_tokens,
+        completion_tokens,
+        cache_reads,
+        cache_writes,
+        combined_tokens,
+    )
+
+
+def _display_usage_metrics_container(
+    total_cost: float,
+    total_input_tokens: int,
+    total_output_tokens: int,
+    cache_hits: int,
+    cache_writes: int,
+    total_tokens: int,
+) -> None:
+    """Render the usage metrics as a framed, two-column text table."""
+    # (label, value) rows; empty pairs act as blank separator lines.
+    # Cost is shown with six decimals, counts with thousands separators.
+    rows = [
+        ('   Total Cost (USD):', f'${total_cost:.6f}'),
+        ('', ''),
+        ('   Total Input Tokens:', f'{total_input_tokens:,}'),
+        ('      Cache Hits:', f'{cache_hits:,}'),
+        ('      Cache Writes:', f'{cache_writes:,}'),
+        ('   Total Output Tokens:', f'{total_output_tokens:,}'),
+        ('', ''),
+        ('   Total Tokens:', f'{total_tokens:,}'),
+    ]
+
+    # Pad both columns to their widest entry so the values line up.
+    label_width = max(len(label) for label, _ in rows)
+    value_width = max(len(value) for _, value in rows)
+    body = '\n'.join(
+        label.ljust(label_width) + ' ' + value.ljust(value_width)
+        for label, value in rows
+    )
+
+    print_container(
+        Frame(
+            TextArea(text=body, read_only=True, wrap_lines=True),
+            title='Usage Metrics',
+        )
+    )
--- a/openhands-cli/openhands_cli/tui/tui.py
+++ b/openhands-cli/openhands_cli/tui/tui.py
@ -17,6 +17,7 @@ COMMANDS = {
    '/exit': 'Exit the application',
    '/help': 'Display available commands',
    '/clear': 'Clear the screen',
+    '/new': 'Start a fresh conversation',
    '/status': 'Display conversation details',
    '/confirm': 'Toggle confirmation mode on/off',
    '/resume': 'Resume a paused conversation',
--- a/openhands-cli/tests/test_gui_launcher.py
+++ b/openhands-cli/tests/test_gui_launcher.py
@ -0,0 +1,201 @@
+"""Tests for GUI launcher functionality."""
+
+import os
+import subprocess
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from openhands_cli.gui_launcher import (
+    _format_docker_command_for_logging,
+    check_docker_requirements,
+    get_openhands_version,
+    launch_gui_server,
+)
+
+
+class TestFormatDockerCommand:
+    """Checks the grey-markup string built for logging Docker commands."""
+
+    @pytest.mark.parametrize(
+        "cmd,expected",
+        [
+            (
+                ['docker', 'run', 'hello-world'],
+                '<grey>Running Docker command: docker run hello-world</grey>',
+            ),
+            (
+                ['docker', 'run', '-it', '--rm', '-p', '3000:3000', 'openhands:latest'],
+                '<grey>Running Docker command: docker run -it --rm -p 3000:3000 openhands:latest</grey>',
+            ),
+            # An empty command still yields the prefix and closing tag.
+            ([], '<grey>Running Docker command: </grey>'),
+        ],
+    )
+    def test_format_docker_command(self, cmd, expected):
+        """The formatted text must match the expected markup exactly."""
+        assert _format_docker_command_for_logging(cmd) == expected
+
+
+class TestCheckDockerRequirements:
+    """Covers each outcome of the Docker availability check."""
+
+    @pytest.mark.parametrize(
+        "which_return,run_side_effect,expected_result,expected_print_count",
+        [
+            # Docker not installed
+            (None, None, False, 2),
+            # Docker daemon not running
+            ('/usr/bin/docker', MagicMock(returncode=1), False, 2),
+            # Docker timeout
+            ('/usr/bin/docker', subprocess.TimeoutExpired('docker info', 10), False, 2),
+            # Docker available
+            ('/usr/bin/docker', MagicMock(returncode=0), True, 0),
+        ],
+    )
+    @patch('shutil.which')
+    @patch('subprocess.run')
+    def test_docker_requirements(
+        self, mock_run, mock_which, which_return, run_side_effect, expected_result, expected_print_count
+    ):
+        """Each scenario yields the expected boolean and number of printed lines."""
+        mock_which.return_value = which_return
+
+        # Exceptions are raised by the mocked run; anything else is returned.
+        if isinstance(run_side_effect, Exception):
+            mock_run.side_effect = run_side_effect
+        elif run_side_effect is not None:
+            mock_run.return_value = run_side_effect
+
+        with patch('openhands_cli.gui_launcher.print_formatted_text') as printed:
+            outcome = check_docker_requirements()
+
+        assert outcome is expected_result
+        assert printed.call_count == expected_print_count
+
+
+class TestGetOpenHandsVersion:
+    """Test version retrieval."""
+
+    @pytest.mark.parametrize(
+        "env_value,expected",
+        [
+            (None, 'latest'),  # No environment variable set
+            ('1.2.3', '1.2.3'),  # Environment variable set
+        ],
+    )
+    def test_version_retrieval(self, monkeypatch, env_value, expected):
+        """Test version retrieval from environment.
+
+        ``monkeypatch`` restores the environment after each case, so the
+        variable neither leaks into other tests nor leaks in from outside.
+        """
+        if env_value is None:
+            # Guarantee the "unset" case even if the variable is set outside.
+            monkeypatch.delenv('OPENHANDS_VERSION', raising=False)
+        else:
+            monkeypatch.setenv('OPENHANDS_VERSION', env_value)
+
+        result = get_openhands_version()
+        assert result == expected
+
+
+class TestLaunchGuiServer:
+    """Exercises launch_gui_server with Docker and the filesystem mocked out."""
+
+    @patch('openhands_cli.gui_launcher.check_docker_requirements')
+    @patch('openhands_cli.gui_launcher.print_formatted_text')
+    def test_launch_gui_server_docker_not_available(self, mock_print, mock_check_docker):
+        """A failed Docker check must end the process with exit status 1."""
+        mock_check_docker.return_value = False
+
+        with pytest.raises(SystemExit) as exc_info:
+            launch_gui_server()
+
+        assert exc_info.value.code == 1
+
+    @pytest.mark.parametrize(
+        "pull_side_effect,run_side_effect,expected_exit_code,mount_cwd,gpu",
+        [
+            # Docker pull failure
+            (subprocess.CalledProcessError(1, 'docker pull'), None, 1, False, False),
+            # Docker pull timeout
+            (subprocess.TimeoutExpired('docker pull', 300), None, 1, False, False),
+            # Docker run failure
+            (MagicMock(returncode=0), subprocess.CalledProcessError(1, 'docker run'), 1, False, False),
+            # KeyboardInterrupt during run
+            (MagicMock(returncode=0), KeyboardInterrupt(), 0, False, False),
+            # Success with mount_cwd
+            (MagicMock(returncode=0), MagicMock(returncode=0), None, True, False),
+            # Success with GPU
+            (MagicMock(returncode=0), MagicMock(returncode=0), None, False, True),
+        ],
+    )
+    @patch('openhands_cli.gui_launcher.check_docker_requirements')
+    @patch('openhands_cli.gui_launcher.ensure_config_dir_exists')
+    @patch('openhands_cli.gui_launcher.get_openhands_version')
+    @patch('subprocess.run')
+    @patch('subprocess.check_output')
+    @patch('pathlib.Path.cwd')
+    @patch('openhands_cli.gui_launcher.print_formatted_text')
+    def test_launch_gui_server_scenarios(
+        self,
+        mock_print,
+        mock_cwd,
+        mock_check_output,
+        mock_run,
+        mock_version,
+        mock_config_dir,
+        mock_check_docker,
+        pull_side_effect,
+        run_side_effect,
+        expected_exit_code,
+        mount_cwd,
+        gpu,
+    ):
+        """Pull/run outcomes map to the right exit code or Docker command line."""
+        # Fixed environment shared by every scenario.
+        mock_check_docker.return_value = True
+        mock_config_dir.return_value = Path('/home/user/.openhands')
+        mock_version.return_value = 'latest'
+        mock_check_output.return_value = '1000\n'
+        mock_cwd.return_value = Path('/current/dir')
+
+        # subprocess.run is called for the pull first, then for the run;
+        # entries that are None are simply left out of the sequence.
+        mock_run.side_effect = [
+            effect
+            for effect in (pull_side_effect, run_side_effect)
+            if effect is not None
+        ]
+
+        if expected_exit_code is not None:
+            with pytest.raises(SystemExit) as exc_info:
+                launch_gui_server(mount_cwd=mount_cwd, gpu=gpu)
+            assert exc_info.value.code == expected_exit_code
+            return
+
+        # Successful cases must return normally (no SystemExit).
+        launch_gui_server(mount_cwd=mount_cwd, gpu=gpu)
+
+        # Exactly two invocations: the pull and the run.
+        assert mock_run.call_count == 2
+
+        pull_cmd = mock_run.call_args_list[0][0][0]
+        assert pull_cmd[0:3] == ['docker', 'pull', 'docker.all-hands.dev/all-hands-ai/runtime:latest-nikolaik']
+
+        run_cmd = mock_run.call_args_list[1][0][0]
+        assert run_cmd[0:2] == ['docker', 'run']
+
+        joined_run_cmd = ' '.join(run_cmd)
+
+        if mount_cwd:
+            assert 'SANDBOX_VOLUMES=/current/dir:/workspace:rw' in joined_run_cmd
+            assert 'SANDBOX_USER_ID=1000' in joined_run_cmd
+
+        if gpu:
+            assert '--gpus' in run_cmd
+            assert 'all' in run_cmd
+            assert 'SANDBOX_ENABLE_GPU=true' in joined_run_cmd
--- a/openhands-cli/tests/test_main.py
+++ b/openhands-cli/tests/test_main.py
@ -1,15 +1,19 @@
 """Tests for main entry point functionality."""

+import sys
+from types import SimpleNamespace
 from unittest.mock import MagicMock, patch

 import pytest
 from openhands_cli import simple_main
+from openhands_cli.simple_main import main
+


 class TestMainEntryPoint:
    """Test the main entry point behavior."""

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands'])
    def test_main_starts_agent_chat_directly(
        self, mock_run_agent_chat: MagicMock
@ -24,7 +28,7 @@ class TestMainEntryPoint:
        # Should call run_cli_entry with no resume conversation ID
        mock_run_agent_chat.assert_called_once_with(resume_conversation_id=None)

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands'])
    def test_main_handles_import_error(self, mock_run_agent_chat: MagicMock) -> None:
        """Test that main() handles ImportError gracefully."""
@ -36,7 +40,7 @@ class TestMainEntryPoint:

        assert str(exc_info.value) == 'Missing dependency'

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands'])
    def test_main_handles_keyboard_interrupt(
        self, mock_run_agent_chat: MagicMock
@ -48,7 +52,7 @@ class TestMainEntryPoint:
        # Should complete without raising an exception (graceful exit)
        simple_main.main()

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands'])
    def test_main_handles_eof_error(self, mock_run_agent_chat: MagicMock) -> None:
        """Test that main() handles EOFError gracefully."""
@ -58,7 +62,7 @@ class TestMainEntryPoint:
        # Should complete without raising an exception (graceful exit)
        simple_main.main()

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands'])
    def test_main_handles_general_exception(
        self, mock_run_agent_chat: MagicMock
@ -72,7 +76,7 @@ class TestMainEntryPoint:

        assert str(exc_info.value) == 'Unexpected error'

-    @patch('openhands_cli.simple_main.run_cli_entry')
+    @patch('openhands_cli.agent_chat.run_cli_entry')
    @patch('sys.argv', ['openhands', '--resume', 'test-conversation-id'])
    def test_main_with_resume_argument(self, mock_run_agent_chat: MagicMock) -> None:
        """Test that main() passes resume conversation ID when provided."""
@ -86,3 +90,65 @@ class TestMainEntryPoint:
        mock_run_agent_chat.assert_called_once_with(
            resume_conversation_id='test-conversation-id'
        )
+
+
+
+
+@pytest.mark.parametrize(
+    "argv,expected_kwargs",
+    [
+        (['openhands'], {"resume_conversation_id": None}),
+        (['openhands', '--resume', 'test-id'], {"resume_conversation_id": 'test-id'}),
+    ],
+)
+def test_main_cli_calls_run_cli_entry(monkeypatch, argv, expected_kwargs):
+    """Check that main() hands the expected keyword arguments to run_cli_entry.
+
+    A stand-in ``openhands_cli.agent_chat`` module is registered in
+    ``sys.modules`` so the keyword arguments of the first ``run_cli_entry``
+    call can be captured and compared with ``expected_kwargs``.
+    """
+    captured = {}
+
+    def fake_run_cli_entry(**kwargs):
+        # Only the first call is remembered; later calls leave it untouched.
+        return captured.setdefault("kwargs", kwargs)
+
+    stub_module = SimpleNamespace(run_cli_entry=fake_run_cli_entry)
+
+    # main() takes no parameters, so the command line is supplied via sys.argv.
+    monkeypatch.setattr(sys, "argv", argv, raising=False)
+    # Make the stub the module that main() will import the symbol from.
+    monkeypatch.setitem(sys.modules, "openhands_cli.agent_chat", stub_module)
+
+    # No SystemExit is expected on the success path.
+    main()
+
+    assert captured["kwargs"] == expected_kwargs
+
+
+@pytest.mark.parametrize(
+    "argv,expected_kwargs",
+    [
+        (['openhands', 'serve'], {"mount_cwd": False, "gpu": False}),
+        (['openhands', 'serve', '--mount-cwd'], {"mount_cwd": True, "gpu": False}),
+        (['openhands', 'serve', '--gpu'], {"mount_cwd": False, "gpu": True}),
+        (['openhands', 'serve', '--mount-cwd', '--gpu'], {"mount_cwd": True, "gpu": True}),
+    ],
+)
+def test_main_serve_calls_launch_gui_server(monkeypatch, argv, expected_kwargs):
+    """Check that the ``serve`` subcommand forwards its flags to launch_gui_server.
+
+    A stand-in ``openhands_cli.gui_launcher`` module is registered in
+    ``sys.modules``; the keyword arguments of the first ``launch_gui_server``
+    call are captured and compared with ``expected_kwargs``.
+    """
+    captured = {}
+
+    def fake_launch_gui_server(**kwargs):
+        # Only the first call is remembered; later calls leave it untouched.
+        return captured.setdefault("kwargs", kwargs)
+
+    stub_module = SimpleNamespace(launch_gui_server=fake_launch_gui_server)
+
+    monkeypatch.setattr(sys, "argv", argv, raising=False)
+    monkeypatch.setitem(sys.modules, "openhands_cli.gui_launcher", stub_module)
+
+    main()
+
+    assert captured["kwargs"] == expected_kwargs
+
+
+@pytest.mark.parametrize(
+    "argv,expected_exit_code",
+    [
+        (['openhands', 'invalid-command'], 2),  # argparse error
+        (['openhands', '--help'], 0),           # top-level help
+        (['openhands', 'serve', '--help'], 0),  # subcommand help
+    ],
+)
+def test_help_and_invalid(monkeypatch, argv, expected_exit_code):
+    """Check that help requests and unknown commands exit with the expected code."""
+    monkeypatch.setattr(sys, "argv", argv, raising=False)
+
+    with pytest.raises(SystemExit) as exit_info:
+        main()
+
+    actual_exit_code = exit_info.value.code
+    assert actual_exit_code == expected_exit_code
--- a/openhands-cli/tests/test_new_command.py
+++ b/openhands-cli/tests/test_new_command.py
@ -0,0 +1,100 @@
+"""Tests for the /new command functionality."""
+
+from unittest.mock import MagicMock, patch
+from uuid import UUID
+from openhands_cli.agent_chat import _start_fresh_conversation
+from unittest.mock import MagicMock, patch
+from prompt_toolkit.input.defaults import create_pipe_input
+from prompt_toolkit.output.defaults import DummyOutput
+from openhands_cli.setup import MissingAgentSpec
+from openhands_cli.user_actions import UserConfirmation
+
+@patch('openhands_cli.agent_chat.setup_conversation')
+def test_start_fresh_conversation_success(mock_setup_conversation):
+    """Check that _start_fresh_conversation returns what setup_conversation builds."""
+    # Stand-in conversation handed back by the patched setup_conversation.
+    fresh_conversation = MagicMock(id=UUID('12345678-1234-5678-9abc-123456789abc'))
+    mock_setup_conversation.return_value = fresh_conversation
+
+    returned = _start_fresh_conversation()
+
+    # The stub conversation is returned and setup ran exactly once with None.
+    assert returned == fresh_conversation
+    mock_setup_conversation.assert_called_once_with(None)
+
+
+@patch('openhands_cli.agent_chat.SettingsScreen')
+@patch('openhands_cli.agent_chat.setup_conversation')
+def test_start_fresh_conversation_missing_agent_spec(
+    mock_setup_conversation,
+    mock_settings_screen_class
+):
+    """Check the retry path taken when setup_conversation raises MissingAgentSpec.
+
+    The first setup attempt raises, the second succeeds; in between the
+    settings screen is expected to be asked for basic settings once.
+    """
+    # Instance produced whenever the patched SettingsScreen class is called.
+    settings_screen = MagicMock()
+    mock_settings_screen_class.return_value = settings_screen
+
+    # First call fails with MissingAgentSpec, second call yields the conversation.
+    recovered_conversation = MagicMock(
+        id=UUID('12345678-1234-5678-9abc-123456789abc')
+    )
+    mock_setup_conversation.side_effect = [
+        MissingAgentSpec("Agent spec missing"),
+        recovered_conversation,
+    ]
+
+    returned = _start_fresh_conversation()
+
+    assert returned == recovered_conversation
+    # One failed attempt plus one successful attempt.
+    assert mock_setup_conversation.call_count == 2
+    # The settings screen is consulted exactly once, in non-escapable mode.
+    settings_screen.handle_basic_settings.assert_called_once_with(escapable=False)
+
+
+
+
+
+@patch('openhands_cli.agent_chat.exit_session_confirmation')
+@patch('openhands_cli.agent_chat.get_session_prompter')
+@patch('openhands_cli.agent_chat.setup_conversation')
+@patch('openhands_cli.agent_chat.ConversationRunner')
+def test_new_command_resets_confirmation_mode(
+    mock_runner_cls,
+    mock_setup_conversation,
+    mock_get_session_prompter,
+    mock_exit_confirm,
+):
+    """Feed "/new" then "/exit" to run_cli_entry and check a second runner is built.
+
+    The two runner stubs carry different ``is_confirmation_mode_enabled``
+    values, but the assertions only verify that ``ConversationRunner`` was
+    constructed once per conversation, in order.
+    NOTE(review): despite the test name, confirmation mode itself is never
+    asserted here - confirm whether an explicit check should be added.
+    """
+    # Auto-accept the exit prompt to avoid interactive UI and EOFError
+    mock_exit_confirm.return_value = UserConfirmation.ACCEPT
+
+    conv1 = MagicMock()
+    conv1.id = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
+    conv2 = MagicMock()
+    conv2.id = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
+    mock_setup_conversation.side_effect = [conv1, conv2]
+
+    # Distinct runner instances for each conversation
+    runner1 = MagicMock()
+    runner1.is_confirmation_mode_enabled = True
+    runner2 = MagicMock()
+    runner2.is_confirmation_mode_enabled = False
+    mock_runner_cls.side_effect = [runner1, runner2]
+
+    # Real session fed by a pipe (no interactive confirmation now).
+    # The real factory is imported from its defining module because the
+    # agent_chat reference is patched above.
+    from openhands_cli.user_actions.utils import get_session_prompter as real_get_session_prompter
+    with create_pipe_input() as pipe:
+        output = DummyOutput()
+        session = real_get_session_prompter(input=pipe, output=output)
+        mock_get_session_prompter.return_value = session
+
+        from openhands_cli.agent_chat import run_cli_entry
+        # Trigger /new, then /exit (exit will be auto-accepted)
+        for ch in "/new\r/exit\r":
+            pipe.send_text(ch)
+
+        run_cli_entry(None)
+
+    # Assert we switched to a new runner for conv2
+    assert mock_runner_cls.call_count == 2
+    assert mock_runner_cls.call_args_list[0].args[0] is conv1
+    assert mock_runner_cls.call_args_list[1].args[0] is conv2
--- a/openhands-cli/tests/test_status_command.py
+++ b/openhands-cli/tests/test_status_command.py
@ -0,0 +1,124 @@
+"""Simplified tests for the /status command functionality."""
+
+from datetime import datetime, timedelta
+from uuid import uuid4
+from unittest.mock import Mock, patch
+
+import pytest
+
+from openhands_cli.tui.status import display_status
+from openhands.sdk.llm.utils.metrics import Metrics, TokenUsage
+
+
+# ---------- Fixtures & helpers ----------
+
+@pytest.fixture
+def conversation():
+    """Provide a mock conversation with a random id, no events, and mock stats."""
+    return Mock(
+        id=uuid4(),
+        state=Mock(events=[]),
+        conversation_stats=Mock(),
+    )
+
+
+def make_metrics(cost=None, usage=None) -> Metrics:
+    """Build a Metrics object for the status tests.
+
+    ``accumulated_cost`` is only overridden when ``cost`` is given, whereas
+    ``accumulated_token_usage`` is always assigned, even when ``usage`` is None.
+    """
+    metrics = Metrics()
+    if cost is not None:
+        metrics.accumulated_cost = cost
+    metrics.accumulated_token_usage = usage
+    return metrics
+
+
+def call_display_status(conversation, session_start):
+    """Run display_status with both print helpers patched.
+
+    Returns a tuple ``(mock_print_formatted_text, mock_print_container, text)``
+    where ``text`` is the stringified ``body.text`` of the first container
+    passed to ``print_container`` (empty string if the body has no ``text``).
+    """
+    with patch('openhands_cli.tui.status.print_formatted_text') as mock_print_text:
+        with patch('openhands_cli.tui.status.print_container') as mock_print_container:
+            display_status(conversation, session_start_time=session_start)
+
+    # The mocks keep their recorded calls after the patches are undone.
+    first_call_positional = mock_print_container.call_args_list[0][0]
+    rendered_container = first_call_positional[0]
+    body_text = getattr(rendered_container.body, "text", "")
+    return mock_print_text, mock_print_container, str(body_text)
+
+
+# ---------- Tests ----------
+
+def test_display_status_box_title(conversation):
+    """Check that the first printed container is titled with "Usage Metrics"."""
+    session_start = datetime.now()
+    conversation.conversation_stats.get_combined_metrics.return_value = make_metrics()
+
+    # Reuse the shared helper instead of repeating the patch boilerplate.
+    pf, pc, _ = call_display_status(conversation, session_start)
+
+    assert pf.called and pc.called
+
+    # First positional argument of the first print_container call.
+    container = pc.call_args_list[0][0][0]
+    assert hasattr(container, "title")
+    assert "Usage Metrics" in container.title
+
+
+@pytest.mark.parametrize(
+    "delta,expected",
+    [
+        (timedelta(seconds=0), "0h 0m"),
+        (timedelta(minutes=5, seconds=30), "5m"),
+        (timedelta(hours=1, minutes=30, seconds=45), "1h 30m"),
+        (timedelta(hours=2, minutes=15, seconds=30), "2h 15m"),
+    ],
+)
+def test_display_status_uptime(conversation, delta, expected):
+    """Check the uptime and conversation id lines printed by display_status."""
+    conversation.conversation_stats.get_combined_metrics.return_value = make_metrics()
+    # Back-date the session start so the elapsed time equals ``delta``.
+    started_at = datetime.now() - delta
+
+    with patch('openhands_cli.tui.status.print_formatted_text') as mock_print_text:
+        with patch('openhands_cli.tui.status.print_container'):
+            display_status(conversation, session_start_time=started_at)
+
+    printed_calls = mock_print_text.call_args_list
+    # The second print_formatted_text call carries the uptime.
+    assert expected in str(printed_calls[1])
+    # The first print_formatted_text call carries the conversation id.
+    assert str(conversation.id) in str(printed_calls[0])
+
+
+@pytest.mark.parametrize(
+    "cost,usage,expecteds",
+    [
+        # Empty/zero case
+        (None, None, ["$0.000000", "0", "0", "0", "0", "0"]),
+        # Only cost, usage=None
+        (0.05, None, ["$0.050000", "0", "0", "0", "0", "0"]),
+        # Full metrics
+        (
+            0.123456,
+            TokenUsage(
+                prompt_tokens=1500,
+                completion_tokens=800,
+                cache_read_tokens=200,
+                cache_write_tokens=100,
+            ),
+            ["$0.123456", "1,500", "800", "200", "100", "2,300"],
+        ),
+        # Larger numbers (comprehensive)
+        (
+            1.234567,
+            TokenUsage(
+                prompt_tokens=5000,
+                completion_tokens=3000,
+                cache_read_tokens=500,
+                cache_write_tokens=250,
+            ),
+            ["$1.234567", "5,000", "3,000", "500", "250", "8,000"],
+        ),
+    ],
+)
+def test_display_status_metrics(conversation, cost, usage, expecteds):
+    """Check that each expected cost/token string appears in the metrics box text."""
+    stats = conversation.conversation_stats
+    stats.get_combined_metrics.return_value = make_metrics(cost, usage)
+
+    pf, pc, text = call_display_status(conversation, datetime.now())
+
+    assert pf.called and pc.called
+    # Substring checks only: every expected fragment must occur somewhere in the text.
+    for fragment in expecteds:
+        assert fragment in text
--- a/openhands-cli/tests/test_tui.py
+++ b/openhands-cli/tests/test_tui.py
@ -77,6 +77,7 @@ def test_commands_dict() -> None:
        '/exit',
        '/help',
        '/clear',
+        '/new',
        '/status',
        '/confirm',
        '/resume',