diff --git a/.github/scripts/update_pr_description.sh b/.github/scripts/update_pr_description.sh index 04fe71109a..cca991e2d4 100755 --- a/.github/scripts/update_pr_description.sh +++ b/.github/scripts/update_pr_description.sh @@ -1,33 +1,53 @@ #!/bin/bash +set -euxo pipefail + # This script updates the PR description with commands to run the PR locally # It adds both Docker and uvx commands # Get the branch name for the PR -BRANCH_NAME=$(gh pr view $PR_NUMBER --json headRefName --jq .headRefName) +BRANCH_NAME=$(gh pr view "$PR_NUMBER" --json headRefName --jq .headRefName) # Define the Docker command DOCKER_RUN_COMMAND="docker run -it --rm \ -p 3000:3000 \ -v /var/run/docker.sock:/var/run/docker.sock \ --add-host host.docker.internal:host-gateway \ - -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:$SHORT_SHA-nikolaik \ - --name openhands-app-$SHORT_SHA \ - docker.all-hands.dev/all-hands-ai/openhands:$SHORT_SHA" + -e SANDBOX_RUNTIME_CONTAINER_IMAGE=docker.all-hands.dev/all-hands-ai/runtime:${SHORT_SHA}-nikolaik \ + --name openhands-app-${SHORT_SHA} \ + docker.all-hands.dev/all-hands-ai/openhands:${SHORT_SHA}" # Define the uvx command -UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@$BRANCH_NAME openhands" +UVX_RUN_COMMAND="uvx --python 3.12 --from git+https://github.com/All-Hands-AI/OpenHands@${BRANCH_NAME} openhands" # Get the current PR body -PR_BODY=$(gh pr view $PR_NUMBER --json body --jq .body) +PR_BODY=$(gh pr view "$PR_NUMBER" --json body --jq .body) # Prepare the new PR body with both commands if echo "$PR_BODY" | grep -q "To run this PR locally, use the following command:"; then - # For existing PR descriptions, replace the command section - NEW_PR_BODY=$(echo "$PR_BODY" | sed "s|To run this PR locally, use the following command:.*\`\`\`|To run this PR locally, use the following command:\n\nGUI with Docker:\n\`\`\`\n$DOCKER_RUN_COMMAND\n\`\`\`\n\nCLI with uvx:\n\`\`\`\n$UVX_RUN_COMMAND\n\`\`\`|s") + # For existing PR descriptions, use a more robust approach + # Split the PR body at the "To run this PR locally" section and replace everything after it + BEFORE_SECTION=$(echo "$PR_BODY" | sed '/To run this PR locally, use the following command:/,$d') + NEW_PR_BODY=$(cat < +**Prerequisites**: You need to have the [OpenHands CLI installed](/usage/how-to/cli-mode) first, OR have `uv` installed and run `uvx --python 3.12 --from openhands-ai openhands serve`. Otherwise, you'll need to use Docker directly (see the [Docker section](#using-docker-directly) below). + + +```bash +openhands serve +``` + +This command will: +- Check that Docker is installed and running +- Pull the required Docker images +- Launch the OpenHands GUI server at http://localhost:3000 +- Use the same configuration directory (`~/.openhands`) as the CLI mode + +#### Mounting Your Current Directory + +To mount your current working directory into the GUI server container, use the `--mount-cwd` flag: + +```bash +openhands serve --mount-cwd +``` + +This is useful when you want to work on files in your current directory through the GUI. The directory will be mounted at `/workspace` inside the container. + +#### Using GPU Support + +If you have NVIDIA GPUs and want to make them available to the OpenHands container, use the `--gpu` flag: + +```bash +openhands serve --gpu +``` + +This will enable GPU support via nvidia-docker, mounting all available GPUs into the container. 
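+
+If you're unsure whether the NVIDIA Container Toolkit is working, a quick smoke test is to run `nvidia-smi` inside a CUDA container first (the image tag below is only an example):
+
+```bash
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+```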
You can combine this with other flags: + +```bash +openhands serve --gpu --mount-cwd +``` + +**Prerequisites for GPU support:** +- NVIDIA GPU drivers must be installed on your host system +- [NVIDIA Container Toolkit (nvidia-docker2)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) must be installed and configured + +#### Requirements + +Before using the `openhands serve` command, ensure that: +- Docker is installed and running on your system +- You have internet access to pull the required Docker images +- Port 3000 is available on your system + +The CLI will automatically check these requirements and provide helpful error messages if anything is missing. + +### Using Docker Directly + +Alternatively, you can run the GUI server using Docker directly. See the [local setup guide](/usage/local-setup) for detailed Docker instructions. + ## Overview ### Initial Setup diff --git a/docs/usage/local-setup.mdx b/docs/usage/local-setup.mdx index 86217b01ae..67bf7900f0 100644 --- a/docs/usage/local-setup.mdx +++ b/docs/usage/local-setup.mdx @@ -66,6 +66,30 @@ A system with a modern processor and a minimum of **4GB RAM** is recommended to ### Start the App +#### Option 1: Using the CLI Launcher (Recommended) + +If you have Python 3.12+ installed, you can use the CLI launcher for a simpler experience: + +```bash +# Install OpenHands +pip install openhands-ai + +# Launch the GUI server +openhands serve + +# Or with GPU support (requires nvidia-docker) +openhands serve --gpu + +# Or with current directory mounted +openhands serve --mount-cwd +``` + +Alternatively, use `uvx --python 3.12 --from openhands-ai openhands serve` if you have [uv](https://docs.astral.sh/uv/) installed. + +This will automatically check the Docker requirements, pull the required images, and launch the GUI server. The `--gpu` flag enables GPU support via nvidia-docker, and `--mount-cwd` mounts your current directory into the container.
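+
+For example, to open the GUI against a project you already have checked out (the path below is just a placeholder):
+
+```bash
+cd ~/projects/my-app
+openhands serve --mount-cwd
+```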
+ +#### Option 2: Using Docker Directly + ```bash docker pull docker.all-hands.dev/all-hands-ai/runtime:0.51-nikolaik diff --git a/evaluation/benchmarks/EDA/run_infer.py b/evaluation/benchmarks/EDA/run_infer.py index a80b745ce7..649b72b11e 100644 --- a/evaluation/benchmarks/EDA/run_infer.py +++ b/evaluation/benchmarks/EDA/run_infer.py @@ -18,8 +18,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -172,7 +172,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--answerer_model', '-a', default='gpt-3.5-turbo', help='answerer model' ) diff --git a/evaluation/benchmarks/commit0/run_infer.py b/evaluation/benchmarks/commit0/run_infer.py index 99c5b4a43d..176d8f7233 100644 --- a/evaluation/benchmarks/commit0/run_infer.py +++ b/evaluation/benchmarks/commit0/run_infer.py @@ -26,8 +26,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -525,7 +525,7 @@ def commit0_setup(dataset: pd.DataFrame, repo_split: str) -> pd.DataFrame: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/gaia/run_infer.py b/evaluation/benchmarks/gaia/run_infer.py index 82b656e52c..cc21cfc25d 100644 --- a/evaluation/benchmarks/gaia/run_infer.py +++ b/evaluation/benchmarks/gaia/run_infer.py @@ -31,8 +31,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_from_toml, ) from openhands.core.config.utils import get_agent_config_arg @@ -294,7 +294,7 @@ Here is the task: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--level', type=str, diff --git a/evaluation/benchmarks/gorilla/run_infer.py b/evaluation/benchmarks/gorilla/run_infer.py index 652e774503..79e5fffdc6 100644 --- a/evaluation/benchmarks/gorilla/run_infer.py +++ b/evaluation/benchmarks/gorilla/run_infer.py @@ -20,8 +20,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -134,7 +134,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--hubs', type=str, diff --git a/evaluation/benchmarks/gpqa/run_infer.py b/evaluation/benchmarks/gpqa/run_infer.py index 3bc981378b..cb1bbd68b1 100644 --- a/evaluation/benchmarks/gpqa/run_infer.py +++ b/evaluation/benchmarks/gpqa/run_infer.py @@ -38,8 +38,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - 
get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -312,7 +312,7 @@ Ok now its time to start solving the question. Good luck! if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() # data split must be one of 'gpqa_main', 'gqpa_diamond', 'gpqa_experts', 'gpqa_extended' parser.add_argument( '--data-split', diff --git a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py index 40846e1e61..2aad6fb1b5 100644 --- a/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py +++ b/evaluation/benchmarks/lca_ci_build_repair/eval_infer.py @@ -21,7 +21,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -167,7 +167,7 @@ def process_predictions(predictions_path: str): if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py index 0fe5e79e3b..1dba49413d 100644 --- a/evaluation/benchmarks/lca_ci_build_repair/run_infer.py +++ b/evaluation/benchmarks/lca_ci_build_repair/run_infer.py @@ -30,8 +30,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -358,7 +358,7 @@ Be thorough in your exploration, testing, and reasoning. 
It's fine if your think if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/logic_reasoning/run_infer.py b/evaluation/benchmarks/logic_reasoning/run_infer.py index 39743cd674..eb4342f44a 100644 --- a/evaluation/benchmarks/logic_reasoning/run_infer.py +++ b/evaluation/benchmarks/logic_reasoning/run_infer.py @@ -18,8 +18,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -267,7 +267,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/mint/run_infer.py b/evaluation/benchmarks/mint/run_infer.py index d0cec13a2f..b7fac0e44d 100644 --- a/evaluation/benchmarks/mint/run_infer.py +++ b/evaluation/benchmarks/mint/run_infer.py @@ -23,8 +23,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -229,7 +229,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() SUBSETS = [ # Eurus subset: https://arxiv.org/abs/2404.02078 diff --git a/evaluation/benchmarks/ml_bench/run_analysis.py b/evaluation/benchmarks/ml_bench/run_analysis.py index 9c0958060f..8baddcffb1 100644 --- a/evaluation/benchmarks/ml_bench/run_analysis.py +++ b/evaluation/benchmarks/ml_bench/run_analysis.py @@ -4,7 +4,11 @@ import pprint import tqdm -from openhands.core.config import get_llm_config_arg, get_parser, load_openhands_config +from openhands.core.config import ( + get_evaluation_parser, + get_llm_config_arg, + load_openhands_config, +) from openhands.core.logger import openhands_logger as logger from openhands.llm.llm import LLM @@ -111,7 +115,7 @@ def classify_error(llm: LLM, failed_case: dict) -> str: if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--json_file_path', type=str, diff --git a/evaluation/benchmarks/ml_bench/run_infer.py b/evaluation/benchmarks/ml_bench/run_infer.py index 40874cf9a4..1b4b094520 100644 --- a/evaluation/benchmarks/ml_bench/run_infer.py +++ b/evaluation/benchmarks/ml_bench/run_infer.py @@ -34,8 +34,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, load_openhands_config, ) from openhands.core.logger import openhands_logger as logger @@ -273,7 +273,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '-s', '--eval-split', diff --git a/evaluation/benchmarks/multi_swe_bench/eval_infer.py b/evaluation/benchmarks/multi_swe_bench/eval_infer.py index c895bb0b62..ae259cb597 100644 --- a/evaluation/benchmarks/multi_swe_bench/eval_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/eval_infer.py @@ -30,7 
+30,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime @@ -323,7 +323,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, diff --git a/evaluation/benchmarks/multi_swe_bench/run_infer.py b/evaluation/benchmarks/multi_swe_bench/run_infer.py index 4f12677dc2..ca33f65298 100644 --- a/evaluation/benchmarks/multi_swe_bench/run_infer.py +++ b/evaluation/benchmarks/multi_swe_bench/run_infer.py @@ -32,8 +32,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -772,7 +772,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame: if __name__ == '__main__': # pdb.set_trace() - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/scienceagentbench/run_infer.py b/evaluation/benchmarks/scienceagentbench/run_infer.py index 8a83a7e200..c346ac1da1 100644 --- a/evaluation/benchmarks/scienceagentbench/run_infer.py +++ b/evaluation/benchmarks/scienceagentbench/run_infer.py @@ -21,8 +21,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -239,7 +239,7 @@ If the program uses some packages that are incompatible, please figure out alter if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--use-knowledge', type=str, diff --git a/evaluation/benchmarks/swe_bench/eval_infer.py b/evaluation/benchmarks/swe_bench/eval_infer.py index 48170be8b5..81b7decd4c 100644 --- a/evaluation/benchmarks/swe_bench/eval_infer.py +++ b/evaluation/benchmarks/swe_bench/eval_infer.py @@ -26,7 +26,7 @@ from evaluation.utils.shared import ( from openhands.core.config import ( LLMConfig, OpenHandsConfig, - get_parser, + get_evaluation_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime @@ -353,7 +353,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 52ca9a9b81..7873b115a1 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -43,8 +43,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.condenser_config import NoOpCondenserConfig from openhands.core.config.utils import get_condenser_config_arg @@ -732,7 +732,7 @@ def filter_dataset(dataset: pd.DataFrame, filter_column: str) -> pd.DataFrame: if __name__ == '__main__': - parser = get_parser() + parser = 
get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_infer_interact.py b/evaluation/benchmarks/swe_bench/run_infer_interact.py index 1ed4cc4e2f..c97a2d6b3f 100755 --- a/evaluation/benchmarks/swe_bench/run_infer_interact.py +++ b/evaluation/benchmarks/swe_bench/run_infer_interact.py @@ -28,8 +28,8 @@ from evaluation.utils.shared import ( ) from openhands.controller.state.state import State from openhands.core.config import ( + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.condenser_config import NoOpCondenserConfig from openhands.core.config.utils import get_condenser_config_arg @@ -201,7 +201,7 @@ def process_instance( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/swe_bench/run_localize.py b/evaluation/benchmarks/swe_bench/run_localize.py index f17d40b87c..0c34991577 100644 --- a/evaluation/benchmarks/swe_bench/run_localize.py +++ b/evaluation/benchmarks/swe_bench/run_localize.py @@ -31,8 +31,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -644,7 +644,7 @@ SWEGYM_EXCLUDE_IDS = [ ] if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/testgeneval/eval_infer.py b/evaluation/benchmarks/testgeneval/eval_infer.py index 6312ff66e5..99eea4ba43 100644 --- a/evaluation/benchmarks/testgeneval/eval_infer.py +++ b/evaluation/benchmarks/testgeneval/eval_infer.py @@ -41,7 +41,7 @@ from evaluation.utils.shared import ( reset_logger_for_multiprocessing, run_evaluation, ) -from openhands.core.config import OpenHandsConfig, SandboxConfig, get_parser +from openhands.core.config import OpenHandsConfig, SandboxConfig, get_evaluation_parser from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime from openhands.events.action import CmdRunAction @@ -484,7 +484,7 @@ def count_and_log_fields(evaluated_predictions, fields, key): if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--input-file', type=str, required=True, help='Path to input predictions file' ) diff --git a/evaluation/benchmarks/testgeneval/run_infer.py b/evaluation/benchmarks/testgeneval/run_infer.py index 5338914369..39288ff537 100644 --- a/evaluation/benchmarks/testgeneval/run_infer.py +++ b/evaluation/benchmarks/testgeneval/run_infer.py @@ -37,8 +37,8 @@ from openhands.core.config import ( AgentConfig, OpenHandsConfig, SandboxConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -491,7 +491,7 @@ def prepare_dataset_pre(dataset: pd.DataFrame, filter_column: str) -> pd.DataFra if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py index e71550d0d9..9f5780d559 100644 --- a/evaluation/benchmarks/the_agent_company/run_infer.py +++ 
b/evaluation/benchmarks/the_agent_company/run_infer.py @@ -18,8 +18,8 @@ from openhands.core.config import ( LLMConfig, OpenHandsConfig, get_agent_config_arg, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.config.agent_config import AgentConfig from openhands.core.logger import openhands_logger as logger @@ -197,7 +197,7 @@ def run_evaluator( if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--task-image-name', type=str, diff --git a/evaluation/benchmarks/toolqa/run_infer.py b/evaluation/benchmarks/toolqa/run_infer.py index 29e65d944f..4db988efa1 100644 --- a/evaluation/benchmarks/toolqa/run_infer.py +++ b/evaluation/benchmarks/toolqa/run_infer.py @@ -19,8 +19,8 @@ from evaluation.utils.shared import ( from openhands.controller.state.state import State from openhands.core.config import ( OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -157,7 +157,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool = if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/evaluation/benchmarks/visual_swe_bench/run_infer.py b/evaluation/benchmarks/visual_swe_bench/run_infer.py index d07e885b1d..215f7933b1 100644 --- a/evaluation/benchmarks/visual_swe_bench/run_infer.py +++ b/evaluation/benchmarks/visual_swe_bench/run_infer.py @@ -31,8 +31,8 @@ from openhands.controller.state.state import State from openhands.core.config import ( AgentConfig, OpenHandsConfig, + get_evaluation_parser, get_llm_config_arg, - get_parser, ) from openhands.core.logger import openhands_logger as logger from openhands.core.main import create_runtime, run_controller @@ -565,7 +565,7 @@ SWEGYM_EXCLUDE_IDS = [ ] if __name__ == '__main__': - parser = get_parser() + parser = get_evaluation_parser() parser.add_argument( '--dataset', type=str, diff --git a/openhands/cli/__init__.py b/openhands/cli/__init__.py new file mode 100644 index 0000000000..9315930b74 --- /dev/null +++ b/openhands/cli/__init__.py @@ -0,0 +1 @@ +"""OpenHands CLI module.""" diff --git a/openhands/cli/entry.py b/openhands/cli/entry.py new file mode 100644 index 0000000000..1fcb6d4b63 --- /dev/null +++ b/openhands/cli/entry.py @@ -0,0 +1,54 @@ +"""Main entry point for OpenHands CLI with subcommand support.""" + +import sys + +import openhands +import openhands.cli.suppress_warnings # noqa: F401 +from openhands.cli.gui_launcher import launch_gui_server +from openhands.cli.main import run_cli_command +from openhands.core.config import get_cli_parser +from openhands.core.config.arg_utils import get_subparser + + +def main(): + """Main entry point with subcommand support and backward compatibility.""" + parser = get_cli_parser() + + # If user only asks for --help or -h without a subcommand + if len(sys.argv) == 2 and sys.argv[1] in ('--help', '-h'): + # Print top-level help + print(parser.format_help()) + + # Also print help for `cli` subcommand + print('\n' + '=' * 80) + print('CLI command help:\n') + + cli_parser = get_subparser(parser, 'cli') + print(cli_parser.format_help()) + + sys.exit(0) + + # Special case: no subcommand provided, simulate "openhands cli" + if len(sys.argv) == 1 or ( + len(sys.argv) > 1 and sys.argv[1] not in ['cli', 'serve'] + ): + # Inject 'cli' as default command + sys.argv.insert(1, 
'cli') + + args = parser.parse_args() + + if hasattr(args, 'version') and args.version: + print(f'OpenHands CLI version: {openhands.get_version()}') + sys.exit(0) + + if args.command == 'serve': + launch_gui_server(mount_cwd=args.mount_cwd, gpu=args.gpu) + elif args.command == 'cli' or args.command is None: + run_cli_command(args) + else: + parser.print_help() + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/openhands/cli/gui_launcher.py b/openhands/cli/gui_launcher.py new file mode 100644 index 0000000000..fb3b6a6ff8 --- /dev/null +++ b/openhands/cli/gui_launcher.py @@ -0,0 +1,219 @@ +"""GUI launcher for OpenHands CLI.""" + +import os +import shutil +import subprocess +import sys +from pathlib import Path + +from prompt_toolkit import print_formatted_text +from prompt_toolkit.formatted_text import HTML + +from openhands import __version__ + + +def _format_docker_command_for_logging(cmd: list[str]) -> str: + """Format a Docker command for logging with grey color. + + Args: + cmd (list[str]): The Docker command as a list of strings + + Returns: + str: The formatted command string in grey HTML color + """ + cmd_str = ' '.join(cmd) + return f'<grey>Running Docker command: {cmd_str}</grey>' + + +def check_docker_requirements() -> bool: + """Check if Docker is installed and running. + + Returns: + bool: True if Docker is available and running, False otherwise. + """ + # Check if Docker is installed + if not shutil.which('docker'): + print_formatted_text( + HTML('❌ Docker is not installed or not in PATH.') + ) + print_formatted_text( + HTML( + 'Please install Docker first: https://docs.docker.com/get-docker/' + ) + ) + return False + + # Check if Docker daemon is running + try: + result = subprocess.run( + ['docker', 'info'], capture_output=True, text=True, timeout=10 + ) + if result.returncode != 0: + print_formatted_text( + HTML('❌ Docker daemon is not running.') + ) + print_formatted_text( + HTML('Please start Docker and try again.') + ) + return False + except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e: + print_formatted_text( + HTML('❌ Failed to check Docker status.') + ) + print_formatted_text(HTML(f'Error: {e}')) + return False + + return True + + +def ensure_config_dir_exists() -> Path: + """Ensure the OpenHands configuration directory exists and return its path.""" + config_dir = Path.home() / '.openhands' + config_dir.mkdir(exist_ok=True) + return config_dir + + +def launch_gui_server(mount_cwd: bool = False, gpu: bool = False) -> None: + """Launch the OpenHands GUI server using Docker. + + Args: + mount_cwd: If True, mount the current working directory into the container. + gpu: If True, enable GPU support by mounting all GPUs into the container via nvidia-docker.
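+
+    Example (illustrative call; assumes Docker is installed and running):
+        launch_gui_server(mount_cwd=True, gpu=False)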
+ """ + print_formatted_text( + HTML('🚀 Launching OpenHands GUI server...') + ) + print_formatted_text('') + + # Check Docker requirements + if not check_docker_requirements(): + sys.exit(1) + + # Ensure config directory exists + config_dir = ensure_config_dir_exists() + + # Get the current version for the Docker image + version = __version__ + runtime_image = f'docker.all-hands.dev/all-hands-ai/runtime:{version}-nikolaik' + app_image = f'docker.all-hands.dev/all-hands-ai/openhands:{version}' + + print_formatted_text(HTML('Pulling required Docker images...')) + + # Pull the runtime image first + pull_cmd = ['docker', 'pull', runtime_image] + print_formatted_text(HTML(_format_docker_command_for_logging(pull_cmd))) + try: + subprocess.run( + pull_cmd, + check=True, + timeout=300, # 5 minutes timeout + ) + except subprocess.CalledProcessError: + print_formatted_text( + HTML('❌ Failed to pull runtime image.') + ) + sys.exit(1) + except subprocess.TimeoutExpired: + print_formatted_text( + HTML('❌ Timeout while pulling runtime image.') + ) + sys.exit(1) + + print_formatted_text('') + print_formatted_text( + HTML('✅ Starting OpenHands GUI server...') + ) + print_formatted_text( + HTML('The server will be available at: http://localhost:3000') + ) + print_formatted_text(HTML('Press Ctrl+C to stop the server.')) + print_formatted_text('') + + # Build the Docker command + docker_cmd = [ + 'docker', + 'run', + '-it', + '--rm', + '--pull=always', + '-e', + f'SANDBOX_RUNTIME_CONTAINER_IMAGE={runtime_image}', + '-e', + 'LOG_ALL_EVENTS=true', + '-v', + '/var/run/docker.sock:/var/run/docker.sock', + '-v', + f'{config_dir}:/.openhands', + ] + + # Add GPU support if requested + if gpu: + print_formatted_text( + HTML('🖥️ Enabling GPU support via nvidia-docker...') + ) + # Add the --gpus all flag to enable all GPUs + docker_cmd.insert(2, '--gpus') + docker_cmd.insert(3, 'all') + # Add environment variable to pass GPU support to sandbox containers + docker_cmd.extend( + [ + '-e', + 'SANDBOX_ENABLE_GPU=true', + ] + ) + + # Add current working directory mount if requested + if mount_cwd: + cwd = Path.cwd() + # Following the documentation at https://docs.all-hands.dev/usage/runtimes/docker#connecting-to-your-filesystem + docker_cmd.extend( + [ + '-e', + f'SANDBOX_VOLUMES={cwd}:/workspace:rw', + ] + ) + + # Set user ID for Unix-like systems only + if os.name != 'nt': # Not Windows + try: + user_id = subprocess.check_output(['id', '-u'], text=True).strip() + docker_cmd.extend(['-e', f'SANDBOX_USER_ID={user_id}']) + except (subprocess.CalledProcessError, FileNotFoundError): + # If 'id' command fails or doesn't exist, skip setting user ID + pass + # Print the folder that will be mounted to inform the user + print_formatted_text( + HTML( + f'📂 Mounting current directory: {cwd} to /workspace' + ) + ) + + docker_cmd.extend( + [ + '-p', + '3000:3000', + '--add-host', + 'host.docker.internal:host-gateway', + '--name', + 'openhands-app', + app_image, + ] + ) + + try: + # Log and run the Docker command + print_formatted_text(HTML(_format_docker_command_for_logging(docker_cmd))) + subprocess.run(docker_cmd, check=True) + except subprocess.CalledProcessError as e: + print_formatted_text('') + print_formatted_text( + HTML('❌ Failed to start OpenHands GUI server.') + ) + print_formatted_text(HTML(f'Error: {e}')) + sys.exit(1) + except KeyboardInterrupt: + print_formatted_text('') + print_formatted_text( + HTML('✓ OpenHands GUI server stopped successfully.') + ) + sys.exit(0) diff --git a/openhands/cli/main.py 
b/openhands/cli/main.py index fd5f8b18cc..17aaaad895 100644 --- a/openhands/cli/main.py +++ b/openhands/cli/main.py @@ -45,7 +45,6 @@ from openhands.controller import AgentController from openhands.controller.agent import Agent from openhands.core.config import ( OpenHandsConfig, - parse_arguments, setup_config_from_args, ) from openhands.core.config.condenser_config import NoOpCondenserConfig @@ -524,10 +523,8 @@ def run_alias_setup_flow(config: OpenHandsConfig) -> None: print_formatted_text('') -async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None: +async def main_with_loop(loop: asyncio.AbstractEventLoop, args) -> None: """Runs the agent in CLI mode.""" - args = parse_arguments() - # Set log level from command line argument if provided if args.log_level and isinstance(args.log_level, str): log_level = getattr(logging, str(args.log_level).upper()) @@ -575,13 +572,9 @@ async def main_with_loop(loop: asyncio.AbstractEventLoop) -> None: # Use settings from settings store if available and override with command line arguments if settings: - # Handle agent configuration - if args.agent_cls: - config.default_agent = str(args.agent_cls) - else: - # settings.agent is not None because we check for it in setup_config_from_args - assert settings.agent is not None - config.default_agent = settings.agent + # settings.agent is not None because we check for it in setup_config_from_args + assert settings.agent is not None + config.default_agent = settings.agent # Handle LLM configuration with proper precedence: # 1. CLI parameters (-l) have highest precedence (already handled in setup_config_from_args) @@ -719,18 +712,19 @@ After reviewing the file, please ask the user what they would like to do with it get_runtime_cls(config.runtime).teardown(config) -def main(): +def run_cli_command(args): + """Run the CLI command with proper error handling and cleanup.""" loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - loop.run_until_complete(main_with_loop(loop)) + loop.run_until_complete(main_with_loop(loop, args)) except KeyboardInterrupt: print_formatted_text('⚠️ Session was interrupted: interrupted\n') except ConnectionRefusedError as e: - print(f'Connection refused: {e}') + print_formatted_text(f'Connection refused: {e}') sys.exit(1) except Exception as e: - print(f'An error occurred: {e}') + print_formatted_text(f'An error occurred: {e}') sys.exit(1) finally: try: @@ -743,9 +737,5 @@ def main(): loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) loop.close() except Exception as e: - print(f'Error during cleanup: {e}') + print_formatted_text(f'Error during cleanup: {e}') sys.exit(1) - - -if __name__ == '__main__': - main() diff --git a/openhands/core/config/__init__.py b/openhands/core/config/__init__.py index cf78955711..97d71bd5f1 100644 --- a/openhands/core/config/__init__.py +++ b/openhands/core/config/__init__.py @@ -1,4 +1,9 @@ from openhands.core.config.agent_config import AgentConfig +from openhands.core.config.arg_utils import ( + get_cli_parser, + get_evaluation_parser, + get_headless_parser, +) from openhands.core.config.cli_config import CLIConfig from openhands.core.config.config_utils import ( OH_DEFAULT_AGENT, @@ -15,7 +20,6 @@ from openhands.core.config.utils import ( finalize_config, get_agent_config_arg, get_llm_config_arg, - get_parser, load_from_env, load_from_toml, load_openhands_config, @@ -41,7 +45,9 @@ __all__ = [ 'get_agent_config_arg', 'get_llm_config_arg', 'get_field_info', - 'get_parser', + 'get_cli_parser', + 
'get_headless_parser', + 'get_evaluation_parser', 'parse_arguments', 'setup_config_from_args', ] diff --git a/openhands/core/config/arg_utils.py b/openhands/core/config/arg_utils.py new file mode 100644 index 0000000000..d39a4856e0 --- /dev/null +++ b/openhands/core/config/arg_utils.py @@ -0,0 +1,224 @@ +"""Centralized command line argument configuration for OpenHands CLI and headless modes.""" + +import argparse +from argparse import ArgumentParser, _SubParsersAction + + +def get_subparser(parser: ArgumentParser, name: str) -> ArgumentParser: + for action in parser._actions: + if isinstance(action, _SubParsersAction): + if name in action.choices: + return action.choices[name] + raise ValueError(f"Subparser '{name}' not found") + + +def add_common_arguments(parser: argparse.ArgumentParser) -> None: + """Add common arguments shared between CLI and headless modes.""" + parser.add_argument( + '--config-file', + type=str, + default='config.toml', + help='Path to the config file (default: config.toml in the current directory)', + ) + parser.add_argument( + '-t', + '--task', + type=str, + default='', + help='The task for the agent to perform', + ) + parser.add_argument( + '-f', + '--file', + type=str, + help='Path to a file containing the task. Overrides -t if both are provided.', + ) + parser.add_argument( + '-n', + '--name', + help='Session name', + type=str, + default='', + ) + parser.add_argument( + '--log-level', + help='Set the log level', + type=str, + default=None, + ) + parser.add_argument( + '-l', + '--llm-config', + default=None, + type=str, + help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml', + ) + parser.add_argument( + '--agent-config', + default=None, + type=str, + help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. 
"CodeAct" for [agent.CodeAct] section in config.toml', + ) + parser.add_argument( + '-v', '--version', action='store_true', help='Show version information' + ) + + +def add_evaluation_arguments(parser: argparse.ArgumentParser) -> None: + """Add arguments specific to evaluation mode.""" + # Evaluation-specific arguments + parser.add_argument( + '--eval-output-dir', + default='evaluation/evaluation_outputs/outputs', + type=str, + help='The directory to save evaluation output', + ) + parser.add_argument( + '--eval-n-limit', + default=None, + type=int, + help='The number of instances to evaluate', + ) + parser.add_argument( + '--eval-num-workers', + default=4, + type=int, + help='The number of workers to use for evaluation', + ) + parser.add_argument( + '--eval-note', + default=None, + type=str, + help='The note to add to the evaluation directory', + ) + parser.add_argument( + '--eval-ids', + default=None, + type=str, + help='The comma-separated list (in quotes) of IDs of the instances to evaluate', + ) + + +def add_headless_specific_arguments(parser: argparse.ArgumentParser) -> None: + """Add arguments specific to headless mode (full evaluation suite).""" + parser.add_argument( + '-d', + '--directory', + type=str, + help='The working directory for the agent', + ) + parser.add_argument( + '-c', + '--agent-cls', + default=None, + type=str, + help='Name of the default agent to use', + ) + parser.add_argument( + '-i', + '--max-iterations', + default=None, + type=int, + help='The maximum number of iterations to run the agent', + ) + parser.add_argument( + '-b', + '--max-budget-per-task', + type=float, + help='The maximum budget allowed per task, beyond which the agent will stop.', + ) + # Additional headless-specific arguments + parser.add_argument( + '--no-auto-continue', + help='Disable auto-continue responses in headless mode (i.e. 
headless will read from stdin instead of auto-continuing)', + action='store_true', + default=False, + ) + parser.add_argument( + '--selected-repo', + help='GitHub repository to clone (format: owner/repo)', + type=str, + default=None, + ) + + +def get_cli_parser() -> argparse.ArgumentParser: + """Create argument parser for CLI mode with simplified argument set.""" + # Create a description with welcome message explaining available commands + description = ( + 'Welcome to OpenHands: Code Less, Make More\n\n' + 'OpenHands supports two main commands:\n' + ' serve - Launch the OpenHands GUI server (web interface)\n' + ' cli - Run OpenHands in CLI mode (terminal interface)\n\n' + 'Running "openhands" without a command is the same as "openhands cli"' + ) + + parser = argparse.ArgumentParser( + description=description, + prog='openhands', + formatter_class=argparse.RawDescriptionHelpFormatter, # Preserve formatting in description + epilog='For more information about a command, run: openhands COMMAND --help', + ) + + # Create subparsers + subparsers = parser.add_subparsers( + dest='command', + title='commands', + description='OpenHands supports two main commands:', + metavar='COMMAND', + ) + + # Add 'serve' subcommand + serve_parser = subparsers.add_parser( + 'serve', help='Launch the OpenHands GUI server using Docker (web interface)' + ) + serve_parser.add_argument( + '--mount-cwd', + help='Mount the current working directory into the GUI server container', + action='store_true', + default=False, + ) + serve_parser.add_argument( + '--gpu', + help='Enable GPU support by mounting all GPUs into the Docker container via nvidia-docker', + action='store_true', + default=False, + ) + + # Add 'cli' subcommand - import all the existing CLI arguments + cli_parser = subparsers.add_parser( + 'cli', help='Run OpenHands in CLI mode (terminal interface)' + ) + add_common_arguments(cli_parser) + + cli_parser.add_argument( + '--override-cli-mode', + help='Override the default settings for CLI mode', + type=bool, + default=False, + ) + cli_parser.add_argument( + '--conversation', + help='The conversation id to continue', + type=str, + default=None, + ) + + return parser + + +def get_headless_parser() -> argparse.ArgumentParser: + """Create argument parser for headless mode with full argument set.""" + parser = argparse.ArgumentParser(description='Run the agent via CLI') + add_common_arguments(parser) + add_headless_specific_arguments(parser) + return parser + + +def get_evaluation_parser() -> argparse.ArgumentParser: + """Create argument parser for evaluation mode.""" + parser = argparse.ArgumentParser(description='Run OpenHands in evaluation mode') + add_common_arguments(parser) + add_headless_specific_arguments(parser) + add_evaluation_arguments(parser) + return parser diff --git a/openhands/core/config/utils.py b/openhands/core/config/utils.py index 81ac040a5f..cf95e75d7d 100644 --- a/openhands/core/config/utils.py +++ b/openhands/core/config/utils.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, SecretStr, ValidationError from openhands import __version__ from openhands.core import logger from openhands.core.config.agent_config import AgentConfig +from openhands.core.config.arg_utils import get_headless_parser from openhands.core.config.condenser_config import ( CondenserConfig, condenser_config_from_toml_section, @@ -670,148 +671,9 @@ def get_condenser_config_arg( return None - - -# Command line arguments -def get_parser() -> argparse.ArgumentParser: - """Get the argument parser.""" - parser =
argparse.ArgumentParser(description='Run the agent via CLI') - - # Add version argument - parser.add_argument( - '-v', '--version', action='store_true', help='Show version information' - ) - - parser.add_argument( - '--config-file', - type=str, - default='config.toml', - help='Path to the config file (default: config.toml in the current directory)', - ) - parser.add_argument( - '-d', - '--directory', - type=str, - help='The working directory for the agent', - ) - parser.add_argument( - '-t', - '--task', - type=str, - default='', - help='The task for the agent to perform', - ) - parser.add_argument( - '-f', - '--file', - type=str, - help='Path to a file containing the task. Overrides -t if both are provided.', - ) - parser.add_argument( - '-c', - '--agent-cls', - default=None, - type=str, - help='Name of the default agent to use', - ) - parser.add_argument( - '-i', - '--max-iterations', - default=None, - type=int, - help='The maximum number of iterations to run the agent', - ) - parser.add_argument( - '-b', - '--max-budget-per-task', - type=float, - help='The maximum budget allowed per task, beyond which the agent will stop.', - ) - # --eval configs are for evaluations only - parser.add_argument( - '--eval-output-dir', - default='evaluation/evaluation_outputs/outputs', - type=str, - help='The directory to save evaluation output', - ) - parser.add_argument( - '--eval-n-limit', - default=None, - type=int, - help='The number of instances to evaluate', - ) - parser.add_argument( - '--eval-num-workers', - default=4, - type=int, - help='The number of workers to use for evaluation', - ) - parser.add_argument( - '--eval-note', - default=None, - type=str, - help='The note to add to the evaluation directory', - ) - parser.add_argument( - '-l', - '--llm-config', - default=None, - type=str, - help='Replace default LLM ([llm] section in config.toml) config with the specified LLM config, e.g. "llama3" for [llm.llama3] section in config.toml', - ) - parser.add_argument( - '--agent-config', - default=None, - type=str, - help='Replace default Agent ([agent] section in config.toml) config with the specified Agent config, e.g. "CodeAct" for [agent.CodeAct] section in config.toml', - ) - parser.add_argument( - '-n', - '--name', - help='Session name', - type=str, - default='', - ) - parser.add_argument( - '--conversation', - help='The conversation id to continue', - type=str, - default=None, - ) - parser.add_argument( - '--eval-ids', - default=None, - type=str, - help='The comma-separated list (in quotes) of IDs of the instances to evaluate', - ) - parser.add_argument( - '--no-auto-continue', - help='Disable auto-continue responses in headless mode (i.e. 
headless will read from stdin instead of auto-continuing)', - action='store_true', - default=False, - ) - parser.add_argument( - '--selected-repo', - help='GitHub repository to clone (format: owner/repo)', - type=str, - default=None, - ) - parser.add_argument( - '--override-cli-mode', - help='Override the default settings for CLI mode', - type=bool, - default=False, - ) - parser.add_argument( - '--log-level', - help='Set the log level', - type=str, - default=None, - ) - return parser - - def parse_arguments() -> argparse.Namespace: """Parse command line arguments.""" - parser = get_parser() + parser = get_headless_parser() args = parser.parse_args() if args.version: @@ -916,17 +778,17 @@ def setup_config_from_args(args: argparse.Namespace) -> OpenHandsConfig: ) # Override default agent if provided - if args.agent_cls: + if hasattr(args, 'agent_cls') and args.agent_cls: config.default_agent = args.agent_cls # Set max iterations and max budget per task if provided, otherwise fall back to config values - if args.max_iterations is not None: + if hasattr(args, 'max_iterations') and args.max_iterations is not None: config.max_iterations = args.max_iterations - if args.max_budget_per_task is not None: + if hasattr(args, 'max_budget_per_task') and args.max_budget_per_task is not None: config.max_budget_per_task = args.max_budget_per_task # Read selected repository in config for use by CLI and main.py - if args.selected_repo is not None: + if hasattr(args, 'selected_repo') and args.selected_repo is not None: config.sandbox.selected_repo = args.selected_repo return config diff --git a/openhands/utils/term_color.py b/openhands/utils/term_color.py index 6938369da3..c1c062faa3 100644 --- a/openhands/utils/term_color.py +++ b/openhands/utils/term_color.py @@ -10,6 +10,7 @@ class TermColor(Enum): SUCCESS = 'green' ERROR = 'red' INFO = 'blue' + GREY = 'dark_grey' def colorize(text: str, color: TermColor = TermColor.WARNING) -> str: diff --git a/pyproject.toml b/pyproject.toml index c18b3d20a4..721973ce04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -166,7 +166,7 @@ joblib = "*" swebench = { git = "https://github.com/ryanhoangt/SWE-bench.git", rev = "fix-modal-patch-eval" } [tool.poetry.scripts] -openhands = "openhands.cli.main:main" +openhands = "openhands.cli.entry:main" [tool.poetry.group.testgeneval.dependencies] fuzzywuzzy = "^0.18.0" diff --git a/tests/unit/test_arg_parser.py b/tests/unit/test_arg_parser.py index 619b24b63f..c1a7bc84c7 100644 --- a/tests/unit/test_arg_parser.py +++ b/tests/unit/test_arg_parser.py @@ -1,10 +1,29 @@ import pytest -from openhands.core.config import get_parser +from openhands.core.config import ( + get_evaluation_parser, + get_headless_parser, +) -def test_parser_default_values(): - parser = get_parser() +def test_headless_parser_default_values(): + parser = get_headless_parser() + args = parser.parse_args([]) + + assert args.directory is None + assert args.task == '' + assert args.file is None + assert args.agent_cls is None + assert args.max_iterations is None + assert args.max_budget_per_task is None + assert args.llm_config is None + assert args.name == '' + assert not args.no_auto_continue + assert args.selected_repo is None + + +def test_evaluation_parser_default_values(): + parser = get_evaluation_parser() args = parser.parse_args([]) assert args.directory is None @@ -23,8 +42,8 @@ def test_parser_default_values(): assert args.selected_repo is None -def test_parser_custom_values(): - parser = get_parser() +def test_evaluation_parser_custom_values(): + 
parser = get_evaluation_parser() args = parser.parse_args( [ '-v', @@ -76,7 +95,7 @@ def test_parser_custom_values(): def test_parser_file_overrides_task(): - parser = get_parser() + parser = get_headless_parser() args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt']) assert args.task == 'task from command' @@ -84,31 +103,31 @@ def test_parser_file_overrides_task(): def test_parser_invalid_max_iterations(): - parser = get_parser() + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['-i', 'not_a_number']) def test_parser_invalid_max_budget(): - parser = get_parser() + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['-b', 'not_a_number']) -def test_parser_invalid_eval_n_limit(): - parser = get_parser() +def test_evaluation_parser_invalid_eval_n_limit(): + parser = get_evaluation_parser() with pytest.raises(SystemExit): parser.parse_args(['--eval-n-limit', 'not_a_number']) -def test_parser_invalid_eval_num_workers(): - parser = get_parser() +def test_evaluation_parser_invalid_eval_num_workers(): + parser = get_evaluation_parser() with pytest.raises(SystemExit): parser.parse_args(['--eval-num-workers', 'not_a_number']) -def test_help_message(capsys): - parser = get_parser() +def test_headless_parser_help_message(capsys): + parser = get_headless_parser() with pytest.raises(SystemExit): parser.parse_args(['--help']) captured = capsys.readouterr() @@ -126,6 +145,41 @@ def test_help_message(capsys): '-c AGENT_CLS, --agent-cls AGENT_CLS', '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS', '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK', + '-l LLM_CONFIG, --llm-config LLM_CONFIG', + '--agent-config AGENT_CONFIG', + '-n NAME, --name NAME', + '--config-file CONFIG_FILE', + '--no-auto-continue', + '--selected-repo SELECTED_REPO', + '--log-level LOG_LEVEL', + ] + + for element in expected_elements: + assert element in help_output, f"Expected '{element}' to be in the help message" + + option_count = help_output.count(' -') + assert option_count == 15, f'Expected 15 options, found {option_count}' + + +def test_evaluation_parser_help_message(capsys): + parser = get_evaluation_parser() + with pytest.raises(SystemExit): + parser.parse_args(['--help']) + captured = capsys.readouterr() + help_output = captured.out + print(help_output) + expected_elements = [ + 'usage:', + 'Run OpenHands in evaluation mode', + 'options:', + '-v, --version', + '-h, --help', + '-d DIRECTORY, --directory DIRECTORY', + '-t TASK, --task TASK', + '-f FILE, --file FILE', + '-c AGENT_CLS, --agent-cls AGENT_CLS', + '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS', + '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK', '--eval-output-dir EVAL_OUTPUT_DIR', '--eval-n-limit EVAL_N_LIMIT', '--eval-num-workers EVAL_NUM_WORKERS', @@ -137,20 +191,18 @@ def test_help_message(capsys): '--config-file CONFIG_FILE', '--no-auto-continue', '--selected-repo SELECTED_REPO', - '--override-cli-mode OVERRIDE_CLI_MODE', '--log-level LOG_LEVEL', - '--conversation CONVERSATION', ] for element in expected_elements: assert element in help_output, f"Expected '{element}' to be in the help message" option_count = help_output.count(' -') - assert option_count == 22, f'Expected 22 options, found {option_count}' + assert option_count == 20, f'Expected 20 options, found {option_count}' def test_selected_repo_format(): """Test that the selected-repo argument accepts owner/repo format.""" - parser = get_parser() + parser = 
get_headless_parser() args = parser.parse_args(['--selected-repo', 'owner/repo']) assert args.selected_repo == 'owner/repo' diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 2eb5df7492..80b53e8869 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -325,7 +325,6 @@ async def test_run_session_with_initial_action( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -345,7 +344,6 @@ async def test_main_without_task( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function without a task.""" loop = asyncio.get_running_loop() @@ -360,7 +358,9 @@ async def test_main_without_task( mock_args.name = None mock_args.file = None mock_args.conversation = None - mock_parse_args.return_value = mock_args + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -394,10 +394,9 @@ async def test_main_without_task( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -418,7 +417,6 @@ async def test_main_without_task( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -438,7 +436,6 @@ async def test_main_with_task( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a task.""" loop = asyncio.get_running_loop() @@ -451,7 +448,11 @@ async def test_main_with_task( mock_args.agent_cls = 'custom-agent' mock_args.llm_config = 'custom-config' mock_args.file = None - mock_parse_args.return_value = mock_args + mock_args.name = None + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -486,10 +487,9 @@ async def test_main_with_task( mock_run_session.side_effect = [True, False] # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -520,7 +520,6 @@ async def test_main_with_task( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -540,7 +539,6 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a session name passes it to run_session.""" loop = asyncio.get_running_loop() @@ -556,7 +554,9 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_args.name = test_session_name # Set the session name mock_args.file = None mock_args.conversation = 
None - mock_parse_args.return_value = mock_args + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -590,10 +590,9 @@ async def test_main_with_session_name_passes_name_to_run_session( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -713,7 +712,6 @@ async def test_run_session_with_name_attempts_state_restore( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -733,7 +731,6 @@ async def test_main_security_check_fails( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function when security check fails.""" loop = asyncio.get_running_loop() @@ -743,7 +740,14 @@ async def test_main_security_check_fails( # Mock arguments mock_args = MagicMock() - mock_parse_args.return_value = mock_args + mock_args.agent_cls = None + mock_args.llm_config = None + mock_args.name = None + mock_args.file = None + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -765,10 +769,9 @@ async def test_main_security_check_fails( mock_check_security.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -779,7 +782,6 @@ async def test_main_security_check_fails( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -799,7 +801,6 @@ async def test_config_loading_order( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test the order of configuration loading in the main function. 
@@ -820,7 +821,10 @@ async def test_config_loading_order( # Add a file property to avoid file I/O errors mock_args.file = None mock_args.log_level = 'INFO' - mock_parse_args.return_value = mock_args + mock_args.name = None + mock_args.conversation = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock read_task to return a dummy task mock_read_task.return_value = 'Test task' @@ -863,10 +867,9 @@ async def test_config_loading_order( mock_run_session.return_value = False # No new session requested # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions for argument parsing and config setup - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once() @@ -896,7 +899,6 @@ async def test_config_loading_order( @pytest.mark.asyncio -@patch('openhands.cli.main.parse_arguments') @patch('openhands.cli.main.setup_config_from_args') @patch('openhands.cli.main.FileSettingsStore.get_instance') @patch('openhands.cli.main.check_folder_security_agreement') @@ -918,7 +920,6 @@ async def test_main_with_file_option( mock_check_security, mock_get_settings_store, mock_setup_config, - mock_parse_args, ): """Test main function with a file option.""" loop = asyncio.get_running_loop() @@ -933,7 +934,10 @@ async def test_main_with_file_option( mock_args.name = None mock_args.file = '/path/to/test/file.txt' mock_args.task = None - mock_parse_args.return_value = mock_args + mock_args.conversation = None + mock_args.log_level = None + mock_args.config_file = 'config.toml' + mock_args.override_cli_mode = None # Mock config mock_config = MagicMock() @@ -969,10 +973,9 @@ async def test_main_with_file_option( mock_run_session.return_value = False # Run the function - await cli.main_with_loop(loop) + await cli.main_with_loop(loop, mock_args) # Assertions - mock_parse_args.assert_called_once() mock_setup_config.assert_called_once_with(mock_args) mock_get_settings_store.assert_called_once() mock_settings_store.load.assert_called_once()
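
Reviewer note: a minimal sketch (not part of the diff) of how the split parsers introduced here behave, assuming this branch is installed locally; the flags and defaults mirror `arg_utils.py` and `tests/unit/test_arg_parser.py` above.

```python
from openhands.core.config import get_evaluation_parser, get_headless_parser

# The headless parser keeps the common and headless-specific flags,
# but no longer carries the --eval-* options.
headless = get_headless_parser()
args = headless.parse_args(['-t', 'write a haiku'])
assert args.task == 'write a haiku'
assert not hasattr(args, 'eval_n_limit')

# The evaluation parser is a superset that also accepts the --eval-* options,
# which is why every benchmark script now calls get_evaluation_parser().
evaluation = get_evaluation_parser()
args = evaluation.parse_args(['--eval-n-limit', '5', '--eval-num-workers', '2'])
assert args.eval_n_limit == 5
assert args.eval_num_workers == 2
```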