###################### OpenHands Configuration Example ######################
#
# All settings have default values, so you only need to uncomment and
# modify what you want to change
# The fields within each section are sorted in alphabetical order.
#
##############################################################################

#################################### Core ####################################
# General core configurations
##############################################################################
[core]
# API keys and configuration for core services

# Base path for the workspace
#workspace_base = "./workspace"

# Cache directory path
#cache_dir = "/tmp/cache"

# Debugging enabled
#debug = false

# Disable color in terminal output
#disable_color = false

# Path to store trajectories, can be a folder or a file
# If it's a folder, the session id will be used as the file name
#save_trajectory_path = "./trajectories"

# Whether to save screenshots in the trajectory
# The screenshots are encoded and can make trajectory json files very large
#save_screenshots_in_trajectory = false

# Path to replay a trajectory, must be a file path
# If provided, the trajectory will be loaded and replayed before the
# agent responds to any user instruction
#replay_trajectory_path = ""

# File store path
#file_store_path = "/tmp/file_store"

# File store type
#file_store = "memory"

# Maximum file size for uploads, in megabytes
#file_uploads_max_file_size_mb = 0

# Enable the browser environment
#enable_browser = true

# Maximum budget per task, 0.0 means no limit
#max_budget_per_task = 0.0

# Maximum number of iterations
#max_iterations = 500

# Path to mount the workspace in the sandbox
#workspace_mount_path_in_sandbox = "/workspace"

# Path to mount the workspace
#workspace_mount_path = ""

# Path to rewrite the workspace mount path to
#workspace_mount_rewrite = ""
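
# For illustration, a minimal sketch (the host path is a hypothetical placeholder)
# that mounts a local checkout into the sandbox at /workspace:
#workspace_mount_path = "/home/me/projects/my-repo"
#workspace_mount_path_in_sandbox = "/workspace"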

# Run as openhands
#run_as_openhands = true

# Runtime environment
#runtime = "docker"

# Name of the default agent
#default_agent = "CodeActAgent"

# JWT secret for authentication
#jwt_secret = ""

# Restrict file types for file uploads
#file_uploads_restrict_file_types = false

# List of allowed file extensions for uploads
#file_uploads_allowed_extensions = [".*"]

# Whether to enable the default LLM summarizing condenser when no condenser is specified in config
# When true, an LLMSummarizingCondenserConfig will be used as the default condenser
# When false, a NoOpCondenserConfig (no summarization) will be used
#enable_default_condenser = true

# Maximum number of concurrent conversations per user
#max_concurrent_conversations = 3

# Maximum age of conversations in seconds before they are automatically closed
#conversation_max_age_seconds = 864000 # 10 days

#################################### LLM #####################################
# Configuration for LLM models (group name starts with 'llm')
# Use 'llm' for the default LLM config
##############################################################################
[llm]
# AWS access key ID
#aws_access_key_id = ""

# AWS region name
#aws_region_name = ""

# AWS secret access key
#aws_secret_access_key = ""

# API key to use (For Headless / CLI only - In Web this is overridden by Session Init)
api_key = ""

# API base URL (For Headless / CLI only - In Web this is overridden by Session Init)
#base_url = ""

# API version
#api_version = ""

# Reasoning effort for OpenAI o-series models (low, medium, high, or not set)
#reasoning_effort = "medium"

# Cost per input token
#input_cost_per_token = 0.0

# Cost per output token
#output_cost_per_token = 0.0

# Custom LLM provider
#custom_llm_provider = ""

# Maximum number of characters in an observation's content
#max_message_chars = 10000

# Maximum number of input tokens
#max_input_tokens = 0

# Maximum number of output tokens
#max_output_tokens = 0

# Model to use. (For Headless / CLI only - In Web this is overridden by Session Init)
model = "gpt-4o"

# Number of retries to attempt when an operation fails with the LLM.
# Increase this value to allow more attempts before giving up
#num_retries = 8

# Maximum wait time (in seconds) between retry attempts
# This caps the exponential backoff to prevent excessively long waits
#retry_max_wait = 120

# Minimum wait time (in seconds) between retry attempts
# This sets the initial delay before the first retry
#retry_min_wait = 15

# Multiplier for exponential backoff calculation
# The wait time increases by this factor after each failed attempt
# A value of 2.0 means each retry waits twice as long as the previous one
#retry_multiplier = 2.0
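
# For illustration, with the defaults above (retry_min_wait = 15,
# retry_multiplier = 2.0, retry_max_wait = 120) the successive waits grow
# roughly as 15s, 30s, 60s, 120s, 120s, ... with the cap stopping further growth.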

# Drop any unmapped (unsupported) params without causing an exception
#drop_params = false

# Modify params for litellm to do transformations like adding a default message when a message is empty.
# Note: this setting is global; unlike drop_params, it cannot be overridden in each call to litellm.
#modify_params = true

# Use the prompt caching feature if provided by the LLM and supported
#caching_prompt = true

# Base URL for the OLLAMA API
#ollama_base_url = ""

# Temperature for the API
#temperature = 0.0

# Timeout for the API
#timeout = 0

# Top p for the API
#top_p = 1.0

# If the model is vision capable, this option allows disabling image processing (useful for cost reduction).
#disable_vision = true

# Custom tokenizer to use for token counting
# https://docs.litellm.ai/docs/completion/token_usage
#custom_tokenizer = ""

# Whether to use native tool calling if supported by the model. Can be true, false, or
# left unset (the default), in which case a per-model default (chosen based on evaluation) is used.
# ATTENTION: Based on evaluation, enabling native function calling may lead to worse results
# in some scenarios. Use with caution and consider testing with your specific use case.
# https://github.com/All-Hands-AI/OpenHands/pull/4711
#native_tool_calling = None

# Safety settings for models that support them (e.g., Mistral AI, Gemini)
# Example for Mistral AI:
# safety_settings = [
#   { "category" = "hate", "threshold" = "low" },
#   { "category" = "harassment", "threshold" = "low" },
#   { "category" = "sexual", "threshold" = "low" },
#   { "category" = "dangerous", "threshold" = "low" }
# ]
#
# Example for Gemini:
# safety_settings = [
#   { "category" = "HARM_CATEGORY_HARASSMENT", "threshold" = "BLOCK_NONE" },
#   { "category" = "HARM_CATEGORY_HATE_SPEECH", "threshold" = "BLOCK_NONE" },
#   { "category" = "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold" = "BLOCK_NONE" },
#   { "category" = "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold" = "BLOCK_NONE" }
# ]
#safety_settings = []

# Enterprise gateway integration (e.g., Tachyon)
#gateway_provider = ""
#gateway_auth_url = ""
#gateway_auth_method = "POST"
#gateway_auth_headers = { "X-Identity-Key" = "", "X-Identity-Secret" = "" }
#gateway_auth_body = { "audience" = "tachyon" }
#gateway_auth_token_path = "access_token"
#gateway_auth_expires_in_path = "expires_in"
#gateway_auth_token_ttl = 300
#gateway_token_header = "Authorization"
#gateway_token_prefix = "Bearer "
#gateway_auth_verify_ssl = true
#custom_headers = { "X-Tachyon-Key" = "" }
#extra_body_params = { "metadata" = { "source" = "openhands-cli" } }
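
# A filled-in sketch for reference (the URL, header names, and credential
# values below are illustrative placeholders, not real endpoints or defaults):
#
# [llm]
# gateway_provider = "tachyon"
# gateway_auth_url = "https://auth.example.com/oauth/token"
# gateway_auth_method = "POST"
# gateway_auth_headers = { "X-Identity-Key" = "my-key-id", "X-Identity-Secret" = "my-secret" }
# gateway_auth_body = { "audience" = "tachyon" }
# gateway_auth_token_path = "access_token"
# gateway_token_header = "Authorization"
# gateway_token_prefix = "Bearer "
# custom_headers = { "X-Tachyon-Key" = "my-gateway-key" }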

[llm.draft_editor]
# The number of times llm_editor tries to fix an error when editing.
correct_num = 5

[llm.gpt4o-mini]
api_key = ""
model = "gpt-4o-mini"

# Example routing LLM configuration for multimodal model routing
# Uncomment and configure to enable model routing with a secondary model
#[llm.secondary_model]
#model = "kimi-k2"
#api_key = ""
#for_routing = true
#max_input_tokens = 128000

#################################### Agent ###################################
# Configuration for agents (group name starts with 'agent')
# Use 'agent' for the default agent config
# otherwise, group name must be `agent.<agent_name>` (case-sensitive), e.g.
# agent.CodeActAgent
##############################################################################
[agent]

# Whether the browsing tool is enabled
# Note: when this is set to true, enable_browser in the core config must also be true
enable_browsing = true

# Whether the LLM draft editor is enabled
enable_llm_editor = false

# Whether the standard editor tool (str_replace_editor) is enabled
# Only has an effect if enable_llm_editor is false
enable_editor = true

# Whether the IPython tool is enabled
enable_jupyter = true

# Whether the command tool is enabled
enable_cmd = true

# Whether the think tool is enabled
enable_think = true

# Whether the finish tool is enabled
enable_finish = true

# LLM config group to use
#llm_config = 'your-llm-config-group'

# Whether to use prompt extension (e.g., microagent, repo/runtime info) at all
#enable_prompt_extensions = true

# List of microagents to disable
#disabled_microagents = []

# Whether history should be truncated to continue the session when hitting the
# LLM context length limit
enable_history_truncation = true

# Whether the condensation request tool is enabled
enable_condensation_request = false

[agent.RepoExplorerAgent]
# Example: use a cheaper model for RepoExplorerAgent to reduce cost, especially
# useful when an agent doesn't demand high quality but uses a lot of tokens
llm_config = 'gpt3'

[agent.CustomAgent]
# Example: use a custom agent from a different package
# This will automatically be registered as a new agent named "CustomAgent"
classpath = "my_package.my_module.MyCustomAgent"

#################################### Sandbox ###################################
# Configuration for the sandbox
##############################################################################
[sandbox]
# Sandbox timeout in seconds
#timeout = 120

# Sandbox user ID
#user_id = 1000

# Container image to use for the sandbox
#base_container_image = "nikolaik/python-nodejs:python3.12-nodejs22"

# Use host network
#use_host_network = false

# Runtime extra build args
#runtime_extra_build_args = ["--network=host", "--add-host=host.docker.internal:host-gateway"]

# Enable auto linting after editing
#enable_auto_lint = false

# Whether to initialize plugins
#initialize_plugins = true

# Extra dependencies to install in the runtime image
#runtime_extra_deps = ""

# Environment variables to set at the launch of the runtime
#runtime_startup_env_vars = {}
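# For example (an illustrative sketch; the variable name and value are placeholders):
#runtime_startup_env_vars = { NO_PROXY = "localhost" }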

# BrowserGym environment to use for evaluation
#browsergym_eval_env = ""

# Platform to use for building the runtime image (e.g., "linux/amd64")
#platform = ""

# Force rebuild of runtime image even if it exists
#force_rebuild_runtime = false

# Runtime container image to use (if not provided, will be built from base_container_image)
#runtime_container_image = ""

# Keep runtime alive after session ends
#keep_runtime_alive = false

# Pause closed runtimes instead of stopping them
#pause_closed_runtimes = false

# Delay in seconds before closing idle runtimes
#close_delay = 300

# Remove all containers when stopping the runtime
#rm_all_containers = false

# Enable GPU support in the runtime
#enable_gpu = false

# When there are multiple GPUs, you can specify which one to use by ID
#cuda_visible_devices = ''

# Additional Docker runtime kwargs
#docker_runtime_kwargs = {}
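# For example, a sketch assuming these kwargs are forwarded to the Docker SDK
# when the sandbox container is created (shm_size is a standard Docker option):
#docker_runtime_kwargs = { "shm_size" = "2g" }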

# Specific port to use for VSCode. If not set, a random port will be chosen.
# Useful when deploying OpenHands on a remote machine where you need to expose a specific port.
#vscode_port = 41234

# Volume mounts in the format 'host_path:container_path[:mode]'
# e.g. '/my/host/dir:/workspace:rw'
# Multiple mounts can be specified using commas
# e.g. '/path1:/workspace/path1,/path2:/workspace/path2:ro'

# Configure volumes under the [sandbox] section:
# [sandbox]
# volumes = "/my/host/dir:/workspace:rw,/path2:/workspace/path2:ro"

#################################### Security ###################################
# Configuration for security features
##############################################################################
[security]

# Enable confirmation mode (For Headless / CLI only - In Web this is overridden by Session Init)
#confirmation_mode = false

# The security analyzer to use (For Headless / CLI only - In Web this is overridden by Session Init)
# Available options: 'llm' (default), 'invariant'
#security_analyzer = "llm"

# Whether to enable the security analyzer
#enable_security_analyzer = true

#################################### Condenser #################################
# Condensers control how conversation history is managed and compressed when
# the context grows too large. Each agent uses one condenser configuration.
##############################################################################
[condenser]
# The type of condenser to use. Available options:
# - "noop": No condensing, keeps full history (default)
# - "observation_masking": Keeps full event structure but masks older observations
# - "recent": Keeps only recent events and discards older ones
# - "llm": Uses an LLM to summarize conversation history
# - "amortized": Intelligently forgets older events while preserving important context
# - "llm_attention": Uses an LLM to prioritize the most relevant context
type = "noop"

# Examples for each condenser type (uncomment and modify as needed):

# 1. NoOp Condenser - No additional settings needed
#type = "noop"

# 2. Observation Masking Condenser
#type = "observation_masking"
# Number of most-recent events where observations will not be masked
#attention_window = 100

# 3. Recent Events Condenser
#type = "recent"
# Number of initial events to always keep (typically includes task description)
#keep_first = 1
# Maximum number of events to keep in history
#max_events = 100

# 4. LLM Summarizing Condenser
#type = "llm"
# Reference to an LLM config to use for summarization
#llm_config = "condenser"
# Number of initial events to always keep (typically includes task description)
#keep_first = 1
# Maximum size of history before triggering summarization
#max_size = 100

# 5. Amortized Forgetting Condenser
#type = "amortized"
# Number of initial events to always keep (typically includes task description)
#keep_first = 1
# Maximum size of history before triggering forgetting
#max_size = 100

# 6. LLM Attention Condenser
#type = "llm_attention"
# Reference to an LLM config to use for attention scoring
#llm_config = "condenser"
# Number of initial events to always keep (typically includes task description)
#keep_first = 1
# Maximum size of history before triggering the attention mechanism
#max_size = 100

# Example of a custom LLM configuration for condensers that require an LLM
# If not provided, it falls back to the default LLM
#[llm.condenser]
#model = "gpt-4o"
#temperature = 0.1
#max_input_tokens = 1024

#################################### Eval ####################################
# Configuration for the evaluation; please refer to the specific evaluation
# plugin for the available options
##############################################################################

########################### Kubernetes #######################################
# Kubernetes configuration when using the Kubernetes runtime
##############################################################################
[kubernetes]
# The Kubernetes namespace to use for OpenHands resources
#namespace = "default"

# Domain for ingress resources
#ingress_domain = "localhost"

# Size of the persistent volume claim
#pvc_storage_size = "2Gi"

# Storage class for persistent volume claims
#pvc_storage_class = "standard"

# CPU request for runtime pods
#resource_cpu_request = "1"

# Memory request for runtime pods
#resource_memory_request = "1Gi"

# Memory limit for runtime pods
#resource_memory_limit = "2Gi"

# Optional name of image pull secret for private registries
#image_pull_secret = ""

# Optional name of TLS secret for ingress
#ingress_tls_secret = ""

# Optional node selector key for pod scheduling
#node_selector_key = ""

# Optional node selector value for pod scheduling
#node_selector_val = ""

# Optional YAML string defining pod tolerations
#tolerations_yaml = ""
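# For example, an illustrative sketch using standard Kubernetes toleration fields
# (the key/value pair is a placeholder):
#tolerations_yaml = """
#- key: "dedicated"
#  operator: "Equal"
#  value: "openhands"
#  effect: "NoSchedule"
#"""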

# Run the runtime sandbox container in privileged mode for use with docker-in-docker
#privileged = false

#################################### MCP #####################################
# Configuration for Model Context Protocol (MCP) servers
# MCP allows OpenHands to communicate with external tool servers
##############################################################################
[mcp]
# SSE servers - Server-Sent Events transport (legacy)
#sse_servers = [
#    # Basic SSE server with just a URL
#    "http://localhost:8080/mcp/sse",
#
#    # SSE server with authentication
#    {url = "https://api.example.com/mcp/sse", api_key = "your-api-key"}
#]

# SHTTP servers - Streamable HTTP transport (recommended)
#shttp_servers = [
#    # Basic SHTTP server with default 60s timeout
#    "https://api.example.com/mcp/shttp",
#
#    # SHTTP server with custom timeout for long-running tools
#    {
#        url = "https://api.example.com/mcp/shttp",
#        api_key = "your-api-key",
#        timeout = 180  # 3 minutes for processing-heavy tools (1-3600 seconds)
#    }
#]

# Stdio servers - Direct process communication (development only)
#stdio_servers = [
#    # Basic stdio server
#    {name = "filesystem", command = "npx", args = ["@modelcontextprotocol/server-filesystem", "/"]},
#
#    # Stdio server with environment variables
#    {
#        name = "fetch",
#        command = "uvx",
#        args = ["mcp-server-fetch"],
#        env = {DEBUG = "true"}
#    }
#]

#################################### Model Routing ############################
# Configuration for the experimental model routing feature
# Enables intelligent switching between different LLM models for specific purposes
##############################################################################
[model_routing]
# Router to use for model selection
# Available options:
# - "noop_router" (default): No routing, always uses the primary LLM
# - "multimodal_router": Switches between the primary and secondary models depending on whether the input is multimodal
#router_name = "noop_router"
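
# A minimal sketch enabling the multimodal router (assumes the commented-out
# [llm.secondary_model] example in the LLM section above has been uncommented
# and configured with for_routing = true):
#
# [model_routing]
# router_name = "multimodal_router"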