Fix llm_config fallback (#4415)

Co-authored-by: openhands <openhands@all-hands.dev>
Engel Nyst 2025-01-15 02:17:37 +01:00 committed by GitHub
parent 7ce1fb85ff
commit c5797d1d5a
6 changed files with 489 additions and 11 deletions

View File

@@ -140,7 +140,11 @@ The LLM (Large Language Model) configuration options are defined in the `[llm]`
 To use these with the docker command, pass in `-e LLM_<option>`. Example: `-e LLM_NUM_RETRIES`.
 
+:::note
+For development setups, you can also define custom named LLM configurations. See [Custom LLM Configurations](https://docs.all-hands.dev/modules/usage/llms/custom-llm-configs) for details.
+:::
+
 **AWS Credentials**
 - `aws_access_key_id`
   - Type: `str`
   - Default: `""`

View File

@@ -0,0 +1,106 @@
# Custom LLM Configurations

OpenHands supports defining multiple named LLM configurations in your `config.toml` file. This feature allows you to use different LLM configurations for different purposes, such as using a cheaper model for tasks that don't require high-quality responses, or using different models with different parameters for specific agents.

## How It Works

Named LLM configurations are defined in the `config.toml` file using sections that start with `llm.`. For example:

```toml
# Default LLM configuration
[llm]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.0

# Custom LLM configuration for a cheaper model
[llm.gpt3]
model = "gpt-3.5-turbo"
api_key = "your-api-key"
temperature = 0.2

# Another custom configuration with different parameters
[llm.high-creativity]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.8
top_p = 0.9
```

Each named configuration inherits all settings from the default `[llm]` section and can override any of those settings. You can define as many custom configurations as needed.
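
The inheritance is a shallow, key-by-key merge: values set in the named section win, everything else falls back to `[llm]`. As an illustration only (plain Python, not OpenHands code):

```python
# Illustrative sketch of the fallback semantics; field names follow the example above.
generic = {'model': 'gpt-4', 'api_key': 'your-api-key', 'temperature': 0.0}
custom = {'model': 'gpt-3.5-turbo', 'temperature': 0.2}  # [llm.gpt3]

merged = generic.copy()
merged.update(custom)  # custom values win; everything else falls back to [llm]
assert merged == {'model': 'gpt-3.5-turbo', 'api_key': 'your-api-key', 'temperature': 0.2}
```
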
## Using Custom Configurations

### With Agents

You can specify which LLM configuration an agent should use by setting the `llm_config` parameter in the agent's configuration section:

```toml
[agent.RepoExplorerAgent]
# Use the cheaper GPT-3 configuration for this agent
llm_config = 'gpt3'

[agent.CodeWriterAgent]
# Use the high creativity configuration for this agent
llm_config = 'high-creativity'
```
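
Once loaded, a named configuration can also be retrieved programmatically, for example when developing against OpenHands internals. A minimal sketch, assuming `AppConfig.get_llm_config` and `load_from_toml` behave as exercised in this project's tests:

```python
from openhands.core.config import AppConfig
from openhands.core.config.utils import load_from_toml

config = AppConfig()
load_from_toml(config, 'config.toml')

gpt3_config = config.get_llm_config('gpt3')  # the [llm.gpt3] section
print(gpt3_config.model)  # "gpt-3.5-turbo", other fields inherited from [llm]
```
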
### Configuration Options

Each named LLM configuration supports all the same options as the default LLM configuration. These include:

- Model selection (`model`)
- API configuration (`api_key`, `base_url`, etc.)
- Model parameters (`temperature`, `top_p`, etc.)
- Retry settings (`num_retries`, `retry_multiplier`, etc.)
- Token limits (`max_input_tokens`, `max_output_tokens`)
- And all other LLM configuration options

For a complete list of available options, see the LLM Configuration section in the [Configuration Options](../configuration-options.md) documentation.

## Use Cases

Custom LLM configurations are particularly useful in several scenarios:

- **Cost Optimization**: Use cheaper models for tasks that don't require high-quality responses, like repository exploration or simple file operations.
- **Task-Specific Tuning**: Configure different temperature and top_p values for tasks that require different levels of creativity or determinism.
- **Different Providers**: Use different LLM providers or API endpoints for different tasks.
- **Testing and Development**: Easily switch between different model configurations during development and testing.

## Example: Cost Optimization

A practical example of using custom LLM configurations to optimize costs:

```toml
# Default configuration using GPT-4 for high-quality responses
[llm]
model = "gpt-4"
api_key = "your-api-key"
temperature = 0.0

# Cheaper configuration for repository exploration
[llm.repo-explorer]
model = "gpt-3.5-turbo"
temperature = 0.2

# Configuration for code generation
[llm.code-gen]
model = "gpt-4"
temperature = 0.0
max_output_tokens = 2000

[agent.RepoExplorerAgent]
llm_config = 'repo-explorer'

[agent.CodeWriterAgent]
llm_config = 'code-gen'
```

In this example:

- Repository exploration uses a cheaper model since it mainly involves understanding and navigating code
- Code generation uses GPT-4 with a higher token limit for generating larger code blocks
- The default configuration remains available for other tasks

:::note
Custom LLM configurations are only available when using OpenHands in development mode, via `main.py` or `cli.py`. When running via `docker run`, please use the standard configuration options.
:::

View File

@@ -138,8 +138,19 @@ class LLMConfig:
         This function is used to create an LLMConfig object from a dictionary,
         with the exception of the 'draft_editor' key, which is a nested LLMConfig object.
         """
-        args = {k: v for k, v in llm_config_dict.items() if not isinstance(v, dict)}
-        if 'draft_editor' in llm_config_dict:
-            draft_editor_config = LLMConfig(**llm_config_dict['draft_editor'])
-            args['draft_editor'] = draft_editor_config
+        # Keep None values to preserve defaults, filter out other dicts
+        args = {
+            k: v
+            for k, v in llm_config_dict.items()
+            if not isinstance(v, dict) or v is None
+        }
+        if (
+            'draft_editor' in llm_config_dict
+            and llm_config_dict['draft_editor'] is not None
+        ):
+            if isinstance(llm_config_dict['draft_editor'], LLMConfig):
+                args['draft_editor'] = llm_config_dict['draft_editor']
+            else:
+                draft_editor_config = LLMConfig(**llm_config_dict['draft_editor'])
+                args['draft_editor'] = draft_editor_config
         return cls(**args)
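
For reference, a hedged usage sketch of the new `from_dict` behavior; the `LLMConfig` import path is an assumption (the tests below import `AppConfig` from the same package):

```python
# Sketch only: import path assumed from the test files in this commit.
from openhands.core.config import LLMConfig

raw = {
    'model': 'base-model',
    'api_key': 'base-api-key',
    # a nested dict becomes a nested LLMConfig
    'draft_editor': {'model': 'draft-model', 'api_key': 'draft-api-key'},
}
cfg = LLMConfig.from_dict(raw)
assert cfg.model == 'base-model'
assert cfg.draft_editor is not None
assert cfg.draft_editor.model == 'draft-model'

# an already-constructed LLMConfig now passes through unchanged
cfg2 = LLMConfig.from_dict({'model': 'base-model', 'draft_editor': cfg.draft_editor})
assert cfg2.draft_editor is cfg.draft_editor
```
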

View File

@@ -144,15 +144,48 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
             logger.openhands_logger.debug(
                 'Attempt to load default LLM config from config toml'
             )
-            llm_config = LLMConfig.from_dict(value)
-            cfg.set_llm_config(llm_config, 'llm')
+            # TODO clean up draft_editor
+            # Extract generic LLM fields, keeping draft_editor
+            generic_llm_fields = {}
+            for k, v in value.items():
+                if not isinstance(v, dict) or k == 'draft_editor':
+                    generic_llm_fields[k] = v
+            generic_llm_config = LLMConfig.from_dict(generic_llm_fields)
+            cfg.set_llm_config(generic_llm_config, 'llm')
+
+            # Process custom named LLM configs
             for nested_key, nested_value in value.items():
                 if isinstance(nested_value, dict):
                     logger.openhands_logger.debug(
-                        f'Attempt to load group {nested_key} from config toml as llm config'
+                        f'Processing custom LLM config "{nested_key}":'
                     )
-                    llm_config = LLMConfig.from_dict(nested_value)
-                    cfg.set_llm_config(llm_config, nested_key)
+                    # Apply generic LLM config with custom LLM overrides, e.g.
+                    # [llm]
+                    # model="..."
+                    # num_retries = 5
+                    # [llm.claude]
+                    # model="claude-3-5-sonnet"
+                    # results in num_retries APPLIED to claude-3-5-sonnet
+                    custom_fields = {}
+                    for k, v in nested_value.items():
+                        if not isinstance(v, dict) or k == 'draft_editor':
+                            custom_fields[k] = v
+                    merged_llm_dict = generic_llm_config.__dict__.copy()
+                    merged_llm_dict.update(custom_fields)
+
+                    # TODO clean up draft_editor
+                    # Handle draft_editor with fallback values:
+                    # - If draft_editor is "null", use None
+                    # - If draft_editor is in custom fields, use that value
+                    # - If draft_editor is not specified, fall back to generic config value
+                    if 'draft_editor' in custom_fields:
+                        if custom_fields['draft_editor'] == 'null':
+                            merged_llm_dict['draft_editor'] = None
+                    else:
+                        merged_llm_dict['draft_editor'] = (
+                            generic_llm_config.draft_editor
+                        )
+
+                    custom_llm_config = LLMConfig.from_dict(merged_llm_dict)
+                    cfg.set_llm_config(custom_llm_config, nested_key)
         elif key is not None and key.lower() == 'security':
             logger.openhands_logger.debug(
                 'Attempt to load security config from config toml'
@@ -458,7 +491,11 @@ def setup_config_from_args(args: argparse.Namespace) -> AppConfig:
 
     # Override with command line arguments if provided
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config)
+        # if we didn't already load it, get it from the toml file
+        if args.llm_config not in config.llms:
+            llm_config = get_llm_config_arg(args.llm_config)
+        else:
+            llm_config = config.llms[args.llm_config]
         if llm_config is None:
             raise ValueError(f'Invalid toml file, cannot read {args.llm_config}')
         config.set_llm_config(llm_config)
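
Taken together with the `from_dict` changes, the fallback can be exercised end to end. A minimal sketch, mirroring what the new tests below assert (the `[core]` section and temp-file handling follow the test fixtures; section names here are made up for illustration):

```python
import pathlib
import tempfile

from openhands.core.config import AppConfig
from openhands.core.config.utils import load_from_toml

TOML = """
[core]
workspace_base = "./workspace"

[llm]
model = "base-model"
num_retries = 3
draft_editor = { model = "draft-model" }

[llm.claude]
model = "claude-3-5-sonnet"

[llm.no-draft]
model = "plain-model"
draft_editor = "null"  # the string "null" opts out explicitly
"""

with tempfile.TemporaryDirectory() as tmp:
    path = pathlib.Path(tmp) / 'config.toml'
    path.write_text(TOML)

    config = AppConfig()
    load_from_toml(config, str(path))

    claude = config.get_llm_config('claude')
    assert claude.model == 'claude-3-5-sonnet'
    assert claude.num_retries == 3  # inherited from [llm], not the default 8
    assert claude.draft_editor.model == 'draft-model'  # inherited as well

    no_draft = config.get_llm_config('no-draft')
    assert no_draft.draft_editor is None  # explicitly disabled
```
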

View File

@@ -0,0 +1,228 @@
import pathlib

import pytest

from openhands.core.config import AppConfig
from openhands.core.config.utils import load_from_toml


@pytest.fixture
def default_config(monkeypatch):
    # Fixture to provide a default AppConfig instance
    yield AppConfig()


@pytest.fixture
def generic_llm_toml(tmp_path: pathlib.Path) -> str:
    """Fixture to create a generic LLM TOML configuration with all custom LLMs
    providing mandatory 'model' and 'api_key', and testing fallback to the generic
    section values for other attributes like 'num_retries'.
    """
    toml_content = """
[core]
workspace_base = "./workspace"

[llm]
model = "base-model"
api_key = "base-api-key"
embedding_model = "base-embedding"
num_retries = 3

[llm.custom1]
model = "custom-model-1"
api_key = "custom-api-key-1"
# 'num_retries' is not overridden and should fall back to the value from [llm]

[llm.custom2]
model = "custom-model-2"
api_key = "custom-api-key-2"
num_retries = 5  # Overridden value

[llm.custom3]
model = "custom-model-3"
api_key = "custom-api-key-3"
# No overrides for additional attributes
"""
    toml_file = tmp_path / 'llm_config.toml'
    toml_file.write_text(toml_content)
    return str(toml_file)


def test_load_from_toml_llm_with_fallback(
    default_config: AppConfig, generic_llm_toml: str
) -> None:
    """Test that custom LLM configurations fall back to non-overridden attributes
    like 'num_retries' from the generic [llm] section.
    """
    load_from_toml(default_config, generic_llm_toml)

    # Verify generic LLM configuration
    generic_llm = default_config.get_llm_config('llm')
    assert generic_llm.model == 'base-model'
    assert generic_llm.api_key == 'base-api-key'
    assert generic_llm.embedding_model == 'base-embedding'
    assert generic_llm.num_retries == 3

    # Verify custom1 LLM falls back to 'num_retries' from the base
    custom1 = default_config.get_llm_config('custom1')
    assert custom1.model == 'custom-model-1'
    assert custom1.api_key == 'custom-api-key-1'
    assert custom1.embedding_model == 'base-embedding'
    assert custom1.num_retries == 3  # from [llm]

    # Verify custom2 LLM overrides 'num_retries'
    custom2 = default_config.get_llm_config('custom2')
    assert custom2.model == 'custom-model-2'
    assert custom2.api_key == 'custom-api-key-2'
    assert custom2.embedding_model == 'base-embedding'
    assert custom2.num_retries == 5  # overridden value

    # Verify custom3 LLM inherits all attributes except 'model' and 'api_key'
    custom3 = default_config.get_llm_config('custom3')
    assert custom3.model == 'custom-model-3'
    assert custom3.api_key == 'custom-api-key-3'
    assert custom3.embedding_model == 'base-embedding'
    assert custom3.num_retries == 3  # from [llm]


def test_load_from_toml_llm_custom_overrides_all(
    default_config: AppConfig, tmp_path: pathlib.Path
) -> None:
    """Test that a custom LLM can fully override all attributes from the generic [llm] section."""
    toml_content = """
[core]
workspace_base = "./workspace"

[llm]
model = "base-model"
api_key = "base-api-key"
embedding_model = "base-embedding"
num_retries = 3

[llm.custom_full]
model = "full-custom-model"
api_key = "full-custom-api-key"
embedding_model = "full-custom-embedding"
num_retries = 10
"""
    toml_file = tmp_path / 'full_override_llm.toml'
    toml_file.write_text(toml_content)

    load_from_toml(default_config, str(toml_file))

    # Verify generic LLM configuration remains unchanged
    generic_llm = default_config.get_llm_config('llm')
    assert generic_llm.model == 'base-model'
    assert generic_llm.api_key == 'base-api-key'
    assert generic_llm.embedding_model == 'base-embedding'
    assert generic_llm.num_retries == 3

    # Verify custom_full LLM overrides all attributes
    custom_full = default_config.get_llm_config('custom_full')
    assert custom_full.model == 'full-custom-model'
    assert custom_full.api_key == 'full-custom-api-key'
    assert custom_full.embedding_model == 'full-custom-embedding'
    assert custom_full.num_retries == 10  # overridden value


def test_load_from_toml_llm_custom_partial_override(
    default_config: AppConfig, generic_llm_toml: str
) -> None:
    """Test that custom LLM configurations can partially override attributes
    from the generic [llm] section while inheriting others.
    """
    load_from_toml(default_config, generic_llm_toml)

    # Verify custom1 LLM overrides 'model' and 'api_key' but inherits 'num_retries'
    custom1 = default_config.get_llm_config('custom1')
    assert custom1.model == 'custom-model-1'
    assert custom1.api_key == 'custom-api-key-1'
    assert custom1.embedding_model == 'base-embedding'
    assert custom1.num_retries == 3  # from [llm]

    # Verify custom2 LLM overrides 'model', 'api_key', and 'num_retries'
    custom2 = default_config.get_llm_config('custom2')
    assert custom2.model == 'custom-model-2'
    assert custom2.api_key == 'custom-api-key-2'
    assert custom2.embedding_model == 'base-embedding'
    assert custom2.num_retries == 5  # overridden value


def test_load_from_toml_llm_custom_no_override(
    default_config: AppConfig, generic_llm_toml: str
) -> None:
    """Test that custom LLM configurations with no additional overrides
    inherit all non-specified attributes from the generic [llm] section.
    """
    load_from_toml(default_config, generic_llm_toml)

    # Verify custom3 LLM inherits 'embedding_model' and 'num_retries' from the generic section
    custom3 = default_config.get_llm_config('custom3')
    assert custom3.model == 'custom-model-3'
    assert custom3.api_key == 'custom-api-key-3'
    assert custom3.embedding_model == 'base-embedding'
    assert custom3.num_retries == 3  # from [llm]


def test_load_from_toml_llm_missing_generic(
    default_config: AppConfig, tmp_path: pathlib.Path
) -> None:
    """Test that custom LLM configurations without a generic [llm] section
    use only their own attributes and fall back to defaults for others.
    """
    toml_content = """
[core]
workspace_base = "./workspace"

[llm.custom_only]
model = "custom-only-model"
api_key = "custom-only-api-key"
"""
    toml_file = tmp_path / 'custom_only_llm.toml'
    toml_file.write_text(toml_content)

    load_from_toml(default_config, str(toml_file))

    # Verify custom_only LLM uses its own attributes and defaults for others
    custom_only = default_config.get_llm_config('custom_only')
    assert custom_only.model == 'custom-only-model'
    assert custom_only.api_key == 'custom-only-api-key'
    assert custom_only.embedding_model == 'local'  # default value
    assert custom_only.num_retries == 8  # default value


def test_load_from_toml_llm_invalid_config(
    default_config: AppConfig, tmp_path: pathlib.Path
) -> None:
    """Test that invalid custom LLM configurations do not override the generic
    configuration and raise appropriate warnings.
    """
    toml_content = """
[core]
workspace_base = "./workspace"

[llm]
model = "base-model"
api_key = "base-api-key"
num_retries = 3

[llm.invalid_custom]
unknown_attr = "should_not_exist"
"""
    toml_file = tmp_path / 'invalid_custom_llm.toml'
    toml_file.write_text(toml_content)

    load_from_toml(default_config, str(toml_file))

    # Verify generic LLM is loaded correctly
    generic_llm = default_config.get_llm_config('llm')
    assert generic_llm.model == 'base-model'
    assert generic_llm.api_key == 'base-api-key'
    assert generic_llm.num_retries == 3

    # Verify invalid_custom LLM does not override generic attributes
    custom_invalid = default_config.get_llm_config('invalid_custom')
    assert custom_invalid.model == 'base-model'
    assert custom_invalid.api_key == 'base-api-key'
    assert custom_invalid.num_retries == 3  # from [llm], not the default 8
    assert custom_invalid.embedding_model == 'local'  # default value

View File

@@ -0,0 +1,92 @@
import pathlib

import pytest

from openhands.core.config import AppConfig
from openhands.core.config.utils import load_from_toml


@pytest.fixture
def draft_llm_toml(tmp_path: pathlib.Path) -> str:
    toml_content = """
[core]
workspace_base = "./workspace"

[llm]
model = "base-model"
api_key = "base-api-key"
draft_editor = { model = "draft-model", api_key = "draft-api-key" }

[llm.custom1]
model = "custom-model-1"
api_key = "custom-api-key-1"
# Should use draft_editor from [llm] as fallback

[llm.custom2]
model = "custom-model-2"
api_key = "custom-api-key-2"
draft_editor = { model = "custom-draft", api_key = "custom-draft-key" }

[llm.custom3]
model = "custom-model-3"
api_key = "custom-api-key-3"
draft_editor = "null"  # Explicitly set to null in TOML
"""
    toml_file = tmp_path / 'llm_config.toml'
    toml_file.write_text(toml_content)
    return str(toml_file)


def test_draft_editor_fallback(draft_llm_toml):
    """Test that draft_editor is correctly handled in different scenarios:
    - Falls back to generic [llm] section value
    - Uses custom value when specified
    - Can be explicitly set to null
    """
    config = AppConfig()

    # Verify default draft_editor is None
    default_llm = config.get_llm_config('llm')
    assert default_llm.draft_editor is None

    # Load config from TOML
    load_from_toml(config, draft_llm_toml)

    # Verify generic LLM draft_editor
    generic_llm = config.get_llm_config('llm')
    assert generic_llm.draft_editor is not None
    assert generic_llm.draft_editor.model == 'draft-model'
    assert generic_llm.draft_editor.api_key == 'draft-api-key'

    # Verify custom1 uses draft_editor from generic as fallback
    custom1 = config.get_llm_config('custom1')
    assert custom1.model == 'custom-model-1'
    assert custom1.draft_editor is not None
    assert custom1.draft_editor.model == 'draft-model'
    assert custom1.draft_editor.api_key == 'draft-api-key'

    # Verify custom2 overrides draft_editor
    custom2 = config.get_llm_config('custom2')
    assert custom2.model == 'custom-model-2'
    assert custom2.draft_editor is not None
    assert custom2.draft_editor.model == 'custom-draft'
    assert custom2.draft_editor.api_key == 'custom-draft-key'

    # Verify custom3 has draft_editor explicitly set to None
    custom3 = config.get_llm_config('custom3')
    assert custom3.model == 'custom-model-3'
    assert custom3.draft_editor is None


def test_draft_editor_defaults(draft_llm_toml):
    """Test that draft_editor uses default values from LLMConfig when not specified."""
    config = AppConfig()
    load_from_toml(config, draft_llm_toml)

    generic_llm = config.get_llm_config('llm')
    assert generic_llm.draft_editor.num_retries == 8  # Default from LLMConfig
    assert generic_llm.draft_editor.embedding_model == 'local'  # Default from LLMConfig

    custom2 = config.get_llm_config('custom2')
    assert custom2.draft_editor.num_retries == 8  # Default from LLMConfig
    assert custom2.draft_editor.embedding_model == 'local'  # Default from LLMConfig