mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Improve OpenHands provider pricing documentation (#10974)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
a7b9a4f291
commit
89e3d2a867
@ -30,6 +30,20 @@ When running OpenHands, you'll need to set the following in the OpenHands UI thr
|
||||
|
||||
## Pricing

Pricing follows official API provider rates. [You can view model prices here.](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

Pricing follows official API provider rates. Below are the current pricing details for OpenHands models:

For `qwen3-coder-480b`, we charge the cheapest FP8 rate available on openrouter: \$0.4 per million input tokens and \$1.6 per million output tokens.

| Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens) | Max Input Tokens | Max Output Tokens |
|-------|----------------------------|-----------------------------------|------------------------------|------------------|-------------------|
| claude-opus-4-20250514 | $15.00 | $1.50 | $75.00 | 200,000 | 32,000 |
| claude-sonnet-4-20250514 | $3.00 | $0.30 | $15.00 | 200,000 | 64,000 |
| devstral-medium-2507 | $0.40 | N/A | $2.00 | 128,000 | 128,000 |
| devstral-small-2505 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
| devstral-small-2507 | $0.10 | N/A | $0.30 | 128,000 | 128,000 |
| gemini-2.5-pro | $1.25 | $0.31 | $10.00 | 1,048,576 | 65,535 |
| gpt-5-2025-08-07 | $1.25 | $0.125 | $10.00 | 400,000 | 128,000 |
| gpt-5-mini-2025-08-07 | $0.25 | $0.025 | $2.00 | 400,000 | 128,000 |
| o3 | $2.00 | $0.50 | $8.00 | 200,000 | 100,000 |
| o4-mini | $1.10 | $0.28 | $4.40 | 200,000 | 100,000 |
| qwen3-coder-480b | $0.40 | N/A | $1.60 | N/A | N/A |

**Note:** Cached input tokens are charged at a reduced rate when the same content is reused across requests. Models that don't support prompt caching show "N/A" for cached input cost.
290
tests/unit/test_pricing_documentation.py
Normal file
290
tests/unit/test_pricing_documentation.py
Normal file
@ -0,0 +1,290 @@
|
||||
"""
|
||||
Unit tests to verify pricing documentation consistency.
|
||||
"""
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
class TestPricingDocumentation:
    """Test class for pricing documentation consistency.

    Cross-checks the pricing table in ``docs/usage/llms/openhands-llms.mdx``
    against the ``openhands_models`` list in ``openhands/utils/llm.py`` and
    the upstream LiteLLM pricing JSON.
    """

    @pytest.fixture
    def pricing_data(self) -> dict[str, Any]:
        """Fetch pricing data from LiteLLM repository."""
        url = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
        # Bounded timeout so a hung network call cannot stall the test run.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()

    @pytest.fixture
    def openhands_models(self) -> list[str]:
        """Get the list of OpenHands models from the codebase."""
        # Read the models directly from the source code file
        llm_utils_path = (
            Path(__file__).parent.parent.parent / 'openhands' / 'utils' / 'llm.py'
        )
        content = llm_utils_path.read_text()

        # Parse the file and locate the `openhands_models = [...]` literal.
        import ast

        tree = ast.parse(content)

        for node in ast.walk(tree):
            if (
                isinstance(node, ast.Assign)
                and len(node.targets) == 1
                and isinstance(node.targets[0], ast.Name)
                and node.targets[0].id == 'openhands_models'
            ):
                # Extract the string elements of the list literal.
                if isinstance(node.value, ast.List):
                    models = []
                    for elt in node.value.elts:
                        if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
                            # Drop the 'openhands/' provider prefix and skip
                            # placeholder '<secret...>' entries.
                            model = elt.value.removeprefix('openhands/')
                            if not model.startswith('<secret'):
                                models.append(model)
                    return models

        # Fail loudly if the expected assignment is missing.
        raise ValueError('Could not extract openhands_models from llm.py')

    @pytest.fixture
    def documentation_content(self) -> str:
        """Read the OpenHands LLM documentation content."""
        docs_path = (
            Path(__file__).parent.parent.parent
            / 'docs'
            / 'usage'
            / 'llms'
            / 'openhands-llms.mdx'
        )
        return docs_path.read_text()

    def extract_pricing_from_docs(self, content: str) -> dict[str, dict[str, float]]:
        """Extract pricing information from documentation.

        Returns a mapping of model name to per-million-token costs; the
        cached-input cost is ``None`` when the docs show "N/A".
        """
        # The cached-input column may be "N/A" or "$X.XX", so it is captured
        # as free text and parsed afterwards.
        pricing_table_pattern = (
            r'\| ([^|]+) \| \$([0-9.]+) \| ([^|]+) \| \$([0-9.]+) \|'
        )
        matches = re.findall(pricing_table_pattern, content)

        pricing_data = {}
        for match in matches:
            model_name = match[0].strip()
            input_cost = float(match[1])
            cached_input_str = match[2].strip()
            output_cost = float(match[3])

            # Parse cached input cost (can be N/A or $X.XX)
            cached_input_cost = None
            if cached_input_str != 'N/A':
                cached_input_cost = float(cached_input_str.replace('$', ''))

            pricing_data[model_name] = {
                'input_cost_per_million_tokens': input_cost,
                'cached_input_cost_per_million_tokens': cached_input_cost,
                'output_cost_per_million_tokens': output_cost,
            }

        return pricing_data

    def get_litellm_pricing(
        self, model: str, pricing_data: dict[str, Any]
    ) -> dict[str, float]:
        """Get pricing for a model from LiteLLM data.

        Returns an empty dict when the model is not found under any of the
        known provider-prefix variations.
        """
        # LiteLLM keys models by provider prefix; try the likely candidates.
        variations = [
            model,
            f'openai/{model}',
            f'anthropic/{model}',
            f'google/{model}',
            f'mistral/{model}',
        ]

        for variation in variations:
            if variation in pricing_data:
                model_data = pricing_data[variation]
                # Upstream costs are per token; scale to per-million tokens.
                result = {
                    'input_cost_per_million_tokens': model_data.get(
                        'input_cost_per_token', 0
                    )
                    * 1_000_000,
                    'output_cost_per_million_tokens': model_data.get(
                        'output_cost_per_token', 0
                    )
                    * 1_000_000,
                }

                # Add cached input cost if available. `or 0` also guards
                # against an explicit null in the upstream JSON, which would
                # otherwise make the `> 0` comparison raise TypeError.
                cached_cost = model_data.get('cache_read_input_token_cost') or 0
                if cached_cost > 0:
                    result['cached_input_cost_per_million_tokens'] = (
                        cached_cost * 1_000_000
                    )

                return result

        return {}

    def test_pricing_table_exists(self, documentation_content: str):
        """Test that the pricing table exists in the documentation."""
        assert (
            '| Model | Input Cost (per 1M tokens) | Cached Input Cost (per 1M tokens) | Output Cost (per 1M tokens)'
            in documentation_content
        )
        assert 'claude-opus-4-20250514' in documentation_content
        assert 'qwen3-coder-480b' in documentation_content

    def test_no_external_json_link(self, documentation_content: str):
        """Test that the external JSON link has been removed."""
        assert (
            'github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json'
            not in documentation_content
        )

    def test_pricing_consistency_with_litellm(
        self, pricing_data: dict[str, Any], documentation_content: str
    ):
        """Test that pricing in documentation matches LiteLLM data where applicable."""
        docs_pricing = self.extract_pricing_from_docs(documentation_content)

        # Special case for qwen3-coder-480b (custom pricing)
        qwen_pricing = docs_pricing.get('qwen3-coder-480b')
        assert qwen_pricing is not None
        assert qwen_pricing['input_cost_per_million_tokens'] == 0.4
        assert qwen_pricing['output_cost_per_million_tokens'] == 1.6
        assert qwen_pricing['cached_input_cost_per_million_tokens'] is None  # N/A

        # Test other models against LiteLLM data
        for model_name, doc_pricing in docs_pricing.items():
            if model_name == 'qwen3-coder-480b':
                continue  # Skip custom pricing model

            litellm_pricing = self.get_litellm_pricing(model_name, pricing_data)

            if litellm_pricing:  # Only test if we found pricing in LiteLLM
                assert (
                    abs(
                        doc_pricing['input_cost_per_million_tokens']
                        - litellm_pricing['input_cost_per_million_tokens']
                    )
                    < 0.01
                ), (
                    f'Input pricing mismatch for {model_name}: docs={doc_pricing["input_cost_per_million_tokens"]}, litellm={litellm_pricing["input_cost_per_million_tokens"]}'
                )

                assert (
                    abs(
                        doc_pricing['output_cost_per_million_tokens']
                        - litellm_pricing['output_cost_per_million_tokens']
                    )
                    < 0.01
                ), (
                    f'Output pricing mismatch for {model_name}: docs={doc_pricing["output_cost_per_million_tokens"]}, litellm={litellm_pricing["output_cost_per_million_tokens"]}'
                )

                # Test cached input cost if both have it
                doc_cached = doc_pricing.get('cached_input_cost_per_million_tokens')
                litellm_cached = litellm_pricing.get(
                    'cached_input_cost_per_million_tokens'
                )

                if doc_cached is not None and litellm_cached is not None:
                    assert abs(doc_cached - litellm_cached) < 0.01, (
                        f'Cached input pricing mismatch for {model_name}: docs={doc_cached}, litellm={litellm_cached}'
                    )
                elif doc_cached is None and litellm_cached is not None:
                    # Documentation shows N/A but LiteLLM has cached pricing - this might be intentional
                    pass
                elif doc_cached is not None and litellm_cached is None:
                    # Documentation has cached pricing but LiteLLM doesn't - this shouldn't happen
                    raise AssertionError(
                        f'Documentation has cached pricing for {model_name} but LiteLLM does not'
                    )

    def test_all_openhands_models_documented(
        self, openhands_models: list[str], documentation_content: str
    ):
        """Test that all OpenHands models are documented in the pricing table."""
        docs_pricing = self.extract_pricing_from_docs(documentation_content)
        documented_models = set(docs_pricing.keys())

        # Filter out models that might not have pricing (like kimi-k2-0711-preview)
        expected_models = set(openhands_models)

        # Check that most models are documented (allowing for some models without pricing)
        documented_count = len(documented_models.intersection(expected_models))
        total_count = len(expected_models)

        # We should have at least 80% of models documented
        coverage_ratio = documented_count / total_count if total_count > 0 else 0
        assert coverage_ratio >= 0.8, (
            f'Only {documented_count}/{total_count} models documented in pricing table'
        )

    def test_model_list_consistency(
        self, openhands_models: list[str], documentation_content: str
    ):
        """Test that the model list in documentation is consistent with the code."""
        docs_pricing = self.extract_pricing_from_docs(documentation_content)
        documented_models = set(docs_pricing.keys())
        code_models = set(openhands_models)

        # Find models that are in code but not in docs
        missing_from_docs = code_models - documented_models
        # Find models that are in docs but not in code
        extra_in_docs = documented_models - code_models

        # Allow some models to be missing from docs (e.g., if they don't have pricing)
        # but no extra models should be in docs that aren't in code
        assert not extra_in_docs, (
            f'Models in documentation but not in code: {extra_in_docs}'
        )

        # Report missing models for visibility (but don't fail the test)
        if missing_from_docs:
            print(f'Models in code but not documented: {missing_from_docs}')

    def test_pricing_format_consistency(self, documentation_content: str):
        """Test that pricing format is consistent in the documentation."""
        docs_pricing = self.extract_pricing_from_docs(documentation_content)

        for model_name, pricing in docs_pricing.items():
            # Check that prices are reasonable (not negative, not extremely high)
            assert pricing['input_cost_per_million_tokens'] >= 0, (
                f'Negative input cost for {model_name}'
            )
            assert pricing['output_cost_per_million_tokens'] >= 0, (
                f'Negative output cost for {model_name}'
            )
            assert pricing['input_cost_per_million_tokens'] <= 100, (
                f'Unreasonably high input cost for {model_name}'
            )
            assert pricing['output_cost_per_million_tokens'] <= 200, (
                f'Unreasonably high output cost for {model_name}'
            )

            # Output cost should generally be higher than input cost
            if pricing['input_cost_per_million_tokens'] > 0:
                ratio = (
                    pricing['output_cost_per_million_tokens']
                    / pricing['input_cost_per_million_tokens']
                )
                assert ratio >= 1.0, (
                    f'Output cost should be >= input cost for {model_name}'
                )
                assert ratio <= 20.0, (
                    f'Output/input cost ratio too high for {model_name}'
                )
Loading…
x
Reference in New Issue
Block a user