From 31ad7fc1758bca83b8145ff88471662356997036 Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Sat, 24 May 2025 01:35:30 +0800 Subject: [PATCH] chore: add claude 4 to verified mode & global replace 3.7 to claude 4 (#8665) Co-authored-by: openhands --- .github/workflows/openhands-resolver.yml | 2 +- README.md | 2 +- docs/README.md | 2 +- .../current/usage/llms/llms.md | 2 +- .../current/usage/llms/llms.md | 2 +- .../current/usage/llms/llms.md | 2 +- .../current/usage/llms/llms.md | 2 +- docs/modules/usage/how-to/cli-mode.md | 2 +- docs/modules/usage/how-to/headless-mode.md | 2 +- docs/modules/usage/llms/llms.md | 2 +- docs/translation_updater.py | 2 +- frontend/__tests__/routes/llm-settings.test.tsx | 8 ++++---- .../utils/extract-model-and-provider.test.ts | 12 ++++++++++++ frontend/src/mocks/handlers.ts | 2 +- frontend/src/routes/llm-settings.tsx | 6 +++--- frontend/src/services/settings.ts | 2 +- frontend/src/utils/verified-models.ts | 4 ++++ openhands/cli/utils.py | 2 ++ openhands/core/config/llm_config.py | 2 +- openhands/resolver/README.md | 2 +- openhands/resolver/examples/openhands-resolver.yml | 2 +- tests/unit/test_cli_utils.py | 8 ++++---- 22 files changed, 45 insertions(+), 27 deletions(-) diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml index a320881d1a..1012df45ca 100644 --- a/.github/workflows/openhands-resolver.yml +++ b/.github/workflows/openhands-resolver.yml @@ -24,7 +24,7 @@ on: LLM_MODEL: required: false type: string - default: "anthropic/claude-3-7-sonnet-20250219" + default: "anthropic/claude-sonnet-4-20250514" LLM_API_VERSION: required: false type: string diff --git a/README.md b/README.md index fad069106a..60c18e9629 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ docker run -it --rm --pull=always \ You'll find OpenHands running at [http://localhost:3000](http://localhost:3000)! When you open the application, you'll be asked to choose an LLM provider and add an API key. -[Anthropic's Claude 3.7 Sonnet](https://www.anthropic.com/api) (`anthropic/claude-3-7-sonnet-20250219`) +[Anthropic's Claude Sonnet 4](https://www.anthropic.com/api) (`anthropic/claude-sonnet-4-20250514`) works best, but you have [many options](https://docs.all-hands.dev/modules/usage/llms). ## 💡 Other ways to run OpenHands diff --git a/docs/README.md b/docs/README.md index 45d2bf681c..fa64e94f1a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -52,4 +52,4 @@ $ poetry run python docs/translation_updater.py # ... ``` -This process uses `claude-3-7-sonnet-20250219` as base model and each language consumes at least ~30k input tokens and ~35k output tokens. +This process uses `claude-sonnet-4-20250514` as base model and each language consumes at least ~30k input tokens and ~35k output tokens. diff --git a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md index a0aa85ebdc..7b98b4678a 100644 --- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -13,7 +13,7 @@ recommandations pour la sélection de modèles. Nos derniers résultats d'évalu Sur la base de ces résultats et des retours de la communauté, les modèles suivants ont été vérifiés comme fonctionnant raisonnablement bien avec OpenHands : -- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api) (recommandé) +- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommandé) - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) - [deepseek/deepseek-chat](https://api-docs.deepseek.com/) - [openai/o3-mini](https://openai.com/index/openai-o3-mini/) diff --git a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/llms/llms.md index 32ee7b9cbc..6459f0e857 100644 --- a/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/ja/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -13,7 +13,7 @@ OpenHandsはLiteLLMでサポートされているあらゆるLLMに接続でき これらの調査結果とコミュニティからのフィードバックに基づき、以下のモデルはOpenHandsでうまく動作することが確認されています: -- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api) (推奨) +- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (推奨) - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) - [deepseek/deepseek-chat](https://api-docs.deepseek.com/) - [openai/o3-mini](https://openai.com/index/openai-o3-mini/) diff --git a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/llms/llms.md index 840b3750a3..4d4bd45411 100644 --- a/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/pt-BR/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -13,7 +13,7 @@ recomendações para seleção de modelos. Nossos resultados de benchmarking mai Com base nessas descobertas e feedback da comunidade, os seguintes modelos foram verificados e funcionam razoavelmente bem com o OpenHands: -- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api) (recomendado) +- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recomendado) - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) - [deepseek/deepseek-chat](https://api-docs.deepseek.com/) - [openai/o3-mini](https://openai.com/index/openai-o3-mini/) diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md index 814e0ca2fc..1a5f253730 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/llms/llms.md @@ -12,7 +12,7 @@ OpenHands 可以连接到任何 LiteLLM 支持的 LLM。但是,它需要一个 基于这些发现和社区反馈,以下模型已被验证可以与 OpenHands 合理地配合使用: -- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api)(推荐) +- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api)(推荐) - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) - [deepseek/deepseek-chat](https://api-docs.deepseek.com/) - [openai/o3-mini](https://openai.com/index/openai-o3-mini/) diff --git a/docs/modules/usage/how-to/cli-mode.md b/docs/modules/usage/how-to/cli-mode.md index 7415d931c1..b4a6e0b805 100644 --- a/docs/modules/usage/how-to/cli-mode.md +++ b/docs/modules/usage/how-to/cli-mode.md @@ -23,7 +23,7 @@ This command opens an interactive prompt where you can type tasks or commands an 1. Set the following environment variables in your terminal: - `SANDBOX_VOLUMES` to specify the directory you want OpenHands to access ([See using SANDBOX_VOLUMES for more info](../runtimes/docker#using-sandbox_volumes)) - - `LLM_MODEL` - the LLM model to use (e.g. `export LLM_MODEL="anthropic/claude-3-7-sonnet-20250219"`) + - `LLM_MODEL` - the LLM model to use (e.g. `export LLM_MODEL="anthropic/claude-sonnet-4-20250514"`) - `LLM_API_KEY` - your API key (e.g. `export LLM_API_KEY="sk_test_12345"`) 2. Run the following command: diff --git a/docs/modules/usage/how-to/headless-mode.md b/docs/modules/usage/how-to/headless-mode.md index 0eeb124b01..b35d8203ea 100644 --- a/docs/modules/usage/how-to/headless-mode.md +++ b/docs/modules/usage/how-to/headless-mode.md @@ -23,7 +23,7 @@ To run OpenHands in Headless mode with Docker: 1. Set the following environment variables in your terminal: - `SANDBOX_VOLUMES` to specify the directory you want OpenHands to access ([See using SANDBOX_VOLUMES for more info](../runtimes/docker#using-sandbox_volumes)) - - `LLM_MODEL` - the LLM model to use (e.g. `export LLM_MODEL="anthropic/claude-3-7-sonnet-20250219"`) + - `LLM_MODEL` - the LLM model to use (e.g. `export LLM_MODEL="anthropic/claude-sonnet-4-20250514"`) - `LLM_API_KEY` - your API key (e.g. `export LLM_API_KEY="sk_test_12345"`) 2. Run the following Docker command: diff --git a/docs/modules/usage/llms/llms.md b/docs/modules/usage/llms/llms.md index 704ee94e8a..2933a0a4aa 100644 --- a/docs/modules/usage/llms/llms.md +++ b/docs/modules/usage/llms/llms.md @@ -13,7 +13,7 @@ recommendations for model selection. Our latest benchmarking results can be foun Based on these findings and community feedback, these are the latest models that have been verified to work reasonably well with OpenHands: -- [anthropic/claude-3-7-sonnet-20250219](https://www.anthropic.com/api) (recommended) +- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended) - [openai/o4-mini](https://openai.com/index/introducing-o3-and-o4-mini/) - [gemini/gemini-2.5-pro](https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/) - [deepseek/deepseek-chat](https://api-docs.deepseek.com/) diff --git a/docs/translation_updater.py b/docs/translation_updater.py index e8ac6c92f3..ad87e7c68f 100644 --- a/docs/translation_updater.py +++ b/docs/translation_updater.py @@ -57,7 +57,7 @@ def translate_content(content, target_lang): system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.' message = client.messages.create( - model='claude-3-7-sonnet-20250219', + model='claude-sonnet-4-20250514', max_tokens=4096, temperature=0, system=system_prompt, diff --git a/frontend/__tests__/routes/llm-settings.test.tsx b/frontend/__tests__/routes/llm-settings.test.tsx index 4037ff89ed..86cce488c1 100644 --- a/frontend/__tests__/routes/llm-settings.test.tsx +++ b/frontend/__tests__/routes/llm-settings.test.tsx @@ -48,7 +48,7 @@ describe("Content", () => { await waitFor(() => { expect(provider).toHaveValue("Anthropic"); - expect(model).toHaveValue("claude-3-7-sonnet-20250219"); + expect(model).toHaveValue("claude-sonnet-4-20250514"); expect(apiKey).toHaveValue(""); expect(apiKey).toHaveProperty("placeholder", ""); @@ -135,7 +135,7 @@ describe("Content", () => { ); const condensor = screen.getByTestId("enable-memory-condenser-switch"); - expect(model).toHaveValue("anthropic/claude-3-7-sonnet-20250219"); + expect(model).toHaveValue("anthropic/claude-sonnet-4-20250514"); expect(baseUrl).toHaveValue(""); expect(apiKey).toHaveValue(""); expect(apiKey).toHaveProperty("placeholder", ""); @@ -542,7 +542,7 @@ describe("Form submission", () => { // select model await userEvent.click(model); - const modelOption = screen.getByText("claude-3-7-sonnet-20250219"); + const modelOption = screen.getByText("claude-sonnet-4-20250514"); await userEvent.click(modelOption); const submitButton = screen.getByTestId("submit-button"); @@ -550,7 +550,7 @@ describe("Form submission", () => { expect(saveSettingsSpy).toHaveBeenCalledWith( expect.objectContaining({ - llm_model: "anthropic/claude-3-7-sonnet-20250219", + llm_model: "anthropic/claude-sonnet-4-20250514", llm_base_url: "", confirmation_mode: false, }), diff --git a/frontend/__tests__/utils/extract-model-and-provider.test.ts b/frontend/__tests__/utils/extract-model-and-provider.test.ts index bfd3a9da1b..83d09cc180 100644 --- a/frontend/__tests__/utils/extract-model-and-provider.test.ts +++ b/frontend/__tests__/utils/extract-model-and-provider.test.ts @@ -71,6 +71,18 @@ describe("extractModelAndProvider", () => { separator: "/", }); + expect(extractModelAndProvider("claude-sonnet-4-20250514")).toEqual({ + provider: "anthropic", + model: "claude-sonnet-4-20250514", + separator: "/", + }); + + expect(extractModelAndProvider("claude-opus-4-20250514")).toEqual({ + provider: "anthropic", + model: "claude-opus-4-20250514", + separator: "/", + }); + expect(extractModelAndProvider("claude-3-haiku-20240307")).toEqual({ provider: "anthropic", model: "claude-3-haiku-20240307", diff --git a/frontend/src/mocks/handlers.ts b/frontend/src/mocks/handlers.ts index 8036623621..489053330c 100644 --- a/frontend/src/mocks/handlers.ts +++ b/frontend/src/mocks/handlers.ts @@ -100,7 +100,7 @@ const openHandsHandlers = [ "gpt-4o", "gpt-4o-mini", "anthropic/claude-3.5", - "anthropic/claude-3-7-sonnet-20250219", + "anthropic/claude-sonnet-4-20250514", ]), ), diff --git a/frontend/src/routes/llm-settings.tsx b/frontend/src/routes/llm-settings.tsx index 7422c9b1b5..9c719519c9 100644 --- a/frontend/src/routes/llm-settings.tsx +++ b/frontend/src/routes/llm-settings.tsx @@ -279,7 +279,7 @@ function LlmSettingsScreen() { @@ -342,9 +342,9 @@ function LlmSettingsScreen() { name="llm-custom-model-input" label={t(I18nKey.SETTINGS$CUSTOM_MODEL)} defaultValue={ - settings.LLM_MODEL || "anthropic/claude-3-7-sonnet-20250219" + settings.LLM_MODEL || "anthropic/claude-sonnet-4-20250514" } - placeholder="anthropic/claude-3-7-sonnet-20250219" + placeholder="anthropic/claude-sonnet-4-20250514" type="text" className="w-[680px]" onChange={handleCustomModelIsDirty} diff --git a/frontend/src/services/settings.ts b/frontend/src/services/settings.ts index f07d84829c..bf79e542f9 100644 --- a/frontend/src/services/settings.ts +++ b/frontend/src/services/settings.ts @@ -3,7 +3,7 @@ import { Settings } from "#/types/settings"; export const LATEST_SETTINGS_VERSION = 5; export const DEFAULT_SETTINGS: Settings = { - LLM_MODEL: "anthropic/claude-3-7-sonnet-20250219", + LLM_MODEL: "anthropic/claude-sonnet-4-20250514", LLM_BASE_URL: "", AGENT: "CodeActAgent", LANGUAGE: "en", diff --git a/frontend/src/utils/verified-models.ts b/frontend/src/utils/verified-models.ts index a2fc72ba9a..0c7dfb6d9e 100644 --- a/frontend/src/utils/verified-models.ts +++ b/frontend/src/utils/verified-models.ts @@ -6,6 +6,8 @@ export const VERIFIED_MODELS = [ "o4-mini-2025-04-16", "claude-3-5-sonnet-20241022", "claude-3-7-sonnet-20250219", + "claude-sonnet-4-20250514", + "claude-opus-4-20250514", "deepseek-chat", ]; @@ -39,4 +41,6 @@ export const VERIFIED_ANTHROPIC_MODELS = [ "claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-7-sonnet-20250219", + "claude-sonnet-4-20250514", + "claude-opus-4-20250514", ]; diff --git a/openhands/cli/utils.py b/openhands/cli/utils.py index f56b290280..ebe54cd871 100644 --- a/openhands/cli/utils.py +++ b/openhands/cli/utils.py @@ -167,6 +167,8 @@ VERIFIED_ANTHROPIC_MODELS = [ 'claude-3-opus-20240229', 'claude-3-sonnet-20240229', 'claude-3-7-sonnet-20250219', + 'claude-sonnet-4-20250514', + 'claude-opus-4-20250514', ] diff --git a/openhands/core/config/llm_config.py b/openhands/core/config/llm_config.py index 0353fbcc37..6a3fe10190 100644 --- a/openhands/core/config/llm_config.py +++ b/openhands/core/config/llm_config.py @@ -47,7 +47,7 @@ class LLMConfig(BaseModel): seed: The seed to use for the LLM. """ - model: str = Field(default='claude-3-7-sonnet-20250219') + model: str = Field(default='claude-sonnet-4-20250514') api_key: SecretStr | None = Field(default=None) base_url: str | None = Field(default=None) api_version: str | None = Field(default=None) diff --git a/openhands/resolver/README.md b/openhands/resolver/README.md index 96b5b9397d..29c43398f1 100644 --- a/openhands/resolver/README.md +++ b/openhands/resolver/README.md @@ -109,7 +109,7 @@ export GIT_USERNAME="your-gitlab-username" # Optional, defaults to token owner # LLM configuration -export LLM_MODEL="anthropic/claude-3-7-sonnet-20250219" # Recommended +export LLM_MODEL="anthropic/claude-sonnet-4-20250514" # Recommended export LLM_API_KEY="your-llm-api-key" export LLM_BASE_URL="your-api-url" # Optional, for API proxies ``` diff --git a/openhands/resolver/examples/openhands-resolver.yml b/openhands/resolver/examples/openhands-resolver.yml index 35e960aad7..4268545e96 100644 --- a/openhands/resolver/examples/openhands-resolver.yml +++ b/openhands/resolver/examples/openhands-resolver.yml @@ -24,7 +24,7 @@ jobs: macro: ${{ vars.OPENHANDS_MACRO || '@openhands-agent' }} max_iterations: ${{ fromJson(vars.OPENHANDS_MAX_ITER || 50) }} base_container_image: ${{ vars.OPENHANDS_BASE_CONTAINER_IMAGE || '' }} - LLM_MODEL: ${{ vars.LLM_MODEL || 'anthropic/claude-3-7-sonnet-20250219' }} + LLM_MODEL: ${{ vars.LLM_MODEL || 'anthropic/claude-sonnet-4-20250514' }} target_branch: ${{ vars.TARGET_BRANCH || 'main' }} runner: ${{ vars.TARGET_RUNNER }} secrets: diff --git a/tests/unit/test_cli_utils.py b/tests/unit/test_cli_utils.py index 53da84b268..02ffdd09ad 100644 --- a/tests/unit/test_cli_utils.py +++ b/tests/unit/test_cli_utils.py @@ -354,11 +354,11 @@ class TestModelAndProviderFunctions: assert result['separator'] == '/' def test_extract_model_and_provider_anthropic_implicit(self): - model = 'claude-3-7-sonnet-20250219' + model = 'claude-sonnet-4-20250514' result = extract_model_and_provider(model) assert result['provider'] == 'anthropic' - assert result['model'] == 'claude-3-7-sonnet-20250219' + assert result['model'] == 'claude-sonnet-4-20250514' assert result['separator'] == '/' def test_extract_model_and_provider_versioned(self): @@ -380,7 +380,7 @@ class TestModelAndProviderFunctions: def test_organize_models_and_providers(self): models = [ 'openai/gpt-4o', - 'anthropic/claude-3-7-sonnet-20250219', + 'anthropic/claude-sonnet-4-20250514', 'o3-mini', 'anthropic.claude-3-5', # Should be ignored as it uses dot separator for anthropic 'unknown-model', @@ -397,7 +397,7 @@ class TestModelAndProviderFunctions: assert 'o3-mini' in result['openai']['models'] assert len(result['anthropic']['models']) == 1 - assert 'claude-3-7-sonnet-20250219' in result['anthropic']['models'] + assert 'claude-sonnet-4-20250514' in result['anthropic']['models'] assert len(result['other']['models']) == 1 assert 'unknown-model' in result['other']['models']