mirror of https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00

Better LLM retry behavior (#6557)
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>

This commit is contained in:
parent 82b5325792
commit 3a478c2303
@@ -63,22 +63,22 @@ We have a few guides for running OpenHands with specific model providers:
 ### API retries and rate limits

 LLM providers typically have rate limits, sometimes very low, and may require retries. OpenHands will automatically
-retry requests if it receives a Rate Limit Error (429 error code), API connection error, or other transient errors.
+retry requests if it receives a Rate Limit Error (429 error code).

 You can customize these options as you need for the provider you're using. Check their documentation, and set the
 following environment variables to control the number of retries and the time between retries:

-- `LLM_NUM_RETRIES` (Default of 8)
-- `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
-- `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
+- `LLM_NUM_RETRIES` (Default of 4 times)
+- `LLM_RETRY_MIN_WAIT` (Default of 5 seconds)
+- `LLM_RETRY_MAX_WAIT` (Default of 30 seconds)
 - `LLM_RETRY_MULTIPLIER` (Default of 2)

 If you are running OpenHands in development mode, you can also set these options in the `config.toml` file:

 ```toml
 [llm]
-num_retries = 8
-retry_min_wait = 15
-retry_max_wait = 120
+num_retries = 4
+retry_min_wait = 5
+retry_max_wait = 30
 retry_multiplier = 2
 ```
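Read together, the new defaults describe an exponential backoff schedule. Below is a minimal sketch of that arithmetic, assuming each wait is retry_min_wait * retry_multiplier**n capped at retry_max_wait, which is the reading that matches the "total wait time" comment added to LLMConfig further down. It is an illustration, not the project's retry code.

```python
# Hypothetical illustration of the backoff schedule implied by the new
# defaults; assumes wait_n = min(retry_min_wait * retry_multiplier**n,
# retry_max_wait), matching the "5 + 10 + 20 + 30 = 65" comment below.
NUM_RETRIES = 4
RETRY_MULTIPLIER = 2
RETRY_MIN_WAIT = 5  # seconds
RETRY_MAX_WAIT = 30  # seconds

waits = [
    min(RETRY_MIN_WAIT * RETRY_MULTIPLIER**n, RETRY_MAX_WAIT)
    for n in range(NUM_RETRIES)
]
print(waits)       # [5, 10, 20, 30]
print(sum(waits))  # 65 seconds of total wait across all retries
```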
@@ -3803,6 +3803,37 @@
     "pt": "Erro ao autenticar com o provedor LLM. Por favor, verifique sua chave API",
     "tr": "LLM sağlayıcısı ile kimlik doğrulama hatası. Lütfen API anahtarınızı kontrol edin"
   },
+  "STATUS$ERROR_LLM_SERVICE_UNAVAILABLE": {
+    "en": "The LLM provider is currently unavailable. Please try again later.",
+    "es": "El proveedor LLM no está actualmente disponible. Por favor, inténtelo de nuevo más tarde.",
+    "zh-CN": "LLM提供商当前不可用",
+    "zh-TW": "LLM提供商目前無法使用",
+    "ko-KR": "LLM 공급자가 현재 사용 불가능합니다",
+    "ja": "LLMプロバイダーが現在利用できません。後でもう一度試してください。",
+    "no": "LLM-leverandøren er nå ikke tilgjengelig. Vennligst prøv igjen senere.",
+    "ar": "المزود LLM غير متاح حالياً. يرجى المحاولة مرة أخرى لاحقًا.",
+    "de": "Der LLM-Anbieter ist derzeit nicht verfügbar. Bitte versuchen Sie es später erneut.",
+    "fr": "Le fournisseur LLM n'est actuellement pas disponible. Veuillez réessayer plus tard.",
+    "it": "Il provider LLM non è attualmente disponibile. Per favore, riprova più tardi.",
+    "pt": "O provedor LLM não está atualmente disponível. Por favor, tente novamente mais tarde.",
+    "tr": "LLM sağlayıcısı şu anda kullanılamıyor. Lütfen daha sonra tekrar deneyin."
+  },
+  "STATUS$ERROR_LLM_INTERNAL_SERVER_ERROR": {
+    "en": "The request failed with an internal server error.",
+    "es": "La solicitud falló con un error del servidor interno.",
+    "zh-CN": "请求失败,请稍后再试",
+    "zh-TW": "請求失敗,請稍後再試",
+    "ko-KR": "요청이 실패했습니다. 나중에 다시 시도해주세요.",
+    "ja": "リクエストが内部サーバーエラーで失敗しました。後でもう一度試してください。",
+    "no": "Det oppstod en feil ved tilkobling til kjøretidsmiljøet. Vennligst oppdater siden.",
+    "ar": "حدث خطأ أثناء الاتصال بوقت التشغيل. يرجى تحديث الصفحة.",
+    "de": "Beim Verbinden mit der Laufzeitumgebung ist ein Fehler aufgetreten. Bitte aktualisieren Sie die Seite.",
+    "fr": "Une erreur s'est produite lors de la connexion à l'environnement d'exécution. Veuillez rafraîchir la page.",
+    "it": "Si è verificato un errore durante la connessione al runtime. Aggiorna la pagina.",
+    "pt": "Ocorreu um erro ao conectar ao ambiente de execução. Por favor, atualize a página.",
+    "tr": "Çalışma zamanına bağlanırken bir hata oluştu. Lütfen sayfayı yenileyin."
+  },
+
   "STATUS$ERROR_RUNTIME_DISCONNECTED": {
     "en": "There was an error while connecting to the runtime. Please refresh the page.",
     "zh-CN": "运行时已断开连接",
@@ -3820,7 +3851,18 @@
   },
   "STATUS$LLM_RETRY": {
     "en": "Retrying LLM request",
-    "zh-TW": "重新嘗試 LLM 請求中"
+    "es": "Reintentando solicitud LLM",
+    "zh-CN": "重试LLM请求",
+    "zh-TW": "重試LLM請求",
+    "ko-KR": "LLM 요청 재시도",
+    "ja": "LLM リクエストを再試行中",
+    "no": "Gjenforsøker LLM-forespørsel",
+    "ar": "يتم إعادة تحميل الطلب LLM",
+    "de": "LLM-Anfrage erneut versuchen",
+    "fr": "Réessayer la requête LLM",
+    "it": "Ritenta la richiesta LLM",
+    "pt": "Reintentando a solicitação LLM",
+    "tr": "LLM isteğini yeniden deniyor"
   },
   "AGENT_ERROR$BAD_ACTION": {
     "en": "Agent tried to execute a malformed action.",
@@ -214,6 +214,17 @@ class AgentController:
         err_id = ''
         if isinstance(e, litellm.AuthenticationError):
             err_id = 'STATUS$ERROR_LLM_AUTHENTICATION'
+        elif isinstance(
+            e,
+            (
+                litellm.ServiceUnavailableError,
+                litellm.APIConnectionError,
+                litellm.APIError,
+            ),
+        ):
+            err_id = 'STATUS$ERROR_LLM_SERVICE_UNAVAILABLE'
+        elif isinstance(e, litellm.InternalServerError):
+            err_id = 'STATUS$ERROR_LLM_INTERNAL_SERVER_ERROR'
         elif isinstance(e, RateLimitError):
             await self.set_agent_state_to(AgentState.RATE_LIMITED)
             return
@@ -59,10 +59,11 @@ class LLMConfig(BaseModel):
     aws_region_name: str | None = Field(default=None)
     openrouter_site_url: str = Field(default='https://docs.all-hands.dev/')
     openrouter_app_name: str = Field(default='OpenHands')
-    num_retries: int = Field(default=8)
+    # total wait time: 5 + 10 + 20 + 30 = 65 seconds
+    num_retries: int = Field(default=4)
     retry_multiplier: float = Field(default=2)
-    retry_min_wait: int = Field(default=15)
-    retry_max_wait: int = Field(default=120)
+    retry_min_wait: int = Field(default=5)
+    retry_max_wait: int = Field(default=30)
     timeout: int | None = Field(default=None)
     max_message_chars: int = Field(
         default=30_000
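As a quick sanity check, the new defaults can be read straight off the model. A small usage sketch follows; the import path is the one the test files in this diff use, and the asserted values come from the fields above.

```python
# Sketch: reading the new retry defaults off the pydantic model.
# Import path taken from the tests in this diff; values match the fields above.
from openhands.core.config import LLMConfig

config = LLMConfig()
assert config.num_retries == 4
assert config.retry_multiplier == 2
assert config.retry_min_wait == 5  # seconds
assert config.retry_max_wait == 30  # seconds
```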
@@ -18,11 +18,7 @@ from litellm import Message as LiteLLMMessage
 from litellm import completion as litellm_completion
 from litellm import completion_cost as litellm_completion_cost
 from litellm.exceptions import (
-    APIConnectionError,
-    APIError,
-    InternalServerError,
     RateLimitError,
-    ServiceUnavailableError,
 )
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
@@ -41,15 +37,7 @@ from openhands.llm.retry_mixin import RetryMixin
 __all__ = ['LLM']

 # tuple of exceptions to retry on
-LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
-    APIConnectionError,
-    # FIXME: APIError is useful on 502 from a proxy for example,
-    # but it also retries on other errors that are permanent
-    APIError,
-    InternalServerError,
-    RateLimitError,
-    ServiceUnavailableError,
-)
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)

 # cache prompt supporting models
 # remove this when we gemini and deepseek are supported
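The effect of narrowing LLM_RETRY_EXCEPTIONS is that only rate-limit errors are retried; connection errors, API errors, and internal server errors now surface immediately and are mapped to status messages by AgentController above. Below is a rough tenacity-based sketch of the resulting policy. OpenHands' RetryMixin builds on tenacity, but the decorator here is an illustration with the new defaults inlined, not the mixin's code, and the exact wait curve depends on the wait strategy chosen.

```python
# Illustration only: retrying *just* RateLimitError with the new defaults,
# expressed as a tenacity policy. Not the actual RetryMixin code.
from litellm.exceptions import RateLimitError
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)


@retry(
    retry=retry_if_exception_type(LLM_RETRY_EXCEPTIONS),
    stop=stop_after_attempt(4),  # num_retries
    wait=wait_exponential(multiplier=2, min=5, max=30),  # retry_* settings
    reraise=True,
)
def call_llm(completion_fn, messages):
    # completion_fn stands in for litellm_completion: a 429 raises
    # RateLimitError and is retried; any other exception propagates at once.
    return completion_fn(messages=messages)
```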
@@ -3,10 +3,7 @@ from unittest.mock import MagicMock, patch

 import pytest
 from litellm.exceptions import (
-    APIConnectionError,
-    InternalServerError,
     RateLimitError,
-    ServiceUnavailableError,
 )

 from openhands.core.config import LLMConfig
@@ -187,21 +184,6 @@ def test_completion_with_mocked_logger(
 @pytest.mark.parametrize(
     'exception_class,extra_args,expected_retries',
     [
-        (
-            APIConnectionError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
-        (
-            InternalServerError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
-        (
-            ServiceUnavailableError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
         (RateLimitError, {'llm_provider': 'test_provider', 'model': 'test_model'}, 2),
     ],
 )
@@ -254,22 +236,6 @@ def test_completion_rate_limit_wait_time(mock_litellm_completion, default_config
     ), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'


-@patch('openhands.llm.llm.litellm_completion')
-def test_completion_exhausts_retries(mock_litellm_completion, default_config):
-    mock_litellm_completion.side_effect = APIConnectionError(
-        'Persistent error', llm_provider='test_provider', model='test_model'
-    )
-
-    llm = LLM(config=default_config)
-    with pytest.raises(APIConnectionError):
-        llm.completion(
-            messages=[{'role': 'user', 'content': 'Hello!'}],
-            stream=False,
-        )
-
-    assert mock_litellm_completion.call_count == llm.config.num_retries
-
-
 @patch('openhands.llm.llm.litellm_completion')
 def test_completion_operation_cancelled(mock_litellm_completion, default_config):
     mock_litellm_completion.side_effect = OperationCancelled('Operation cancelled')
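With the retry tuple narrowed, the removed test_completion_exhausts_retries no longer applies to connection errors. A hypothetical companion test (not part of this commit) that would pin down the new behavior: a non-retryable error should reach the caller on the first attempt. The mock setup and call shape mirror the removed test.

```python
# Hypothetical test, not in this commit: with retries narrowed to
# RateLimitError, an APIConnectionError should propagate without retries.
from unittest.mock import patch

import pytest
from litellm.exceptions import APIConnectionError

from openhands.core.config import LLMConfig
from openhands.llm.llm import LLM


@patch('openhands.llm.llm.litellm_completion')
def test_connection_error_not_retried(mock_litellm_completion):
    mock_litellm_completion.side_effect = APIConnectionError(
        'Persistent error', llm_provider='test_provider', model='test_model'
    )

    llm = LLM(config=LLMConfig())
    with pytest.raises(APIConnectionError):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    # exactly one attempt: the error is not in LLM_RETRY_EXCEPTIONS
    assert mock_litellm_completion.call_count == 1
```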
@@ -188,7 +188,7 @@ api_key = "custom-only-api-key"
     assert custom_only.model == 'custom-only-model'
     assert custom_only.api_key.get_secret_value() == 'custom-only-api-key'
     assert custom_only.embedding_model == 'local' # default value
-    assert custom_only.num_retries == 8 # default value
+    assert custom_only.num_retries == 4 # default value


 def test_load_from_toml_llm_invalid_config(