mirror of
https://github.com/OpenHands/OpenHands.git
(enh) CodeActAgent: improve logging; sensible retry defaults in config (#3729)
* CodeActAgent: improve logging; sensible retry defaults for completion errors
* CodeActAgent: reduce completion error message sent to UI
* tweak values; docs+config template changes
* fix format_messages; log exception in codeactagent again
This commit is contained in:
parent 681276f27c
commit 03b5b03bb2
@@ -5,6 +5,7 @@ from agenthub.codeact_agent.action_parser import CodeActResponseParser
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
+from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
 from openhands.events.action import (
     Action,
@@ -209,9 +210,11 @@ class CodeActAgent(Agent):
 
         try:
             response = self.llm.completion(**params)
-        except Exception:
+        except Exception as e:
+            logger.error(f'{e}')
+            error_message = '{}: {}'.format(type(e).__name__, str(e).split('\n')[0])
             return AgentFinishAction(
-                thought='Agent encountered an error while processing the last action. Please try again.'
+                thought=f'Agent encountered an error while processing the last action.\nError: {error_message}\nPlease try again.'
             )
 
         return self.action_parser.parse(response)
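The new `error_message` deliberately keeps only the exception class name and the first line of the exception text, so multi-line provider tracebacks never reach the UI. A quick standalone sketch of that truncation (illustrative values, not OpenHands code):

```python
try:
    raise ValueError('Rate limit exceeded\nfull provider traceback follows...')
except Exception as e:
    # keep only the exception class and the first line of its message
    error_message = '{}: {}'.format(type(e).__name__, str(e).split('\n')[0])
    print(error_message)  # ValueError: Rate limit exceeded
```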
@@ -126,16 +126,21 @@ embedding_model = ""
 # Model to use
 model = "gpt-4o"
 
-# Number of retries to attempt
-#num_retries = 5
+# Number of retries to attempt when an operation fails with the LLM.
+# Increase this value to allow more attempts before giving up
+#num_retries = 8
 
-# Retry maximum wait time
-#retry_max_wait = 60
+# Maximum wait time (in seconds) between retry attempts
+# This caps the exponential backoff to prevent excessively long wait times
+#retry_max_wait = 120
 
-# Retry minimum wait time
-#retry_min_wait = 3
+# Minimum wait time (in seconds) between retry attempts
+# This sets the initial delay before the first retry
+#retry_min_wait = 15
 
-# Retry multiplier for exponential backoff
+# Multiplier for exponential backoff calculation
+# The wait time increases by this factor after each failed attempt
+# A value of 2.0 means each retry waits twice as long as the previous one
 #retry_multiplier = 2.0
 
 # Drop any unmapped (unsupported) params without causing an exception
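With these defaults the delay roughly doubles per attempt, from the 15-second floor up to the 120-second cap. A back-of-the-envelope sketch of the schedule, assuming a tenacity-style formula (multiplier * 2**attempt, clamped to [min, max]; the exact exponent offset varies by tenacity version):

```python
multiplier, min_wait, max_wait = 2.0, 15, 120
for attempt in range(1, 9):
    # clamp the exponential delay to the configured window
    wait = max(min_wait, min(multiplier * 2**attempt, max_wait))
    print(f'after attempt {attempt}: wait {wait:.0f}s')
# prints 15, 15, 16, 32, 64, 120, 120, 120
```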
@@ -41,4 +41,4 @@ ne peut être aussi puissant que les modèles qui le pilotent -- heureusement, l
 Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or an API connection error.
 You can set the `LLM_NUM_RETRIES`, `LLM_RETRY_MIN_WAIT`, and `LLM_RETRY_MAX_WAIT` environment variables to control the number of retries and the time between retries.
-By default, `LLM_NUM_RETRIES` is 5, and `LLM_RETRY_MIN_WAIT` and `LLM_RETRY_MAX_WAIT` are 3 seconds and 60 seconds respectively.
+By default, `LLM_NUM_RETRIES` is 8, and `LLM_RETRY_MIN_WAIT` and `LLM_RETRY_MAX_WAIT` are 15 seconds and 120 seconds respectively.
@@ -43,4 +43,4 @@ OpenHands 将向你配置的 LLM 发出许多提示。大多数这些 LLM 都是
 Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or an API connection error.
 You can set the `LLM_NUM_RETRIES`, `LLM_RETRY_MIN_WAIT`, and `LLM_RETRY_MAX_WAIT` environment variables to control the number of retries and the time between retries.
-By default, `LLM_NUM_RETRIES` is 5, and `LLM_RETRY_MIN_WAIT` and `LLM_RETRY_MAX_WAIT` are 3 seconds and 60 seconds respectively.
+By default, `LLM_NUM_RETRIES` is 8, and `LLM_RETRY_MIN_WAIT` and `LLM_RETRY_MAX_WAIT` are 15 seconds and 120 seconds respectively.
@@ -58,6 +58,6 @@ We have a few guides for running OpenHands with specific model providers:
 Some LLMs have rate limits and may require retries. OpenHands will automatically retry requests if it receives a 429 error or API connection error.
 You can set the following environment variables to control the number of retries and the time between retries:
 
-* `LLM_NUM_RETRIES` (Default of 5)
-* `LLM_RETRY_MIN_WAIT` (Default of 3 seconds)
-* `LLM_RETRY_MAX_WAIT` (Default of 60 seconds)
+* `LLM_NUM_RETRIES` (Default of 8)
+* `LLM_RETRY_MIN_WAIT` (Default of 15 seconds)
+* `LLM_RETRY_MAX_WAIT` (Default of 120 seconds)
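For illustration, a hedged sketch of how such variables are typically consumed, falling back to the new defaults when unset (the reader below is hypothetical, not the OpenHands loader):

```python
import os

# hypothetical reader: documented variable names, new defaults as fallbacks
num_retries = int(os.environ.get('LLM_NUM_RETRIES', '8'))
retry_min_wait = int(os.environ.get('LLM_RETRY_MIN_WAIT', '15'))
retry_max_wait = int(os.environ.get('LLM_RETRY_MAX_WAIT', '120'))
print(num_retries, retry_min_wait, retry_max_wait)  # 8 15 120 when unset
```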
@@ -65,10 +65,10 @@ class LLMConfig:
     aws_access_key_id: str | None = None
     aws_secret_access_key: str | None = None
     aws_region_name: str | None = None
-    num_retries: int = 10
+    num_retries: int = 8
     retry_multiplier: float = 2
-    retry_min_wait: int = 3
-    retry_max_wait: int = 300
+    retry_min_wait: int = 15
+    retry_max_wait: int = 120
     timeout: int | None = None
     max_message_chars: int = 10_000  # maximum number of characters in an observation's content when sent to the llm
     temperature: float = 0
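For orientation, the retry-related fields above as a standalone sketch (a plain dataclass stand-in, not the real LLMConfig):

```python
from dataclasses import dataclass


@dataclass
class RetrySettings:
    # defaults mirror LLMConfig after this change
    num_retries: int = 8
    retry_multiplier: float = 2
    retry_min_wait: int = 15
    retry_max_wait: int = 120


print(RetrySettings())  # RetrySettings(num_retries=8, retry_multiplier=2, ...)
```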
@@ -623,7 +623,7 @@ def get_llm_config_arg(
     model = 'gpt-3.5-turbo'
     api_key = '...'
     temperature = 0.5
-    num_retries = 10
+    num_retries = 8
     ...
     ```
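The docstring snippet above is a TOML section. A minimal sketch of parsing such a section with the standard library (Python 3.11+ `tomllib`; the `[llm]` section name is assumed from the example):

```python
import tomllib

snippet = """
[llm]
model = 'gpt-3.5-turbo'
temperature = 0.5
num_retries = 8
"""
config = tomllib.loads(snippet)
print(config['llm']['num_retries'])  # 8
```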
@@ -82,32 +82,31 @@ def format_messages(
 
     converted_messages = []
     for message in messages:
-        content_str = ''
+        content_parts = []
         role = 'user'
-        if 'role' in message:
-            role = message['role']
-        if isinstance(message, str):
-            content_str = content_str + message + '\n'
-            continue
-
-        if isinstance(message, dict):
-            if 'content' in message:
-                content_str = content_str + message['content'] + '\n'
+        if isinstance(message, str) and message:
+            content_parts.append(message)
+        elif isinstance(message, dict):
+            role = message.get('role', 'user')
+            if 'content' in message and message['content']:
+                content_parts.append(message['content'])
         elif isinstance(message, Message):
             role = message.role
             for content in message.content:
                 if isinstance(content, list):
                     for item in content:
-                        if isinstance(item, TextContent):
-                            content_str = content_str + item.text + '\n'
-                elif isinstance(content, TextContent):
-                    content_str = content_str + content.text + '\n'
+                        if isinstance(item, TextContent) and item.text:
+                            content_parts.append(item.text)
+                elif isinstance(content, TextContent) and content.text:
+                    content_parts.append(content.text)
         else:
             logger.error(
                 f'>>> `message` is not a string, dict, or Message: {type(message)}'
             )
 
-        if content_str:
+        if content_parts:
+            content_str = '\n'.join(content_parts)
             converted_messages.append(
                 {
                     'role': role,
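The rewrite replaces string concatenation with trailing newlines by collecting non-empty parts and joining once, which skips empty content and avoids a dangling newline. A simplified, self-contained sketch of that strategy (dict and str inputs only; Message/TextContent handling elided):

```python
def format_simple(messages):
    # collect non-empty parts per message, join once at the end
    converted = []
    for message in messages:
        content_parts = []
        role = 'user'
        if isinstance(message, str) and message:
            content_parts.append(message)
        elif isinstance(message, dict):
            role = message.get('role', 'user')
            if message.get('content'):
                content_parts.append(message['content'])
        if content_parts:
            converted.append({'role': role, 'content': '\n'.join(content_parts)})
    return converted


print(format_simple(['hi', {'role': 'assistant', 'content': 'hello'}, {'content': ''}]))
# [{'role': 'user', 'content': 'hi'}, {'role': 'assistant', 'content': 'hello'}]
```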
@@ -25,7 +25,7 @@ from tenacity import (
     retry,
     retry_if_exception_type,
     stop_after_attempt,
-    wait_random_exponential,
+    wait_exponential,
 )
 
 from openhands.core.exceptions import LLMResponseError, UserCancelledError
@@ -83,6 +83,17 @@ class LLM:
         except Exception as e:
             logger.warning(f'Could not get model info for {config.model}:\n{e}')
 
+        # Tuple of exceptions to retry on
+        self.retry_exceptions = (
+            APIConnectionError,
+            ContentPolicyViolationError,
+            InternalServerError,
+            OpenAIError,
+            RateLimitError,
+        )
+
+        litellm.set_verbose = True
+
         # Set the max tokens in an LM-specific way if not set
         if self.config.max_input_tokens is None:
             if (
@@ -122,33 +133,58 @@ class LLM:
             top_p=self.config.top_p,
         )
 
+        if self.vision_is_active():
+            logger.debug('LLM: model has vision enabled')
+
         completion_unwrapped = self._completion
 
         def attempt_on_error(retry_state):
             """Custom attempt function for litellm completion."""
             logger.error(
-                f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
+                f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
                 exc_info=False,
             )
             return None
 
-        @retry(
-            reraise=True,
-            stop=stop_after_attempt(self.config.num_retries),
-            wait=wait_random_exponential(
-                multiplier=self.config.retry_multiplier,
-                min=self.config.retry_min_wait,
-                max=self.config.retry_max_wait,
-            ),
-            retry=retry_if_exception_type(
-                (
-                    APIConnectionError,
-                    ContentPolicyViolationError,
-                    InternalServerError,
-                    OpenAIError,
-                    RateLimitError,
-                )
-            ),
-        )
+        def custom_completion_wait(retry_state):
+            """Custom wait function for litellm completion."""
+            if not retry_state:
+                return 0
+            exception = retry_state.outcome.exception() if retry_state.outcome else None
+            if exception is None:
+                return 0
+
+            min_wait_time = self.config.retry_min_wait
+            max_wait_time = self.config.retry_max_wait
+
+            # for rate limit errors, wait 1 minute by default, max 4 minutes between retries
+            exception_type = type(exception).__name__
+            logger.error(f'\nexception_type: {exception_type}\n')
+
+            if exception_type == 'RateLimitError':
+                min_wait_time = 60
+                max_wait_time = 240
+            elif exception_type == 'BadRequestError' and exception.response:
+                # this should give us the buried, actual error message from
+                # the LLM model.
+                logger.error(f'\n\nBadRequestError: {exception.response}\n\n')
+
+            # Return the wait time using exponential backoff
+            exponential_wait = wait_exponential(
+                multiplier=self.config.retry_multiplier,
+                min=min_wait_time,
+                max=max_wait_time,
+            )
+
+            # Call the exponential wait function with retry_state to get the actual wait time
+            return exponential_wait(retry_state)
+
+        @retry(
+            after=attempt_on_error,
+            stop=stop_after_attempt(self.config.num_retries),
+            reraise=True,
+            retry=retry_if_exception_type(self.retry_exceptions),
+            wait=custom_completion_wait,
+        )
        def wrapper(*args, **kwargs):
            """Wrapper for the litellm completion function. Logs the input and output of the completion function."""
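The key pattern here: tenacity accepts any callable as `wait=` that takes the retry state and returns a delay in seconds, so the code can widen the backoff window for rate limits and then delegate to a `wait_exponential` instance (itself callable with the retry state) for the actual delay. A standalone sketch of that pattern (the error class and flaky function are hypothetical):

```python
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential


class FakeRateLimitError(Exception):  # hypothetical stand-in for a 429
    pass


def custom_wait(retry_state):
    exc = retry_state.outcome.exception() if retry_state.outcome else None
    # rate limits get a wider window (60-240 s); everything else uses 15-120 s
    lo, hi = (60, 240) if isinstance(exc, FakeRateLimitError) else (15, 120)
    return wait_exponential(multiplier=2, min=lo, max=hi)(retry_state)


@retry(
    reraise=True,
    stop=stop_after_attempt(3),
    retry=retry_if_exception_type(FakeRateLimitError),
    wait=custom_wait,
)
def flaky_call():
    raise FakeRateLimitError('429: slow down')

# Calling flaky_call() would retry twice, waiting ~60 s each time, then re-raise.
```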
@@ -230,23 +266,11 @@ class LLM:
         async_completion_unwrapped = self._async_completion
 
         @retry(
-            reraise=True,
-            stop=stop_after_attempt(self.config.num_retries),
-            wait=wait_random_exponential(
-                multiplier=self.config.retry_multiplier,
-                min=self.config.retry_min_wait,
-                max=self.config.retry_max_wait,
-            ),
-            retry=retry_if_exception_type(
-                (
-                    APIConnectionError,
-                    ContentPolicyViolationError,
-                    InternalServerError,
-                    OpenAIError,
-                    RateLimitError,
-                )
-            ),
             after=attempt_on_error,
+            stop=stop_after_attempt(self.config.num_retries),
+            reraise=True,
+            retry=retry_if_exception_type(self.retry_exceptions),
+            wait=custom_completion_wait,
         )
         async def async_completion_wrapper(*args, **kwargs):
             """Async wrapper for the litellm acompletion function."""
@@ -336,23 +360,11 @@ class LLM:
             pass
 
         @retry(
-            reraise=True,
-            stop=stop_after_attempt(self.config.num_retries),
-            wait=wait_random_exponential(
-                multiplier=self.config.retry_multiplier,
-                min=self.config.retry_min_wait,
-                max=self.config.retry_max_wait,
-            ),
-            retry=retry_if_exception_type(
-                (
-                    APIConnectionError,
-                    ContentPolicyViolationError,
-                    InternalServerError,
-                    OpenAIError,
-                    RateLimitError,
-                )
-            ),
             after=attempt_on_error,
+            stop=stop_after_attempt(self.config.num_retries),
+            reraise=True,
+            retry=retry_if_exception_type(self.retry_exceptions),
+            wait=custom_completion_wait,
         )
         async def async_acompletion_stream_wrapper(*args, **kwargs):
             """Async wrapper for the litellm acompletion with streaming function."""
@@ -43,7 +43,7 @@ if LLAMA_INDEX_AVAILABLE:
 
     def attempt_on_error(retry_state):
        logger.error(
-            f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
+            f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize retry values in the configuration.',
             exc_info=False,
         )
         return None