add retries, tweak prompt

2026-03-22 05:37:20 +08:00 · 2024-06-28 05:25:55 +02:00
parent 6b2678acb7
commit 13b0ae18e8
6 changed files with 73 additions and 47 deletions
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@@ -12,7 +12,7 @@ from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.core.exceptions import (
    ContextWindowLimitExceededError,
-    TokenLimitExceedError,
+    TokenLimitExceededError,
 )
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action import (
@@ -227,7 +227,7 @@ class CodeActAgent(Agent):
                    ],
                    temperature=0.0,
                )
-            except (ContextWindowExceededError, TokenLimitExceedError):
+            except (ContextWindowExceededError, TokenLimitExceededError):
                logger.warning(
                    'Context window exceeded or token limit exceeded. Condensing memory, attempt %d',
                    attempt,
@@ -266,6 +266,9 @@ class CodeActAgent(Agent):
        for event in state.history.get_events():
            # split summarize message into action and observation
            if isinstance(event, AgentSummarizeAction):
+                logger.debug(
+                    f'Including summarize action: {event.id}({event._chunk_start}, {event._chunk_end}) \n{event.summarized_actions} \n{event.summarized_observations}'
+                )
                action_message = get_action_message(event)
                if action_message:
                    messages.append(action_message)
--- a/opendevin/core/exceptions.py
+++ b/opendevin/core/exceptions.py
@@ -94,7 +94,7 @@ class ContextWindowLimitExceededError(Exception):
        super().__init__(message)


-class TokenLimitExceedError(Exception):
+class TokenLimitExceededError(Exception):
    """Exception raised when the user-defined max_input_tokens limit is exceeded."""

    def __init__(self, message='User-defined token limit exceeded. Condensing memory.'):
--- a/opendevin/events/stream.py
+++ b/opendevin/events/stream.py
@@ -76,7 +76,6 @@ class EventStream:
                    if filter_out_type is None or not isinstance(
                        event, filter_out_type
                    ):
-                        logger.debug(f'{event_id}: {event}')
                        yield event
                except FileNotFoundError:
                    logger.debug(f'No event found for ID {event_id}')
@@ -91,7 +90,6 @@ class EventStream:
                    if filter_out_type is None or not isinstance(
                        event, filter_out_type
                    ):
-                        logger.debug(f'{event_id}: {event}')
                        yield event
                except FileNotFoundError:
                    logger.debug(f'No event found for ID {event_id}')
--- a/opendevin/llm/llm.py
+++ b/opendevin/llm/llm.py
@@ -1,7 +1,7 @@
 import warnings
 from functools import partial

-from opendevin.core.exceptions import TokenLimitExceedError
+from opendevin.core.exceptions import TokenLimitExceededError

 with warnings.catch_warnings():
    warnings.simplefilter('ignore')
@@ -203,7 +203,7 @@ class LLM:
                messages = args[1]

            if self.is_over_token_limit(messages):
-                raise TokenLimitExceedError(
+                raise TokenLimitExceededError(
                    f'Token count exceeds the maximum of {self.max_input_tokens}. Attempting to condense memory.'
                )

--- a/opendevin/memory/condenser.py
+++ b/opendevin/memory/condenser.py
@@ -1,4 +1,9 @@
-from opendevin.core.exceptions import InvalidSummaryResponseError
+from litellm.exceptions import ContextWindowExceededError
+
+from opendevin.core.exceptions import (
+    InvalidSummaryResponseError,
+    TokenLimitExceededError,
+)
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action.agent import (
    AgentFinishAction,
@@ -76,15 +81,19 @@ class MemoryCondenser:
            if not self._is_summarizable(event):
                if chunk and len(chunk) > 1:
                    # we've just gathered a chunk to summarize
-                    summary_action = self._summarize_chunk(chunk)
+                    # if invalid, skip and do next chunk
+                    if chunk_start_id is None or last_summarizable_id is None:
+                        logger.debug('Chunk start or end is None, skipping')
+                        # reset and try to continue with the next chunk
+                        chunk = []
+                        chunk_start_id = None
+                        last_summarizable_id = None
+                        continue

-                    # mypy is happy with assert, and in fact it cannot/should not be None
-                    assert chunk_start_id is not None
-                    summary_action._chunk_start = chunk_start_id
-
-                    # same here, a gift for mypy during development
-                    assert last_summarizable_id is not None
-                    summary_action._chunk_end = last_summarizable_id
+                    # good to go
+                    summary_action = self._summarize_chunk(
+                        chunk, chunk_start_id, last_summarizable_id
+                    )

                    # add it directly to history, so the agent only has to retry the request
                    history.add_summary(summary_action)
@@ -102,19 +111,22 @@ class MemoryCondenser:
                chunk.append(event)

        if chunk and len(chunk) > 1:
-            summary_action = self._summarize_chunk(chunk)
+            # the loop ended with a chunk still pending; summarize it
+            # if its boundaries are invalid, skip it (there is no next chunk)
+            if chunk_start_id is None or last_summarizable_id is None:
+                logger.debug('Chunk start or end is None, skipping')
+                # reset and don't do anything
+                chunk = []
+                chunk_start_id = None
+                last_summarizable_id = None
+            else:
+                # good to go
+                summary_action = self._summarize_chunk(
+                    chunk, chunk_start_id, last_summarizable_id
+                )

-            # keep mypy happy
-            assert chunk_start_id is not None
-            summary_action._chunk_start = chunk_start_id
-
-            # same here
-            assert last_summarizable_id is not None
-            summary_action._chunk_end = (
-                last_summarizable_id  # history.get_latest_event_id()
-            )
-            history.add_summary(summary_action)
-            return summary_action
+                history.add_summary(summary_action)
+                return summary_action

        # no more chunks of agent actions or observations
        # then summarize individual user messages that exceed a certain length
@@ -129,9 +141,7 @@ class MemoryCondenser:
                    not last_event_was_finish
                    and len(event.content) > MAX_USER_MESSAGE_CHAR_COUNT
                ):
-                    summary_action = self._summarize_chunk([event])
-                    summary_action._chunk_start = event.id
-                    summary_action._chunk_end = event.id
+                    summary_action = self._summarize_chunk([event], event.id, event.id)
                    history.add_summary(summary_action)
                    return summary_action
                last_event_was_finish = False
@@ -140,7 +150,9 @@ class MemoryCondenser:

        return None

-    def _summarize_chunk(self, chunk: list[Event]) -> AgentSummarizeAction:
+    def _summarize_chunk(
+        self, chunk: list[Event], chunk_start: int, chunk_end: int
+    ) -> AgentSummarizeAction:
        """
        Summarizes the given chunk of events into a single sentence.

@@ -166,14 +178,28 @@ class MemoryCondenser:

                    action_response = response['choices'][0]['message']['content']
                    action = parse_summary_response(action_response)
+                    action._chunk_start = chunk_start
+                    action._chunk_end = chunk_end
+                    logger.debug(f'action = {action}')
                    break
                except InvalidSummaryResponseError as e:
-                    logger.error(f'Failed to summarize chunk: {e}')
-
                    if failed_response:
                        # we've already tried summarizing this chunk, so we're stuck
                        raise
                    failed_response = str(e)
+                except (ContextWindowExceededError, TokenLimitExceededError):
+                    if len(chunk) == 1:
+                        # we can't split a single event
+                        raise
+
+                    # split the chunk into two and try again
+                    mid = len(chunk) // 2
+                    half_chunk = chunk[:mid]
+                    action = self._summarize_chunk(
+                        half_chunk, chunk_start, half_chunk[-1].id
+                    )
+                    logger.debug(f'action = {action}')
+                    break

            return action
        except Exception as e:
--- a/opendevin/memory/prompts.py
+++ b/opendevin/memory/prompts.py
@@ -13,18 +13,17 @@ SUMMARY_PROMPT = """
 Given the following actions and observations, create a JSON response with:
    - "action": "summarize"
    - args:
-      - "summarized_actions": A sentence summarizing all the provided actions, at first person
-      - "summarized observations": A few sentences summarizing all the provided observations, at third person
-
+      - "summarized_actions": A precise sentence summarizing all the provided actions, written in the first person.
+      - "summarized_observations": A few precise sentences summarizing all the provided observations, written in the third person.
 Example:
-        {
-            "action": "summarize",
-            "args": {
-                "summarized_actions": "I opened the uml file.",
-                "summarized observations": "The agent ran a python script to open the uml.pdf file."
-            }
-        }
-Make sure to include in observations any relevant information that the agent should remember.
+{
+    "action": "summarize",
+    "args": {
+        "summarized_actions": "I located the UML specification PDF, parsed its content, and searched for information about sequence diagrams.",
+        "summarized_observations": "The agent encountered a UnicodeDecodeError when initially searching the PDF text, but was able to resolve this by installing the PyPDF2 library and successfully extracting relevant information about sequence diagrams."
+    }
+}
+Make sure to include in observations any relevant information that the agent needs to remember.
 %(events)s
 """

@@ -59,8 +58,8 @@ def parse_summary_response(response: str) -> AgentSummarizeAction:
            raise InvalidSummaryResponseError(error_message)
        action._source = EventSource.AGENT  # type: ignore
    except (LLMResponseError, LLMMalformedActionError) as e:
-        logger.error(f'Failed to parse summary response: {e}')
+        logger.error(f'Failed to parse summary response: {str(e)}')
        raise InvalidSummaryResponseError(
-            'Failed to parse the response: {str(e)}'
+            f'Failed to parse the response: {str(e)}'
        ) from e
    return action