memory condenser rewrite

opendevin: rework and doc according to review
mypy, algo fixes
This commit is contained in:
Engel Nyst
2024-05-23 17:57:05 +02:00
parent ef6cdb7532
commit a726111f27
9 changed files with 428 additions and 54 deletions

View File

@@ -30,7 +30,6 @@ from opendevin.memory.condenser import MemoryCondenser
if config.agent.memory_enabled:
from opendevin.memory.memory import LongTermMemory
MAX_TOKEN_COUNT_PADDING = 512
MAX_OUTPUT_LENGTH = 5000
@@ -45,7 +44,7 @@ class MonologueAgent(Agent):
_initialized = False
initial_thoughts: list[dict[str, str]]
memory: 'LongTermMemory | None'
memory_condenser: MemoryCondenser
memory_condenser: MemoryCondenser | None
def __init__(self, llm: LLM):
"""
@@ -55,6 +54,8 @@ class MonologueAgent(Agent):
- llm (LLM): The llm to be used by this agent
"""
super().__init__(llm)
self.memory = None
self.memory_condenser = None
def _initialize(self, task: str):
"""
@@ -64,7 +65,7 @@ class MonologueAgent(Agent):
Will execute again when called after reset.
Parameters:
- task (str): The initial goal statement provided by the user
- task: The initial goal statement provided by the user
Raises:
- AgentNoInstructionError: If task is not provided
@@ -82,7 +83,7 @@ class MonologueAgent(Agent):
else:
self.memory = None
self.memory_condenser = MemoryCondenser()
# self.memory_condenser = MemoryCondenser(action_prompt=prompts.get_action_prompt)
self._add_initial_thoughts(task)
self._initialized = True
@@ -139,10 +140,10 @@ class MonologueAgent(Agent):
Modifies the current state by adding the most recent actions and observations, then prompts the model to think about its next action to take using monologue, memory, and hint.
Parameters:
- state (State): The current state based on previous steps taken
- state: The current state based on previous steps taken
Returns:
- Action: The next action to take based on LLM response
- The next action to take based on LLM response
"""
goal = state.get_current_user_intent()
@@ -169,7 +170,7 @@ class MonologueAgent(Agent):
goal,
self.initial_thoughts,
recent_events,
state.background_commands_obs,
state.background_commands_obs, # FIXME is this part of recent_events?
)
messages: list[dict[str, str]] = [
@@ -222,10 +223,10 @@ class MonologueAgent(Agent):
Uses search to produce top 10 results.
Parameters:
- query (str): The query that we want to find related memories for
- The query that we want to find related memories for
Returns:
- list[str]: A list of top 10 text results that matched the query
- A list of top 10 text results that matched the query
"""
if self.memory is None:
return []

View File

@@ -72,23 +72,34 @@ MONOLOGUE_SUMMARY_PROMPT = """
Below is the internal monologue of an automated LLM agent. Each
thought is an item in a JSON array. The thoughts may be memories,
actions taken by the agent, or outputs from those actions.
Please return a new, smaller JSON array, which summarizes the
internal monologue. You can summarize individual thoughts, and
you can condense related thoughts together with a description
of their content.
The monologue has two parts: the default memories, which you must not change,
they are provided to you only for context, and the recent monologue.
Please return a new, much smaller JSON array that summarizes the recent monologue.
When summarizing, you should condense the events that appear earlier
in the recent monologue list more aggressively, while preserving more details
for the events that appear later in the list.
You can summarize individual thoughts, and you can condense related thoughts
together with a description of their content.
%(monologue)s
Make the summaries as pithy and informative as possible.
Make the summaries as pithy and informative as possible, especially for the earlier events
in the old monologue.
Be specific about what happened and what was learned. The summary
will be used as keywords for searching for the original memory.
Be sure to preserve any key words or important information.
Your response must be in JSON format. It must be an object with the
key `new_monologue`, which is a JSON array containing the summarized monologue.
Each entry in the array must have an `action` key, and an `args` key.
The action key may be `summarize`, and `args.summary` should contain the summary.
You can also use the same action and args from the source monologue.
Your response must be in JSON format. It must be an object with the key `new_monologue`,
which must be a smaller JSON array containing the summarized monologue.
Each entry in the new monologue must have an `action` key, and an `args` key.
You can add a summarized entry with `action` set to "summarize" and a concise summary
in `args.summary`. You can also use the source recent event if relevant, with its original `action` and `args`.
Remember you must only summarize the old monologue, not the default memories.
"""
INITIAL_THOUGHTS = [
@@ -137,7 +148,7 @@ INITIAL_THOUGHTS = [
]
def get_summarize_monologue_prompt(thoughts: list[dict]):
def get_summarize_monologue_prompt(recent_events: list[dict]):
"""
Gets the prompt for summarizing the monologue
@@ -145,13 +156,13 @@ def get_summarize_monologue_prompt(thoughts: list[dict]):
- str: A formatted string with the current monologue within the prompt
"""
return MONOLOGUE_SUMMARY_PROMPT % {
'monologue': json.dumps({'old_monologue': thoughts}, indent=2),
'monologue': json.dumps({'old_monologue': recent_events}, indent=2),
}
def get_request_action_prompt(
task: str,
thoughts: list[dict],
default_events: list[dict],
recent_events: list[dict],
background_commands_obs: list[CmdOutputObservation] | None = None,
):
@@ -159,9 +170,9 @@ def get_request_action_prompt(
Gets the action prompt formatted with appropriate values.
Parameters:
- task (str): The current task the agent is trying to accomplish
- thoughts (list[dict]): The agent's current thoughts
- background_commands_obs (list[CmdOutputObservation]): list of all observed background commands running
- task: The current task the agent is trying to accomplish
- thoughts: The agent's current thoughts
- background_commands_obs: list of all observed background commands running
Returns:
- str: Formatted prompt string with hint, task, monologue, and background commands included
@@ -171,7 +182,7 @@ def get_request_action_prompt(
background_commands_obs = []
hint = ''
if len(recent_events) > 0:
if recent_events is not None and len(recent_events) > 0:
latest_event = recent_events[-1]
if 'action' in latest_event:
if (
@@ -198,7 +209,7 @@ def get_request_action_prompt(
user = 'opendevin' if config.run_as_devin else 'root'
monologue = thoughts + recent_events
monologue = default_events + recent_events
return ACTION_PROMPT % {
'task': task,
@@ -207,19 +218,103 @@ def get_request_action_prompt(
'hint': hint,
'user': user,
'timeout': config.sandbox_timeout,
'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
# unused 'workspace_mount_path_in_sandbox': config.workspace_mount_path_in_sandbox,
}
def get_action_prompt_template(
    task: str,
    default_events: list[dict],
    background_commands_obs: list[CmdOutputObservation],
) -> str:
    """
    Build the action prompt with default_events pre-filled, leaving placeholders
    for recent_events and hint to be substituted later.

    Parameters:
    - task: The current task the agent is trying to accomplish
    - default_events: The default events to include in the prompt
    - background_commands_obs: list of all observed background commands running

    Returns:
    - str: Formatted prompt template string with default_events pre-filled and
      `%(recent_events)s` / `%(hint)s` placeholders left for a later pass
    """
    if config.run_as_devin:
        user = 'opendevin'
    else:
        user = 'root'
    # The `%(recent_events)s` and `%(hint)s` tokens survive this formatting pass
    # untouched, because only the template itself is scanned by the % operator.
    values = {
        'task': task,
        'monologue': json.dumps(default_events, indent=2) + '%(recent_events)s',
        'background_commands': format_background_commands(background_commands_obs),
        'hint': '%(hint)s',
        'user': user,
        'timeout': config.sandbox_timeout,
    }
    return ACTION_PROMPT % values
def get_action_prompt_for_summarization(
    prompt_template: str,
    recent_events: list[dict],
) -> str:
    """
    Fill an action prompt template with recent_events and a generated hint.

    Parameters:
    - prompt_template: The prompt template with placeholders for recent_events and hint
    - recent_events: The recent events to include in the prompt

    Returns:
    - str: Formatted prompt string with recent_events and a generated hint included
    """

    def _hint_from(latest: dict) -> str:
        # Choose a nudge for the agent based on the most recent event.
        action = latest.get('action')
        if action == 'message':
            if latest['args']['content'].startswith('OK so my task is'):
                return "You're just getting started! What should you do first?"
            return "You've been thinking a lot lately. Maybe it's time to take action?"
        if action == 'error':
            return 'Looks like that last command failed. Maybe you need to fix it, or try something else.'
        return ''

    hint = _hint_from(recent_events[-1]) if recent_events else ''
    return prompt_template % {
        'recent_events': json.dumps(recent_events, indent=2),
        'hint': hint,
    }
def format_background_commands(
    background_commands_obs: list[CmdOutputObservation] | None,
) -> str:
    """
    Render the running background commands as a text section for the prompt.

    Parameters:
    - background_commands_obs: list of all background commands running, or None

    Returns:
    - Formatted string with all background commands, or '' when none are running
    """
    if not background_commands_obs:
        return ''
    lines = ['The following commands are running in the background:']
    lines.extend(
        f'`{obs.command_id}`: {obs.command}' for obs in background_commands_obs
    )
    lines.append(
        'You can end any process by sending a `kill` action with the numerical `command_id` above.'
    )
    return '\n'.join(lines)
def parse_action_response(orig_response: str) -> Action:
"""
Parses a string to find an action within it
Parameters:
- response (str): The string to be parsed
- orig_response: The string to be parsed
Returns:
- Action: The action that was found in the response string
- The action that was found in the response string
"""
# attempt to load the JSON dict from the response
action_dict = json.loads(orig_response)
@@ -231,15 +326,30 @@ def parse_action_response(orig_response: str) -> Action:
return action_from_dict(action_dict)
def get_summarize_prompt(default_events: list[dict], recent_events: list[dict]):
    """
    Build the prompt for summarizing the monologue.

    Parameters:
    - default_events: Events from the initial prompt, passed only as context
    - recent_events: The recent monologue to be summarized

    Returns:
    - A formatted string with the current monologue within the prompt
    """
    monologue = {
        'default_memories': default_events,
        'old_monologue': recent_events,
    }
    return MONOLOGUE_SUMMARY_PROMPT % {'monologue': json.dumps(monologue, indent=2)}
def parse_summary_response(response: str) -> list[dict]:
"""
Parses a summary of the monologue
Parameters:
- response (str): The response string to be parsed
- response: The response string to be parsed
Returns:
- list[dict]: The list of summaries output by the model
- The list of summaries output by the model
"""
parsed = json.loads(response)
return parsed['new_monologue']

View File

@@ -34,6 +34,8 @@ class ObservationTypeSchema(BaseModel):
"""The result of a task delegated to another agent
"""
SUMMARY: str = Field(default='summary')
MESSAGE: str = Field(default='message')
ERROR: str = Field(default='error')

View File

@@ -0,0 +1,20 @@
from dataclasses import dataclass
from opendevin.core.schema.observation import ObservationType
from opendevin.events.observation.observation import Observation
@dataclass
class SummaryObservation(Observation):
    """Represents a summary observation of multiple agent actions."""

    # Set to 'low' by the memory condenser for condensed chunks; other values
    # (e.g. 'high') appear to mark events kept verbatim — TODO confirm the set.
    priority: str | None = None
    observation: str = ObservationType.SUMMARY

    def to_dict(self) -> dict:
        """Convert the SummaryObservation instance to a dictionary."""
        # NOTE(review): `content` is presumably a field inherited from the
        # Observation base class — verify against its definition.
        return {
            'observation': self.observation,
            'content': self.content,
            'priority': self.priority,
        }

View File

@@ -1,5 +1,12 @@
from .condenser import MemoryCondenser
from .history import ShortTermHistory
from .memory import LongTermMemory
from .prompts import get_summarize_prompt, parse_summary_response
__all__ = ['LongTermMemory', 'ShortTermHistory', 'MemoryCondenser']
__all__ = [
'get_summarize_prompt',
'parse_summary_response',
'LongTermMemory',
'ShortTermHistory',
'MemoryCondenser',
]

View File

@@ -1,26 +1,132 @@
from opendevin.core.logger import opendevin_logger as logger
from opendevin.events.event import Event, EventSource
from opendevin.events.observation.summary import SummaryObservation
from opendevin.llm.llm import LLM
MAX_TOKEN_COUNT_PADDING = (
512 # estimation of tokens to add to the prompt for the max token count
)
class MemoryCondenser:
def condense(self, summarize_prompt: str, llm: LLM):
"""
Condenses the prompt with a call to the LLM.
"""
def __init__(
self,
llm: LLM,
max_context_limit: int | None = None,
):
"""
Attempts to condense the monologue by using the llm
Initialize the MemoryCondenser.
llm is the language model to use for summarization.
max_context_limit is an optional integer specifying the maximum context limit for the LLM.
If not provided, the condenser will act lazily and only condense when a context window limit error occurs.
Parameters:
- llm (LLM): llm to be used for summarization
Raises:
- Exception: the same exception as it got from the llm or processing the response
- llm: The language model to use for summarization.
- max_context_limit: Optional integer specifying the maximum context limit for the LLM.
"""
self.llm = llm
self.max_context_limit = max_context_limit
def condense(
    self,
    events: list[Event],
) -> list[Event]:
    """
    Condenses the given list of events using the llm. Returns the condensed list of events.

    Condensation heuristics:
    - Keep initial messages (system, user instruction)
    - Prioritize more recent history
    - Lazily summarize between initial instruction and most recent, starting with earliest condensable turns
    - Introduce a SummaryObservation event type for textual summaries
    - Split events into chunks delimited by user message actions, condense each chunk into a sentence

    Parameters:
    - events: List of events to condense.

    Returns:
    - The condensed list of events.
    """

    def _flush_chunk(chunk: list[Event], out: list[Event]) -> None:
        # Summarize the accumulated chunk into a single low-priority
        # SummaryObservation and append it to the condensed list.
        if not chunk:
            return
        summary = self._summarize_chunk(chunk)
        summary_observation = SummaryObservation(
            content=summary,
            priority='low',
        )
        # BUGFIX: one branch of the original assigned the tuple
        # (EventSource.USER,) here instead of the enum value.
        # NOTE(review): attributing the summary to USER looks odd — confirm
        # whether EventSource.AGENT was intended.
        summary_observation._source = EventSource.USER  # type: ignore [attr-defined]
        out.append(summary_observation)

    condensed_events: list[Event] = []
    chunk: list[Event] = []
    for event in events:
        if event.source == EventSource.USER:
            # User messages are kept verbatim and delimit condensable chunks.
            # NOTE(review): the summary of the preceding chunk lands *after*
            # the delimiter event, matching the original order — confirm intent.
            condensed_events.append(event)
            _flush_chunk(chunk, condensed_events)
            chunk = []
        elif getattr(event, 'priority', None) == 'high':
            # High-priority events are also kept verbatim.
            condensed_events.append(event)
            _flush_chunk(chunk, condensed_events)
            chunk = []
        else:
            chunk.append(event)
    # Summarize any trailing chunk after the last delimiter.
    _flush_chunk(chunk, condensed_events)
    return condensed_events
def _summarize_chunk(self, chunk: list[Event]) -> str:
    """
    Summarizes the given chunk of events into a single sentence.

    Parameters:
    - chunk: List of events to summarize.

    Returns:
    - The summary sentence.

    Raises:
    - Exception: re-raises whatever the llm call or response handling raised.
    """
    try:
        prompt = f'Please summarize the following events into a single sentence:\n\n{chunk}\n\nSummary:'
        messages = [{'role': 'user', 'content': prompt}]
        response = self.llm.do_completion(messages=messages)
        summary = response['choices'][0]['message']['content']
        return summary
    except Exception as e:
        logger.error(f'Failed to summarize chunk: {e}')
        # TODO: If the llm fails with ContextWindowExceededError, we can try to
        # condense the monologue chunk by chunk.
        raise
def _estimate_token_count(self, events: list[dict]) -> int:
    """
    Estimates the token count of the given events using a rough tokenizer.

    The estimate is a whitespace word count plus a fixed padding
    (MAX_TOKEN_COUNT_PADDING) to leave headroom in the context window.

    Parameters:
    - events: List of events to estimate the token count for.

    Returns:
    - Estimated token count.
    """
    token_count = 0
    for event in events:
        # Events without textual content contribute no tokens; the original
        # `event['content']` indexing raised KeyError on such events.
        content = event.get('content') or ''
        token_count += len(str(content).split())
    return token_count + MAX_TOKEN_COUNT_PADDING

View File

@@ -6,14 +6,35 @@ from opendevin.core.logger import opendevin_logger as logger
class ShortTermHistory:
"""
The short term history is the most recent series of events.
The short term history includes core events, which the agent learned in the initial prompt, and recent events of interest from the event stream.
An agent can send this in the prompt or use it for other purpose.
The list of recent events may be condensed when it is too long, if the agent uses the memory condenser.
"""
def __init__(self):
"""
Initialize the empty list of events
Initialize the empty lists of events
"""
self.events = []
# the list of events that the agent had in this session
self.recent_events = []
# default events are events that the agent learned in the initial prompt
self.default_events = []
def add_default_event(self, event_dict: dict):
    """
    Add an event to the default memory (sent in every prompt), if it is valid.

    Parameters:
    - event_dict (dict): The event that we want to add to memory

    Raises:
    - AgentEventTypeError: If event_dict is not a dict
    """
    if isinstance(event_dict, dict):
        self.default_events.append(event_dict)
    else:
        raise AgentEventTypeError()
def add_event(self, event_dict: dict):
"""
@@ -27,18 +48,46 @@ class ShortTermHistory:
"""
if not isinstance(event_dict, dict):
raise AgentEventTypeError()
self.events.append(event_dict)
def get_events(self):
# add to the list of recent events
self.recent_events.append(event_dict)
def get_events(self) -> list[dict]:
"""
Get the events in the agent's recent history.
Get the events in the agent's recent history, including core knowledge (the events it learned in the initial prompt).
Returns:
- List: The list of events that the agent remembers easily.
"""
return self.events
return self.recent_events + self.default_events
def get_total_length(self):
def get_default_events(self) -> list[dict]:
    """
    Return the events from the agent's initial prompt (its core knowledge).

    Returns:
    - List: The list of core events.
    """
    return self.default_events
def get_recent_events(self, num_events=None) -> list[dict]:
    """
    Get the most recent events in the agent's short term history.
    Will not return default events.

    Parameters:
    - num_events (int): The number of recent events to return, defaults to all events.

    Returns:
    - List: The list of the most recent events.
    """
    if num_events is None:
        return self.recent_events
    if num_events <= 0:
        # BUGFIX: `self.recent_events[-num_events:]` returned the WHOLE list
        # for num_events == 0, because -0 == 0; return none instead.
        return []
    return self.recent_events[-num_events:]
def get_total_length(self) -> int:
"""
Gives the total number of characters in all history
@@ -46,7 +95,7 @@ class ShortTermHistory:
- Int: Total number of characters of the recent history.
"""
total_length = 0
for t in self.events:
for t in self.recent_events:
try:
total_length += len(json.dumps(t))
except TypeError as e:

View File

@@ -0,0 +1,49 @@
from opendevin.core.utils import json
# Prompt template for condensing a list of agent events into a smaller JSON
# array. Filled via printf-style mapping formatting: %(events)s is replaced by
# the JSON-serialized event list in get_summarize_prompt().
SUMMARY_PROMPT = """
Below is a list of events representing the history of an automated agent. Each event is an item in a JSON array.
The events may be memories, actions taken by the agent, or outputs from those actions.
Please return a new, much smaller JSON array that summarizes the events. When summarizing, you should condense the events that appear
earlier in the list more aggressively, while preserving more details for the events that appear later in the list.
You can summarize individual events, and you can condense related events together with a description of their content.
%(events)s
Make the summaries as concise and informative as possible, especially for the earlier events in the list.
Be specific about what happened and what was learned. The summary will be used as keywords for searching for the original event.
Be sure to preserve any key words or important information.
Your response must be in JSON format. Each entry in the new monologue must have an `action` key, and an `args` key.
You can add a summarized entry with `action` set to "summarize" and a concise summary in `args.summary`.
You can also use the source event if relevant, with its original `action` and `args`.
It must be an object with the key `summarized_events`, which must be a smaller JSON array containing the summarized events.
"""
def get_summarize_prompt(events: list[dict]) -> str:
    """
    Build the summarization prompt for the given events.

    Parameters:
    - events: The events to embed in the prompt

    Returns:
    - A formatted string with the current events within the prompt
    """
    serialized = json.dumps(events, indent=2)
    return SUMMARY_PROMPT % {'events': serialized}
def parse_summary_response(response: str) -> list[dict]:
    """
    Parse the model's summary of the events.

    Parameters:
    - response: The JSON response string to be parsed

    Returns:
    - The list of summarized events output by the model
    """
    return json.loads(response)['summarized_events']

View File

@@ -0,0 +1,30 @@
from agenthub.monologue_agent.utils.prompts import (
format_background_commands,
)
from opendevin.core.schema.observation import ObservationType
from opendevin.events.observation.commands import CmdOutputObservation
def test_format_background_commands():
    """Check that format_background_commands lists each running command plus the header line."""
    # NOTE(review): command_id is passed as a string here, but the formatted
    # message refers to a "numerical `command_id`" — confirm the expected type.
    background_commands_obs = [
        CmdOutputObservation(
            command_id='1',
            command='python server.py',
            observation=ObservationType.RUN,
            exit_code=0,
            content='some content',
        ),
        CmdOutputObservation(
            command_id='2',
            command='npm start',
            observation=ObservationType.RUN,
            exit_code=0,
            content='some content',
        ),
    ]
    formatted_commands = format_background_commands(background_commands_obs)
    # Each command string must appear, prefixed by the fixed header sentence.
    assert 'python server.py' in formatted_commands
    assert 'npm start' in formatted_commands
    assert 'The following commands are running in the background:' in formatted_commands