add options for controlling memory (#1364)

* add options for controlling memory

* Update agenthub/monologue_agent/utils/memory.py

Co-authored-by: Jim Su <jimsu@protonmail.com>

* move memory initialization switch back

* fix lint

* fix type

---------

Co-authored-by: Jim Su <jimsu@protonmail.com>
Robert Brennan 2024-04-26 17:53:54 -04:00 committed by GitHub
parent c0adb55bfa
commit fd9e598136
4 changed files with 32 additions and 8 deletions

agenthub/monologue_agent/agent.py

@@ -4,6 +4,8 @@ from opendevin.state import State
 from opendevin.llm.llm import LLM
 from opendevin.schema import ActionType, ObservationType
 from opendevin.exceptions import AgentNoInstructionError
+from opendevin.schema.config import ConfigType
+from opendevin import config
 from opendevin.action import (
     Action,
@@ -27,7 +29,8 @@ from opendevin.observation import (
 import agenthub.monologue_agent.utils.prompts as prompts
 from agenthub.monologue_agent.utils.monologue import Monologue
-from agenthub.monologue_agent.utils.memory import LongTermMemory
+if config.get(ConfigType.AGENT_MEMORY_ENABLED):
+    from agenthub.monologue_agent.utils.memory import LongTermMemory
 
 MAX_MONOLOGUE_LENGTH = 20000
 MAX_OUTPUT_LENGTH = 5000
@@ -86,6 +89,8 @@ class MonologueAgent(Agent):
     """
     _initialized = False
     monologue: Monologue
+    memory: 'LongTermMemory | None'
 
     def __init__(self, llm: LLM):
         """
@@ -95,8 +100,6 @@ class MonologueAgent(Agent):
         - llm (LLM): The llm to be used by this agent
         """
         super().__init__(llm)
-        self.monologue = Monologue()
-        self.memory = LongTermMemory()
 
     def _add_event(self, event: dict):
         """
@@ -119,7 +122,8 @@
         )
 
         self.monologue.add_event(event)
-        self.memory.add_event(event)
+        if self.memory is not None:
+            self.memory.add_event(event)
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
             self.monologue.condense(self.llm)
@@ -141,8 +145,12 @@
         if task is None or task == '':
             raise AgentNoInstructionError()
         self.monologue = Monologue()
-        self.memory = LongTermMemory()
+        if config.get(ConfigType.AGENT_MEMORY_ENABLED):
+            self.memory = LongTermMemory()
+        else:
+            self.memory = None
 
         output_type = ''
         for thought in INITIAL_THOUGHTS:
@ -233,8 +241,14 @@ class MonologueAgent(Agent):
Returns:
- List[str]: A list of top 10 text results that matched the query
"""
if self.memory is None:
return []
return self.memory.search(query)
def reset(self) -> None:
super().reset()
self.monologue = Monologue()
if config.get(ConfigType.AGENT_MEMORY_ENABLED):
self.memory = LongTermMemory()
else:
self.memory = None
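
Taken together, the agent.py changes follow an optional-feature pattern: the flag gates the import, the attribute is annotated as possibly None, and every call site checks before use. Below is a minimal, self-contained sketch of that pattern; DummyMemory and the environment lookup are illustrative stand-ins for LongTermMemory and config.get, not code from this commit.

import os

# Stand-in for config.get(ConfigType.AGENT_MEMORY_ENABLED).
AGENT_MEMORY_ENABLED = os.environ.get('AGENT_MEMORY_ENABLED', 'false') == 'true'


class DummyMemory:
    """Stand-in for LongTermMemory; records events in a plain list."""

    def __init__(self) -> None:
        self.events: list[dict] = []

    def add_event(self, event: dict) -> None:
        self.events.append(event)

    def search(self, query: str, k: int = 10) -> list[str]:
        return [str(e) for e in self.events if query in str(e)][:k]


class Agent:
    memory: 'DummyMemory | None'

    def __init__(self) -> None:
        # Mirrors initialize()/reset(): memory exists only when the flag is on.
        self.memory = DummyMemory() if AGENT_MEMORY_ENABLED else None

    def add_event(self, event: dict) -> None:
        if self.memory is not None:  # no-op when memory is disabled
            self.memory.add_event(event)

    def search_memory(self, query: str) -> list[str]:
        if self.memory is None:  # same guard the commit adds to search_memory
            return []
        return self.memory.search(query)

Gating the import itself, not just the construction, matters here: memory.py pulls in llama_index and chromadb at import time, so with the flag off that stack is never loaded.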

agenthub/monologue_agent/utils/memory.py

@@ -1,5 +1,5 @@
 import llama_index.embeddings.openai.base as llama_openai
-from threading import Thread
+import threading
 import chromadb
 from llama_index.core import Document
@@ -86,6 +86,9 @@ else:
     )
 
+sema = threading.Semaphore(value=config.get(ConfigType.AGENT_MEMORY_MAX_THREADS))
+
+
 class LongTermMemory:
     """
     Responsible for storing information that the agent can call on later for better insights and context.
@@ -102,6 +105,7 @@ class LongTermMemory:
         self.index = VectorStoreIndex.from_vector_store(
             vector_store, embed_model=embed_model)
         self.thought_idx = 0
+        self._add_threads = []
 
     def add_event(self, event: dict):
         """
@@ -129,11 +133,13 @@ class LongTermMemory:
         )
         self.thought_idx += 1
         logger.debug('Adding %s event to memory: %d', t, self.thought_idx)
-        thread = Thread(target=self._add_doc, args=(doc,))
+        thread = threading.Thread(target=self._add_doc, args=(doc,))
+        self._add_threads.append(thread)
         thread.start()  # We add the doc concurrently so we don't have to wait ~500ms for the insert
 
     def _add_doc(self, doc):
-        self.index.insert(doc)
+        with sema:
+            self.index.insert(doc)
 
     def search(self, query: str, k: int = 10):
         """

opendevin/config.py

@@ -36,6 +36,8 @@ DEFAULT_CONFIG: dict = {
     ConfigType.LLM_RETRY_MIN_WAIT: 3,
     ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.MAX_ITERATIONS: 100,
+    ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
+    ConfigType.AGENT_MEMORY_ENABLED: False,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
     # we cannot easily count number of tokens, but we can count characters.
     # Assuming 5 characters per token, 5 million is a reasonable default limit.
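
A quick check of the arithmetic in that MAX_CHARS comment, using the 5-characters-per-token heuristic the comment itself assumes:

# 5,000,000 chars at ~5 chars/token is ~1,000,000 tokens;
# at $10 per 1M GPT-4 input tokens, the default cap bounds a run at ~$10.
max_chars = 5_000_000
tokens = max_chars / 5            # ~1,000,000 tokens
cost = tokens / 1_000_000 * 10    # ~$10.0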

opendevin/schema/config.py

@@ -18,6 +18,8 @@ class ConfigType(str, Enum):
     LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
     LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
     LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
+    AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
+    AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
     MAX_ITERATIONS = 'MAX_ITERATIONS'
     MAX_CHARS = 'MAX_CHARS'
     AGENT = 'AGENT'
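
With the two keys registered in the enum and given defaults in config.py, callers read them through the same accessor used in the diffs above; a minimal usage sketch:

from opendevin import config
from opendevin.schema.config import ConfigType

memory_enabled = config.get(ConfigType.AGENT_MEMORY_ENABLED)    # defaults to False
max_threads = config.get(ConfigType.AGENT_MEMORY_MAX_THREADS)   # defaults to 2

Like the other ConfigType keys, these can presumably be overridden through the usual config sources (environment variables or config.toml) rather than by editing DEFAULT_CONFIG.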