From fd9e5981369a7bc5e5b779ca4ca0e30b3c793744 Mon Sep 17 00:00:00 2001
From: Robert Brennan
Date: Fri, 26 Apr 2024 17:53:54 -0400
Subject: [PATCH] add options for controlling memory (#1364)

* add options for controlling memory

* Update agenthub/monologue_agent/utils/memory.py

Co-authored-by: Jim Su

* move memory initialization switch back

* fix lint

* fix type

---------

Co-authored-by: Jim Su
---
 agenthub/monologue_agent/agent.py        | 24 +++++++++++++++++++-----
 agenthub/monologue_agent/utils/memory.py | 12 +++++++++---
 opendevin/config.py                      |  2 ++
 opendevin/schema/config.py               |  2 ++
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/agenthub/monologue_agent/agent.py b/agenthub/monologue_agent/agent.py
index 6ef1450883..c1fbab4148 100644
--- a/agenthub/monologue_agent/agent.py
+++ b/agenthub/monologue_agent/agent.py
@@ -4,6 +4,8 @@ from opendevin.state import State
 from opendevin.llm.llm import LLM
 from opendevin.schema import ActionType, ObservationType
 from opendevin.exceptions import AgentNoInstructionError
+from opendevin.schema.config import ConfigType
+from opendevin import config
 
 from opendevin.action import (
     Action,
@@ -27,7 +29,8 @@ from opendevin.observation import (
 
 import agenthub.monologue_agent.utils.prompts as prompts
 from agenthub.monologue_agent.utils.monologue import Monologue
-from agenthub.monologue_agent.utils.memory import LongTermMemory
+if config.get(ConfigType.AGENT_MEMORY_ENABLED):
+    from agenthub.monologue_agent.utils.memory import LongTermMemory
 
 MAX_MONOLOGUE_LENGTH = 20000
 MAX_OUTPUT_LENGTH = 5000
@@ -86,6 +89,8 @@ class MonologueAgent(Agent):
     """
 
     _initialized = False
+    monologue: Monologue
+    memory: 'LongTermMemory | None'
 
     def __init__(self, llm: LLM):
         """
@@ -95,8 +100,6 @@
         - llm (LLM): The llm to be used by this agent
         """
         super().__init__(llm)
-        self.monologue = Monologue()
-        self.memory = LongTermMemory()
 
     def _add_event(self, event: dict):
         """
@@ -119,7 +122,8 @@
         )
 
         self.monologue.add_event(event)
-        self.memory.add_event(event)
+        if self.memory is not None:
+            self.memory.add_event(event)
 
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
             self.monologue.condense(self.llm)
@@ -141,8 +145,12 @@
 
         if task is None or task == '':
             raise AgentNoInstructionError()
+
         self.monologue = Monologue()
-        self.memory = LongTermMemory()
+        if config.get(ConfigType.AGENT_MEMORY_ENABLED):
+            self.memory = LongTermMemory()
+        else:
+            self.memory = None
 
         output_type = ''
         for thought in INITIAL_THOUGHTS:
@@ -233,8 +241,14 @@
         Returns:
         - List[str]: A list of top 10 text results that matched the query
         """
+        if self.memory is None:
+            return []
         return self.memory.search(query)
 
     def reset(self) -> None:
         super().reset()
         self.monologue = Monologue()
+        if config.get(ConfigType.AGENT_MEMORY_ENABLED):
+            self.memory = LongTermMemory()
+        else:
+            self.memory = None
diff --git a/agenthub/monologue_agent/utils/memory.py b/agenthub/monologue_agent/utils/memory.py
index 5fb2e361eb..9500d78287 100644
--- a/agenthub/monologue_agent/utils/memory.py
+++ b/agenthub/monologue_agent/utils/memory.py
@@ -1,5 +1,5 @@
 import llama_index.embeddings.openai.base as llama_openai
-from threading import Thread
+import threading
 
 import chromadb
 from llama_index.core import Document
@@ -86,6 +86,9 @@ else:
     )
 
 
+sema = threading.Semaphore(value=config.get(ConfigType.AGENT_MEMORY_MAX_THREADS))
+
+
 class LongTermMemory:
     """
     Responsible for storing information that the agent can call on later for better insights and context.
@@ -102,6 +105,7 @@ class LongTermMemory:
         self.index = VectorStoreIndex.from_vector_store(
             vector_store, embed_model=embed_model)
         self.thought_idx = 0
+        self._add_threads = []
 
     def add_event(self, event: dict):
         """
@@ -129,11 +133,13 @@ class LongTermMemory:
         )
         self.thought_idx += 1
         logger.debug('Adding %s event to memory: %d', t, self.thought_idx)
-        thread = Thread(target=self._add_doc, args=(doc,))
+        thread = threading.Thread(target=self._add_doc, args=(doc,))
+        self._add_threads.append(thread)
         thread.start()  # We add the doc concurrently so we don't have to wait ~500ms for the insert
 
     def _add_doc(self, doc):
-        self.index.insert(doc)
+        with sema:
+            self.index.insert(doc)
 
     def search(self, query: str, k: int = 10):
         """
diff --git a/opendevin/config.py b/opendevin/config.py
index 4404cde530..7cd60653f5 100644
--- a/opendevin/config.py
+++ b/opendevin/config.py
@@ -36,6 +36,8 @@ DEFAULT_CONFIG: dict = {
     ConfigType.LLM_RETRY_MIN_WAIT: 3,
     ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.MAX_ITERATIONS: 100,
+    ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
+    ConfigType.AGENT_MEMORY_ENABLED: False,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
     # we cannot easily count number of tokens, but we can count characters.
     # Assuming 5 characters per token, 5 million is a reasonable default limit.
diff --git a/opendevin/schema/config.py b/opendevin/schema/config.py
index 71de38b0cf..0d5e7eef03 100644
--- a/opendevin/schema/config.py
+++ b/opendevin/schema/config.py
@@ -18,6 +18,8 @@ class ConfigType(str, Enum):
     LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
     LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
     LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
+    AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
+    AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
     MAX_ITERATIONS = 'MAX_ITERATIONS'
     MAX_CHARS = 'MAX_CHARS'
     AGENT = 'AGENT'
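
For anyone reviewing the threading change, here is a minimal, self-contained sketch of the concurrency pattern the memory.py hunks introduce: each add_event() hands the write to a worker thread so the caller never blocks on the roughly 500ms vector-store insert, while a module-level semaphore caps how many inserts run at once (the AGENT_MEMORY_MAX_THREADS default is 2). FakeIndex and its sleep are hypothetical stand-ins for llama_index's VectorStoreIndex and its insert(); the thread and semaphore handling mirrors the patch.

```python
import threading
import time

# Mirrors the AGENT_MEMORY_MAX_THREADS default added in opendevin/config.py.
MAX_THREADS = 2

# Module-level semaphore, as in memory.py: threads are created freely, but
# only MAX_THREADS of them may be inside index.insert() at the same time.
sema = threading.Semaphore(value=MAX_THREADS)


class FakeIndex:
    """Hypothetical stand-in for llama_index's VectorStoreIndex."""

    def insert(self, doc: str) -> None:
        time.sleep(0.5)  # simulate the ~500ms embedding + insert cost
        print(f'inserted: {doc}')


class Memory:
    def __init__(self) -> None:
        self.index = FakeIndex()
        self._add_threads: list[threading.Thread] = []

    def add_event(self, doc: str) -> None:
        # Insert concurrently so the agent loop doesn't wait on the write.
        thread = threading.Thread(target=self._add_doc, args=(doc,))
        self._add_threads.append(thread)
        thread.start()

    def _add_doc(self, doc: str) -> None:
        # The semaphore bounds concurrency without ever blocking add_event().
        with sema:
            self.index.insert(doc)


if __name__ == '__main__':
    memory = Memory()
    for i in range(5):
        memory.add_event(f'thought {i}')
    # Keeping the spawned threads in _add_threads allows a clean shutdown:
    # join them so no pending insert is lost when the process exits.
    for t in memory._add_threads:
        t.join()
```

Note that with AGENT_MEMORY_ENABLED defaulting to False, none of this machinery runs until the flag is switched on in the opendevin config, and the conditional import in agent.py means memory.py (and with it llama_index) is never loaded for users who leave memory off.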