Refactor agent interface a bit (#74)

* start moving files * initial refactor * factor out command management * fix command runner * add workspace to gitignore * factor out command manager * remove dupe add_event * update docs * fix init * fix langchain agent after merge
2025-12-26 05:48:36 +08:00 · 2024-03-21 11:35:28 -04:00 · 2024-03-21 11:35:28 -04:00 · b84463f512
commit b84463f512
parent 2de75d4782
17 changed files with 248 additions and 137 deletions
--- a/.gitignore
+++ b/.gitignore
@ -14,7 +14,7 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+./lib/
 lib64/
 parts/
 sdist/
--- a/agenthub/README.md
+++ b/agenthub/README.md
@ -4,3 +4,54 @@ In this folder, there may exist multiple implementations of `Agent` that will be

 For example, `agenthub/langchain_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
 Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
+
+## Constructing an Agent
+Your agent must implement the following methods:
+
+### `step`
+```
+def step(self, cmd_mgr: CommandManager) -> Event:
+```
+`step` moves the agent forward one step towards its goal. This probably means
+sending a prompt to the LLM, then parsing the response into an action `Event`.
+
+Each Event has an `action` and a dict of `args`. Supported Events include:
+* `read` - reads the contents of a file. Arguments:
+  * `path` - the path of the file to read
+* `write` - writes the contents to a file. Arguments:
+  * `path` - the path of the file to write
+  * `contents` - the contents to write to the file
+* `run` - runs a command. Arguments:
+  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
+* `kill` - kills a background command. Arguments:
+  * `id` - the ID of the background command to kill
+* `browse` - opens a web page. Arguments:
+  * `url` - the URL to open
+* `recall` - recalls a past memory. Arguments:
+  * `query` - the query to search for
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
+* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
+
+For Events like `read` and `run`, a follow-up event will be added via `add_event` with the output.
+
+### `add_event`
+```
+def add_event(self, event: Event) -> None:
+```
+`add_event` adds an event to the agent's history. This could be a user message,
+an action taken by the agent, log output, file contents, or anything else.
+
+You'll probably want to keep a history of events, and use them in your prompts
+so that the agent knows what it did recently. You may also want to keep events
+in a vector database so the agent can refer back to them.
+
+The output of `step` will automatically be passed to this method.
+
+### `search_memory`
+```
+def search_memory(self, query: str) -> List[str]:
+```
+`search_memory` should return a list of events that match the query. This will be used
+for the `recall` action.
--- a/agenthub/langchains_agent/init.py
+++ b/agenthub/langchains_agent/init.py
@ -1,10 +1,11 @@
 import os
 import argparse
+from typing import List, Dict, Type

 from opendevin.agent import Agent, Message

 from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event

 INITIAL_THOUGHTS = [
    "I exist!",
@ -46,33 +47,12 @@ INITIAL_THOUGHTS = [


 class LangchainsAgent(Agent):
+    _initialized = False

-    def _run_loop(self, agent: LangchainsAgentImpl, max_iterations=100):
-        # TODO: make it add a Message to the history for each turn / event
-        for i in range(max_iterations):
-            print("STEP", i, flush=True)
-            log_events = agent.get_background_logs()
-            for event in log_events:
-                print(event, flush=True)
-            action = agent.get_next_action()
-            if action.action == "finish":
-                print("Done!", flush=True)
-                break
-            print(action, flush=True)
-            print("---", flush=True)
-            out = agent.maybe_perform_latest_action()
-            print(out, flush=True)
-            print("==============", flush=True)
-
-    def run(self) -> None:
-        """
-        Starts the execution of the assigned instruction. This method should
-        be implemented by subclasses to define the specific execution logic.
-        """
-        print("Working in directory:", self.workspace_dir)
-        os.chdir(self.workspace_dir)
-
-        agent = LangchainsAgentImpl(self.instruction)
+    def _initialize(self):
+        if self._initialized:
+            return
+        self.agent = LangchainsAgentImpl(self.instruction)
        next_is_output = False
        for thought in INITIAL_THOUGHTS:
            thought = thought.replace("$TASK", self.instruction)
@ -94,12 +74,18 @@ class LangchainsAgent(Agent):
                    next_is_output = True
                else:
                    event = Event("think", {"thought": thought})
+            self.agent.add_event(event)
+        self._initialized = True

-            agent.add_event(event)
-        self._run_loop(agent, self.max_steps)
+    def add_event(self, event: Event) -> None:
+        self.agent.add_event(event)

-        # Set the agent's completion status to True
-        self._complete = True
+    def step(self, cmd_mgr) -> Event:
+        self._initialize()
+        return self.agent.get_next_action(cmd_mgr)
+
+    def search_memory(self, query: str) -> List[str]:
+        return self.agent.memory.search(query)

    def chat(self, message: str) -> None:
        """
--- a/agenthub/langchains_agent/utils/actions/kill.py
+++ b/agenthub/langchains_agent/utils/actions/kill.py
@ -1,7 +0,0 @@
-def kill(id, agent):
-    if id < 0 or id >= len(agent.background_commands):
-        raise ValueError('Invalid command id to kill')
-    agent.background_commands[id].kill()
-    agent.background_commands.pop(id)
-    return "Background command %d killed" % id
-
--- a/agenthub/langchains_agent/utils/actions/run.py
+++ b/agenthub/langchains_agent/utils/actions/run.py
@ -1,18 +0,0 @@
-import subprocess
-import os
-
-def run(cmd, agent, background=False):
-    if background:
-        return run_background(cmd, agent)
-    result = subprocess.run(["/bin/bash", "-c", cmd], capture_output=True, text=True)
-    output = result.stdout + result.stderr
-    exit_code = result.returncode
-    if exit_code != 0:
-        raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
-    return output
-
-def run_background(cmd, agent):
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
-    agent.background_commands.append(process)
-    return "Background command started. To stop it, send a `kill` action with id " + str(len(agent.background_commands) - 1)
-
--- a/agenthub/langchains_agent/utils/agent.py
+++ b/agenthub/langchains_agent/utils/agent.py
@ -1,8 +1,6 @@
-import select
-
 from agenthub.langchains_agent.utils.monologue import Monologue
 from agenthub.langchains_agent.utils.memory import LongTermMemory
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event
 import agenthub.langchains_agent.utils.llm as llm

 MAX_OUTPUT_LENGTH = 5000
@ -13,7 +11,6 @@ class Agent:
        self.task = task
        self.monologue = Monologue()
        self.memory = LongTermMemory()
-        self.background_commands = []

    def add_event(self, event):
        self.monologue.add_event(event)
@ -21,65 +18,9 @@ class Agent:
        if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
            self.monologue.condense()

-    def get_next_action(self):
-        bg_commands = [cmd.args for cmd in self.background_commands]
-        action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), bg_commands)
+    def get_next_action(self, cmd_mgr):
+        action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), cmd_mgr.background_commands)
        event = Event(action_dict['action'], action_dict['args'])
        self.latest_action = event
-        self.add_event(event)
        return event

-    def maybe_perform_latest_action(self):
-        if not (self.latest_action and self.latest_action.is_runnable()):
-            return
-        action = 'output'
-        try:
-            output = self.latest_action.run(self)
-        except Exception as e:
-            output = 'Error: ' + str(e)
-            action = 'error'
-        if len(output) > MAX_OUTPUT_LENGTH:
-            output = output[:MAX_OUTPUT_LENGTH] + '...'
-        out_event = Event(action, {'output': output})
-        self.add_event(out_event)
-        return out_event
-
-    def get_background_log(self, idx, cmd, stream, name):
-        logs = ""
-        while True:
-            readable, _, _ = select.select([stream], [], [], .1)
-            if not readable:
-                break
-            next = stream.readline()
-            if next == '':
-                break
-            logs += next
-        if logs == "": return
-
-        event = Event('output', {
-            'output': logs,
-            'stream':name,
-            'id': idx,
-            'command': cmd.args,
-        })
-        self.add_event(event)
-        return event
-
-    def get_background_logs(self):
-        all_events = []
-        for idx, cmd in enumerate(self.background_commands):
-            stdout_event = self.get_background_log(idx, cmd, cmd.stdout, 'stdout')
-            if stdout_event:
-                all_events.append(stdout_event)
-            stderr_event = self.get_background_log(idx, cmd, cmd.stderr, 'stderr')
-            if stderr_event:
-                all_events.append(stderr_event)
-
-            exit_code = cmd.poll()
-            if exit_code is not None:
-                event = Event('output', {'output': 'Background command %d exited with code %d' % (idx, exit_code)})
-                all_events.append(event)
-                self.add_event(event)
-
-        self.background_commands = [cmd for cmd in self.background_commands if cmd.poll() is None]
-        return all_events
--- a/agenthub/langchains_agent/utils/llm.py
+++ b/agenthub/langchains_agent/utils/llm.py
@ -127,9 +127,8 @@ def request_action(task, thoughts, background_commands=[]):
    bg_commands_message = ""
    if len(background_commands) > 0:
        bg_commands_message = "The following commands are running in the background:"
-        for idx, command in enumerate(background_commands):
-            # TODO: make command IDs long-lived, instead of the index
-            bg_commands_message += f"\n* {idx}: {command}"
+        for id, command in background_commands.items():
+            bg_commands_message += f"\n`{id}`: {command.command}"
        bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."

    latest_thought = thoughts[-1]
--- a/agenthub/langchains_agent/utils/monologue.py
+++ b/agenthub/langchains_agent/utils/monologue.py
@ -1,5 +1,5 @@
 import agenthub.langchains_agent.utils.json as json
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event

 import agenthub.langchains_agent.utils.llm as llm

--- a/opendevin/agent.py
+++ b/opendevin/agent.py
@ -3,6 +3,9 @@ from typing import List, Dict, Type
 from dataclasses import dataclass
 from enum import Enum

+from .lib.event import Event
+from .lib.command_manager import CommandManager
+from .controller import AgentController

 class Role(Enum):
    SYSTEM = "system"  # system message for LLM
@ -86,7 +89,17 @@ class Agent(ABC):
        return self._history

    @abstractmethod
-    def run(self) -> None:
+    def add_event(self, event: Event) -> None:
+        """
+        Adds an event to the agent's history.
+
+        Parameters:
+        - event (Event): The event to add to the history.
+        """
+        pass
+
+    @abstractmethod
+    def step(self, cmd_mgr: CommandManager) -> Event:
        """
        Starts the execution of the assigned instruction. This method should
        be implemented by subclasses to define the specific execution logic.
@ -94,13 +107,15 @@ class Agent(ABC):
        pass

    @abstractmethod
-    def chat(self, message: str) -> None:
+    def search_memory(self, query: str) -> List[str]:
        """
-        Optional method for interactive communication with the agent during its execution. Implementations
-        can use this method to modify the agent's behavior or state based on chat inputs.
+        Searches the agent's memory for information relevant to the given query.

        Parameters:
-        - message (str): The chat message or command.
+        - query (str): The query to search for in the agent's memory.
+
+        Returns:
+        - response (List[str]): The memory entries that match the query.
        """
        pass

--- a/opendevin/controller.py
+++ b/opendevin/controller.py
@ -0,0 +1,49 @@
+from opendevin.lib.command_manager import CommandManager
+from opendevin.lib.event import Event
+
+def print_callback(event):
+    """Print ``event`` to stdout, flushing immediately."""
+    print(event, flush=True)
+
+class AgentController:
+    """Drives an agent through a step / act / observe loop.
+
+    On every iteration the controller collects events from background
+    commands, asks the agent for its next action via ``agent.step``, runs
+    that action if it is runnable, and hands every resulting event to each
+    registered callback.
+
+    Parameters:
+    - agent: the agent to drive; its ``step`` and ``add_event`` methods are
+      used by this class.
+    - max_iterations (int): upper bound on the number of loop iterations.
+    - callbacks (list | None): extra callables invoked with every event,
+      before the built-in ones. The list is copied, so the caller's list is
+      never modified.
+    """
+
+    def __init__(self, agent, max_iterations=100, callbacks=None):
+        self.agent = agent
+        self.max_iterations = max_iterations
+        # Not read anywhere in this class; kept so existing attribute access
+        # keeps working.
+        self.background_commands = []
+        self.command_manager = CommandManager()
+        # Build a fresh list per controller: a shared mutable default would
+        # accumulate the callbacks of every controller ever constructed.
+        self.callbacks = list(callbacks) if callbacks is not None else []
+        self.callbacks.append(self.agent.add_event)
+        self.callbacks.append(print_callback)
+
+    def _dispatch(self, event):
+        """Pass ``event`` to every registered callback, in registration order."""
+        for callback in self.callbacks:
+            callback(event)
+
+    def maybe_perform_action(self, event):
+        """Run ``event`` if it is runnable and wrap the result in a new Event.
+
+        Returns ``None`` when ``event`` is falsy or not runnable. Otherwise
+        returns an ``output`` event holding the result of ``event.run``, or
+        an ``error`` event holding the exception text if running it raised.
+        """
+        if not (event and event.is_runnable()):
+            return
+        action = 'output'
+        try:
+            output = event.run(self)
+        except Exception as e:
+            # Failures are reported back as an event instead of ending the loop.
+            output = 'Error: ' + str(e)
+            action = 'error'
+        out_event = Event(action, {'output': output})
+        return out_event
+
+    def start_loop(self):
+        """Run the loop until the agent emits ``finish`` or ``max_iterations`` is hit."""
+        for i in range(self.max_iterations):
+            print("STEP", i, flush=True)
+            for event in self.command_manager.get_background_events():
+                self._dispatch(event)
+
+            action_event = self.agent.step(self.command_manager)
+            self._dispatch(action_event)
+            if action_event.action == 'finish':
+                break
+            print("---", flush=True)
+
+            output_event = self.maybe_perform_action(action_event)
+            if output_event is not None:
+                self._dispatch(output_event)
+            print("==============", flush=True)
--- a/agenthub/langchains_agent/utils/actions/init.py
+++ b/agenthub/langchains_agent/utils/actions/init.py
@ -1,5 +1,3 @@
-from .run import run
-from .kill import kill
 from .browse import browse
 from .write import write
 from .read import read
--- a/agenthub/langchains_agent/utils/actions/browse.py
+++ b/agenthub/langchains_agent/utils/actions/browse.py
--- a/agenthub/langchains_agent/utils/actions/read.py
+++ b/agenthub/langchains_agent/utils/actions/read.py
--- a/agenthub/langchains_agent/utils/actions/write.py
+++ b/agenthub/langchains_agent/utils/actions/write.py
--- a/opendevin/lib/command_manager.py
+++ b/opendevin/lib/command_manager.py
@ -0,0 +1,89 @@
+import subprocess
+import select
+from typing import List
+
+from opendevin.lib.event import Event
+
+class BackgroundCommand:
+    """A command started in the background, paired with its process handle."""
+
+    def __init__(self, id: int, command: str, process: subprocess.Popen):
+        self.id = id
+        self.process = process
+        self.command = command
+
+    def _get_log_from_stream(self, stream):
+        """Collect the lines that can currently be read from ``stream``.
+
+        Waits up to 0.1 seconds for each line and stops once the stream is
+        not readable or ``readline`` returns an empty string. Returns the
+        concatenated lines, or ``None`` when nothing was read.
+        """
+        chunks = []
+        while select.select([stream], [], [], .1)[0]:
+            line = stream.readline()
+            if line == '':
+                break
+            chunks.append(line)
+        collected = "".join(chunks)
+        return collected if collected != "" else None
+
+    def get_logs(self):
+        """Return ``(stdout, stderr, exit_code)`` for the process.
+
+        ``stdout`` and ``stderr`` are the newly read text or ``None``;
+        ``exit_code`` is the result of ``poll()`` on the process.
+        """
+        proc = self.process
+        out = self._get_log_from_stream(proc.stdout)
+        err = self._get_log_from_stream(proc.stderr)
+        return out, err, proc.poll()
+
+class CommandManager:
+    """Runs shell commands and keeps track of those started in the background.
+
+    Each background command gets an integer id taken from an increasing
+    counter; the id stays valid until the command exits or is killed.
+    """
+
+    def __init__(self):
+        self.cur_id = 0
+        self.background_commands = {}
+
+    def run_command(self, command: str, background=False) -> str:
+        """Run ``command`` in the background if requested, else wait for it."""
+        if background:
+            return self.run_background(command)
+        return self.run_immediately(command)
+
+    def run_immediately(self, command: str) -> str:
+        """Run ``command`` with bash and return stdout followed by stderr.
+
+        Raises:
+        - ValueError: if the command exits with a non-zero code; the message
+          includes the exit code and the captured output.
+        """
+        result = subprocess.run(["/bin/bash", "-c", command], capture_output=True, text=True)
+        output = result.stdout + result.stderr
+        exit_code = result.returncode
+        if exit_code != 0:
+            raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
+        return output
+
+    def run_background(self, command: str) -> str:
+        """Start ``command`` without waiting and return a message with its id."""
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
+        bg_cmd = BackgroundCommand(self.cur_id, command, process)
+        self.cur_id += 1
+        self.background_commands[bg_cmd.id] = bg_cmd
+        return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)
+
+    def kill_command(self, id: int) -> str:
+        """Kill the background command ``id`` and stop tracking it.
+
+        Returns a confirmation message.
+
+        Raises:
+        - ValueError: if no background command with that id is tracked.
+        """
+        if id not in self.background_commands:
+            raise ValueError('Invalid command id to kill')
+        # TODO: get log events before killing
+        self.background_commands[id].process.kill()
+        del self.background_commands[id]
+        return f"Background command {id} killed"
+
+    def get_background_events(self) -> List[Event]:
+        """Collect new output and exit notices from all background commands.
+
+        Commands that have exited are removed from ``background_commands``
+        after their exit event has been recorded.
+        """
+        events = []
+        # Iterate over a snapshot: finished commands are deleted inside the
+        # loop, and a dict must not change size while it is being iterated.
+        for cmd_id, cmd in list(self.background_commands.items()):
+            stdout, stderr, exit_code = cmd.get_logs()
+            if stdout is not None:
+                events.append(Event('output', {
+                    'output': stdout,
+                    'stream': 'stdout',
+                    'id': cmd_id,
+                    'command': cmd.command,
+                }))
+            if stderr is not None:
+                events.append(Event('output', {
+                    'output': stderr,
+                    'stream': 'stderr',
+                    'id': cmd_id,
+                    'command': cmd.command,
+                }))
+            if exit_code is not None:
+                events.append(Event('output', {
+                    'exit_code': exit_code,
+                    'output': 'Background command %d exited with code %d' % (cmd_id, exit_code),
+                    'id': cmd_id,
+                    'command': cmd.command,
+                }))
+                del self.background_commands[cmd_id]
+        return events
--- a/agenthub/langchains_agent/utils/event.py
+++ b/agenthub/langchains_agent/utils/event.py
@ -1,9 +1,14 @@
 import os
 import json
-import agenthub.langchains_agent.utils.actions as actions
+import opendevin.lib.actions as actions
+
+ACTION_TYPES = ['run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish']
+RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall']

 class Event:
    def __init__(self, action, args):
+        if action not in ACTION_TYPES:
+            raise ValueError('Invalid action type: ' + action)
        self.action = action
        self.args = args

@ -17,18 +22,18 @@ class Event:
        }

    def is_runnable(self):
-        return self.action in ['run', 'kill', 'browse', 'read', 'write', 'recall']
+        return self.action in RUNNABLE_ACTIONS

-    def run(self, agent):
+    def run(self, agent_controller):
        if self.action == 'run':
            cmd = self.args['command']
            background = False
            if 'background' in self.args and self.args['background']:
                background = True
-            return actions.run(cmd, agent, background)
+            return agent_controller.command_manager.run_command(cmd, background)
        if self.action == 'kill':
            id = self.args['id']
-            return actions.kill(id, agent)
+            return agent_controller.command_manager.kill_command(id)
        elif self.action == 'browse':
            url = self.args['url']
            return actions.browse(url)
@ -40,6 +45,6 @@ class Event:
            contents = self.args['contents']
            return actions.write(path, contents)
        elif self.action == 'recall':
-            return agent.memory.search(self.args['query'])
+            return agent_controller.agent.search_memory(self.args['query'])
        else:
            raise ValueError('Invalid action type')
--- a/opendevin/main.py
+++ b/opendevin/main.py
@ -3,6 +3,7 @@ import argparse

 import agenthub  # for the agent registry
 from opendevin.agent import Agent
+from opendevin.controller import AgentController

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@ -18,4 +19,6 @@ if __name__ == "__main__":
        workspace_dir=args.directory,
        model_name=args.model_name
    )
-    agent.run()
+
+    controller = AgentController(agent)
+    controller.start_loop()