diff --git a/.gitignore b/.gitignore index f7fd501a1b..bc1a7db343 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ dist/ downloads/ eggs/ .eggs/ -lib/ +/lib/ lib64/ parts/ sdist/ diff --git a/agenthub/README.md b/agenthub/README.md index 2ec672c663..f408811e71 100644 --- a/agenthub/README.md +++ b/agenthub/README.md @@ -4,3 +4,54 @@ In this folder, there may exist multiple implementations of `Agent` that will be For example, `agenthub/langchain_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc. Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions. + +## Constructing an Agent +Your agent must implement the following methods: + +### `step` +``` +def step(self, cmd_mgr: CommandManager) -> Event: +``` +`step` moves the agent forward one step towards its goal. This probably means +sending a prompt to the LLM, then parsing the response into an action `Event`. + +Each Event has an `action` and a dict of `args`. Supported Events include: +* `read` - reads the contents of a file. Arguments: + * `path` - the path of the file to read +* `write` - writes the contents to a file. Arguments: + * `path` - the path of the file to write + * `contents` - the contents to write to the file +* `run` - runs a command. Arguments: + * `command` - the command to run + * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true. +* `kill` - kills a background command. Arguments: + * `id` - the ID of the background command to kill +* `browse` - opens a web page. Arguments: + * `url` - the URL to open +* `recall` - recalls a past memory. Arguments: + * `query` - the query to search for +* `think` - make a plan, set a goal, or record your thoughts. 
Arguments: + * `thought` - the thought to record +* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working. + +For Events like `read` and `run`, a follow-up event will be added via `add_event` with the output. + +### `add_event` +``` +def add_event(self, event: Event) -> None: +``` +`add_event` adds an event to the agent's history. This could be a user message, +an action taken by the agent, log output, file contents, or anything else. + +You'll probably want to keep a history of events, and use them in your prompts +so that the agent knows what it did recently. You may also want to keep events +in a vector database so the agent can refer back to them. + +The output of `step` will automatically be passed to this method. + +### `search_memory` +``` +def search_memory(self, query: str) -> List[str]: +``` +`search_memory` should return a list of events that match the query. This will be used +for the `recall` action. 
diff --git a/agenthub/langchains_agent/__init__.py b/agenthub/langchains_agent/__init__.py index cc48f550d3..fd875b3f08 100644 --- a/agenthub/langchains_agent/__init__.py +++ b/agenthub/langchains_agent/__init__.py @@ -1,10 +1,11 @@ import os import argparse +from typing import List, Dict, Type from opendevin.agent import Agent, Message from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl -from agenthub.langchains_agent.utils.event import Event +from opendevin.lib.event import Event INITIAL_THOUGHTS = [ "I exist!", @@ -46,33 +47,12 @@ INITIAL_THOUGHTS = [ class LangchainsAgent(Agent): + _initialized = False - def _run_loop(self, agent: LangchainsAgentImpl, max_iterations=100): - # TODO: make it add a Message to the history for each turn / event - for i in range(max_iterations): - print("STEP", i, flush=True) - log_events = agent.get_background_logs() - for event in log_events: - print(event, flush=True) - action = agent.get_next_action() - if action.action == "finish": - print("Done!", flush=True) - break - print(action, flush=True) - print("---", flush=True) - out = agent.maybe_perform_latest_action() - print(out, flush=True) - print("==============", flush=True) - - def run(self) -> None: - """ - Starts the execution of the assigned instruction. This method should - be implemented by subclasses to define the specific execution logic. 
- """ - print("Working in directory:", self.workspace_dir) - os.chdir(self.workspace_dir) - - agent = LangchainsAgentImpl(self.instruction) + def _initialize(self): + if self._initialized: + return + self.agent = LangchainsAgentImpl(self.instruction) next_is_output = False for thought in INITIAL_THOUGHTS: thought = thought.replace("$TASK", self.instruction) @@ -94,12 +74,18 @@ class LangchainsAgent(Agent): next_is_output = True else: event = Event("think", {"thought": thought}) + self.agent.add_event(event) + self._initialized = True - agent.add_event(event) - self._run_loop(agent, self.max_steps) + def add_event(self, event: Event) -> None: + self.agent.add_event(event) - # Set the agent's completion status to True - self._complete = True + def step(self, cmd_mgr) -> Event: + self._initialize() + return self.agent.get_next_action(cmd_mgr) + + def search_memory(self, query: str) -> List[str]: + return self.agent.memory.search(query) def chat(self, message: str) -> None: """ diff --git a/agenthub/langchains_agent/utils/actions/kill.py b/agenthub/langchains_agent/utils/actions/kill.py deleted file mode 100644 index c40ac236ee..0000000000 --- a/agenthub/langchains_agent/utils/actions/kill.py +++ /dev/null @@ -1,7 +0,0 @@ -def kill(id, agent): - if id < 0 or id >= len(agent.background_commands): - raise ValueError('Invalid command id to kill') - agent.background_commands[id].kill() - agent.background_commands.pop(id) - return "Background command %d killed" % id - diff --git a/agenthub/langchains_agent/utils/actions/run.py b/agenthub/langchains_agent/utils/actions/run.py deleted file mode 100644 index d9684fd429..0000000000 --- a/agenthub/langchains_agent/utils/actions/run.py +++ /dev/null @@ -1,18 +0,0 @@ -import subprocess -import os - -def run(cmd, agent, background=False): - if background: - return run_background(cmd, agent) - result = subprocess.run(["/bin/bash", "-c", cmd], capture_output=True, text=True) - output = result.stdout + result.stderr - exit_code = 
result.returncode - if exit_code != 0: - raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output) - return output - -def run_background(cmd, agent): - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True) - agent.background_commands.append(process) - return "Background command started. To stop it, send a `kill` action with id " + str(len(agent.background_commands) - 1) - diff --git a/agenthub/langchains_agent/utils/agent.py b/agenthub/langchains_agent/utils/agent.py index 583120202c..0d7cfa64a1 100644 --- a/agenthub/langchains_agent/utils/agent.py +++ b/agenthub/langchains_agent/utils/agent.py @@ -1,8 +1,6 @@ -import select - from agenthub.langchains_agent.utils.monologue import Monologue from agenthub.langchains_agent.utils.memory import LongTermMemory -from agenthub.langchains_agent.utils.event import Event +from opendevin.lib.event import Event import agenthub.langchains_agent.utils.llm as llm MAX_OUTPUT_LENGTH = 5000 @@ -13,7 +11,6 @@ class Agent: self.task = task self.monologue = Monologue() self.memory = LongTermMemory() - self.background_commands = [] def add_event(self, event): self.monologue.add_event(event) @@ -21,65 +18,9 @@ class Agent: if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH: self.monologue.condense() - def get_next_action(self): - bg_commands = [cmd.args for cmd in self.background_commands] - action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), bg_commands) + def get_next_action(self, cmd_mgr): + action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), cmd_mgr.background_commands) event = Event(action_dict['action'], action_dict['args']) self.latest_action = event - self.add_event(event) return event - def maybe_perform_latest_action(self): - if not (self.latest_action and self.latest_action.is_runnable()): - return - action = 'output' - try: - output = self.latest_action.run(self) - except Exception as e: - output = 
'Error: ' + str(e) - action = 'error' - if len(output) > MAX_OUTPUT_LENGTH: - output = output[:MAX_OUTPUT_LENGTH] + '...' - out_event = Event(action, {'output': output}) - self.add_event(out_event) - return out_event - - def get_background_log(self, idx, cmd, stream, name): - logs = "" - while True: - readable, _, _ = select.select([stream], [], [], .1) - if not readable: - break - next = stream.readline() - if next == '': - break - logs += next - if logs == "": return - - event = Event('output', { - 'output': logs, - 'stream':name, - 'id': idx, - 'command': cmd.args, - }) - self.add_event(event) - return event - - def get_background_logs(self): - all_events = [] - for idx, cmd in enumerate(self.background_commands): - stdout_event = self.get_background_log(idx, cmd, cmd.stdout, 'stdout') - if stdout_event: - all_events.append(stdout_event) - stderr_event = self.get_background_log(idx, cmd, cmd.stderr, 'stderr') - if stderr_event: - all_events.append(stderr_event) - - exit_code = cmd.poll() - if exit_code is not None: - event = Event('output', {'output': 'Background command %d exited with code %d' % (idx, exit_code)}) - all_events.append(event) - self.add_event(event) - - self.background_commands = [cmd for cmd in self.background_commands if cmd.poll() is None] - return all_events diff --git a/agenthub/langchains_agent/utils/llm.py b/agenthub/langchains_agent/utils/llm.py index fc85089293..e7136936e3 100644 --- a/agenthub/langchains_agent/utils/llm.py +++ b/agenthub/langchains_agent/utils/llm.py @@ -127,9 +127,8 @@ def request_action(task, thoughts, background_commands=[]): bg_commands_message = "" if len(background_commands) > 0: bg_commands_message = "The following commands are running in the background:" - for idx, command in enumerate(background_commands): - # TODO: make command IDs long-lived, instead of the index - bg_commands_message += f"\n* {idx}: {command}" + for id, command in background_commands.items(): + bg_commands_message += f"\n`{id}`: 
{command.command}" bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above." latest_thought = thoughts[-1] diff --git a/agenthub/langchains_agent/utils/monologue.py b/agenthub/langchains_agent/utils/monologue.py index 18729b4a17..c3bf794800 100644 --- a/agenthub/langchains_agent/utils/monologue.py +++ b/agenthub/langchains_agent/utils/monologue.py @@ -1,5 +1,5 @@ import agenthub.langchains_agent.utils.json as json -from agenthub.langchains_agent.utils.event import Event +from opendevin.lib.event import Event import agenthub.langchains_agent.utils.llm as llm diff --git a/opendevin/agent.py b/opendevin/agent.py index ca779240ad..21025ac0e1 100644 --- a/opendevin/agent.py +++ b/opendevin/agent.py @@ -3,6 +3,9 @@ from typing import List, Dict, Type from dataclasses import dataclass from enum import Enum +from .lib.event import Event +from .lib.command_manager import CommandManager +from .controller import AgentController class Role(Enum): SYSTEM = "system" # system message for LLM @@ -86,7 +89,17 @@ class Agent(ABC): return self._history @abstractmethod - def run(self) -> None: + def add_event(self, event: Event) -> None: + """ + Adds an event to the agent's history. + + Parameters: + - event (Event): The event to add to the history. + """ + pass + + @abstractmethod + def step(self, cmd_mgr: CommandManager) -> Event: """ Starts the execution of the assigned instruction. This method should be implemented by subclasses to define the specific execution logic. @@ -94,13 +107,15 @@ class Agent(ABC): pass @abstractmethod - def chat(self, message: str) -> None: + def search_memory(self, query: str) -> List[str]: """ - Optional method for interactive communication with the agent during its execution. Implementations - can use this method to modify the agent's behavior or state based on chat inputs. + Searches the agent's memory for information relevant to the given query. 
Parameters: - - message (str): The chat message or command. + - query (str): The query to search for in the agent's memory. + + Returns: + - response (str): The response to the query. """ pass diff --git a/opendevin/controller.py b/opendevin/controller.py new file mode 100644 index 0000000000..bc18d77b65 --- /dev/null +++ b/opendevin/controller.py @@ -0,0 +1,49 @@ +from opendevin.lib.command_manager import CommandManager +from opendevin.lib.event import Event + +def print_callback(event): + print(event, flush=True) + +class AgentController: + def __init__(self, agent, max_iterations=100, callbacks=[]): + self.agent = agent + self.max_iterations = max_iterations + self.background_commands = [] + self.command_manager = CommandManager() + self.callbacks = callbacks + self.callbacks.append(self.agent.add_event) + self.callbacks.append(print_callback) + + def maybe_perform_action(self, event): + if not (event and event.is_runnable()): + return + action = 'output' + try: + output = event.run(self) + except Exception as e: + output = 'Error: ' + str(e) + action = 'error' + out_event = Event(action, {'output': output}) + return out_event + + def start_loop(self): + output = None + for i in range(self.max_iterations): + print("STEP", i, flush=True) + log_events = self.command_manager.get_background_events() + for event in log_events: + for callback in self.callbacks: + callback(event) + + action_event = self.agent.step(self.command_manager) + for callback in self.callbacks: + callback(action_event) + if action_event.action == 'finish': + break + print("---", flush=True) + + output_event = self.maybe_perform_action(action_event) + if output_event is not None: + for callback in self.callbacks: + callback(output_event) + print("==============", flush=True) diff --git a/agenthub/langchains_agent/utils/actions/__init__.py b/opendevin/lib/actions/__init__.py similarity index 63% rename from agenthub/langchains_agent/utils/actions/__init__.py rename to 
class BackgroundCommand:
    """A shell command started in the background, plus its process handle."""

    def __init__(self, id: int, command: str, process: subprocess.Popen):
        self.command = command
        self.id = id
        self.process = process

    def _get_log_from_stream(self, stream):
        """Return the lines currently readable from ``stream``, or ``None``.

        Polls with a 0.1 s ``select`` timeout and stops as soon as the stream
        is not readable or ``readline`` reports end-of-file.
        """
        lines = []
        while True:
            readable, _, _ = select.select([stream], [], [], .1)
            if not readable:
                break
            line = stream.readline()
            if line == '':
                break
            lines.append(line)
        if not lines:
            return None
        return "".join(lines)

    def get_logs(self):
        """Return ``(stdout, stderr, exit_code)`` for this command.

        ``stdout`` / ``stderr`` are ``None`` when nothing new was read;
        ``exit_code`` is ``None`` while the process is still running.
        """
        stdout = self._get_log_from_stream(self.process.stdout)
        stderr = self._get_log_from_stream(self.process.stderr)
        exit_code = self.process.poll()
        return stdout, stderr, exit_code


class CommandManager:
    """Run shell commands, either to completion or in the background.

    Background commands get an increasing integer id (never reused) and are
    kept in ``background_commands`` (id -> BackgroundCommand) until they exit
    or are killed.
    """

    def __init__(self):
        self.cur_id = 0
        self.background_commands = {}

    def run_command(self, command: str, background=False) -> str:
        """Run ``command``; in the background when ``background`` is true."""
        if background:
            return self.run_background(command)
        return self.run_immediately(command)

    def run_immediately(self, command: str) -> str:
        """Run ``command`` under bash and return its stdout followed by stderr.

        Raises:
        - ValueError: if the command exits with a non-zero status; the message
          includes the exit code and the captured output.
        """
        result = subprocess.run(["/bin/bash", "-c", command], capture_output=True, text=True)
        output = result.stdout + result.stderr
        exit_code = result.returncode
        if exit_code != 0:
            raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
        return output

    def run_background(self, command: str) -> str:
        """Start ``command`` through the shell without waiting for it.

        Returns a message telling the agent which id to use with ``kill``.
        """
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
        bg_cmd = BackgroundCommand(self.cur_id, command, process)
        self.cur_id += 1
        self.background_commands[bg_cmd.id] = bg_cmd
        return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)

    def kill_command(self, id: int) -> str:
        """Kill the background command registered under ``id`` and forget it.

        Returns:
        - a confirmation message naming the killed command.

        Raises:
        - ValueError: if no background command has that id.
        """
        # TODO: get log events before killing
        try:
            bg_cmd = self.background_commands.pop(id)
        except KeyError:
            raise ValueError('Invalid command id to kill') from None
        bg_cmd.process.kill()
        # Reap the killed process so it does not linger as a zombie.
        bg_cmd.process.wait()
        return "Background command %d killed" % id

    def get_background_events(self) -> "List[Event]":
        """Collect new output and exit notifications from background commands.

        Emits one ``'output'`` Event per stream that produced data and one
        more when a command has exited; exited commands are removed from
        ``background_commands``.
        """
        events = []
        # Iterate over a snapshot: finished commands are deleted inside the
        # loop, and deleting from a dict while iterating it raises RuntimeError.
        for cmd_id, cmd in list(self.background_commands.items()):
            stdout, stderr, exit_code = cmd.get_logs()
            if stdout is not None:
                events.append(Event('output', {
                    'output': stdout,
                    'stream': 'stdout',
                    'id': cmd_id,
                    'command': cmd.command,
                }))
            if stderr is not None:
                events.append(Event('output', {
                    'output': stderr,
                    'stream': 'stderr',
                    'id': cmd_id,
                    'command': cmd.command,
                }))
            if exit_code is not None:
                events.append(Event('output', {
                    'exit_code': exit_code,
                    'output': 'Background command %d exited with code %d' % (cmd_id, exit_code),
                    'id': cmd_id,
                    'command': cmd.command,
                }))
                del self.background_commands[cmd_id]
        return events
a/agenthub/langchains_agent/utils/event.py +++ b/opendevin/lib/event.py @@ -1,9 +1,14 @@ import os import json -import agenthub.langchains_agent.utils.actions as actions +import opendevin.lib.actions as actions + +ACTION_TYPES = ['run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish'] +RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall'] class Event: def __init__(self, action, args): + if action not in ACTION_TYPES: + raise ValueError('Invalid action type: ' + action) self.action = action self.args = args @@ -17,18 +22,18 @@ class Event: } def is_runnable(self): - return self.action in ['run', 'kill', 'browse', 'read', 'write', 'recall'] + return self.action in RUNNABLE_ACTIONS - def run(self, agent): + def run(self, agent_controller): if self.action == 'run': cmd = self.args['command'] background = False if 'background' in self.args and self.args['background']: background = True - return actions.run(cmd, agent, background) + return agent_controller.command_manager.run_command(cmd, background) if self.action == 'kill': id = self.args['id'] - return actions.kill(id, agent) + return agent_controller.command_manager.kill_command(id) elif self.action == 'browse': url = self.args['url'] return actions.browse(url) @@ -40,6 +45,6 @@ class Event: contents = self.args['contents'] return actions.write(path, contents) elif self.action == 'recall': - return agent.memory.search(self.args['query']) + return agent_controller.agent.search_memory(self.args['query']) else: raise ValueError('Invalid action type') diff --git a/opendevin/main.py b/opendevin/main.py index c5bc0386ed..f3cd8e8bf1 100644 --- a/opendevin/main.py +++ b/opendevin/main.py @@ -3,6 +3,7 @@ import argparse import agenthub # for the agent registry from opendevin.agent import Agent +from opendevin.controller import AgentController if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run an agent with a specific task") @@ -18,4 +19,6 @@ if 
__name__ == "__main__": workspace_dir=args.directory, model_name=args.model_name ) - agent.run() + + controller = AgentController(agent) + controller.start_loop() \ No newline at end of file