Refactor agent interface a bit (#74)

* start moving files

* initial refactor

* factor out command management

* fix command runner

* add workspace to gitignore

* factor out command manager

* remove dupe add_event

* update docs

* fix init

* fix langchain agent after merge
This commit is contained in:
Robert Brennan 2024-03-21 11:35:28 -04:00 committed by GitHub
parent 2de75d4782
commit b84463f512
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 248 additions and 137 deletions

2
.gitignore vendored
View File

@ -14,7 +14,7 @@ dist/
downloads/
eggs/
.eggs/
lib/
./lib/
lib64/
parts/
sdist/

View File

@ -4,3 +4,54 @@ In this folder, there may exist multiple implementations of `Agent` that will be
For example, `agenthub/langchains_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
## Constructing an Agent
Your agent must implement the following methods:
### `step`
```
def step(self, cmd_mgr: CommandManager) -> Event:
```
`step` moves the agent forward one step towards its goal. This probably means
sending a prompt to the LLM, then parsing the response into an action `Event`.
Each Event has an `action` and a dict of `args`. Supported Events include:
* `read` - reads the contents of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the contents to a file. Arguments:
* `path` - the path of the file to write
* `contents` - the contents to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command. Arguments:
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `think` - make a plan, set a goal, or record your thoughts. Arguments:
* `thought` - the thought to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
For Events like `read` and `run`, a follow-up event will be added via `add_event` with the output.
### `add_event`
```
def add_event(self, event: Event) -> None:
```
`add_event` adds an event to the agent's history. This could be a user message,
an action taken by the agent, log output, file contents, or anything else.
You'll probably want to keep a history of events, and use them in your prompts
so that the agent knows what it did recently. You may also want to keep events
in a vector database so the agent can refer back to them.
The output of `step` will automatically be passed to this method.
### `search_memory`
```
def search_memory(self, query: str) -> List[str]:
```
`search_memory` should return a list of events that match the query. This will be used
for the `recall` action.

View File

@ -1,10 +1,11 @@
import os
import argparse
from typing import List, Dict, Type
from opendevin.agent import Agent, Message
from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
from agenthub.langchains_agent.utils.event import Event
from opendevin.lib.event import Event
INITIAL_THOUGHTS = [
"I exist!",
@ -46,33 +47,12 @@ INITIAL_THOUGHTS = [
class LangchainsAgent(Agent):
_initialized = False
def _run_loop(self, agent: LangchainsAgentImpl, max_iterations=100):
# TODO: make it add a Message to the history for each turn / event
for i in range(max_iterations):
print("STEP", i, flush=True)
log_events = agent.get_background_logs()
for event in log_events:
print(event, flush=True)
action = agent.get_next_action()
if action.action == "finish":
print("Done!", flush=True)
break
print(action, flush=True)
print("---", flush=True)
out = agent.maybe_perform_latest_action()
print(out, flush=True)
print("==============", flush=True)
def run(self) -> None:
"""
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
"""
print("Working in directory:", self.workspace_dir)
os.chdir(self.workspace_dir)
agent = LangchainsAgentImpl(self.instruction)
def _initialize(self):
if self._initialized:
return
self.agent = LangchainsAgentImpl(self.instruction)
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
@ -94,12 +74,18 @@ class LangchainsAgent(Agent):
next_is_output = True
else:
event = Event("think", {"thought": thought})
self.agent.add_event(event)
self._initialized = True
agent.add_event(event)
self._run_loop(agent, self.max_steps)
def add_event(self, event: Event) -> None:
self.agent.add_event(event)
# Set the agent's completion status to True
self._complete = True
def step(self, cmd_mgr) -> Event:
self._initialize()
return self.agent.get_next_action(cmd_mgr)
def search_memory(self, query: str) -> List[str]:
return self.agent.memory.search(query)
def chat(self, message: str) -> None:
"""

View File

@ -1,7 +0,0 @@
def kill(id, agent):
    """Kill the background command stored at index ``id`` and drop it from the agent.

    Parameters:
    - id: position of the command in ``agent.background_commands``.
    - agent: object exposing a ``background_commands`` list whose items have ``kill()``.

    Returns:
    - A confirmation message naming the killed command id.

    Raises:
    - ValueError: if ``id`` is outside the bounds of ``agent.background_commands``.
    """
    commands = agent.background_commands
    if not 0 <= id < len(commands):
        raise ValueError('Invalid command id to kill')
    target = commands[id]
    target.kill()
    # Removing by index shifts the ids of every later command down by one.
    commands.pop(id)
    return "Background command %d killed" % id

View File

@ -1,18 +0,0 @@
import subprocess
import os
def run(cmd, agent, background=False):
    """Run ``cmd`` through ``/bin/bash -c`` and return its combined output.

    Parameters:
    - cmd: the shell command line to execute.
    - agent: passed through to ``run_background`` when ``background`` is truthy.
    - background: when truthy, delegate to ``run_background`` instead of waiting.

    Returns:
    - stdout followed by stderr of the finished command (foreground case), or
      whatever ``run_background`` returns (background case).

    Raises:
    - ValueError: if the foreground command exits with a non-zero status.
    """
    if background:
        return run_background(cmd, agent)

    completed = subprocess.run(["/bin/bash", "-c", cmd], capture_output=True, text=True)
    combined = completed.stdout + completed.stderr
    status = completed.returncode
    if status == 0:
        return combined
    raise ValueError('Command failed with exit code ' + str(status) + ': ' + combined)
def run_background(cmd, agent):
    """Start ``cmd`` in a shell without waiting and register it on the agent.

    The new process is appended to ``agent.background_commands``; its position
    in that list is the id reported back in the returned message.
    """
    handle = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
    agent.background_commands.append(handle)
    new_id = len(agent.background_commands) - 1
    return "Background command started. To stop it, send a `kill` action with id " + str(new_id)

View File

@ -1,8 +1,6 @@
import select
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from agenthub.langchains_agent.utils.event import Event
from opendevin.lib.event import Event
import agenthub.langchains_agent.utils.llm as llm
MAX_OUTPUT_LENGTH = 5000
@ -13,7 +11,6 @@ class Agent:
self.task = task
self.monologue = Monologue()
self.memory = LongTermMemory()
self.background_commands = []
def add_event(self, event):
self.monologue.add_event(event)
@ -21,65 +18,9 @@ class Agent:
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense()
def get_next_action(self):
bg_commands = [cmd.args for cmd in self.background_commands]
action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), bg_commands)
def get_next_action(self, cmd_mgr):
action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), cmd_mgr.background_commands)
event = Event(action_dict['action'], action_dict['args'])
self.latest_action = event
self.add_event(event)
return event
def maybe_perform_latest_action(self):
    """Execute the most recent action if it is runnable and record the result.

    Returns None when there is no latest action or it is not runnable.
    Otherwise runs the action, wraps its (possibly truncated) output in an
    'output' event -- or an 'error' event if running raised -- adds that
    event to the history and returns it.
    """
    latest = self.latest_action
    if not latest or not latest.is_runnable():
        return
    try:
        output = latest.run(self)
        action = 'output'
    except Exception as e:
        # Failures are reported back as an event rather than propagated.
        output = 'Error: ' + str(e)
        action = 'error'
    # Cap what gets stored so one noisy command cannot flood the history.
    if len(output) > MAX_OUTPUT_LENGTH:
        output = output[:MAX_OUTPUT_LENGTH] + '...'
    out_event = Event(action, {'output': output})
    self.add_event(out_event)
    return out_event
def get_background_log(self, idx, cmd, stream, name):
    """Drain currently-available lines from ``stream`` into a single 'output' event.

    Parameters:
    - idx: id of the background command the stream belongs to.
    - cmd: the process object; its ``args`` are recorded on the event.
    - stream: text stream to read from (polled with a 0.1s select timeout).
    - name: label stored under the event's 'stream' key.

    Returns:
    - The event that was added to the history, or None if nothing was read.
    """
    pieces = []
    # Stop as soon as select reports nothing to read or readline hits EOF.
    while select.select([stream], [], [], .1)[0]:
        line = stream.readline()
        if line == '':
            break
        pieces.append(line)
    logs = "".join(pieces)
    if logs == "":
        return
    event = Event('output', {
        'output': logs,
        'stream':name,
        'id': idx,
        'command': cmd.args,
    })
    self.add_event(event)
    return event
def get_background_logs(self):
# Collects events for every tracked background command: any new stdout and
# stderr output (via get_background_log) plus an exit notice for each command
# whose poll() reports an exit code. Returns the list of events gathered.
all_events = []
for idx, cmd in enumerate(self.background_commands):
stdout_event = self.get_background_log(idx, cmd, cmd.stdout, 'stdout')
if stdout_event:
all_events.append(stdout_event)
stderr_event = self.get_background_log(idx, cmd, cmd.stderr, 'stderr')
if stderr_event:
all_events.append(stderr_event)
# poll() returns None while the process is still running.
exit_code = cmd.poll()
if exit_code is not None:
event = Event('output', {'output': 'Background command %d exited with code %d' % (idx, exit_code)})
all_events.append(event)
self.add_event(event)
# Keep only the commands that are still running.
# NOTE(review): indentation is lost in this view; presumably this filter runs
# once after the loop rather than per iteration -- confirm against the file.
self.background_commands = [cmd for cmd in self.background_commands if cmd.poll() is None]
return all_events

View File

@ -127,9 +127,8 @@ def request_action(task, thoughts, background_commands=[]):
bg_commands_message = ""
if len(background_commands) > 0:
bg_commands_message = "The following commands are running in the background:"
for idx, command in enumerate(background_commands):
# TODO: make command IDs long-lived, instead of the index
bg_commands_message += f"\n* {idx}: {command}"
for id, command in background_commands.items():
bg_commands_message += f"\n`{id}`: {command.command}"
bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
latest_thought = thoughts[-1]

View File

@ -1,5 +1,5 @@
import agenthub.langchains_agent.utils.json as json
from agenthub.langchains_agent.utils.event import Event
from opendevin.lib.event import Event
import agenthub.langchains_agent.utils.llm as llm

View File

@ -3,6 +3,9 @@ from typing import List, Dict, Type
from dataclasses import dataclass
from enum import Enum
from .lib.event import Event
from .lib.command_manager import CommandManager
from .controller import AgentController
class Role(Enum):
SYSTEM = "system" # system message for LLM
@ -86,7 +89,17 @@ class Agent(ABC):
return self._history
@abstractmethod
def run(self) -> None:
def add_event(self, event: Event) -> None:
"""
Adds an event to the agent's history.
Parameters:
- event (Event): The event to add to the history.
"""
pass
@abstractmethod
def step(self, cmd_mgr: CommandManager) -> Event:
"""
Starts the execution of the assigned instruction. This method should
be implemented by subclasses to define the specific execution logic.
@ -94,13 +107,15 @@ class Agent(ABC):
pass
@abstractmethod
def chat(self, message: str) -> None:
def search_memory(self, query: str) -> List[str]:
"""
Optional method for interactive communication with the agent during its execution. Implementations
can use this method to modify the agent's behavior or state based on chat inputs.
Searches the agent's memory for information relevant to the given query.
Parameters:
- message (str): The chat message or command.
- query (str): The query to search for in the agent's memory.
Returns:
- response (str): The response to the query.
"""
pass

49
opendevin/controller.py Normal file
View File

@ -0,0 +1,49 @@
from opendevin.lib.command_manager import CommandManager
from opendevin.lib.event import Event
def print_callback(event):
    """Callback that echoes ``event`` to stdout, flushing right away."""
    print(event, flush=True)
class AgentController:
    """Drives an agent through its step loop and executes the actions it emits.

    On every iteration the controller forwards background-command events to
    its callbacks, asks the agent for its next action, and -- when that action
    is runnable -- runs it and forwards the resulting output/error event.
    """

    def __init__(self, agent, max_iterations=100, callbacks=None):
        """
        Parameters:
        - agent: object providing ``step(cmd_mgr)`` and ``add_event(event)``.
        - max_iterations (int): upper bound on the number of loop iterations.
        - callbacks: optional iterable of callables invoked with every event.
          ``agent.add_event`` and ``print_callback`` are always appended.
        """
        self.agent = agent
        self.max_iterations = max_iterations
        # Not read anywhere in the visible code; kept so the attribute stays available.
        self.background_commands = []
        self.command_manager = CommandManager()
        # Build a fresh list: a mutable default (or the caller's own list) would
        # otherwise accumulate the two built-in callbacks on every construction.
        self.callbacks = list(callbacks) if callbacks is not None else []
        self.callbacks.append(self.agent.add_event)
        self.callbacks.append(print_callback)

    def _notify(self, event):
        """Pass ``event`` to every registered callback, in registration order."""
        for callback in self.callbacks:
            callback(event)

    def maybe_perform_action(self, event):
        """Run ``event`` if it is runnable and wrap the result in a new Event.

        Returns:
        - None when ``event`` is falsy or not runnable; otherwise an 'output'
          event holding the result, or an 'error' event if running raised.
        """
        if not (event and event.is_runnable()):
            return
        action = 'output'
        try:
            output = event.run(self)
        except Exception as e:
            # Report the failure to the agent as an event instead of aborting the loop.
            output = 'Error: ' + str(e)
            action = 'error'
        out_event = Event(action, {'output': output})
        return out_event

    def start_loop(self):
        """Run the agent until it emits 'finish' or ``max_iterations`` is reached."""
        for i in range(self.max_iterations):
            print("STEP", i, flush=True)
            for event in self.command_manager.get_background_events():
                self._notify(event)

            action_event = self.agent.step(self.command_manager)
            self._notify(action_event)
            if action_event.action == 'finish':
                break
            print("---", flush=True)

            output_event = self.maybe_perform_action(action_event)
            if output_event is not None:
                self._notify(output_event)
            print("==============", flush=True)

View File

@ -1,5 +1,3 @@
from .run import run
from .kill import kill
from .browse import browse
from .write import write
from .read import read

View File

@ -0,0 +1,89 @@
import subprocess
import select
from typing import List
from opendevin.lib.event import Event
class BackgroundCommand:
    """A command running in the background together with its process handle."""

    def __init__(self, id: int, command: str, process: subprocess.Popen):
        """
        Parameters:
        - id (int): identifier assigned to this command.
        - command (str): the command line that was started.
        - process (subprocess.Popen): the running process; its stdout/stderr are read for logs.
        """
        self.id = id
        self.command = command
        self.process = process

    def _get_log_from_stream(self, stream):
        """Read the lines currently available on ``stream``.

        Polls with a 0.1s select timeout and stops when nothing is readable or
        readline returns an empty string. Returns the text read, or None if
        nothing was read.
        """
        chunks = []
        while select.select([stream], [], [], .1)[0]:
            line = stream.readline()
            if line == '':
                break
            chunks.append(line)
        return "".join(chunks) or None

    def get_logs(self):
        """Return ``(stdout, stderr, exit_code)`` for the process.

        Each of stdout/stderr is None when no new text was read; ``exit_code``
        is the result of ``poll()`` (None while the process is still running).
        """
        out_text = self._get_log_from_stream(self.process.stdout)
        err_text = self._get_log_from_stream(self.process.stderr)
        return out_text, err_text, self.process.poll()
class CommandManager:
    """Runs shell commands and tracks the ones left running in the background.

    Background commands are stored in ``background_commands``, keyed by an id
    taken from a counter that only ever increases, so ids are never reused.
    """

    def __init__(self):
        self.cur_id = 0
        self.background_commands = {}

    def run_command(self, command: str, background=False) -> str:
        """Run ``command`` in the background if ``background`` is truthy, else wait for it."""
        if background:
            return self.run_background(command)
        return self.run_immediately(command)

    def run_immediately(self, command: str) -> str:
        """Run ``command`` via ``/bin/bash -c`` and return stdout followed by stderr.

        Raises:
        - ValueError: if the command exits with a non-zero status.
        """
        result = subprocess.run(["/bin/bash", "-c", command], capture_output=True, text=True)
        output = result.stdout + result.stderr
        exit_code = result.returncode
        if exit_code != 0:
            raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
        return output

    def run_background(self, command: str) -> str:
        """Start ``command`` in a shell without waiting and register it under a new id.

        Returns:
        - A message telling the agent which id to use with the `kill` action.
        """
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
        bg_cmd = BackgroundCommand(self.cur_id, command, process)
        self.cur_id += 1
        self.background_commands[bg_cmd.id] = bg_cmd
        return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)

    def kill_command(self, id: int) -> str:
        """Kill the background command registered under ``id`` and forget it.

        Returns:
        - A confirmation message naming the killed command id.

        Raises:
        - ValueError: if no background command is registered under ``id``.
        """
        # TODO: get log events before killing
        try:
            bg_cmd = self.background_commands[id]
        except KeyError:
            # A bare KeyError would reach the agent as just the id; say what went wrong.
            raise ValueError('Invalid command id to kill') from None
        bg_cmd.process.kill()
        del self.background_commands[id]
        return f"Background command {id} killed"

    def get_background_events(self) -> List[Event]:
        """Collect new output and exit notices from every background command.

        Commands that have exited are removed from ``background_commands``
        after their exit event has been recorded.
        """
        events = []
        # Iterate over a snapshot: finished commands are deleted inside the loop,
        # and deleting from a dict while iterating its live view raises RuntimeError.
        for id, cmd in list(self.background_commands.items()):
            stdout, stderr, exit_code = cmd.get_logs()
            if stdout is not None:
                events.append(Event('output', {
                    'output': stdout,
                    'stream': 'stdout',
                    'id': id,
                    'command': cmd.command,
                }))
            if stderr is not None:
                events.append(Event('output', {
                    'output': stderr,
                    'stream': 'stderr',
                    'id': id,
                    'command': cmd.command,
                }))
            if exit_code is not None:
                events.append(Event('output', {
                    'exit_code': exit_code,
                    'output': 'Background command %d exited with code %d' % (id, exit_code),
                    'id': id,
                    'command': cmd.command,
                }))
                del self.background_commands[id]
        return events

View File

@ -1,9 +1,14 @@
import os
import json
import agenthub.langchains_agent.utils.actions as actions
import opendevin.lib.actions as actions
ACTION_TYPES = ['run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish']
RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall']
class Event:
def __init__(self, action, args):
if action not in ACTION_TYPES:
raise ValueError('Invalid action type: ' + action)
self.action = action
self.args = args
@ -17,18 +22,18 @@ class Event:
}
def is_runnable(self):
return self.action in ['run', 'kill', 'browse', 'read', 'write', 'recall']
return self.action in RUNNABLE_ACTIONS
def run(self, agent):
def run(self, agent_controller):
if self.action == 'run':
cmd = self.args['command']
background = False
if 'background' in self.args and self.args['background']:
background = True
return actions.run(cmd, agent, background)
return agent_controller.command_manager.run_command(cmd, background)
if self.action == 'kill':
id = self.args['id']
return actions.kill(id, agent)
return agent_controller.command_manager.kill_command(id)
elif self.action == 'browse':
url = self.args['url']
return actions.browse(url)
@ -40,6 +45,6 @@ class Event:
contents = self.args['contents']
return actions.write(path, contents)
elif self.action == 'recall':
return agent.memory.search(self.args['query'])
return agent_controller.agent.search_memory(self.args['query'])
else:
raise ValueError('Invalid action type')

View File

@ -3,6 +3,7 @@ import argparse
import agenthub # for the agent registry
from opendevin.agent import Agent
from opendevin.controller import AgentController
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@ -18,4 +19,6 @@ if __name__ == "__main__":
workspace_dir=args.directory,
model_name=args.model_name
)
agent.run()
controller = AgentController(agent)
controller.start_loop()