mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Refactor agent interface a bit (#74)
* start moving files * initial refactor * factor out command management * fix command runner * add workspace to gitignore * factor out command manager * remove dupe add_event * update docs * fix init * fix langchain agent after merge
This commit is contained in:
parent
2de75d4782
commit
b84463f512
2
.gitignore
vendored
2
.gitignore
vendored
@ -14,7 +14,7 @@ dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
./lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
|
||||
@ -4,3 +4,54 @@ In this folder, there may exist multiple implementations of `Agent` that will be
|
||||
|
||||
For example, `agenthub/langchain_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
|
||||
Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
|
||||
|
||||
## Constructing an Agent
|
||||
Your agent must implement the following methods:
|
||||
|
||||
### `step`
|
||||
```
|
||||
def step(self, cmd_mgr: CommandManager) -> Event:
|
||||
```
|
||||
`step` moves the agent forward one step towards its goal. This probably means
|
||||
sending a prompt to the LLM, then parsing the response into an action `Event`.
|
||||
|
||||
Each Event has an `action` and a dict of `args`. Supported Events include:
|
||||
* `read` - reads the contents of a file. Arguments:
|
||||
* `path` - the path of the file to read
|
||||
* `write` - writes the contents to a file. Arguments:
|
||||
* `path` - the path of the file to write
|
||||
* `contents` - the contents to write to the file
|
||||
* `run` - runs a command. Arguments:
|
||||
* `command` - the command to run
|
||||
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
|
||||
* `kill` - kills a background command. Arguments:
|
||||
* `id` - the ID of the background command to kill
|
||||
* `browse` - opens a web page. Arguments:
|
||||
* `url` - the URL to open
|
||||
* `recall` - recalls a past memory. Arguments:
|
||||
* `query` - the query to search for
|
||||
* `think` - make a plan, set a goal, or record your thoughts. Arguments:
|
||||
* `thought` - the thought to record
|
||||
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
|
||||
|
||||
For Events like `read` and `run`, a follow-up event will be added via `add_event` with the output.
|
||||
|
||||
### `add_event`
|
||||
```
|
||||
def add_event(self, event: Event) -> None:
|
||||
```
|
||||
`add_event` adds an event to the agent's history. This could be a user message,
|
||||
an action taken by the agent, log output, file contents, or anything else.
|
||||
|
||||
You'll probably want to keep a history of events, and use them in your prompts
|
||||
so that the agent knows what it did recently. You may also want to keep events
|
||||
in a vector database so the agent can refer back to them.
|
||||
|
||||
The output of `step` will automatically be passed to this method.
|
||||
|
||||
### `search_memory`
|
||||
```
|
||||
def search_memory(self, query: str) -> List[str]:
|
||||
```
|
||||
`search_memory` should return a list of events that match the query. This will be used
|
||||
for the `recall` action.
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
import os
|
||||
import argparse
|
||||
from typing import List, Dict, Type
|
||||
|
||||
from opendevin.agent import Agent, Message
|
||||
|
||||
from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
|
||||
from agenthub.langchains_agent.utils.event import Event
|
||||
from opendevin.lib.event import Event
|
||||
|
||||
INITIAL_THOUGHTS = [
|
||||
"I exist!",
|
||||
@ -46,33 +47,12 @@ INITIAL_THOUGHTS = [
|
||||
|
||||
|
||||
class LangchainsAgent(Agent):
|
||||
_initialized = False
|
||||
|
||||
def _run_loop(self, agent: LangchainsAgentImpl, max_iterations=100):
|
||||
# TODO: make it add a Message to the history for each turn / event
|
||||
for i in range(max_iterations):
|
||||
print("STEP", i, flush=True)
|
||||
log_events = agent.get_background_logs()
|
||||
for event in log_events:
|
||||
print(event, flush=True)
|
||||
action = agent.get_next_action()
|
||||
if action.action == "finish":
|
||||
print("Done!", flush=True)
|
||||
break
|
||||
print(action, flush=True)
|
||||
print("---", flush=True)
|
||||
out = agent.maybe_perform_latest_action()
|
||||
print(out, flush=True)
|
||||
print("==============", flush=True)
|
||||
|
||||
def run(self) -> None:
|
||||
"""
|
||||
Starts the execution of the assigned instruction. This method should
|
||||
be implemented by subclasses to define the specific execution logic.
|
||||
"""
|
||||
print("Working in directory:", self.workspace_dir)
|
||||
os.chdir(self.workspace_dir)
|
||||
|
||||
agent = LangchainsAgentImpl(self.instruction)
|
||||
def _initialize(self):
|
||||
if self._initialized:
|
||||
return
|
||||
self.agent = LangchainsAgentImpl(self.instruction)
|
||||
next_is_output = False
|
||||
for thought in INITIAL_THOUGHTS:
|
||||
thought = thought.replace("$TASK", self.instruction)
|
||||
@ -94,12 +74,18 @@ class LangchainsAgent(Agent):
|
||||
next_is_output = True
|
||||
else:
|
||||
event = Event("think", {"thought": thought})
|
||||
self.agent.add_event(event)
|
||||
self._initialized = True
|
||||
|
||||
agent.add_event(event)
|
||||
self._run_loop(agent, self.max_steps)
|
||||
def add_event(self, event: Event) -> None:
|
||||
self.agent.add_event(event)
|
||||
|
||||
# Set the agent's completion status to True
|
||||
self._complete = True
|
||||
def step(self, cmd_mgr) -> Event:
|
||||
self._initialize()
|
||||
return self.agent.get_next_action(cmd_mgr)
|
||||
|
||||
def search_memory(self, query: str) -> List[str]:
|
||||
return self.agent.memory.search(query)
|
||||
|
||||
def chat(self, message: str) -> None:
|
||||
"""
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
def kill(id, agent):
|
||||
if id < 0 or id >= len(agent.background_commands):
|
||||
raise ValueError('Invalid command id to kill')
|
||||
agent.background_commands[id].kill()
|
||||
agent.background_commands.pop(id)
|
||||
return "Background command %d killed" % id
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
def run(cmd, agent, background=False):
|
||||
if background:
|
||||
return run_background(cmd, agent)
|
||||
result = subprocess.run(["/bin/bash", "-c", cmd], capture_output=True, text=True)
|
||||
output = result.stdout + result.stderr
|
||||
exit_code = result.returncode
|
||||
if exit_code != 0:
|
||||
raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
|
||||
return output
|
||||
|
||||
def run_background(cmd, agent):
|
||||
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
|
||||
agent.background_commands.append(process)
|
||||
return "Background command started. To stop it, send a `kill` action with id " + str(len(agent.background_commands) - 1)
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
import select
|
||||
|
||||
from agenthub.langchains_agent.utils.monologue import Monologue
|
||||
from agenthub.langchains_agent.utils.memory import LongTermMemory
|
||||
from agenthub.langchains_agent.utils.event import Event
|
||||
from opendevin.lib.event import Event
|
||||
import agenthub.langchains_agent.utils.llm as llm
|
||||
|
||||
MAX_OUTPUT_LENGTH = 5000
|
||||
@ -13,7 +11,6 @@ class Agent:
|
||||
self.task = task
|
||||
self.monologue = Monologue()
|
||||
self.memory = LongTermMemory()
|
||||
self.background_commands = []
|
||||
|
||||
def add_event(self, event):
|
||||
self.monologue.add_event(event)
|
||||
@ -21,65 +18,9 @@ class Agent:
|
||||
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
|
||||
self.monologue.condense()
|
||||
|
||||
def get_next_action(self):
|
||||
bg_commands = [cmd.args for cmd in self.background_commands]
|
||||
action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), bg_commands)
|
||||
def get_next_action(self, cmd_mgr):
|
||||
action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), cmd_mgr.background_commands)
|
||||
event = Event(action_dict['action'], action_dict['args'])
|
||||
self.latest_action = event
|
||||
self.add_event(event)
|
||||
return event
|
||||
|
||||
def maybe_perform_latest_action(self):
|
||||
if not (self.latest_action and self.latest_action.is_runnable()):
|
||||
return
|
||||
action = 'output'
|
||||
try:
|
||||
output = self.latest_action.run(self)
|
||||
except Exception as e:
|
||||
output = 'Error: ' + str(e)
|
||||
action = 'error'
|
||||
if len(output) > MAX_OUTPUT_LENGTH:
|
||||
output = output[:MAX_OUTPUT_LENGTH] + '...'
|
||||
out_event = Event(action, {'output': output})
|
||||
self.add_event(out_event)
|
||||
return out_event
|
||||
|
||||
def get_background_log(self, idx, cmd, stream, name):
|
||||
logs = ""
|
||||
while True:
|
||||
readable, _, _ = select.select([stream], [], [], .1)
|
||||
if not readable:
|
||||
break
|
||||
next = stream.readline()
|
||||
if next == '':
|
||||
break
|
||||
logs += next
|
||||
if logs == "": return
|
||||
|
||||
event = Event('output', {
|
||||
'output': logs,
|
||||
'stream':name,
|
||||
'id': idx,
|
||||
'command': cmd.args,
|
||||
})
|
||||
self.add_event(event)
|
||||
return event
|
||||
|
||||
def get_background_logs(self):
|
||||
all_events = []
|
||||
for idx, cmd in enumerate(self.background_commands):
|
||||
stdout_event = self.get_background_log(idx, cmd, cmd.stdout, 'stdout')
|
||||
if stdout_event:
|
||||
all_events.append(stdout_event)
|
||||
stderr_event = self.get_background_log(idx, cmd, cmd.stderr, 'stderr')
|
||||
if stderr_event:
|
||||
all_events.append(stderr_event)
|
||||
|
||||
exit_code = cmd.poll()
|
||||
if exit_code is not None:
|
||||
event = Event('output', {'output': 'Background command %d exited with code %d' % (idx, exit_code)})
|
||||
all_events.append(event)
|
||||
self.add_event(event)
|
||||
|
||||
self.background_commands = [cmd for cmd in self.background_commands if cmd.poll() is None]
|
||||
return all_events
|
||||
|
||||
@ -127,9 +127,8 @@ def request_action(task, thoughts, background_commands=[]):
|
||||
bg_commands_message = ""
|
||||
if len(background_commands) > 0:
|
||||
bg_commands_message = "The following commands are running in the background:"
|
||||
for idx, command in enumerate(background_commands):
|
||||
# TODO: make command IDs long-lived, instead of the index
|
||||
bg_commands_message += f"\n* {idx}: {command}"
|
||||
for id, command in background_commands.items():
|
||||
bg_commands_message += f"\n`{id}`: {command.command}"
|
||||
bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
|
||||
|
||||
latest_thought = thoughts[-1]
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import agenthub.langchains_agent.utils.json as json
|
||||
from agenthub.langchains_agent.utils.event import Event
|
||||
from opendevin.lib.event import Event
|
||||
|
||||
import agenthub.langchains_agent.utils.llm as llm
|
||||
|
||||
|
||||
@ -3,6 +3,9 @@ from typing import List, Dict, Type
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from .lib.event import Event
|
||||
from .lib.command_manager import CommandManager
|
||||
from .controller import AgentController
|
||||
|
||||
class Role(Enum):
|
||||
SYSTEM = "system" # system message for LLM
|
||||
@ -86,7 +89,17 @@ class Agent(ABC):
|
||||
return self._history
|
||||
|
||||
@abstractmethod
|
||||
def run(self) -> None:
|
||||
def add_event(self, event: Event) -> None:
|
||||
"""
|
||||
Adds an event to the agent's history.
|
||||
|
||||
Parameters:
|
||||
- event (Event): The event to add to the history.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def step(self, cmd_mgr: CommandManager) -> Event:
|
||||
"""
|
||||
Starts the execution of the assigned instruction. This method should
|
||||
be implemented by subclasses to define the specific execution logic.
|
||||
@ -94,13 +107,15 @@ class Agent(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def chat(self, message: str) -> None:
|
||||
def search_memory(self, query: str) -> List[str]:
|
||||
"""
|
||||
Optional method for interactive communication with the agent during its execution. Implementations
|
||||
can use this method to modify the agent's behavior or state based on chat inputs.
|
||||
Searches the agent's memory for information relevant to the given query.
|
||||
|
||||
Parameters:
|
||||
- message (str): The chat message or command.
|
||||
- query (str): The query to search for in the agent's memory.
|
||||
|
||||
Returns:
|
||||
- response (str): The response to the query.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
49
opendevin/controller.py
Normal file
49
opendevin/controller.py
Normal file
@ -0,0 +1,49 @@
|
||||
from opendevin.lib.command_manager import CommandManager
|
||||
from opendevin.lib.event import Event
|
||||
|
||||
def print_callback(event):
    """Write the event's string form to stdout, flushing immediately."""
    text = str(event)
    print(text, flush=True)
|
||||
|
||||
class AgentController:
    """Runs an agent's step loop and executes the actions it asks for.

    On every iteration the controller collects events from background
    commands, asks the agent for its next action via ``agent.step``, runs
    that action when it is runnable, and passes every resulting event to
    each registered callback.
    """

    def __init__(self, agent, max_iterations=100, callbacks=None):
        """Set up the controller.

        Parameters:
        - agent: object providing ``step(cmd_mgr)`` and ``add_event(event)``.
        - max_iterations (int): upper bound on the number of loop iterations.
        - callbacks (list | None): callables invoked with every event.
          ``agent.add_event`` and ``print_callback`` are always appended
          after the supplied ones.
        """
        self.agent = agent
        self.max_iterations = max_iterations
        # Not read anywhere in this class; kept so the attribute still exists.
        self.background_commands = []
        self.command_manager = CommandManager()
        # Work on a private copy: appending to a shared default list (or to
        # the caller's list) would leak callbacks between controllers.
        self.callbacks = list(callbacks) if callbacks is not None else []
        self.callbacks.append(self.agent.add_event)
        self.callbacks.append(print_callback)

    def _notify(self, event):
        """Pass ``event`` to every registered callback, in order."""
        for callback in self.callbacks:
            callback(event)

    def maybe_perform_action(self, event):
        """Run ``event`` if it is runnable and wrap the result in an Event.

        Returns:
        - None when ``event`` is falsy or not runnable.
        - An ``output`` Event holding the action's result, or an ``error``
          Event holding ``'Error: <message>'`` if running it raised.
        """
        if not (event and event.is_runnable()):
            return None
        action = 'output'
        try:
            output = event.run(self)
        except Exception as e:
            # Deliberately broad: any failure is turned into an `error`
            # event instead of aborting the loop.
            output = 'Error: ' + str(e)
            action = 'error'
        return Event(action, {'output': output})

    def start_loop(self):
        """Step the agent until it emits `finish` or max_iterations is hit."""
        for i in range(self.max_iterations):
            print("STEP", i, flush=True)
            for event in self.command_manager.get_background_events():
                self._notify(event)

            action_event = self.agent.step(self.command_manager)
            self._notify(action_event)
            if action_event.action == 'finish':
                break
            print("---", flush=True)

            output_event = self.maybe_perform_action(action_event)
            if output_event is not None:
                self._notify(output_event)
            print("==============", flush=True)
|
||||
@ -1,5 +1,3 @@
|
||||
from .run import run
|
||||
from .kill import kill
|
||||
from .browse import browse
|
||||
from .write import write
|
||||
from .read import read
|
||||
89
opendevin/lib/command_manager.py
Normal file
89
opendevin/lib/command_manager.py
Normal file
@ -0,0 +1,89 @@
|
||||
import subprocess
|
||||
import select
|
||||
from typing import List
|
||||
|
||||
from opendevin.lib.event import Event
|
||||
|
||||
class BackgroundCommand:
    """A command running in the background together with its process handle."""

    def __init__(self, id: int, command: str, process: subprocess.Popen):
        """Store the command's id, its command string and its Popen handle."""
        self.command = command
        self.id = id
        self.process = process

    def _get_log_from_stream(self, stream):
        """Read the lines currently available on ``stream``.

        Polls ``stream`` with a 0.1 second ``select`` timeout and reads one
        line per poll until the stream is not readable or reports
        end-of-file.

        Returns the concatenated lines, or None if nothing was read.
        """
        # NOTE(review): select() polls the underlying file descriptor while
        # readline() goes through the stream object; if the stream buffers
        # more than one line per read, later lines may only surface on a
        # subsequent call - confirm whether that matters here.
        lines = []
        while True:
            readable, _, _ = select.select([stream], [], [], .1)
            if not readable:
                break
            line = stream.readline()
            if line == '':
                # An empty read means end-of-file.
                break
            lines.append(line)
        if not lines:
            return None
        return "".join(lines)

    def get_logs(self):
        """Return ``(stdout, stderr, exit_code)`` for the process.

        ``stdout`` and ``stderr`` are the newly read text, or None when
        nothing was read. ``exit_code`` is the result of ``process.poll()``,
        i.e. None while the process is still running.
        """
        stdout = self._get_log_from_stream(self.process.stdout)
        stderr = self._get_log_from_stream(self.process.stderr)
        exit_code = self.process.poll()
        return stdout, stderr, exit_code
|
||||
|
||||
class CommandManager:
    """Runs shell commands and keeps track of those started in the background.

    Background commands are stored in ``background_commands``, a dict keyed
    by an integer id taken from ``cur_id``, which increases by one for each
    background command started.
    """

    def __init__(self):
        """Start with no background commands; ids begin at 0."""
        self.cur_id = 0
        self.background_commands = {}

    def run_command(self, command: str, background=False) -> str:
        """Run ``command`` in the background if requested, else immediately."""
        if background:
            return self.run_background(command)
        return self.run_immediately(command)

    def run_immediately(self, command: str) -> str:
        """Run ``command`` under ``/bin/bash -c`` and wait for it to finish.

        Returns stdout followed by stderr.

        Raises:
        - ValueError: if the command exits with a non-zero code; the message
          contains the exit code and the combined output.
        """
        result = subprocess.run(["/bin/bash", "-c", command], capture_output=True, text=True)
        output = result.stdout + result.stderr
        exit_code = result.returncode
        if exit_code != 0:
            raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
        return output

    def run_background(self, command: str) -> str:
        """Start ``command`` without waiting and register it under a new id.

        Returns a message that tells the caller which id to pass to `kill`.
        """
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
        bg_cmd = BackgroundCommand(self.cur_id, command, process)
        self.cur_id += 1
        self.background_commands[bg_cmd.id] = bg_cmd
        return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)

    def kill_command(self, id: int) -> str:
        """Kill the background command registered under ``id`` and forget it.

        Returns a confirmation message.

        Raises:
        - ValueError: if no background command is registered under ``id``.
        """
        # TODO: get log events before killing
        try:
            bg_cmd = self.background_commands[id]
        except KeyError:
            raise ValueError('Invalid command id to kill') from None
        bg_cmd.process.kill()
        del self.background_commands[id]
        return f"Background command {id} killed"

    def get_background_events(self) -> "List[Event]":
        """Collect new output and exit notifications from background commands.

        For every registered command this emits an `output` Event for newly
        read stdout, one for newly read stderr, and one announcing the exit
        code once the process has exited. Exited commands are removed from
        ``background_commands``.
        """
        events = []
        finished = []
        for cmd_id, cmd in self.background_commands.items():
            stdout, stderr, exit_code = cmd.get_logs()
            if stdout is not None:
                events.append(Event('output', {
                    'output': stdout,
                    'stream': 'stdout',
                    'id': cmd_id,
                    'command': cmd.command,
                }))
            if stderr is not None:
                events.append(Event('output', {
                    'output': stderr,
                    'stream': 'stderr',
                    'id': cmd_id,
                    'command': cmd.command,
                }))
            if exit_code is not None:
                events.append(Event('output', {
                    'exit_code': exit_code,
                    'output': 'Background command %d exited with code %d' % (cmd_id, exit_code),
                    'id': cmd_id,
                    'command': cmd.command,
                }))
                finished.append(cmd_id)
        # Remove exited commands only after the loop: deleting from a dict
        # while iterating over it raises RuntimeError.
        for cmd_id in finished:
            del self.background_commands[cmd_id]
        return events
|
||||
@ -1,9 +1,14 @@
|
||||
import os
|
||||
import json
|
||||
import agenthub.langchains_agent.utils.actions as actions
|
||||
import opendevin.lib.actions as actions
|
||||
|
||||
ACTION_TYPES = ['run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish']
|
||||
RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall']
|
||||
|
||||
class Event:
|
||||
def __init__(self, action, args):
|
||||
if action not in ACTION_TYPES:
|
||||
raise ValueError('Invalid action type: ' + action)
|
||||
self.action = action
|
||||
self.args = args
|
||||
|
||||
@ -17,18 +22,18 @@ class Event:
|
||||
}
|
||||
|
||||
def is_runnable(self):
|
||||
return self.action in ['run', 'kill', 'browse', 'read', 'write', 'recall']
|
||||
return self.action in RUNNABLE_ACTIONS
|
||||
|
||||
def run(self, agent):
|
||||
def run(self, agent_controller):
|
||||
if self.action == 'run':
|
||||
cmd = self.args['command']
|
||||
background = False
|
||||
if 'background' in self.args and self.args['background']:
|
||||
background = True
|
||||
return actions.run(cmd, agent, background)
|
||||
return agent_controller.command_manager.run_command(cmd, background)
|
||||
if self.action == 'kill':
|
||||
id = self.args['id']
|
||||
return actions.kill(id, agent)
|
||||
return agent_controller.command_manager.kill_command(id)
|
||||
elif self.action == 'browse':
|
||||
url = self.args['url']
|
||||
return actions.browse(url)
|
||||
@ -40,6 +45,6 @@ class Event:
|
||||
contents = self.args['contents']
|
||||
return actions.write(path, contents)
|
||||
elif self.action == 'recall':
|
||||
return agent.memory.search(self.args['query'])
|
||||
return agent_controller.agent.search_memory(self.args['query'])
|
||||
else:
|
||||
raise ValueError('Invalid action type')
|
||||
@ -3,6 +3,7 @@ import argparse
|
||||
|
||||
import agenthub # for the agent registry
|
||||
from opendevin.agent import Agent
|
||||
from opendevin.controller import AgentController
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run an agent with a specific task")
|
||||
@ -18,4 +19,6 @@ if __name__ == "__main__":
|
||||
workspace_dir=args.directory,
|
||||
model_name=args.model_name
|
||||
)
|
||||
agent.run()
|
||||
|
||||
controller = AgentController(agent)
|
||||
controller.start_loop()
|
||||
Loading…
x
Reference in New Issue
Block a user