Extract logic from init from langchains_agent and codeact_agent (#167)

This commit is contained in:
Anas DORBANI 2024-03-28 12:51:21 +00:00 committed by GitHub
parent b1944a63ef
commit 82c215ed5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 310 additions and 306 deletions
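This refactor reduces each agent package's __init__.py to an import plus an Agent.register call, moving the agent class itself into a dedicated module. A minimal sketch of the resulting __init__.py, assuming the paths agenthub/codeact_agent/__init__.py and agenthub/codeact_agent/codeact_agent.py implied by the relative imports in this diff (they are not stated explicitly):

# agenthub/codeact_agent/__init__.py (path inferred, not shown in the diff)
from opendevin.agent import Agent

from .codeact_agent import CodeActAgent

# The implementation now lives in codeact_agent.py; the package __init__
# only re-exports the class and registers it under its public name.
Agent.register("CodeActAgent", CodeActAgent)

The langchains_agent package follows the same pattern with LangchainsAgent.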

View File

@@ -1,130 +1,4 @@
import re
from typing import List, Mapping
from termcolor import colored
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
AgentEchoAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
AgentMessageObservation,
)
from opendevin.llm.llm import LLM
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
DO NOT provide code in ```triple backticks```. Instead, you should execute bash commands on behalf of the user by wrapping them with <execute> and </execute>.
For example:
You can list the files in the current directory by executing the following command:
<execute>ls</execute>
You can also install packages using pip:
<execute> pip install numpy </execute>
You can also write a block of code to a file:
<execute>
echo "import math
print(math.pi)" > math.py
</execute>
When you are done, execute "exit" to close the shell and end the conversation.
"""
INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
)
def parse_response(response) -> str:
action = response.choices[0].message.content
if "<execute>" in action and "</execute>" not in action:
action += "</execute>"
return action
class CodeActAgent(Agent):
def __init__(
self,
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
Parameters:
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(llm)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""
def step(self, state: State) -> Action:
if len(self.messages) == 0:
assert self.instruction, "Expecting instruction to be set"
self.messages = [
{"role": "system", "content": SYSTEM_MESSAGE},
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))
updated_info = state.updated_info
if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
elif isinstance(obs, CmdOutputObservation):
content = "OBSERVATION:\n" + obs.content
content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]"
self.messages.append({"role": "user", "content": content})
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
response = self.llm.completion(
messages=self.messages,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action_str: str = parse_response(response)
self.messages.append({"role": "assistant", "content": action_str})
print(colored("===ASSISTANT:===\n" + action_str, "yellow"))
command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
return AgentFinishAction()
return CmdRunAction(command=command_group)
# # execute the code
# # TODO: does exit_code get loaded into Message?
# exit_code, observation = self.env.execute(command_group)
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide an error message for the model to continue, similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
# observation = INVALID_INPUT_MESSAGE
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself
def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")
from .codeact_agent import CodeActAgent
Agent.register("CodeActAgent", CodeActAgent)

View File

@@ -0,0 +1,128 @@
import re
from typing import List, Mapping
from termcolor import colored
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
AgentEchoAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
AgentMessageObservation,
)
from opendevin.llm.llm import LLM
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
DO NOT provide code in ```triple backticks```. Instead, you should execute bash commands on behalf of the user by wrapping them with <execute> and </execute>.
For example:
You can list the files in the current directory by executing the following command:
<execute>ls</execute>
You can also install packages using pip:
<execute> pip install numpy </execute>
You can also write a block of code to a file:
<execute>
echo "import math
print(math.pi)" > math.py
</execute>
When you are done, execute "exit" to close the shell and end the conversation.
"""
INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
)
def parse_response(response) -> str:
action = response.choices[0].message.content
if "<execute>" in action and "</execute>" not in action:
action += "</execute>"
return action
class CodeActAgent(Agent):
def __init__(
self,
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
Parameters:
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(llm)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""
def step(self, state: State) -> Action:
if len(self.messages) == 0:
assert self.instruction, "Expecting instruction to be set"
self.messages = [
{"role": "system", "content": SYSTEM_MESSAGE},
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))
updated_info = state.updated_info
if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
elif isinstance(obs, CmdOutputObservation):
content = "OBSERVATION:\n" + obs.content
content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]"
self.messages.append({"role": "user", "content": content})
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
response = self.llm.completion(
messages=self.messages,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action_str: str = parse_response(response)
self.messages.append({"role": "assistant", "content": action_str})
print(colored("===ASSISTANT:===\n" + action_str, "yellow"))
command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
return AgentFinishAction()
return CmdRunAction(command=command_group)
# # execute the code
# # TODO: does exit_code get loaded into Message?
# exit_code, observation = self.env.execute(command_group)
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide an error message for the model to continue, similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
# observation = INVALID_INPUT_MESSAGE
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself
def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")

View File

@@ -1,182 +1,4 @@
from typing import List
from opendevin.llm.llm import LLM
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import Action
import agenthub.langchains_agent.utils.prompts as prompts
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from opendevin.action import (
NullAction,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
)
MAX_MONOLOGUE_LENGTH = 20000
MAX_OUTPUT_LENGTH = 5000
INITIAL_THOUGHTS = [
"I exist!",
"Hmm...looks like I can type in a command line prompt",
"Looks like I have a web browser too!",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"It seems like I have some kind of short term memory.",
"Each of my thoughts seems to be stored in a numbered list.",
"It seems whatever I say next will be added to the list.",
"But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"Fortunately I have long term memory!",
"I can just say RECALL, followed by the thing I want to remember. And then related thoughts just spill out!",
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
"RECALL what it is I want to do",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"Neat! And it looks like it's easy for me to use the command line too! I just have to say RUN followed by the command I want to run. The command output just jumps into my head!",
'RUN echo "hello world"',
"hello world",
"Cool! I bet I can read and edit files too.",
"RUN echo \"console.log('hello world')\" > test.js",
"",
"I just created test.js. I'll try and run it now.",
"RUN node test.js",
"hello world",
"it works!",
"And if I want to use the browser, I just need to say BROWSE, followed by a website I want to visit, or an action I want to take on the current site",
"Let's try that...",
"BROWSE google.com",
'<form><input type="text"></input><button type="submit"></button></form>',
"Very cool. Now to accomplish my task.",
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
"It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
]
class LangchainsAgent(Agent):
_initialized = False
def __init__(self, llm: LLM):
super().__init__(llm)
self.monologue = Monologue()
self.memory = LongTermMemory()
def _add_event(self, event: dict):
if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense(self.llm)
def _initialize(self):
if self._initialized:
return
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.monologue = Monologue()
self.memory = LongTermMemory()
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
d = {"action": "run", "args": {"command": command}}
next_is_output = True
elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
d = {"action": "recall", "args": {"query": query}}
next_is_output = True
elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
d = {"action": "think", "args": {"thought": thought}}
self._add_event(d)
self._initialized = True
def step(self, state: State) -> Action:
self._initialize()
# TODO: make langchains agent use Action & Observation
# completely from the ground up
# Translate state to action_dict
for prev_action, obs in state.updated_info:
d = None
if isinstance(obs, CmdOutputObservation):
if obs.error:
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
if d is not None:
self._add_event(d)
d = None
if isinstance(prev_action, CmdRunAction):
d = {"action": "run", "args": {"command": prev_action.command}}
elif isinstance(prev_action, CmdKillAction):
d = {"action": "kill", "args": {"id": prev_action.id}}
elif isinstance(prev_action, BrowseURLAction):
d = {"action": "browse", "args": {"url": prev_action.url}}
elif isinstance(prev_action, FileReadAction):
d = {"action": "read", "args": {"file": prev_action.path}}
elif isinstance(prev_action, FileWriteAction):
d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
elif isinstance(prev_action, AgentRecallAction):
d = {"action": "recall", "args": {"query": prev_action.query}}
elif isinstance(prev_action, AgentThinkAction):
d = {"action": "think", "args": {"thought": prev_action.thought}}
elif isinstance(prev_action, AgentFinishAction):
d = {"action": "finish"}
elif isinstance(prev_action, NullAction):
d = None
else:
raise ValueError(f"Unknown action type: {prev_action}")
if d is not None:
self._add_event(d)
state.updated_info = []
prompt = prompts.get_request_action_prompt(
self.instruction,
self.monologue.get_thoughts(),
state.background_commands_obs,
)
messages = [{"content": prompt,"role": "user"}]
resp = self.llm.completion(messages=messages)
action_resp = resp['choices'][0]['message']['content']
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action
def search_memory(self, query: str) -> List[str]:
return self.memory.search(query)
from .langchains_agent import LangchainsAgent
Agent.register("LangchainsAgent", LangchainsAgent)

View File

@@ -0,0 +1,180 @@
from typing import List
from opendevin.llm.llm import LLM
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import Action
import agenthub.langchains_agent.utils.prompts as prompts
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from opendevin.action import (
NullAction,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
)
MAX_MONOLOGUE_LENGTH = 20000
MAX_OUTPUT_LENGTH = 5000
INITIAL_THOUGHTS = [
"I exist!",
"Hmm...looks like I can type in a command line prompt",
"Looks like I have a web browser too!",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"It seems like I have some kind of short term memory.",
"Each of my thoughts seems to be stored in a numbered list.",
"It seems whatever I say next will be added to the list.",
"But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"Fortunately I have long term memory!",
"I can just say RECALL, followed by the thing I want to remember. And then related thoughts just spill out!",
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
"RECALL what it is I want to do",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"Neat! And it looks like it's easy for me to use the command line too! I just have to say RUN followed by the command I want to run. The command output just jumps into my head!",
'RUN echo "hello world"',
"hello world",
"Cool! I bet I can read and edit files too.",
"RUN echo \"console.log('hello world')\" > test.js",
"",
"I just created test.js. I'll try and run it now.",
"RUN node test.js",
"hello world",
"it works!",
"And if I want to use the browser, I just need to say BROWSE, followed by a website I want to visit, or an action I want to take on the current site",
"Let's try that...",
"BROWSE google.com",
'<form><input type="text"></input><button type="submit"></button></form>',
"Very cool. Now to accomplish my task.",
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
"It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
]
class LangchainsAgent(Agent):
_initialized = False
def __init__(self, llm: LLM):
super().__init__(llm)
self.monologue = Monologue()
self.memory = LongTermMemory()
def _add_event(self, event: dict):
if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense(self.llm)
def _initialize(self):
if self._initialized:
return
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.monologue = Monologue()
self.memory = LongTermMemory()
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
d = {"action": "run", "args": {"command": command}}
next_is_output = True
elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
d = {"action": "recall", "args": {"query": query}}
next_is_output = True
elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
d = {"action": "think", "args": {"thought": thought}}
self._add_event(d)
self._initialized = True
def step(self, state: State) -> Action:
self._initialize()
# TODO: make langchains agent use Action & Observation
# completely from the ground up
# Translate state to action_dict
for prev_action, obs in state.updated_info:
d = None
if isinstance(obs, CmdOutputObservation):
if obs.error:
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
if d is not None:
self._add_event(d)
d = None
if isinstance(prev_action, CmdRunAction):
d = {"action": "run", "args": {"command": prev_action.command}}
elif isinstance(prev_action, CmdKillAction):
d = {"action": "kill", "args": {"id": prev_action.id}}
elif isinstance(prev_action, BrowseURLAction):
d = {"action": "browse", "args": {"url": prev_action.url}}
elif isinstance(prev_action, FileReadAction):
d = {"action": "read", "args": {"file": prev_action.path}}
elif isinstance(prev_action, FileWriteAction):
d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
elif isinstance(prev_action, AgentRecallAction):
d = {"action": "recall", "args": {"query": prev_action.query}}
elif isinstance(prev_action, AgentThinkAction):
d = {"action": "think", "args": {"thought": prev_action.thought}}
elif isinstance(prev_action, AgentFinishAction):
d = {"action": "finish"}
elif isinstance(prev_action, NullAction):
d = None
else:
raise ValueError(f"Unknown action type: {prev_action}")
if d is not None:
self._add_event(d)
state.updated_info = []
prompt = prompts.get_request_action_prompt(
self.instruction,
self.monologue.get_thoughts(),
state.background_commands_obs,
)
messages = [{"content": prompt,"role": "user"}]
resp = self.llm.completion(messages=messages)
action_resp = resp['choices'][0]['message']['content']
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action
def search_memory(self, query: str) -> List[str]:
return self.memory.search(query)