Create generic LLM client using LiteLLM (#114)

* add generic llm client

* fix lint errors

* fix lint issues

* a potential suggestion for llm wrapper to keep all the function signatures for ide

* use completion partial

* fix resp

* remove unused args

* add back truncation logic

* fix add_event

* fix merge issues

* more merge issues fixed

* fix codeact agent

* remove dead code

* remove import

* unused imports

* fix ruff

* update requirements

* mypy fixes

* more lint fixes

* fix browser errors

* fix up observation conversion

* fix format of error

* change max iter default back to 100

* fix kill action

* fix docker cleanup

* add RUN_AS_DEVIN flag

* fix condense

* revert some files

* unused imports

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
Co-authored-by: Robert Brennan <rbren@Roberts-MacBook-Pro.local>
Robert Brennan 2024-03-26 00:10:23 -04:00, committed by GitHub
commit eb4a261880 (parent 815b78595a)
8 changed files with 126 additions and 119 deletions


@@ -1,6 +1,5 @@
import os
import re
from litellm import completion
from termcolor import colored
from typing import List, Mapping
@@ -17,6 +16,7 @@ from opendevin.observation import (
AgentMessageObservation,
)
from opendevin.llm.llm import LLM
assert (
"OPENAI_API_KEY" in os.environ
@@ -62,7 +62,7 @@ def parse_response(response) -> str:
class CodeActAgent(Agent):
def __init__(
self,
model_name: str
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
@@ -71,7 +71,7 @@ class CodeActAgent(Agent):
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(model_name)
super().__init__(llm)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""
@@ -83,13 +83,10 @@
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))
updated_info = state.updated_info
if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
@@ -100,10 +97,8 @@ class CodeActAgent(Agent):
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
response = completion(
response = self.llm.completion(
messages=self.messages,
model=self.model_name,
stop=["</execute>"],
temperature=0.0,
seed=42,
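
The hunks above drop the direct litellm import and the per-call model_name: the agent now receives an LLM client whose model and API key are already bound, and step() passes only per-request arguments. A minimal standalone sketch of the same call, using only what the diff shows:

    from opendevin.llm.llm import LLM

    # Model and api_key resolve from LLM_MODEL / LLM_API_KEY when not given.
    llm = LLM(model="gpt-4-0125-preview")

    messages = [{"role": "user", "content": "Print hello world in Python."}]

    # The same per-request arguments the agent passes in step(); model and
    # api_key no longer appear at the call site.
    response = llm.completion(
        messages=messages,
        stop=["</execute>"],
        temperature=0.0,
        seed=42,
    )
    print(response["choices"][0]["message"]["content"])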


@@ -1,9 +1,14 @@
from typing import List, Dict, Type
from typing import List
import agenthub.langchains_agent.utils.llm as llm
from opendevin.llm.llm import LLM
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import Action
import agenthub.langchains_agent.utils.prompts as prompts
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from opendevin.action import (
Action,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
@@ -14,15 +19,12 @@ from opendevin.action import (
AgentFinishAction,
)
from opendevin.observation import (
Observation,
CmdOutputObservation,
BrowserOutputObservation,
)
from opendevin.state import State
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
MAX_MONOLOGUE_LENGTH = 20000
MAX_OUTPUT_LENGTH = 5000
INITIAL_THOUGHTS = [
"I exist!",
@@ -66,26 +68,12 @@ INITIAL_THOUGHTS = [
MAX_OUTPUT_LENGTH = 5000
MAX_MONOLOGUE_LENGTH = 20000
ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
"run": CmdRunAction,
"kill": CmdKillAction,
"browse": BrowseURLAction,
"read": FileReadAction,
"write": FileWriteAction,
"recall": AgentRecallAction,
"think": AgentThinkAction,
"finish": AgentFinishAction,
}
CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
class LangchainsAgent(Agent):
_initialized = False
def __init__(self, model_name: str):
super().__init__(model_name)
self.monologue = Monologue(self.model_name)
def __init__(self, llm: LLM):
super().__init__(llm)
self.monologue = Monologue()
self.memory = LongTermMemory()
def _add_event(self, event: dict):
@@ -95,7 +83,7 @@ class LangchainsAgent(Agent):
self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense()
self.monologue.condense(self.llm)
def _initialize(self):
if self._initialized:
@@ -103,6 +91,8 @@ class LangchainsAgent(Agent):
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.monologue = Monologue()
self.memory = LongTermMemory()
next_is_output = False
for thought in INITIAL_THOUGHTS:
@@ -128,7 +118,7 @@ class LangchainsAgent(Agent):
else:
d = {"action": "think", "args": {"thought": thought}}
self._add_event(d)
self._add_event(d)
self._initialized = True
def step(self, state: State) -> Action:
@@ -143,14 +133,8 @@
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
# elif isinstance(obs, UserMessageObservation):
# d = {"action": "output", "args": {"output": obs.message}}
# elif isinstance(obs, AgentMessageObservation):
# d = {"action": "output", "args": {"output": obs.message}}
elif isinstance(obs, (BrowserOutputObservation, Observation)):
d = {"action": "output", "args": {"output": obs.content}}
else:
raise NotImplementedError(f"Unknown observation type: {obs}")
d = {"action": "output", "args": {"output": obs.content}}
self._add_event(d)
@@ -175,18 +159,16 @@
self._add_event(d)
state.updated_info = []
action_dict = llm.request_action(
prompt = prompts.get_request_action_prompt(
self.instruction,
self.monologue.get_thoughts(),
self.model_name,
state.background_commands_obs,
)
if action_dict is None:
action_dict = {"action": "think", "args": {"thought": "..."}}
# Translate action_dict to Action
action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
messages = [{"content": prompt,"role": "user"}]
resp = self.llm.completion(messages=messages)
action_resp = resp['choices'][0]['message']['content']
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action
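
The old llm.request_action helper, which built the prompt and ran a LangChain chain in one call, is split into three steps: build a prompt string, make one completion call through the injected client, and parse the reply back into an Action. A sketch of the resulting flow, with the thoughts and background commands shortened:

    import agenthub.langchains_agent.utils.prompts as prompts
    from opendevin.llm.llm import LLM

    llm = LLM()
    thoughts = [{"action": "think", "args": {"thought": "I should list the files."}}]

    # 1. Build the prompt: pure string templating, no LLM call.
    prompt = prompts.get_request_action_prompt(
        "fix the failing test",  # task
        thoughts,
        [],  # background_commands_obs
    )

    # 2. One completion call through the shared client.
    resp = llm.completion(messages=[{"role": "user", "content": prompt}])

    # 3. Parse the JSON reply into a concrete Action subclass.
    action = prompts.parse_action_response(resp["choices"][0]["message"]["content"])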


@@ -1,11 +1,10 @@
import agenthub.langchains_agent.utils.json as json
import agenthub.langchains_agent.utils.llm as llm
import agenthub.langchains_agent.utils.prompts as prompts
class Monologue:
def __init__(self, model_name):
def __init__(self):
self.thoughts = []
self.model_name = model_name
def add_event(self, t: dict):
if not isinstance(t, dict):
@@ -24,13 +23,11 @@ class Monologue:
print(f"Error serializing thought: {e}")
return total_length
def condense(self):
def condense(self, llm):
try:
new_thoughts = llm.summarize_monologue(self.thoughts, self.model_name)
# Ensure new_thoughts is not empty or significantly malformed before assigning
if not new_thoughts or len(new_thoughts) > len(self.thoughts):
raise ValueError("Condensing resulted in invalid state.")
self.thoughts = new_thoughts
prompt = prompts.get_summarize_monologue_prompt(self.thoughts)
response = llm.prompt(prompt)
self.thoughts = prompts.parse_summary_response(response)
except Exception as e:
# Consider logging the error here instead of or in addition to raising an exception
raise RuntimeError(f"Error condensing thoughts: {e}")
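
Monologue no longer holds a model name; the client is supplied at the one call site that needs it, so the class can be constructed and tested without credentials. A sketch using the import paths shown in this diff:

    from agenthub.langchains_agent.utils.monologue import Monologue
    from opendevin.llm.llm import LLM

    llm = LLM()
    monologue = Monologue()  # no model name at construction any more
    monologue.add_event({"action": "think", "args": {"thought": "I exist!"}})

    # The caller decides when to summarize and passes the client in.
    if monologue.get_total_length() > 20000:  # MAX_MONOLOGUE_LENGTH
        monologue.condense(llm)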


@@ -1,23 +1,44 @@
import os
from . import json
from typing import List, Dict, Type
from langchain_core.pydantic_v1 import BaseModel
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
if os.getenv("DEBUG"):
from langchain.globals import set_debug
set_debug(True)
from typing import List
from langchain_core.pydantic_v1 import BaseModel
from . import json
from opendevin.action import (
Action,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
)
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
"run": CmdRunAction,
"kill": CmdKillAction,
"browse": BrowseURLAction,
"read": FileReadAction,
"write": FileWriteAction,
"recall": AgentRecallAction,
"think": AgentThinkAction,
"finish": AgentFinishAction,
}
CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
ACTION_PROMPT = """
You're a thoughtful robot. Your main task is to {task}.
@@ -103,36 +124,16 @@ class NewMonologue(BaseModel):
new_monologue: List[_ActionDict]
def get_chain(template, model_name):
assert (
"OPENAI_API_KEY" in os.environ
), "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name) # type: ignore
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=llm)
return llm_chain
def get_summarize_monologue_prompt(thoughts):
prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
return prompt.format(monologue=json.dumps({'old_monologue': thoughts}))
def summarize_monologue(thoughts: List[dict], model_name):
llm_chain = get_chain(MONOLOGUE_SUMMARY_PROMPT, model_name)
parser = JsonOutputParser(pydantic_object=NewMonologue)
resp = llm_chain.invoke({"monologue": json.dumps({"old_monologue": thoughts})})
if os.getenv("DEBUG"):
print("resp", resp)
parsed = parser.parse(resp["text"])
return parsed["new_monologue"]
def request_action(
task,
thoughts: List[dict],
model_name: str,
background_commands_obs: List[CmdOutputObservation] = [],
def get_request_action_prompt(
task: str,
thoughts: List[dict],
background_commands_obs: List[CmdOutputObservation] = [],
):
llm_chain = get_chain(ACTION_PROMPT, model_name)
parser = JsonOutputParser(pydantic_object=_ActionDict)
hint = ""
hint = ''
if len(thoughts) > 0:
latest_thought = thoughts[-1]
if latest_thought["action"] == 'think':
@@ -149,17 +150,24 @@ def request_action(
for command_obs in background_commands_obs:
bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
latest_thought = thoughts[-1]
resp = llm_chain.invoke(
{
"monologue": json.dumps(thoughts),
"hint": hint,
"task": task,
"background_commands": bg_commands_message,
}
prompt = PromptTemplate.from_template(ACTION_PROMPT)
return prompt.format(
task=task,
monologue=json.dumps(thoughts),
background_commands=bg_commands_message,
hint=hint,
)
if os.getenv("DEBUG"):
print("resp", resp)
parsed = parser.parse(resp["text"])
return parsed
def parse_action_response(response: str) -> Action:
parser = JsonOutputParser(pydantic_object=_ActionDict)
action_dict = parser.parse(response)
action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
return action
def parse_summary_response(response: str) -> List[Action]:
parser = JsonOutputParser(pydantic_object=NewMonologue)
parsed = parser.parse(response)
thoughts = [ACTION_TYPE_TO_CLASS[t['action']](**t['args']) for t in parsed['new_monologue']]
return thoughts
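
With ACTION_TYPE_TO_CLASS now living in this module, parsing is independent of how the text was produced: JsonOutputParser extracts the JSON and the table maps it onto a concrete Action. A sketch with a hand-written reply (the "command" arg name for CmdRunAction is an assumption):

    import agenthub.langchains_agent.utils.prompts as prompts

    # A reply in the JSON shape that ACTION_PROMPT requests from the model.
    raw = '{"action": "run", "args": {"command": "ls -la"}}'

    action = prompts.parse_action_response(raw)
    print(type(action).__name__)  # CmdRunAction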


@@ -4,7 +4,7 @@ from typing import List, Dict, Type, TYPE_CHECKING
if TYPE_CHECKING:
from opendevin.action import Action
from opendevin.state import State
from opendevin.llm.llm import LLM
class Agent(ABC):
"""
@@ -19,9 +19,12 @@ class Agent(ABC):
_registry: Dict[str, Type["Agent"]] = {}
def __init__(self, model_name: str):
self.model_name = model_name
self.instruction: str = "" # need to be set before step
def __init__(
self,
llm: LLM,
):
self.instruction = ""
self.llm = llm
self._complete = False
@property
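
The base class now owns the client, so every subclass gets self.llm for free and no longer tracks a model name. A hypothetical subclass sketch (EchoAgent is not part of this commit, and any further abstract members of Agent are elided):

    from opendevin.action import Action, AgentThinkAction
    from opendevin.agent import Agent
    from opendevin.state import State

    class EchoAgent(Agent):
        """Hypothetical agent that relays its instruction through the LLM."""

        def step(self, state: State) -> Action:
            resp = self.llm.completion(
                messages=[{"role": "user", "content": self.instruction}]
            )
            content = resp["choices"][0]["message"]["content"]
            return AgentThinkAction(thought=content)  # kwarg inferred from the diff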

opendevin/llm/llm.py (new file, 21 lines)

@@ -0,0 +1,21 @@
from litellm import completion as litellm_completion
from functools import partial
import os
DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
class LLM:
def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
self.model = model if model else DEFAULT_MODEL
self.api_key = api_key if api_key else DEFAULT_API_KEY
self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
@property
def completion(self):
"""
Decorator for the litellm completion function.
"""
return self._completion
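
Binding model and api_key once with functools.partial means callers keep litellm's full completion signature (messages, stop, temperature, and so on), which is what the "keep all the function signatures for ide" bullet above refers to: tooling still sees litellm's parameters rather than an opaque wrapper. A minimal usage sketch:

    from opendevin.llm.llm import LLM

    llm = LLM()  # falls back to LLM_MODEL / LLM_API_KEY from the environment

    # Any remaining litellm kwarg can still be passed per call.
    resp = llm.completion(
        messages=[{"role": "user", "content": "Say hello."}],
        temperature=0.0,
    )
    print(resp["choices"][0]["message"]["content"])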


@@ -6,6 +6,7 @@ from typing import Type
import agenthub # noqa F401 (we import this to get the agents registered)
from opendevin.agent import Agent
from opendevin.controller import AgentController
from opendevin.llm.llm import LLM
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@@ -40,15 +41,15 @@ if __name__ == "__main__":
parser.add_argument(
"-i",
"--max-iterations",
default=10,
default=100,
type=int,
help="The maximum number of iterations to run the agent",
)
args = parser.parse_args()
print(f"Running agent {args.agent_cls} (model: {args.model_name}, directory: {args.directory}) with task: \"{args.task}\"")
llm = LLM(args.model_name)
AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
agent = AgentCls(model_name=args.model_name)
agent = AgentCls(llm=llm)
controller = AgentController(agent, workdir=args.directory, max_iterations=args.max_iterations)
asyncio.run(controller.start_loop(args.task))
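
The entry point builds a single LLM from the command-line model name and hands it to whichever agent class was requested; the same wiring works programmatically. A sketch (the registered agent name is an assumption):

    import asyncio
    import agenthub  # noqa F401 (registers the agent classes)
    from opendevin.agent import Agent
    from opendevin.controller import AgentController
    from opendevin.llm.llm import LLM

    llm = LLM("gpt-4-0125-preview")
    AgentCls = Agent.get_cls("LangchainsAgent")
    agent = AgentCls(llm=llm)
    controller = AgentController(agent, workdir="./workspace", max_iterations=100)
    asyncio.run(controller.start_loop("write a hello-world script"))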


@@ -6,6 +6,7 @@ from fastapi import WebSocketDisconnect
from opendevin.agent import Agent
from opendevin.controller import AgentController
from opendevin.llm.llm import LLM
from opendevin.action import (
Action,
@@ -121,15 +122,14 @@ class Session:
model = "gpt-4-0125-preview"
if start_event and "model" in start_event.args:
model = start_event.args["model"]
if not os.path.exists(directory):
print(f"Workspace directory {directory} does not exist. Creating it...")
os.makedirs(directory)
directory = os.path.relpath(directory, os.getcwd())
llm = LLM(model)
AgentCls = Agent.get_cls(agent_cls)
self.agent = AgentCls(model_name=model)
self.controller = AgentController(self.agent, directory, callbacks=[self.on_agent_event])
self.agent = AgentCls(llm)
self.controller = AgentController(self.agent, workdir=directory, callbacks=[self.on_agent_event])
await self.send({"action": "initialize", "message": "Control loop started."})
async def start_task(self, start_event):