Mirror of https://github.com/OpenHands/OpenHands.git, synced 2025-12-26 05:48:36 +08:00
Create generic LLM client using LiteLLM (#114)
* add generic llm client
* fix lint errors
* fix lint issues
* a potential suggestion for llm wrapper to keep all the function signatures for ide
* use completion partial
* fix resp
* remove unused args
* add back truncation logic
* fix add_event
* fix merge issues
* more merge issues fixed
* fix codeact agent
* remove dead code
* remove import
* unused imports
* fix ruff
* update requirements
* mypy fixes
* more lint fixes
* fix browser errors
* fix up observation conversion
* fix format of error
* change max iter default back to 100
* fix kill action
* fix docker cleanup
* add RUN_AS_DEVIN flag
* fix condense
* revert some files
* unused imports

---------

Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
Co-authored-by: Robert Brennan <rbren@Roberts-MacBook-Pro.local>
This commit is contained in:
parent 815b78595a
commit eb4a261880
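At its core, this change swaps per-agent model_name strings for one injected client. A minimal sketch of the new pattern, using only names this diff introduces (the response shape is litellm's OpenAI-compatible one):

    from opendevin.llm.llm import LLM

    # Bind the model once; agents receive the client instead of a model name.
    llm = LLM(model="gpt-4-0125-preview")
    resp = llm.completion(messages=[{"role": "user", "content": "Hello"}])
    print(resp["choices"][0]["message"]["content"])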
@@ -1,6 +1,5 @@
 import os
 import re
-from litellm import completion
 from termcolor import colored
 from typing import List, Mapping

@@ -17,6 +16,7 @@ from opendevin.observation import (
     AgentMessageObservation,
 )

+from opendevin.llm.llm import LLM

 assert (
     "OPENAI_API_KEY" in os.environ
@@ -62,7 +62,7 @@ def parse_response(response) -> str:
 class CodeActAgent(Agent):
     def __init__(
         self,
-        model_name: str
+        llm: LLM,
     ) -> None:
         """
         Initializes a new instance of the CodeActAgent class.
@@ -71,7 +71,7 @@ class CodeActAgent(Agent):
         - instruction (str): The instruction for the agent to execute.
         - max_steps (int): The maximum number of steps to run the agent.
         """
-        super().__init__(model_name)
+        super().__init__(llm)
         self.messages: List[Mapping[str, str]] = []
         self.instruction: str = ""

@@ -83,13 +83,10 @@ class CodeActAgent(Agent):
             {"role": "user", "content": self.instruction},
         ]
         print(colored("===USER:===\n" + self.instruction, "green"))

         updated_info = state.updated_info

         if updated_info:
             for prev_action, obs in updated_info:
-                assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
-
                 if isinstance(obs, AgentMessageObservation):  # warning message from itself
                     self.messages.append({"role": "user", "content": obs.content})
                     print(colored("===USER:===\n" + obs.content, "green"))
@@ -100,10 +97,8 @@ class CodeActAgent(Agent):
                     print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
                 else:
                     raise NotImplementedError(f"Unknown observation type: {obs.__class__}")

-        response = completion(
+        response = self.llm.completion(
             messages=self.messages,
-            model=self.model_name,
             stop=["</execute>"],
             temperature=0.0,
             seed=42,
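With the model and API key bound inside the wrapper, the call site keeps only per-request decoding controls: temperature=0.0 plus a fixed seed pushes toward reproducible output, and the stop sequence cuts generation at the close of the agent's execute block. A standalone sketch of the same call (the prompt text is illustrative):

    response = llm.completion(
        messages=[{"role": "user", "content": "ls"}],
        stop=["</execute>"],   # truncate at the end of the execute block
        temperature=0.0,       # deterministic-leaning decoding
        seed=42,
    )
    reply = response["choices"][0]["message"]["content"]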
@@ -1,9 +1,14 @@
-from typing import List, Dict, Type
+from typing import List

-import agenthub.langchains_agent.utils.llm as llm
+from opendevin.llm.llm import LLM
 from opendevin.agent import Agent
+from opendevin.state import State
+from opendevin.action import Action
+import agenthub.langchains_agent.utils.prompts as prompts
+from agenthub.langchains_agent.utils.monologue import Monologue
+from agenthub.langchains_agent.utils.memory import LongTermMemory

 from opendevin.action import (
     Action,
     CmdRunAction,
     CmdKillAction,
     BrowseURLAction,
@@ -14,15 +19,12 @@ from opendevin.action import (
     AgentFinishAction,
 )
 from opendevin.observation import (
     Observation,
     CmdOutputObservation,
     BrowserOutputObservation,
 )
-from opendevin.state import State

-from agenthub.langchains_agent.utils.monologue import Monologue
-from agenthub.langchains_agent.utils.memory import LongTermMemory

 MAX_MONOLOGUE_LENGTH = 20000
 MAX_OUTPUT_LENGTH = 5000

 INITIAL_THOUGHTS = [
     "I exist!",
@@ -66,26 +68,12 @@ INITIAL_THOUGHTS = [
-MAX_OUTPUT_LENGTH = 5000
-MAX_MONOLOGUE_LENGTH = 20000
-
-
-ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
-    "run": CmdRunAction,
-    "kill": CmdKillAction,
-    "browse": BrowseURLAction,
-    "read": FileReadAction,
-    "write": FileWriteAction,
-    "recall": AgentRecallAction,
-    "think": AgentThinkAction,
-    "finish": AgentFinishAction,
-}
-
-CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
-
 class LangchainsAgent(Agent):
     _initialized = False

-    def __init__(self, model_name: str):
-        super().__init__(model_name)
-        self.monologue = Monologue(self.model_name)
+    def __init__(self, llm: LLM):
+        super().__init__(llm)
+        self.monologue = Monologue()
         self.memory = LongTermMemory()

     def _add_event(self, event: dict):
@@ -95,7 +83,7 @@ class LangchainsAgent(Agent):
         self.monologue.add_event(event)
         self.memory.add_event(event)
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
-            self.monologue.condense()
+            self.monologue.condense(self.llm)

     def _initialize(self):
         if self._initialized:
@@ -103,6 +91,8 @@ class LangchainsAgent(Agent):

         if self.instruction is None or self.instruction == "":
             raise ValueError("Instruction must be provided")
+        self.monologue = Monologue()
+        self.memory = LongTermMemory()

         next_is_output = False
         for thought in INITIAL_THOUGHTS:
@@ -128,7 +118,7 @@ class LangchainsAgent(Agent):
             else:
                 d = {"action": "think", "args": {"thought": thought}}

-                self._add_event(d)
+            self._add_event(d)
         self._initialized = True

     def step(self, state: State) -> Action:
@@ -143,14 +133,8 @@ class LangchainsAgent(Agent):
                     d = {"action": "error", "args": {"output": obs.content}}
                 else:
                     d = {"action": "output", "args": {"output": obs.content}}
-            # elif isinstance(obs, UserMessageObservation):
-            #     d = {"action": "output", "args": {"output": obs.message}}
-            # elif isinstance(obs, AgentMessageObservation):
-            #     d = {"action": "output", "args": {"output": obs.message}}
             elif isinstance(obs, (BrowserOutputObservation, Observation)):
                 d = {"action": "output", "args": {"output": obs.content}}
             else:
-                raise NotImplementedError(f"Unknown observation type: {obs}")
+                d = {"action": "output", "args": {"output": obs.content}}
             self._add_event(d)


@@ -175,18 +159,16 @@ class LangchainsAgent(Agent):
             self._add_event(d)

         state.updated_info = []

-        action_dict = llm.request_action(
+        prompt = prompts.get_request_action_prompt(
             self.instruction,
             self.monologue.get_thoughts(),
-            self.model_name,
             state.background_commands_obs,
         )
-        if action_dict is None:
-            action_dict = {"action": "think", "args": {"thought": "..."}}
-
-        # Translate action_dict to Action
-        action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+        messages = [{"content": prompt,"role": "user"}]
+        resp = self.llm.completion(messages=messages)
+        action_resp = resp['choices'][0]['message']['content']
+        action = prompts.parse_action_response(action_resp)
         self.latest_action = action
         return action
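A practical payoff of routing every call through self.llm: step() can now be exercised without network access, because the agent depends only on the completion contract. A hypothetical stub, not part of this commit:

    # Test double satisfying the same completion signature.
    class FakeLLM:
        def completion(self, messages, **kwargs):
            content = '{"action": "think", "args": {"thought": "..."}}'
            return {"choices": [{"message": {"content": content}}]}

    agent = LangchainsAgent(llm=FakeLLM())  # step() parses the canned reply into an action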
@@ -1,11 +1,10 @@
 import agenthub.langchains_agent.utils.json as json
-import agenthub.langchains_agent.utils.llm as llm
+import agenthub.langchains_agent.utils.prompts as prompts

 class Monologue:
-    def __init__(self, model_name):
+    def __init__(self):
         self.thoughts = []
-        self.model_name = model_name

     def add_event(self, t: dict):
         if not isinstance(t, dict):
@@ -24,13 +23,11 @@ class Monologue:
             print(f"Error serializing thought: {e}")
         return total_length

-    def condense(self):
+    def condense(self, llm):
         try:
-            new_thoughts = llm.summarize_monologue(self.thoughts, self.model_name)
-            # Ensure new_thoughts is not empty or significantly malformed before assigning
-            if not new_thoughts or len(new_thoughts) > len(self.thoughts):
-                raise ValueError("Condensing resulted in invalid state.")
-            self.thoughts = new_thoughts
+            prompt = prompts.get_summarize_monologue_prompt(self.thoughts)
+            response = llm.prompt(prompt)
+            self.thoughts = prompts.parse_summary_response(response)
         except Exception as e:
             # Consider logging the error here instead of or in addition to raising an exception
             raise RuntimeError(f"Error condensing thoughts: {e}")
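Monologue is now model-agnostic: it stores thoughts and lets the caller decide when, and through which client, to summarize. A usage sketch under the names above (llm is any LLM instance; MAX_MONOLOGUE_LENGTH comes from langchains_agent.py):

    monologue = Monologue()
    monologue.add_event({"action": "think", "args": {"thought": "I exist!"}})
    # The caller decides when and with which client to condense:
    if monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
        monologue.condense(llm)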
@@ -1,23 +1,44 @@
-import os
-
-from . import json
-
-from langchain_core.pydantic_v1 import BaseModel
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import JsonOutputParser
-
-if os.getenv("DEBUG"):
-    from langchain.globals import set_debug
-
-    set_debug(True)
-
-from typing import List
+from typing import List, Dict, Type
+from langchain_core.pydantic_v1 import BaseModel
+from . import json
+
+from opendevin.action import (
+    Action,
+    CmdRunAction,
+    CmdKillAction,
+    BrowseURLAction,
+    FileReadAction,
+    FileWriteAction,
+    AgentRecallAction,
+    AgentThinkAction,
+    AgentFinishAction,
+)
+from opendevin.observation import (
+    CmdOutputObservation,
+)

-from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
-from langchain_openai import ChatOpenAI

+ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
+    "run": CmdRunAction,
+    "kill": CmdKillAction,
+    "browse": BrowseURLAction,
+    "read": FileReadAction,
+    "write": FileWriteAction,
+    "recall": AgentRecallAction,
+    "think": AgentThinkAction,
+    "finish": AgentFinishAction,
+}
+CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}

 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is to {task}.
@@ -103,36 +124,16 @@ class NewMonologue(BaseModel):
     new_monologue: List[_ActionDict]


-def get_chain(template, model_name):
-    assert (
-        "OPENAI_API_KEY" in os.environ
-    ), "Please set the OPENAI_API_KEY environment variable to use langchains_agent."
-    llm = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"), model_name=model_name)  # type: ignore
-    prompt = PromptTemplate.from_template(template)
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-    return llm_chain
+def get_summarize_monologue_prompt(thoughts):
+    prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
+    return prompt.format(monologue=json.dumps({'old_monologue': thoughts}))


-def summarize_monologue(thoughts: List[dict], model_name):
-    llm_chain = get_chain(MONOLOGUE_SUMMARY_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=NewMonologue)
-    resp = llm_chain.invoke({"monologue": json.dumps({"old_monologue": thoughts})})
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed["new_monologue"]
-
-
-def request_action(
-    task,
-    thoughts: List[dict],
-    model_name: str,
-    background_commands_obs: List[CmdOutputObservation] = [],
+def get_request_action_prompt(
+    task: str,
+    thoughts: List[dict],
+    background_commands_obs: List[CmdOutputObservation] = [],
 ):
-    llm_chain = get_chain(ACTION_PROMPT, model_name)
-    parser = JsonOutputParser(pydantic_object=_ActionDict)
-    hint = ""
+    hint = ''
     if len(thoughts) > 0:
         latest_thought = thoughts[-1]
         if latest_thought["action"] == 'think':
@@ -149,17 +150,24 @@ def request_action(
     for command_obs in background_commands_obs:
         bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
     bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."

-    latest_thought = thoughts[-1]
-    resp = llm_chain.invoke(
-        {
-            "monologue": json.dumps(thoughts),
-            "hint": hint,
-            "task": task,
-            "background_commands": bg_commands_message,
-        }
+    prompt = PromptTemplate.from_template(ACTION_PROMPT)
+    return prompt.format(
+        task=task,
+        monologue=json.dumps(thoughts),
+        background_commands=bg_commands_message,
+        hint=hint,
     )
-    if os.getenv("DEBUG"):
-        print("resp", resp)
-    parsed = parser.parse(resp["text"])
-    return parsed
+
+
+def parse_action_response(response: str) -> Action:
+    parser = JsonOutputParser(pydantic_object=_ActionDict)
+    action_dict = parser.parse(response)
+    action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
+    return action
+
+
+def parse_summary_response(response: str) -> List[Action]:
+    parser = JsonOutputParser(pydantic_object=NewMonologue)
+    parsed = parser.parse(response)
+    thoughts = [ACTION_TYPE_TO_CLASS[t['action']](**t['args']) for t in parsed['new_monologue']]
+    return thoughts
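These helpers confine the untyped boundary to the model's reply string. A round-trip sketch; that CmdRunAction accepts a command keyword is an assumption read off the mapping above:

    raw = '{"action": "run", "args": {"command": "ls"}}'  # a well-formed model reply
    action = parse_action_response(raw)
    # JsonOutputParser pulls the JSON out of the text, then
    # ACTION_TYPE_TO_CLASS["run"](**args) yields CmdRunAction(command="ls").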
@@ -4,7 +4,7 @@ from typing import List, Dict, Type, TYPE_CHECKING
 if TYPE_CHECKING:
     from opendevin.action import Action
     from opendevin.state import State

+from opendevin.llm.llm import LLM

 class Agent(ABC):
     """
@@ -19,9 +19,12 @@ class Agent(ABC):

     _registry: Dict[str, Type["Agent"]] = {}

-    def __init__(self, model_name: str):
-        self.model_name = model_name
-        self.instruction: str = ""  # need to be set before step
+    def __init__(
+        self,
+        llm: LLM,
+    ):
+        self.instruction = ""
+        self.llm = llm
         self._complete = False

     @property
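The base class now owns the client, so a subclass only implements behavior. A minimal hypothetical subclass (EchoAgent is illustrative and not part of this commit; any other abstract members of Agent are omitted from the sketch):

    class EchoAgent(Agent):
        def step(self, state):
            # self.llm comes from Agent.__init__; no model_name plumbing needed.
            resp = self.llm.completion(
                messages=[{"role": "user", "content": self.instruction}]
            )
            thought = resp["choices"][0]["message"]["content"]
            return AgentThinkAction(thought=thought)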
opendevin/llm/llm.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+from litellm import completion as litellm_completion
+from functools import partial
+import os
+
+DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
+DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+
+
+class LLM:
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY):
+        self.model = model if model else DEFAULT_MODEL
+        self.api_key = api_key if api_key else DEFAULT_API_KEY
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+    @property
+    def completion(self):
+        """
+        Decorator for the litellm completion function.
+        """
+        return self._completion
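The wrapper is deliberately thin: functools.partial pre-binds model and api_key while leaving every other litellm.completion parameter open, which keeps the upstream signature visible to IDEs (the "keep all the function signatures for ide" bullet in the commit message). Exposing the bound partial through a property makes it read like an ordinary method:

    llm = LLM()  # falls back to the LLM_MODEL / LLM_API_KEY environment variables
    resp = llm.completion(
        messages=[{"role": "user", "content": "What is 2 + 2?"}],
        temperature=0.0,  # any litellm.completion kwarg still passes through
    )
    print(resp["choices"][0]["message"]["content"])

The `model if model else DEFAULT_MODEL` guard also lets callers hand in None or an empty string and still get the default.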
@@ -6,6 +6,7 @@ from typing import Type
 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM

 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@@ -40,15 +41,15 @@ if __name__ == "__main__":
     parser.add_argument(
         "-i",
         "--max-iterations",
-        default=10,
+        default=100,
         type=int,
         help="The maximum number of iterations to run the agent",
     )
     args = parser.parse_args()

     print(f"Running agent {args.agent_cls} (model: {args.model_name}, directory: {args.directory}) with task: \"{args.task}\"")

+    llm = LLM(args.model_name)
     AgentCls: Type[Agent] = Agent.get_cls(args.agent_cls)
-    agent = AgentCls(model_name=args.model_name)
+    agent = AgentCls(llm=llm)
     controller = AgentController(agent, workdir=args.directory, max_iterations=args.max_iterations)
     asyncio.run(controller.start_loop(args.task))
@@ -6,6 +6,7 @@ from fastapi import WebSocketDisconnect

 from opendevin.agent import Agent
 from opendevin.controller import AgentController
+from opendevin.llm.llm import LLM

 from opendevin.action import (
     Action,
@@ -121,15 +122,14 @@ class Session:
         model = "gpt-4-0125-preview"
         if start_event and "model" in start_event.args:
             model = start_event.args["model"]

         if not os.path.exists(directory):
             print(f"Workspace directory {directory} does not exist. Creating it...")
             os.makedirs(directory)
         directory = os.path.relpath(directory, os.getcwd())

+        llm = LLM(model)
         AgentCls = Agent.get_cls(agent_cls)
-        self.agent = AgentCls(model_name=model)
-        self.controller = AgentController(self.agent, directory, callbacks=[self.on_agent_event])
+        self.agent = AgentCls(llm)
+        self.controller = AgentController(self.agent, workdir=directory, callbacks=[self.on_agent_event])
         await self.send({"action": "initialize", "message": "Control loop started."})

     async def start_task(self, start_event):
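Server sessions pick their model per connection: the start event may carry a model override, and each session builds its own LLM from it. Sketched here as a plain dict for illustration (the real event object exposes args as an attribute, and may carry other fields):

    start_event_payload = {"action": "start", "args": {"model": "gpt-3.5-turbo"}}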