Extract logic from init from langchains_agent and codeact_agent (#167)

This commit is contained in:
Anas DORBANI 2024-03-28 12:51:21 +00:00 committed by GitHub
parent b1944a63ef
commit 82c215ed5d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 310 additions and 306 deletions
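This refactor reduces each agent package's __init__.py to an import plus an Agent.register call, moving the agent class itself into a dedicated module. A minimal sketch of the resulting __init__.py, assuming the paths agenthub/codeact_agent/__init__.py and agenthub/codeact_agent/codeact_agent.py implied by the relative imports in this diff (they are not stated explicitly):

# agenthub/codeact_agent/__init__.py (path inferred, not shown in the diff)
from opendevin.agent import Agent

from .codeact_agent import CodeActAgent

# The implementation now lives in codeact_agent.py; the package __init__
# only re-exports the class and registers it under its public name.
Agent.register("CodeActAgent", CodeActAgent)

The langchains_agent package follows the same pattern with LangchainsAgent.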

View File

@@ -1,130 +1,4 @@
import re
from typing import List, Mapping
from termcolor import colored
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
AgentEchoAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
AgentMessageObservation,
)
from opendevin.llm.llm import LLM
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
DO NOT provide code in ```triple backticks```. Instead, you should execute bash commands on behalf of the user by wrapping them with <execute> and </execute>.
For example:
You can list the files in the current directory by executing the following command:
<execute>ls</execute>
You can also install packages using pip:
<execute> pip install numpy </execute>
You can also write a block of code to a file:
<execute>
echo "import math
print(math.pi)" > math.py
</execute>
When you are done, execute "exit" to close the shell and end the conversation.
"""
INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
)
def parse_response(response) -> str:
action = response.choices[0].message.content
if "<execute>" in action and "</execute>" not in action:
action += "</execute>"
return action
class CodeActAgent(Agent):
def __init__(
self,
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
Parameters:
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(llm)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""
def step(self, state: State) -> Action:
if len(self.messages) == 0:
assert self.instruction, "Expecting instruction to be set"
self.messages = [
{"role": "system", "content": SYSTEM_MESSAGE},
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))
updated_info = state.updated_info
if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
elif isinstance(obs, CmdOutputObservation):
content = "OBSERVATION:\n" + obs.content
content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]"
self.messages.append({"role": "user", "content": content})
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
response = self.llm.completion(
messages=self.messages,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action_str: str = parse_response(response)
self.messages.append({"role": "assistant", "content": action_str})
print(colored("===ASSISTANT:===\n" + action_str, "yellow"))
command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
return AgentFinishAction()
return CmdRunAction(command=command_group)
# # execute the code
# # TODO: does exit_code get loaded into Message?
# exit_code, observation = self.env.execute(command_group)
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide an error message for the model to continue, similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
# observation = INVALID_INPUT_MESSAGE
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself
def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")
from .codeact_agent import CodeActAgent
Agent.register("CodeActAgent", CodeActAgent)

View File

@@ -0,0 +1,128 @@
import re
from typing import List, Mapping
from termcolor import colored
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import (
Action,
CmdRunAction,
AgentEchoAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
AgentMessageObservation,
)
from opendevin.llm.llm import LLM
SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
DO NOT provide code in ```triple backticks```. Instead, you should execute bash commands on behalf of the user by wrapping them with <execute> and </execute>.
For example:
You can list the files in the current directory by executing the following command:
<execute>ls</execute>
You can also install packages using pip:
<execute> pip install numpy </execute>
You can also write a block of code to a file:
<execute>
echo "import math
print(math.pi)" > math.py
</execute>
When you are done, execute "exit" to close the shell and end the conversation.
"""
INVALID_INPUT_MESSAGE = (
"I don't understand your input. \n"
"If you want to execute command, please use <execute> YOUR_COMMAND_HERE </execute>.\n"
"If you already completed the task, please exit the shell by generating: <execute> exit </execute>."
)
def parse_response(response) -> str:
action = response.choices[0].message.content
if "<execute>" in action and "</execute>" not in action:
action += "</execute>"
return action
class CodeActAgent(Agent):
def __init__(
self,
llm: LLM,
) -> None:
"""
Initializes a new instance of the CodeActAgent class.
Parameters:
- instruction (str): The instruction for the agent to execute.
- max_steps (int): The maximum number of steps to run the agent.
"""
super().__init__(llm)
self.messages: List[Mapping[str, str]] = []
self.instruction: str = ""
def step(self, state: State) -> Action:
if len(self.messages) == 0:
assert self.instruction, "Expecting instruction to be set"
self.messages = [
{"role": "system", "content": SYSTEM_MESSAGE},
{"role": "user", "content": self.instruction},
]
print(colored("===USER:===\n" + self.instruction, "green"))
updated_info = state.updated_info
if updated_info:
for prev_action, obs in updated_info:
assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
if isinstance(obs, AgentMessageObservation): # warning message from itself
self.messages.append({"role": "user", "content": obs.content})
print(colored("===USER:===\n" + obs.content, "green"))
elif isinstance(obs, CmdOutputObservation):
content = "OBSERVATION:\n" + obs.content
content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]"
self.messages.append({"role": "user", "content": content})
print(colored("===ENV OBSERVATION:===\n" + content, "blue"))
else:
raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
response = self.llm.completion(
messages=self.messages,
stop=["</execute>"],
temperature=0.0,
seed=42,
)
action_str: str = parse_response(response)
self.messages.append({"role": "assistant", "content": action_str})
print(colored("===ASSISTANT:===\n" + action_str, "yellow"))
command = re.search(r"<execute>(.*)</execute>", action_str, re.DOTALL)
if command is not None:
# a command was found
command_group = command.group(1)
if command_group.strip() == "exit":
print(colored("Exit received. Exiting...", "red"))
return AgentFinishAction()
return CmdRunAction(command=command_group)
# # execute the code
# # TODO: does exit_code get loaded into Message?
# exit_code, observation = self.env.execute(command_group)
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
else:
# we could provide an error message for the model to continue, similar to
# https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
# observation = INVALID_INPUT_MESSAGE
# self._history.append(Message(Role.ASSISTANT, observation))
# print(colored("===ENV OBSERVATION:===\n" + observation, "blue"))
return AgentEchoAction(content=INVALID_INPUT_MESSAGE) # warning message to itself
def search_memory(self, query: str) -> List[str]:
raise NotImplementedError("Implement this abstract method")

View File

@@ -1,182 +1,4 @@
from typing import List
from opendevin.llm.llm import LLM
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import Action
import agenthub.langchains_agent.utils.prompts as prompts
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from opendevin.action import (
NullAction,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
)
MAX_MONOLOGUE_LENGTH = 20000
MAX_OUTPUT_LENGTH = 5000
INITIAL_THOUGHTS = [
"I exist!",
"Hmm...looks like I can type in a command line prompt",
"Looks like I have a web browser too!",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"It seems like I have some kind of short term memory.",
"Each of my thoughts seems to be stored in a numbered list.",
"It seems whatever I say next will be added to the list.",
"But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"Fortunately I have long term memory!",
"I can just say RECALL, followed by the thing I want to remember. And then related thoughts just spill out!",
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
"RECALL what it is I want to do",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"Neat! And it looks like it's easy for me to use the command line too! I just have to say RUN followed by the command I want to run. The command output just jumps into my head!",
'RUN echo "hello world"',
"hello world",
"Cool! I bet I can read and edit files too.",
"RUN echo \"console.log('hello world')\" > test.js",
"",
"I just created test.js. I'll try and run it now.",
"RUN node test.js",
"hello world",
"it works!",
"And if I want to use the browser, I just need to say BROWSE, followed by a website I want to visit, or an action I want to take on the current site",
"Let's try that...",
"BROWSE google.com",
'<form><input type="text"></input><button type="submit"></button></form>',
"Very cool. Now to accomplish my task.",
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
"It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
]
class LangchainsAgent(Agent):
_initialized = False
def __init__(self, llm: LLM):
super().__init__(llm)
self.monologue = Monologue()
self.memory = LongTermMemory()
def _add_event(self, event: dict):
if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense(self.llm)
def _initialize(self):
if self._initialized:
return
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.monologue = Monologue()
self.memory = LongTermMemory()
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
d = {"action": "run", "args": {"command": command}}
next_is_output = True
elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
d = {"action": "recall", "args": {"query": query}}
next_is_output = True
elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
d = {"action": "think", "args": {"thought": thought}}
self._add_event(d)
self._initialized = True
def step(self, state: State) -> Action:
self._initialize()
# TODO: make langchains agent use Action & Observation
# completely from the ground up
# Translate state to action_dict
for prev_action, obs in state.updated_info:
d = None
if isinstance(obs, CmdOutputObservation):
if obs.error:
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
if d is not None:
self._add_event(d)
d = None
if isinstance(prev_action, CmdRunAction):
d = {"action": "run", "args": {"command": prev_action.command}}
elif isinstance(prev_action, CmdKillAction):
d = {"action": "kill", "args": {"id": prev_action.id}}
elif isinstance(prev_action, BrowseURLAction):
d = {"action": "browse", "args": {"url": prev_action.url}}
elif isinstance(prev_action, FileReadAction):
d = {"action": "read", "args": {"file": prev_action.path}}
elif isinstance(prev_action, FileWriteAction):
d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
elif isinstance(prev_action, AgentRecallAction):
d = {"action": "recall", "args": {"query": prev_action.query}}
elif isinstance(prev_action, AgentThinkAction):
d = {"action": "think", "args": {"thought": prev_action.thought}}
elif isinstance(prev_action, AgentFinishAction):
d = {"action": "finish"}
elif isinstance(prev_action, NullAction):
d = None
else:
raise ValueError(f"Unknown action type: {prev_action}")
if d is not None:
self._add_event(d)
state.updated_info = []
prompt = prompts.get_request_action_prompt(
self.instruction,
self.monologue.get_thoughts(),
state.background_commands_obs,
)
messages = [{"content": prompt,"role": "user"}]
resp = self.llm.completion(messages=messages)
action_resp = resp['choices'][0]['message']['content']
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action
def search_memory(self, query: str) -> List[str]:
return self.memory.search(query)
from .langchains_agent import LangchainsAgent
Agent.register("LangchainsAgent", LangchainsAgent)

View File

@@ -0,0 +1,180 @@
from typing import List
from opendevin.llm.llm import LLM
from opendevin.agent import Agent
from opendevin.state import State
from opendevin.action import Action
import agenthub.langchains_agent.utils.prompts as prompts
from agenthub.langchains_agent.utils.monologue import Monologue
from agenthub.langchains_agent.utils.memory import LongTermMemory
from opendevin.action import (
NullAction,
CmdRunAction,
CmdKillAction,
BrowseURLAction,
FileReadAction,
FileWriteAction,
AgentRecallAction,
AgentThinkAction,
AgentFinishAction,
)
from opendevin.observation import (
CmdOutputObservation,
)
MAX_MONOLOGUE_LENGTH = 20000
MAX_OUTPUT_LENGTH = 5000
INITIAL_THOUGHTS = [
"I exist!",
"Hmm...looks like I can type in a command line prompt",
"Looks like I have a web browser too!",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"It seems like I have some kind of short term memory.",
"Each of my thoughts seems to be stored in a numbered list.",
"It seems whatever I say next will be added to the list.",
"But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
"Fortunately I have long term memory!",
"I can just say RECALL, followed by the thing I want to remember. And then related thoughts just spill out!",
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
"Let's try it out!",
"RECALL what it is I want to do",
"Here's what I want to do: $TASK",
"How am I going to get there though?",
"Neat! And it looks like it's easy for me to use the command line too! I just have to say RUN followed by the command I want to run. The command output just jumps into my head!",
'RUN echo "hello world"',
"hello world",
"Cool! I bet I can read and edit files too.",
"RUN echo \"console.log('hello world')\" > test.js",
"",
"I just created test.js. I'll try and run it now.",
"RUN node test.js",
"hello world",
"it works!",
"And if I want to use the browser, I just need to say BROWSE, followed by a website I want to visit, or an action I want to take on the current site",
"Let's try that...",
"BROWSE google.com",
'<form><input type="text"></input><button type="submit"></button></form>',
"Very cool. Now to accomplish my task.",
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
"In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
"It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
]
class LangchainsAgent(Agent):
_initialized = False
def __init__(self, llm: LLM):
super().__init__(llm)
self.monologue = Monologue()
self.memory = LongTermMemory()
def _add_event(self, event: dict):
if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
self.monologue.add_event(event)
self.memory.add_event(event)
if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
self.monologue.condense(self.llm)
def _initialize(self):
if self._initialized:
return
if self.instruction is None or self.instruction == "":
raise ValueError("Instruction must be provided")
self.monologue = Monologue()
self.memory = LongTermMemory()
next_is_output = False
for thought in INITIAL_THOUGHTS:
thought = thought.replace("$TASK", self.instruction)
if next_is_output:
d = {"action": "output", "args": {"output": thought}}
next_is_output = False
else:
if thought.startswith("RUN"):
command = thought.split("RUN ")[1]
d = {"action": "run", "args": {"command": command}}
next_is_output = True
elif thought.startswith("RECALL"):
query = thought.split("RECALL ")[1]
d = {"action": "recall", "args": {"query": query}}
next_is_output = True
elif thought.startswith("BROWSE"):
url = thought.split("BROWSE ")[1]
d = {"action": "browse", "args": {"url": url}}
next_is_output = True
else:
d = {"action": "think", "args": {"thought": thought}}
self._add_event(d)
self._initialized = True
def step(self, state: State) -> Action:
self._initialize()
# TODO: make langchains agent use Action & Observation
# completely from the ground up
# Translate state to action_dict
for prev_action, obs in state.updated_info:
d = None
if isinstance(obs, CmdOutputObservation):
if obs.error:
d = {"action": "error", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
else:
d = {"action": "output", "args": {"output": obs.content}}
if d is not None:
self._add_event(d)
d = None
if isinstance(prev_action, CmdRunAction):
d = {"action": "run", "args": {"command": prev_action.command}}
elif isinstance(prev_action, CmdKillAction):
d = {"action": "kill", "args": {"id": prev_action.id}}
elif isinstance(prev_action, BrowseURLAction):
d = {"action": "browse", "args": {"url": prev_action.url}}
elif isinstance(prev_action, FileReadAction):
d = {"action": "read", "args": {"file": prev_action.path}}
elif isinstance(prev_action, FileWriteAction):
d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
elif isinstance(prev_action, AgentRecallAction):
d = {"action": "recall", "args": {"query": prev_action.query}}
elif isinstance(prev_action, AgentThinkAction):
d = {"action": "think", "args": {"thought": prev_action.thought}}
elif isinstance(prev_action, AgentFinishAction):
d = {"action": "finish"}
elif isinstance(prev_action, NullAction):
d = None
else:
raise ValueError(f"Unknown action type: {prev_action}")
if d is not None:
self._add_event(d)
state.updated_info = []
prompt = prompts.get_request_action_prompt(
self.instruction,
self.monologue.get_thoughts(),
state.background_commands_obs,
)
messages = [{"content": prompt,"role": "user"}]
resp = self.llm.completion(messages=messages)
action_resp = resp['choices'][0]['message']['content']
action = prompts.parse_action_response(action_resp)
self.latest_action = action
return action
def search_memory(self, query: str) -> List[str]:
return self.memory.search(query)