From f7491bd2face6b4042d7cc2c2150a8082ffd8291 Mon Sep 17 00:00:00 2001 From: Yufan Song <33971064+yufansong@users.noreply.github.com> Date: Mon, 10 Jun 2024 18:17:30 +0800 Subject: [PATCH] Refactor response to action in agent step (#2350) * refactor action parser * Fix typos * fix typo --------- Co-authored-by: Boxuan Li --- agenthub/codeact_agent/action_parser.py | 182 ++++++++++++++++++++++++ agenthub/codeact_agent/codeact_agent.py | 57 +------- opendevin/controller/action_parser.py | 76 ++++++++++ 3 files changed, 263 insertions(+), 52 deletions(-) create mode 100644 agenthub/codeact_agent/action_parser.py create mode 100644 opendevin/controller/action_parser.py diff --git a/agenthub/codeact_agent/action_parser.py b/agenthub/codeact_agent/action_parser.py new file mode 100644 index 0000000000..a855f06bd4 --- /dev/null +++ b/agenthub/codeact_agent/action_parser.py @@ -0,0 +1,182 @@ +import re + +from opendevin.controller.action_parser import ActionParser, ResponseParser +from opendevin.events.action import ( + Action, + AgentDelegateAction, + AgentFinishAction, + CmdRunAction, + IPythonRunCellAction, + MessageAction, +) + + +class CodeActResponseParser(ResponseParser): + """ + Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__( + self, + ): + # Need pay attention to the item order in self.action_parsers + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + ] + self.default_parser = CodeActActionParserMessage() + + def parse(self, response: str) -> Action: + action_str = self.parse_response(response) + return self.parse_action(action_str) + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + for lang in ['bash', 'ipython', 'browse']: + if f'' in action and f'' not in action: + action += f'' + return action + + def parse_action(self, action_str: str) -> Action: + for action_parser in self.action_parsers: + if action_parser.check_condition(action_str): + return action_parser.parse(action_str) + return self.default_parser.parse(action_str) + + +class CodeActActionParserFinish(ActionParser): + """ + Parser action: + - AgentFinishAction() - end the interaction + """ + + def __init__( + self, + ): + self.finish_command = None + + def check_condition(self, action_str: str) -> bool: + self.finish_command = re.search(r'.*', action_str, re.DOTALL) + return self.finish_command is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.finish_command is not None + ), 'self.finish_command should not be None when parse is called' + thought = action_str.replace(self.finish_command.group(0), '').strip() + return AgentFinishAction(thought=thought) + + +class CodeActActionParserCmdRun(ActionParser): + """ + Parser action: + - CmdRunAction(command) - bash command to run + - AgentFinishAction() - end the interaction + """ + + def __init__( + self, + ): + self.bash_command = None + + def check_condition(self, action_str: str) -> bool: + self.bash_command = re.search( + r'(.*?)', action_str, re.DOTALL + ) + return self.bash_command is not None + + 
def parse(self, action_str: str) -> Action: + assert ( + self.bash_command is not None + ), 'self.bash_command should not be None when parse is called' + thought = action_str.replace(self.bash_command.group(0), '').strip() + # a command was found + command_group = self.bash_command.group(1).strip() + if command_group.strip() == 'exit': + return AgentFinishAction() + return CmdRunAction(command=command_group, thought=thought) + + +class CodeActActionParserIPythonRunCell(ActionParser): + """ + Parser action: + - IPythonRunCellAction(code) - IPython code to run + """ + + def __init__( + self, + ): + self.python_code = None + self.jupyter_kernel_init_code: str = 'from agentskills import *' + + def check_condition(self, action_str: str) -> bool: + self.python_code = re.search( + r'(.*?)', action_str, re.DOTALL + ) + return self.python_code is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.python_code is not None + ), 'self.python_code should not be None when parse is called' + code_group = self.python_code.group(1).strip() + thought = action_str.replace(self.python_code.group(0), '').strip() + return IPythonRunCellAction( + code=code_group, + thought=thought, + kernel_init_code=self.jupyter_kernel_init_code, + ) + + +class CodeActActionParserAgentDelegate(ActionParser): + """ + Parser action: + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + """ + + def __init__( + self, + ): + self.agent_delegate = None + + def check_condition(self, action_str: str) -> bool: + self.agent_delegate = re.search( + r'(.*)', action_str, re.DOTALL + ) + return self.agent_delegate is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.agent_delegate is not None + ), 'self.agent_delegate should not be None when parse is called' + thought = action_str.replace(self.agent_delegate.group(0), '').strip() + browse_actions = self.agent_delegate.group(1).strip() + task = f'{thought}. 
I should start with: {browse_actions}' + return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task}) + + +class CodeActActionParserMessage(ActionParser): + """ + Parser action: + - MessageAction(content) - Message action to run (e.g. ask for clarification) + """ + + def __init__( + self, + ): + pass + + def check_condition(self, action_str: str) -> bool: + # We assume the LLM is GOOD enough that when it returns pure natural language + # it wants to talk to the user + return True + + def parse(self, action_str: str) -> Action: + return MessageAction(content=action_str, wait_for_response=True) diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index aead683ef4..d6aeae31ca 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -1,5 +1,4 @@ -import re - +from agenthub.codeact_agent.action_parser import CodeActResponseParser from agenthub.codeact_agent.prompt import ( COMMAND_DOCS, EXAMPLES, @@ -11,7 +10,6 @@ from opendevin.controller.agent import Agent from opendevin.controller.state.state import State from opendevin.events.action import ( Action, - AgentDelegateAction, AgentFinishAction, BrowseInteractiveAction, CmdRunAction, @@ -35,14 +33,6 @@ from opendevin.runtime.tools import RuntimeTool ENABLE_GITHUB = True -def parse_response(response) -> str: - action = response.choices[0].message.content - for lang in ['bash', 'ipython', 'browse']: - if f'' in action and f'' not in action: - action += f'' - return action - - def action_to_str(action: Action) -> str: if isinstance(action, CmdRunAction): return f'{action.thought}\n\n{action.command}\n' @@ -169,11 +159,12 @@ class CodeActAgent(Agent): JupyterRequirement(), ] runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER] - jupyter_kernel_init_code: str = 'from agentskills import *' system_message: str = get_system_message() in_context_example: str = f"Here is an example of how you can interact with the environment for 
task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!" + action_parser = CodeActResponseParser() + def __init__( self, llm: LLM, @@ -239,48 +230,10 @@ class CodeActAgent(Agent): ], temperature=0.0, ) - - action_str: str = parse_response(response) state.num_of_chars += sum( len(message['content']) for message in messages - ) + len(action_str) - - if finish_command := re.search(r'.*', action_str, re.DOTALL): - thought = action_str.replace(finish_command.group(0), '').strip() - return AgentFinishAction(thought=thought) - if bash_command := re.search( - r'(.*?)', action_str, re.DOTALL - ): - # remove the command from the action string to get thought - thought = action_str.replace(bash_command.group(0), '').strip() - # a command was found - command_group = bash_command.group(1).strip() - - if command_group.strip() == 'exit': - return AgentFinishAction() - return CmdRunAction(command=command_group, thought=thought) - elif python_code := re.search( - r'(.*?)', action_str, re.DOTALL - ): - # a code block was found - code_group = python_code.group(1).strip() - thought = action_str.replace(python_code.group(0), '').strip() - return IPythonRunCellAction( - code=code_group, - thought=thought, - kernel_init_code=self.jupyter_kernel_init_code, - ) - elif browse_command := re.search( - r'(.*)', action_str, re.DOTALL - ): - thought = action_str.replace(browse_command.group(0), '').strip() - browse_actions = browse_command.group(1).strip() - task = f'{thought}. 
I should start with: {browse_actions}' - return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task}) - else: - # We assume the LLM is GOOD enough that when it returns pure natural language - # it want to talk to the user - return MessageAction(content=action_str, wait_for_response=True) + ) + len(response.choices[0].message.content) + return self.action_parser.parse(response) def search_memory(self, query: str) -> list[str]: raise NotImplementedError('Implement this abstract method') diff --git a/opendevin/controller/action_parser.py b/opendevin/controller/action_parser.py new file mode 100644 index 0000000000..6ff5f025a9 --- /dev/null +++ b/opendevin/controller/action_parser.py @@ -0,0 +1,76 @@ +from abc import ABC, abstractmethod + +from opendevin.events.action import Action + + +class ResponseParser(ABC): + """ + This abstract base class is a general interface for a response parser dedicated to + parsing the action from the response from the LLM. + """ + + def __init__( + self, + ): + # Pay attention to the item order in self.action_parsers + self.action_parsers = [] + + @abstractmethod + def parse(self, response: str) -> Action: + """ + Parses the action from the response from the LLM. + + Parameters: + - response (str): The response from the LLM. + + Returns: + - action (Action): The action parsed from the response. + """ + pass + + @abstractmethod + def parse_response(self, response) -> str: + """ + Parses the action string from the response from the LLM. + + Parameters: + - response (str): The response from the LLM. + + Returns: + - action_str (str): The action str parsed from the response. + """ + pass + + @abstractmethod + def parse_action(self, action_str: str) -> Action: + """ + Parses the action from the action string. + + Parameters: + - action_str (str): The action string extracted from the LLM response. + + Returns: + - action (Action): The action parsed from the action string.
+ """ + pass + + +class ActionParser(ABC): + """ + This abstract base class is a general interface for an action parser dedicated to + parsing the action from the action str from the LLM. + """ + + @abstractmethod + def check_condition(self, action_str: str) -> bool: + """ + Check if the action string can be parsed by this parser. + """ + pass + + @abstractmethod + def parse(self, action_str: str) -> Action: + """ + Parses the action from the action string from the LLM response. + """ + pass