Refactor response to action in agent step (#2350)

* refactor action parser

* Fix typos

* fix typo

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
This commit is contained in:
Yufan Song 2024-06-10 18:17:30 +08:00 committed by GitHub
parent 7fc57650f3
commit f7491bd2fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 263 additions and 52 deletions

View File

@ -0,0 +1,182 @@
import re
from opendevin.controller.action_parser import ActionParser, ResponseParser
from opendevin.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
CmdRunAction,
IPythonRunCellAction,
MessageAction,
)
class CodeActResponseParser(ResponseParser):
"""
Parser action:
- CmdRunAction(command) - bash command to run
- IPythonRunCellAction(code) - IPython code to run
- AgentDelegateAction(agent, inputs) - delegate action for (sub)task
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
def __init__(
self,
):
# Need pay attention to the item order in self.action_parsers
self.action_parsers = [
CodeActActionParserFinish(),
CodeActActionParserCmdRun(),
CodeActActionParserIPythonRunCell(),
CodeActActionParserAgentDelegate(),
]
self.default_parser = CodeActActionParserMessage()
def parse(self, response: str) -> Action:
action_str = self.parse_response(response)
return self.parse_action(action_str)
def parse_response(self, response) -> str:
action = response.choices[0].message.content
for lang in ['bash', 'ipython', 'browse']:
if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
action += f'</execute_{lang}>'
return action
def parse_action(self, action_str: str) -> Action:
for action_parser in self.action_parsers:
if action_parser.check_condition(action_str):
return action_parser.parse(action_str)
return self.default_parser.parse(action_str)
class CodeActActionParserFinish(ActionParser):
"""
Parser action:
- AgentFinishAction() - end the interaction
"""
def __init__(
self,
):
self.finish_command = None
def check_condition(self, action_str: str) -> bool:
self.finish_command = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
return self.finish_command is not None
def parse(self, action_str: str) -> Action:
assert (
self.finish_command is not None
), 'self.finish_command should not be None when parse is called'
thought = action_str.replace(self.finish_command.group(0), '').strip()
return AgentFinishAction(thought=thought)
class CodeActActionParserCmdRun(ActionParser):
"""
Parser action:
- CmdRunAction(command) - bash command to run
- AgentFinishAction() - end the interaction
"""
def __init__(
self,
):
self.bash_command = None
def check_condition(self, action_str: str) -> bool:
self.bash_command = re.search(
r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
)
return self.bash_command is not None
def parse(self, action_str: str) -> Action:
assert (
self.bash_command is not None
), 'self.bash_command should not be None when parse is called'
thought = action_str.replace(self.bash_command.group(0), '').strip()
# a command was found
command_group = self.bash_command.group(1).strip()
if command_group.strip() == 'exit':
return AgentFinishAction()
return CmdRunAction(command=command_group, thought=thought)
class CodeActActionParserIPythonRunCell(ActionParser):
"""
Parser action:
- IPythonRunCellAction(code) - IPython code to run
"""
def __init__(
self,
):
self.python_code = None
self.jupyter_kernel_init_code: str = 'from agentskills import *'
def check_condition(self, action_str: str) -> bool:
self.python_code = re.search(
r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
)
return self.python_code is not None
def parse(self, action_str: str) -> Action:
assert (
self.python_code is not None
), 'self.python_code should not be None when parse is called'
code_group = self.python_code.group(1).strip()
thought = action_str.replace(self.python_code.group(0), '').strip()
return IPythonRunCellAction(
code=code_group,
thought=thought,
kernel_init_code=self.jupyter_kernel_init_code,
)
class CodeActActionParserAgentDelegate(ActionParser):
"""
Parser action:
- AgentDelegateAction(agent, inputs) - delegate action for (sub)task
"""
def __init__(
self,
):
self.agent_delegate = None
def check_condition(self, action_str: str) -> bool:
self.agent_delegate = re.search(
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
)
return self.agent_delegate is not None
def parse(self, action_str: str) -> Action:
assert (
self.agent_delegate is not None
), 'self.agent_delegate should not be None when parse is called'
thought = action_str.replace(self.agent_delegate.group(0), '').strip()
browse_actions = self.agent_delegate.group(1).strip()
task = f'{thought}. I should start with: {browse_actions}'
return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
class CodeActActionParserMessage(ActionParser):
"""
Parser action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
"""
def __init__(
self,
):
pass
def check_condition(self, action_str: str) -> bool:
# We assume the LLM is GOOD enough that when it returns pure natural language
# it wants to talk to the user
return True
def parse(self, action_str: str) -> Action:
return MessageAction(content=action_str, wait_for_response=True)

View File

@ -1,5 +1,4 @@
import re
from agenthub.codeact_agent.action_parser import CodeActResponseParser
from agenthub.codeact_agent.prompt import (
COMMAND_DOCS,
EXAMPLES,
@ -11,7 +10,6 @@ from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
@ -35,14 +33,6 @@ from opendevin.runtime.tools import RuntimeTool
ENABLE_GITHUB = True
def parse_response(response) -> str:
action = response.choices[0].message.content
for lang in ['bash', 'ipython', 'browse']:
if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
action += f'</execute_{lang}>'
return action
def action_to_str(action: Action) -> str:
if isinstance(action, CmdRunAction):
return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
@ -169,11 +159,12 @@ class CodeActAgent(Agent):
JupyterRequirement(),
]
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
jupyter_kernel_init_code: str = 'from agentskills import *'
system_message: str = get_system_message()
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
action_parser = CodeActResponseParser()
def __init__(
self,
llm: LLM,
@ -239,48 +230,10 @@ class CodeActAgent(Agent):
],
temperature=0.0,
)
action_str: str = parse_response(response)
state.num_of_chars += sum(
len(message['content']) for message in messages
) + len(action_str)
if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
thought = action_str.replace(finish_command.group(0), '').strip()
return AgentFinishAction(thought=thought)
if bash_command := re.search(
r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
):
# remove the command from the action string to get thought
thought = action_str.replace(bash_command.group(0), '').strip()
# a command was found
command_group = bash_command.group(1).strip()
if command_group.strip() == 'exit':
return AgentFinishAction()
return CmdRunAction(command=command_group, thought=thought)
elif python_code := re.search(
r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
):
# a code block was found
code_group = python_code.group(1).strip()
thought = action_str.replace(python_code.group(0), '').strip()
return IPythonRunCellAction(
code=code_group,
thought=thought,
kernel_init_code=self.jupyter_kernel_init_code,
)
elif browse_command := re.search(
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
):
thought = action_str.replace(browse_command.group(0), '').strip()
browse_actions = browse_command.group(1).strip()
task = f'{thought}. I should start with: {browse_actions}'
return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
else:
# We assume the LLM is GOOD enough that when it returns pure natural language
# it want to talk to the user
return MessageAction(content=action_str, wait_for_response=True)
) + len(response.choices[0].message.content)
return self.action_parser.parse(response)
def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')

View File

@ -0,0 +1,76 @@
from abc import ABC, abstractmethod
from opendevin.events.action import Action
class ResponseParser(ABC):
"""
This abstract base class is a general interface for an response parser dedicated to
parsing the action from the response from the LLM.
"""
def __init__(
self,
):
# Need pay attention to the item order in self.action_parsers
self.action_parsers = []
@abstractmethod
def parse(self, response: str) -> Action:
"""
Parses the action from the response from the LLM.
Parameters:
- response (str): The response from the LLM.
Returns:
- action (Action): The action parsed from the response.
"""
pass
@abstractmethod
def parse_response(self, response) -> str:
"""
Parses the action from the response from the LLM.
Parameters:
- response (str): The response from the LLM.
Returns:
- action_str (str): The action str parsed from the response.
"""
pass
@abstractmethod
def parse_action(self, action_str: str) -> Action:
"""
Parses the action from the response from the LLM.
Parameters:
- action_str (str): The response from the LLM.
Returns:
- action (Action): The action parsed from the response.
"""
pass
class ActionParser(ABC):
"""
This abstract base class is an general interface for an action parser dedicated to
parsing the action from the action str from the LLM.
"""
@abstractmethod
def check_condition(self, action_str: str) -> bool:
"""
Check if the action string can be parsed by this parser.
"""
pass
@abstractmethod
def parse(self, action_str: str) -> Action:
"""
Parses the action from the action string from the LLM response.
"""
pass