diff --git a/agenthub/codeact_agent/action_parser.py b/agenthub/codeact_agent/action_parser.py
new file mode 100644
index 0000000000..a855f06bd4
--- /dev/null
+++ b/agenthub/codeact_agent/action_parser.py
@@ -0,0 +1,182 @@
+import re
+
+from opendevin.controller.action_parser import ActionParser, ResponseParser
+from opendevin.events.action import (
+ Action,
+ AgentDelegateAction,
+ AgentFinishAction,
+ CmdRunAction,
+ IPythonRunCellAction,
+ MessageAction,
+)
+
+
+class CodeActResponseParser(ResponseParser):
+    """
+    Convert a raw LLM completion into exactly one CodeAct action.
+
+    Possible results:
+    - CmdRunAction(command) - bash command to run
+    - IPythonRunCellAction(code) - IPython code to run
+    - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
+    - MessageAction(content) - Message action to run (e.g. ask for clarification)
+    - AgentFinishAction() - end the interaction
+    """
+
+    def __init__(self):
+        # Order matters: parsers are tried first to last and the first one
+        # whose check_condition() accepts the action string wins.
+        self.action_parsers = [
+            CodeActActionParserFinish(),
+            CodeActActionParserCmdRun(),
+            CodeActActionParserIPythonRunCell(),
+            CodeActActionParserAgentDelegate(),
+        ]
+        # Used when none of the parsers above accepts the action string.
+        self.default_parser = CodeActActionParserMessage()
+
+    def parse(self, response: str) -> Action:
+        """
+        Parse an LLM response into an Action.
+
+        NOTE: the parameter is annotated ``str`` to match the base class, but
+        it is read as ``response.choices[0].message.content`` (see
+        ``parse_response``), so it must be a completion-like object.
+
+        Parameters:
+        - response: The response from the LLM.
+
+        Returns:
+        - action (Action): The action parsed from the response.
+        """
+        action_str = self.parse_response(response)
+        return self.parse_action(action_str)
+
+    def parse_response(self, response) -> str:
+        """
+        Extract the action string from the first choice of the response.
+
+        If an ``<execute_LANG>`` block (LANG being bash, ipython or browse) was
+        opened but its closing tag is missing, the closing tag is appended so
+        that the tag-based action parsers can match the block.
+
+        Parameters:
+        - response: Object exposing ``choices[0].message.content``.
+
+        Returns:
+        - action_str (str): The message content, with a closing tag appended
+          where one was missing.
+        """
+        action = response.choices[0].message.content
+        for lang in ('bash', 'ipython', 'browse'):
+            open_tag = f'<execute_{lang}>'
+            close_tag = f'</execute_{lang}>'
+            if open_tag in action and close_tag not in action:
+                action += close_tag
+        return action
+
+    def parse_action(self, action_str: str) -> Action:
+        """
+        Dispatch the action string to the first parser that accepts it.
+
+        Parameters:
+        - action_str (str): The action string produced by ``parse_response``.
+
+        Returns:
+        - action (Action): Result of the first matching parser, or of the
+          default (message) parser when none matches.
+        """
+        for action_parser in self.action_parsers:
+            if action_parser.check_condition(action_str):
+                return action_parser.parse(action_str)
+        return self.default_parser.parse(action_str)
+
+
+class CodeActActionParserFinish(ActionParser):
+    """
+    Parser action:
+    - AgentFinishAction() - end the interaction
+
+    Recognises a ``<finish>...</finish>`` block in the action string.
+    ``check_condition`` caches its match on the instance for the following
+    ``parse`` call, so the two calls must be made back to back on the same
+    instance with the same string.
+    """
+
+    def __init__(self):
+        # Match object stored by check_condition(); None until a match is found.
+        self.finish_command = None
+
+    def check_condition(self, action_str: str) -> bool:
+        """Return True if ``action_str`` contains a ``<finish>`` block."""
+        # The tags must be part of the pattern: a bare r'.*' matches every
+        # string and would make every LLM response look like a finish request.
+        # DOTALL lets the block span multiple lines.
+        self.finish_command = re.search(
+            r'<finish>.*</finish>', action_str, re.DOTALL
+        )
+        return self.finish_command is not None
+
+    def parse(self, action_str: str) -> Action:
+        """
+        Build the AgentFinishAction for a string accepted by check_condition.
+
+        The thought is whatever text remains once the finish block is removed.
+        """
+        assert (
+            self.finish_command is not None
+        ), 'self.finish_command should not be None when parse is called'
+        thought = action_str.replace(self.finish_command.group(0), '').strip()
+        return AgentFinishAction(thought=thought)
+
+
+class CodeActActionParserCmdRun(ActionParser):
+    """
+    Parser action:
+    - CmdRunAction(command) - bash command to run
+    - AgentFinishAction() - end the interaction (when the command is `exit`)
+
+    Recognises the first ``<execute_bash>...</execute_bash>`` block in the
+    action string. ``check_condition`` caches its match on the instance for
+    the following ``parse`` call.
+    """
+
+    def __init__(self):
+        # Match object stored by check_condition(); None until a match is found.
+        self.bash_command = None
+
+    def check_condition(self, action_str: str) -> bool:
+        """Return True if ``action_str`` contains an ``<execute_bash>`` block."""
+        # Non-greedy so that only the first block is captured; without the
+        # tags the pattern would match the empty string in any input.
+        self.bash_command = re.search(
+            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
+        )
+        return self.bash_command is not None
+
+    def parse(self, action_str: str) -> Action:
+        """
+        Build the action for a string accepted by check_condition.
+
+        The text outside the matched block becomes the thought. A command of
+        exactly `exit` ends the interaction instead of being run.
+        """
+        assert (
+            self.bash_command is not None
+        ), 'self.bash_command should not be None when parse is called'
+        # Remove the command block from the action string to get the thought.
+        thought = action_str.replace(self.bash_command.group(0), '').strip()
+        command_group = self.bash_command.group(1).strip()
+        if command_group == 'exit':
+            # Keep the accompanying thought, as the <finish> path does.
+            return AgentFinishAction(thought=thought)
+        return CmdRunAction(command=command_group, thought=thought)
+
+
+class CodeActActionParserIPythonRunCell(ActionParser):
+    """
+    Parser action:
+    - IPythonRunCellAction(code) - IPython code to run
+
+    Recognises the first ``<execute_ipython>...</execute_ipython>`` block in
+    the action string. ``check_condition`` caches its match on the instance
+    for the following ``parse`` call.
+    """
+
+    def __init__(self):
+        # Match object stored by check_condition(); None until a match is found.
+        self.python_code = None
+        # Passed to every IPythonRunCellAction as its kernel_init_code.
+        self.jupyter_kernel_init_code: str = 'from agentskills import *'
+
+    def check_condition(self, action_str: str) -> bool:
+        """Return True if ``action_str`` contains an ``<execute_ipython>`` block."""
+        # Non-greedy so that only the first block is captured; without the
+        # tags the pattern would match the empty string in any input.
+        self.python_code = re.search(
+            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
+        )
+        return self.python_code is not None
+
+    def parse(self, action_str: str) -> Action:
+        """
+        Build the IPythonRunCellAction for a string accepted by check_condition.
+
+        The block content is the code to run; the text outside the block
+        becomes the thought.
+        """
+        assert (
+            self.python_code is not None
+        ), 'self.python_code should not be None when parse is called'
+        code_group = self.python_code.group(1).strip()
+        thought = action_str.replace(self.python_code.group(0), '').strip()
+        return IPythonRunCellAction(
+            code=code_group,
+            thought=thought,
+            kernel_init_code=self.jupyter_kernel_init_code,
+        )
+
+
+class CodeActActionParserAgentDelegate(ActionParser):
+    """
+    Parser action:
+    - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
+
+    Recognises an ``<execute_browse>...</execute_browse>`` block in the action
+    string and delegates the task to the BrowsingAgent. ``check_condition``
+    caches its match on the instance for the following ``parse`` call.
+    """
+
+    def __init__(self):
+        # Match object stored by check_condition(); None until a match is found.
+        self.agent_delegate = None
+
+    def check_condition(self, action_str: str) -> bool:
+        """Return True if ``action_str`` contains an ``<execute_browse>`` block."""
+        # Greedy on purpose (unlike the bash/ipython parsers): the capture
+        # runs from the first opening tag to the last closing tag.
+        self.agent_delegate = re.search(
+            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
+        )
+        return self.agent_delegate is not None
+
+    def parse(self, action_str: str) -> Action:
+        """
+        Build the AgentDelegateAction for a string accepted by check_condition.
+
+        The delegated task combines the thought (text outside the block) with
+        the browse instructions found inside the block.
+        """
+        assert (
+            self.agent_delegate is not None
+        ), 'self.agent_delegate should not be None when parse is called'
+        thought = action_str.replace(self.agent_delegate.group(0), '').strip()
+        browse_actions = self.agent_delegate.group(1).strip()
+        task = f'{thought}. I should start with: {browse_actions}'
+        return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
+
+
+class CodeActActionParserMessage(ActionParser):
+    """
+    Parser action:
+    - MessageAction(content) - Message action to run (e.g. ask for clarification)
+
+    Accepts every action string and wraps it unchanged in a MessageAction
+    that waits for a response.
+    """
+
+    def __init__(self):
+        """Nothing to set up: this parser keeps no state."""
+
+    def check_condition(self, action_str: str) -> bool:
+        """Always accept the string, whatever it contains."""
+        # The working assumption is that the LLM is good enough that a reply
+        # in pure natural language means it wants to talk to the user.
+        return True
+
+    def parse(self, action_str: str) -> Action:
+        """Wrap the whole action string in a MessageAction awaiting a reply."""
+        message = MessageAction(content=action_str, wait_for_response=True)
+        return message
diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py
index aead683ef4..d6aeae31ca 100644
--- a/agenthub/codeact_agent/codeact_agent.py
+++ b/agenthub/codeact_agent/codeact_agent.py
@@ -1,5 +1,4 @@
-import re
-
+from agenthub.codeact_agent.action_parser import CodeActResponseParser
from agenthub.codeact_agent.prompt import (
COMMAND_DOCS,
EXAMPLES,
@@ -11,7 +10,6 @@ from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.events.action import (
Action,
- AgentDelegateAction,
AgentFinishAction,
BrowseInteractiveAction,
CmdRunAction,
@@ -35,14 +33,6 @@ from opendevin.runtime.tools import RuntimeTool
ENABLE_GITHUB = True
-def parse_response(response) -> str:
- action = response.choices[0].message.content
- for lang in ['bash', 'ipython', 'browse']:
- if f'' in action and f'' not in action:
- action += f''
- return action
-
-
def action_to_str(action: Action) -> str:
if isinstance(action, CmdRunAction):
return f'{action.thought}\n\n{action.command}\n'
@@ -169,11 +159,12 @@ class CodeActAgent(Agent):
JupyterRequirement(),
]
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
- jupyter_kernel_init_code: str = 'from agentskills import *'
system_message: str = get_system_message()
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
+ action_parser = CodeActResponseParser()
+
def __init__(
self,
llm: LLM,
@@ -239,48 +230,10 @@ class CodeActAgent(Agent):
],
temperature=0.0,
)
-
- action_str: str = parse_response(response)
state.num_of_chars += sum(
len(message['content']) for message in messages
- ) + len(action_str)
-
- if finish_command := re.search(r'.*', action_str, re.DOTALL):
- thought = action_str.replace(finish_command.group(0), '').strip()
- return AgentFinishAction(thought=thought)
- if bash_command := re.search(
- r'(.*?)', action_str, re.DOTALL
- ):
- # remove the command from the action string to get thought
- thought = action_str.replace(bash_command.group(0), '').strip()
- # a command was found
- command_group = bash_command.group(1).strip()
-
- if command_group.strip() == 'exit':
- return AgentFinishAction()
- return CmdRunAction(command=command_group, thought=thought)
- elif python_code := re.search(
- r'(.*?)', action_str, re.DOTALL
- ):
- # a code block was found
- code_group = python_code.group(1).strip()
- thought = action_str.replace(python_code.group(0), '').strip()
- return IPythonRunCellAction(
- code=code_group,
- thought=thought,
- kernel_init_code=self.jupyter_kernel_init_code,
- )
- elif browse_command := re.search(
- r'(.*)', action_str, re.DOTALL
- ):
- thought = action_str.replace(browse_command.group(0), '').strip()
- browse_actions = browse_command.group(1).strip()
- task = f'{thought}. I should start with: {browse_actions}'
- return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
- else:
- # We assume the LLM is GOOD enough that when it returns pure natural language
- # it want to talk to the user
- return MessageAction(content=action_str, wait_for_response=True)
+ ) + len(response.choices[0].message.content)
+ return self.action_parser.parse(response)
def search_memory(self, query: str) -> list[str]:
raise NotImplementedError('Implement this abstract method')
diff --git a/opendevin/controller/action_parser.py b/opendevin/controller/action_parser.py
new file mode 100644
index 0000000000..6ff5f025a9
--- /dev/null
+++ b/opendevin/controller/action_parser.py
@@ -0,0 +1,76 @@
+from abc import ABC, abstractmethod
+
+from opendevin.events.action import Action
+
+
+class ResponseParser(ABC):
+    """
+    Abstract interface for a response parser, i.e. an object dedicated to
+    parsing an Action out of the response returned by the LLM.
+
+    Subclasses must implement ``parse``, ``parse_response`` and
+    ``parse_action``.
+    """
+
+    def __init__(self):
+        # The order of the items in this list needs attention when filling it.
+        self.action_parsers = []
+
+    @abstractmethod
+    def parse(self, response: str) -> Action:
+        """
+        Turn the LLM response into an Action.
+
+        Parameters:
+        - response (str): The response from the LLM.
+
+        Returns:
+        - action (Action): The action parsed from the response.
+        """
+
+    @abstractmethod
+    def parse_response(self, response) -> str:
+        """
+        Extract the action string from the LLM response.
+
+        Parameters:
+        - response: The response from the LLM.
+
+        Returns:
+        - action_str (str): The action string parsed from the response.
+        """
+
+    @abstractmethod
+    def parse_action(self, action_str: str) -> Action:
+        """
+        Turn an action string into an Action.
+
+        Parameters:
+        - action_str (str): The action string taken from the LLM response.
+
+        Returns:
+        - action (Action): The action parsed from the action string.
+        """
+
+
+class ActionParser(ABC):
+    """
+    Abstract interface for an action parser, i.e. an object dedicated to
+    parsing one kind of Action out of the action string produced by the LLM.
+
+    Subclasses must implement ``check_condition`` and ``parse``.
+    """
+
+    @abstractmethod
+    def check_condition(self, action_str: str) -> bool:
+        """
+        Tell whether this parser is able to parse the given action string.
+        """
+
+    @abstractmethod
+    def parse(self, action_str: str) -> Action:
+        """
+        Build the Action described by the action string of the LLM response.
+        """