mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Refactor response to action in agent step (#2350)
* refactor action parser * Fix typos * fix typo --------- Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
This commit is contained in:
parent
7fc57650f3
commit
f7491bd2fa
182
agenthub/codeact_agent/action_parser.py
Normal file
182
agenthub/codeact_agent/action_parser.py
Normal file
@ -0,0 +1,182 @@
|
||||
import re
|
||||
|
||||
from opendevin.controller.action_parser import ActionParser, ResponseParser
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
CmdRunAction,
|
||||
IPythonRunCellAction,
|
||||
MessageAction,
|
||||
)
|
||||
|
||||
|
||||
class CodeActResponseParser(ResponseParser):
    """
    Turns a CodeAct LLM response into exactly one Action.

    Parser action:
        - CmdRunAction(command) - bash command to run
        - IPythonRunCellAction(code) - IPython code to run
        - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
        - MessageAction(content) - Message action to run (e.g. ask for clarification)
        - AgentFinishAction() - end the interaction
    """

    def __init__(self):
        super().__init__()
        # Order matters: parse_action() uses the first parser whose
        # check_condition() accepts the string, so <finish> wins over the
        # <execute_*> tags when several appear in the same response.
        self.action_parsers = [
            CodeActActionParserFinish(),
            CodeActActionParserCmdRun(),
            CodeActActionParserIPythonRunCell(),
            CodeActActionParserAgentDelegate(),
        ]
        # Used when none of the parsers above accepts the action string.
        self.default_parser = CodeActActionParserMessage()

    def parse(self, response) -> Action:
        """
        Parses the action from the response from the LLM.

        Parameters:
        - response: The LLM completion object; its text is read from
          response.choices[0].message.content (it is not a plain str).

        Returns:
        - action (Action): The action parsed from the response.
        """
        action_str = self.parse_response(response)
        return self.parse_action(action_str)

    def parse_response(self, response) -> str:
        """
        Extracts the action string from the LLM completion object.

        Parameters:
        - response: The LLM completion object exposing
          choices[0].message.content.

        Returns:
        - action_str (str): The message text, with a closing </execute_*> tag
          appended when an opening tag has no matching closing tag. Empty
          string when the message has no text content.
        """
        action = response.choices[0].message.content
        if action is None:
            # A message without text would make the `in` checks below raise
            # TypeError; treat it as an empty message instead.
            action = ''
        # NOTE(review): presumably the closing tag is missing because
        # generation is stopped at it -- confirm against the completion call.
        for lang in ['bash', 'ipython', 'browse']:
            if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
                action += f'</execute_{lang}>'
        return action

    def parse_action(self, action_str: str) -> Action:
        """
        Dispatches the action string to the first matching action parser.

        Parameters:
        - action_str (str): The action string from parse_response().

        Returns:
        - action (Action): The result of the first parser in
          self.action_parsers that accepts the string, or of
          self.default_parser when none does.
        """
        for action_parser in self.action_parsers:
            if action_parser.check_condition(action_str):
                return action_parser.parse(action_str)
        return self.default_parser.parse(action_str)
|
||||
|
||||
|
||||
class CodeActActionParserFinish(ActionParser):
    """
    Parser action:
        - AgentFinishAction() - end the interaction

    check_condition() caches the regex match on the instance and parse()
    consumes it, so check_condition() must be called first.
    """

    def __init__(self):
        # Match object from the most recent check_condition() call, if any.
        self.finish_command = None

    def check_condition(self, action_str: str) -> bool:
        """Return True when action_str contains a <finish>...</finish> block."""
        match = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
        self.finish_command = match
        return match is not None

    def parse(self, action_str: str) -> Action:
        """Build an AgentFinishAction whose thought is the text outside the block."""
        match = self.finish_command
        assert (
            match is not None
        ), 'self.finish_command should not be None when parse is called'
        finish_block = match.group(0)
        # Everything except the matched block is kept as the thought.
        thought = action_str.replace(finish_block, '').strip()
        return AgentFinishAction(thought=thought)
|
||||
|
||||
|
||||
class CodeActActionParserCmdRun(ActionParser):
    """
    Parser action:
        - CmdRunAction(command) - bash command to run
        - AgentFinishAction() - end the interaction (when the command is `exit`)

    check_condition() caches the regex match on the instance and parse()
    consumes it, so check_condition() must be called first.
    """

    def __init__(self):
        # Match object from the most recent check_condition() call, if any.
        self.bash_command = None

    def check_condition(self, action_str: str) -> bool:
        """Return True when action_str contains an <execute_bash> block."""
        # Non-greedy: only the first <execute_bash>...</execute_bash> pair is used.
        self.bash_command = re.search(
            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
        )
        return self.bash_command is not None

    def parse(self, action_str: str) -> Action:
        """
        Build the action for the cached <execute_bash> match.

        Returns:
        - AgentFinishAction when the command is exactly `exit`, otherwise a
          CmdRunAction. In both cases the text outside the matched block is
          passed along as the thought.
        """
        assert (
            self.bash_command is not None
        ), 'self.bash_command should not be None when parse is called'
        # Remove the command block from the action string to get the thought.
        thought = action_str.replace(self.bash_command.group(0), '').strip()
        command_group = self.bash_command.group(1).strip()
        if command_group == 'exit':
            # Keep the accompanying text, as the <finish> parser does.
            return AgentFinishAction(thought=thought)
        return CmdRunAction(command=command_group, thought=thought)
|
||||
|
||||
|
||||
class CodeActActionParserIPythonRunCell(ActionParser):
    """
    Parser action:
        - IPythonRunCellAction(code) - IPython code to run

    check_condition() caches the regex match on the instance and parse()
    consumes it, so check_condition() must be called first.
    """

    def __init__(self):
        # Match object from the most recent check_condition() call, if any.
        self.python_code = None
        # Forwarded unchanged as kernel_init_code on every action built here.
        self.jupyter_kernel_init_code: str = 'from agentskills import *'

    def check_condition(self, action_str: str) -> bool:
        """Return True when action_str contains an <execute_ipython> block."""
        # Non-greedy: only the first <execute_ipython>...</execute_ipython> pair is used.
        match = re.search(
            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
        )
        self.python_code = match
        return match is not None

    def parse(self, action_str: str) -> Action:
        """Build an IPythonRunCellAction from the cached <execute_ipython> match."""
        match = self.python_code
        assert (
            match is not None
        ), 'self.python_code should not be None when parse is called'
        # Text outside the matched block is kept as the thought.
        thought = action_str.replace(match.group(0), '').strip()
        code_group = match.group(1).strip()
        return IPythonRunCellAction(
            code=code_group,
            thought=thought,
            kernel_init_code=self.jupyter_kernel_init_code,
        )
|
||||
|
||||
|
||||
class CodeActActionParserAgentDelegate(ActionParser):
    """
    Parser action:
        - AgentDelegateAction(agent, inputs) - delegate action for (sub)task

    check_condition() caches the regex match on the instance and parse()
    consumes it, so check_condition() must be called first.
    """

    def __init__(self):
        # Match object from the most recent check_condition() call, if any.
        self.agent_delegate = None

    def check_condition(self, action_str: str) -> bool:
        """Return True when action_str contains an <execute_browse> block."""
        # The group is greedy: with several blocks the match runs from the
        # first opening tag to the last closing tag.
        match = re.search(
            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
        )
        self.agent_delegate = match
        return match is not None

    def parse(self, action_str: str) -> Action:
        """Build an AgentDelegateAction for 'BrowsingAgent' from the cached match."""
        match = self.agent_delegate
        assert (
            match is not None
        ), 'self.agent_delegate should not be None when parse is called'
        browse_actions = match.group(1).strip()
        # Text outside the matched block is kept as the thought.
        thought = action_str.replace(match.group(0), '').strip()
        # The delegate receives the thought plus the requested browse actions
        # as a single task string.
        task = f'{thought}. I should start with: {browse_actions}'
        return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
|
||||
|
||||
|
||||
class CodeActActionParserMessage(ActionParser):
    """
    Parser action:
        - MessageAction(content) - Message action to run (e.g. ask for clarification)

    This parser accepts every action string and keeps no state.
    """

    def __init__(self):
        """Nothing to set up."""

    def check_condition(self, action_str: str) -> bool:
        """Always return True: any string can be turned into a message."""
        # We assume the LLM is GOOD enough that when it returns pure natural
        # language it wants to talk to the user.
        return True

    def parse(self, action_str: str) -> Action:
        """Wrap the whole action string in a MessageAction that waits for a response."""
        message = MessageAction(content=action_str, wait_for_response=True)
        return message
|
||||
@ -1,5 +1,4 @@
|
||||
import re
|
||||
|
||||
from agenthub.codeact_agent.action_parser import CodeActResponseParser
|
||||
from agenthub.codeact_agent.prompt import (
|
||||
COMMAND_DOCS,
|
||||
EXAMPLES,
|
||||
@ -11,7 +10,6 @@ from opendevin.controller.agent import Agent
|
||||
from opendevin.controller.state.state import State
|
||||
from opendevin.events.action import (
|
||||
Action,
|
||||
AgentDelegateAction,
|
||||
AgentFinishAction,
|
||||
BrowseInteractiveAction,
|
||||
CmdRunAction,
|
||||
@ -35,14 +33,6 @@ from opendevin.runtime.tools import RuntimeTool
|
||||
ENABLE_GITHUB = True
|
||||
|
||||
|
||||
def parse_response(response) -> str:
    """
    Extract the action string from an LLM completion object.

    Parameters:
    - response: Completion object exposing choices[0].message.content.

    Returns:
    - The message text, with a closing </execute_*> tag appended when an
      opening tag has no matching closing tag. Empty string when the message
      has no text content.
    """
    action = response.choices[0].message.content
    if action is None:
        # A message without text would make the `in` checks below raise
        # TypeError; treat it as an empty message instead.
        action = ''
    # NOTE(review): presumably the closing tag is missing because generation
    # is stopped at it -- confirm against the completion call.
    for lang in ['bash', 'ipython', 'browse']:
        if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
            action += f'</execute_{lang}>'
    return action
|
||||
|
||||
|
||||
def action_to_str(action: Action) -> str:
|
||||
if isinstance(action, CmdRunAction):
|
||||
return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
|
||||
@ -169,11 +159,12 @@ class CodeActAgent(Agent):
|
||||
JupyterRequirement(),
|
||||
]
|
||||
runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
|
||||
jupyter_kernel_init_code: str = 'from agentskills import *'
|
||||
|
||||
system_message: str = get_system_message()
|
||||
in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
|
||||
|
||||
action_parser = CodeActResponseParser()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
llm: LLM,
|
||||
@ -239,48 +230,10 @@ class CodeActAgent(Agent):
|
||||
],
|
||||
temperature=0.0,
|
||||
)
|
||||
|
||||
action_str: str = parse_response(response)
|
||||
state.num_of_chars += sum(
|
||||
len(message['content']) for message in messages
|
||||
) + len(action_str)
|
||||
|
||||
if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
|
||||
thought = action_str.replace(finish_command.group(0), '').strip()
|
||||
return AgentFinishAction(thought=thought)
|
||||
if bash_command := re.search(
|
||||
r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
|
||||
):
|
||||
# remove the command from the action string to get thought
|
||||
thought = action_str.replace(bash_command.group(0), '').strip()
|
||||
# a command was found
|
||||
command_group = bash_command.group(1).strip()
|
||||
|
||||
if command_group.strip() == 'exit':
|
||||
return AgentFinishAction()
|
||||
return CmdRunAction(command=command_group, thought=thought)
|
||||
elif python_code := re.search(
|
||||
r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
|
||||
):
|
||||
# a code block was found
|
||||
code_group = python_code.group(1).strip()
|
||||
thought = action_str.replace(python_code.group(0), '').strip()
|
||||
return IPythonRunCellAction(
|
||||
code=code_group,
|
||||
thought=thought,
|
||||
kernel_init_code=self.jupyter_kernel_init_code,
|
||||
)
|
||||
elif browse_command := re.search(
|
||||
r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
|
||||
):
|
||||
thought = action_str.replace(browse_command.group(0), '').strip()
|
||||
browse_actions = browse_command.group(1).strip()
|
||||
task = f'{thought}. I should start with: {browse_actions}'
|
||||
return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
|
||||
else:
|
||||
# We assume the LLM is GOOD enough that when it returns pure natural language
|
||||
# it want to talk to the user
|
||||
return MessageAction(content=action_str, wait_for_response=True)
|
||||
) + len(response.choices[0].message.content)
|
||||
return self.action_parser.parse(response)
|
||||
|
||||
def search_memory(self, query: str) -> list[str]:
|
||||
raise NotImplementedError('Implement this abstract method')
|
||||
|
||||
76
opendevin/controller/action_parser.py
Normal file
76
opendevin/controller/action_parser.py
Normal file
@ -0,0 +1,76 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from opendevin.events.action import Action
|
||||
|
||||
|
||||
class ResponseParser(ABC):
    """
    This abstract base class is a general interface for a response parser dedicated to
    parsing the action from the response from the LLM.
    """

    def __init__(self):
        # Subclasses fill this list; pay attention to the item order, since
        # the position of a parser in the list is significant to subclasses.
        self.action_parsers: list[ActionParser] = []

    @abstractmethod
    def parse(self, response) -> Action:
        """
        Parses the action from the response from the LLM.

        Parameters:
        - response: The response from the LLM, in whatever form
          parse_response() of the same subclass accepts.

        Returns:
        - action (Action): The action parsed from the response.
        """

    @abstractmethod
    def parse_response(self, response) -> str:
        """
        Extracts the action string from the response from the LLM.

        Parameters:
        - response: The response from the LLM.

        Returns:
        - action_str (str): The action str parsed from the response.
        """

    @abstractmethod
    def parse_action(self, action_str: str) -> Action:
        """
        Parses the action from the action string.

        Parameters:
        - action_str (str): The action string, as returned by parse_response().

        Returns:
        - action (Action): The action parsed from the action string.
        """
|
||||
|
||||
|
||||
class ActionParser(ABC):
    """
    This abstract base class is a general interface for an action parser dedicated to
    parsing the action from the action str from the LLM.

    Callers are expected to call check_condition() first and to call parse()
    with the same string only when it returned True; implementations may
    rely on that order.
    """

    @abstractmethod
    def check_condition(self, action_str: str) -> bool:
        """
        Check if the action string can be parsed by this parser.

        Parameters:
        - action_str (str): The action string from the LLM response.

        Returns:
        - bool: True when parse() can handle this action string.
        """

    @abstractmethod
    def parse(self, action_str: str) -> Action:
        """
        Parses the action from the action string from the LLM response.

        Parameters:
        - action_str (str): The action string from the LLM response.

        Returns:
        - action (Action): The action parsed from the action string.
        """
|
||||
Loading…
x
Reference in New Issue
Block a user