Small refactor to improve (CodeAct)Agent extensibility (#8244)

This commit is contained in:
Chase 2025-05-04 10:21:54 -07:00 committed by GitHub
parent 2c085ae79e
commit fc32efb52e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 58 additions and 32 deletions

View File

@ -1,8 +1,12 @@
import copy
import os
from collections import deque
from typing import TYPE_CHECKING
from litellm import ChatCompletionToolParam
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
from openhands.agenthub.codeact_agent.tools.bash import create_cmd_run_tool
@ -20,7 +24,7 @@ from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import Message
from openhands.events.action import Action, AgentFinishAction, MessageAction
from openhands.events.action import AgentFinishAction, MessageAction
from openhands.events.event import Event
from openhands.llm.llm import LLM
from openhands.memory.condenser import Condenser
@ -75,23 +79,26 @@ class CodeActAgent(Agent):
- config (AgentConfig): The configuration for this agent
"""
super().__init__(llm, config)
self.pending_actions: deque[Action] = deque()
self.pending_actions: deque['Action'] = deque()
self.reset()
self.tools = self._get_tools()
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
# Create a ConversationMemory instance
self.conversation_memory = ConversationMemory(self.config, self.prompt_manager)
self.condenser = Condenser.from_config(self.config.condenser)
logger.debug(f'Using condenser: {type(self.condenser)}')
self.response_to_actions_fn = codeact_function_calling.response_to_actions
@property
def prompt_manager(self) -> PromptManager:
if self._prompt_manager is None:
self._prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
def _get_tools(self) -> list[ChatCompletionToolParam]:
return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
# For these models, we use short tool descriptions ( < 1024 tokens)
# to avoid hitting the OpenAI token limit for tool descriptions.
SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-', 'o3', 'o1', 'o4']
@ -130,7 +137,7 @@ class CodeActAgent(Agent):
super().reset()
self.pending_actions.clear()
def step(self, state: State) -> Action:
def step(self, state: State) -> 'Action':
"""Performs one step using the CodeAct Agent.
This includes gathering info on previous steps and prompting the model to make a command to execute.
@ -198,9 +205,7 @@ class CodeActAgent(Agent):
params['extra_body'] = {'metadata': state.to_llm_metadata(agent_name=self.name)}
response = self.llm.completion(**params)
logger.debug(f'Response from LLM: {response}')
actions = self.response_to_actions_fn(
response, mcp_tool_names=list(self.mcp_tools.keys())
)
actions = self.response_to_actions(response)
logger.debug(f'Actions after response_to_actions: {actions}')
for action in actions:
self.pending_actions.append(action)
@ -274,3 +279,8 @@ class CodeActAgent(Agent):
self.conversation_memory.apply_prompt_caching(messages)
return messages
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Translate an LLM response into a list of agent actions.

    Delegates to the codeact function-calling module; subclasses (e.g.
    ReadOnlyAgent) override this to use their own function-calling logic.
    """
    tool_names = list(self.mcp_tools.keys())
    return codeact_function_calling.response_to_actions(
        response,
        mcp_tool_names=tool_names,
    )

View File

@ -4,6 +4,13 @@ ReadOnlyAgent - A specialized version of CodeActAgent that only uses read-only t
import os
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from litellm import ChatCompletionToolParam
from openhands.events.action import Action
from openhands.llm.llm import ModelResponse
from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.agenthub.readonly_agent import (
function_calling as readonly_function_calling,
@ -41,24 +48,27 @@ class ReadOnlyAgent(CodeActAgent):
- llm (LLM): The llm to be used by this agent
- config (AgentConfig): The configuration for this agent
"""
# Initialize the CodeActAgent class but we'll override some of its behavior
# Initialize the CodeActAgent class; some of it is overridden with class methods
super().__init__(llm, config)
# Override the tools to only include read-only tools
# Get the read-only tools from our own function_calling module
self.tools = readonly_function_calling.get_tools()
# Set up our own prompt manager
self.prompt_manager = PromptManager(
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
)
self.response_to_actions_fn = readonly_function_calling.response_to_actions
logger.debug(
f"TOOLS loaded for ReadOnlyAgent: {', '.join([tool.get('function').get('name') for tool in self.tools])}"
)
@property
def prompt_manager(self) -> PromptManager:
    """Lazily create and cache a PromptManager rooted at this agent's prompts dir."""
    if self._prompt_manager is not None:
        return self._prompt_manager
    prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
    self._prompt_manager = PromptManager(prompt_dir=prompts_dir)
    return self._prompt_manager
def _get_tools(self) -> list['ChatCompletionToolParam']:
    """Return only read-only tools.

    Overrides the parent's tool selection so this agent exposes
    exclusively the tools from the readonly function_calling module.
    """
    readonly_tools = readonly_function_calling.get_tools()
    return readonly_tools
def set_mcp_tools(self, mcp_tools: list[dict]) -> None:
"""Sets the list of MCP tools for the agent.
@ -68,3 +78,8 @@ class ReadOnlyAgent(CodeActAgent):
logger.warning(
'ReadOnlyAgent does not support MCP tools. MCP tools will be ignored by the agent.'
)
def response_to_actions(self, response: 'ModelResponse') -> list['Action']:
    """Convert an LLM response into actions using the read-only function-calling module."""
    tool_names = list(self.mcp_tools.keys())
    return readonly_function_calling.response_to_actions(
        response,
        mcp_tool_names=tool_names,
    )

View File

@ -8,6 +8,7 @@ if TYPE_CHECKING:
from openhands.core.config import AgentConfig
from openhands.events.action import Action
from openhands.events.action.message import SystemMessageAction
from openhands.utils.prompt import PromptManager
from litellm import ChatCompletionToolParam
from openhands.core.exceptions import (
@ -19,9 +20,6 @@ from openhands.events.event import EventSource
from openhands.llm.llm import LLM
from openhands.runtime.plugins import PluginRequirement
if TYPE_CHECKING:
from openhands.utils.prompt import PromptManager
class Agent(ABC):
DEPRECATED = False
@ -43,10 +41,16 @@ class Agent(ABC):
self.llm = llm
self.config = config
self._complete = False
self.prompt_manager: 'PromptManager' | None = None
self._prompt_manager: 'PromptManager' | None = None
self.mcp_tools: dict[str, ChatCompletionToolParam] = {}
self.tools: list = []
@property
def prompt_manager(self) -> 'PromptManager':
    """Return the agent's PromptManager.

    Raises:
        ValueError: if no subclass has initialized ``_prompt_manager``.
    """
    manager = self._prompt_manager
    if manager is None:
        raise ValueError(f'Prompt manager not initialized for agent {self.name}')
    return manager
def get_system_message(self) -> 'SystemMessageAction | None':
"""
Returns a SystemMessageAction containing the system message and tools.

View File

@ -437,9 +437,6 @@ def test_enhance_messages_adds_newlines_between_consecutive_user_messages(
agent: CodeActAgent,
):
"""Test that _enhance_messages adds newlines between consecutive user messages."""
# Set up the prompt manager
agent.prompt_manager = Mock()
# Create consecutive user messages with various content types
messages = [
# First user message with TextContent only