Refactor system message handling to use event stream (#7824)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Calvin Smith <email@cjsmith.io>
This commit is contained in:
Xingyao Wang
2025-04-17 10:30:19 -04:00
committed by GitHub
parent caf34d83bd
commit 93e9db3206
19 changed files with 446 additions and 321 deletions

View File

@@ -20,6 +20,7 @@ from openhands.events.action import (
MessageAction,
)
from openhands.events.action.mcp import McpAction
from openhands.events.action.message import SystemMessageAction
from openhands.events.event import Event, RecallType
from openhands.events.observation import (
AgentCondensationObservation,
@@ -53,7 +54,6 @@ class ConversationMemory:
def process_events(
self,
condensed_history: list[Event],
initial_messages: list[Message],
max_message_chars: int | None = None,
vision_is_active: bool = False,
) -> list[Message]:
@@ -63,7 +63,6 @@ class ConversationMemory:
Args:
condensed_history: The condensed history of events to convert
initial_messages: The initial messages to include in the conversation
max_message_chars: The maximum number of characters in the content of an event included
in the prompt to the LLM. Larger observations are truncated.
vision_is_active: Whether vision is active in the LLM. If True, image URLs will be included.
@@ -74,8 +73,8 @@ class ConversationMemory:
# log visual browsing status
logger.debug(f'Visual browsing: {self.agent_config.enable_som_visual_browsing}')
# Process special events first (system prompts, etc.)
messages = initial_messages
# Initialize empty messages list
messages = []
# Process regular events
pending_tool_call_action_messages: dict[str, Message] = {}
@@ -132,20 +131,6 @@ class ConversationMemory:
messages = list(ConversationMemory._filter_unmatched_tool_calls(messages))
return messages
def process_initial_messages(self, with_caching: bool = False) -> list[Message]:
"""Create the initial messages for the conversation."""
return [
Message(
role='system',
content=[
TextContent(
text=self.prompt_manager.get_system_message(),
cache_prompt=with_caching,
)
],
)
]
def _process_action(
self,
action: Action,
@@ -275,6 +260,16 @@ class ConversationMemory:
content=content,
)
]
elif isinstance(action, SystemMessageAction):
# Convert SystemMessageAction to a system message
return [
Message(
role='system',
content=[TextContent(text=action.content)],
# System messages carry no tool calls, so tool_calls is explicitly None
tool_calls=None,
)
]
return []
def _process_observation(
@@ -546,6 +541,8 @@ class ConversationMemory:
For the new Anthropic API, we only need to mark the last user or tool message as cacheable.
"""
if len(messages) > 0 and messages[0].role == 'system':
messages[0].content[-1].cache_prompt = True
# NOTE: this is only needed for anthropic
for message in reversed(messages):
if message.role in ('user', 'tool'):