Remove/reduce unused content in a CmdOutputObservation (#7404)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Engel Nyst 2025-07-17 19:34:46 +02:00 committed by GitHub
parent eecc00fa4a
commit e65e0a98f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 105 additions and 2 deletions

View File

@ -17,6 +17,11 @@ CMD_OUTPUT_METADATA_PS1_REGEX = re.compile(
re.DOTALL | re.MULTILINE,
)
# Default max size for command output content
# to prevent too large observations from being saved in the stream
# This matches the default max_message_chars in LLMConfig
MAX_CMD_OUTPUT_SIZE: int = 30000
class CmdOutputMetadata(BaseModel):
"""Additional metadata captured from PS1"""
@ -109,7 +114,11 @@ class CmdOutputObservation(Observation):
hidden: bool = False,
**kwargs: Any,
) -> None:
super().__init__(content)
# Truncate content before passing it to parent
truncated_content = self._maybe_truncate(content)
super().__init__(truncated_content)
self.command = command
self.observation = observation
self.hidden = hidden
@ -124,6 +133,36 @@ class CmdOutputObservation(Observation):
if 'command_id' in kwargs:
self.metadata.pid = kwargs['command_id']
@staticmethod
def _maybe_truncate(content: str, max_size: int = MAX_CMD_OUTPUT_SIZE) -> str:
"""Truncate the content if it's too large.
This helps avoid storing unnecessarily large content in the event stream.
Args:
content: The content to truncate
max_size: Maximum size before truncation. Defaults to MAX_CMD_OUTPUT_SIZE.
Returns:
Original content if not too large, or truncated content otherwise
"""
if len(content) <= max_size:
return content
# Truncate the middle and include a message about it
half = max_size // 2
original_length = len(content)
truncated = (
content[:half]
+ '\n[... Observation truncated due to length ...]\n'
+ content[-half:]
)
logger.debug(
f'Truncated large command output: {original_length} -> {len(truncated)} chars'
)
return truncated
@property
def command_id(self) -> int:
return self.metadata.pid

View File

@ -5,4 +5,11 @@ from openhands.events.event import Event
@dataclass
class Observation(Event):
"""Base class for observations from the environment.
Attributes:
content: The content of the observation. For large observations,
this might be truncated when stored.
"""
content: str

View File

@ -369,8 +369,11 @@ class ConversationMemory:
message: Message
if isinstance(obs, CmdOutputObservation):
# if it doesn't have tool call metadata, it was triggered by a user action
# Note: CmdOutputObservation content is already truncated at initialization,
# and the observation content should not have been modified after initialization
# we keep this truncation for backwards compatibility for a time
if obs.tool_call_metadata is None:
# if it doesn't have tool call metadata, it was triggered by a user action
text = truncate_content(
f'\nObserved result of command executed by user:\n{obs.to_agent_observation()}',
max_message_chars,

View File

@ -9,6 +9,7 @@ from openhands.events.observation import (
RecallObservation,
)
from openhands.events.observation.agent import MicroagentKnowledge
from openhands.events.observation.commands import MAX_CMD_OUTPUT_SIZE
from openhands.events.serialization import (
event_from_dict,
event_to_dict,
@ -113,6 +114,59 @@ def test_success_field_serialization():
assert serialized['success'] is False
def test_cmd_output_truncation():
"""Test that large command outputs are truncated during initialization."""
# Create a large content string that exceeds MAX_CMD_OUTPUT_SIZE (50000 characters)
large_content = 'a' * 60000 # 60k characters
# Create a CmdOutputObservation with the large content
obs = CmdOutputObservation(
content=large_content,
command='ls -R',
metadata=CmdOutputMetadata(
exit_code=0,
),
)
# Verify the content was truncated
assert len(obs.content) < 60000
# The truncated content might be slightly larger than MAX_CMD_OUTPUT_SIZE
# due to the added truncation message
truncation_msg = '[... Observation truncated due to length ...]'
assert truncation_msg in obs.content
# The truncation algorithm might add a few extra characters due to the truncation message
# We'll allow a small margin (1% of MAX_CMD_OUTPUT_SIZE) for the total content length
margin = int(MAX_CMD_OUTPUT_SIZE * 0.01) # 1% margin
assert len(obs.content) <= MAX_CMD_OUTPUT_SIZE + margin
# Verify the beginning and end of the content are preserved
half_size = MAX_CMD_OUTPUT_SIZE // 2
assert obs.content.startswith('a' * half_size)
assert obs.content.endswith('a' * half_size)
def test_cmd_output_no_truncation():
"""Test that small command outputs are not truncated."""
# Create a content string that doesn't exceed MAX_CMD_OUTPUT_SIZE (50000 characters)
# We use a much smaller value for testing efficiency
small_content = 'a' * 1000 # 1k characters
# Create a CmdOutputObservation with the small content
obs = CmdOutputObservation(
content=small_content,
command='ls',
metadata=CmdOutputMetadata(
exit_code=0,
),
)
# Verify the content was not truncated
assert len(obs.content) == 1000
assert obs.content == small_content
def test_legacy_serialization():
original_observation_dict = {
'id': 42,