From e65e0a98f0402f6b6079ebf94da70dcf1513f610 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Thu, 17 Jul 2025 19:34:46 +0200 Subject: [PATCH] Remove/reduce unused content in a CmdOutputObservation (#7404) Co-authored-by: openhands --- openhands/events/observation/commands.py | 41 ++++++++++++++- openhands/events/observation/observation.py | 7 +++ openhands/memory/conversation_memory.py | 5 +- tests/unit/test_observation_serialization.py | 54 ++++++++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) diff --git a/openhands/events/observation/commands.py b/openhands/events/observation/commands.py index 5a413662d1..849c415c19 100644 --- a/openhands/events/observation/commands.py +++ b/openhands/events/observation/commands.py @@ -17,6 +17,11 @@ CMD_OUTPUT_METADATA_PS1_REGEX = re.compile( re.DOTALL | re.MULTILINE, ) +# Default max size for command output content +# to prevent too large observations from being saved in the stream +# This matches the default max_message_chars in LLMConfig +MAX_CMD_OUTPUT_SIZE: int = 30000 + class CmdOutputMetadata(BaseModel): """Additional metadata captured from PS1""" @@ -109,7 +114,11 @@ class CmdOutputObservation(Observation): hidden: bool = False, **kwargs: Any, ) -> None: - super().__init__(content) + # Truncate content before passing it to parent + truncated_content = self._maybe_truncate(content) + + super().__init__(truncated_content) + self.command = command self.observation = observation self.hidden = hidden @@ -124,6 +133,36 @@ class CmdOutputObservation(Observation): if 'command_id' in kwargs: self.metadata.pid = kwargs['command_id'] + @staticmethod + def _maybe_truncate(content: str, max_size: int = MAX_CMD_OUTPUT_SIZE) -> str: + """Truncate the content if it's too large. + + This helps avoid storing unnecessarily large content in the event stream. + + Args: + content: The content to truncate + max_size: Maximum size before truncation. Defaults to MAX_CMD_OUTPUT_SIZE. + + Returns: + Original content if not too large, or truncated content otherwise + """ + + if len(content) <= max_size: + return content + + # Truncate the middle and include a message about it + half = max_size // 2 + original_length = len(content) + truncated = ( + content[:half] + + '\n[... Observation truncated due to length ...]\n' + + content[-half:] + ) + logger.debug( + f'Truncated large command output: {original_length} -> {len(truncated)} chars' + ) + return truncated + @property def command_id(self) -> int: return self.metadata.pid diff --git a/openhands/events/observation/observation.py b/openhands/events/observation/observation.py index 46f8a0e712..f0407d0f3f 100644 --- a/openhands/events/observation/observation.py +++ b/openhands/events/observation/observation.py @@ -5,4 +5,11 @@ from openhands.events.event import Event @dataclass class Observation(Event): + """Base class for observations from the environment. + + Attributes: + content: The content of the observation. For large observations, + this might be truncated when stored. + """ + content: str diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py index 5933429f22..a6ced05ae7 100644 --- a/openhands/memory/conversation_memory.py +++ b/openhands/memory/conversation_memory.py @@ -369,8 +369,11 @@ class ConversationMemory: message: Message if isinstance(obs, CmdOutputObservation): - # if it doesn't have tool call metadata, it was triggered by a user action + # Note: CmdOutputObservation content is already truncated at initialization, + # and the observation content should not have been modified after initialization + # we keep this truncation for backwards compatibility for a time if obs.tool_call_metadata is None: + # if it doesn't have tool call metadata, it was triggered by a user action text = truncate_content( f'\nObserved result of command executed by user:\n{obs.to_agent_observation()}', max_message_chars, diff --git a/tests/unit/test_observation_serialization.py b/tests/unit/test_observation_serialization.py index 138151ef3d..f7cdb3ca8c 100644 --- a/tests/unit/test_observation_serialization.py +++ b/tests/unit/test_observation_serialization.py @@ -9,6 +9,7 @@ from openhands.events.observation import ( RecallObservation, ) from openhands.events.observation.agent import MicroagentKnowledge +from openhands.events.observation.commands import MAX_CMD_OUTPUT_SIZE from openhands.events.serialization import ( event_from_dict, event_to_dict, @@ -113,6 +114,59 @@ def test_success_field_serialization(): assert serialized['success'] is False +def test_cmd_output_truncation(): + """Test that large command outputs are truncated during initialization.""" + # Create a large content string that exceeds MAX_CMD_OUTPUT_SIZE (50000 characters) + large_content = 'a' * 60000 # 60k characters + + # Create a CmdOutputObservation with the large content + obs = CmdOutputObservation( + content=large_content, + command='ls -R', + metadata=CmdOutputMetadata( + exit_code=0, + ), + ) + + # Verify the content was truncated + assert len(obs.content) < 60000 + + # The truncated content might be slightly larger than MAX_CMD_OUTPUT_SIZE + # due to the added truncation message + truncation_msg = '[... Observation truncated due to length ...]' + assert truncation_msg in obs.content + + # The truncation algorithm might add a few extra characters due to the truncation message + # We'll allow a small margin (1% of MAX_CMD_OUTPUT_SIZE) for the total content length + margin = int(MAX_CMD_OUTPUT_SIZE * 0.01) # 1% margin + assert len(obs.content) <= MAX_CMD_OUTPUT_SIZE + margin + + # Verify the beginning and end of the content are preserved + half_size = MAX_CMD_OUTPUT_SIZE // 2 + assert obs.content.startswith('a' * half_size) + assert obs.content.endswith('a' * half_size) + + +def test_cmd_output_no_truncation(): + """Test that small command outputs are not truncated.""" + # Create a content string that doesn't exceed MAX_CMD_OUTPUT_SIZE (50000 characters) + # We use a much smaller value for testing efficiency + small_content = 'a' * 1000 # 1k characters + + # Create a CmdOutputObservation with the small content + obs = CmdOutputObservation( + content=small_content, + command='ls', + metadata=CmdOutputMetadata( + exit_code=0, + ), + ) + + # Verify the content was not truncated + assert len(obs.content) == 1000 + assert obs.content == small_content + + def test_legacy_serialization(): original_observation_dict = { 'id': 42,