Remove/reduce unused content in a CmdOutputObservation (#7404)

Co-authored-by: openhands <openhands@all-hands.dev>
2025-12-26 05:48:36 +08:00 · 2025-07-17 19:34:46 +02:00 · 2025-07-17 19:34:46 +02:00 · e65e0a98f0
commit e65e0a98f0
parent eecc00fa4a
4 changed files with 105 additions and 2 deletions
--- a/openhands/events/observation/commands.py
+++ b/openhands/events/observation/commands.py
@ -17,6 +17,11 @@ CMD_OUTPUT_METADATA_PS1_REGEX = re.compile(
    re.DOTALL | re.MULTILINE,
 )

+# Default max size for command output content
+# to prevent too large observations from being saved in the stream
+# This matches the default max_message_chars in LLMConfig
+MAX_CMD_OUTPUT_SIZE: int = 30000
+

 class CmdOutputMetadata(BaseModel):
    """Additional metadata captured from PS1"""
@ -109,7 +114,11 @@ class CmdOutputObservation(Observation):
        hidden: bool = False,
        **kwargs: Any,
    ) -> None:
-        super().__init__(content)
+        # Truncate content before passing it to parent
+        truncated_content = self._maybe_truncate(content)
+
+        super().__init__(truncated_content)
+
        self.command = command
        self.observation = observation
        self.hidden = hidden
@ -124,6 +133,36 @@ class CmdOutputObservation(Observation):
        if 'command_id' in kwargs:
            self.metadata.pid = kwargs['command_id']

+    @staticmethod
+    def _maybe_truncate(content: str, max_size: int = MAX_CMD_OUTPUT_SIZE) -> str:
+        """Truncate the content if it's too large.
+
+        This helps avoid storing unnecessarily large content in the event stream.
+
+        Args:
+            content: The content to truncate
+            max_size: Maximum size before truncation. Defaults to MAX_CMD_OUTPUT_SIZE.
+
+        Returns:
+            Original content if not too large, or truncated content otherwise
+        """
+
+        if len(content) <= max_size:
+            return content
+
+        # Truncate the middle and include a message about it
+        half = max_size // 2
+        original_length = len(content)
+        truncated = (
+            content[:half]
+            + '\n[... Observation truncated due to length ...]\n'
+            + content[-half:]
+        )
+        logger.debug(
+            f'Truncated large command output: {original_length} -> {len(truncated)} chars'
+        )
+        return truncated
+
    @property
    def command_id(self) -> int:
        return self.metadata.pid
--- a/openhands/events/observation/observation.py
+++ b/openhands/events/observation/observation.py
@ -5,4 +5,11 @@ from openhands.events.event import Event

@dataclass
 class Observation(Event):
+    """Base class for observations from the environment.
+
+    Attributes:
+        content: The content of the observation. For large observations,
+                this might be truncated when stored.
+    """
+
    content: str
--- a/openhands/memory/conversation_memory.py
+++ b/openhands/memory/conversation_memory.py
@ -369,8 +369,11 @@ class ConversationMemory:
        message: Message

        if isinstance(obs, CmdOutputObservation):
-            # if it doesn't have tool call metadata, it was triggered by a user action
+            # Note: CmdOutputObservation content is already truncated at initialization,
+            # and the observation content should not have been modified after initialization
+            # we keep this truncation for backwards compatibility for a time
            if obs.tool_call_metadata is None:
+                # if it doesn't have tool call metadata, it was triggered by a user action
                text = truncate_content(
                    f'\nObserved result of command executed by user:\n{obs.to_agent_observation()}',
                    max_message_chars,
--- a/tests/unit/test_observation_serialization.py
+++ b/tests/unit/test_observation_serialization.py
@ -9,6 +9,7 @@ from openhands.events.observation import (
    RecallObservation,
 )
 from openhands.events.observation.agent import MicroagentKnowledge
+from openhands.events.observation.commands import MAX_CMD_OUTPUT_SIZE
 from openhands.events.serialization import (
    event_from_dict,
    event_to_dict,
@ -113,6 +114,59 @@ def test_success_field_serialization():
    assert serialized['success'] is False


+def test_cmd_output_truncation():
+    """Test that large command outputs are truncated during initialization."""
+    # Create a large content string that exceeds MAX_CMD_OUTPUT_SIZE (50000 characters)
+    large_content = 'a' * 60000  # 60k characters
+
+    # Create a CmdOutputObservation with the large content
+    obs = CmdOutputObservation(
+        content=large_content,
+        command='ls -R',
+        metadata=CmdOutputMetadata(
+            exit_code=0,
+        ),
+    )
+
+    # Verify the content was truncated
+    assert len(obs.content) < 60000
+
+    # The truncated content might be slightly larger than MAX_CMD_OUTPUT_SIZE
+    # due to the added truncation message
+    truncation_msg = '[... Observation truncated due to length ...]'
+    assert truncation_msg in obs.content
+
+    # The truncation algorithm might add a few extra characters due to the truncation message
+    # We'll allow a small margin (1% of MAX_CMD_OUTPUT_SIZE) for the total content length
+    margin = int(MAX_CMD_OUTPUT_SIZE * 0.01)  # 1% margin
+    assert len(obs.content) <= MAX_CMD_OUTPUT_SIZE + margin
+
+    # Verify the beginning and end of the content are preserved
+    half_size = MAX_CMD_OUTPUT_SIZE // 2
+    assert obs.content.startswith('a' * half_size)
+    assert obs.content.endswith('a' * half_size)
+
+
+def test_cmd_output_no_truncation():
+    """Test that small command outputs are not truncated."""
+    # Create a content string that doesn't exceed MAX_CMD_OUTPUT_SIZE (50000 characters)
+    # We use a much smaller value for testing efficiency
+    small_content = 'a' * 1000  # 1k characters
+
+    # Create a CmdOutputObservation with the small content
+    obs = CmdOutputObservation(
+        content=small_content,
+        command='ls',
+        metadata=CmdOutputMetadata(
+            exit_code=0,
+        ),
+    )
+
+    # Verify the content was not truncated
+    assert len(obs.content) == 1000
+    assert obs.content == small_content
+
+
 def test_legacy_serialization():
    original_observation_dict = {
        'id': 42,