Add condensation observation to history and detect its loop (#7132)

2025-12-26 05:48:36 +08:00 · 2025-03-06 18:52:45 +01:00 · 2025-03-06 18:52:45 +01:00 · 22064d5555
commit 22064d5555
parent 08735a9b72
3 changed files with 235 additions and 4 deletions
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@ -78,7 +78,6 @@ class AgentController:
        NullObservation,
        ChangeAgentStateAction,
        AgentStateChangedObservation,
-        AgentCondensationObservation,
    )

    def __init__(
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@ -9,6 +9,7 @@ from openhands.events.observation import (
    CmdOutputObservation,
    IPythonRunCellObservation,
 )
+from openhands.events.observation.agent import AgentCondensationObservation
 from openhands.events.observation.empty import NullObservation
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
@ -97,9 +98,12 @@ class StuckDetector:
            return True

        # scenario 4: action, observation pattern on the last six steps
-        if len(filtered_history) < 6:
-            return False
-        if self._is_stuck_action_observation_pattern(filtered_history):
+        if len(filtered_history) >= 6:
+            if self._is_stuck_action_observation_pattern(filtered_history):
+                return True
+
+        # scenario 5: context window error loop
+        if self._is_stuck_context_window_error(filtered_history):
            return True

        return False
@ -308,6 +312,54 @@ class StuckDetector:
                return True
        return False

+    def _is_stuck_context_window_error(self, filtered_history):
+        """Detect a loop of back-to-back context window condensations.
+
+        This happens when we repeatedly get context window errors and try to trim,
+        but the trimming doesn't work, causing us to get more context window errors.
+        The pattern is three AgentCondensationObservation events in a row, with no
+        other event between any of them.
+
+        Only the three most recent condensation events are examined, and all three
+        must be adjacent. A single adjacent pair is not enough: a history such as
+        [cond, cond, action, obs, cond] shows progress after the second condensation
+        and is not reported as a loop.
+
+        Args:
+            filtered_history: List of filtered events to check
+
+        Returns:
+            bool: True if we detect a context window error loop
+        """
+        # Positions of every AgentCondensationObservation, in history order
+        condensation_indices = [
+            i
+            for i, event in enumerate(filtered_history)
+            if isinstance(event, AgentCondensationObservation)
+        ]
+
+        # Need at least 3 condensation events to detect a loop
+        if len(condensation_indices) < 3:
+            return False
+
+        # NOTE(review): the three events are not required to be the newest entries
+        # in the history, so an old run of condensations followed by normal activity
+        # still matches - confirm whether that is intended.
+        last_three = condensation_indices[-3:]
+
+        # The indices are strictly increasing, so the three events are adjacent
+        # exactly when each index is one more than the previous one.
+        is_loop = all(
+            later - earlier == 1 for earlier, later in zip(last_three, last_three[1:])
+        )
+
+        if is_loop:
+            logger.warning(
+                'Context window error loop detected - repeated condensation events'
+            )
+
+        return is_loop
+
    def _eq_no_pid(self, obj1, obj2):
        if isinstance(obj1, IPythonRunCellAction) and isinstance(
            obj2, IPythonRunCellAction
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@ -13,6 +13,7 @@ from openhands.events.observation import (
    CmdOutputObservation,
    FileReadObservation,
 )
+from openhands.events.observation.agent import AgentCondensationObservation
 from openhands.events.observation.commands import IPythonRunCellObservation
 from openhands.events.observation.empty import NullObservation
 from openhands.events.observation.error import ErrorObservation
@ -602,6 +603,185 @@ class TestStuckDetector:
        with patch('logging.Logger.warning'):
            assert not stuck_detector.is_stuck(headless_mode=True)

+    def test_is_stuck_context_window_error_loop(self, stuck_detector):
+        """Three back-to-back condensation observations are reported as stuck."""
+        agent_state = stuck_detector.state
+        trim_notice = 'Trimming prompt to meet context window limitations'
+        expected_warning = (
+            'Context window error loop detected - repeated condensation events'
+        )
+
+        # Seed the history with a user message and an empty observation
+        greeting = MessageAction(content='Hello', wait_for_response=False)
+        greeting._source = EventSource.USER
+        agent_state.history.append(greeting)
+        agent_state.history.append(NullObservation(content=''))
+
+        # Three condensation observations in a row: the loop under test
+        for _ in range(3):
+            agent_state.history.append(
+                AgentCondensationObservation(content=trim_notice)
+            )
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is True
+            mock_warning.assert_called_once_with(expected_warning)
+
+    def test_is_not_stuck_context_window_error_with_other_events(self, stuck_detector):
+        """Condensations separated by other agent events are not reported as a loop."""
+        agent_state = stuck_detector.state
+        trim_notice = 'Trimming prompt to meet context window limitations'
+
+        # Seed the history with a user message and an empty observation
+        greeting = MessageAction(content='Hello', wait_for_response=False)
+        greeting._source = EventSource.USER
+        agent_state.history.append(greeting)
+        agent_state.history.append(NullObservation(content=''))
+
+        # First condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        # A command and its output separate the first and second condensation
+        agent_state.history.append(CmdRunAction(command='ls'))
+        agent_state.history.append(
+            CmdOutputObservation(command='ls', content='file1.txt\nfile2.txt')
+        )
+
+        # Second condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        # A file read and its result separate the second and third condensation
+        agent_state.history.append(FileReadAction(path='file1.txt'))
+        agent_state.history.append(
+            FileReadObservation(content='File content', path='file1.txt')
+        )
+
+        # Third condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        with patch('logging.Logger.warning') as mock_warning:
+            is_stuck = stuck_detector.is_stuck(headless_mode=True)
+
+            # No loop, and therefore no warning either
+            assert is_stuck is False
+            mock_warning.assert_not_called()
+
+    def test_is_not_stuck_context_window_error_less_than_three(self, stuck_detector):
+        """Fewer than three condensation observations are not reported as a loop."""
+        agent_state = stuck_detector.state
+        trim_notice = 'Trimming prompt to meet context window limitations'
+
+        # Seed the history with a user message and an empty observation
+        greeting = MessageAction(content='Hello', wait_for_response=False)
+        greeting._source = EventSource.USER
+        agent_state.history.append(greeting)
+        agent_state.history.append(NullObservation(content=''))
+
+        # Two condensation observations only (should not detect as stuck)
+        for _ in range(2):
+            agent_state.history.append(
+                AgentCondensationObservation(content=trim_notice)
+            )
+
+        with patch('logging.Logger.warning') as mock_warning:
+            is_stuck = stuck_detector.is_stuck(headless_mode=True)
+            assert is_stuck is False
+            mock_warning.assert_not_called()
+
+    def test_is_stuck_context_window_error_with_user_messages(self, stuck_detector):
+        """User messages between condensation observations do not hide a loop.
+
+        User messages are filtered out in the stuck detection logic, so they
+        shouldn't prevent the three condensation observations from being detected.
+        """
+        agent_state = stuck_detector.state
+        trim_notice = 'Trimming prompt to meet context window limitations'
+        expected_warning = (
+            'Context window error loop detected - repeated condensation events'
+        )
+
+        # Seed the history with a user message and an empty observation
+        greeting = MessageAction(content='Hello', wait_for_response=False)
+        greeting._source = EventSource.USER
+        agent_state.history.append(greeting)
+        agent_state.history.append(NullObservation(content=''))
+
+        # First condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        # A user message (plus empty observation) after the first condensation
+        nudge = MessageAction(content='Please continue', wait_for_response=False)
+        nudge._source = EventSource.USER
+        agent_state.history.append(nudge)
+        agent_state.history.append(NullObservation(content=''))
+
+        # Second condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        # Another user message (plus empty observation) after the second one
+        second_nudge = MessageAction(content='Keep going', wait_for_response=False)
+        second_nudge._source = EventSource.USER
+        agent_state.history.append(second_nudge)
+        agent_state.history.append(NullObservation(content=''))
+
+        # Third condensation
+        agent_state.history.append(
+            AgentCondensationObservation(content=trim_notice)
+        )
+
+        with patch('logging.Logger.warning') as mock_warning:
+            is_stuck = stuck_detector.is_stuck(headless_mode=True)
+
+            assert is_stuck is True
+            mock_warning.assert_called_once_with(expected_warning)
+
+    def test_is_not_stuck_context_window_error_in_non_headless(self, stuck_detector):
+        """Condensations before the last user message are ignored in non-headless mode.
+
+        The same history is checked twice: headless mode is expected to report the
+        loop, while non-headless mode, which only looks at events after the last
+        user message, is expected not to.
+        """
+        agent_state = stuck_detector.state
+        trim_notice = 'Trimming prompt to meet context window limitations'
+        expected_warning = (
+            'Context window error loop detected - repeated condensation events'
+        )
+
+        # Three condensation observations come first...
+        for _ in range(3):
+            agent_state.history.append(
+                AgentCondensationObservation(content=trim_notice)
+            )
+
+        # ...and a user message (plus empty observation) ends the history
+        nudge = MessageAction(content='Please continue', wait_for_response=False)
+        nudge._source = EventSource.USER
+        agent_state.history.append(nudge)
+        agent_state.history.append(NullObservation(content=''))
+
+        with patch('logging.Logger.warning') as mock_warning:
+            # Headless mode: the loop is detected and logged exactly once
+            assert stuck_detector.is_stuck(headless_mode=True) is True
+            mock_warning.assert_called_once_with(expected_warning)
+
+            # Clear the recorded call before the second check
+            mock_warning.reset_mock()
+
+            # Non-headless mode: nothing after the last user message, so no loop
+            assert stuck_detector.is_stuck(headless_mode=False) is False
+            mock_warning.assert_not_called()
+

 class TestAgentController:
    @pytest.fixture