diff --git a/openhands/controller/agent_controller.py b/openhands/controller/agent_controller.py
index fdcc829ec0..37eb022e0d 100644
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -78,7 +78,6 @@ class AgentController:
         NullObservation,
         ChangeAgentStateAction,
         AgentStateChangedObservation,
-        AgentCondensationObservation,
     )
 
     def __init__(
diff --git a/openhands/controller/stuck.py b/openhands/controller/stuck.py
index 1bf7e410eb..99aa09bc75 100644
--- a/openhands/controller/stuck.py
+++ b/openhands/controller/stuck.py
@@ -9,6 +9,7 @@ from openhands.events.observation import (
     CmdOutputObservation,
     IPythonRunCellObservation,
 )
+from openhands.events.observation.agent import AgentCondensationObservation
 from openhands.events.observation.empty import NullObservation
 from openhands.events.observation.error import ErrorObservation
 from openhands.events.observation.observation import Observation
@@ -97,9 +98,12 @@ class StuckDetector:
             return True
 
         # scenario 4: action, observation pattern on the last six steps
-        if len(filtered_history) < 6:
-            return False
-        if self._is_stuck_action_observation_pattern(filtered_history):
+        if len(filtered_history) >= 6:
+            if self._is_stuck_action_observation_pattern(filtered_history):
+                return True
+
+        # scenario 5: context window error loop
+        if self._is_stuck_context_window_error(filtered_history):
             return True
 
         return False
@@ -308,6 +312,43 @@ class StuckDetector:
                 return True
         return False
 
+    def _is_stuck_context_window_error(self, filtered_history) -> bool:
+        """Detect whether we're stuck in a loop of context window errors.
+
+        This happens when we repeatedly get context window errors and try to trim,
+        but the trimming doesn't work, causing us to get more context window errors.
+        The pattern is three AgentCondensationObservation events in a row, with no
+        other event between any of them.
+
+        Args:
+            filtered_history: List of filtered events to check
+
+        Returns:
+            bool: True if we detect a context window error loop
+        """
+        # Positions of the condensation events within the filtered history
+        condensation_indices = [
+            i
+            for i, event in enumerate(filtered_history)
+            if isinstance(event, AgentCondensationObservation)
+        ]
+
+        # Need at least 3 condensation events to detect a loop
+        if len(condensation_indices) < 3:
+            return False
+
+        # The indices are strictly increasing, so the last three condensation
+        # events are back-to-back exactly when they span three positions.
+        # One adjacent pair alone (e.g. C, action, observation, C, C) is only
+        # two repeats and must not count as a loop.
+        if condensation_indices[-1] - condensation_indices[-3] != 2:
+            return False
+
+        logger.warning(
+            'Context window error loop detected - repeated condensation events'
+        )
+        return True
+
     def _eq_no_pid(self, obj1, obj2):
         if isinstance(obj1, IPythonRunCellAction) and isinstance(
             obj2, IPythonRunCellAction
diff --git a/tests/unit/test_is_stuck.py b/tests/unit/test_is_stuck.py
index fae6b10381..f2ac3f0773 100644
--- a/tests/unit/test_is_stuck.py
+++ b/tests/unit/test_is_stuck.py
@@ -13,6 +13,7 @@ from openhands.events.observation import (
     CmdOutputObservation,
     FileReadObservation,
 )
+from openhands.events.observation.agent import AgentCondensationObservation
 from openhands.events.observation.commands import IPythonRunCellObservation
 from openhands.events.observation.empty import NullObservation
 from openhands.events.observation.error import ErrorObservation
@@ -602,6 +603,214 @@ class TestStuckDetector:
         with patch('logging.Logger.warning'):
             assert not stuck_detector.is_stuck(headless_mode=True)
 
+    def test_is_stuck_context_window_error_loop(self, stuck_detector):
+        """Test that we detect when we're stuck in a loop of context window errors."""
+        state = stuck_detector.state
+
+        # Add some initial events
+        message_action = MessageAction(content='Hello', wait_for_response=False)
+        message_action._source = EventSource.USER
+        state.history.append(message_action)
+        message_observation = NullObservation(content='')
+        state.history.append(message_observation)
+
+        # Add three consecutive condensation events (should detect as stuck)
+        for _ in range(3):
+            condensation = AgentCondensationObservation(
+                content='Trimming prompt to meet context window limitations'
+            )
+            state.history.append(condensation)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is True
+            mock_warning.assert_called_once_with(
+                'Context window error loop detected - repeated condensation events'
+            )
+
+    def test_is_not_stuck_context_window_error_with_other_events(self, stuck_detector):
+        """Test that we don't detect a loop when there are other events between condensation events."""
+        state = stuck_detector.state
+
+        # Add some initial events
+        message_action = MessageAction(content='Hello', wait_for_response=False)
+        message_action._source = EventSource.USER
+        state.history.append(message_action)
+        message_observation = NullObservation(content='')
+        state.history.append(message_observation)
+
+        # Add condensation events with other events between them
+        condensation1 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation1)
+
+        # Add some other events between condensation events
+        cmd_action = CmdRunAction(command='ls')
+        state.history.append(cmd_action)
+        cmd_observation = CmdOutputObservation(
+            command='ls', content='file1.txt\nfile2.txt'
+        )
+        state.history.append(cmd_observation)
+
+        condensation2 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation2)
+
+        # Add more other events
+        read_action = FileReadAction(path='file1.txt')
+        state.history.append(read_action)
+        read_observation = FileReadObservation(content='File content', path='file1.txt')
+        state.history.append(read_observation)
+
+        condensation3 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation3)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is False
+            mock_warning.assert_not_called()
+
+    def test_is_not_stuck_context_window_error_less_than_three(self, stuck_detector):
+        """Test that we don't detect a loop with fewer than three condensation events."""
+        state = stuck_detector.state
+
+        # Add some initial events
+        message_action = MessageAction(content='Hello', wait_for_response=False)
+        message_action._source = EventSource.USER
+        state.history.append(message_action)
+        message_observation = NullObservation(content='')
+        state.history.append(message_observation)
+
+        # Add only two condensation events (should not detect as stuck)
+        for _ in range(2):
+            condensation = AgentCondensationObservation(
+                content='Trimming prompt to meet context window limitations'
+            )
+            state.history.append(condensation)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is False
+            mock_warning.assert_not_called()
+
+    def test_is_not_stuck_context_window_error_two_in_a_row(self, stuck_detector):
+        """Test that two consecutive condensation events are not a loop, even when an
+        earlier condensation event exists elsewhere in the history.
+        """
+        state = stuck_detector.state
+
+        # An earlier, isolated condensation event followed by real progress
+        condensation1 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation1)
+        cmd_action = CmdRunAction(command='ls')
+        state.history.append(cmd_action)
+        cmd_observation = CmdOutputObservation(
+            command='ls', content='file1.txt\nfile2.txt'
+        )
+        state.history.append(cmd_observation)
+
+        # Only two condensation events in a row (should not detect as stuck)
+        for _ in range(2):
+            condensation = AgentCondensationObservation(
+                content='Trimming prompt to meet context window limitations'
+            )
+            state.history.append(condensation)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is False
+            mock_warning.assert_not_called()
+
+    def test_is_stuck_context_window_error_with_user_messages(self, stuck_detector):
+        """Test that we still detect a loop even with user messages between condensation events.
+
+        User messages are filtered out in the stuck detection logic, so they shouldn't
+        prevent us from detecting a loop of condensation events.
+        """
+        state = stuck_detector.state
+
+        # Add some initial events
+        message_action = MessageAction(content='Hello', wait_for_response=False)
+        message_action._source = EventSource.USER
+        state.history.append(message_action)
+        message_observation = NullObservation(content='')
+        state.history.append(message_observation)
+
+        # Add condensation events with user messages between them
+        condensation1 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation1)
+
+        # Add user message between condensation events
+        user_message = MessageAction(content='Please continue', wait_for_response=False)
+        user_message._source = EventSource.USER
+        state.history.append(user_message)
+        user_observation = NullObservation(content='')
+        state.history.append(user_observation)
+
+        condensation2 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation2)
+
+        # Add another user message
+        user_message2 = MessageAction(content='Keep going', wait_for_response=False)
+        user_message2._source = EventSource.USER
+        state.history.append(user_message2)
+        user_observation2 = NullObservation(content='')
+        state.history.append(user_observation2)
+
+        condensation3 = AgentCondensationObservation(
+            content='Trimming prompt to meet context window limitations'
+        )
+        state.history.append(condensation3)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            assert stuck_detector.is_stuck(headless_mode=True) is True
+            mock_warning.assert_called_once_with(
+                'Context window error loop detected - repeated condensation events'
+            )
+
+    def test_is_not_stuck_context_window_error_in_non_headless(self, stuck_detector):
+        """Test that in non-headless mode, we don't detect a loop if the condensation events
+        are before the last user message.
+
+        In non-headless mode, we only look at events after the last user message.
+        """
+        state = stuck_detector.state
+
+        # Add condensation events first
+        for _ in range(3):
+            condensation = AgentCondensationObservation(
+                content='Trimming prompt to meet context window limitations'
+            )
+            state.history.append(condensation)
+
+        # Add a user message at the end
+        user_message = MessageAction(content='Please continue', wait_for_response=False)
+        user_message._source = EventSource.USER
+        state.history.append(user_message)
+        user_observation = NullObservation(content='')
+        state.history.append(user_observation)
+
+        with patch('logging.Logger.warning') as mock_warning:
+            # In headless mode, we should detect the loop
+            assert stuck_detector.is_stuck(headless_mode=True) is True
+            mock_warning.assert_called_once_with(
+                'Context window error loop detected - repeated condensation events'
+            )
+
+            # Reset mock for next assertion
+            mock_warning.reset_mock()
+
+            # In non-headless mode, we should NOT detect the loop since we only look
+            # at events after the last user message
+            assert stuck_detector.is_stuck(headless_mode=False) is False
+            mock_warning.assert_not_called()
+
 
 class TestAgentController:
     @pytest.fixture