Add condensation observation to history and detect its loop (#7132)

This commit is contained in:
Engel Nyst 2025-03-06 18:52:45 +01:00 committed by GitHub
parent 08735a9b72
commit 22064d5555
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 235 additions and 4 deletions

View File

@ -78,7 +78,6 @@ class AgentController:
NullObservation,
ChangeAgentStateAction,
AgentStateChangedObservation,
AgentCondensationObservation,
)
def __init__(

View File

@ -9,6 +9,7 @@ from openhands.events.observation import (
CmdOutputObservation,
IPythonRunCellObservation,
)
from openhands.events.observation.agent import AgentCondensationObservation
from openhands.events.observation.empty import NullObservation
from openhands.events.observation.error import ErrorObservation
from openhands.events.observation.observation import Observation
@ -97,9 +98,12 @@ class StuckDetector:
return True
# scenario 4: action, observation pattern on the last six steps
if len(filtered_history) < 6:
return False
if self._is_stuck_action_observation_pattern(filtered_history):
if len(filtered_history) >= 6:
if self._is_stuck_action_observation_pattern(filtered_history):
return True
# scenario 5: context window error loop
if self._is_stuck_context_window_error(filtered_history):
return True
return False
@ -308,6 +312,54 @@ class StuckDetector:
return True
return False
def _is_stuck_context_window_error(self, filtered_history):
    """Detects if we're stuck in a loop of context window errors.

    This happens when we repeatedly get context window errors and try to trim,
    but the trimming doesn't work, causing us to get more context window errors.
    The pattern is repeated AgentCondensationObservation events without any
    other events between them.

    Only the three most recent condensation observations are examined. They
    count as a loop when they sit directly next to each other in
    ``filtered_history``; it does not matter what follows them.

    Args:
        filtered_history: List of filtered events to check

    Returns:
        bool: True if we detect a context window error loop
    """
    # Positions of every condensation observation, in history order.
    condensation_indices = [
        idx
        for idx, event in enumerate(filtered_history)
        if isinstance(event, AgentCondensationObservation)
    ]

    # Need at least 3 condensation events to detect a loop
    if len(condensation_indices) < 3:
        return False

    first_idx, _, last_idx = condensation_indices[-3:]

    # The indices are strictly increasing, so the three events are back to
    # back exactly when they span two positions. All three must be adjacent:
    # accepting a single adjacent pair would flag a history such as
    # "condensation, real work, condensation, condensation", where the agent
    # did make progress after the first trim.
    if last_idx - first_idx != 2:
        return False

    logger.warning(
        'Context window error loop detected - repeated condensation events'
    )
    return True
def _eq_no_pid(self, obj1, obj2):
if isinstance(obj1, IPythonRunCellAction) and isinstance(
obj2, IPythonRunCellAction

View File

@ -13,6 +13,7 @@ from openhands.events.observation import (
CmdOutputObservation,
FileReadObservation,
)
from openhands.events.observation.agent import AgentCondensationObservation
from openhands.events.observation.commands import IPythonRunCellObservation
from openhands.events.observation.empty import NullObservation
from openhands.events.observation.error import ErrorObservation
@ -602,6 +603,185 @@ class TestStuckDetector:
with patch('logging.Logger.warning'):
assert not stuck_detector.is_stuck(headless_mode=True)
def test_is_stuck_context_window_error_loop(self, stuck_detector):
    """Three condensation observations in a row must be reported as stuck."""
    history = stuck_detector.state.history
    expected_warning = (
        'Context window error loop detected - repeated condensation events'
    )

    # Seed the history with an ordinary user message and its observation.
    greeting = MessageAction(content='Hello', wait_for_response=False)
    greeting._source = EventSource.USER
    for seed_event in (greeting, NullObservation(content='')):
        history.append(seed_event)

    # Three back-to-back condensation events form the loop pattern.
    remaining = 3
    while remaining > 0:
        history.append(
            AgentCondensationObservation(
                content='Trimming prompt to meet context window limitations'
            )
        )
        remaining -= 1

    with patch('logging.Logger.warning') as warning_spy:
        assert stuck_detector.is_stuck(headless_mode=True) is True
        warning_spy.assert_called_once_with(expected_warning)
def test_is_not_stuck_context_window_error_with_other_events(self, stuck_detector):
    """Condensation events separated by other events must not count as a loop."""
    history = stuck_detector.state.history
    trim_notice = 'Trimming prompt to meet context window limitations'

    greeting = MessageAction(content='Hello', wait_for_response=False)
    greeting._source = EventSource.USER

    # Full timeline: initial exchange, then each condensation is followed by
    # an action/observation pair before the next condensation appears.
    timeline = [
        greeting,
        NullObservation(content=''),
        AgentCondensationObservation(content=trim_notice),
        CmdRunAction(command='ls'),
        CmdOutputObservation(command='ls', content='file1.txt\nfile2.txt'),
        AgentCondensationObservation(content=trim_notice),
        FileReadAction(path='file1.txt'),
        FileReadObservation(content='File content', path='file1.txt'),
        AgentCondensationObservation(content=trim_notice),
    ]
    for event in timeline:
        history.append(event)

    with patch('logging.Logger.warning') as warning_spy:
        assert stuck_detector.is_stuck(headless_mode=True) is False
        warning_spy.assert_not_called()
def test_is_not_stuck_context_window_error_less_than_three(self, stuck_detector):
    """Two condensation events are below the threshold and must not be flagged."""
    history = stuck_detector.state.history

    # Ordinary opening exchange.
    greeting = MessageAction(content='Hello', wait_for_response=False)
    greeting._source = EventSource.USER
    history.append(greeting)
    history.append(NullObservation(content=''))

    # Only two condensation events: one short of what the detector requires.
    first_trim = AgentCondensationObservation(
        content='Trimming prompt to meet context window limitations'
    )
    history.append(first_trim)
    second_trim = AgentCondensationObservation(
        content='Trimming prompt to meet context window limitations'
    )
    history.append(second_trim)

    with patch('logging.Logger.warning') as warning_spy:
        assert stuck_detector.is_stuck(headless_mode=True) is False
        warning_spy.assert_not_called()
def test_is_stuck_context_window_error_with_user_messages(self, stuck_detector):
    """A condensation loop is still detected when user messages are interleaved.

    User messages are filtered out in the stuck detection logic, so they
    shouldn't prevent us from detecting a loop of condensation events.
    """
    history = stuck_detector.state.history
    trim_notice = 'Trimming prompt to meet context window limitations'

    # Initial user message, then a user message after each of the first two
    # condensation events; every message is followed by an empty observation.
    user_texts = ('Hello', 'Please continue', 'Keep going')
    for position, text in enumerate(user_texts):
        if position > 0:
            history.append(AgentCondensationObservation(content=trim_notice))
        user_turn = MessageAction(content=text, wait_for_response=False)
        user_turn._source = EventSource.USER
        history.append(user_turn)
        history.append(NullObservation(content=''))

    # Third and final condensation event closes the pattern.
    history.append(AgentCondensationObservation(content=trim_notice))

    with patch('logging.Logger.warning') as warning_spy:
        assert stuck_detector.is_stuck(headless_mode=True) is True
        warning_spy.assert_called_once_with(
            'Context window error loop detected - repeated condensation events'
        )
def test_is_not_stuck_context_window_error_in_non_headless(self, stuck_detector):
    """Condensation events before the last user message are ignored when not headless.

    In non-headless mode, we only look at events after the last user message,
    so the same history is reported as stuck in headless mode and not stuck
    otherwise.
    """
    history = stuck_detector.state.history
    loop_warning = (
        'Context window error loop detected - repeated condensation events'
    )

    # The three condensation events come first...
    for trim_event in [
        AgentCondensationObservation(
            content='Trimming prompt to meet context window limitations'
        )
        for _ in range(3)
    ]:
        history.append(trim_event)

    # ...and a user message (with its empty observation) ends the history.
    follow_up = MessageAction(content='Please continue', wait_for_response=False)
    follow_up._source = EventSource.USER
    history.append(follow_up)
    history.append(NullObservation(content=''))

    with patch('logging.Logger.warning') as warning_spy:
        # Headless: the loop is detected.
        headless_result = stuck_detector.is_stuck(headless_mode=True)
        assert headless_result is True
        warning_spy.assert_called_once_with(loop_warning)

        # Clear recorded calls before the second check.
        warning_spy.reset_mock()

        # Non-headless: only events after the last user message are
        # considered, so the loop must NOT be detected.
        interactive_result = stuck_detector.is_stuck(headless_mode=False)
        assert interactive_result is False
        warning_spy.assert_not_called()
class TestAgentController:
@pytest.fixture