mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com> Co-authored-by: Engel Nyst <engel.nyst@gmail.com>
549 lines
20 KiB
Python
549 lines
20 KiB
Python
from unittest.mock import MagicMock
|
|
|
|
from openhands.events.action import (
|
|
Action,
|
|
ChangeAgentStateAction,
|
|
CmdRunAction,
|
|
MessageAction,
|
|
NullAction,
|
|
)
|
|
from openhands.events.event import Event, EventSource
|
|
from openhands.events.event_filter import (
|
|
EventFilter, # Needed for ANY matcher type check
|
|
)
|
|
from openhands.events.observation import (
|
|
AgentStateChangedObservation,
|
|
CmdOutputObservation,
|
|
NullObservation,
|
|
)
|
|
from openhands.events.stream import EventStream
|
|
from openhands.server.routes.manage_conversations import _get_contextual_events
|
|
|
|
|
|
# Helper to create event instances for testing, inspired by test_agent_history.py
|
|
def create_test_events(event_specs: list[dict]) -> list[Event]:
    """Instantiate one event per spec dict, for use as test fixtures.

    Each spec must provide ``'type'`` (the event class) and ``'id'``. The keys
    ``'source'``, ``'hidden'`` and ``'cause'`` are optional and are written to
    the event's private attributes after construction. Every remaining key is
    passed to the event constructor as a keyword argument, and placeholder
    values are filled in for the event types handled below.

    Returns the created events in the same order as ``event_specs``.
    """
    # Keys consumed by this helper rather than by the event constructor.
    non_ctor_keys = ('type', 'id', 'source', 'hidden', 'cause')
    created = []

    for spec in event_specs:
        cls = spec['type']
        ctor_args = {
            key: value for key, value in spec.items() if key not in non_ctor_keys
        }

        # Fill in placeholder constructor arguments so instantiation succeeds.
        if cls == MessageAction:
            ctor_args.setdefault('content', f'default_content_for_{spec["id"]}')
        elif cls == CmdRunAction:
            ctor_args.setdefault('command', f'default_command_for_{spec["id"]}')
        elif cls == CmdOutputObservation:
            ctor_args.setdefault(
                'content', f'default_obs_content_for_{spec["id"]}'
            )
            if 'command_id' not in ctor_args:
                # Simplistic default: the spec's cause, else the previous id.
                previous_id = spec['id'] - 1 if spec['id'] > 0 else 0
                ctor_args['command_id'] = spec.get('cause', previous_id)
            ctor_args.setdefault('command', f'default_cmd_for_obs_{spec["id"]}')
        elif cls == NullAction:
            assert 'content' not in ctor_args
        elif cls == NullObservation:
            # Any content given in the spec is replaced by an empty string.
            ctor_args['content'] = ''
        elif cls == ChangeAgentStateAction:
            ctor_args.setdefault('agent_state', 'running')
            ctor_args.setdefault('thought', '')
        elif cls == AgentStateChangedObservation:
            ctor_args.setdefault('agent_state', 'running')

        event = cls(**ctor_args)

        # Private attributes are assigned directly after construction.
        event._id = spec['id']
        # Actions default to the agent as source, everything else to the user.
        if issubclass(cls, Action):
            fallback_source = EventSource.AGENT
        else:
            fallback_source = EventSource.USER
        event._source = spec.get('source', fallback_source)
        event._hidden = spec.get('hidden', False)
        if 'cause' in spec:
            event._cause = spec['cause']

        created.append(event)

    return created
|
|
|
|
|
|
def test_get_contextual_events_basic_retrieval():
    """Check count, ordering, and string formatting of the returned context.

    All events in this test are of types that are NOT filtered out by default.
    """
    stream = MagicMock(spec=EventStream)
    target_event_id = 5
    context_size = 4  # Hardcoded in _get_contextual_events

    # Every event that could be in the stream for this test.
    specs = [
        {'id': 1, 'type': MessageAction, 'content': 'message_1'},
        {'id': 2, 'type': CmdRunAction, 'command': 'command_2'},
        {
            'id': 3,
            'type': CmdOutputObservation,
            'content': 'observation_3',
            'command_id': 2,
            'command': 'command_2',
        },
        {'id': 4, 'type': MessageAction, 'content': 'message_4'},
        # Target event
        {'id': 5, 'type': CmdRunAction, 'command': 'command_5_target'},
        {
            'id': 6,
            'type': CmdOutputObservation,
            'content': 'observation_6',
            'command_id': 5,
            'command': 'command_5_target',
        },
        {'id': 7, 'type': MessageAction, 'content': 'message_7'},
        {'id': 8, 'type': CmdRunAction, 'command': 'command_8'},
        {
            'id': 9,
            'type': CmdOutputObservation,
            'content': 'observation_9',
            'command_id': 8,
            'command': 'command_8',
        },
        {'id': 10, 'type': MessageAction, 'content': 'message_10'},
        # Extra event: left out of the mocked 'after' result below.
        {'id': 11, 'type': CmdRunAction, 'command': 'command_11'},
    ]
    events_by_id = {event.id: event for event in create_test_events(specs)}

    # Mocked "before" result (start_id=5, limit=4, reverse=True): newest first.
    before_result = [events_by_id[i] for i in (5, 4, 3, 2)]
    # Mocked "after" result (start_id=6, limit=5): chronological.
    after_result = [events_by_id[i] for i in (6, 7, 8, 9, 10)]
    stream.search_events.side_effect = [before_result, after_result]

    result_str = _get_contextual_events(stream, target_event_id)

    # Final order is chronological: the reversed "before" events (2..5)
    # followed by the "after" events (6..10), one str(event) per line.
    expected_events = [events_by_id[i] for i in range(2, 11)]
    assert result_str == '\n'.join(str(event) for event in expected_events)

    # Inspect how search_events was invoked.
    calls = stream.search_events.call_args_list
    assert len(calls) == 2

    # Call 1: events up to and including the target, newest first.
    before_kwargs = calls[0][1]
    assert before_kwargs['start_id'] == target_event_id
    assert isinstance(before_kwargs['filter'], EventFilter)
    assert before_kwargs['reverse'] is True
    assert before_kwargs['limit'] == context_size

    # Call 2: events following the target, in forward order.
    after_kwargs = calls[1][1]
    assert after_kwargs['start_id'] == target_event_id + 1
    assert isinstance(after_kwargs['filter'], EventFilter)
    # 'reverse' may be omitted (default False) or passed explicitly as False.
    assert after_kwargs.get('reverse', False) is False
    assert after_kwargs['limit'] == context_size + 1
|
|
|
|
|
|
def test_get_contextual_events_filtering():
    """Test that filtered event types and hidden events are left out.

    ``search_events`` is mocked, so the filtering itself is not executed here.
    The test checks that ``_get_contextual_events`` passes an ``EventFilter``
    with the expected exclusions to both calls, and that it assembles its
    output from whatever the (already filtered) searches return.
    """
    mock_event_stream = MagicMock(spec=EventStream)
    target_event_id = 3  # Target a non-filtered event

    all_event_specs = [
        # At or before the target: search_events(start_id=3, reverse=True, limit=4)
        {'id': 0, 'type': NullAction},  # Filtered
        {'id': 1, 'type': MessageAction, 'content': 'message_1_VISIBLE'},  # Visible
        {
            'id': 2,
            'type': ChangeAgentStateAction,
            'agent_state': 'thinking',
            'thought': 'abc_FILTERED',
        },  # Filtered
        {
            'id': 3,
            'type': CmdRunAction,
            'command': 'command_3_TARGET_VISIBLE',
        },  # Target, Visible
        # After the target: search_events(start_id=4, limit=5)
        {
            'id': 4,
            'type': CmdOutputObservation,
            'content': 'obs_4_HIDDEN_FILTERED',
            'command_id': 3,
            'hidden': True,
        },  # Filtered (hidden)
        {
            'id': 5,
            'type': AgentStateChangedObservation,
            'agent_state': 'running',
            'content': 'state_change_5_FILTERED',
        },  # Filtered
        {'id': 6, 'type': MessageAction, 'content': 'message_6_VISIBLE'},  # Visible
        {
            'id': 7,
            'type': NullObservation,
            'content': 'null_obs_7_FILTERED',
        },  # Filtered
        {'id': 8, 'type': CmdRunAction, 'command': 'command_8_VISIBLE'},  # Visible
        {
            'id': 9,
            'type': MessageAction,
            'content': 'message_9_VISIBLE',
        },  # Visible
        {
            'id': 10,
            'type': MessageAction,
            'content': 'message_10_EXTRA',
        },  # Visible, but deliberately left out of the mocked 'after' result
    ]
    all_events_objects = create_test_events(all_event_specs)
    events_by_id = {e.id: e for e in all_events_objects}

    # The mock stands in for EventStream.search_events, which is expected to
    # apply the EventFilter itself, so the lists below are already filtered.

    # Before: candidates [3, 2, 1, 0]; 2 and 0 are filtered types -> [3, 1].
    simulated_search_before = [events_by_id[3], events_by_id[1]]

    # After: candidates [4..10]; 4 is hidden, 5 and 7 are filtered types.
    # NOTE(review): if the real search_events applies the limit after
    # filtering, event 10 would be returned as well; the mock returns only
    # [6, 8, 9] -- confirm against EventStream.search_events.
    simulated_search_after = [events_by_id[6], events_by_id[8], events_by_id[9]]

    mock_event_stream.search_events.side_effect = [
        simulated_search_before,
        simulated_search_after,
    ]

    result_str = _get_contextual_events(mock_event_stream, target_event_id)

    expected_final_event_objects = [
        events_by_id[1],  # from before, reversed
        events_by_id[3],  # from before, reversed (target)
        events_by_id[6],  # from after
        events_by_id[8],  # from after
        events_by_id[9],  # from after
    ]
    expected_output_str = '\n'.join(str(e) for e in expected_final_event_objects)

    assert result_str == expected_output_str

    # Verify the EventFilter passed to each search_events call.
    calls = mock_event_stream.search_events.call_args_list
    assert len(calls) == 2

    expected_filtered_types = (
        NullAction,
        NullObservation,
        ChangeAgentStateAction,
        AgentStateChangedObservation,
    )

    # The "before" and "after" calls must both use the same exclusions.
    for call in calls:
        used_filter = call[1]['filter']  # kwargs['filter']
        assert isinstance(used_filter, EventFilter)
        assert used_filter.exclude_hidden is True
        assert set(used_filter.exclude_types) == set(expected_filtered_types)
|
|
|
|
|
|
def test_get_contextual_events_target_at_beginning():
    """Cover a target near the start of the stream.

    Fewer than ``context_size`` events exist at or before the target, so the
    mocked "before" search returns a short list.
    """
    stream = MagicMock(spec=EventStream)
    target_event_id = 1  # Second event in the stream (IDs here start at 0)
    context_size = 4

    specs = [
        {'id': 0, 'type': MessageAction, 'content': 'message_0_first'},
        {'id': 1, 'type': CmdRunAction, 'command': 'command_1_TARGET'},  # Target
        {'id': 2, 'type': CmdOutputObservation, 'content': 'obs_2', 'command_id': 1},
        {'id': 3, 'type': MessageAction, 'content': 'message_3'},
        {'id': 4, 'type': CmdRunAction, 'command': 'command_4'},
        {'id': 5, 'type': CmdOutputObservation, 'content': 'obs_5', 'command_id': 4},
        # Last event included in the mocked 'after' result.
        {'id': 6, 'type': MessageAction, 'content': 'message_6'},
    ]
    events_by_id = {event.id: event for event in create_test_events(specs)}

    # Mocked "before" result (start_id=1, reverse=True, limit=4): only [1, 0].
    before_result = [events_by_id[i] for i in (1, 0)]
    # Mocked "after" result (start_id=2, limit=5): [2, 3, 4, 5, 6].
    after_result = [events_by_id[i] for i in (2, 3, 4, 5, 6)]
    stream.search_events.side_effect = [before_result, after_result]

    result_str = _get_contextual_events(stream, target_event_id)

    # Reversed "before" events (0, 1) followed by the "after" events (2..6).
    expected_events = [events_by_id[i] for i in range(7)]
    assert result_str == '\n'.join(str(event) for event in expected_events)

    calls = stream.search_events.call_args_list
    assert len(calls) == 2

    # Call 1: before events
    before_kwargs = calls[0][1]
    assert before_kwargs['start_id'] == target_event_id
    assert before_kwargs['limit'] == context_size

    # Call 2: after events
    after_kwargs = calls[1][1]
    assert after_kwargs['start_id'] == target_event_id + 1
    assert after_kwargs['limit'] == context_size + 1
|
|
|
|
|
|
def test_get_contextual_events_target_at_end():
    """Cover a target near the end of the stream.

    Fewer than ``context_size + 1`` events follow the target, so the mocked
    "after" search returns a short list.
    """
    stream = MagicMock(spec=EventStream)
    target_event_id = 5  # Target is near the end
    context_size = 4

    specs = [
        {'id': 0, 'type': MessageAction, 'content': 'message_0'},
        {'id': 1, 'type': CmdRunAction, 'command': 'command_1'},
        # IDs 2-5 form the mocked 'before' result.
        {
            'id': 2,
            'type': CmdOutputObservation,
            'content': 'obs_2',
            'command_id': 1,
        },
        {'id': 3, 'type': MessageAction, 'content': 'message_3'},
        {'id': 4, 'type': CmdRunAction, 'command': 'command_4'},
        {
            'id': 5,
            'type': CmdOutputObservation,
            'content': 'obs_5_TARGET',
            'command_id': 4,
        },  # Target
        # Last event; the only one in the mocked 'after' result.
        {'id': 6, 'type': MessageAction, 'content': 'message_6_last'},
    ]
    events_by_id = {event.id: event for event in create_test_events(specs)}

    # Mocked "before" result (start_id=5, reverse=True, limit=4): newest first.
    before_result = [events_by_id[i] for i in (5, 4, 3, 2)]
    # Mocked "after" result (start_id=6, limit=5): a single trailing event.
    after_result = [events_by_id[6]]
    stream.search_events.side_effect = [before_result, after_result]

    result_str = _get_contextual_events(stream, target_event_id)

    # Reversed "before" events (2..5) followed by the single "after" event (6).
    expected_events = [events_by_id[i] for i in range(2, 7)]
    assert result_str == '\n'.join(str(event) for event in expected_events)

    calls = stream.search_events.call_args_list
    assert len(calls) == 2

    before_kwargs = calls[0][1]
    assert before_kwargs['start_id'] == target_event_id
    assert before_kwargs['limit'] == context_size  # context_size for before

    after_kwargs = calls[1][1]
    assert after_kwargs['start_id'] == target_event_id + 1
    assert after_kwargs['limit'] == context_size + 1  # context_size + 1 for after
|
|
|
|
|
|
def test_get_contextual_events_empty_search_results():
    """Cover the case where both searches come back empty."""
    stream = MagicMock(spec=EventStream)
    target_event_id = 10
    context_size = 4

    # First the "before" search, then the "after" search: no events in either.
    stream.search_events.side_effect = [[], []]

    result_str = _get_contextual_events(stream, target_event_id)

    # With no events found, the rendered context is the empty string.
    assert result_str == ''

    calls = stream.search_events.call_args_list
    assert len(calls) == 2

    before_kwargs = calls[0][1]
    assert before_kwargs['start_id'] == target_event_id
    assert before_kwargs['limit'] == context_size

    after_kwargs = calls[1][1]
    assert after_kwargs['start_id'] == target_event_id + 1
    assert after_kwargs['limit'] == context_size + 1
|
|
|
|
|
|
def test_get_contextual_events_all_events_filtered():
    """Cover a context window in which every event is of a filtered type.

    No event objects are built: the mocked searches return empty lists,
    standing in for a stream whose events were all removed by the filter.
    """
    stream = MagicMock(spec=EventStream)
    # Whether the target itself would be filtered is irrelevant for this test.
    target_event_id = 2

    # Both the "before" and the "after" search yield nothing.
    stream.search_events.side_effect = [[], []]

    result_str = _get_contextual_events(stream, target_event_id)

    # Empty string, since no events remain after filtering.
    assert result_str == ''

    calls = stream.search_events.call_args_list
    assert len(calls) == 2  # Still called twice

    # Inspect the filter of the first call only; the second call is expected
    # to use identical settings.
    filter_used = calls[0][1]['filter']
    assert isinstance(filter_used, EventFilter)
    assert filter_used.exclude_hidden is True
    assert set(filter_used.exclude_types) == {
        NullAction,
        NullObservation,
        ChangeAgentStateAction,
        AgentStateChangedObservation,
    }
|