Fix pagination bug in event_service_base.search_events causing duplicate events in exports (#13364)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Tim O'Farrell
2026-03-12 12:24:06 -06:00
committed by GitHub
parent 127e611706
commit e2805dea75
2 changed files with 111 additions and 3 deletions

View File

@@ -106,14 +106,15 @@ class EventServiceBase(EventService, ABC):
reverse=(sort_order == EventSortOrder.TIMESTAMP_DESC),
)
# Apply pagination to items (not paths)
start_offset = 0
next_page_id = None
if page_id:
start_offset = int(page_id)
paths = paths[start_offset:]
if len(paths) > limit:
paths = paths[:limit]
items = items[start_offset:]
if len(items) > limit:
next_page_id = str(start_offset + limit)
items = items[:limit]
return EventPage(items=items, next_page_id=next_page_id)

View File

@@ -161,6 +161,113 @@ class TestFilesystemEventServiceSearchEvents:
assert hasattr(result, 'next_page_id')
assert len(result.items) == 3
@pytest.mark.asyncio
async def test_search_events_pagination_limits_results(
    self, service: FilesystemEventService
):
    """Verify that search_events caps a single page at the requested limit."""
    conversation_id = uuid4()
    num_events = 10
    limit = 3
    # Persist more events than fit on one page.
    for _ in range(num_events):
        await service.save_event(conversation_id, create_token_event())
    # The first page must hold exactly `limit` items and advertise a next page.
    page = await service.search_events(conversation_id, limit=limit)
    assert len(page.items) == limit
    assert page.next_page_id is not None
@pytest.mark.asyncio
async def test_search_events_pagination_iterates_all_events(
    self, service: FilesystemEventService
):
    """Walk every page of search_events and confirm each event appears once.

    Guards against a regression where pagination was applied to 'paths'
    rather than 'items', which made every page repeat the full event list.
    """
    conversation_id = uuid4()
    num_events = 10
    limit = 3
    # Save events, remembering which IDs we expect to get back.
    expected_ids = set()
    for _ in range(num_events):
        event = create_token_event()
        expected_ids.add(event.id)
        await service.save_event(conversation_id, event)
    # Walk the pages, collecting IDs and rejecting any repeat.
    seen_ids = set()
    page_id = None
    page_count = 0
    while True:
        page = await service.search_events(
            conversation_id, page_id=page_id, limit=limit
        )
        page_count += 1
        for item in page.items:
            # A repeated ID here is exactly the old bug's signature.
            assert item.id not in seen_ids, (
                f'Duplicate event {item.id} found on page {page_count}'
            )
            seen_ids.add(item.id)
        if page.next_page_id is None:
            break
        page_id = page.next_page_id
    # Every created event came back exactly once.
    assert seen_ids == expected_ids
    assert len(seen_ids) == num_events
    # Ceiling division: 10 events at 3 per page -> 4 pages (3+3+3+1).
    assert page_count == -(-num_events // limit)
@pytest.mark.asyncio
async def test_search_events_pagination_with_filters(
    self, service: FilesystemEventService
):
    """Pagination should compose correctly with kind filtering."""
    conversation_id = uuid4()
    # Persist a mixture of token and pause events.
    all_events = [create_token_event() for _ in range(5)] + [
        create_pause_event() for _ in range(3)
    ]
    for event in all_events:
        await service.save_event(conversation_id, event)
    # Page through only the token events, two at a time.
    seen_ids = set()
    page_id = None
    while True:
        page = await service.search_events(
            conversation_id,
            kind__eq='TokenEvent',
            page_id=page_id,
            limit=2,
        )
        for item in page.items:
            assert item.kind == 'TokenEvent'
            seen_ids.add(item.id)
        if page.next_page_id is None:
            break
        page_id = page.next_page_id
    # All 5 token events should be discovered across the pages.
    assert len(seen_ids) == 5
class TestFilesystemEventServiceIntegration:
"""Integration tests for FilesystemEventService."""