Fix Bitbucket pagination and sorting to fetch ALL repositories (#9356)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Robert Brennan 2025-06-25 17:06:01 -04:00 committed by GitHub
parent 8e4a8a65f8
commit d37e40caf8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 160 additions and 15 deletions

View File

@ -149,6 +149,41 @@ class BitBucketService(BaseGitService, GitService):
# Bitbucket doesn't have a dedicated search endpoint like GitHub
return []
async def _fetch_paginated_data(
    self, url: str, params: dict, max_items: int
) -> list[dict]:
    """Collect items from a paginated Bitbucket API endpoint.

    Pages are requested one after another through ``self._make_request``.
    Each response body is read for its ``'values'`` list (the items of that
    page) and its ``'next'`` entry (the URL of the following page).

    Args:
        url: Endpoint URL of the first page.
        params: Query parameters sent with the first request only.
        max_items: Upper bound on the number of items returned.

    Returns:
        The accumulated items, truncated to at most ``max_items`` entries.
    """
    collected: list[dict] = []
    next_url = url
    query = params

    # Keep going while there is a page to request and the cap is not reached.
    while next_url and len(collected) < max_items:
        payload, _ = await self._make_request(next_url, query)

        batch = payload.get('values', [])
        if not batch:
            # An empty (or missing) page means there is nothing left to read.
            break
        collected += batch

        # Follow the link to the next page; a missing link ends the loop.
        next_url = payload.get('next')
        # The 'next' URL is expected to carry the query string already, so
        # follow-up requests are sent without extra parameters.
        query = {}

    # The last page may have pushed the total over the cap.
    return collected[:max_items]
async def get_repositories(self, sort: str, app_mode: AppMode) -> list[Repository]:
"""Get repositories for the authenticated user using workspaces endpoint.
@ -157,33 +192,51 @@ class BitBucketService(BaseGitService, GitService):
This approach is more comprehensive and efficient than the previous implementation
that made separate calls for public and private repositories.
"""
repositories = []
MAX_REPOS = 1000
PER_PAGE = 100 # Maximum allowed by Bitbucket API
repositories: list[Repository] = []
# Get user's workspaces
# Get user's workspaces with pagination
workspaces_url = f'{self.BASE_URL}/workspaces'
workspaces_data, _ = await self._make_request(workspaces_url)
workspaces = await self._fetch_paginated_data(workspaces_url, {}, MAX_REPOS)
for workspace in workspaces_data.get('values', []):
for workspace in workspaces:
workspace_slug = workspace.get('slug')
if not workspace_slug:
continue
# Get repositories for this workspace
# Get repositories for this workspace with pagination
workspace_repos_url = f'{self.BASE_URL}/repositories/{workspace_slug}'
# Map sort parameter to Bitbucket API compatible values
# Map sort parameter to Bitbucket API compatible values and ensure descending order
# to show most recently changed repos at the top
bitbucket_sort = sort
if sort == 'pushed':
# Bitbucket doesn't support 'pushed', use 'updated_on' instead
bitbucket_sort = 'updated_on'
bitbucket_sort = (
'-updated_on' # Use negative prefix for descending order
)
elif sort == 'updated':
bitbucket_sort = '-updated_on'
elif sort == 'created':
bitbucket_sort = '-created_on'
elif sort == 'full_name':
bitbucket_sort = 'name' # Bitbucket uses 'name' not 'full_name'
else:
# Default to most recently updated first
bitbucket_sort = '-updated_on'
params = {
'pagelen': 100,
'pagelen': PER_PAGE,
'sort': bitbucket_sort,
}
repos_data, headers = await self._make_request(workspace_repos_url, params)
for repo in repos_data.get('values', []):
# Fetch all repositories for this workspace with pagination
workspace_repos = await self._fetch_paginated_data(
workspace_repos_url, params, MAX_REPOS - len(repositories)
)
for repo in workspace_repos:
uuid = repo.get('uuid', '')
repositories.append(
Repository(
@ -192,11 +245,18 @@ class BitBucketService(BaseGitService, GitService):
git_provider=ProviderType.BITBUCKET,
is_public=repo.get('is_private', True) is False,
stargazers_count=None, # Bitbucket doesn't have stars
link_header=headers.get('Link', ''),
pushed_at=repo.get('updated_on'),
)
)
# Stop if we've reached the maximum number of repositories
if len(repositories) >= MAX_REPOS:
break
# Stop if we've reached the maximum number of repositories
if len(repositories) >= MAX_REPOS:
break
return repositories
async def get_suggested_tasks(self) -> list[SuggestedTask]:
@ -240,10 +300,21 @@ class BitBucketService(BaseGitService, GitService):
repo = parts[-1]
url = f'{self.BASE_URL}/repositories/{owner}/{repo}/refs/branches'
data, _ = await self._make_request(url)
# Set maximum branches to fetch (similar to GitHub/GitLab implementations)
MAX_BRANCHES = 1000
PER_PAGE = 100
params = {
'pagelen': PER_PAGE,
'sort': '-target.date', # Sort by most recent commit date, descending
}
# Fetch all branches with pagination
branch_data = await self._fetch_paginated_data(url, params, MAX_BRANCHES)
branches = []
for branch in data.get('values', []):
for branch in branch_data:
branches.append(
Branch(
name=branch.get('name', ''),

View File

@ -459,11 +459,85 @@ async def test_bitbucket_sort_parameter_mapping():
second_call_args = mock_request.call_args_list[1]
url, params = second_call_args[0]
# Verify the sort parameter was mapped correctly
assert params['sort'] == 'updated_on'
# Verify the sort parameter was mapped correctly (with descending order)
assert params['sort'] == '-updated_on'
assert 'repositories/test-workspace' in url
@pytest.mark.asyncio
async def test_bitbucket_pagination():
    """
    Repositories spread over several Bitbucket result pages are all returned.

    The mocked API answers three requests in order: the workspace listing,
    a first repository page that carries a 'next' link, and a final
    repository page without one.
    """
    # Response to the workspace listing request.
    workspaces_page = (
        {'values': [{'slug': 'test-workspace', 'name': 'Test Workspace'}]},
        {},
    )

    # First repository page: two repositories plus a link to page two.
    first_repo_page = (
        {
            'values': [
                {
                    'uuid': 'repo-1',
                    'slug': 'repo1',
                    'workspace': {'slug': 'test-workspace'},
                    'is_private': False,
                    'updated_on': '2023-01-01T00:00:00Z',
                },
                {
                    'uuid': 'repo-2',
                    'slug': 'repo2',
                    'workspace': {'slug': 'test-workspace'},
                    'is_private': True,
                    'updated_on': '2023-01-02T00:00:00Z',
                },
            ],
            'next': 'https://api.bitbucket.org/2.0/repositories/test-workspace?page=2',
        },
        {},
    )

    # Second repository page: one repository and no 'next' key (last page).
    last_repo_page = (
        {
            'values': [
                {
                    'uuid': 'repo-3',
                    'slug': 'repo3',
                    'workspace': {'slug': 'test-workspace'},
                    'is_private': False,
                    'updated_on': '2023-01-03T00:00:00Z',
                }
            ],
        },
        {},
    )

    service = BitBucketService(token=SecretStr('test-token'))

    with patch.object(service, '_make_request') as request_mock:
        # Each call to _make_request consumes the next canned response.
        request_mock.side_effect = [
            workspaces_page,
            first_repo_page,
            last_repo_page,
        ]

        repositories = await service.get_repositories('pushed', AppMode.SAAS)

        # One workspace request plus two repository pages.
        assert request_mock.call_count == 3

        # Every repository from both pages is present, in page order.
        assert len(repositories) == 3
        expected_ids = ['repo-1', 'repo-2', 'repo-3']
        for position, expected_id in enumerate(expected_ids):
            assert repositories[position].id == expected_id

        # Repository properties are derived from the raw payload.
        assert repositories[0].full_name == 'test-workspace/repo1'
        first, second, third = repositories
        assert first.is_public is True
        assert second.is_public is False
        assert third.is_public is True
@pytest.mark.asyncio
async def test_validate_provider_token_with_empty_tokens():
"""