mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
Fix Bitbucket pagination and sorting to fetch ALL repositories (#9356)
Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
8e4a8a65f8
commit
d37e40caf8
@ -149,6 +149,41 @@ class BitBucketService(BaseGitService, GitService):
|
||||
# Bitbucket doesn't have a dedicated search endpoint like GitHub
|
||||
return []
|
||||
|
||||
async def _fetch_paginated_data(
    self, url: str, params: dict, max_items: int
) -> list[dict]:
    """Gather the ``values`` entries from successive pages of a Bitbucket listing.

    Pages are requested one after another by following the ``next`` link in
    each response, until that link is absent, a page comes back without
    items, or ``max_items`` items have been gathered.

    Args:
        url: The API endpoint URL requested first.
        params: Query parameters sent with the first request only.
        max_items: Upper bound on the number of items returned.

    Returns:
        The items from every page visited, cut down to at most ``max_items``.
    """
    collected: list[dict] = []
    next_url = url
    query = params

    while next_url and len(collected) < max_items:
        payload, _ = await self._make_request(next_url, query)

        batch = payload.get('values', [])
        if not batch:
            # A page without items ends the listing.
            break
        collected.extend(batch)

        # Follow-up requests go to the `next` link with an empty query: the
        # link is expected to carry the original parameters already.
        next_url = payload.get('next')
        query = {}

    # The final page may overshoot the cap, so trim before returning.
    return collected[:max_items]
|
||||
|
||||
async def get_repositories(self, sort: str, app_mode: AppMode) -> list[Repository]:
|
||||
"""Get repositories for the authenticated user using workspaces endpoint.
|
||||
|
||||
@ -157,33 +192,51 @@ class BitBucketService(BaseGitService, GitService):
|
||||
This approach is more comprehensive and efficient than the previous implementation
|
||||
that made separate calls for public and private repositories.
|
||||
"""
|
||||
repositories = []
|
||||
MAX_REPOS = 1000
|
||||
PER_PAGE = 100 # Maximum allowed by Bitbucket API
|
||||
repositories: list[Repository] = []
|
||||
|
||||
# Get user's workspaces
|
||||
# Get user's workspaces with pagination
|
||||
workspaces_url = f'{self.BASE_URL}/workspaces'
|
||||
workspaces_data, _ = await self._make_request(workspaces_url)
|
||||
workspaces = await self._fetch_paginated_data(workspaces_url, {}, MAX_REPOS)
|
||||
|
||||
for workspace in workspaces_data.get('values', []):
|
||||
for workspace in workspaces:
|
||||
workspace_slug = workspace.get('slug')
|
||||
if not workspace_slug:
|
||||
continue
|
||||
|
||||
# Get repositories for this workspace
|
||||
# Get repositories for this workspace with pagination
|
||||
workspace_repos_url = f'{self.BASE_URL}/repositories/{workspace_slug}'
|
||||
|
||||
# Map sort parameter to Bitbucket API compatible values
|
||||
# Map sort parameter to Bitbucket API compatible values and ensure descending order
|
||||
# to show most recently changed repos at the top
|
||||
bitbucket_sort = sort
|
||||
if sort == 'pushed':
|
||||
# Bitbucket doesn't support 'pushed', use 'updated_on' instead
|
||||
bitbucket_sort = 'updated_on'
|
||||
bitbucket_sort = (
|
||||
'-updated_on' # Use negative prefix for descending order
|
||||
)
|
||||
elif sort == 'updated':
|
||||
bitbucket_sort = '-updated_on'
|
||||
elif sort == 'created':
|
||||
bitbucket_sort = '-created_on'
|
||||
elif sort == 'full_name':
|
||||
bitbucket_sort = 'name' # Bitbucket uses 'name' not 'full_name'
|
||||
else:
|
||||
# Default to most recently updated first
|
||||
bitbucket_sort = '-updated_on'
|
||||
|
||||
params = {
|
||||
'pagelen': 100,
|
||||
'pagelen': PER_PAGE,
|
||||
'sort': bitbucket_sort,
|
||||
}
|
||||
repos_data, headers = await self._make_request(workspace_repos_url, params)
|
||||
|
||||
for repo in repos_data.get('values', []):
|
||||
# Fetch all repositories for this workspace with pagination
|
||||
workspace_repos = await self._fetch_paginated_data(
|
||||
workspace_repos_url, params, MAX_REPOS - len(repositories)
|
||||
)
|
||||
|
||||
for repo in workspace_repos:
|
||||
uuid = repo.get('uuid', '')
|
||||
repositories.append(
|
||||
Repository(
|
||||
@ -192,11 +245,18 @@ class BitBucketService(BaseGitService, GitService):
|
||||
git_provider=ProviderType.BITBUCKET,
|
||||
is_public=repo.get('is_private', True) is False,
|
||||
stargazers_count=None, # Bitbucket doesn't have stars
|
||||
link_header=headers.get('Link', ''),
|
||||
pushed_at=repo.get('updated_on'),
|
||||
)
|
||||
)
|
||||
|
||||
# Stop if we've reached the maximum number of repositories
|
||||
if len(repositories) >= MAX_REPOS:
|
||||
break
|
||||
|
||||
# Stop if we've reached the maximum number of repositories
|
||||
if len(repositories) >= MAX_REPOS:
|
||||
break
|
||||
|
||||
return repositories
|
||||
|
||||
async def get_suggested_tasks(self) -> list[SuggestedTask]:
|
||||
@ -240,10 +300,21 @@ class BitBucketService(BaseGitService, GitService):
|
||||
repo = parts[-1]
|
||||
|
||||
url = f'{self.BASE_URL}/repositories/{owner}/{repo}/refs/branches'
|
||||
data, _ = await self._make_request(url)
|
||||
|
||||
# Set maximum branches to fetch (similar to GitHub/GitLab implementations)
|
||||
MAX_BRANCHES = 1000
|
||||
PER_PAGE = 100
|
||||
|
||||
params = {
|
||||
'pagelen': PER_PAGE,
|
||||
'sort': '-target.date', # Sort by most recent commit date, descending
|
||||
}
|
||||
|
||||
# Fetch all branches with pagination
|
||||
branch_data = await self._fetch_paginated_data(url, params, MAX_BRANCHES)
|
||||
|
||||
branches = []
|
||||
for branch in data.get('values', []):
|
||||
for branch in branch_data:
|
||||
branches.append(
|
||||
Branch(
|
||||
name=branch.get('name', ''),
|
||||
|
||||
@ -459,11 +459,85 @@ async def test_bitbucket_sort_parameter_mapping():
|
||||
second_call_args = mock_request.call_args_list[1]
|
||||
url, params = second_call_args[0]
|
||||
|
||||
# Verify the sort parameter was mapped correctly
|
||||
assert params['sort'] == 'updated_on'
|
||||
# Verify the sort parameter was mapped correctly (with descending order)
|
||||
assert params['sort'] == '-updated_on'
|
||||
assert 'repositories/test-workspace' in url
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_bitbucket_pagination():
    """Repositories split across two Bitbucket pages are all returned, in order."""

    def repo_entry(uuid, slug, is_private, updated_on):
        # Shape of one repository object as it appears in a mocked API page.
        return {
            'uuid': uuid,
            'slug': slug,
            'workspace': {'slug': 'test-workspace'},
            'is_private': is_private,
            'updated_on': updated_on,
        }

    # Each mocked `_make_request` result is a (body, headers) pair.
    workspaces_page = (
        {'values': [{'slug': 'test-workspace', 'name': 'Test Workspace'}]},
        {},
    )
    first_repo_page = (
        {
            'values': [
                repo_entry('repo-1', 'repo1', False, '2023-01-01T00:00:00Z'),
                repo_entry('repo-2', 'repo2', True, '2023-01-02T00:00:00Z'),
            ],
            'next': 'https://api.bitbucket.org/2.0/repositories/test-workspace?page=2',
        },
        {},
    )
    # The final page carries no 'next' link.
    last_repo_page = (
        {
            'values': [
                repo_entry('repo-3', 'repo3', False, '2023-01-03T00:00:00Z'),
            ],
        },
        {},
    )

    service = BitBucketService(token=SecretStr('test-token'))

    with patch.object(service, '_make_request') as mock_request:
        mock_request.side_effect = [
            workspaces_page,
            first_repo_page,
            last_repo_page,
        ]

        repositories = await service.get_repositories('pushed', AppMode.SAAS)

        # One workspace lookup plus one request per repository page.
        assert mock_request.call_count == 3

        # Every repository from both pages is present, in page order.
        assert len(repositories) == 3
        first, second, third = repositories
        assert first.id == 'repo-1'
        assert second.id == 'repo-2'
        assert third.id == 'repo-3'

        # Name and visibility are derived from the mocked payloads.
        assert first.full_name == 'test-workspace/repo1'
        assert first.is_public is True
        assert second.is_public is False
        assert third.is_public is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_validate_provider_token_with_empty_tokens():
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user