Mirror of https://github.com/OpenHands/OpenHands.git
Synced 2025-12-25 21:36:52 +08:00
File stats: 638 lines, 23 KiB, Python
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from openhands.core.logger import openhands_logger as logger
|
|
from openhands.resolver.interfaces.issue import (
|
|
Issue,
|
|
IssueHandlerInterface,
|
|
ReviewThread,
|
|
)
|
|
from openhands.resolver.utils import extract_issue_references
|
|
|
|
|
|
class GithubIssueHandler(IssueHandlerInterface):
    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'github.com',
    ):
        """Create a handler for resolving GitHub issues.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitHub personal access token
            username: Optional GitHub username
            base_domain: The domain for GitHub Enterprise (default: "github.com")
        """
        self.owner = owner
        self.repo = repo
        self.token = token
        self.username = username
        self.base_domain = base_domain
        # Derived endpoints; get_download_url() reads self.base_url, so the
        # assignment order below matters.
        self.base_url = self.get_base_url()
        self.download_url = self.get_download_url()
        self.clone_url = self.get_clone_url()
        self.headers = self.get_headers()
|
|
|
def set_owner(self, owner: str) -> None:
|
|
self.owner = owner
|
|
|
|
def get_headers(self) -> dict[str, str]:
|
|
return {
|
|
'Authorization': f'token {self.token}',
|
|
'Accept': 'application/vnd.github.v3+json',
|
|
}
|
|
|
|
def get_base_url(self) -> str:
|
|
if self.base_domain == 'github.com':
|
|
return f'https://api.github.com/repos/{self.owner}/{self.repo}'
|
|
else:
|
|
return f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}'
|
|
|
|
def get_authorize_url(self) -> str:
|
|
return f'https://{self.username}:{self.token}@{self.base_domain}/'
|
|
|
|
def get_branch_url(self, branch_name: str) -> str:
|
|
return self.get_base_url() + f'/branches/{branch_name}'
|
|
|
|
def get_download_url(self) -> str:
|
|
return f'{self.base_url}/issues'
|
|
|
|
def get_clone_url(self) -> str:
|
|
username_and_token = (
|
|
f'{self.username}:{self.token}'
|
|
if self.username
|
|
else f'x-auth-token:{self.token}'
|
|
)
|
|
return f'https://{username_and_token}@{self.base_domain}/{self.owner}/{self.repo}.git'
|
|
|
|
def get_graphql_url(self) -> str:
|
|
if self.base_domain == 'github.com':
|
|
return 'https://api.github.com/graphql'
|
|
else:
|
|
return f'https://{self.base_domain}/api/graphql'
|
|
|
|
def get_compare_url(self, branch_name: str) -> str:
|
|
return f'https://{self.base_domain}/{self.owner}/{self.repo}/compare/{branch_name}?expand=1'
|
|
|
|
def get_converted_issues(
|
|
self, issue_numbers: list[int] | None = None, comment_id: int | None = None
|
|
) -> list[Issue]:
|
|
"""Download issues from Github.
|
|
|
|
Args:
|
|
issue_numbers: The numbers of the issues to download
|
|
comment_id: The ID of a single comment, if provided, otherwise all comments
|
|
|
|
Returns:
|
|
List of Github issues.
|
|
"""
|
|
if not issue_numbers:
|
|
raise ValueError('Unspecified issue number')
|
|
|
|
all_issues = self.download_issues()
|
|
logger.info(f'Limiting resolving to issues {issue_numbers}.')
|
|
all_issues = [
|
|
issue
|
|
for issue in all_issues
|
|
if issue['number'] in issue_numbers and 'pull_request' not in issue
|
|
]
|
|
|
|
if len(issue_numbers) == 1 and not all_issues:
|
|
raise ValueError(f'Issue {issue_numbers[0]} not found')
|
|
|
|
converted_issues = []
|
|
for issue in all_issues:
|
|
# Check for required fields (number and title)
|
|
if any([issue.get(key) is None for key in ['number', 'title']]):
|
|
logger.warning(
|
|
f'Skipping issue {issue} as it is missing number or title.'
|
|
)
|
|
continue
|
|
|
|
# Handle empty body by using empty string
|
|
if issue.get('body') is None:
|
|
issue['body'] = ''
|
|
|
|
# Get issue thread comments
|
|
thread_comments = self.get_issue_comments(
|
|
issue['number'], comment_id=comment_id
|
|
)
|
|
# Convert empty lists to None for optional fields
|
|
issue_details = Issue(
|
|
owner=self.owner,
|
|
repo=self.repo,
|
|
number=issue['number'],
|
|
title=issue['title'],
|
|
body=issue['body'],
|
|
thread_comments=thread_comments,
|
|
review_comments=None, # Initialize review comments as None for regular issues
|
|
)
|
|
|
|
converted_issues.append(issue_details)
|
|
|
|
return converted_issues
|
|
|
|
def download_issues(self) -> list[Any]:
|
|
params: dict[str, int | str] = {'state': 'open', 'per_page': 100, 'page': 1}
|
|
all_issues = []
|
|
|
|
while True:
|
|
response = httpx.get(self.download_url, headers=self.headers, params=params)
|
|
response.raise_for_status()
|
|
issues = response.json()
|
|
|
|
if not issues:
|
|
break
|
|
|
|
if not isinstance(issues, list) or any(
|
|
[not isinstance(issue, dict) for issue in issues]
|
|
):
|
|
raise ValueError(
|
|
'Expected list of dictionaries from Service Github API.'
|
|
)
|
|
|
|
all_issues.extend(issues)
|
|
assert isinstance(params['page'], int)
|
|
params['page'] += 1
|
|
|
|
return all_issues
|
|
|
|
def get_issue_comments(
|
|
self, issue_number: int, comment_id: int | None = None
|
|
) -> list[str] | None:
|
|
"""Download comments for a specific issue from Github."""
|
|
url = f'{self.download_url}/{issue_number}/comments'
|
|
params = {'per_page': 100, 'page': 1}
|
|
all_comments = []
|
|
|
|
while True:
|
|
response = httpx.get(url, headers=self.headers, params=params)
|
|
response.raise_for_status()
|
|
comments = response.json()
|
|
|
|
if not comments:
|
|
break
|
|
|
|
if comment_id:
|
|
matching_comment = next(
|
|
(
|
|
comment['body']
|
|
for comment in comments
|
|
if comment['id'] == comment_id
|
|
),
|
|
None,
|
|
)
|
|
if matching_comment:
|
|
return [matching_comment]
|
|
else:
|
|
all_comments.extend([comment['body'] for comment in comments])
|
|
|
|
params['page'] += 1
|
|
|
|
return all_comments if all_comments else None
|
|
|
|
def branch_exists(self, branch_name: str) -> bool:
|
|
logger.info(f'Checking if branch {branch_name} exists...')
|
|
response = httpx.get(
|
|
f'{self.base_url}/branches/{branch_name}', headers=self.headers
|
|
)
|
|
exists = response.status_code == 200
|
|
logger.info(f'Branch {branch_name} exists: {exists}')
|
|
return exists
|
|
|
|
def get_branch_name(self, base_branch_name: str) -> str:
|
|
branch_name = base_branch_name
|
|
attempt = 1
|
|
while self.branch_exists(branch_name):
|
|
attempt += 1
|
|
branch_name = f'{base_branch_name}-try{attempt}'
|
|
return branch_name
|
|
|
|
def reply_to_comment(self, pr_number: int, comment_id: str, reply: str) -> None:
|
|
# Opting for graphql as REST API doesn't allow reply to replies in comment threads
|
|
query = """
|
|
mutation($body: String!, $pullRequestReviewThreadId: ID!) {
|
|
addPullRequestReviewThreadReply(input: { body: $body, pullRequestReviewThreadId: $pullRequestReviewThreadId }) {
|
|
comment {
|
|
id
|
|
body
|
|
createdAt
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
|
|
comment_reply = f'Openhands fix success summary\n\n\n{reply}'
|
|
variables = {'body': comment_reply, 'pullRequestReviewThreadId': comment_id}
|
|
url = self.get_graphql_url()
|
|
headers = {
|
|
'Authorization': f'Bearer {self.token}',
|
|
'Content-Type': 'application/json',
|
|
}
|
|
|
|
response = httpx.post(
|
|
url, json={'query': query, 'variables': variables}, headers=headers
|
|
)
|
|
response.raise_for_status()
|
|
|
|
def get_pull_url(self, pr_number: int) -> str:
|
|
return f'https://{self.base_domain}/{self.owner}/{self.repo}/pull/{pr_number}'
|
|
|
|
def get_default_branch_name(self) -> str:
|
|
response = httpx.get(f'{self.base_url}', headers=self.headers)
|
|
response.raise_for_status()
|
|
data = response.json()
|
|
return str(data['default_branch'])
|
|
|
|
def create_pull_request(self, data: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
if data is None:
|
|
data = {}
|
|
response = httpx.post(f'{self.base_url}/pulls', headers=self.headers, json=data)
|
|
if response.status_code == 403:
|
|
raise RuntimeError(
|
|
'Failed to create pull request due to missing permissions. '
|
|
'Make sure that the provided token has push permissions for the repository.'
|
|
)
|
|
response.raise_for_status()
|
|
pr_data = response.json()
|
|
return dict(pr_data)
|
|
|
|
def request_reviewers(self, reviewer: str, pr_number: int) -> None:
|
|
review_data = {'reviewers': [reviewer]}
|
|
review_response = httpx.post(
|
|
f'{self.base_url}/pulls/{pr_number}/requested_reviewers',
|
|
headers=self.headers,
|
|
json=review_data,
|
|
)
|
|
if review_response.status_code != 201:
|
|
logger.warning(
|
|
f'Failed to request review from {reviewer}: {review_response.text}'
|
|
)
|
|
|
|
def send_comment_msg(self, issue_number: int, msg: str) -> None:
|
|
"""Send a comment message to a GitHub issue or pull request.
|
|
|
|
Args:
|
|
issue_number: The issue or pull request number
|
|
msg: The message content to post as a comment
|
|
"""
|
|
# Post a comment on the PR
|
|
comment_url = f'{self.base_url}/issues/{issue_number}/comments'
|
|
comment_data = {'body': msg}
|
|
comment_response = httpx.post(
|
|
comment_url, headers=self.headers, json=comment_data
|
|
)
|
|
if comment_response.status_code != 201:
|
|
logger.error(
|
|
f'Failed to post comment: {comment_response.status_code} {comment_response.text}'
|
|
)
|
|
else:
|
|
logger.info(f'Comment added to the PR: {msg}')
|
|
|
|
def get_context_from_external_issues_references(
|
|
self,
|
|
closing_issues: list[str],
|
|
closing_issue_numbers: list[int],
|
|
issue_body: str,
|
|
review_comments: list[str] | None,
|
|
review_threads: list[ReviewThread],
|
|
thread_comments: list[str] | None,
|
|
) -> list[str]:
|
|
return []
|
|
|
|
|
|
class GithubPRHandler(GithubIssueHandler):
    def __init__(
        self,
        owner: str,
        repo: str,
        token: str,
        username: str | None = None,
        base_domain: str = 'github.com',
    ):
        """Initialize a GitHub PR handler.

        Args:
            owner: The owner of the repository
            repo: The name of the repository
            token: The GitHub personal access token
            username: Optional GitHub username
            base_domain: The domain for GitHub Enterprise (default: "github.com")
        """
        super().__init__(owner, repo, token, username, base_domain)
        # PRs are listed via the /pulls endpoint rather than /issues.
        if self.base_domain == 'github.com':
            api_root = 'https://api.github.com'
        else:
            api_root = f'https://{self.base_domain}/api/v3'
        self.download_url = f'{api_root}/repos/{self.owner}/{self.repo}/pulls'
|
|
|
|
    def download_pr_metadata(
        self, pull_number: int, comment_id: int | None = None
    ) -> tuple[list[str], list[int], list[str], list[ReviewThread], list[str]]:
        """Run a GraphQL query against the GitHub API for information.

        Retrieves information about:
        1. unresolved review comments
        2. referenced issues the pull request would close

        Args:
            pull_number: The number of the pull request to query.
            comment_id: Optional ID of a specific comment to focus on; when
                given, reviews and threads are narrowed to that comment.

        Returns:
            A tuple of (closing issue bodies, closing issue numbers,
            review bodies, unresolved review threads, thread ids).
        """
        # Using graphql as REST API doesn't indicate resolved status for review comments
        # TODO: grabbing the first 10 issues, 100 review threads, and 100 comments; add pagination to retrieve all
        query = """
                query($owner: String!, $repo: String!, $pr: Int!) {
                    repository(owner: $owner, name: $repo) {
                        pullRequest(number: $pr) {
                            closingIssuesReferences(first: 10) {
                                edges {
                                    node {
                                        body
                                        number
                                    }
                                }
                            }
                            url
                            reviews(first: 100) {
                                nodes {
                                    body
                                    state
                                    fullDatabaseId
                                }
                            }
                            reviewThreads(first: 100) {
                                edges{
                                    node{
                                        id
                                        isResolved
                                        comments(first: 100) {
                                            totalCount
                                            nodes {
                                                body
                                                path
                                                fullDatabaseId
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            """

        variables = {'owner': self.owner, 'repo': self.repo, 'pr': pull_number}

        url = self.get_graphql_url()
        # GraphQL requires Bearer auth rather than the REST "token" scheme.
        headers = {
            'Authorization': f'Bearer {self.token}',
            'Content-Type': 'application/json',
        }

        response = httpx.post(
            url, json={'query': query, 'variables': variables}, headers=headers
        )
        response.raise_for_status()
        response_json = response.json()

        # Parse the response to get closing issue references and unresolved review comments
        pr_data = (
            response_json.get('data', {}).get('repository', {}).get('pullRequest', {})
        )

        # Get closing issues
        closing_issues = pr_data.get('closingIssuesReferences', {}).get('edges', [])
        closing_issues_bodies = [issue['node']['body'] for issue in closing_issues]
        closing_issue_numbers = [
            issue['node']['number'] for issue in closing_issues
        ]  # Extract issue numbers

        # Get review comments
        reviews = pr_data.get('reviews', {}).get('nodes', [])
        if comment_id is not None:
            # Narrow to the single review whose database id matches comment_id.
            reviews = [
                review
                for review in reviews
                if int(review['fullDatabaseId']) == comment_id
            ]
        review_bodies = [review['body'] for review in reviews]

        # Get unresolved review threads
        review_threads = []
        thread_ids = []  # Store thread IDs; agent replies to the thread
        raw_review_threads = pr_data.get('reviewThreads', {}).get('edges', [])
        for thread in raw_review_threads:
            node = thread.get('node', {})
            if not node.get(
                'isResolved', True
            ):  # Check if the review thread is unresolved
                id = node.get('id')
                thread_contains_comment_id = False
                my_review_threads = node.get('comments', {}).get('nodes', [])
                # Fold all comments of the thread into a single message string,
                # flagging the most recent one as "latest feedback".
                message = ''
                files = []
                for i, review_thread in enumerate(my_review_threads):
                    if (
                        comment_id is not None
                        and int(review_thread['fullDatabaseId']) == comment_id
                    ):
                        thread_contains_comment_id = True

                    if (
                        i == len(my_review_threads) - 1
                    ):  # Check if it's the last thread in the thread
                        if len(my_review_threads) > 1:
                            message += '---\n'  # Add "---" before the last message if there's more than one thread
                        message += 'latest feedback:\n' + review_thread['body'] + '\n'
                    else:
                        message += (
                            review_thread['body'] + '\n'
                        )  # Add each thread in a new line

                    file = review_thread.get('path')
                    if file and file not in files:
                        files.append(file)

                # Keep the thread unless we were asked for a specific comment
                # and this thread does not contain it.
                if comment_id is None or thread_contains_comment_id:
                    unresolved_thread = ReviewThread(comment=message, files=files)
                    review_threads.append(unresolved_thread)
                    thread_ids.append(id)

        return (
            closing_issues_bodies,
            closing_issue_numbers,
            review_bodies,
            review_threads,
            thread_ids,
        )
|
|
|
|
# Override processing of downloaded issues
|
|
def get_pr_comments(
|
|
self, pr_number: int, comment_id: int | None = None
|
|
) -> list[str] | None:
|
|
"""Download comments for a specific pull request from Github."""
|
|
if self.base_domain == 'github.com':
|
|
url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments'
|
|
else:
|
|
url = f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}/issues/{pr_number}/comments'
|
|
headers = {
|
|
'Authorization': f'token {self.token}',
|
|
'Accept': 'application/vnd.github.v3+json',
|
|
}
|
|
params = {'per_page': 100, 'page': 1}
|
|
all_comments = []
|
|
|
|
while True:
|
|
response = httpx.get(url, headers=headers, params=params)
|
|
response.raise_for_status()
|
|
comments = response.json()
|
|
|
|
if not comments:
|
|
break
|
|
|
|
if comment_id is not None:
|
|
matching_comment = next(
|
|
(
|
|
comment['body']
|
|
for comment in comments
|
|
if comment['id'] == comment_id
|
|
),
|
|
None,
|
|
)
|
|
if matching_comment:
|
|
return [matching_comment]
|
|
else:
|
|
all_comments.extend([comment['body'] for comment in comments])
|
|
|
|
params['page'] += 1
|
|
|
|
return all_comments if all_comments else None
|
|
|
|
def get_context_from_external_issues_references(
|
|
self,
|
|
closing_issues: list[str],
|
|
closing_issue_numbers: list[int],
|
|
issue_body: str,
|
|
review_comments: list[str] | None,
|
|
review_threads: list[ReviewThread],
|
|
thread_comments: list[str] | None,
|
|
) -> list[str]:
|
|
new_issue_references = []
|
|
|
|
if issue_body:
|
|
new_issue_references.extend(extract_issue_references(issue_body))
|
|
|
|
if review_comments:
|
|
for comment in review_comments:
|
|
new_issue_references.extend(extract_issue_references(comment))
|
|
|
|
if review_threads:
|
|
for review_thread in review_threads:
|
|
new_issue_references.extend(
|
|
extract_issue_references(review_thread.comment)
|
|
)
|
|
|
|
if thread_comments:
|
|
for thread_comment in thread_comments:
|
|
new_issue_references.extend(extract_issue_references(thread_comment))
|
|
|
|
non_duplicate_references = set(new_issue_references)
|
|
unique_issue_references = non_duplicate_references.difference(
|
|
closing_issue_numbers
|
|
)
|
|
|
|
for issue_number in unique_issue_references:
|
|
try:
|
|
if self.base_domain == 'github.com':
|
|
url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues/{issue_number}'
|
|
else:
|
|
url = f'https://{self.base_domain}/api/v3/repos/{self.owner}/{self.repo}/issues/{issue_number}'
|
|
headers = {
|
|
'Authorization': f'Bearer {self.token}',
|
|
'Accept': 'application/vnd.github.v3+json',
|
|
}
|
|
response = httpx.get(url, headers=headers)
|
|
response.raise_for_status()
|
|
issue_data = response.json()
|
|
issue_body = issue_data.get('body', '')
|
|
if issue_body:
|
|
closing_issues.append(issue_body)
|
|
except httpx.HTTPError as e:
|
|
logger.warning(f'Failed to fetch issue {issue_number}: {str(e)}')
|
|
|
|
return closing_issues
|
|
|
|
def get_converted_issues(
|
|
self, issue_numbers: list[int] | None = None, comment_id: int | None = None
|
|
) -> list[Issue]:
|
|
if not issue_numbers:
|
|
raise ValueError('Unspecified issue numbers')
|
|
|
|
all_issues = self.download_issues()
|
|
logger.info(f'Limiting resolving to issues {issue_numbers}.')
|
|
all_issues = [issue for issue in all_issues if issue['number'] in issue_numbers]
|
|
|
|
converted_issues = []
|
|
for issue in all_issues:
|
|
# For PRs, body can be None
|
|
if any([issue.get(key) is None for key in ['number', 'title']]):
|
|
logger.warning(f'Skipping #{issue} as it is missing number or title.')
|
|
continue
|
|
|
|
# Handle None body for PRs
|
|
body = issue.get('body') if issue.get('body') is not None else ''
|
|
(
|
|
closing_issues,
|
|
closing_issues_numbers,
|
|
review_comments,
|
|
review_threads,
|
|
thread_ids,
|
|
) = self.download_pr_metadata(issue['number'], comment_id=comment_id)
|
|
head_branch = issue['head']['ref']
|
|
|
|
# Get PR thread comments
|
|
thread_comments = self.get_pr_comments(
|
|
issue['number'], comment_id=comment_id
|
|
)
|
|
|
|
closing_issues = self.get_context_from_external_issues_references(
|
|
closing_issues,
|
|
closing_issues_numbers,
|
|
body,
|
|
review_comments,
|
|
review_threads,
|
|
thread_comments,
|
|
)
|
|
|
|
issue_details = Issue(
|
|
owner=self.owner,
|
|
repo=self.repo,
|
|
number=issue['number'],
|
|
title=issue['title'],
|
|
body=body,
|
|
closing_issues=closing_issues,
|
|
review_comments=review_comments,
|
|
review_threads=review_threads,
|
|
thread_ids=thread_ids,
|
|
head_branch=head_branch,
|
|
thread_comments=thread_comments,
|
|
)
|
|
|
|
converted_issues.append(issue_details)
|
|
|
|
return converted_issues
|