Add Google Sheets integration for GitHub user verification (#4671)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
This commit is contained in:
Robert Brennan 2024-11-01 15:17:15 -07:00 committed by GitHub
parent adf7ab5849
commit b27fabe504
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 172 additions and 31 deletions

View File

@ -3,37 +3,93 @@ import os
import httpx
from openhands.core.logger import openhands_logger as logger
from openhands.server.sheets_client import GoogleSheetsClient
GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip()
GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip()
GITHUB_USER_LIST = None
def load_github_user_list():
global GITHUB_USER_LIST
waitlist = os.getenv('GITHUB_USER_LIST_FILE')
if waitlist:
with open(waitlist, 'r') as f:
GITHUB_USER_LIST = [line.strip() for line in f if line.strip()]
class UserVerifier:
def __init__(self) -> None:
logger.info('Initializing UserVerifier')
self.file_users: list[str] | None = None
self.sheets_client: GoogleSheetsClient | None = None
self.spreadsheet_id: str | None = None
# Initialize from environment variables
self._init_file_users()
self._init_sheets_client()
def _init_file_users(self) -> None:
"""Load users from text file if configured"""
waitlist = os.getenv('GITHUB_USER_LIST_FILE')
if not waitlist:
logger.info('GITHUB_USER_LIST_FILE not configured')
return
if not os.path.exists(waitlist):
logger.error(f'User list file not found: {waitlist}')
raise FileNotFoundError(f'User list file not found: {waitlist}')
try:
with open(waitlist, 'r') as f:
self.file_users = [line.strip() for line in f if line.strip()]
logger.info(
f'Successfully loaded {len(self.file_users)} users from {waitlist}'
)
except Exception as e:
logger.error(f'Error reading user list file {waitlist}: {str(e)}')
def _init_sheets_client(self) -> None:
"""Initialize Google Sheets client if configured"""
sheet_id = os.getenv('GITHUB_USERS_SHEET_ID')
if not sheet_id:
logger.info('GITHUB_USERS_SHEET_ID not configured')
return
logger.info('Initializing Google Sheets integration')
self.sheets_client = GoogleSheetsClient()
self.spreadsheet_id = sheet_id
def is_user_allowed(self, username: str) -> bool:
"""Check if user is allowed based on file and/or sheet configuration"""
if not self.file_users and not self.sheets_client:
logger.debug('No verification sources configured - allowing all users')
return True
logger.info(f'Checking if GitHub user {username} is allowed')
if self.file_users:
if username in self.file_users:
logger.info(f'User {username} found in text file allowlist')
return True
logger.debug(f'User {username} not found in text file allowlist')
if self.sheets_client and self.spreadsheet_id:
sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id)
if username in sheet_users:
logger.info(f'User {username} found in Google Sheets allowlist')
return True
logger.debug(f'User {username} not found in Google Sheets allowlist')
logger.info(f'User {username} not found in any allowlist')
return False
load_github_user_list()
# Global instance of user verifier
user_verifier = UserVerifier()
async def authenticate_github_user(auth_token) -> bool:
logger.info('Checking GitHub token')
if not GITHUB_USER_LIST:
return True
if not auth_token:
logger.warning('No GitHub token provided')
return False
login, error = await get_github_user(auth_token)
if error:
logger.warning(f'Invalid GitHub token: {error}')
return False
if login not in GITHUB_USER_LIST:
login = await get_github_user(auth_token)
if not user_verifier.is_user_allowed(login):
logger.warning(f'GitHub user {login} not in allow list')
return False
@ -41,7 +97,7 @@ async def authenticate_github_user(auth_token) -> bool:
return True
async def get_github_user(token: str) -> tuple[str | None, str | None]:
async def get_github_user(token: str) -> str:
"""Get GitHub user info from token.
Args:
@ -52,21 +108,17 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]:
If successful, error_message is None
If failed, login is None and error_message contains the error
"""
logger.info('Fetching GitHub user info from token')
headers = {
'Accept': 'application/vnd.github+json',
'Authorization': f'Bearer {token}',
'X-GitHub-Api-Version': '2022-11-28',
}
try:
async with httpx.AsyncClient() as client:
response = await client.get('https://api.github.com/user', headers=headers)
if response.status_code == 200:
user_data = response.json()
return user_data.get('login'), None
else:
return (
None,
f'GitHub API error: {response.status_code} - {response.text}',
)
except Exception as e:
return None, f'Error connecting to GitHub: {str(e)}'
async with httpx.AsyncClient() as client:
logger.debug('Making request to GitHub API')
response = await client.get('https://api.github.com/user', headers=headers)
response.raise_for_status()
user_data = response.json()
login = user_data.get('login')
logger.info(f'Successfully retrieved GitHub user: {login}')
return login

View File

@ -0,0 +1,68 @@
from typing import List
from google.auth import default
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from openhands.core.logger import openhands_logger as logger
class GoogleSheetsClient:
def __init__(self):
"""Initialize Google Sheets client using workload identity.
Uses application default credentials which supports workload identity when running in GCP.
"""
logger.info('Initializing Google Sheets client with workload identity')
try:
credentials, project = default(
scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
)
logger.info(f'Successfully obtained credentials for project: {project}')
self.service = build('sheets', 'v4', credentials=credentials)
logger.info('Successfully initialized Google Sheets API service')
except Exception as e:
logger.error(f'Failed to initialize Google Sheets client: {str(e)}')
self.service = None
def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]:
"""Get list of usernames from specified Google Sheet.
Args:
spreadsheet_id: The ID of the Google Sheet
range_name: The A1 notation of the range to fetch
Returns:
List of usernames from the sheet
"""
if not self.service:
logger.error('Google Sheets service not initialized')
return []
try:
logger.info(
f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}'
)
result = (
self.service.spreadsheets()
.values()
.get(spreadsheetId=spreadsheet_id, range=range_name)
.execute()
)
values = result.get('values', [])
usernames = [
str(cell[0]).strip() for cell in values if cell and cell[0].strip()
]
logger.info(
f'Successfully fetched {len(usernames)} usernames from Google Sheet'
)
return usernames
except HttpError as err:
logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}')
return []
except Exception as e:
logger.error(
f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}'
)
return []

22
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "aenum"
@ -2319,6 +2319,24 @@ files = [
google-auth = "*"
httplib2 = ">=0.19.0"
[[package]]
name = "google-auth-oauthlib"
version = "1.2.1"
description = "Google Authentication Library"
optional = false
python-versions = ">=3.6"
files = [
{file = "google_auth_oauthlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:2d58a27262d55aa1b87678c3ba7142a080098cbc2024f903c62355deb235d91f"},
{file = "google_auth_oauthlib-1.2.1.tar.gz", hash = "sha256:afd0cad092a2eaa53cd8e8298557d6de1034c6cb4a740500b5357b648af97263"},
]
[package.dependencies]
google-auth = ">=2.15.0"
requests-oauthlib = ">=0.7.0"
[package.extras]
tool = ["click (>=6.0.0)"]
[[package]]
name = "google-cloud-aiplatform"
version = "1.70.0"
@ -10109,4 +10127,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "2b268ef696ace0d8170276407dbdeb414134477839ebe4b7ecf29b1a1fe2cef3"
content-hash = "2a4f90bb5c7f7d82160f57d71af7e81c7acef69426d0e1e46e1da09972a6215f"

View File

@ -16,6 +16,9 @@ datasets = "*"
pandas = "*"
litellm = "^1.51.1"
google-generativeai = "*" # To use litellm with Gemini Pro API
google-api-python-client = "*" # For Google Sheets API
google-auth-httplib2 = "*" # For Google Sheets authentication
google-auth-oauthlib = "*" # For Google Sheets OAuth
termcolor = "*"
seaborn = "*"
docker = "*"