Fix Resend sync to respect deleted users (#12904)

Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
Rohit Malhotra
2026-02-19 12:43:15 -05:00
committed by GitHub
parent 7cd219792b
commit f3429e33ca
6 changed files with 524 additions and 37 deletions

View File

@@ -0,0 +1,67 @@
"""Create resend_synced_users table.
Revision ID: 096
Revises: 095
Create Date: 2025-02-17 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = '096'
down_revision: Union[str, None] = '095'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Create resend_synced_users table for tracking users synced to Resend audiences."""
op.create_table(
'resend_synced_users',
sa.Column(
'id',
sa.UUID(as_uuid=True),
nullable=False,
primary_key=True,
),
sa.Column('email', sa.String(), nullable=False),
sa.Column('audience_id', sa.String(), nullable=False),
sa.Column(
'synced_at',
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.text('CURRENT_TIMESTAMP'),
),
sa.Column('keycloak_user_id', sa.String(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint(
'email', 'audience_id', name='uq_resend_synced_email_audience'
),
)
# Create index on email for fast lookups
op.create_index(
'ix_resend_synced_users_email',
'resend_synced_users',
['email'],
)
# Create index on audience_id for filtering by audience
op.create_index(
'ix_resend_synced_users_audience_id',
'resend_synced_users',
['audience_id'],
)
def downgrade() -> None:
"""Drop resend_synced_users table."""
op.drop_index(
'ix_resend_synced_users_audience_id', table_name='resend_synced_users'
)
op.drop_index('ix_resend_synced_users_email', table_name='resend_synced_users')
op.drop_table('resend_synced_users')

View File

@@ -23,6 +23,7 @@ from storage.org import Org
from storage.org_invitation import OrgInvitation
from storage.org_member import OrgMember
from storage.proactive_convos import ProactiveConversation
from storage.resend_synced_user import ResendSyncedUser
from storage.role import Role
from storage.slack_conversation import SlackConversation
from storage.slack_team import SlackTeam
@@ -69,6 +70,7 @@ __all__ = [
'OrgInvitation',
'OrgMember',
'ProactiveConversation',
'ResendSyncedUser',
'Role',
'SlackConversation',
'SlackTeam',

View File

@@ -0,0 +1,35 @@
"""SQLAlchemy model for tracking users synced to Resend audiences."""
from datetime import UTC, datetime
from uuid import uuid4
from sqlalchemy import Column, DateTime, String, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
from storage.base import Base
class ResendSyncedUser(Base): # type: ignore
"""Tracks users that have been synced to a Resend audience.
This table ensures that once a user is synced to a Resend audience,
they won't be re-added even if they are later deleted from the
Resend UI. This respects manual deletions/unsubscribes.
"""
__tablename__ = 'resend_synced_users'
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid4)
email = Column(String, nullable=False, index=True)
audience_id = Column(String, nullable=False, index=True)
synced_at = Column(
DateTime(timezone=True),
default=lambda: datetime.now(UTC),
nullable=False,
)
keycloak_user_id = Column(String, nullable=True)
__table_args__ = (
UniqueConstraint(
'email', 'audience_id', name='uq_resend_synced_email_audience'
),
)

View File

@@ -0,0 +1,125 @@
"""Store class for managing Resend synced users."""
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import Optional, Set
from sqlalchemy import delete, select
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import sessionmaker
from storage.resend_synced_user import ResendSyncedUser
@dataclass
class ResendSyncedUserStore:
"""Store for tracking users synced to Resend audiences."""
session_maker: sessionmaker
def is_user_synced(self, email: str, audience_id: str) -> bool:
"""Check if a user has been synced to a specific audience.
Args:
email: The email address to check.
audience_id: The Resend audience ID.
Returns:
True if the user has been synced, False otherwise.
"""
with self.session_maker() as session:
stmt = select(ResendSyncedUser).where(
ResendSyncedUser.email == email.lower(),
ResendSyncedUser.audience_id == audience_id,
)
result = session.execute(stmt).first()
return result is not None
def get_synced_emails_for_audience(self, audience_id: str) -> Set[str]:
"""Get all synced email addresses for a specific audience.
Args:
audience_id: The Resend audience ID.
Returns:
A set of lowercase email addresses that have been synced.
"""
with self.session_maker() as session:
stmt = select(ResendSyncedUser.email).where(
ResendSyncedUser.audience_id == audience_id,
)
result = session.execute(stmt).scalars().all()
return set(result)
def mark_user_synced(
self,
email: str,
audience_id: str,
keycloak_user_id: Optional[str] = None,
) -> ResendSyncedUser:
"""Mark a user as synced to a specific audience.
Uses upsert to handle race conditions - if the user is already
marked as synced, this is a no-op.
Args:
email: The email address of the user.
audience_id: The Resend audience ID.
keycloak_user_id: Optional Keycloak user ID.
Returns:
The ResendSyncedUser record.
Raises:
RuntimeError: If the record could not be created or retrieved.
"""
with self.session_maker() as session:
stmt = (
insert(ResendSyncedUser)
.values(
email=email.lower(),
audience_id=audience_id,
keycloak_user_id=keycloak_user_id,
synced_at=datetime.now(UTC),
)
.on_conflict_do_nothing(constraint='uq_resend_synced_email_audience')
.returning(ResendSyncedUser)
)
result = session.execute(stmt)
session.commit()
row = result.first()
if row:
return row[0]
# on_conflict_do_nothing triggered, fetch the existing record
existing = session.execute(
select(ResendSyncedUser).where(
ResendSyncedUser.email == email.lower(),
ResendSyncedUser.audience_id == audience_id,
)
).first()
if existing:
return existing[0]
raise RuntimeError(
f'Failed to create or retrieve synced user record for {email}'
)
def remove_synced_user(self, email: str, audience_id: str) -> bool:
"""Remove a user's synced status for a specific audience.
Args:
email: The email address of the user.
audience_id: The Resend audience ID.
Returns:
True if a record was deleted, False if no record existed.
"""
with self.session_maker() as session:
stmt = delete(ResendSyncedUser).where(
ResendSyncedUser.email == email.lower(),
ResendSyncedUser.audience_id == audience_id,
)
result = session.execute(stmt)
session.commit()
return result.rowcount > 0

View File

@@ -35,6 +35,7 @@ import resend
from keycloak.exceptions import KeycloakError
from resend.exceptions import ResendError
from server.auth.token_manager import get_keycloak_admin
from storage.resend_synced_user_store import ResendSyncedUserStore
from tenacity import (
retry,
retry_if_exception_type,
@@ -69,9 +70,6 @@ RATE_LIMIT = float(os.environ.get('RATE_LIMIT', '2')) # Requests per second
# Set up Resend API
resend.api_key = RESEND_API_KEY
print('resend module', resend)
print('has contacts', hasattr(resend, 'Contacts'))
class ResendSyncError(Exception):
"""Base exception for Resend sync errors."""
@@ -199,8 +197,6 @@ def get_resend_contacts(audience_id: str) -> Dict[str, Dict[str, Any]]:
Raises:
ResendAPIError: If the API call fails.
"""
print('getting resend contacts')
print('has resend contacts', hasattr(resend, 'Contacts'))
try:
contacts = resend.Contacts.list(audience_id).get('data', [])
# Create a dictionary mapping email addresses to contact data for
@@ -317,8 +313,84 @@ def send_welcome_email(
raise
def _get_resend_synced_user_store() -> ResendSyncedUserStore:
"""Get the ResendSyncedUserStore instance.
This is separated into a function to allow for easier testing/mocking.
"""
from openhands.app_server.config import get_global_config
config = get_global_config()
db_session_injector = config.db_session
return ResendSyncedUserStore(session_maker=db_session_injector.get_session_maker())
def _backfill_existing_resend_contacts(
synced_user_store: ResendSyncedUserStore,
audience_id: str,
) -> int:
"""Backfill the synced_users table with contacts already in Resend.
This ensures that users who were added to Resend before the tracking
table existed are properly recorded, preventing duplicate welcome emails.
Args:
synced_user_store: The store for tracking synced users.
audience_id: The Resend audience ID.
Returns:
The number of contacts backfilled.
"""
logger.info('Starting backfill of existing Resend contacts...')
try:
resend_contacts = get_resend_contacts(audience_id)
logger.info(f'Found {len(resend_contacts)} contacts in Resend audience')
already_synced_emails = synced_user_store.get_synced_emails_for_audience(
audience_id
)
logger.info(
f'Found {len(already_synced_emails)} already synced emails in database'
)
backfilled_count = 0
for email in resend_contacts:
if email.lower() not in already_synced_emails:
synced_user_store.mark_user_synced(
email=email,
audience_id=audience_id,
keycloak_user_id=None, # We don't have this info during backfill
)
backfilled_count += 1
logger.debug(f'Backfilled existing Resend contact: {email}')
logger.info(
f'Backfill completed: {backfilled_count} contacts added to tracking'
)
return backfilled_count
except Exception:
logger.exception('Error during backfill of existing Resend contacts')
# Don't fail the entire sync if backfill fails - just log and continue
return 0
def sync_users_to_resend():
"""Sync users from Keycloak to Resend."""
"""Sync users from Keycloak to Resend.
This function syncs users from Keycloak to a Resend audience. It tracks
which users have been synced in the database to ensure that:
1. Users are only added once (even across multiple sync runs)
2. Users who are manually deleted from Resend are not re-added
The tracking is done via the resend_synced_users table, which records
each email/audience_id combination that has been synced.
On first run (or when new contacts exist in Resend), it will backfill
the tracking table with existing Resend contacts to avoid sending
duplicate welcome emails.
"""
# Check required environment variables
required_vars = {
'RESEND_API_KEY': RESEND_API_KEY,
@@ -344,28 +416,36 @@ def sync_users_to_resend():
)
try:
# Get the store for tracking synced users
synced_user_store = _get_resend_synced_user_store()
# Backfill existing Resend contacts into our tracking table
# This ensures users already in Resend don't get duplicate welcome emails
backfilled_count = _backfill_existing_resend_contacts(
synced_user_store, RESEND_AUDIENCE_ID
)
# Get the total number of users
total_users = get_total_keycloak_users()
logger.info(
f'Found {total_users} users in Keycloak realm {KEYCLOAK_REALM_NAME}'
)
# Get contacts from Resend
resend_contacts = get_resend_contacts(RESEND_AUDIENCE_ID)
logger.info(
f'Found {len(resend_contacts)} contacts in Resend audience '
f'{RESEND_AUDIENCE_ID}'
)
# Stats
stats = {
'total_users': total_users,
'existing_contacts': len(resend_contacts),
'backfilled_contacts': backfilled_count,
'already_synced': 0,
'added_contacts': 0,
'skipped_invalid_emails': 0,
'errors': 0,
}
synced_emails = synced_user_store.get_synced_emails_for_audience(
RESEND_AUDIENCE_ID
)
logger.info(f'Found {len(synced_emails)} already synced emails in database')
# Process users in batches
offset = 0
while offset < total_users:
@@ -378,8 +458,12 @@ def sync_users_to_resend():
continue
email = email.lower()
if email in resend_contacts:
logger.debug(f'User {email} already exists in Resend, skipping')
if email in synced_emails:
logger.debug(
f'User {email} was already synced to this audience, skipping'
)
stats['already_synced'] += 1
continue
# Validate email format before attempting to add to Resend
@@ -388,35 +472,51 @@ def sync_users_to_resend():
stats['skipped_invalid_emails'] += 1
continue
try:
first_name = user.get('first_name')
last_name = user.get('last_name')
first_name = user.get('first_name')
last_name = user.get('last_name')
keycloak_user_id = user.get('id')
# Add the contact to the Resend audience
# Mark as synced first (optimistic) to ensure consistency.
# If Resend API fails, we remove the record.
try:
synced_user_store.mark_user_synced(
email=email,
audience_id=RESEND_AUDIENCE_ID,
keycloak_user_id=keycloak_user_id,
)
except Exception:
logger.exception(f'Failed to mark user {email} as synced')
stats['errors'] += 1
continue
try:
add_contact_to_resend(
RESEND_AUDIENCE_ID, email, first_name, last_name
)
logger.info(f'Added user {email} to Resend')
stats['added_contacts'] += 1
# Sleep to respect rate limit after first API call
time.sleep(1 / RATE_LIMIT)
# Send a welcome email to the newly added contact
try:
send_welcome_email(email, first_name, last_name)
logger.info(f'Sent welcome email to {email}')
except Exception:
logger.exception(
f'Failed to send welcome email to {email}, but contact was added to audience'
)
# Continue with the sync process even if sending the welcome email fails
# Sleep to respect rate limit after second API call
time.sleep(1 / RATE_LIMIT)
except Exception:
logger.exception(f'Error adding user {email} to Resend')
synced_user_store.remove_synced_user(email, RESEND_AUDIENCE_ID)
stats['errors'] += 1
continue
synced_emails.add(email)
stats['added_contacts'] += 1
# Sleep to respect rate limit after first API call
time.sleep(1 / RATE_LIMIT)
# Send a welcome email to the newly added contact
try:
send_welcome_email(email, first_name, last_name)
logger.info(f'Sent welcome email to {email}')
except Exception:
logger.exception(
f'Failed to send welcome email to {email}, but contact was added to audience'
)
# Sleep to respect rate limit after second API call
time.sleep(1 / RATE_LIMIT)
offset += BATCH_SIZE

View File

@@ -0,0 +1,158 @@
"""Unit tests for ResendSyncedUserStore."""
from unittest.mock import MagicMock
import pytest
# Import directly from the module files to avoid loading all of storage/__init__.py
# which has many dependencies
from storage.resend_synced_user import ResendSyncedUser
from storage.resend_synced_user_store import ResendSyncedUserStore
@pytest.fixture
def mock_session():
"""Mock database session."""
session = MagicMock()
return session
@pytest.fixture
def mock_session_maker(mock_session):
"""Mock session maker."""
session_maker = MagicMock()
session_maker.return_value.__enter__.return_value = mock_session
session_maker.return_value.__exit__.return_value = None
return session_maker
@pytest.fixture
def store(mock_session_maker):
"""Create ResendSyncedUserStore instance."""
return ResendSyncedUserStore(session_maker=mock_session_maker)
class TestResendSyncedUserStore:
"""Test cases for ResendSyncedUserStore."""
def test_is_user_synced_returns_true_when_exists(self, store, mock_session):
"""Test is_user_synced returns True when user exists in database."""
email = 'test@example.com'
audience_id = 'test-audience-123'
mock_row = MagicMock()
mock_session.execute.return_value.first.return_value = mock_row
result = store.is_user_synced(email, audience_id)
assert result is True
mock_session.execute.assert_called_once()
def test_is_user_synced_returns_false_when_not_exists(self, store, mock_session):
"""Test is_user_synced returns False when user doesn't exist."""
email = 'test@example.com'
audience_id = 'test-audience-123'
mock_session.execute.return_value.first.return_value = None
result = store.is_user_synced(email, audience_id)
assert result is False
def test_is_user_synced_normalizes_email_to_lowercase(self, store, mock_session):
"""Test that is_user_synced normalizes email to lowercase."""
email = 'TEST@EXAMPLE.COM'
audience_id = 'test-audience-123'
mock_session.execute.return_value.first.return_value = None
store.is_user_synced(email, audience_id)
# Verify the query was called (we can't easily check the exact SQL)
mock_session.execute.assert_called_once()
def test_mark_user_synced_creates_new_record(self, store, mock_session):
"""Test that mark_user_synced creates a new record."""
email = 'test@example.com'
audience_id = 'test-audience-123'
keycloak_user_id = 'kc-user-123'
mock_synced_user = MagicMock(spec=ResendSyncedUser)
mock_result = MagicMock()
mock_result.first.return_value = (mock_synced_user,)
mock_session.execute.return_value = mock_result
result = store.mark_user_synced(email, audience_id, keycloak_user_id)
assert result == mock_synced_user
mock_session.execute.assert_called_once()
mock_session.commit.assert_called_once()
def test_mark_user_synced_handles_existing_record(self, store, mock_session):
"""Test that mark_user_synced handles conflict (existing record)."""
email = 'test@example.com'
audience_id = 'test-audience-123'
# First execute (insert) returns None (conflict occurred)
# Second execute (select existing) returns the record
mock_existing_user = MagicMock(spec=ResendSyncedUser)
mock_result_insert = MagicMock()
mock_result_insert.first.return_value = None
mock_result_select = MagicMock()
mock_result_select.first.return_value = (mock_existing_user,)
mock_session.execute.side_effect = [mock_result_insert, mock_result_select]
result = store.mark_user_synced(email, audience_id)
assert result == mock_existing_user
assert mock_session.execute.call_count == 2
mock_session.commit.assert_called_once()
def test_mark_user_synced_normalizes_email_to_lowercase(self, store, mock_session):
"""Test that mark_user_synced normalizes email to lowercase."""
email = 'TEST@EXAMPLE.COM'
audience_id = 'test-audience-123'
mock_synced_user = MagicMock(spec=ResendSyncedUser)
mock_result = MagicMock()
mock_result.first.return_value = (mock_synced_user,)
mock_session.execute.return_value = mock_result
store.mark_user_synced(email, audience_id)
# Verify execute was called (the email normalization happens in the SQL)
mock_session.execute.assert_called_once()
mock_session.commit.assert_called_once()
def test_mark_user_synced_without_keycloak_user_id(self, store, mock_session):
"""Test that mark_user_synced works without keycloak_user_id."""
email = 'test@example.com'
audience_id = 'test-audience-123'
mock_synced_user = MagicMock(spec=ResendSyncedUser)
mock_result = MagicMock()
mock_result.first.return_value = (mock_synced_user,)
mock_session.execute.return_value = mock_result
result = store.mark_user_synced(email, audience_id)
assert result == mock_synced_user
mock_session.execute.assert_called_once()
class TestResendSyncedUser:
"""Test cases for ResendSyncedUser model."""
def test_model_has_required_fields(self):
"""Test that the model has all required fields."""
assert hasattr(ResendSyncedUser, 'id')
assert hasattr(ResendSyncedUser, 'email')
assert hasattr(ResendSyncedUser, 'audience_id')
assert hasattr(ResendSyncedUser, 'synced_at')
assert hasattr(ResendSyncedUser, 'keycloak_user_id')
def test_model_table_name(self):
"""Test the model's table name."""
assert ResendSyncedUser.__tablename__ == 'resend_synced_users'