OpenHands/enterprise/server/auth/email_validation.py

110 lines
3.3 KiB
Python

"""Email validation utilities for preventing duplicate signups with + modifier."""
import re
def extract_base_email(email: str) -> str | None:
"""Extract base email from an email address.
For emails with + modifier, extracts the base email (local part before + and @, plus domain).
For emails without + modifier, returns the email as-is.
Examples:
extract_base_email("joe+test@example.com") -> "joe@example.com"
extract_base_email("joe@example.com") -> "joe@example.com"
extract_base_email("joe+openhands+test@example.com") -> "joe@example.com"
Args:
email: The email address to process
Returns:
The base email address, or None if email format is invalid
"""
if not email or '@' not in email:
return None
try:
local_part, domain = email.rsplit('@', 1)
# Extract the part before + if it exists
base_local = local_part.split('+', 1)[0]
return f'{base_local}@{domain}'
except (ValueError, AttributeError):
return None
def has_plus_modifier(email: str) -> bool:
"""Check if an email address contains a + modifier.
Args:
email: The email address to check
Returns:
True if email contains + before @, False otherwise
"""
if not email or '@' not in email:
return False
try:
local_part, _ = email.rsplit('@', 1)
return '+' in local_part
except (ValueError, AttributeError):
return False
def matches_base_email(email: str, base_email: str) -> bool:
"""Check if an email matches a base email pattern.
An email matches if:
- It is exactly the base email (e.g., joe@example.com)
- It has the same base local part and domain, with or without + modifier
(e.g., joe+test@example.com matches base joe@example.com)
Args:
email: The email address to check
base_email: The base email to match against
Returns:
True if email matches the base pattern, False otherwise
"""
if not email or not base_email:
return False
# Extract base from both emails for comparison
email_base = extract_base_email(email)
base_email_normalized = extract_base_email(base_email)
if not email_base or not base_email_normalized:
return False
# Emails match if they have the same base
return email_base.lower() == base_email_normalized.lower()
def get_base_email_regex_pattern(base_email: str) -> re.Pattern | None:
"""Generate a regex pattern to match emails with the same base.
For base_email "joe@example.com", the pattern will match:
- joe@example.com
- joe+anything@example.com
Args:
base_email: The base email address
Returns:
A compiled regex pattern, or None if base_email is invalid
"""
base = extract_base_email(base_email)
if not base:
return None
try:
local_part, domain = base.rsplit('@', 1)
# Escape special regex characters in local part and domain
escaped_local = re.escape(local_part)
escaped_domain = re.escape(domain)
# Pattern: joe@example.com OR joe+anything@example.com
pattern = rf'^{escaped_local}(\+[^@\s]+)?@{escaped_domain}$'
return re.compile(pattern, re.IGNORECASE)
except (ValueError, AttributeError):
return None