From 779990221cfe6eea7e09430de6caac926545abcd Mon Sep 17 00:00:00 2001 From: Elsie Hupp <9206310+elsiehupp@users.noreply.github.com> Date: Mon, 6 Sep 2021 19:35:38 -0400 Subject: [PATCH] More Structured Errors? Signed-off-by: Elsie Hupp <9206310+elsiehupp@users.noreply.github.com> --- MANIFEST.in | 1 + email_validator/__init__.py | 83 ++++++-------- email_validator/error_classes/__init__.py | 128 ++++++++++++++++++++++ tests/test_main.py | 74 ++++++++++++- 4 files changed, 235 insertions(+), 51 deletions(-) create mode 100644 email_validator/error_classes/__init__.py diff --git a/MANIFEST.in b/MANIFEST.in index 2f9bf23..a23ef01 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include email_validator.py +include error_classes.py include LICENSE README.md diff --git a/email_validator/__init__.py b/email_validator/__init__.py index f960f67..fc3b47a 100644 --- a/email_validator/__init__.py +++ b/email_validator/__init__.py @@ -7,6 +7,7 @@ import dns.exception import idna # implements IDNA 2008; Python's codec is only IDNA 2003 +from email_validator.error_classes import * # Based on RFC 2822 section 3.2.4 / RFC 5322 section 3.2.3, these # characters are permitted in email addresses (not taking into @@ -49,22 +50,6 @@ DEFAULT_TIMEOUT = 15 # secs - -class EmailNotValidError(ValueError): - """Parent class of all exceptions raised by this module.""" - pass - - -class EmailSyntaxError(EmailNotValidError): - """Exception raised when an email address fails validation because of its form.""" - pass - - -class EmailUndeliverableError(EmailNotValidError): - """Exception raised when an email address fails validation because its domain name does not appear deliverable.""" - pass - - class ValidatedEmail(object): """The validate_email function returns objects of this type holding the normalized form of the email address and other information.""" @@ -174,10 +159,10 @@ def as_dict(self): def __get_length_reason(addr, utf8=False, limit=EMAIL_MAX_LENGTH): diff = len(addr) - 
limit - reason = "({}{} character{} too many)" + reason_string = "({}{} character{} too many)" prefix = "at least " if utf8 else "" suffix = "s" if diff > 1 else "" - return reason.format(prefix, diff, suffix) + return (reason_string.format(prefix, diff, suffix), diff) def caching_resolver(timeout=DEFAULT_TIMEOUT, cache=None): @@ -208,12 +193,14 @@ def validate_email( try: email = email.decode("ascii") except ValueError: - raise EmailSyntaxError("The email address is not valid ASCII.") + raise EmailInvalidAsciiError("The email address is not valid ASCII.") # At-sign. parts = email.split('@') - if len(parts) != 2: - raise EmailSyntaxError("The email address is not valid. It must have exactly one @-sign.") + if len(parts) < 2: + raise EmailNoAtSignError("The email address is not valid. It must have exactly one @-sign.") + if len(parts) > 2: + raise EmailMultipleAtSignsError("The email address is not valid. It must have exactly one @-sign.") # Collect return values in this instance. ret = ValidatedEmail() @@ -261,22 +248,22 @@ def validate_email( # See the length checks on the local part and the domain. if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH: if ret.ascii_email == ret.email: - reason = __get_length_reason(ret.ascii_email) + reason_tuple = __get_length_reason(ret.ascii_email) elif len(ret.email) > EMAIL_MAX_LENGTH: # If there are more than 254 characters, then the ASCII # form is definitely going to be too long. 
- reason = __get_length_reason(ret.email, utf8=True) + reason_tuple = __get_length_reason(ret.email, utf8=True) else: - reason = "(when converted to IDNA ASCII)" - raise EmailSyntaxError("The email address is too long {}.".format(reason)) + reason_tuple = ("(when converted to IDNA ASCII)",) + raise EmailTooLongAsciiError("The email address is too long {}.".format(reason_tuple[0]), *reason_tuple[1:]) if len(ret.email.encode("utf8")) > EMAIL_MAX_LENGTH: if len(ret.email) > EMAIL_MAX_LENGTH: # If there are more than 254 characters, then the UTF-8 # encoding is definitely going to be too long. - reason = __get_length_reason(ret.email, utf8=True) + reason_tuple = __get_length_reason(ret.email, utf8=True) else: - reason = "(when encoded in bytes)" - raise EmailSyntaxError("The email address is too long {}.".format(reason)) + reason_tuple = ("(when encoded in bytes)",) + raise EmailTooLongUtf8Error("The email address is too long {}.".format(reason_tuple[0]), *reason_tuple[1:]) if check_deliverability: # Validate the email address's deliverability and update the @@ -296,7 +283,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals if len(local) == 0: if not allow_empty_local: - raise EmailSyntaxError("There must be something before the @-sign.") + raise EmailLocalPartEmptyError("There must be something before the @-sign.") else: # The caller allows an empty local part. Useful for validating certain # Postfix aliases. @@ -313,8 +300,8 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals # that may not be relevant. We will check the total address length # instead.
if len(local) > LOCAL_PART_MAX_LENGTH: - reason = __get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH) - raise EmailSyntaxError("The email address is too long before the @-sign {}.".format(reason)) + reason_tuple = __get_length_reason(local, limit=LOCAL_PART_MAX_LENGTH) + raise EmailLocalPartTooLongError("The email address is too long before the @-sign {}.".format(reason_tuple[0])) # Check the local part against the regular expression for the older ASCII requirements. m = re.match(DOT_ATOM_TEXT + "\\Z", local) @@ -334,11 +321,11 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals bad_chars = ', '.join(sorted(set( c for c in local if not re.match(u"[" + (ATEXT if not allow_smtputf8 else ATEXT_UTF8) + u"]", c) ))) - raise EmailSyntaxError("The email address contains invalid characters before the @-sign: %s." % bad_chars) + raise EmailLocalPartInvalidCharactersError("The email address contains invalid characters before the @-sign: %s." % bad_chars) # It would be valid if internationalized characters were allowed by the caller. if not allow_smtputf8: - raise EmailSyntaxError("Internationalized characters before the @-sign are not supported.") + raise EmailLocalPartInternationalizedCharactersError("Internationalized characters before the @-sign are not supported.") # It's valid. @@ -357,7 +344,7 @@ def validate_email_local_part(local, allow_smtputf8=True, allow_empty_local=Fals def validate_email_domain_part(domain): # Empty? 
if len(domain) == 0: - raise EmailSyntaxError("There must be something after the @-sign.") + raise EmailDomainPartEmptyError("There must be something after the @-sign.") # Perform UTS-46 normalization, which includes casefolding, NFC normalization, # and converting all label separators (the period/full stop, fullwidth full stop, @@ -367,18 +354,18 @@ def validate_email_domain_part(domain): try: domain = idna.uts46_remap(domain, std3_rules=False, transitional=False) except idna.IDNAError as e: - raise EmailSyntaxError("The domain name %s contains invalid characters (%s)." % (domain, str(e))) + raise EmailDomainInvalidIdnaError("The domain name %s contains invalid characters (%s)." % (domain, str(e)), e) # Now we can perform basic checks on the use of periods (since equivalent # symbols have been mapped to periods). These checks are needed because the # IDNA library doesn't handle well domains that have empty labels (i.e. initial # dot, trailing dot, or two dots in a row). if domain.endswith("."): - raise EmailSyntaxError("An email address cannot end with a period.") + raise EmailDomainEndsWithPeriodError("An email address cannot end with a period.") if domain.startswith("."): - raise EmailSyntaxError("An email address cannot have a period immediately after the @-sign.") + raise EmailDomainStartsWithPeriodError("An email address cannot have a period immediately after the @-sign.") if ".." in domain: - raise EmailSyntaxError("An email address cannot have two periods in a row.") + raise EmailDomainMultiplePeriodsInARowError("An email address cannot have two periods in a row.") # Regardless of whether international characters are actually used, # first convert to IDNA ASCII. For ASCII-only domains, the transformation @@ -398,8 +385,8 @@ def validate_email_domain_part(domain): # the length check is applied to a string that is different from the # one the user supplied. 
Also I'm not sure if the length check applies # to the internationalized form, the IDNA ASCII form, or even both! - raise EmailSyntaxError("The email address is too long after the @-sign.") - raise EmailSyntaxError("The domain name %s contains invalid characters (%s)." % (domain, str(e))) + raise EmailDomainTooLongError("The email address is too long after the @-sign.") + raise EmailDomainInvalidIdnaError("The domain name %s contains invalid characters (%s)." % (domain, str(e)), e) # We may have been given an IDNA ASCII domain to begin with. Check # that the domain actually conforms to IDNA. It could look like IDNA @@ -411,7 +398,7 @@ def validate_email_domain_part(domain): try: domain_i18n = idna.decode(ascii_domain.encode('ascii')) except idna.IDNAError as e: - raise EmailSyntaxError("The domain name %s is not valid IDNA (%s)." % (ascii_domain, str(e))) + raise EmailDomainInvalidIdnaError("The domain name %s is not valid IDNA (%s)." % (ascii_domain, str(e)), e) # RFC 5321 4.5.3.1.2 # We're checking the number of bytes (octets) here, which can be much @@ -420,7 +407,7 @@ def validate_email_domain_part(domain): # as IDNA ASCII. This is also checked by idna.encode, so this exception # is never reached. if len(ascii_domain) > DOMAIN_MAX_LENGTH: - raise EmailSyntaxError("The email address is too long after the @-sign.") + raise EmailDomainTooLongError("The email address is too long after the @-sign.") # A "dot atom text", per RFC 2822 3.2.4, but using the restricted # characters allowed in a hostname (see ATEXT_HOSTNAME above). @@ -430,14 +417,14 @@ def validate_email_domain_part(domain): # with idna.decode, which also checks this format. 
m = re.match(DOT_ATOM_TEXT + "\\Z", ascii_domain) if not m: - raise EmailSyntaxError("The email address contains invalid characters after the @-sign.") + raise EmailDomainInvalidCharactersError("The email address contains invalid characters after the @-sign.") # All publicly deliverable addresses have domain named with at least # one period. We also know that all TLDs end with a letter. if "." not in ascii_domain: - raise EmailSyntaxError("The domain name %s is not valid. It should have a period." % domain_i18n) + raise EmailDomainNoPeriodError("The domain name %s is not valid. It should have a period." % domain_i18n) if not re.search(r"[A-Za-z]\Z", ascii_domain): - raise EmailSyntaxError( + raise EmailDomainNoValidTldError( "The domain name %s is not valid. It is not within a valid top-level domain." % domain_i18n ) @@ -509,7 +496,7 @@ def dns_resolver_resolve_shim(domain, record): # If there was no MX, A, or AAAA record, then mail to # this domain is not deliverable. - raise EmailUndeliverableError("The domain name %s does not exist." % domain_i18n) + raise EmailDomainNameDoesNotExistError("The domain name %s does not exist." % domain_i18n) except dns.exception.Timeout: # A timeout could occur for various reasons, so don't treat it as a failure. @@ -523,8 +510,8 @@ def dns_resolver_resolve_shim(domain, record): except Exception as e: # Unhandled conditions should not propagate. 
- raise EmailUndeliverableError( - "There was an error while checking if the domain name in the email address is deliverable: " + str(e) + raise EmailDomainUnhandledDnsExceptionError( + "There was an error while checking if the domain name in the email address is deliverable: " + str(e), e ) return { diff --git a/email_validator/error_classes/__init__.py b/email_validator/error_classes/__init__.py new file mode 100644 index 0000000..e4209f7 --- /dev/null +++ b/email_validator/error_classes/__init__.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +class EmailNotValidError(ValueError): + """Parent class of all exceptions raised by this module.""" + + def __str__(self): + # Extra positional args carry structured detail; str() stays the plain message. + return str(self.args[0]) if self.args else "" + + +class EmailSyntaxError(EmailNotValidError): + """Parent class of exceptions raised when an email address fails validation because of its form.""" + pass + + + +# Syntax errors pertaining to the email address as a whole +class EmailInvalidAsciiError(EmailSyntaxError): + """Exception raised when an email address fails validation because it is not valid ASCII.""" + pass + +class EmailNoAtSignError(EmailSyntaxError): + """Exception raised when an email address fails validation because it does not contain an @-sign""" + pass + +class EmailMultipleAtSignsError(EmailSyntaxError): + """Exception raised when an email address fails validation because it contains more than one @-sign""" + pass + + + +# Syntax errors pertaining to the email address being too long +class EmailTooLongError(EmailSyntaxError): + """Parent class of exceptions raised when an email address fails validation because it is too long.""" + pass + +class EmailTooLongAsciiError(EmailTooLongError): + """Exception raised when an email address fails validation because it is too long when converted to IDNA ASCII.
+ May contain a second argument with the integer number of characters the email address exceeds the allowed length.""" + pass + +class EmailTooLongUtf8Error(EmailTooLongError): + """Exception raised when an email address fails validation because it is too long when encoded in bytes. + May contain a second argument with the integer number of characters the email address exceeds the allowed length.""" + pass + + + +# Syntax errors pertaining to the local part of the email (i.e. before the @-sign) +class EmailLocalPartError(EmailSyntaxError): + """Parent class of exceptions raised when an email address fails validation because of its local part.""" + pass + +class EmailLocalPartEmptyError(EmailLocalPartError): + """Exception raised when an email address fails validation because it contains no characters before the @-sign.""" + pass + +class EmailLocalPartTooLongError(EmailLocalPartError): + """Exception raised when an email address fails validation because the part before the @-sign is too long when converted to IDNA ASCII. + May contain a second argument with the integer number of characters the email address exceeds the allowed length.""" + pass + +class EmailLocalPartInvalidCharactersError(EmailLocalPartError): + """Exception raised when an email address fails validation because it contains invalid characters before the @-sign.""" + pass + +class EmailLocalPartInternationalizedCharactersError(EmailLocalPartError): + """Exception raised when an email address fails validation because it contains internationalized characters before the @-sign.""" + pass + + + +# Syntax errors pertaining to the domain part of the email (i.e. 
after the @-sign) +class EmailDomainPartError(EmailSyntaxError): + """Parent class of exceptions raised when an email address fails validation because of its domain part.""" + pass + +class EmailDomainPartEmptyError(EmailDomainPartError): + """Exception raised when an email address fails validation because it contains no characters after the @-sign.""" + pass + +class EmailDomainInvalidCharactersError(EmailDomainPartError): + """Exception raised when an email address fails validation because it contains invalid characters after the @-sign.""" + pass + +class EmailDomainInvalidIdnaError(EmailDomainInvalidCharactersError): + """Exception raised when an email address fails validation because it contains invalid characters after the @-sign. + Contains the original IDNA error as a second argument.""" + pass + +class EmailDomainEndsWithPeriodError(EmailDomainPartError): + """Exception raised when an email address fails validation because it ends with a period.""" + pass + +class EmailDomainStartsWithPeriodError(EmailDomainPartError): + """Exception raised when an email address fails validation because it has a period immediately after the @-sign.""" + pass + +class EmailDomainMultiplePeriodsInARowError(EmailDomainPartError): + """Exception raised when an email address fails validation because it contains two or more periods in a row after the @-sign.""" + pass + +class EmailDomainTooLongError(EmailDomainPartError): + """Exception raised when an email address fails validation because the part after the @-sign is too long.""" + pass + +class EmailDomainNoPeriodError(EmailDomainPartError): + """Exception raised when an email address fails validation because it does not contain a period after the @-sign.""" + pass + +class EmailDomainNoValidTldError(EmailDomainPartError): + """Exception raised when an email address fails validation because it does not contain a valid top-level domain (TLD) after the @-sign.""" + pass + + + +# Errors determined heuristically from DNS queries
+# The parent class name is retained for backwards-compatibility +class EmailUndeliverableError(EmailNotValidError): + """Parent class of exceptions raised when an email address fails validation because its domain name does not appear deliverable.""" + pass + +class EmailDomainNameDoesNotExistError(EmailUndeliverableError): + """Exception raised when an email address fails validation because its domain name does not exist.""" + pass + +class EmailDomainUnhandledDnsExceptionError(EmailUndeliverableError): + """Exception raised when an email address fails validation because the DNS query of its domain name has raised an exception. + Contains the DNS exception (from the Python dns module) as the second argument.""" + pass + diff --git a/tests/test_main.py b/tests/test_main.py index d2fd923..3fc84ff 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,9 +1,40 @@ from unittest import mock import dns.resolver import pytest -from email_validator import EmailSyntaxError, EmailUndeliverableError, \ - validate_email, validate_email_deliverability, \ - caching_resolver, ValidatedEmail +# error classes are imported individually here +# so that syntax highlighting can be used +# to easily visualize test coverage +from email_validator.error_classes import \ + EmailNotValidError, \ + EmailSyntaxError, \ + EmailInvalidAsciiError, \ + EmailNoAtSignError, \ + EmailMultipleAtSignsError, \ + EmailTooLongError, \ + EmailTooLongAsciiError, \ + EmailTooLongUtf8Error, \ + EmailLocalPartError, \ + EmailLocalPartEmptyError, \ + EmailLocalPartTooLongError, \ + EmailLocalPartInvalidCharactersError, \ + EmailLocalPartInternationalizedCharactersError, \ + EmailDomainPartError, \ + EmailDomainPartEmptyError, \ + EmailDomainInvalidCharactersError, \ + EmailDomainInvalidIdnaError, \ + EmailDomainEndsWithPeriodError, \ + EmailDomainStartsWithPeriodError, \ + EmailDomainMultiplePeriodsInARowError, \ + EmailDomainTooLongError, \ + EmailDomainNoPeriodError, \ + EmailDomainNoValidTldError, \ 
+ EmailUndeliverableError, \ + EmailDomainNameDoesNotExistError, \ + EmailDomainUnhandledDnsExceptionError +from email_validator import ValidatedEmail, \ + validate_email, \ + validate_email_deliverability, \ + caching_resolver # Let's test main but rename it to be clear from email_validator import main as validator_main @@ -254,6 +285,43 @@ def test_email_invalid(email_input, error_msg): assert str(exc_info.value) == error_msg +@pytest.mark.parametrize( + 'email_input,error_class', + [ + ('my@.leadingdot.com', EmailDomainStartsWithPeriodError), + ('my@..leadingfwdot.com', EmailDomainStartsWithPeriodError), + ('my@..twodots.com', EmailDomainStartsWithPeriodError), + ('my@twodots..com', EmailDomainMultiplePeriodsInARowError), + ('my@baddash.-.com', EmailDomainInvalidIdnaError), + ('my@baddash.-a.com', EmailDomainInvalidIdnaError), + ('my@baddash.b-.com', EmailDomainInvalidIdnaError), + ('my@example.com\n', EmailDomainInvalidIdnaError), + ('my@example\n.com', EmailDomainInvalidIdnaError), + ('.leadingdot@domain.com', EmailLocalPartInvalidCharactersError), + ('..twodots@domain.com', EmailLocalPartInvalidCharactersError), + ('twodots..here@domain.com', EmailLocalPartInvalidCharactersError), + ('me@⒈wouldbeinvalid.com', EmailDomainInvalidIdnaError), + ('@example.com', EmailLocalPartEmptyError), + ('\nmy@example.com', EmailLocalPartInvalidCharactersError), + ('m\ny@example.com', EmailLocalPartInvalidCharactersError), + ('my\n@example.com', EmailLocalPartInvalidCharactersError), + ('11111111112222222222333333333344444444445555555555666666666677777@example.com', EmailLocalPartTooLongError), + ('111111111122222222223333333333444444444455555555556666666666777777@example.com', EmailLocalPartTooLongError), + ('me@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.111111111122222222223333333333444444444455555555556.com', 
EmailDomainTooLongError), + ('my.long.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333333344444.info', EmailTooLongAsciiError), + ('my.long.address@λ111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.11111111112222222222333333.info', EmailTooLongAsciiError), + ('my.long.address@λ111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', EmailTooLongAsciiError), + ('my.λong.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.111111111122222222223333333333444.info', EmailTooLongUtf8Error), + ('my.λong.address@1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444444444555555555.6666666666777777777788888888889999999999000000000.1111111111222222222233333333334444.info', EmailTooLongUtf8Error), + ], +) +def test_email_invalid_error_class(email_input, error_class): + with pytest.raises(EmailSyntaxError) as exc_info: + validate_email(email_input) + # print(f'({email_input!r}, {str(exc_info.value)!r}),') + assert isinstance(exc_info.value, error_class) + + def test_dict_accessor(): input_email = "testaddr@example.com" valid_email = validate_email(input_email, check_deliverability=False)