From de6527fa3247f3eff02da1b1327ae9074dbe5ee1 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sat, 15 Apr 2023 09:55:58 -0400 Subject: [PATCH] Rename the `email` field of ValidatedEmail to `normalized` to be clearer about its importance --- CHANGELOG.md | 1 + README.md | 26 ++++++++++----------- email_validator/exceptions_types.py | 18 +++++++++++---- email_validator/validate_email.py | 16 ++++++------- tests/test_main.py | 6 ++--- tests/test_syntax.py | 36 ++++++++++++++--------------- 6 files changed, 56 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6ad16f..a2a898b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ There are no significant changes to which email addresses are considered valid/i * Some syntax error messages have changed because they are now checked explicitly rather than as a part of other checks. * The quoted-string local part syntax (e.g. multiple @-signs, spaces, etc. if surrounded by quotes) and domain-literal addresses (e.g. @[192.XXX...] or @[IPv6:...]) are now parsed but not considered valid by default. Better error messages are now given for these addresses since it can be confusing for a technically valid address to be rejected, and new allow_quoted_local and allow_domain_literal options are added to allow these addresses if you really need them. * Some other error messages have changed to not repeat the email address in the error message. +* The `email` field on the returned `ValidatedEmail` object has been renamed to `normalized` to be clearer about its importance, but access via `.email` is also still supported. * The library has been reorganized internally into smaller modules. * The tests have been reorganized and expanded. Deliverability tests now mostly use captured DNS responses so they can be run off-line. * The __main__ tool now reads options to validate_email from environment variables. 
diff --git a/README.md b/README.md index 65a1f0f..39dc30f 100644 --- a/README.md +++ b/README.md @@ -65,11 +65,11 @@ try: # Check that the email address is valid. Turn on check_deliverability # for first-time validations like on account creation pages (but not # login pages). - validation = validate_email(email, check_deliverability=False) + emailinfo = validate_email(email, check_deliverability=False) # After this point, use only the normalized form of the email address, # especially before going to a database query. - email = validation.email + email = emailinfo.normalized except EmailNotValidError as e: @@ -158,7 +158,7 @@ from email_validator import validate_email, caching_resolver resolver = caching_resolver(timeout=10) while True: - email = validate_email(email, dns_resolver=resolver).email + validate_email(email, dns_resolver=resolver) ``` ### Test addresses @@ -248,8 +248,8 @@ This library gives you back the ASCII-ized form in the `ascii_email` field in the returned object, which you can get like this: ```python -valid = validate_email(email, allow_smtputf8=False) -email = valid.ascii_email +emailinfo = validate_email(email, allow_smtputf8=False) +email = emailinfo.ascii_email ``` The local part is left alone (if it has internationalized characters @@ -274,9 +274,9 @@ equivalent in domain names to their ASCII counterparts. 
This library normalizes them to their ASCII counterparts: ```python -valid = validate_email("me@Domain.com") -print(valid.email) -print(valid.ascii_email) +emailinfo = validate_email("me@Domain.com") +print(emailinfo.normalized) +print(emailinfo.ascii_email) # prints "me@domain.com" twice ``` @@ -320,7 +320,7 @@ For the email address `test@joshdata.me`, the returned object is: ```python ValidatedEmail( - email='test@joshdata.me', + normalized='test@joshdata.me', local_part='test', domain='joshdata.me', ascii_email='test@joshdata.me', @@ -334,7 +334,7 @@ internationalized domain but ASCII local part, the returned object is: ```python ValidatedEmail( - email='example@ツ.life', + normalized='example@ツ.life', local_part='example', domain='ツ.life', ascii_email='example@xn--bdk.life', @@ -357,7 +357,7 @@ internationalized local part, the returned object is: ```python ValidatedEmail( - email='ツ-test@joshdata.me', + normalized='ツ-test@joshdata.me', local_part='ツ-test', domain='joshdata.me', ascii_email=None, @@ -380,8 +380,8 @@ are: | Field | Value | | -----:|-------| -| `email` | The normalized form of the email address that you should put in your database. This combines the `local_part` and `domain` fields (see below). | -| `ascii_email` | If set, an ASCII-only form of the email address by replacing the domain part with [IDNA](https://tools.ietf.org/html/rfc5891) [Punycode](https://www.rfc-editor.org/rfc/rfc3492.txt). This field will be present when an ASCII-only form of the email address exists (including if the email address is already ASCII). If the local part of the email address contains internationalized characters, `ascii_email` will be `None`. If set, it merely combines `ascii_local_part` and `ascii_domain`. | +| `normalized` | The normalized form of the email address that you should put in your database. This combines the `local_part` and `domain` fields (see below). 
| +| `ascii_email` | If set, an ASCII-only form of the normalized email address by replacing the domain part with [IDNA](https://tools.ietf.org/html/rfc5891) [Punycode](https://www.rfc-editor.org/rfc/rfc3492.txt). This field will be present when an ASCII-only form of the email address exists (including if the email address is already ASCII). If the local part of the email address contains internationalized characters, `ascii_email` will be `None`. If set, it merely combines `ascii_local_part` and `ascii_domain`. | | `local_part` | The normalized local part of the given email address (before the @-sign). Normalization includes Unicode NFC normalization and removing unnecessary quoted-string quotes and backslashes. If `allow_quoted_local` is True and the surrounding quotes are necessary, the quotes _will_ be present in this field. | | `ascii_local_part` | If set, the local part, which is composed of ASCII characters only. | | `domain` | The canonical internationalized Unicode form of the domain part of the email address. If the returned string contains non-ASCII characters, either the [SMTPUTF8](https://tools.ietf.org/html/rfc6531) feature of your mail relay will be required to transmit the message or else the email address's domain part must be converted to IDNA ASCII first: Use `ascii_domain` field instead. | diff --git a/email_validator/exceptions_types.py b/email_validator/exceptions_types.py index 978abbc..9a1b331 100644 --- a/email_validator/exceptions_types.py +++ b/email_validator/exceptions_types.py @@ -22,13 +22,13 @@ class ValidatedEmail(object): and other information.""" """The email address that was passed to validate_email. (If passed as bytes, this will be a string.)""" - original_email: str + original: str """The normalized email address, which should always be used in preferance to the original address. 
The normalized address converts an IDNA ASCII domain name to Unicode, if possible, and performs Unicode normalization on the local part and on the domain (if originally Unicode). It is the concatenation of the local_part and domain attributes, separated by an @-sign.""" - email: str + normalized: str """The local part of the email address after Unicode normalization.""" local_part: str @@ -68,14 +68,22 @@ def __init__(self, **kwargs): setattr(self, k, v) def __repr__(self): - return f"<ValidatedEmail {self.email}>" + return f"<ValidatedEmail {self.normalized}>" + + """For backwards compatibility, support old field names.""" + def __getattr__(self, key): + if key == "original_email": + return self.original + if key == "email": + return self.normalized + raise AttributeError() """For backwards compatibility, some fields are also exposed through a dict-like interface. Note that some of the names changed when they became attributes.""" def __getitem__(self, key): warnings.warn("dict-like access to the return value of validate_email is deprecated and may not be supported in the future.", DeprecationWarning, stacklevel=2) if key == "email": - return self.email + return self.normalized if key == "email_ascii": return self.ascii_email if key == "local": @@ -97,7 +105,7 @@ def __eq__(self, other): if not isinstance(other, ValidatedEmail): return False return ( - self.email == other.email + self.normalized == other.normalized and self.local_part == other.local_part and self.domain == other.domain and getattr(self, 'ascii_email', None) == getattr(other, 'ascii_email', None) diff --git a/email_validator/validate_email.py b/email_validator/validate_email.py index bdbffc8..6114931 100644 --- a/email_validator/validate_email.py +++ b/email_validator/validate_email.py @@ -76,7 +76,7 @@ def validate_email( # Collect return values in this instance. ret = ValidatedEmail() - ret.original_email = email + ret.original = email # Validate the email address's local part syntax and get a normalized form.
# If the original address was quoted and the decoded local part is a valid @@ -113,7 +113,7 @@ def validate_email( ret.ascii_domain = domain_part_info["ascii_domain"] # Construct the complete normalized form. - ret.email = ret.local_part + "@" + ret.domain + ret.normalized = ret.local_part + "@" + ret.domain # If the email address has an ASCII form, add it. if not ret.smtputf8: @@ -144,20 +144,20 @@ def validate_email( # # See the length checks on the local part and the domain. if ret.ascii_email and len(ret.ascii_email) > EMAIL_MAX_LENGTH: - if ret.ascii_email == ret.email: + if ret.ascii_email == ret.normalized: reason = get_length_reason(ret.ascii_email) - elif len(ret.email) > EMAIL_MAX_LENGTH: + elif len(ret.normalized) > EMAIL_MAX_LENGTH: # If there are more than 254 characters, then the ASCII # form is definitely going to be too long. - reason = get_length_reason(ret.email, utf8=True) + reason = get_length_reason(ret.normalized, utf8=True) else: reason = "(when converted to IDNA ASCII)" raise EmailSyntaxError(f"The email address is too long {reason}.") - if len(ret.email.encode("utf8")) > EMAIL_MAX_LENGTH: - if len(ret.email) > EMAIL_MAX_LENGTH: + if len(ret.normalized.encode("utf8")) > EMAIL_MAX_LENGTH: + if len(ret.normalized) > EMAIL_MAX_LENGTH: # If there are more than 254 characters, then the UTF-8 # encoding is definitely going to be too long. 
- reason = get_length_reason(ret.email, utf8=True) + reason = get_length_reason(ret.normalized, utf8=True) else: reason = "(when encoded in bytes)" raise EmailSyntaxError(f"The email address is too long {reason}.") diff --git a/tests/test_main.py b/tests/test_main.py index 34005da..e32af94 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -13,7 +13,7 @@ def test_dict_accessor(): input_email = "testaddr@example.tld" valid_email = validate_email(input_email, check_deliverability=False) assert isinstance(valid_email.as_dict(), dict) - assert valid_email.as_dict()["original_email"] == input_email + assert valid_email.as_dict()["original"] == input_email def test_main_single_good_input(monkeypatch, capsys): @@ -24,7 +24,7 @@ def test_main_single_good_input(monkeypatch, capsys): stdout, _ = capsys.readouterr() output = json.loads(str(stdout)) assert isinstance(output, dict) - assert validate_email(test_email, dns_resolver=RESOLVER).original_email == output["original_email"] + assert validate_email(test_email, dns_resolver=RESOLVER).original == output["original"] def test_main_single_bad_input(monkeypatch, capsys): @@ -53,7 +53,7 @@ def test_bytes_input(): input_email = b"testaddr@example.tld" valid_email = validate_email(input_email, check_deliverability=False) assert isinstance(valid_email.as_dict(), dict) - assert valid_email.as_dict()["email"] == input_email.decode("utf8") + assert valid_email.as_dict()["normalized"] == input_email.decode("utf8") input_email = "testaddr中example.tld".encode("utf32") with pytest.raises(EmailSyntaxError): diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 707d0e8..0a26d55 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -16,7 +16,7 @@ smtputf8=False, ascii_domain='example.tld', domain='example.tld', - email='Abc@example.tld', + normalized='Abc@example.tld', ascii_email='Abc@example.tld', ), ), @@ -28,7 +28,7 @@ smtputf8=False, ascii_domain='test-example.com', domain='test-example.com', - 
email='Abc.123@test-example.com', + normalized='Abc.123@test-example.com', ascii_email='Abc.123@test-example.com', ), ), @@ -40,7 +40,7 @@ smtputf8=False, ascii_domain='example.tld', domain='example.tld', - email='user+mailbox/department=shipping@example.tld', + normalized='user+mailbox/department=shipping@example.tld', ascii_email='user+mailbox/department=shipping@example.tld', ), ), @@ -52,7 +52,7 @@ smtputf8=False, ascii_domain='example.tld', domain='example.tld', - email="!#$%&'*+-/=?^_`.{|}~@example.tld", + normalized="!#$%&'*+-/=?^_`.{|}~@example.tld", ascii_email="!#$%&'*+-/=?^_`.{|}~@example.tld", ), ), @@ -64,7 +64,7 @@ smtputf8=False, ascii_domain='xn--fiqq24b10vi0d.tw', domain='臺網中心.tw', - email='jeff@臺網中心.tw', + normalized='jeff@臺網中心.tw', ascii_email='jeff@xn--fiqq24b10vi0d.tw', ), ), @@ -88,7 +88,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--5nqv22n.xn--lhr59c', domain='郵件.商務', - email='伊昭傑@郵件.商務', + normalized='伊昭傑@郵件.商務', ), ), ( @@ -98,7 +98,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--l2bl7a9d.xn--o1b8dj2ki', domain='मोहन.ईन्फो', - email='राम@मोहन.ईन्फो', + normalized='राम@मोहन.ईन्फो', ), ), ( @@ -108,7 +108,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--80ajglhfv.xn--j1aef', domain='екзампл.ком', - email='юзер@екзампл.ком', + normalized='юзер@екзампл.ком', ), ), ( @@ -118,7 +118,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--mxahbxey0c.xn--xxaf0a', domain='εχαμπλε.ψομ', - email='θσερ@εχαμπλε.ψομ', + normalized='θσερ@εχαμπλε.ψομ', ), ), ( @@ -128,7 +128,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--fiqq24b10vi0d.tw', domain='臺網中心.tw', - email='葉士豪@臺網中心.tw', + normalized='葉士豪@臺網中心.tw', ), ), ( @@ -138,7 +138,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--fiqq24b10vi0d.xn--kpry57d', domain='臺網中心.台灣', - email='葉士豪@臺網中心.台灣', + 
normalized='葉士豪@臺網中心.台灣', ), ), ( @@ -148,7 +148,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--fiqq24b10vi0d.tw', domain='臺網中心.tw', - email='jeff葉@臺網中心.tw', + normalized='jeff葉@臺網中心.tw', ), ), ( @@ -158,7 +158,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='example.tld', domain='example.tld', - email='ñoñó@example.tld', + normalized='ñoñó@example.tld', ), ), ( @@ -168,7 +168,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='example.tld', domain='example.tld', - email='我買@example.tld', + normalized='我買@example.tld', ), ), ( @@ -178,7 +178,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='example.tld', domain='example.tld', - email='甲斐黒川日本@example.tld', + normalized='甲斐黒川日本@example.tld', ), ), ( @@ -188,7 +188,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='example.tld', domain='example.tld', - email='чебурашкаящик-с-апельсинами.рф@example.tld', + normalized='чебурашкаящик-с-апельсинами.рф@example.tld', ), ), ( @@ -198,7 +198,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='domain.with.idn.tld', domain='domain.with.idn.tld', - email='उदाहरण.परीक्ष@domain.with.idn.tld', + normalized='उदाहरण.परीक्ष@domain.with.idn.tld', ), ), ( @@ -208,7 +208,7 @@ def test_email_valid(email_input, output): smtputf8=True, ascii_domain='xn--qxaa9ba.gr', domain='εεττ.gr', - email='ιωάννης@εεττ.gr', + normalized='ιωάννης@εεττ.gr', ), ), ],