Skip to content

Commit

Permalink
WIP Add an asynchronous call method
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshData committed Oct 20, 2023
1 parent 786defc commit 5e52051
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 57 deletions.
14 changes: 11 additions & 3 deletions email_validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# Export the main method, helper methods, and the public data types.
from .exceptions_types import ValidatedEmail, EmailNotValidError, \
EmailSyntaxError, EmailUndeliverableError
from .validate_email import validate_email
from .validate_email import validate_email_sync as validate_email, validate_email_async
from .version import __version__

__all__ = ["validate_email",
__all__ = ["validate_email", "validate_email_async",
"ValidatedEmail", "EmailNotValidError",
"EmailSyntaxError", "EmailUndeliverableError",
"caching_resolver", "__version__"]
"caching_resolver", "caching_async_resolver",
"__version__"]


def caching_resolver(*args, **kwargs):
Expand All @@ -19,6 +20,13 @@ def caching_resolver(*args, **kwargs):
return caching_resolver(*args, **kwargs)


def caching_async_resolver(*args, **kwargs):
    """Return the result of ``deliverability.caching_async_resolver``.

    All positional and keyword arguments are forwarded unchanged.
    """
    # Deferred to call time: `deliverability` is slow to import
    # (due to dns.resolver), so it is only loaded when actually needed.
    from . import deliverability as _deliverability

    return _deliverability.caching_async_resolver(*args, **kwargs)


# These global attributes are a part of the library's API and can be
# changed by library users.

Expand Down
99 changes: 75 additions & 24 deletions email_validator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,85 @@
# python -m email_validator [email protected]
# python -m email_validator < LIST_OF_ADDRESSES.TXT
#
# Provide email addresses to validate either as a command-line argument
# or in STDIN separated by newlines. Validation errors will be printed for
# invalid email addresses. When passing an email address on the command
# line, if the email address is valid, information about it will be printed.
# When using STDIN, no output will be given for valid email addresses.
# Provide email addresses to validate either as a single command-line argument
# or on STDIN separated by newlines.
#
# When passing an email address on the command line, if the email address
# is valid, information about it will be printed to STDOUT. If the email
# address is invalid, an error message will be printed to STDOUT and
# the exit code will be set to 1.
#
# When passing email addresses on STDIN, validation errors will be printed
# for invalid email addresses. No output is given for valid email addresses.
# Validation errors are preceded by the email address that failed and a tab
# character. It is the user's responsibility to ensure email addresses
# do not contain tab or newline characters.
#
# Keyword arguments to validate_email can be set in environment variables
# of the same name but uppercase (see below).

import json
import os
import sys
import itertools

from .validate_email import validate_email
from .deliverability import caching_resolver
from .deliverability import caching_async_resolver
from .exceptions_types import EmailNotValidError


def main_command_line(email_address, options, dns_resolver):
    """Validate the single email address passed on the command line.

    ``options`` is expanded into keyword arguments for ``validate_email``
    and ``dns_resolver`` is passed through as-is.

    Returns True after printing the validation result as indented,
    key-sorted JSON. Returns False after printing the error when
    ``EmailNotValidError`` is raised.
    """
    # Imported at call time from the package namespace.
    from . import validate_email

    try:
        validated = validate_email(email_address, dns_resolver=dns_resolver, **options)
        report = json.dumps(
            validated.as_dict(),
            indent=2,
            sort_keys=True,
            ensure_ascii=False,
        )
        print(report)
    except EmailNotValidError as err:
        print(err)
        return False
    return True


async def main_stdin(options, dns_resolver):
    """Validate the email addresses passed line-by-line on STDIN.

    Lines are read in batches of up to 100. Every address in a batch is
    validated concurrently with the async validator, and the batch is
    awaited in full before the next one is read. For each invalid address,
    the stripped address and the error are printed separated by a tab, in
    input order. Valid addresses produce no output.

    ``options`` is expanded into keyword arguments for the validator. If
    ``dns_resolver`` is falsy, a resolver from ``caching_async_resolver()``
    is created and shared by all lookups.
    """
    import asyncio

    from . import validate_email_async

    if not dns_resolver:
        dns_resolver = caching_async_resolver()

    batch_size = 100

    async def check(raw_line):
        # Returns None for a valid address, else an (address, error) pair.
        address = raw_line.strip()
        try:
            await validate_email_async(address, dns_resolver=dns_resolver, **options)
        except EmailNotValidError as exc:
            return (address, exc)
        return None

    # Pull fixed-size slices off a single iterator over STDIN until it
    # is exhausted (an empty slice signals the end of input).
    lines = iter(sys.stdin)
    while True:
        batch = list(itertools.islice(lines, batch_size))
        if not batch:
            break
        outcomes = await asyncio.gather(*(check(raw_line) for raw_line in batch))
        for failure in outcomes:
            if failure is not None:
                print(*failure, sep='\t')


def main(dns_resolver=None):
# The dns_resolver argument is for tests.

Expand All @@ -36,24 +97,14 @@ def main(dns_resolver=None):
if varname in os.environ:
options[varname.lower()] = float(os.environ[varname])

if len(sys.argv) == 1:
# Validate the email addresses passed line-by-line on STDIN.
dns_resolver = dns_resolver or caching_resolver()
for line in sys.stdin:
email = line.strip()
try:
validate_email(email, dns_resolver=dns_resolver, **options)
except EmailNotValidError as e:
print(f"{email} {e}")
if len(sys.argv) == 2:
return main_command_line(sys.argv[1], options, dns_resolver)
else:
# Validate the email address passed on the command line.
email = sys.argv[1]
try:
result = validate_email(email, dns_resolver=dns_resolver, **options)
print(json.dumps(result.as_dict(), indent=2, sort_keys=True, ensure_ascii=False))
except EmailNotValidError as e:
print(e)
import asyncio
asyncio.run(main_stdin(options, dns_resolver))
return True


if __name__ == "__main__":
main()
if not main():
sys.exit(1)
59 changes: 51 additions & 8 deletions email_validator/deliverability.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .exceptions_types import EmailUndeliverableError

import dns.resolver
import dns.asyncresolver
import dns.exception


Expand All @@ -16,30 +17,72 @@ def caching_resolver(*, timeout: Optional[int] = None, cache=None):
return resolver


def validate_email_deliverability(domain: str, domain_i18n: str, timeout: Optional[int] = None, dns_resolver=None):
def caching_async_resolver(*, timeout: Optional[int] = None, cache=None):
    """Create a ``dns.asyncresolver.Resolver`` with a cache attached.

    timeout: assigned to the resolver's ``lifetime`` (in seconds). When
        None, the package-level ``DEFAULT_TIMEOUT`` is used instead.
    cache: cache object to attach to the resolver. When falsy, a new
        ``dns.resolver.LRUCache()`` is created.

    Returns the configured resolver.
    """
    if timeout is None:
        # Read at call time from the package namespace.
        from . import DEFAULT_TIMEOUT
        timeout = DEFAULT_TIMEOUT

    async_resolver = dns.asyncresolver.Resolver()
    if not cache:
        cache = dns.resolver.LRUCache()
    async_resolver.cache = cache  # type: ignore
    async_resolver.lifetime = timeout  # type: ignore # timeout, in seconds
    return async_resolver


async def validate_email_deliverability(
domain: str,
domain_i18n: str,
timeout: Optional[int] = None,
dns_resolver=None,
_async_loop: Optional[bool] = None):

# Check that the domain resolves to an MX record. If there is no MX record,
# try an A or AAAA record which is a deprecated fallback for deliverability.
# Raises an EmailUndeliverableError on failure. On success, returns a dict
# with deliverability information.

# When _async_loop is None, this method must return synchronously. The
# caller drives the coroutine manually to get the result synchronously,
# and consequently this call must not yield execution. Otherwise, regular
# async/await calls may be used.

# If no dns.resolver.Resolver was given, get dnspython's default resolver.
# Override the default resolver's timeout. This may affect other uses of
# dnspython in this process.
if dns_resolver is None:
if not _async_loop:
dns_resolver = dns.resolver.get_default_resolver()
else:
dns_resolver = dns.asyncresolver.get_default_resolver()

# Override the default resolver's timeout. This may affect other uses of
# dnspython in this process.
from . import DEFAULT_TIMEOUT
if timeout is None:
timeout = DEFAULT_TIMEOUT
dns_resolver = dns.resolver.get_default_resolver()
dns_resolver.lifetime = timeout

elif timeout is not None:
raise ValueError("It's not valid to pass both timeout and dns_resolver.")

# Define a resolve function that works with a regular or
# asynchronous dns.resolver.Resolver instance depending
# on the _async_loop argument.
async def resolve(qname, rtype):
# When called non-asynchronously, expect a regular
# resolver that returns synchronously. Or if the
# user didn't pass a dns.asyncresolver.Resolver,
# call it synchronously.
if not _async_loop or not isinstance(dns_resolver, dns.asyncresolver.Resolver):
return dns_resolver.resolve(qname, rtype)

# When called asynchronously, if given a dns.asyncresolver.Resolver,
# call it asynchronously.
else:
return await dns_resolver.resolve(qname, rtype)

deliverability_info: Dict[str, Any] = {}

try:
try:
# Try resolving for MX records (RFC 5321 Section 5).
response = dns_resolver.resolve(domain, "MX")
response = await resolve(domain, "MX")

# For reporting, put them in priority order and remove the trailing dot in the qnames.
mtas = sorted([(r.preference, str(r.exchange).rstrip('.')) for r in response])
Expand All @@ -59,7 +102,7 @@ def validate_email_deliverability(domain: str, domain_i18n: str, timeout: Option
except dns.resolver.NoAnswer:
# If there was no MX record, fall back to an A record. (RFC 5321 Section 5)
try:
response = dns_resolver.resolve(domain, "A")
response = await resolve(domain, "A")
deliverability_info["mx"] = [(0, str(r)) for r in response]
deliverability_info["mx_fallback_type"] = "A"

Expand All @@ -68,7 +111,7 @@ def validate_email_deliverability(domain: str, domain_i18n: str, timeout: Option
# If there was no A record, fall back to an AAAA record.
# (It's unclear if SMTP servers actually do this.)
try:
response = dns_resolver.resolve(domain, "AAAA")
response = await resolve(domain, "AAAA")
deliverability_info["mx"] = [(0, str(r)) for r in response]
deliverability_info["mx_fallback_type"] = "AAAA"

Expand All @@ -85,7 +128,7 @@ def validate_email_deliverability(domain: str, domain_i18n: str, timeout: Option
# absence of an MX record, this is probably a good sign that the
# domain is not used for email.
try:
response = dns_resolver.resolve(domain, "TXT")
response = await resolve(domain, "TXT")
for rec in response:
value = b"".join(rec.strings)
if value.startswith(b"v=spf1 "):
Expand Down
Loading

0 comments on commit 5e52051

Please sign in to comment.