Skip to content

Commit

Permalink
bpo-30681: Support invalid date format or value in email Date header (p…
Browse files Browse the repository at this point in the history
…ythonGH-22090)

I am re-submitting an older PR which was abandoned but is still relevant, python#10783 by @timb07.

The issue being solved () is still relevant. The original PR python#10783 was closed as
the final request changes were not applied and since abandoned.

In this new PR I have re-used the original patch plus applied both comments from the review, by @maxking and @pganssle.


For reference, here is the original PR description:
In email.utils.parsedate_to_datetime(), a failure to parse the date, or invalid date components (such as hour outside 0..23) raises an exception. Document this behaviour, and add tests to test_email/test_utils.py to confirm this behaviour.

In email.headerregistry.DateHeader.parse(), check when parsedate_to_datetime() raises an exception and add a new defect InvalidDateDefect; preserve the invalid value as the string value of the header, but set the datetime attribute to None.

Add tests to test_email/test_headerregistry.py to confirm this behaviour; also added test to test_email/test_inversion.py to confirm emails with such defective date headers round trip successfully.

This pull request incorporates feedback gratefully received from @bitdancer, @brettcannon, @Mariatta and @warsaw, and replaces the earlier PR python#2254.

Automerge-Triggered-By: GH:warsaw
  • Loading branch information
sim0nx authored and adorilson committed Mar 11, 2021
1 parent 41ebafb commit c135e20
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 5 deletions.
3 changes: 3 additions & 0 deletions Doc/library/email.errors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
bytes, the number of non-padding base64 characters was invalid (1 more than
a multiple of 4). The encoded block was kept as-is.

* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field.
The original value is kept as-is.
6 changes: 4 additions & 2 deletions Doc/library/email.utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,10 @@ of the new API.
.. function:: parsedate_to_datetime(date)

The inverse of :func:`format_datetime`. Performs the same function as
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If
the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
:func:`parsedate`, but on success returns a :mod:`~datetime.datetime`;
otherwise ``ValueError`` is raised if *date* contains an invalid value such
as an hour greater than 23 or a timezone offset not between -24 and 24 hours.
If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
``datetime``, and if the date is conforming to the RFCs it will represent a
time in UTC but with no indication of the actual source timezone of the
message the date comes from. If the input date has any other valid timezone
Expand Down
2 changes: 1 addition & 1 deletion Lib/email/_parseaddr.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _parsedate_tz(data):
"""
if not data:
return
return None
data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and
# adjust for this.
Expand Down
3 changes: 3 additions & 0 deletions Lib/email/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect):
"""local_part contains non-ASCII characters"""
# This defect only occurs during unicode parsing, not when
# parsing messages decoded from binary.

class InvalidDateDefect(HeaderDefect):
"""Header has unparseable or invalid date"""
9 changes: 8 additions & 1 deletion Lib/email/headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,14 @@ def parse(cls, value, kwds):
kwds['parse_tree'] = parser.TokenList()
return
if isinstance(value, str):
value = utils.parsedate_to_datetime(value)
kwds['decoded'] = value
try:
value = utils.parsedate_to_datetime(value)
except ValueError:
kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
kwds['datetime'] = None
kwds['parse_tree'] = parser.TokenList()
return
kwds['datetime'] = value
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
Expand Down
5 changes: 4 additions & 1 deletion Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None):


def parsedate_to_datetime(data):
*dtuple, tz = _parsedate_tz(data)
parsed_date_tz = _parsedate_tz(data)
if parsed_date_tz is None:
raise ValueError('Invalid date value or format "%s"' % str(data))
*dtuple, tz = parsed_date_tz
if tz is None:
return datetime.datetime(*dtuple[:6])
return datetime.datetime(*dtuple[:6],
Expand Down
16 changes: 16 additions & 0 deletions Lib/test/test_email/test_headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,22 @@ def test_no_value_is_defect(self):
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue)

def test_invalid_date_format(self):
s = 'Not a date header'
h = self.make_header('date', s)
self.assertEqual(h, s)
self.assertIsNone(h.datetime)
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)

def test_invalid_date_value(self):
s = 'Tue, 06 Jun 2017 27:39:33 +0600'
h = self.make_header('date', s)
self.assertEqual(h, s)
self.assertIsNone(h.datetime)
self.assertEqual(len(h.defects), 1)
self.assertIsInstance(h.defects[0], errors.InvalidDateDefect)

def test_datetime_read_only(self):
h = self.make_header('date', self.datestring)
with self.assertRaises(AttributeError):
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/test_email/test_inversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ def msg_as_input(self, msg):
foo
"""),),

'header_with_invalid_date': (dedent(b"""\
Date: Tue, 06 Jun 2017 27:39:33 +0600
From: [email protected]
Subject: timezones
How do they work even?
"""),),

}

payload_params = {
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_email/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ def test_parsedate_to_datetime_naive(self):
utils.parsedate_to_datetime(self.datestring + ' -0000'),
self.naive_dt)

def test_parsedate_to_datetime_with_invalid_raises_valueerror(self):
invalid_dates = ['',
'0',
'A Complete Waste of Time'
'Tue, 06 Jun 2017 27:39:33 +0600',
'Tue, 06 Jun 2017 07:39:33 +2600',
'Tue, 06 Jun 2017 27:39:33']
for dtstr in invalid_dates:
with self.subTest(dtstr=dtstr):
self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr)

class LocaltimeTests(unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Handle exceptions caused by unparseable date headers when using email
"default" policy. Patch by Tim Bell, Georges Toth

0 comments on commit c135e20

Please sign in to comment.