diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst index f4b9f52509689e..7a77640571cb1e 100644 --- a/Doc/library/email.errors.rst +++ b/Doc/library/email.errors.rst @@ -112,3 +112,6 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`. * :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded bytes, the number of non-padding base64 characters was invalid (1 more than a multiple of 4). The encoded block was kept as-is. + +* :class:`InvalidDateDefect` -- When decoding an invalid or unparsable date field. + The original value is kept as-is. \ No newline at end of file diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 4d0e920eb0ad29..0e266b6a45782a 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -124,8 +124,10 @@ of the new API. .. function:: parsedate_to_datetime(date) The inverse of :func:`format_datetime`. Performs the same function as - :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`. If - the input date has a timezone of ``-0000``, the ``datetime`` will be a naive + :func:`parsedate`, but on success returns a :mod:`~datetime.datetime`; + otherwise ``ValueError`` is raised if *date* contains an invalid value such + as an hour greater than 23 or a timezone offset not between -24 and 24 hours. + If the input date has a timezone of ``-0000``, the ``datetime`` will be a naive ``datetime``, and if the date is conforming to the RFCs it will represent a time in UTC but with no indication of the actual source timezone of the message the date comes from. If the input date has any other valid timezone diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 41ff6f8c000d57..4d27f87974b20d 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -65,7 +65,7 @@ def _parsedate_tz(data): """ if not data: - return + return None data = data.split() # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. diff --git a/Lib/email/errors.py b/Lib/email/errors.py index d28a6800104bab..1d258c34fc9d4a 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -108,3 +108,6 @@ class NonASCIILocalPartDefect(HeaderDefect): """local_part contains non-ASCII characters""" # This defect only occurs during unicode parsing, not when # parsing messages decoded from binary. + +class InvalidDateDefect(HeaderDefect): + """Header has unparseable or invalid date""" diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 5d84fc0d82d0b0..d8613ebf24e613 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -302,7 +302,14 @@ def parse(cls, value, kwds): kwds['parse_tree'] = parser.TokenList() return if isinstance(value, str): - value = utils.parsedate_to_datetime(value) + kwds['decoded'] = value + try: + value = utils.parsedate_to_datetime(value) + except ValueError: + kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) + kwds['datetime'] = None + kwds['parse_tree'] = parser.TokenList() + return kwds['datetime'] = value kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['parse_tree'] = cls.value_parser(kwds['decoded']) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 1a7719dbc4898f..a8e46a761bf922 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -195,7 +195,10 @@ def make_msgid(idstring=None, domain=None): def parsedate_to_datetime(data): - *dtuple, tz = _parsedate_tz(data) + parsed_date_tz = _parsedate_tz(data) + if parsed_date_tz is None: + raise ValueError('Invalid date value or format "%s"' % str(data)) + *dtuple, tz = parsed_date_tz if tz is None: return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6], diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 68bbc9561c4aff..59fcd932e0ec4a 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -204,6 +204,22 @@ def test_no_value_is_defect(self): self.assertEqual(len(h.defects), 1) self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue) + def test_invalid_date_format(self): + s = 'Not a date header' + h = self.make_header('date', s) + self.assertEqual(h, s) + self.assertIsNone(h.datetime) + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects[0], errors.InvalidDateDefect) + + def test_invalid_date_value(self): + s = 'Tue, 06 Jun 2017 27:39:33 +0600' + h = self.make_header('date', s) + self.assertEqual(h, s) + self.assertIsNone(h.datetime) + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects[0], errors.InvalidDateDefect) + def test_datetime_read_only(self): h = self.make_header('date', self.datestring) with self.assertRaises(AttributeError): diff --git a/Lib/test/test_email/test_inversion.py b/Lib/test/test_email/test_inversion.py index 8e8d67641b8943..7bd7f2a72067ad 100644 --- a/Lib/test/test_email/test_inversion.py +++ b/Lib/test/test_email/test_inversion.py @@ -46,6 +46,14 @@ def msg_as_input(self, msg): foo """),), + 'header_with_invalid_date': (dedent(b"""\ + Date: Tue, 06 Jun 2017 27:39:33 +0600 + From: abc@xyz.com + Subject: timezones + + How do they work even? + """),), + } payload_params = { diff --git a/Lib/test/test_email/test_utils.py b/Lib/test/test_email/test_utils.py index 4e3c3f3a195fc4..e3d3eaebc93693 100644 --- a/Lib/test/test_email/test_utils.py +++ b/Lib/test/test_email/test_utils.py @@ -48,6 +48,16 @@ def test_parsedate_to_datetime_naive(self): utils.parsedate_to_datetime(self.datestring + ' -0000'), self.naive_dt) + def test_parsedate_to_datetime_with_invalid_raises_valueerror(self): + invalid_dates = ['', + '0', + 'A Complete Waste of Time' + 'Tue, 06 Jun 2017 27:39:33 +0600', + 'Tue, 06 Jun 2017 07:39:33 +2600', + 'Tue, 06 Jun 2017 27:39:33'] + for dtstr in invalid_dates: + with self.subTest(dtstr=dtstr): + self.assertRaises(ValueError, utils.parsedate_to_datetime, dtstr) class LocaltimeTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst b/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst new file mode 100644 index 00000000000000..83830e343da66f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-09-04-17-33-04.bpo-30681.LR4fnY.rst @@ -0,0 +1,2 @@ +Handle exceptions caused by unparseable date headers when using email +"default" policy. Patch by Tim Bell, Georges Toth