Merge branch 'master' into accept-bool

python · Aug 31, 2019 · d69a95b · d69a95b
2 parents cde16f5 + c5b242f
commit d69a95b
Show file tree

Hide file tree

Showing 60 changed files with 254 additions and 104 deletions.
diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst
@@ -876,7 +876,7 @@ Path Configuration
 If at least one "output field" is not set, Python computes the path
 configuration to fill unset fields. If
 :c:member:`~PyConfig.module_search_paths_set` is equal to 0,
-:c:member:`~PyConfig.module_search_paths` is overriden and
+:c:member:`~PyConfig.module_search_paths` is overridden and
 :c:member:`~PyConfig.module_search_paths_set` is set to 1.
 
 It is possible to completely ignore the function computing the default

diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
@@ -1379,8 +1379,8 @@ an :term:`importer`.
    bytecode file. An empty string represents no optimization, so
    ``/foo/bar/baz.py`` with an *optimization* of ``''`` will result in a
    bytecode path of ``/foo/bar/__pycache__/baz.cpython-32.pyc``. ``None`` causes
-   the interpter's optimization level to be used. Any other value's string
-   representation being used, so ``/foo/bar/baz.py`` with an *optimization* of
+   the interpreter's optimization level to be used. Any other value's string
+   representation is used, so ``/foo/bar/baz.py`` with an *optimization* of
    ``2`` will lead to the bytecode path of
    ``/foo/bar/__pycache__/baz.cpython-32.opt-2.pyc``. The string representation
    of *optimization* can only be alphanumeric, else :exc:`ValueError` is raised.

diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
@@ -30,9 +30,17 @@ avoid confusion, the terms used here are "pickling" and "unpickling".
 
 .. warning::
 
-   The :mod:`pickle` module is not secure against erroneous or maliciously
-   constructed data.  Never unpickle data received from an untrusted or
-   unauthenticated source.
+   The ``pickle`` module **is not secure**. Only unpickle data you trust.
+
+   It is possible to construct malicious pickle data which will **execute
+   arbitrary code during unpickling**. Never unpickle data that could have come
+   from an untrusted source, or that could have been tampered with.
+
+   Consider signing data with :mod:`hmac` if you need to ensure that it has not
+   been tampered with.
+
+   Safer serialization formats such as :mod:`json` may be more appropriate if
+   you are processing untrusted data. See :ref:`comparison-with-json`.
 
 
 Relationship to other Python modules
@@ -75,6 +83,9 @@ The :mod:`pickle` module differs from :mod:`marshal` in several significant ways
   pickling and unpickling code deals with Python 2 to Python 3 type differences
   if your data is crossing that unique breaking change language boundary.
 
+
+.. _comparison-with-json:
+
 Comparison with ``json``
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -94,7 +105,10 @@ There are fundamental differences between the pickle protocols and
   types, and no custom classes; pickle can represent an extremely large
   number of Python types (many of them automatically, by clever usage
   of Python's introspection facilities; complex cases can be tackled by
-  implementing :ref:`specific object APIs <pickle-inst>`).
+  implementing :ref:`specific object APIs <pickle-inst>`);
+
+* Unlike pickle, deserializing untrusted JSON does not in itself create an
+  arbitrary code execution vulnerability.
 
 .. seealso::
    The :mod:`json` module: a standard library module allowing JSON

diff --git a/Include/pyhash.h b/Include/pyhash.h
@@ -119,7 +119,7 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
  * configure script.
  *
  * - FNV is available on all platforms and architectures.
- * - SIPHASH24 only works on plaforms that don't require aligned memory for integers.
+ * - SIPHASH24 only works on platforms that don't require aligned memory for integers.
  * - With EXTERNAL embedders can provide an alternative implementation with::
  *
  *     PyHash_FuncDef PyHash_Func = {...};

diff --git a/Include/pyport.h b/Include/pyport.h
@@ -133,8 +133,9 @@ typedef int Py_ssize_clean_t;
 
 /* PY_FORMAT_SIZE_T is a platform-specific modifier for use in a printf
  * format to convert an argument with the width of a size_t or Py_ssize_t.
- * C99 introduced "z" for this purpose, but not all platforms support that;
- * e.g., MS compilers use "I" instead.
+ * C99 introduced "z" for this purpose, but old MSVCs did not support it.
+ * Since MSVC has supported "z" since (at least) 2015, we can just use "z"
+ * for new code.
  *
  * These "high level" Python format functions interpret "z" correctly on
  * all platforms (Python interprets the format string itself, and does whatever
@@ -152,19 +153,11 @@ typedef int Py_ssize_clean_t;
  *     Py_ssize_t index;
  *     fprintf(stderr, "index %" PY_FORMAT_SIZE_T "d sucks\n", index);
  *
- * That will expand to %ld, or %Id, or to something else correct for a
- * Py_ssize_t on the platform.
+ * That will expand to %zd or to something else correct for a Py_ssize_t on
+ * the platform.
  */
 #ifndef PY_FORMAT_SIZE_T
-#   if SIZEOF_SIZE_T == SIZEOF_INT && !defined(__APPLE__)
-#       define PY_FORMAT_SIZE_T ""
-#   elif SIZEOF_SIZE_T == SIZEOF_LONG
-#       define PY_FORMAT_SIZE_T "l"
-#   elif defined(MS_WINDOWS)
-#       define PY_FORMAT_SIZE_T "I"
-#   else
-#       error "This platform's pyconfig.h needs to define PY_FORMAT_SIZE_T"
-#   endif
+#   define PY_FORMAT_SIZE_T "z"
 #endif
 
 /* Py_LOCAL can be used instead of static to get the fastest possible calling

diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py
@@ -71,7 +71,7 @@ def connect(host=None, port=None, *,
             ssl_handshake_timeout=None,
             happy_eyeballs_delay=None, interleave=None):
     # Design note:
-    # Don't use decorator approach but exilicit non-async
+    # Don't use decorator approach but explicit non-async
     # function to fail fast and explicitly
     # if passed arguments don't match the function signature
     return _ContextManagerHelper(_connect(host, port, limit,
@@ -442,7 +442,7 @@ def connect_unix(path=None, *,
                      ssl_handshake_timeout=None):
         """Similar to `connect()` but works with UNIX Domain Sockets."""
         # Design note:
-        # Don't use decorator approach but exilicit non-async
+        # Don't use decorator approach but explicit non-async
         # function to fail fast and explicitly
         # if passed arguments don't match the function signature
         return _ContextManagerHelper(_connect_unix(path,

diff --git a/Lib/bdb.py b/Lib/bdb.py
@@ -38,7 +38,7 @@ def canonic(self, filename):
         """Return canonical form of filename.
 
         For real filenames, the canonical form is a case-normalized (on
-        case insenstive filesystems) absolute path.  'Filenames' with
+        case insensitive filesystems) absolute path.  'Filenames' with
         angle brackets, such as "<stdin>", generated in interactive
         mode, are returned unchanged.
         """

diff --git a/Lib/ctypes/test/test_structures.py b/Lib/ctypes/test/test_structures.py
@@ -3,7 +3,7 @@
 from ctypes.test import need_symbol
 from struct import calcsize
 import _ctypes_test
-import test.support
+from test import support
 
 class SubclassesTest(unittest.TestCase):
     def test_subclass(self):
@@ -202,7 +202,7 @@ class X(Structure):
              "_pack_": -1}
         self.assertRaises(ValueError, type(Structure), "X", (Structure,), d)
 
-    @test.support.cpython_only
+    @support.cpython_only
     def test_packed_c_limits(self):
         # Issue 15989
         import _testcapi
@@ -396,27 +396,66 @@ class Z(Y):
         self.assertRaises(TypeError, lambda: Z(1, 2, 3, 4, 5, 6, 7))
 
     def test_pass_by_value(self):
-        # This should mirror the structure in Modules/_ctypes/_ctypes_test.c
-        class X(Structure):
+        # This should mirror the Test structure
+        # in Modules/_ctypes/_ctypes_test.c
+        class Test(Structure):
             _fields_ = [
                 ('first', c_ulong),
                 ('second', c_ulong),
                 ('third', c_ulong),
             ]
 
-        s = X()
+        s = Test()
         s.first = 0xdeadbeef
         s.second = 0xcafebabe
         s.third = 0x0bad1dea
         dll = CDLL(_ctypes_test.__file__)
         func = dll._testfunc_large_struct_update_value
-        func.argtypes = (X,)
+        func.argtypes = (Test,)
         func.restype = None
         func(s)
         self.assertEqual(s.first, 0xdeadbeef)
         self.assertEqual(s.second, 0xcafebabe)
         self.assertEqual(s.third, 0x0bad1dea)
 
+    def test_pass_by_value_finalizer(self):
+        # bpo-37140: Similar to test_pass_by_value(), but the Python structure
+        # has a finalizer (__del__() method): the finalizer must only be called
+        # once.
+
+        finalizer_calls = []
+
+        class Test(Structure):
+            _fields_ = [
+                ('first', c_ulong),
+                ('second', c_ulong),
+                ('third', c_ulong),
+            ]
+            def __del__(self):
+                finalizer_calls.append("called")
+
+        s = Test(1, 2, 3)
+        # Test the StructUnionType_paramfunc() code path which copies the
+        # structure: if the structure is larger than sizeof(void*).
+        self.assertGreater(sizeof(s), sizeof(c_void_p))
+
+        dll = CDLL(_ctypes_test.__file__)
+        func = dll._testfunc_large_struct_update_value
+        func.argtypes = (Test,)
+        func.restype = None
+        func(s)
+        # bpo-37140: Passing the structure by reference must not call
+        # its finalizer!
+        self.assertEqual(finalizer_calls, [])
+        self.assertEqual(s.first, 1)
+        self.assertEqual(s.second, 2)
+        self.assertEqual(s.third, 3)
+
+        # The finalizer must be called exactly once
+        s = None
+        support.gc_collect()
+        self.assertEqual(finalizer_calls, ["called"])
+
     def test_pass_by_value_in_register(self):
         class X(Structure):
             _fields_ = [

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
@@ -935,6 +935,10 @@ def __str__(self):
         return ''
 
 
+class _InvalidEwError(errors.HeaderParseError):
+    """Invalid encoded word found while parsing headers."""
+
+
 # XXX these need to become classes and used as instances so
 # that a program can't change them in a parse tree and screw
 # up other parse trees.  Maybe should have  tests for that, too.
@@ -1039,7 +1043,10 @@ def get_encoded_word(value):
         raise errors.HeaderParseError(
             "expected encoded word but found {}".format(value))
     remstr = ''.join(remainder)
-    if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
+    if (len(remstr) > 1 and
+        remstr[0] in hexdigits and
+        remstr[1] in hexdigits and
+        tok.count('?') < 2):
         # The ? after the CTE was followed by an encoded word escape (=XX).
         rest, *remainder = remstr.split('?=', 1)
         tok = tok + '?=' + rest
@@ -1051,7 +1058,7 @@ def get_encoded_word(value):
     try:
         text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
     except ValueError:
-        raise errors.HeaderParseError(
+        raise _InvalidEwError(
             "encoded word format invalid: '{}'".format(ew.cte))
     ew.charset = charset
     ew.lang = lang
@@ -1101,9 +1108,12 @@ def get_unstructured(value):
             token, value = get_fws(value)
             unstructured.append(token)
             continue
+        valid_ew = True
         if value.startswith('=?'):
             try:
                 token, value = get_encoded_word(value)
+            except _InvalidEwError:
+                valid_ew = False
             except errors.HeaderParseError:
                 # XXX: Need to figure out how to register defects when
                 # appropriate here.
@@ -1125,7 +1135,10 @@ def get_unstructured(value):
         # Split in the middle of an atom if there is a rfc2047 encoded word
         # which does not have WSP on both sides. The defect will be registered
         # the next time through the loop.
-        if rfc2047_matcher.search(tok):
+        # This needs to only be performed when the encoded word is valid;
+        # otherwise, performing it on an invalid encoded word can cause
+        # the parser to go into an infinite loop.
+        if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
         vtext = ValueTerminal(tok, 'vtext')
         _validate_xtext(vtext)

diff --git a/Lib/idlelib/README.txt b/Lib/idlelib/README.txt
@@ -115,7 +115,7 @@ tooltip.py # unused
 
 IDLE MENUS
 Top level items and most submenu items are defined in mainmenu.
-Extenstions add submenu items when active.  The names given are
+Extensions add submenu items when active.  The names given are
 found, quoted, in one of these modules, paired with a '<<pseudoevent>>'.
 Each pseudoevent is bound to an event handler.  Some event handlers
 call another function that does the actual work.  The annotations below

diff --git a/Lib/idlelib/browser.py b/Lib/idlelib/browser.py
@@ -29,7 +29,7 @@ def transform_children(child_dict, modname=None):
     The dictionary maps names to pyclbr information objects.
     Filter out imported objects.
     Augment class names with bases.
-    The insertion order of the dictonary is assumed to have been in line
+    The insertion order of the dictionary is assumed to have been in line
     number order, so sorting is not necessary.
 
     The current tree only calls this once per child_dict as it saves

diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py
@@ -238,7 +238,7 @@ def __repr__(self):
         if self._kwargs:
             x += ', kwargs=' + str(self._kwargs)
         if self._key[0] is not None:
-            x += ', exitprority=' + str(self._key[0])
+            x += ', exitpriority=' + str(self._key[0])
         return x + '>'
 
 

diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py
@@ -467,7 +467,7 @@ def _check_notify(self, cond):
         # of the workers.
         # Secondly, this test assumes that condition variables are not subject
         # to spurious wakeups.  The absence of spurious wakeups is an implementation
-        # detail of Condition Cariables in current CPython, but in general, not
+        # detail of Condition Variables in current CPython, but in general, not
         # a guaranteed property of condition variables as a programming
         # construct.  In particular, it is possible that this can no longer
         # be conveniently guaranteed should their implementation ever change.

diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
@@ -474,7 +474,7 @@ def test_dash_m_errors(self):
             ('os.path', br'loader.*cannot handle'),
             ('importlib', br'No module named.*'
                 br'is a package and cannot be directly executed'),
-            ('importlib.nonexistant', br'No module named'),
+            ('importlib.nonexistent', br'No module named'),
             ('.unittest', br'Relative module names not supported'),
         )
         for name, regex in tests:

diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
@@ -1927,7 +1927,7 @@ def test_order_preservation(self):
                  'r', 'c', 'd', ' ', 's', 's', 'i', 'i', 'm', 'm', 'l'])
 
         # Math operations order first by the order encountered in the left
-        # operand and then by the order encounted in the right operand.
+        # operand and then by the order encountered in the right operand.
         ps = 'aaabbcdddeefggghhijjjkkl'
         qs = 'abbcccdeefffhkkllllmmnno'
         order = {letter: i for i, letter in enumerate(dict.fromkeys(ps + qs))}

diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
@@ -3025,7 +3025,7 @@ def test_str_subclass_as_dict_key(self):
         # Testing a str subclass used as dict key ..
 
         class cistr(str):
-            """Sublcass of str that computes __eq__ case-insensitively.
+            """Subclass of str that computes __eq__ case-insensitively.
 
             Also computes a hash code of the string in canonical form.
             """

diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
@@ -383,6 +383,22 @@ def test_get_unstructured_ew_without_trailing_whitespace(self):
             [errors.InvalidHeaderDefect],
             '')
 
+    def test_get_unstructured_without_trailing_whitespace_hang_case(self):
+        self._test_get_x(self._get_unst,
+            '=?utf-8?q?somevalue?=aa',
+            'somevalueaa',
+            'somevalueaa',
+            [errors.InvalidHeaderDefect],
+            '')
+
+    def test_get_unstructured_invalid_ew(self):
+        self._test_get_x(self._get_unst,
+            '=?utf-8?q?=somevalue?=',
+            '=?utf-8?q?=somevalue?=',
+            '=?utf-8?q?=somevalue?=',
+            [],
+            '')
+
     # get_qp_ctext
 
     def test_get_qp_ctext_only(self):

diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
@@ -5381,6 +5381,27 @@ def test_rfc2231_unencoded_then_encoded_segments(self):
         eq(language, 'en-us')
         eq(s, 'My Document For You')
 
+    def test_should_not_hang_on_invalid_ew_messages(self):
+        messages = ["""From: [email protected]
+To: [email protected]
+Bad-Header:
+ =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
+ =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
+ =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
+
+Hello!
+""", """From: ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ <xxx@xxx>
+To: "xxx" <xxx@xxx>
+Subject:   ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½
+MIME-Version: 1.0
+Content-Type: text/plain; charset="windows-1251";
+Content-Transfer-Encoding: 8bit
+
+ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½ï¿½
+"""]
+        for m in messages:
+            with self.subTest(m=m):
+                msg = email.message_from_string(m)
 
 
 # Tests to ensure that signed parts of an email are completely preserved, as