From becad9a2a1b5f3deaad24759daec95014218e0db Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 14 Dec 2023 14:36:40 -0600 Subject: [PATCH 01/12] Remove itertool recipe with low pedagogical value (gh-113138) --- Doc/library/itertools.rst | 64 +++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 83e2a9fdb7b4643..36cea9a835f3024 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -1136,24 +1136,6 @@ The following recipes have a more mathematical flavor: n = n // p * (p - 1) return n - def nth_combination(iterable, r, index): - "Equivalent to list(combinations(iterable, r))[index]" - pool = tuple(iterable) - n = len(pool) - c = math.comb(n, r) - if index < 0: - index += c - if index < 0 or index >= c: - raise IndexError - result = [] - while r: - c, n, r = c*r//n, n-1, r-1 - while index >= c: - index -= c - c, n = c*(n-r)//n, n-1 - result.append(pool[-1-n]) - return tuple(result) - .. doctest:: :hide: @@ -1577,20 +1559,6 @@ The following recipes have a more mathematical flavor: >>> first_true('ABC0DEF1', '9', str.isdigit) '0' - >>> population = 'ABCDEFGH' - >>> for r in range(len(population) + 1): - ... seq = list(combinations(population, r)) - ... for i in range(len(seq)): - ... assert nth_combination(population, r, i) == seq[i] - ... for i in range(-len(seq), 0): - ... assert nth_combination(population, r, i) == seq[i] - - >>> iterable = 'abcde' - >>> r = 3 - >>> combos = list(combinations(iterable, r)) - >>> all(nth_combination(iterable, r, i) == comb for i, comb in enumerate(combos)) - True - .. 
testcode:: :hide: @@ -1617,6 +1585,24 @@ The following recipes have a more mathematical flavor: for (a, _), (b, c) in pairwise(pairwise(iterable)): yield a, b, c + def nth_combination(iterable, r, index): + "Equivalent to list(combinations(iterable, r))[index]" + pool = tuple(iterable) + n = len(pool) + c = math.comb(n, r) + if index < 0: + index += c + if index < 0 or index >= c: + raise IndexError + result = [] + while r: + c, n, r = c*r//n, n-1, r-1 + while index >= c: + index -= c + c, n = c*(n-r)//n, n-1 + result.append(pool[-1-n]) + return tuple(result) + .. doctest:: :hide: @@ -1632,3 +1618,17 @@ The following recipes have a more mathematical flavor: >>> list(triplewise('ABCDEFG')) [('A', 'B', 'C'), ('B', 'C', 'D'), ('C', 'D', 'E'), ('D', 'E', 'F'), ('E', 'F', 'G')] + + >>> population = 'ABCDEFGH' + >>> for r in range(len(population) + 1): + ... seq = list(combinations(population, r)) + ... for i in range(len(seq)): + ... assert nth_combination(population, r, i) == seq[i] + ... for i in range(-len(seq), 0): + ... 
assert nth_combination(population, r, i) == seq[i] + + >>> iterable = 'abcde' + >>> r = 3 + >>> combos = list(combinations(iterable, r)) + >>> all(nth_combination(iterable, r, i) == comb for i, comb in enumerate(combos)) + True From 961f1043a022ddeff314f63480a232335a598d6a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Thu, 14 Dec 2023 22:48:36 +0200 Subject: [PATCH 02/12] gh-101100: Fix Sphinx warnings in `whatsnew/2.3.rst` (#112373) --- Doc/tools/.nitignore | 1 - Doc/whatsnew/2.3.rst | 276 +++++++++++++++++++++---------------------- 2 files changed, 138 insertions(+), 139 deletions(-) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index ca0cb84d8509289..20580f78e07b5dd 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -118,7 +118,6 @@ Doc/using/windows.rst Doc/whatsnew/2.0.rst Doc/whatsnew/2.1.rst Doc/whatsnew/2.2.rst -Doc/whatsnew/2.3.rst Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst index 0c77b339a182c9e..8ebcbfaf2485517 100644 --- a/Doc/whatsnew/2.3.rst +++ b/Doc/whatsnew/2.3.rst @@ -40,10 +40,10 @@ new feature. PEP 218: A Standard Set Datatype ================================ -The new :mod:`sets` module contains an implementation of a set datatype. The +The new :mod:`!sets` module contains an implementation of a set datatype. The :class:`Set` class is for mutable sets, sets that can have members added and -removed. The :class:`ImmutableSet` class is for sets that can't be modified, -and instances of :class:`ImmutableSet` can therefore be used as dictionary keys. +removed. The :class:`!ImmutableSet` class is for sets that can't be modified, +and instances of :class:`!ImmutableSet` can therefore be used as dictionary keys. Sets are built on top of dictionaries, so the elements within a set must be hashable. 
@@ -63,10 +63,10 @@ Here's a simple example:: Set([1, 2, 5]) >>> -The union and intersection of sets can be computed with the :meth:`union` and -:meth:`intersection` methods; an alternative notation uses the bitwise operators +The union and intersection of sets can be computed with the :meth:`~frozenset.union` and +:meth:`~frozenset.intersection` methods; an alternative notation uses the bitwise operators ``&`` and ``|``. Mutable sets also have in-place versions of these methods, -:meth:`union_update` and :meth:`intersection_update`. :: +:meth:`!union_update` and :meth:`~frozenset.intersection_update`. :: >>> S1 = sets.Set([1,2,3]) >>> S2 = sets.Set([4,5,6]) @@ -87,7 +87,7 @@ It's also possible to take the symmetric difference of two sets. This is the set of all elements in the union that aren't in the intersection. Another way of putting it is that the symmetric difference contains all elements that are in exactly one set. Again, there's an alternative notation (``^``), and an -in-place version with the ungainly name :meth:`symmetric_difference_update`. :: +in-place version with the ungainly name :meth:`~frozenset.symmetric_difference_update`. :: >>> S1 = sets.Set([1,2,3,4]) >>> S2 = sets.Set([3,4,5,6]) @@ -97,7 +97,7 @@ in-place version with the ungainly name :meth:`symmetric_difference_update`. :: Set([1, 2, 5, 6]) >>> -There are also :meth:`issubset` and :meth:`issuperset` methods for checking +There are also :meth:`!issubset` and :meth:`!issuperset` methods for checking whether one set is a subset or superset of another:: >>> S1 = sets.Set([1,2,3]) @@ -166,7 +166,7 @@ statement isn't allowed inside the :keyword:`try` block of a :keyword:`!try`...\ :keyword:`!finally` statement; read :pep:`255` for a full explanation of the interaction between :keyword:`!yield` and exceptions.) 
-Here's a sample usage of the :func:`generate_ints` generator:: +Here's a sample usage of the :func:`!generate_ints` generator:: >>> gen = generate_ints(3) >>> gen @@ -227,7 +227,7 @@ like:: sentence := "Store it in the neighboring harbor" if (i := find("or", sentence)) > 5 then write(i) -In Icon the :func:`find` function returns the indexes at which the substring +In Icon the :func:`!find` function returns the indexes at which the substring "or" is found: 3, 23, 33. In the :keyword:`if` statement, ``i`` is first assigned a value of 3, but 3 is less than 5, so the comparison fails, and Icon retries it with the second value of 23. 23 is greater than 5, so the comparison @@ -345,7 +345,7 @@ Python now allows using arbitrary Unicode strings (within the limitations of the file system) for all functions that expect file names, most notably the :func:`open` built-in function. If a Unicode string is passed to :func:`os.listdir`, Python now returns a list of Unicode strings. A new -function, :func:`os.getcwdu`, returns the current directory as a Unicode string. +function, :func:`!os.getcwdu`, returns the current directory as a Unicode string. Byte strings still work as file names, and on Windows Python will transparently convert them to Unicode using the ``mbcs`` encoding. @@ -386,10 +386,10 @@ one followed by the platform on which Python is running. Opening a file with the mode ``'U'`` or ``'rU'`` will open a file for reading in :term:`universal newlines` mode. All three line ending conventions will be translated to a ``'\n'`` in the strings returned by the various file methods such as -:meth:`read` and :meth:`readline`. +:meth:`!read` and :meth:`!readline`. Universal newline support is also used when importing modules and when executing -a file with the :func:`execfile` function. This means that Python modules can +a file with the :func:`!execfile` function. 
This means that Python modules can be shared between all three operating systems without needing to convert the line-endings. @@ -450,16 +450,16 @@ records to standard error or to a file or socket, send them to the system log, or even e-mail them to a particular address; of course, it's also possible to write your own handler classes. -The :class:`Logger` class is the primary class. Most application code will deal -with one or more :class:`Logger` objects, each one used by a particular -subsystem of the application. Each :class:`Logger` is identified by a name, and +The :class:`~logging.Logger` class is the primary class. Most application code will deal +with one or more :class:`~logging.Logger` objects, each one used by a particular +subsystem of the application. Each :class:`~logging.Logger` is identified by a name, and names are organized into a hierarchy using ``.`` as the component separator. -For example, you might have :class:`Logger` instances named ``server``, +For example, you might have :class:`~logging.Logger` instances named ``server``, ``server.auth`` and ``server.network``. The latter two instances are below ``server`` in the hierarchy. This means that if you turn up the verbosity for ``server`` or direct ``server`` messages to a different handler, the changes will also apply to records logged to ``server.auth`` and ``server.network``. -There's also a root :class:`Logger` that's the parent of all other loggers. +There's also a root :class:`~logging.Logger` that's the parent of all other loggers. For simple uses, the :mod:`logging` package contains some convenience functions that always use the root log:: @@ -480,14 +480,14 @@ This produces the following output:: In the default configuration, informational and debugging messages are suppressed and the output is sent to standard error. 
You can enable the display -of informational and debugging messages by calling the :meth:`setLevel` method +of informational and debugging messages by calling the :meth:`~logging.Logger.setLevel` method on the root logger. -Notice the :func:`warning` call's use of string formatting operators; all of the +Notice the :func:`~logging.warning` call's use of string formatting operators; all of the functions for logging messages take the arguments ``(msg, arg1, arg2, ...)`` and log the string resulting from ``msg % (arg1, arg2, ...)``. -There's also an :func:`exception` function that records the most recent +There's also an :func:`~logging.exception` function that records the most recent traceback. Any of the other functions will also record the traceback if you specify a true value for the keyword argument *exc_info*. :: @@ -517,16 +517,16 @@ it if it doesn't exist yet. ``getLogger(None)`` returns the root logger. :: ... Log records are usually propagated up the hierarchy, so a message logged to -``server.auth`` is also seen by ``server`` and ``root``, but a :class:`Logger` -can prevent this by setting its :attr:`propagate` attribute to :const:`False`. +``server.auth`` is also seen by ``server`` and ``root``, but a :class:`~logging.Logger` +can prevent this by setting its :attr:`~logging.Logger.propagate` attribute to :const:`False`. There are more classes provided by the :mod:`logging` package that can be -customized. When a :class:`Logger` instance is told to log a message, it -creates a :class:`LogRecord` instance that is sent to any number of different -:class:`Handler` instances. Loggers and handlers can also have an attached list -of filters, and each filter can cause the :class:`LogRecord` to be ignored or +customized. When a :class:`~logging.Logger` instance is told to log a message, it +creates a :class:`~logging.LogRecord` instance that is sent to any number of different +:class:`~logging.Handler` instances. 
Loggers and handlers can also have an attached list +of filters, and each filter can cause the :class:`~logging.LogRecord` to be ignored or can modify the record before passing it along. When they're finally output, -:class:`LogRecord` instances are converted to text by a :class:`Formatter` +:class:`~logging.LogRecord` instances are converted to text by a :class:`~logging.Formatter` class. All of these classes can be replaced by your own specially written classes. @@ -550,7 +550,7 @@ PEP 285: A Boolean Type ======================= A Boolean type was added to Python 2.3. Two new constants were added to the -:mod:`__builtin__` module, :const:`True` and :const:`False`. (:const:`True` and +:mod:`!__builtin__` module, :const:`True` and :const:`False`. (:const:`True` and :const:`False` constants were added to the built-ins in Python 2.2.1, but the 2.2.1 versions are simply set to integer values of 1 and 0 and aren't a different type.) @@ -662,7 +662,7 @@ a central catalog server. The resulting catalog is available from https://pypi.org. To make the catalog a bit more useful, a new optional *classifiers* keyword -argument has been added to the Distutils :func:`setup` function. A list of +argument has been added to the Distutils :func:`!setup` function. A list of `Trove `_-style strings can be supplied to help classify the software. @@ -703,14 +703,14 @@ PEP 302: New Import Hooks ========================= While it's been possible to write custom import hooks ever since the -:mod:`ihooks` module was introduced in Python 1.3, no one has ever been really +:mod:`!ihooks` module was introduced in Python 1.3, no one has ever been really happy with it because writing new import hooks is difficult and messy. 
There -have been various proposed alternatives such as the :mod:`imputil` and :mod:`iu` +have been various proposed alternatives such as the :mod:`!imputil` and :mod:`!iu` modules, but none of them has ever gained much acceptance, and none of them were easily usable from C code. :pep:`302` borrows ideas from its predecessors, especially from Gordon -McMillan's :mod:`iu` module. Three new items are added to the :mod:`sys` +McMillan's :mod:`!iu` module. Three new items are added to the :mod:`sys` module: * ``sys.path_hooks`` is a list of callable objects; most often they'll be @@ -790,7 +790,7 @@ package is much simpler:: for line in reader: print line -The :func:`reader` function takes a number of different options. The field +The :func:`~csv.reader` function takes a number of different options. The field separator isn't limited to the comma and can be changed to any character, and so can the quoting and line-ending characters. @@ -814,7 +814,7 @@ of tuples or lists, quoting strings that contain the delimiter. PEP 307: Pickle Enhancements ============================ -The :mod:`pickle` and :mod:`cPickle` modules received some attention during the +The :mod:`pickle` and :mod:`!cPickle` modules received some attention during the 2.3 development cycle. In 2.2, new-style classes could be pickled without difficulty, but they weren't pickled very compactly; :pep:`307` quotes a trivial example where a new-style class results in a pickled string three times longer @@ -829,13 +829,13 @@ fanciest protocol available. Unpickling is no longer considered a safe operation. 2.2's :mod:`pickle` provided hooks for trying to prevent unsafe classes from being unpickled -(specifically, a :attr:`__safe_for_unpickling__` attribute), but none of this +(specifically, a :attr:`!__safe_for_unpickling__` attribute), but none of this code was ever audited and therefore it's all been ripped out in 2.3. You should not unpickle untrusted data in any version of Python. 
To reduce the pickling overhead for new-style classes, a new interface for customizing pickling was added using three special methods: -:meth:`__getstate__`, :meth:`__setstate__`, and :meth:`__getnewargs__`. Consult +:meth:`~object.__getstate__`, :meth:`~object.__setstate__`, and :meth:`~object.__getnewargs__`. Consult :pep:`307` for the full semantics of these methods. As a way to compress pickles yet further, it's now possible to use integer codes @@ -939,7 +939,7 @@ Or use slice objects directly in subscripts:: To simplify implementing sequences that support extended slicing, slice objects now have a method ``indices(length)`` which, given the length of a sequence, returns a ``(start, stop, step)`` tuple that can be passed directly to -:func:`range`. :meth:`indices` handles omitted and out-of-bounds indices in a +:func:`range`. :meth:`!indices` handles omitted and out-of-bounds indices in a manner consistent with regular slices (and this innocuous phrase hides a welter of confusing details!). The method is intended to be used like this:: @@ -1042,7 +1042,7 @@ Here are all of the changes that Python 2.3 makes to the core Python language. execute any assertions. * Most type objects are now callable, so you can use them to create new objects - such as functions, classes, and modules. (This means that the :mod:`new` module + such as functions, classes, and modules. (This means that the :mod:`!new` module can be deprecated in a future Python version, because you can now use the type objects available in the :mod:`types` module.) For example, you can create a new module object with the following code: @@ -1069,11 +1069,11 @@ Here are all of the changes that Python 2.3 makes to the core Python language. * Using ``None`` as a variable name will now result in a :exc:`SyntaxWarning` warning. In a future version of Python, ``None`` may finally become a keyword. 
-* The :meth:`xreadlines` method of file objects, introduced in Python 2.1, is no +* The :meth:`!xreadlines` method of file objects, introduced in Python 2.1, is no longer necessary because files now behave as their own iterator. - :meth:`xreadlines` was originally introduced as a faster way to loop over all + :meth:`!xreadlines` was originally introduced as a faster way to loop over all the lines in a file, but now you can simply write ``for line in file_obj``. - File objects also have a new read-only :attr:`encoding` attribute that gives the + File objects also have a new read-only :attr:`!encoding` attribute that gives the encoding used by the file; Unicode strings written to the file will be automatically converted to bytes using the given encoding. @@ -1096,12 +1096,12 @@ Here are all of the changes that Python 2.3 makes to the core Python language. switching overhead. Some multithreaded applications may suffer slower response time, but that's easily fixed by setting the limit back to a lower number using ``sys.setcheckinterval(N)``. The limit can be retrieved with the new - :func:`sys.getcheckinterval` function. + :func:`!sys.getcheckinterval` function. * One minor but far-reaching change is that the names of extension types defined by the modules included with Python now contain the module and a ``'.'`` in front of the type name. For example, in Python 2.2, if you created a socket and - printed its :attr:`__class__`, you'd get this output:: + printed its :attr:`!__class__`, you'd get this output:: >>> s = socket.socket() >>> s.__class__ @@ -1138,9 +1138,9 @@ String Changes True Note that this doesn't tell you where the substring starts; if you need that - information, use the :meth:`find` string method. + information, use the :meth:`~str.find` string method. 
-* The :meth:`strip`, :meth:`lstrip`, and :meth:`rstrip` string methods now have +* The :meth:`~str.strip`, :meth:`~str.lstrip`, and :meth:`~str.rstrip` string methods now have an optional argument for specifying the characters to strip. The default is still to remove all whitespace characters:: @@ -1156,13 +1156,13 @@ String Changes (Suggested by Simon Brunning and implemented by Walter Dörwald.) -* The :meth:`startswith` and :meth:`endswith` string methods now accept negative +* The :meth:`~str.startswith` and :meth:`~str.endswith` string methods now accept negative numbers for the *start* and *end* parameters. -* Another new string method is :meth:`zfill`, originally a function in the - :mod:`string` module. :meth:`zfill` pads a numeric string with zeros on the +* Another new string method is :meth:`~str.zfill`, originally a function in the + :mod:`string` module. :meth:`~str.zfill` pads a numeric string with zeros on the left until it's the specified width. Note that the ``%`` operator is still more - flexible and powerful than :meth:`zfill`. :: + flexible and powerful than :meth:`~str.zfill`. :: >>> '45'.zfill(4) '0045' @@ -1173,10 +1173,10 @@ String Changes (Contributed by Walter Dörwald.) -* A new type object, :class:`basestring`, has been added. Both 8-bit strings and +* A new type object, :class:`!basestring`, has been added. Both 8-bit strings and Unicode strings inherit from this type, so ``isinstance(obj, basestring)`` will return :const:`True` for either kind of string. It's a completely abstract - type, so you can't create :class:`basestring` instances. + type, so you can't create :class:`!basestring` instances. * Interned strings are no longer immortal and will now be garbage-collected in the usual way when the only reference to them is from the internal dictionary of @@ -1191,7 +1191,7 @@ Optimizations * The creation of new-style class instances has been made much faster; they're now faster than classic classes! 
-* The :meth:`sort` method of list objects has been extensively rewritten by Tim +* The :meth:`~list.sort` method of list objects has been extensively rewritten by Tim Peters, and the implementation is significantly faster. * Multiplication of large long integers is now much faster thanks to an @@ -1203,7 +1203,7 @@ Optimizations increase, depending on your compiler's idiosyncrasies. See section :ref:`23section-other` for a longer explanation. (Removed by Michael Hudson.) -* :func:`xrange` objects now have their own iterator, making ``for i in +* :func:`!xrange` objects now have their own iterator, making ``for i in xrange(n)`` slightly faster than ``for i in range(n)``. (Patch by Raymond Hettinger.) @@ -1230,21 +1230,21 @@ complete list of changes, or look through the CVS logs for all the details. operator to add another array's contents, and the ``*=`` assignment operator to repeat an array. (Contributed by Jason Orendorff.) -* The :mod:`bsddb` module has been replaced by version 4.1.6 of the `PyBSDDB +* The :mod:`!bsddb` module has been replaced by version 4.1.6 of the `PyBSDDB `_ package, providing a more complete interface to the transactional features of the BerkeleyDB library. - The old version of the module has been renamed to :mod:`bsddb185` and is no + The old version of the module has been renamed to :mod:`!bsddb185` and is no longer built automatically; you'll have to edit :file:`Modules/Setup` to enable - it. Note that the new :mod:`bsddb` package is intended to be compatible with + it. Note that the new :mod:`!bsddb` package is intended to be compatible with the old module, so be sure to file bugs if you discover any incompatibilities. When upgrading to Python 2.3, if the new interpreter is compiled with a new version of the underlying BerkeleyDB library, you will almost certainly have to convert your database files to the new version. 
You can do this fairly easily with the new scripts :file:`db2pickle.py` and :file:`pickle2db.py` which you will find in the distribution's :file:`Tools/scripts` directory. If you've - already been using the PyBSDDB package and importing it as :mod:`bsddb3`, you - will have to change your ``import`` statements to import it as :mod:`bsddb`. + already been using the PyBSDDB package and importing it as :mod:`!bsddb3`, you + will have to change your ``import`` statements to import it as :mod:`!bsddb`. * The new :mod:`bz2` module is an interface to the bz2 data compression library. bz2-compressed data is usually smaller than corresponding @@ -1253,11 +1253,11 @@ complete list of changes, or look through the CVS logs for all the details. * A set of standard date/time types has been added in the new :mod:`datetime` module. See the following section for more details. -* The Distutils :class:`Extension` class now supports an extra constructor +* The Distutils :class:`!Extension` class now supports an extra constructor argument named *depends* for listing additional source files that an extension depends on. This lets Distutils recompile the module if any of the dependency files are modified. For example, if :file:`sampmodule.c` includes the header - file :file:`sample.h`, you would create the :class:`Extension` object like + file :file:`sample.h`, you would create the :class:`!Extension` object like this:: ext = Extension("samp", @@ -1268,21 +1268,21 @@ complete list of changes, or look through the CVS logs for all the details. (Contributed by Jeremy Hylton.) * Other minor changes to Distutils: it now checks for the :envvar:`CC`, - :envvar:`CFLAGS`, :envvar:`CPP`, :envvar:`LDFLAGS`, and :envvar:`CPPFLAGS` + :envvar:`CFLAGS`, :envvar:`!CPP`, :envvar:`LDFLAGS`, and :envvar:`CPPFLAGS` environment variables, using them to override the settings in Python's configuration (contributed by Robert Weber). 
* Previously the :mod:`doctest` module would only search the docstrings of public methods and functions for test cases, but it now also examines private - ones as well. The :func:`DocTestSuite` function creates a + ones as well. The :func:`~doctest.DocTestSuite` function creates a :class:`unittest.TestSuite` object from a set of :mod:`doctest` tests. * The new ``gc.get_referents(object)`` function returns a list of all the objects referenced by *object*. -* The :mod:`getopt` module gained a new function, :func:`gnu_getopt`, that - supports the same arguments as the existing :func:`getopt` function but uses - GNU-style scanning mode. The existing :func:`getopt` stops processing options as +* The :mod:`getopt` module gained a new function, :func:`~getopt.gnu_getopt`, that + supports the same arguments as the existing :func:`~getopt.getopt` function but uses + GNU-style scanning mode. The existing :func:`~getopt.getopt` stops processing options as soon as a non-option argument is encountered, but in GNU-style mode processing continues, meaning that options and arguments can be mixed. For example:: @@ -1311,7 +1311,7 @@ complete list of changes, or look through the CVS logs for all the details. O(lg n). (See https://xlinux.nist.gov/dads//HTML/priorityque.html for more information about the priority queue data structure.) - The :mod:`heapq` module provides :func:`heappush` and :func:`heappop` functions + The :mod:`heapq` module provides :func:`~heapq.heappush` and :func:`~heapq.heappop` functions for adding and removing items while maintaining the heap property on top of some other mutable Python sequence type. Here's an example that uses a Python list:: @@ -1343,7 +1343,7 @@ complete list of changes, or look through the CVS logs for all the details. * The :mod:`itertools` contains a number of useful functions for use with iterators, inspired by various functions provided by the ML and Haskell languages. 
For example, ``itertools.ifilter(predicate, iterator)`` returns all - elements in the iterator for which the function :func:`predicate` returns + elements in the iterator for which the function :func:`!predicate` returns :const:`True`, and ``itertools.repeat(obj, N)`` returns ``obj`` *N* times. There are a number of other functions in the module; see the package's reference documentation for details. @@ -1356,9 +1356,9 @@ complete list of changes, or look through the CVS logs for all the details. was added to :func:`math.log` to make it easier to compute logarithms for bases other than ``e`` and ``10``. (Contributed by Raymond Hettinger.) -* Several new POSIX functions (:func:`getpgid`, :func:`killpg`, :func:`lchown`, - :func:`loadavg`, :func:`major`, :func:`makedev`, :func:`minor`, and - :func:`mknod`) were added to the :mod:`posix` module that underlies the +* Several new POSIX functions (:func:`!getpgid`, :func:`!killpg`, :func:`!lchown`, + :func:`!loadavg`, :func:`!major`, :func:`!makedev`, :func:`!minor`, and + :func:`!mknod`) were added to the :mod:`posix` module that underlies the :mod:`os` module. (Contributed by Gustavo Niemeyer, Geert Jansen, and Denis S. Otkidach.) @@ -1368,9 +1368,9 @@ complete list of changes, or look through the CVS logs for all the details. During testing, it was found that some applications will break if time stamps are floats. For compatibility, when using the tuple interface of the - :class:`stat_result` time stamps will be represented as integers. When using + :class:`~os.stat_result` time stamps will be represented as integers. When using named fields (a feature first introduced in Python 2.2), time stamps are still - represented as integers, unless :func:`os.stat_float_times` is invoked to enable + represented as integers, unless :func:`!os.stat_float_times` is invoked to enable float return values:: >>> os.stat("/tmp").st_mtime @@ -1391,7 +1391,7 @@ complete list of changes, or look through the CVS logs for all the details. 
automatically generate a usage message. See the following section for more details. -* The old and never-documented :mod:`linuxaudiodev` module has been deprecated, +* The old and never-documented :mod:`!linuxaudiodev` module has been deprecated, and a new version named :mod:`!ossaudiodev` has been added. The module was renamed because the OSS sound drivers can be used on platforms other than Linux, and the interface has also been tidied and brought up to date in various ways. @@ -1402,14 +1402,14 @@ complete list of changes, or look through the CVS logs for all the details. functions for getting the architecture, CPU type, the Windows OS version, and even the Linux distribution version. (Contributed by Marc-André Lemburg.) -* The parser objects provided by the :mod:`pyexpat` module can now optionally +* The parser objects provided by the :mod:`pyexpat <xml.parsers.expat>` module can now optionally buffer character data, resulting in fewer calls to your character data handler and therefore faster performance. Setting the parser object's - :attr:`buffer_text` attribute to :const:`True` will enable buffering. + :attr:`~xml.parsers.expat.xmlparser.buffer_text` attribute to :const:`True` will enable buffering. * The ``sample(population, k)`` function was added to the :mod:`random` - module. *population* is a sequence or :class:`xrange` object containing the - elements of a population, and :func:`sample` chooses *k* elements from the + module. *population* is a sequence or :class:`!xrange` object containing the + elements of a population, and :func:`~random.sample` chooses *k* elements from the population without replacing chosen elements. *k* can be any value up to ``len(population)``. For example:: @@ -1436,20 +1436,20 @@ complete list of changes, or look through the CVS logs for all the details. (All changes contributed by Raymond Hettinger.) 
* The :mod:`readline` module also gained a number of new functions: - :func:`get_history_item`, :func:`get_current_history_length`, and - :func:`redisplay`. + :func:`~readline.get_history_item`, :func:`~readline.get_current_history_length`, and + :func:`~readline.redisplay`. -* The :mod:`rexec` and :mod:`Bastion` modules have been declared dead, and +* The :mod:`!rexec` and :mod:`!Bastion` modules have been declared dead, and attempts to import them will fail with a :exc:`RuntimeError`. New-style classes provide new ways to break out of the restricted execution environment provided - by :mod:`rexec`, and no one has interest in fixing them or time to do so. If - you have applications using :mod:`rexec`, rewrite them to use something else. + by :mod:`!rexec`, and no one has interest in fixing them or time to do so. If + you have applications using :mod:`!rexec`, rewrite them to use something else. (Sticking with Python 2.2 or 2.1 will not make your applications any safer - because there are known bugs in the :mod:`rexec` module in those versions. To - repeat: if you're using :mod:`rexec`, stop using it immediately.) + because there are known bugs in the :mod:`!rexec` module in those versions. To + repeat: if you're using :mod:`!rexec`, stop using it immediately.) -* The :mod:`rotor` module has been deprecated because the algorithm it uses for +* The :mod:`!rotor` module has been deprecated because the algorithm it uses for encryption is not believed to be secure. If you need encryption, use one of the several AES Python modules that are available separately. @@ -1474,9 +1474,9 @@ complete list of changes, or look through the CVS logs for all the details. * On Windows, the :mod:`socket` module now ships with Secure Sockets Layer (SSL) support. -* The value of the C :c:macro:`PYTHON_API_VERSION` macro is now exposed at the +* The value of the C :c:macro:`!PYTHON_API_VERSION` macro is now exposed at the Python level as ``sys.api_version``. 
The current exception can be cleared by - calling the new :func:`sys.exc_clear` function. + calling the new :func:`!sys.exc_clear` function. * The new :mod:`tarfile` module allows reading from and writing to :program:`tar`\ -format archive files. (Contributed by Lars Gustäbel.) @@ -1486,7 +1486,7 @@ complete list of changes, or look through the CVS logs for all the details. string and returns a list containing the text split into lines of no more than the chosen width. The ``fill(text, width)`` function returns a single string, reformatted to fit into lines no longer than the chosen width. (As you - can guess, :func:`fill` is built on top of :func:`wrap`. For example:: + can guess, :func:`~textwrap.fill` is built on top of :func:`~textwrap.wrap`. For example:: >>> import textwrap >>> paragraph = "Not a whit, we defy augury: ... more text ..." @@ -1503,15 +1503,15 @@ complete list of changes, or look through the CVS logs for all the details. it will come: the readiness is all. >>> - The module also contains a :class:`TextWrapper` class that actually implements - the text wrapping strategy. Both the :class:`TextWrapper` class and the - :func:`wrap` and :func:`fill` functions support a number of additional keyword + The module also contains a :class:`~textwrap.TextWrapper` class that actually implements + the text wrapping strategy. Both the :class:`~textwrap.TextWrapper` class and the + :func:`~textwrap.wrap` and :func:`~textwrap.fill` functions support a number of additional keyword arguments for fine-tuning the formatting; consult the module's documentation for details. (Contributed by Greg Ward.) 
-* The :mod:`thread` and :mod:`threading` modules now have companion modules, - :mod:`dummy_thread` and :mod:`dummy_threading`, that provide a do-nothing - implementation of the :mod:`thread` module's interface for platforms where +* The :mod:`!thread` and :mod:`threading` modules now have companion modules, + :mod:`!dummy_thread` and :mod:`!dummy_threading`, that provide a do-nothing + implementation of the :mod:`!thread` module's interface for platforms where threads are not supported. The intention is to simplify thread-aware modules (ones that *don't* rely on threads to run) by putting the following code at the top:: @@ -1521,26 +1521,26 @@ complete list of changes, or look through the CVS logs for all the details. except ImportError: import dummy_threading as _threading - In this example, :mod:`_threading` is used as the module name to make it clear + In this example, :mod:`!_threading` is used as the module name to make it clear that the module being used is not necessarily the actual :mod:`threading` - module. Code can call functions and use classes in :mod:`_threading` whether or + module. Code can call functions and use classes in :mod:`!_threading` whether or not threads are supported, avoiding an :keyword:`if` statement and making the code slightly clearer. This module will not magically make multithreaded code run without threads; code that waits for another thread to return or to do something will simply hang forever. -* The :mod:`time` module's :func:`strptime` function has long been an annoyance - because it uses the platform C library's :func:`strptime` implementation, and +* The :mod:`time` module's :func:`~time.strptime` function has long been an annoyance + because it uses the platform C library's :func:`~time.strptime` implementation, and different platforms sometimes have odd bugs. Brett Cannon contributed a portable implementation that's written in pure Python and should behave identically on all platforms. 
* The new :mod:`timeit` module helps measure how long snippets of Python code take to execute. The :file:`timeit.py` file can be run directly from the - command line, or the module's :class:`Timer` class can be imported and used + command line, or the module's :class:`~timeit.Timer` class can be imported and used directly. Here's a short example that figures out whether it's faster to convert an 8-bit string to Unicode by appending an empty Unicode string to it or - by using the :func:`unicode` function:: + by using the :func:`!unicode` function:: import timeit @@ -1558,46 +1558,46 @@ complete list of changes, or look through the CVS logs for all the details. * The :mod:`!Tix` module has received various bug fixes and updates for the current version of the Tix package. -* The :mod:`Tkinter` module now works with a thread-enabled version of Tcl. +* The :mod:`!Tkinter` module now works with a thread-enabled version of Tcl. Tcl's threading model requires that widgets only be accessed from the thread in which they're created; accesses from another thread can cause Tcl to panic. For - certain Tcl interfaces, :mod:`Tkinter` will now automatically avoid this when a + certain Tcl interfaces, :mod:`!Tkinter` will now automatically avoid this when a widget is accessed from a different thread by marshalling a command, passing it to the correct thread, and waiting for the results. Other interfaces can't be - handled automatically but :mod:`Tkinter` will now raise an exception on such an + handled automatically but :mod:`!Tkinter` will now raise an exception on such an access so that you can at least find out about the problem. See https://mail.python.org/pipermail/python-dev/2002-December/031107.html for a more detailed explanation of this change. (Implemented by Martin von Löwis.) -* Calling Tcl methods through :mod:`_tkinter` no longer returns only strings. +* Calling Tcl methods through :mod:`!_tkinter` no longer returns only strings. 
Instead, if Tcl returns other objects those objects are converted to their - Python equivalent, if one exists, or wrapped with a :class:`_tkinter.Tcl_Obj` + Python equivalent, if one exists, or wrapped with a :class:`!_tkinter.Tcl_Obj` object if no Python equivalent exists. This behavior can be controlled through - the :meth:`wantobjects` method of :class:`tkapp` objects. + the :meth:`!wantobjects` method of :class:`!tkapp` objects. - When using :mod:`_tkinter` through the :mod:`Tkinter` module (as most Tkinter + When using :mod:`!_tkinter` through the :mod:`!Tkinter` module (as most Tkinter applications will), this feature is always activated. It should not cause compatibility problems, since Tkinter would always convert string results to Python types where possible. If any incompatibilities are found, the old behavior can be restored by setting - the :attr:`wantobjects` variable in the :mod:`Tkinter` module to false before - creating the first :class:`tkapp` object. :: + the :attr:`!wantobjects` variable in the :mod:`!Tkinter` module to false before + creating the first :class:`!tkapp` object. :: import Tkinter Tkinter.wantobjects = 0 Any breakage caused by this change should be reported as a bug. -* The :mod:`UserDict` module has a new :class:`DictMixin` class which defines +* The :mod:`!UserDict` module has a new :class:`!DictMixin` class which defines all dictionary methods for classes that already have a minimum mapping interface. This greatly simplifies writing classes that need to be substitutable for dictionaries, such as the classes in the :mod:`shelve` module. Adding the mix-in as a superclass provides the full dictionary interface - whenever the class defines :meth:`~object.__getitem__`, :meth:`__setitem__`, - :meth:`__delitem__`, and :meth:`keys`. For example:: + whenever the class defines :meth:`~object.__getitem__`, :meth:`~object.__setitem__`, + :meth:`~object.__delitem__`, and :meth:`!keys`. 
For example:: >>> import UserDict >>> class SeqDict(UserDict.DictMixin): @@ -1640,15 +1640,15 @@ complete list of changes, or look through the CVS logs for all the details. * The DOM implementation in :mod:`xml.dom.minidom` can now generate XML output in a particular encoding by providing an optional encoding argument to the - :meth:`toxml` and :meth:`toprettyxml` methods of DOM nodes. + :meth:`~xml.dom.minidom.Node.toxml` and :meth:`~xml.dom.minidom.Node.toprettyxml` methods of DOM nodes. -* The :mod:`xmlrpclib` module now supports an XML-RPC extension for handling nil +* The :mod:`!xmlrpclib` module now supports an XML-RPC extension for handling nil data values such as Python's ``None``. Nil values are always supported on unmarshalling an XML-RPC response. To generate requests containing ``None``, you must supply a true value for the *allow_none* parameter when creating a - :class:`Marshaller` instance. + :class:`!Marshaller` instance. -* The new :mod:`DocXMLRPCServer` module allows writing self-documenting XML-RPC +* The new :mod:`!DocXMLRPCServer` module allows writing self-documenting XML-RPC servers. Run it in demo mode (as a program) to see it in action. Pointing the web browser to the RPC server produces pydoc-style documentation; pointing xmlrpclib to the server allows invoking the actual methods. (Contributed by @@ -1663,8 +1663,8 @@ complete list of changes, or look through the CVS logs for all the details. The :mod:`socket` module has also been extended to transparently convert Unicode hostnames to the ACE version before passing them to the C library. - Modules that deal with hostnames such as :mod:`httplib` and :mod:`ftplib`) - also support Unicode host names; :mod:`httplib` also sends HTTP ``Host`` + Modules that deal with hostnames such as :mod:`!httplib` and :mod:`ftplib`) + also support Unicode host names; :mod:`!httplib` also sends HTTP ``Host`` headers using the ACE version of the domain name. 
:mod:`urllib` supports Unicode URLs with non-ASCII host names as long as the ``path`` part of the URL is ASCII only. @@ -1682,17 +1682,17 @@ Date and time types suitable for expressing timestamps were added as the :mod:`datetime` module. The types don't support different calendars or many fancy features, and just stick to the basics of representing time. -The three primary types are: :class:`date`, representing a day, month, and year; +The three primary types are: :class:`~datetime.date`, representing a day, month, and year; :class:`~datetime.time`, consisting of hour, minute, and second; and :class:`~datetime.datetime`, -which contains all the attributes of both :class:`date` and :class:`~datetime.time`. -There's also a :class:`timedelta` class representing differences between two +which contains all the attributes of both :class:`~datetime.date` and :class:`~datetime.time`. +There's also a :class:`~datetime.timedelta` class representing differences between two points in time, and time zone logic is implemented by classes inheriting from -the abstract :class:`tzinfo` class. +the abstract :class:`~datetime.tzinfo` class. -You can create instances of :class:`date` and :class:`~datetime.time` by either supplying +You can create instances of :class:`~datetime.date` and :class:`~datetime.time` by either supplying keyword arguments to the appropriate constructor, e.g. ``datetime.date(year=1972, month=10, day=15)``, or by using one of a number of -class methods. For example, the :meth:`date.today` class method returns the +class methods. For example, the :meth:`~datetime.date.today` class method returns the current local date. Once created, instances of the date/time classes are all immutable. 
There are a @@ -1707,8 +1707,8 @@ number of methods for producing formatted strings from objects:: >>> now.strftime('%Y %d %b') '2002 30 Dec' -The :meth:`replace` method allows modifying one or more fields of a -:class:`date` or :class:`~datetime.datetime` instance, returning a new instance:: +The :meth:`~datetime.datetime.replace` method allows modifying one or more fields of a +:class:`~datetime.date` or :class:`~datetime.datetime` instance, returning a new instance:: >>> d = datetime.datetime.now() >>> d @@ -1718,10 +1718,10 @@ The :meth:`replace` method allows modifying one or more fields of a >>> Instances can be compared, hashed, and converted to strings (the result is the -same as that of :meth:`isoformat`). :class:`date` and :class:`~datetime.datetime` -instances can be subtracted from each other, and added to :class:`timedelta` +same as that of :meth:`~datetime.datetime.isoformat`). :class:`~datetime.date` and :class:`~datetime.datetime` +instances can be subtracted from each other, and added to :class:`~datetime.timedelta` instances. The largest missing feature is that there's no standard library -support for parsing strings and getting back a :class:`date` or +support for parsing strings and getting back a :class:`~datetime.date` or :class:`~datetime.datetime`. For more information, refer to the module's reference documentation. @@ -1739,7 +1739,7 @@ command-line parsing that follows the Unix conventions, automatically creates the output for :option:`!--help`, and can perform different actions for different options. -You start by creating an instance of :class:`OptionParser` and telling it what +You start by creating an instance of :class:`~optparse.OptionParser` and telling it what your program's options are. :: import sys @@ -1753,7 +1753,7 @@ your program's options are. :: action='store', type='int', dest='length', help='set maximum length of output') -Parsing a command line is then done by calling the :meth:`parse_args` method. 
:: +Parsing a command line is then done by calling the :meth:`~optparse.OptionParser.parse_args` method. :: options, args = op.parse_args(sys.argv[1:]) print options @@ -1925,7 +1925,7 @@ Changes to Python's build process and to the C API include: dependence on a system version or local installation of Expat. * If you dynamically allocate type objects in your extension, you should be - aware of a change in the rules relating to the :attr:`__module__` and + aware of a change in the rules relating to the :attr:`!__module__` and :attr:`~definition.__name__` attributes. In summary, you will want to ensure the type's dictionary contains a ``'__module__'`` key; making the module name the part of the type name leading up to the final period will no longer have the desired @@ -1940,7 +1940,7 @@ Port-Specific Changes Support for a port to IBM's OS/2 using the EMX runtime environment was merged into the main Python source tree. EMX is a POSIX emulation layer over the OS/2 system APIs. The Python port for EMX tries to support all the POSIX-like -capability exposed by the EMX runtime, and mostly succeeds; :func:`fork` and +capability exposed by the EMX runtime, and mostly succeeds; :func:`!fork` and :func:`fcntl` are restricted by the limitations of the underlying emulation layer. The standard OS/2 port, which uses IBM's Visual Age compiler, also gained support for case-sensitive import semantics as part of the integration of @@ -2031,9 +2031,9 @@ code: the file's encoding (UTF-8, Latin-1, or whatever) by adding a comment to the top of the file. See section :ref:`section-encodings` for more information. -* Calling Tcl methods through :mod:`_tkinter` no longer returns only strings. +* Calling Tcl methods through :mod:`!_tkinter` no longer returns only strings. 
Instead, if Tcl returns other objects those objects are converted to their - Python equivalent, if one exists, or wrapped with a :class:`_tkinter.Tcl_Obj` + Python equivalent, if one exists, or wrapped with a :class:`!_tkinter.Tcl_Obj` object if no Python equivalent exists. * Large octal and hex literals such as ``0xffffffff`` now trigger a @@ -2049,10 +2049,10 @@ code: * You can no longer disable assertions by assigning to ``__debug__``. -* The Distutils :func:`setup` function has gained various new keyword arguments +* The Distutils :func:`!setup` function has gained various new keyword arguments such as *depends*. Old versions of the Distutils will abort if passed unknown keywords. A solution is to check for the presence of the new - :func:`get_distutil_options` function in your :file:`setup.py` and only uses the + :func:`!get_distutil_options` function in your :file:`setup.py` and only uses the new keywords with a version of the Distutils that supports them:: from distutils import core From 25061f5c98a47691fdb70f550943167bda77f6e0 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Thu, 14 Dec 2023 21:10:26 +0000 Subject: [PATCH 03/12] gh-101100: Cleanup `mailbox` docs (#113124) --- Doc/library/mailbox.rst | 261 +++++++++++++++++++++------------------- Doc/tools/.nitignore | 1 - 2 files changed, 139 insertions(+), 123 deletions(-) diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index fd60d163378f071..c98496d1fff993f 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -13,8 +13,8 @@ This module defines two classes, :class:`Mailbox` and :class:`Message`, for accessing and manipulating on-disk mailboxes and the messages they contain. -:class:`Mailbox` offers a dictionary-like mapping from keys to messages. -:class:`Message` extends the :mod:`email.message` module's +:class:`!Mailbox` offers a dictionary-like mapping from keys to messages. 
+:class:`!Message` extends the :mod:`email.message` module's :class:`~email.message.Message` class with format-specific state and behavior. Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. @@ -27,37 +27,38 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-objects: -:class:`Mailbox` objects ------------------------- +:class:`!Mailbox` objects +------------------------- .. class:: Mailbox A mailbox, which may be inspected and modified. - The :class:`Mailbox` class defines an interface and is not intended to be + The :class:`!Mailbox` class defines an interface and is not intended to be instantiated. Instead, format-specific subclasses should inherit from - :class:`Mailbox` and your code should instantiate a particular subclass. + :class:`!Mailbox` and your code should instantiate a particular subclass. - The :class:`Mailbox` interface is dictionary-like, with small keys - corresponding to messages. Keys are issued by the :class:`Mailbox` instance - with which they will be used and are only meaningful to that :class:`Mailbox` + The :class:`!Mailbox` interface is dictionary-like, with small keys + corresponding to messages. Keys are issued by the :class:`!Mailbox` instance + with which they will be used and are only meaningful to that :class:`!Mailbox` instance. A key continues to identify a message even if the corresponding message is modified, such as by replacing it with another message. - Messages may be added to a :class:`Mailbox` instance using the set-like + Messages may be added to a :class:`!Mailbox` instance using the set-like method :meth:`add` and removed using a ``del`` statement or the set-like methods :meth:`remove` and :meth:`discard`. - :class:`Mailbox` interface semantics differ from dictionary semantics in some + :class:`!Mailbox` interface semantics differ from dictionary semantics in some noteworthy ways. 
Each time a message is requested, a new representation (typically a :class:`Message` instance) is generated based upon the current state of the mailbox. Similarly, when a message is added to a - :class:`Mailbox` instance, the provided message representation's contents are + :class:`!Mailbox` instance, the provided message representation's contents are copied. In neither case is a reference to the message representation kept by - the :class:`Mailbox` instance. + the :class:`!Mailbox` instance. - The default :class:`Mailbox` iterator iterates over message representations, - not keys as the default dictionary iterator does. Moreover, modification of a + The default :class:`!Mailbox` :term:`iterator` iterates over message + representations, not keys as the default :class:`dictionary <dict>` + iterator does. Moreover, modification of a mailbox during iteration is safe and well-defined. Messages added to the mailbox after an iterator is created will not be seen by the iterator. Messages removed from the mailbox before the iterator yields them @@ -69,14 +70,15 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. Be very cautious when modifying mailboxes that might be simultaneously changed by some other process. The safest mailbox format to use for such - tasks is Maildir; try to avoid using single-file formats such as mbox for + tasks is :class:`Maildir`; try to avoid using single-file formats such as + :class:`mbox` for concurrent writing. If you're modifying a mailbox, you *must* lock it by calling the :meth:`lock` and :meth:`unlock` methods *before* reading any messages in the file or making any changes by adding or deleting a message. Failing to lock the mailbox runs the risk of losing messages or corrupting the entire mailbox. - :class:`Mailbox` instances have the following methods: + :class:`!Mailbox` instances have the following methods: .. method:: add(message) @@ -127,21 +129,23 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. ..
method:: iterkeys() - keys() - Return an iterator over all keys if called as :meth:`iterkeys` or return a - list of keys if called as :meth:`keys`. + Return an :term:`iterator` over all keys + + + .. method:: keys() + + The same as :meth:`iterkeys`, except that a :class:`list` is returned + rather than an :term:`iterator` .. method:: itervalues() __iter__() - values() - Return an iterator over representations of all messages if called as - :meth:`itervalues` or :meth:`__iter__` or return a list of such - representations if called as :meth:`values`. The messages are represented + Return an :term:`iterator` over representations of all messages. + The messages are represented as instances of the appropriate format-specific :class:`Message` subclass - unless a custom message factory was specified when the :class:`Mailbox` + unless a custom message factory was specified when the :class:`!Mailbox` instance was initialized. .. note:: @@ -150,15 +154,25 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. iterate over keys. + .. method:: values() + + The same as :meth:`itervalues`, except that a :class:`list` is returned + rather than an :term:`iterator` + + .. method:: iteritems() - items() - Return an iterator over (*key*, *message*) pairs, where *key* is a key and - *message* is a message representation, if called as :meth:`iteritems` or - return a list of such pairs if called as :meth:`items`. The messages are + Return an :term:`iterator` over (*key*, *message*) pairs, where *key* is + a key and *message* is a message representation. The messages are represented as instances of the appropriate format-specific :class:`Message` subclass unless a custom message factory was specified - when the :class:`Mailbox` instance was initialized. + when the :class:`!Mailbox` instance was initialized. + + + .. method:: items() + + The same as :meth:`iteritems`, except that a :class:`list` of pairs is + returned rather than an :term:`iterator` of pairs. .. 
method:: get(key, default=None) @@ -167,9 +181,9 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. Return a representation of the message corresponding to *key*. If no such message exists, *default* is returned if the method was called as :meth:`get` and a :exc:`KeyError` exception is raised if the method was - called as :meth:`~object.__getitem__`. The message is represented as an instance + called as :meth:`!__getitem__`. The message is represented as an instance of the appropriate format-specific :class:`Message` subclass unless a - custom message factory was specified when the :class:`Mailbox` instance + custom message factory was specified when the :class:`!Mailbox` instance was initialized. @@ -198,21 +212,23 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: get_file(key) - Return a file-like representation of the message corresponding to *key*, + Return a :term:`file-like <file-like object>` representation of the + message corresponding to *key*, or raise a :exc:`KeyError` exception if no such message exists. The file-like object behaves as if open in binary mode. This file should be closed once it is no longer needed. .. versionchanged:: 3.2 - The file object really is a binary file; previously it was incorrectly - returned in text mode. Also, the file-like object now supports the - context management protocol: you can use a :keyword:`with` statement to - automatically close it. + The file object really is a :term:`binary file`; previously it was + incorrectly returned in text mode. Also, the :term:`file-like object` + now supports the :term:`context manager` protocol: you can use a + :keyword:`with` statement to automatically close it. ..
note:: - Unlike other representations of messages, file-like representations are - not necessarily independent of the :class:`Mailbox` instance that + Unlike other representations of messages, + :term:`file-like <file-like object>` representations are not + necessarily independent of the :class:`!Mailbox` instance that created them or of the underlying mailbox. More specific documentation is provided by each subclass. @@ -238,7 +254,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. the message. If no such message exists, return *default*. The message is represented as an instance of the appropriate format-specific :class:`Message` subclass unless a custom message factory was specified - when the :class:`Mailbox` instance was initialized. + when the :class:`!Mailbox` instance was initialized. .. method:: popitem() @@ -248,7 +264,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. message. If the mailbox is empty, raise a :exc:`KeyError` exception. The message is represented as an instance of the appropriate format-specific :class:`Message` subclass unless a custom message factory was specified - when the :class:`Mailbox` instance was initialized. + when the :class:`!Mailbox` instance was initialized. .. method:: update(arg) @@ -259,7 +275,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. *message* as if by using :meth:`__setitem__`. As with :meth:`__setitem__`, each *key* must already correspond to a message in the mailbox or else a :exc:`KeyError` exception will be raised, so in general it is incorrect - for *arg* to be a :class:`Mailbox` instance. + for *arg* to be a :class:`!Mailbox` instance. .. note:: @@ -269,7 +285,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: flush() Write any pending changes to the filesystem.
For some :class:`Mailbox` - subclasses, changes are always written immediately and :meth:`flush` does + subclasses, changes are always written immediately and :meth:`!flush` does nothing, but you should still make a habit of calling this method. @@ -290,13 +306,13 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: close() Flush the mailbox, unlock it if necessary, and close any open files. For - some :class:`Mailbox` subclasses, this method does nothing. + some :class:`!Mailbox` subclasses, this method does nothing. .. _mailbox-maildir: -:class:`Maildir` -^^^^^^^^^^^^^^^^ +:class:`!Maildir` objects +^^^^^^^^^^^^^^^^^^^^^^^^^ .. class:: Maildir(dirname, factory=None, create=True) @@ -330,11 +346,11 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. Folders of the style introduced by the Courier mail transfer agent are also supported. Any subdirectory of the main mailbox is considered a folder if ``'.'`` is the first character in its name. Folder names are represented by - :class:`Maildir` without the leading ``'.'``. Each folder is itself a Maildir + :class:`!Maildir` without the leading ``'.'``. Each folder is itself a Maildir mailbox but should not contain other folders. Instead, a logical nesting is indicated using ``'.'`` to delimit levels, e.g., "Archived.2005.07". - .. note:: + .. attribute:: Maildir.colon The Maildir specification requires the use of a colon (``':'``) in certain message file names. However, some operating systems do not permit this @@ -346,9 +362,9 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. import mailbox mailbox.Maildir.colon = '!' - The :attr:`colon` attribute may also be set on a per-instance basis. + The :attr:`!colon` attribute may also be set on a per-instance basis. 
- :class:`Maildir` instances have all of the methods of :class:`Mailbox` in + :class:`!Maildir` instances have all of the methods of :class:`Mailbox` in addition to the following: @@ -359,14 +375,14 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: get_folder(folder) - Return a :class:`Maildir` instance representing the folder whose name is + Return a :class:`!Maildir` instance representing the folder whose name is *folder*. A :exc:`NoSuchMailboxError` exception is raised if the folder does not exist. .. method:: add_folder(folder) - Create a folder whose name is *folder* and return a :class:`Maildir` + Create a folder whose name is *folder* and return a :class:`!Maildir` instance representing it. @@ -485,7 +501,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. versionadded:: 3.13 - Some :class:`Mailbox` methods implemented by :class:`Maildir` deserve special + Some :class:`Mailbox` methods implemented by :class:`!Maildir` deserve special remarks: @@ -516,7 +532,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: close() - :class:`Maildir` instances do not keep any open files and the underlying + :class:`!Maildir` instances do not keep any open files and the underlying mailboxes do not support locking, so this method does nothing. @@ -539,8 +555,8 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-mbox: -:class:`mbox` -^^^^^^^^^^^^^ +:class:`!mbox` objects +^^^^^^^^^^^^^^^^^^^^^^ .. class:: mbox(path, factory=None, create=True) @@ -557,22 +573,22 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. each message indicated by a line whose first five characters are "From ". Several variations of the mbox format exist to address perceived shortcomings in - the original. In the interest of compatibility, :class:`mbox` implements the + the original. 
In the interest of compatibility, :class:`!mbox` implements the original format, which is sometimes referred to as :dfn:`mboxo`. This means that the :mailheader:`Content-Length` header, if present, is ignored and that any occurrences of "From " at the beginning of a line in a message body are transformed to ">From " when storing the message, although occurrences of ">From " are not transformed to "From " when reading the message. - Some :class:`Mailbox` methods implemented by :class:`mbox` deserve special + Some :class:`Mailbox` methods implemented by :class:`!mbox` deserve special remarks: .. method:: get_file(key) - Using the file after calling :meth:`flush` or :meth:`close` on the - :class:`mbox` instance may yield unpredictable results or raise an - exception. + Using the file after calling :meth:`~Mailbox.flush` or + :meth:`~Mailbox.close` on the :class:`!mbox` instance may yield + unpredictable results or raise an exception. .. method:: lock() @@ -596,8 +612,8 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-mh: -:class:`MH` -^^^^^^^^^^^ +:class:`!MH` objects +^^^^^^^^^^^^^^^^^^^^ .. class:: MH(path, factory=None, create=True) @@ -617,12 +633,12 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. messages without moving them to sub-folders. Sequences are defined in a file called :file:`.mh_sequences` in each folder. - The :class:`MH` class manipulates MH mailboxes, but it does not attempt to + The :class:`!MH` class manipulates MH mailboxes, but it does not attempt to emulate all of :program:`mh`'s behaviors. In particular, it does not modify and is not affected by the :file:`context` or :file:`.mh_profile` files that are used by :program:`mh` to store its state and configuration. 
- :class:`MH` instances have all of the methods of :class:`Mailbox` in addition + :class:`!MH` instances have all of the methods of :class:`Mailbox` in addition to the following: @@ -633,14 +649,14 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: get_folder(folder) - Return an :class:`MH` instance representing the folder whose name is + Return an :class:`!MH` instance representing the folder whose name is *folder*. A :exc:`NoSuchMailboxError` exception is raised if the folder does not exist. .. method:: add_folder(folder) - Create a folder whose name is *folder* and return an :class:`MH` instance + Create a folder whose name is *folder* and return an :class:`!MH` instance representing it. @@ -674,7 +690,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. Already-issued keys are invalidated by this operation and should not be subsequently used. - Some :class:`Mailbox` methods implemented by :class:`MH` deserve special + Some :class:`Mailbox` methods implemented by :class:`!MH` deserve special remarks: @@ -710,7 +726,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. method:: close() - :class:`MH` instances do not keep any open files, so this method is + :class:`!MH` instances do not keep any open files, so this method is equivalent to :meth:`unlock`. @@ -726,8 +742,8 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-babyl: -:class:`Babyl` -^^^^^^^^^^^^^^ +:class:`!Babyl` objects +^^^^^^^^^^^^^^^^^^^^^^^ .. class:: Babyl(path, factory=None, create=True) @@ -754,7 +770,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. message, and a list of all user-defined labels found in the mailbox is kept in the Babyl options section. 
- :class:`Babyl` instances have all of the methods of :class:`Mailbox` in + :class:`!Babyl` instances have all of the methods of :class:`Mailbox` in addition to the following: @@ -769,7 +785,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. options section, but the Babyl section is updated whenever the mailbox is modified. - Some :class:`Mailbox` methods implemented by :class:`Babyl` deserve special + Some :class:`Mailbox` methods implemented by :class:`!Babyl` deserve special remarks: @@ -802,8 +818,8 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-mmdf: -:class:`MMDF` -^^^^^^^^^^^^^ +:class:`!MMDF` objects +^^^^^^^^^^^^^^^^^^^^^^ .. class:: MMDF(path, factory=None, create=True) @@ -824,15 +840,15 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. ">From " when storing messages because the extra message separator lines prevent mistaking such occurrences for the starts of subsequent messages. - Some :class:`Mailbox` methods implemented by :class:`MMDF` deserve special + Some :class:`Mailbox` methods implemented by :class:`!MMDF` deserve special remarks: .. method:: get_file(key) - Using the file after calling :meth:`flush` or :meth:`close` on the - :class:`MMDF` instance may yield unpredictable results or raise an - exception. + Using the file after calling :meth:`~Mailbox.flush` or + :meth:`~Mailbox.close` on the :class:`!MMDF` instance may yield + unpredictable results or raise an exception. .. method:: lock() @@ -854,20 +870,20 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. .. _mailbox-message-objects: -:class:`Message` objects ------------------------- +:class:`!Message` objects +------------------------- .. class:: Message(message=None) A subclass of the :mod:`email.message` module's - :class:`~email.message.Message`. Subclasses of :class:`mailbox.Message` add + :class:`~email.message.Message`. 
Subclasses of :class:`!mailbox.Message` add mailbox-format-specific state and behavior. If *message* is omitted, the new instance is created in a default, empty state. If *message* is an :class:`email.message.Message` instance, its contents are copied; furthermore, any format-specific information is converted insofar as - possible if *message* is a :class:`Message` instance. If *message* is a string, + possible if *message* is a :class:`!Message` instance. If *message* is a string, a byte string, or a file, it should contain an :rfc:`2822`\ -compliant message, which is read and parsed. Files should be open in binary mode, but text mode files @@ -882,18 +898,18 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. such as whether a message has been read by the user or marked as important is retained, because it applies to the message itself. - There is no requirement that :class:`Message` instances be used to represent + There is no requirement that :class:`!Message` instances be used to represent messages retrieved using :class:`Mailbox` instances. In some situations, the - time and memory required to generate :class:`Message` representations might - not be acceptable. For such situations, :class:`Mailbox` instances also + time and memory required to generate :class:`!Message` representations might + not be acceptable. For such situations, :class:`!Mailbox` instances also offer string and file-like representations, and a custom message factory may - be specified when a :class:`Mailbox` instance is initialized. + be specified when a :class:`!Mailbox` instance is initialized. .. _mailbox-maildirmessage: -:class:`MaildirMessage` -^^^^^^^^^^^^^^^^^^^^^^^ +:class:`!MaildirMessage` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. class:: MaildirMessage(message=None) @@ -928,7 +944,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. 
| T | Trashed | Marked for subsequent deletion | +------+---------+--------------------------------+ - :class:`MaildirMessage` instances offer the following methods: + :class:`!MaildirMessage` instances offer the following methods: .. method:: get_subdir() @@ -1005,7 +1021,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. Set "info" to *info*, which should be a string. -When a :class:`MaildirMessage` instance is created based upon an +When a :class:`!MaildirMessage` instance is created based upon an :class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status` and :mailheader:`X-Status` headers are omitted and the following conversions take place: @@ -1025,7 +1041,7 @@ take place: | T flag | D flag | +--------------------+----------------------------------------------+ -When a :class:`MaildirMessage` instance is created based upon an +When a :class:`!MaildirMessage` instance is created based upon an :class:`MHMessage` instance, the following conversions take place: +-------------------------------+--------------------------+ @@ -1040,7 +1056,7 @@ When a :class:`MaildirMessage` instance is created based upon an | R flag | "replied" sequence | +-------------------------------+--------------------------+ -When a :class:`MaildirMessage` instance is created based upon a +When a :class:`!MaildirMessage` instance is created based upon a :class:`BabylMessage` instance, the following conversions take place: +-------------------------------+-------------------------------+ @@ -1060,8 +1076,8 @@ When a :class:`MaildirMessage` instance is created based upon a .. _mailbox-mboxmessage: -:class:`mboxMessage` -^^^^^^^^^^^^^^^^^^^^ +:class:`!mboxMessage` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. class:: mboxMessage(message=None) @@ -1097,7 +1113,7 @@ When a :class:`MaildirMessage` instance is created based upon a "D", "F", and "A" flags are stored in the :mailheader:`X-Status` header. 
The flags and headers typically appear in the order mentioned. - :class:`mboxMessage` instances offer the following methods: + :class:`!mboxMessage` instances offer the following methods: .. method:: get_from() @@ -1145,7 +1161,7 @@ When a :class:`MaildirMessage` instance is created based upon a remove more than one flag at a time, *flag* maybe a string of more than one character. -When an :class:`mboxMessage` instance is created based upon a +When an :class:`!mboxMessage` instance is created based upon a :class:`MaildirMessage` instance, a "From " line is generated based upon the :class:`MaildirMessage` instance's delivery date, and the following conversions take place: @@ -1164,7 +1180,7 @@ take place: | A flag | R flag | +-----------------+-------------------------------+ -When an :class:`mboxMessage` instance is created based upon an +When an :class:`!mboxMessage` instance is created based upon an :class:`MHMessage` instance, the following conversions take place: +-------------------+--------------------------+ @@ -1179,7 +1195,7 @@ When an :class:`mboxMessage` instance is created based upon an | A flag | "replied" sequence | +-------------------+--------------------------+ -When an :class:`mboxMessage` instance is created based upon a +When an :class:`!mboxMessage` instance is created based upon a :class:`BabylMessage` instance, the following conversions take place: +-------------------+-----------------------------+ @@ -1194,7 +1210,8 @@ When an :class:`mboxMessage` instance is created based upon a | A flag | "answered" label | +-------------------+-----------------------------+ -When a :class:`Message` instance is created based upon an :class:`MMDFMessage` +When a :class:`!mboxMessage` instance is created based upon an +:class:`MMDFMessage` instance, the "From " line is copied and all flags directly correspond: +-----------------+----------------------------+ @@ -1214,8 +1231,8 @@ instance, the "From " line is copied and all flags directly correspond: .. 
_mailbox-mhmessage: -:class:`MHMessage` -^^^^^^^^^^^^^^^^^^ +:class:`!MHMessage` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. class:: MHMessage(message=None) @@ -1239,7 +1256,7 @@ instance, the "From " line is copied and all flags directly correspond: | flagged | Marked as important | +----------+------------------------------------------+ - :class:`MHMessage` instances offer the following methods: + :class:`!MHMessage` instances offer the following methods: .. method:: get_sequences() @@ -1261,7 +1278,7 @@ instance, the "From " line is copied and all flags directly correspond: Remove *sequence* from the list of sequences that include this message. -When an :class:`MHMessage` instance is created based upon a +When an :class:`!MHMessage` instance is created based upon a :class:`MaildirMessage` instance, the following conversions take place: +--------------------+-------------------------------+ @@ -1274,7 +1291,7 @@ When an :class:`MHMessage` instance is created based upon a | "flagged" sequence | F flag | +--------------------+-------------------------------+ -When an :class:`MHMessage` instance is created based upon an +When an :class:`!MHMessage` instance is created based upon an :class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status` and :mailheader:`X-Status` headers are omitted and the following conversions take place: @@ -1290,7 +1307,7 @@ take place: | "flagged" sequence | F flag | +--------------------+----------------------------------------------+ -When an :class:`MHMessage` instance is created based upon a +When an :class:`!MHMessage` instance is created based upon a :class:`BabylMessage` instance, the following conversions take place: +--------------------+-----------------------------+ @@ -1304,8 +1321,8 @@ When an :class:`MHMessage` instance is created based upon a .. _mailbox-babylmessage: -:class:`BabylMessage` -^^^^^^^^^^^^^^^^^^^^^ +:class:`!BabylMessage` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. 
class:: BabylMessage(message=None) @@ -1334,11 +1351,11 @@ When an :class:`MHMessage` instance is created based upon a | resent | Resent | +-----------+------------------------------------------+ - By default, Rmail displays only visible headers. The :class:`BabylMessage` + By default, Rmail displays only visible headers. The :class:`!BabylMessage` class, though, uses the original headers because they are more complete. Visible headers may be accessed explicitly if desired. - :class:`BabylMessage` instances offer the following methods: + :class:`!BabylMessage` instances offer the following methods: .. method:: get_labels() @@ -1377,7 +1394,7 @@ When an :class:`MHMessage` instance is created based upon a .. method:: update_visible() - When a :class:`BabylMessage` instance's original headers are modified, the + When a :class:`!BabylMessage` instance's original headers are modified, the visible headers are not automatically modified to correspond. This method updates the visible headers as follows: each visible header with a corresponding original header is set to the value of the original header, @@ -1387,7 +1404,7 @@ When an :class:`MHMessage` instance is created based upon a present in the original headers but not the visible headers are added to the visible headers. 
-When a :class:`BabylMessage` instance is created based upon a +When a :class:`!BabylMessage` instance is created based upon a :class:`MaildirMessage` instance, the following conversions take place: +-------------------+-------------------------------+ @@ -1402,7 +1419,7 @@ When a :class:`BabylMessage` instance is created based upon a | "forwarded" label | P flag | +-------------------+-------------------------------+ -When a :class:`BabylMessage` instance is created based upon an +When a :class:`!BabylMessage` instance is created based upon an :class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status` and :mailheader:`X-Status` headers are omitted and the following conversions take place: @@ -1418,7 +1435,7 @@ take place: | "answered" label | A flag | +------------------+----------------------------------------------+ -When a :class:`BabylMessage` instance is created based upon an +When a :class:`!BabylMessage` instance is created based upon an :class:`MHMessage` instance, the following conversions take place: +------------------+--------------------------+ @@ -1432,8 +1449,8 @@ When a :class:`BabylMessage` instance is created based upon an .. _mailbox-mmdfmessage: -:class:`MMDFMessage` -^^^^^^^^^^^^^^^^^^^^ +:class:`!MMDFMessage` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. class:: MMDFMessage(message=None) @@ -1467,7 +1484,7 @@ When a :class:`BabylMessage` instance is created based upon an "D", "F", and "A" flags are stored in the :mailheader:`X-Status` header. The flags and headers typically appear in the order mentioned. - :class:`MMDFMessage` instances offer the following methods, which are + :class:`!MMDFMessage` instances offer the following methods, which are identical to those offered by :class:`mboxMessage`: @@ -1516,7 +1533,7 @@ When a :class:`BabylMessage` instance is created based upon an remove more than one flag at a time, *flag* maybe a string of more than one character. 
-When an :class:`MMDFMessage` instance is created based upon a +When an :class:`!MMDFMessage` instance is created based upon a :class:`MaildirMessage` instance, a "From " line is generated based upon the :class:`MaildirMessage` instance's delivery date, and the following conversions take place: @@ -1535,7 +1552,7 @@ take place: | A flag | R flag | +-----------------+-------------------------------+ -When an :class:`MMDFMessage` instance is created based upon an +When an :class:`!MMDFMessage` instance is created based upon an :class:`MHMessage` instance, the following conversions take place: +-------------------+--------------------------+ @@ -1550,7 +1567,7 @@ When an :class:`MMDFMessage` instance is created based upon an | A flag | "replied" sequence | +-------------------+--------------------------+ -When an :class:`MMDFMessage` instance is created based upon a +When an :class:`!MMDFMessage` instance is created based upon a :class:`BabylMessage` instance, the following conversions take place: +-------------------+-----------------------------+ @@ -1565,7 +1582,7 @@ When an :class:`MMDFMessage` instance is created based upon a | A flag | "answered" label | +-------------------+-----------------------------+ -When an :class:`MMDFMessage` instance is created based upon an +When an :class:`!MMDFMessage` instance is created based upon an :class:`mboxMessage` instance, the "From " line is copied and all flags directly correspond: @@ -1587,7 +1604,7 @@ correspond: Exceptions ---------- -The following exception classes are defined in the :mod:`mailbox` module: +The following exception classes are defined in the :mod:`!mailbox` module: .. 
exception:: Error() diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 20580f78e07b5dd..d9147aaeee12bd6 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -62,7 +62,6 @@ Doc/library/locale.rst Doc/library/logging.config.rst Doc/library/logging.handlers.rst Doc/library/lzma.rst -Doc/library/mailbox.rst Doc/library/mmap.rst Doc/library/multiprocessing.rst Doc/library/multiprocessing.shared_memory.rst From 5f7d7353b47ccf634b9b65f933d3fdeeb395301f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 14 Dec 2023 17:27:39 -0600 Subject: [PATCH 04/12] Optimize unique_justseen() recipe for a common case. (gh-113147) --- Doc/library/itertools.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 36cea9a835f3024..03127afe1b4460b 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -1017,6 +1017,8 @@ which incur interpreter overhead. "List unique elements, preserving order. Remember only the element just seen." # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B # unique_justseen('ABBcCAD', str.lower) --> A B c A D + if key is None: + return map(operator.itemgetter(0), groupby(iterable)) return map(next, map(operator.itemgetter(1), groupby(iterable, key))) From f34e22c6470d1c5d0e2a3ffb2272d375c22654b9 Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Fri, 15 Dec 2023 02:42:33 +0300 Subject: [PATCH 05/12] gh-112535: Update _Py_ThreadId() to support RISC-V (gh-113084) Update _Py_ThreadId() to support RISC-V --- Include/object.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Include/object.h b/Include/object.h index bd576b0bd43211c..d22e5c2b8be2a9d 100644 --- a/Include/object.h +++ b/Include/object.h @@ -283,6 +283,13 @@ _Py_ThreadId(void) // Both GCC and Clang have supported __builtin_thread_pointer // for s390 from long time ago. 
tid = (uintptr_t)__builtin_thread_pointer(); +#elif defined(__riscv) + #if defined(__clang__) && _Py__has_builtin(__builtin_thread_pointer) + tid = (uintptr_t)__builtin_thread_pointer(); + #else + // tp is Thread Pointer provided by the RISC-V ABI. + __asm__ ("mv %0, tp" : "=r" (tid)); + #endif #else # error "define _Py_ThreadId for this platform" #endif From 7bb00f053e0a86bb8827cc464961a4fad9278e6d Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 15 Dec 2023 08:57:23 +0000 Subject: [PATCH 06/12] gh-101100: Fix Sphinx nitpicks in `library/rlcompleter.rst` (#113125) --- Doc/library/readline.rst | 2 ++ Doc/library/rlcompleter.rst | 40 ++++++++++++++++++++----------------- Doc/tools/.nitignore | 1 - 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index 2e0f45ced30b9cd..1adafcaa02eab97 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -218,6 +218,8 @@ Startup hooks if Python was compiled for a version of the library that supports it. +.. _readline-completion: + Completion ---------- diff --git a/Doc/library/rlcompleter.rst b/Doc/library/rlcompleter.rst index 40b09ce897880ed..8287699c5f013ec 100644 --- a/Doc/library/rlcompleter.rst +++ b/Doc/library/rlcompleter.rst @@ -10,12 +10,14 @@ -------------- -The :mod:`rlcompleter` module defines a completion function suitable for the -:mod:`readline` module by completing valid Python identifiers and keywords. +The :mod:`!rlcompleter` module defines a completion function suitable to be +passed to :func:`~readline.set_completer` in the :mod:`readline` module. When this module is imported on a Unix platform with the :mod:`readline` module available, an instance of the :class:`Completer` class is automatically created -and its :meth:`complete` method is set as the :mod:`readline` completer. +and its :meth:`~Completer.complete` method is set as the +:ref:`readline completer <readline-completion>`.
The method provides +completion of valid Python :ref:`identifiers and keywords <identifiers>`. Example:: @@ -28,7 +30,7 @@ Example:: readline.__name__ readline.parse_and_bind( >>> readline. -The :mod:`rlcompleter` module is designed for use with Python's +The :mod:`!rlcompleter` module is designed for use with Python's :ref:`interactive mode <tut-interactive>`. Unless Python is run with the :option:`-S` option, the module is automatically imported and configured (see :ref:`rlcompleter-config`). @@ -39,23 +41,25 @@ this module can still be used for custom purposes. .. _completer-objects: -Completer Objects ------------------ +.. class:: Completer -Completer objects have the following method: + Completer objects have the following method: + .. method:: Completer.complete(text, state) -.. method:: Completer.complete(text, state) + Return the next possible completion for *text*. - Return the *state*\ th completion for *text*. + When called by the :mod:`readline` module, this method is called + successively with ``state == 0, 1, 2, ...`` until the method returns + ``None``. - If called for *text* that doesn't include a period character (``'.'``), it will - complete from names currently defined in :mod:`__main__`, :mod:`builtins` and - keywords (as defined by the :mod:`keyword` module). - - If called for a dotted name, it will try to evaluate anything without obvious - side-effects (functions will not be evaluated, but it can generate calls to - :meth:`__getattr__`) up to the last part, and find matches for the rest via the - :func:`dir` function. Any exception raised during the evaluation of the - expression is caught, silenced and :const:`None` is returned. + If called for *text* that doesn't include a period character (``'.'``), it will + complete from names currently defined in :mod:`__main__`, :mod:`builtins` and + keywords (as defined by the :mod:`keyword` module).
+ If called for a dotted name, it will try to evaluate anything without obvious + side-effects (functions will not be evaluated, but it can generate calls to + :meth:`~object.__getattr__`) up to the last part, and find matches for the + rest via the :func:`dir` function. Any exception raised during the + evaluation of the expression is caught, silenced and :const:`None` is + returned. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index d9147aaeee12bd6..c91e698ff0753a1 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -79,7 +79,6 @@ Doc/library/pyexpat.rst Doc/library/random.rst Doc/library/readline.rst Doc/library/resource.rst -Doc/library/rlcompleter.rst Doc/library/select.rst Doc/library/signal.rst Doc/library/smtplib.rst From 55ef998a8dead3874e8390284081290c1ccb46e2 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 15 Dec 2023 12:28:22 +0000 Subject: [PATCH 07/12] gh-112720: Move dis's cache output code to the Formatter, labels lookup to the arg_resolver. Reduce the number of parameters passed around. 
(#113108) --- Lib/dis.py | 168 +++++++++++++++++++++++-------------------- Lib/test/test_dis.py | 17 +++-- 2 files changed, 104 insertions(+), 81 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index 183091cb0d60988..1a2f1032d500afe 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -113,7 +113,14 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, elif hasattr(x, 'co_code'): # Code object _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) elif isinstance(x, (bytes, bytearray)): # Raw bytecode - _disassemble_bytes(x, file=file, show_caches=show_caches, show_offsets=show_offsets) + labels_map = _make_labels_map(x) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(labels_map=labels_map) + _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter) elif isinstance(x, str): # Source code _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) else: @@ -394,23 +401,41 @@ def __str__(self): class Formatter: def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0, - line_offset=0): + line_offset=0, show_caches=False): """Create a Formatter *file* where to write the output *lineno_width* sets the width of the line number field (0 omits it) *offset_width* sets the width of the instruction offset field *label_width* sets the width of the label field + *show_caches* is a boolean indicating whether to display cache lines - *line_offset* the line number (within the code unit) """ self.file = file self.lineno_width = lineno_width self.offset_width = offset_width self.label_width = label_width - + self.show_caches = show_caches def print_instruction(self, instr, mark_as_current=False): + 
self.print_instruction_line(instr, mark_as_current) + if self.show_caches and instr.cache_info: + offset = instr.offset + for name, size, data in instr.cache_info: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + self.print_instruction_line( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + False) + + def print_instruction_line(self, instr, mark_as_current): """Format instruction details for inclusion in disassembly output.""" lineno_width = self.lineno_width offset_width = self.offset_width @@ -474,11 +499,14 @@ def print_exception_table(self, exception_entries): class ArgResolver: - def __init__(self, co_consts, names, varname_from_oparg, labels_map): + def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None): self.co_consts = co_consts self.names = names self.varname_from_oparg = varname_from_oparg - self.labels_map = labels_map + self.labels_map = labels_map or {} + + def get_label_for_offset(self, offset): + return self.labels_map.get(offset, None) def get_argval_argrepr(self, op, arg, offset): get_name = None if self.names is None else self.names.__getitem__ @@ -547,8 +575,7 @@ def get_argval_argrepr(self, op, arg, offset): argrepr = _intrinsic_2_descs[arg] return argval, argrepr - -def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): +def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False): """Iterator for the opcodes in methods, functions or code Generates a series of Instruction named tuples giving the details of @@ -567,9 +594,10 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): line_offset = 0 original_code = co.co_code - labels_map = _make_labels_map(original_code) - arg_resolver = 
ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg, - labels_map) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=_make_labels_map(original_code)) return _get_instructions_bytes(_get_code_array(co, adaptive), linestarts=linestarts, line_offset=line_offset, @@ -648,7 +676,7 @@ def _is_backward_jump(op): 'ENTER_EXECUTOR') def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, - original_code=None, labels_map=None, arg_resolver=None): + original_code=None, arg_resolver=None): """Iterate over the instructions in a bytecode string. Generates a sequence of Instruction namedtuples giving the details of each @@ -661,8 +689,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N original_code = original_code or code co_positions = co_positions or iter(()) - labels_map = labels_map or _make_labels_map(original_code) - starts_line = False local_line_number = None line_number = None @@ -684,10 +710,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: argval, argrepr = arg, repr(arg) - instr = Instruction(_all_opname[op], op, arg, argval, argrepr, - offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions) - caches = _get_cache_size(_all_opname[deop]) # Advance the co_positions iterator: for _ in range(caches): @@ -701,10 +723,10 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: cache_info = None + label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None yield Instruction(_all_opname[op], op, arg, argval, argrepr, offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions, cache_info) - + label, positions, cache_info) def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, @@ -712,12 +734,20 @@ def disassemble(co, lasti=-1, *, file=None, 
show_caches=False, adaptive=False, """Disassemble a code object.""" linestarts = dict(findlinestarts(co)) exception_entries = _parse_exception_table(co) - _disassemble_bytes(_get_code_array(co, adaptive), - lasti, co._varname_from_oparg, - co.co_names, co.co_consts, linestarts, file=file, - exception_entries=exception_entries, - co_positions=co.co_positions(), show_caches=show_caches, - original_code=co.co_code, show_offsets=show_offsets) + labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + lineno_width=_get_lineno_width(linestarts), + offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts, + exception_entries=exception_entries, co_positions=co.co_positions(), + original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter) def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) @@ -764,60 +794,29 @@ def _get_lineno_width(linestarts): return lineno_width -def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, - names=None, co_consts=None, linestarts=None, - *, file=None, line_offset=0, exception_entries=(), - co_positions=None, show_caches=False, original_code=None, - show_offsets=False): - - offset_width = len(str(max(len(code) - 2, 9999))) if show_offsets else 0 - - labels_map = _make_labels_map(original_code or code, exception_entries) - label_width = 4 + len(str(len(labels_map))) +def _disassemble_bytes(code, lasti=-1, linestarts=None, + *, line_offset=0, exception_entries=(), + 
co_positions=None, original_code=None, + arg_resolver=None, formatter=None): - formatter = Formatter(file=file, - lineno_width=_get_lineno_width(linestarts), - offset_width=offset_width, - label_width=label_width, - line_offset=line_offset) + assert formatter is not None + assert arg_resolver is not None - arg_resolver = ArgResolver(co_consts, names, varname_from_oparg, labels_map) instrs = _get_instructions_bytes(code, linestarts=linestarts, line_offset=line_offset, co_positions=co_positions, original_code=original_code, - labels_map=labels_map, arg_resolver=arg_resolver) - print_instructions(instrs, exception_entries, formatter, - show_caches=show_caches, lasti=lasti) + print_instructions(instrs, exception_entries, formatter, lasti=lasti) -def print_instructions(instrs, exception_entries, formatter, show_caches=False, lasti=-1): +def print_instructions(instrs, exception_entries, formatter, lasti=-1): for instr in instrs: - if show_caches: - is_current_instr = instr.offset == lasti - else: - # Each CACHE takes 2 bytes - is_current_instr = instr.offset <= lasti \ - <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) + # Each CACHE takes 2 bytes + is_current_instr = instr.offset <= lasti \ + <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) formatter.print_instruction(instr, is_current_instr) - deop = _deoptop(instr.opcode) - if show_caches and instr.cache_info: - offset = instr.offset - for name, size, data in instr.cache_info: - for i in range(size): - offset += 2 - # Only show the fancy argrepr for a CACHE instruction when it's - # the first entry for a particular cache value: - if i == 0: - argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" - else: - argrepr = "" - formatter.print_instruction( - Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, - False, None, None, instr.positions), - is_current_instr) formatter.print_exception_table(exception_entries) @@ -960,14 +959,15 @@ def __iter__(self): 
co = self.codeobj original_code = co.co_code labels_map = _make_labels_map(original_code, self.exception_entries) - arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg, - labels_map) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) return _get_instructions_bytes(_get_code_array(co, self.adaptive), linestarts=self._linestarts, line_offset=self._line_offset, co_positions=co.co_positions(), original_code=original_code, - labels_map=labels_map, arg_resolver=arg_resolver) def __repr__(self): @@ -995,18 +995,32 @@ def dis(self): else: offset = -1 with io.StringIO() as output: - _disassemble_bytes(_get_code_array(co, self.adaptive), - varname_from_oparg=co._varname_from_oparg, - names=co.co_names, co_consts=co.co_consts, + code = _get_code_array(co, self.adaptive) + offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 + + + labels_map = _make_labels_map(co.co_code, self.exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=output, + lineno_width=_get_lineno_width(self._linestarts), + offset_width=offset_width, + label_width=label_width, + line_offset=self._line_offset, + show_caches=self.show_caches) + + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(code, linestarts=self._linestarts, line_offset=self._line_offset, - file=output, lasti=offset, exception_entries=self.exception_entries, co_positions=co.co_positions(), - show_caches=self.show_caches, original_code=co.co_code, - show_offsets=self.show_offsets) + arg_resolver=arg_resolver, + formatter=formatter) return output.getvalue() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 12e2c57e50b0ba5..0c7fd60f640854d 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -2,6 +2,7 @@ import contextlib import 
dis +import functools import io import re import sys @@ -1982,19 +1983,27 @@ def f(opcode, oparg, offset, *init_args): self.assertEqual(f(opcode.opmap["BINARY_OP"], 3, *args), (3, '<<')) self.assertEqual(f(opcode.opmap["CALL_INTRINSIC_1"], 2, *args), (2, 'INTRINSIC_IMPORT_STAR')) + def get_instructions(self, code): + return dis._get_instructions_bytes(code) + def test_start_offset(self): # When no extended args are present, # start_offset should be equal to offset + instructions = list(dis.Bytecode(_f)) for instruction in instructions: self.assertEqual(instruction.offset, instruction.start_offset) + def last_item(iterable): + return functools.reduce(lambda a, b : b, iterable) + code = bytes([ opcode.opmap["LOAD_FAST"], 0x00, opcode.opmap["EXTENDED_ARG"], 0x01, opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, ]) - jump = list(dis._get_instructions_bytes(code))[-1] + labels_map = dis._make_labels_map(code) + jump = last_item(self.get_instructions(code)) self.assertEqual(4, jump.offset) self.assertEqual(2, jump.start_offset) @@ -2006,7 +2015,7 @@ def test_start_offset(self): opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, opcode.opmap["CACHE"], 0x00, ]) - jump = list(dis._get_instructions_bytes(code))[-1] + jump = last_item(self.get_instructions(code)) self.assertEqual(8, jump.offset) self.assertEqual(2, jump.start_offset) @@ -2021,7 +2030,7 @@ def test_start_offset(self): opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, opcode.opmap["CACHE"], 0x00, ]) - instructions = list(dis._get_instructions_bytes(code)) + instructions = list(self.get_instructions(code)) # 1st jump self.assertEqual(4, instructions[2].offset) self.assertEqual(2, instructions[2].start_offset) @@ -2042,7 +2051,7 @@ def test_cache_offset_and_end_offset(self): opcode.opmap["CACHE"], 0x00, opcode.opmap["CACHE"], 0x00 ]) - instructions = list(dis._get_instructions_bytes(code)) + instructions = list(self.get_instructions(code)) self.assertEqual(2, instructions[0].cache_offset) self.assertEqual(10, instructions[0].end_offset) 
self.assertEqual(12, instructions[1].cache_offset) From 737d23ffcd16909274f21c932215ec104140fb1c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 15 Dec 2023 05:03:17 -0800 Subject: [PATCH 08/12] GH-111485: Mark some instructions as `TIER_ONE_ONLY` (GH-113155) --- Python/bytecodes.c | 7 ++ Python/executor_cases.c.h | 139 ------------------------------------- Python/generated_cases.c.h | 7 ++ 3 files changed, 14 insertions(+), 139 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 68bb15c2b536eb9..19e2268046fcdc7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -506,6 +506,7 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (unused/1, left, right --)) { + TIER_ONE_ONLY assert(next_instr->op.code == STORE_FAST); PyObject **target_local = &GETLOCAL(next_instr->op.arg); DEOPT_IF(*target_local != left); @@ -786,6 +787,7 @@ dummy_func( } inst(INTERPRETER_EXIT, (retval --)) { + TIER_ONE_ONLY assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); /* Restore previous frame and return. */ @@ -1072,6 +1074,7 @@ dummy_func( } inst(YIELD_VALUE, (retval -- unused)) { + TIER_ONE_ONLY // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() // or throw() call. @@ -2297,6 +2300,7 @@ dummy_func( } inst(JUMP_FORWARD, (--)) { + TIER_ONE_ONLY JUMPBY(oparg); } @@ -2402,6 +2406,7 @@ dummy_func( macro(POP_JUMP_IF_NOT_NONE) = _IS_NONE + _POP_JUMP_IF_FALSE; inst(JUMP_BACKWARD_NO_INTERRUPT, (--)) { + TIER_ONE_ONLY /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost * generator or coroutine, so we deliberately do not check it here. 
@@ -3454,6 +3459,7 @@ dummy_func( // This is secretly a super-instruction inst(CALL_LIST_APPEND, (unused/1, unused/2, callable, self, args[oparg] -- unused)) { + TIER_ONE_ONLY assert(oparg == 1); PyInterpreterState *interp = tstate->interp; DEOPT_IF(callable != interp->callable_cache.list_append); @@ -3792,6 +3798,7 @@ dummy_func( } inst(RETURN_GENERATOR, (--)) { + TIER_ONE_ONLY assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2519a4ee546a5ed..7cb60cbc1dd3ff8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -375,38 +375,6 @@ break; } - case _BINARY_OP_INPLACE_ADD_UNICODE: { - PyObject *right; - PyObject *left; - right = stack_pointer[-1]; - left = stack_pointer[-2]; - assert(next_instr->op.code == STORE_FAST); - PyObject **target_local = &GETLOCAL(next_instr->op.arg); - if (*target_local != left) goto deoptimize; - STAT_INC(BINARY_OP, hit); - /* Handle `left = left + right` or `left += right` for str. - * - * When possible, extend `left` in place rather than - * allocating a new PyUnicodeObject. This attempts to avoid - * quadratic behavior when one neglects to use str.join(). - * - * If `left` has only two references remaining (one from - * the stack, one in the locals), DECREFing `left` leaves - * only the locals reference, so PyUnicode_Append knows - * that the string is safe to mutate. - */ - assert(Py_REFCNT(left) >= 2); - _Py_DECREF_NO_DEALLOC(left); - PyUnicode_Append(target_local, right); - _Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); - if (*target_local == NULL) goto pop_2_error_tier_two; - // The STORE_FAST is already done. 
- assert(next_instr->op.code == STORE_FAST); - SKIP_OVER(1); - stack_pointer += -2; - break; - } - case _BINARY_SUBSCR: { PyObject *sub; PyObject *container; @@ -690,18 +658,6 @@ break; } - case _INTERPRETER_EXIT: { - PyObject *retval; - retval = stack_pointer[-1]; - assert(frame == &entry_frame); - assert(_PyFrame_IsIncomplete(frame)); - /* Restore previous frame and return. */ - tstate->current_frame = frame->previous; - assert(!_PyErr_Occurred(tstate)); - tstate->c_recursion_remaining += PY_EVAL_C_STACK_UNITS; - return retval; - } - case _POP_FRAME: { PyObject *retval; retval = stack_pointer[-1]; @@ -846,33 +802,6 @@ /* _INSTRUMENTED_YIELD_VALUE is not a viable micro-op for tier 2 */ - case _YIELD_VALUE: { - PyObject *retval; - oparg = CURRENT_OPARG(); - retval = stack_pointer[-1]; - // NOTE: It's important that YIELD_VALUE never raises an exception! - // The compiler treats any exception raised here as a failed close() - // or throw() call. - assert(frame != &entry_frame); - frame->instr_ptr = next_instr; - PyGenObject *gen = _PyFrame_GetGenerator(frame); - assert(FRAME_SUSPENDED_YIELD_FROM == FRAME_SUSPENDED + 1); - assert(oparg == 0 || oparg == 1); - gen->gi_frame_state = FRAME_SUSPENDED + oparg; - _PyFrame_SetStackPointer(frame, stack_pointer - 1); - tstate->exc_info = gen->gi_exc_state.previous_item; - gen->gi_exc_state.previous_item = NULL; - _Py_LeaveRecursiveCallPy(tstate); - _PyInterpreterFrame *gen_frame = frame; - frame = tstate->current_frame = frame->previous; - gen_frame->previous = NULL; - _PyFrame_StackPush(frame, retval); - /* We don't know which of these is relevant here, so keep them equal */ - assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER); - LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); - goto resume_frame; - } - case _POP_EXCEPT: { PyObject *exc_value; exc_value = stack_pointer[-1]; @@ -2084,12 +2013,6 @@ break; } - case _JUMP_FORWARD: { - oparg = CURRENT_OPARG(); - JUMPBY(oparg); - break; - } - /* _JUMP_BACKWARD is not a 
viable micro-op for tier 2 */ /* _POP_JUMP_IF_FALSE is not a viable micro-op for tier 2 */ @@ -2111,17 +2034,6 @@ break; } - case _JUMP_BACKWARD_NO_INTERRUPT: { - oparg = CURRENT_OPARG(); - /* This bytecode is used in the `yield from` or `await` loop. - * If there is an interrupt, we want it handled in the innermost - * generator or coroutine, so we deliberately do not check it here. - * (see bpo-30039). - */ - JUMPBY(-oparg); - break; - } - case _GET_LEN: { PyObject *obj; PyObject *len_o; @@ -3060,32 +2972,6 @@ break; } - case _CALL_LIST_APPEND: { - PyObject **args; - PyObject *self; - PyObject *callable; - oparg = CURRENT_OPARG(); - args = &stack_pointer[-oparg]; - self = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - assert(oparg == 1); - PyInterpreterState *interp = tstate->interp; - if (callable != interp->callable_cache.list_append) goto deoptimize; - assert(self != NULL); - if (!PyList_Check(self)) goto deoptimize; - STAT_INC(CALL, hit); - if (_PyList_AppendTakeRef((PyListObject *)self, args[0]) < 0) { - goto pop_1_error; // Since arg is DECREF'ed already - } - Py_DECREF(self); - Py_DECREF(callable); - STACK_SHRINK(3); - // Skip POP_TOP - assert(next_instr->op.code == POP_TOP); - SKIP_OVER(1); - DISPATCH(); - } - case _CALL_METHOD_DESCRIPTOR_O: { PyObject **args; PyObject *self_or_null; @@ -3307,31 +3193,6 @@ break; } - case _RETURN_GENERATOR: { - assert(PyFunction_Check(frame->f_funcobj)); - PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; - PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); - if (gen == NULL) { - GOTO_ERROR(error); - } - assert(EMPTY()); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->instr_ptr = next_instr; - _PyFrame_Copy(frame, gen_frame); - assert(frame->frame_obj == NULL); - gen->gi_frame_state = FRAME_CREATED; - gen_frame->owner = FRAME_OWNED_BY_GENERATOR; - _Py_LeaveRecursiveCallPy(tstate); - assert(frame != 
&entry_frame); - _PyInterpreterFrame *prev = frame->previous; - _PyThreadState_PopFrame(tstate, frame); - frame = tstate->current_frame = prev; - _PyFrame_StackPush(frame, (PyObject *)gen); - LOAD_IP(frame->return_offset); - goto resume_frame; - } - case _BUILD_SLICE: { PyObject *step = NULL; PyObject *stop; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 65e6f11f68b38c8..24f26722d7a7453 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -238,6 +238,7 @@ } // _BINARY_OP_INPLACE_ADD_UNICODE { + TIER_ONE_ONLY assert(next_instr->op.code == STORE_FAST); PyObject **target_local = &GETLOCAL(next_instr->op.arg); DEOPT_IF(*target_local != left, BINARY_OP); @@ -1446,6 +1447,7 @@ args = &stack_pointer[-oparg]; self = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; + TIER_ONE_ONLY assert(oparg == 1); PyInterpreterState *interp = tstate->interp; DEOPT_IF(callable != interp->callable_cache.list_append, CALL); @@ -3182,6 +3184,7 @@ INSTRUCTION_STATS(INTERPRETER_EXIT); PyObject *retval; retval = stack_pointer[-1]; + TIER_ONE_ONLY assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); /* Restore previous frame and return. */ @@ -3253,6 +3256,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(JUMP_BACKWARD_NO_INTERRUPT); + TIER_ONE_ONLY /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost * generator or coroutine, so we deliberately do not check it here. 
@@ -3266,6 +3270,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(JUMP_FORWARD); + TIER_ONE_ONLY JUMPBY(oparg); DISPATCH(); } @@ -4793,6 +4798,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(RETURN_GENERATOR); + TIER_ONE_ONLY assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -5764,6 +5770,7 @@ INSTRUCTION_STATS(YIELD_VALUE); PyObject *retval; retval = stack_pointer[-1]; + TIER_ONE_ONLY // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() // or throw() call. From 8f8f0f97e126db9ca470fd7e7b2944c150db6305 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 15 Dec 2023 15:24:30 +0200 Subject: [PATCH 09/12] gh-61648: Detect line numbers of properties in doctests (GH-113161) --- Lib/doctest.py | 2 ++ Lib/test/doctest_lineno.py | 16 ++++++++++++++++ Lib/test/test_doctest.py | 2 ++ ...2023-12-15-12-35-28.gh-issue-61648.G-4pz0.rst | 1 + 4 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-12-15-12-35-28.gh-issue-61648.G-4pz0.rst diff --git a/Lib/doctest.py b/Lib/doctest.py index d109b6c9e373438..114aac62a34e95b 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -1136,6 +1136,8 @@ def _find_lineno(self, obj, source_lines): # Find the line number for functions & methods. if inspect.ismethod(obj): obj = obj.__func__ + if isinstance(obj, property): + obj = obj.fget if inspect.isfunction(obj) and getattr(obj, '__doc__', None): # We don't use `docstring` var here, because `obj` can be changed. 
obj = obj.__code__ diff --git a/Lib/test/doctest_lineno.py b/Lib/test/doctest_lineno.py index 729a68aceaa9901..677c569cf710ebd 100644 --- a/Lib/test/doctest_lineno.py +++ b/Lib/test/doctest_lineno.py @@ -49,5 +49,21 @@ def method_with_doctest(self): 'method_with_doctest' """ + @classmethod + def classmethod_with_doctest(cls): + """ + This has a doctest! + >>> MethodWrapper.classmethod_with_doctest.__name__ + 'classmethod_with_doctest' + """ + + @property + def property_with_doctest(self): + """ + This has a doctest! + >>> MethodWrapper.property_with_doctest.__name__ + 'property_with_doctest' + """ + # https://github.com/python/cpython/issues/99433 str_wrapper = object().__str__ diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 36328f8086c7ad2..46a51007f9644d5 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -670,9 +670,11 @@ def basics(): r""" 30 test.doctest_lineno.ClassWithDoctest None test.doctest_lineno.ClassWithoutDocstring None test.doctest_lineno.MethodWrapper + 53 test.doctest_lineno.MethodWrapper.classmethod_with_doctest 39 test.doctest_lineno.MethodWrapper.method_with_docstring 45 test.doctest_lineno.MethodWrapper.method_with_doctest None test.doctest_lineno.MethodWrapper.method_without_docstring + 61 test.doctest_lineno.MethodWrapper.property_with_doctest 4 test.doctest_lineno.func_with_docstring 12 test.doctest_lineno.func_with_doctest None test.doctest_lineno.func_without_docstring diff --git a/Misc/NEWS.d/next/Library/2023-12-15-12-35-28.gh-issue-61648.G-4pz0.rst b/Misc/NEWS.d/next/Library/2023-12-15-12-35-28.gh-issue-61648.G-4pz0.rst new file mode 100644 index 000000000000000..c841e5c7f7683af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-15-12-35-28.gh-issue-61648.G-4pz0.rst @@ -0,0 +1 @@ +Detect line numbers of properties in doctests. 
From d1a2adfb0820ee730fa3e4bbc4bd88a67aa50666 Mon Sep 17 00:00:00 2001 From: AN Long Date: Fri, 15 Dec 2023 21:42:37 +0800 Subject: [PATCH 10/12] gh-112278: Add retry in WMI tests in case of slow initialization (GH-113154) --- Lib/test/test_wmi.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_wmi.py b/Lib/test/test_wmi.py index 3445702846d8a04..bf8c52e646dc18d 100644 --- a/Lib/test/test_wmi.py +++ b/Lib/test/test_wmi.py @@ -1,17 +1,29 @@ # Test the internal _wmi module on Windows # This is used by the platform module, and potentially others +import time import unittest -from test.support import import_helper, requires_resource +from test.support import import_helper, requires_resource, LOOPBACK_TIMEOUT # Do this first so test will be skipped if module doesn't exist _wmi = import_helper.import_module('_wmi', required_on=['win']) +def wmi_exec_query(query): + # gh-112278: WMI maybe slow response when first call. + try: + return _wmi.exec_query(query) + except WindowsError as e: + if e.winerror != 258: + raise + time.sleep(LOOPBACK_TIMEOUT) + return _wmi.exec_query(query) + + class WmiTests(unittest.TestCase): def test_wmi_query_os_version(self): - r = _wmi.exec_query("SELECT Version FROM Win32_OperatingSystem").split("\0") + r = wmi_exec_query("SELECT Version FROM Win32_OperatingSystem").split("\0") self.assertEqual(1, len(r)) k, eq, v = r[0].partition("=") self.assertEqual("=", eq, r[0]) @@ -28,7 +40,7 @@ def test_wmi_query_repeated(self): def test_wmi_query_error(self): # Invalid queries fail with OSError try: - _wmi.exec_query("SELECT InvalidColumnName FROM InvalidTableName") + wmi_exec_query("SELECT InvalidColumnName FROM InvalidTableName") except OSError as ex: if ex.winerror & 0xFFFFFFFF == 0x80041010: # This is the expected error code. 
All others should fail the test @@ -42,7 +54,7 @@ def test_wmi_query_repeated_error(self): def test_wmi_query_not_select(self): # Queries other than SELECT are blocked to avoid potential exploits with self.assertRaises(ValueError): - _wmi.exec_query("not select, just in case someone tries something") + wmi_exec_query("not select, just in case someone tries something") @requires_resource('cpu') def test_wmi_query_overflow(self): @@ -50,11 +62,11 @@ def test_wmi_query_overflow(self): # Test multiple times to ensure consistency for _ in range(2): with self.assertRaises(OSError): - _wmi.exec_query("SELECT * FROM CIM_DataFile") + wmi_exec_query("SELECT * FROM CIM_DataFile") def test_wmi_query_multiple_rows(self): # Multiple instances should have an extra null separator - r = _wmi.exec_query("SELECT ProcessId FROM Win32_Process WHERE ProcessId < 1000") + r = wmi_exec_query("SELECT ProcessId FROM Win32_Process WHERE ProcessId < 1000") self.assertFalse(r.startswith("\0"), r) self.assertFalse(r.endswith("\0"), r) it = iter(r.split("\0")) @@ -69,6 +81,6 @@ def test_wmi_query_threads(self): from concurrent.futures import ThreadPoolExecutor query = "SELECT ProcessId FROM Win32_Process WHERE ProcessId < 1000" with ThreadPoolExecutor(4) as pool: - task = [pool.submit(_wmi.exec_query, query) for _ in range(32)] + task = [pool.submit(wmi_exec_query, query) for _ in range(32)] for t in task: self.assertRegex(t.result(), "ProcessId=") From 4026ad5b2c595b855a3605420cfa0e3d49e63db7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 15 Dec 2023 15:57:49 +0100 Subject: [PATCH 11/12] gh-113009: Fix multiprocessing Process.terminate() on Windows (#113128) On Windows, Process.terminate() no longer sets the returncode attribute to always call WaitForSingleObject() in Process.wait(). Previously, sometimes the process was still running after TerminateProcess() even if GetExitCodeProcess() is not STILL_ACTIVE. 
--- Lib/multiprocessing/popen_spawn_win32.py | 54 ++++++++++--------- ...-12-14-19-00-29.gh-issue-113009.6LNdjz.rst | 5 ++ 2 files changed, 35 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2023-12-14-19-00-29.gh-issue-113009.6LNdjz.rst diff --git a/Lib/multiprocessing/popen_spawn_win32.py b/Lib/multiprocessing/popen_spawn_win32.py index af044305709e56c..49d4c7eea224111 100644 --- a/Lib/multiprocessing/popen_spawn_win32.py +++ b/Lib/multiprocessing/popen_spawn_win32.py @@ -101,18 +101,20 @@ def duplicate_for_child(self, handle): return reduction.duplicate(handle, self.sentinel) def wait(self, timeout=None): - if self.returncode is None: - if timeout is None: - msecs = _winapi.INFINITE - else: - msecs = max(0, int(timeout * 1000 + 0.5)) - - res = _winapi.WaitForSingleObject(int(self._handle), msecs) - if res == _winapi.WAIT_OBJECT_0: - code = _winapi.GetExitCodeProcess(self._handle) - if code == TERMINATE: - code = -signal.SIGTERM - self.returncode = code + if self.returncode is not None: + return self.returncode + + if timeout is None: + msecs = _winapi.INFINITE + else: + msecs = max(0, int(timeout * 1000 + 0.5)) + + res = _winapi.WaitForSingleObject(int(self._handle), msecs) + if res == _winapi.WAIT_OBJECT_0: + code = _winapi.GetExitCodeProcess(self._handle) + if code == TERMINATE: + code = -signal.SIGTERM + self.returncode = code return self.returncode @@ -120,18 +122,22 @@ def poll(self): return self.wait(timeout=0) def terminate(self): - if self.returncode is None: - try: - _winapi.TerminateProcess(int(self._handle), TERMINATE) - except PermissionError: - # ERROR_ACCESS_DENIED (winerror 5) is received when the - # process already died. 
- code = _winapi.GetExitCodeProcess(int(self._handle)) - if code == _winapi.STILL_ACTIVE: - raise - self.returncode = code - else: - self.returncode = -signal.SIGTERM + if self.returncode is not None: + return + + try: + _winapi.TerminateProcess(int(self._handle), TERMINATE) + except PermissionError: + # ERROR_ACCESS_DENIED (winerror 5) is received when the + # process already died. + code = _winapi.GetExitCodeProcess(int(self._handle)) + if code == _winapi.STILL_ACTIVE: + raise + + # gh-113009: Don't set self.returncode. Even if GetExitCodeProcess() + # returns an exit code different than STILL_ACTIVE, the process can + # still be running. Only set self.returncode once WaitForSingleObject() + # returns WAIT_OBJECT_0 in wait(). kill = terminate diff --git a/Misc/NEWS.d/next/Windows/2023-12-14-19-00-29.gh-issue-113009.6LNdjz.rst b/Misc/NEWS.d/next/Windows/2023-12-14-19-00-29.gh-issue-113009.6LNdjz.rst new file mode 100644 index 000000000000000..6fd7f7f9afdfa20 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-12-14-19-00-29.gh-issue-113009.6LNdjz.rst @@ -0,0 +1,5 @@ +:mod:`multiprocessing`: On Windows, fix a race condition in +``Process.terminate()``: no longer set the ``returncode`` attribute to +always call ``WaitForSingleObject()`` in ``Process.wait()``. Previously, +sometimes the process was still running after ``TerminateProcess()`` even if +``GetExitCodeProcess()`` is not ``STILL_ACTIVE``. Patch by Victor Stinner. From 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 15 Dec 2023 16:10:40 +0100 Subject: [PATCH 12/12] [CVE-2023-27043] gh-102988: Reject malformed addresses in email.parseaddr() (#111116) Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). Add an optional 'strict' parameter to getaddresses() and parseaddr() functions. Patch by Thomas Dwyer. 
Co-Authored-By: Thomas Dwyer --- Doc/library/email.utils.rst | 19 +- Doc/whatsnew/3.13.rst | 13 ++ Lib/email/utils.py | 151 +++++++++++++- Lib/test/test_email/test_email.py | 187 +++++++++++++++++- ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + 5 files changed, 357 insertions(+), 21 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst index 345b64001c1ace1..d693a9bc3933b5c 100644 --- a/Doc/library/email.utils.rst +++ b/Doc/library/email.utils.rst @@ -58,13 +58,18 @@ of the new API. begins with angle brackets, they are stripped off. -.. function:: parseaddr(address) +.. function:: parseaddr(address, *, strict=True) Parse address -- which should be the value of some address-containing field such as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and *email address* parts. Returns a tuple of that information, unless the parse fails, in which case a 2-tuple of ``('', '')`` is returned. + If *strict* is true, use a strict parser which rejects malformed inputs. + + .. versionchanged:: 3.13 + Add *strict* optional parameter and reject malformed inputs by default. + .. function:: formataddr(pair, charset='utf-8') @@ -82,12 +87,15 @@ of the new API. Added the *charset* option. -.. function:: getaddresses(fieldvalues) +.. function:: getaddresses(fieldvalues, *, strict=True) This method returns a list of 2-tuples of the form returned by ``parseaddr()``. *fieldvalues* is a sequence of header field values as might be returned by - :meth:`Message.get_all `. Here's a simple - example that gets all the recipients of a message:: + :meth:`Message.get_all `. + + If *strict* is true, use a strict parser which rejects malformed inputs. + + Here's a simple example that gets all the recipients of a message:: from email.utils import getaddresses @@ -97,6 +105,9 @@ of the new API. 
resent_ccs = msg.get_all('resent-cc', []) all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + .. versionchanged:: 3.13 + Add *strict* optional parameter and reject malformed inputs by default. + .. function:: parsedate(date) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e22257853d83337..4f9643967d20cfa 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -199,6 +199,19 @@ doctest :attr:`doctest.TestResults.skipped` attributes. (Contributed by Victor Stinner in :gh:`108794`.) +email +----- + +* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return + ``('', '')`` 2-tuples in more situations where invalid email addresses are + encountered instead of potentially inaccurate values. Add optional *strict* + parameter to these two functions: use ``strict=False`` to get the old + behavior, accept malformed inputs. + ``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to + check if the *strict* parameter is available. + (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve + the CVE-2023-27043 fix.)
+ glob ---- diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 9175f2fdb6e69e7..103cef61a835388 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -43,6 +43,7 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') + def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 @@ -103,12 +104,127 @@ def formataddr(pair, charset='utf-8'): return address +def _iter_escaped_chars(addr): + pos = 0 + escape = False + for pos, ch in enumerate(addr): + if escape: + yield (pos, '\\' + ch) + escape = False + elif ch == '\\': + escape = True + else: + yield (pos, ch) + if escape: + yield (pos, '\\') + + +def _strip_quoted_realnames(addr): + """Strip real names between quotes.""" + if '"' not in addr: + # Fast path + return addr + + start = 0 + open_pos = None + result = [] + for pos, ch in _iter_escaped_chars(addr): + if ch == '"': + if open_pos is None: + open_pos = pos + else: + if start != open_pos: + result.append(addr[start:open_pos]) + start = pos + 1 + open_pos = None + + if start < len(addr): + result.append(addr[start:]) + + return ''.join(result) -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) - return a.addresslist + +supports_strict_parsing = True + +def getaddresses(fieldvalues, *, strict=True): + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. + + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If strict is true, use a strict parser which rejects malformed inputs. + """ + + # If strict is true, if the resulting list of parsed addresses is greater + # than the number of fieldvalues in the input list, a parsing error has + # occurred and consequently a list containing a single empty 2-tuple [('', + # '')] is returned in its place. 
This is done to avoid invalid output. + # + # Malformed input: getaddresses(['alice@example.com <bob@example.com>']) + # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + # Safe output: [('', '')] + + if not strict: + all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) + return a.addresslist + + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + addr = COMMASPACE.join(fieldvalues) + a = _AddressList(addr) + result = _post_parse_validation(a.addresslist) + + # Treat output as invalid if the number of addresses is not equal to the + # expected number of addresses. + n = 0 + for v in fieldvalues: + # When a comma is used in the Real Name part it is not a delimiter. + # So strip those out before counting the commas. + v = _strip_quoted_realnames(v) + # Expected number of addresses: 1 + number of commas + n += 1 + v.count(',') + if len(result) != n: + return [('', '')] + + return result + + +def _check_parenthesis(addr): + # Ignore parenthesis in quoted real names.
+ addr = _strip_quoted_realnames(addr) + + opens = 0 + for pos, ch in _iter_escaped_chars(addr): + if ch == '(': + opens += 1 + elif ch == ')': + opens -= 1 + if opens < 0: + return False + return (opens == 0) + + +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values def _format_timetuple_and_zone(timetuple, zone): @@ -207,16 +323,33 @@ def parsedate_to_datetime(data): tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): +def parseaddr(addr, *, strict=True): """ Parse addr into its constituent realname and email address parts. Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). + + If strict is True, use a strict parser which rejects malformed inputs. 
""" - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' + if not strict: + addrs = _AddressList(addr).addresslist + if not addrs: + return ('', '') + return addrs[0] + + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 512464f87162cde..39d4ace8d4a1d8c 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -16,6 +16,7 @@ import email import email.policy +import email.utils from email.charset import Charset from email.generator import Generator, DecodedGenerator, BytesGenerator @@ -3337,15 +3338,137 @@ def test_getaddresses_comma_in_name(self): ], ) + def test_parsing_errors(self): + """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" + alice = 'alice@example.org' + bob = 'bob@example.com' + empty = ('', '') + + # Test utils.getaddresses() and utils.parseaddr() on malformed email + # addresses: default behavior (strict=True) rejects malformed address, + # and strict=False which tolerates malformed address. 
+ for invalid_separator, expected_non_strict in ( + ('(', [(f'<{bob}>', alice)]), + (')', [('', alice), empty, ('', bob)]), + ('<', [('', alice), empty, ('', bob), empty]), + ('>', [('', alice), empty, ('', bob)]), + ('[', [('', f'{alice}[<{bob}>]')]), + (']', [('', alice), empty, ('', bob)]), + ('@', [empty, empty, ('', bob)]), + (';', [('', alice), empty, ('', bob)]), + (':', [('', alice), ('', bob)]), + ('.', [('', alice + '.'), ('', bob)]), + ('"', [('', alice), ('', f'<{bob}>')]), + ): + address = f'{alice}{invalid_separator}<{bob}>' + with self.subTest(address=address): + self.assertEqual(utils.getaddresses([address]), + [empty]) + self.assertEqual(utils.getaddresses([address], strict=False), + expected_non_strict) + + self.assertEqual(utils.parseaddr([address]), + empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Comma (',') is treated differently depending on strict parameter. + # Comma without quotes. + address = f'{alice},<{bob}>' + self.assertEqual(utils.getaddresses([address]), + [('', alice), ('', bob)]) + self.assertEqual(utils.getaddresses([address], strict=False), + [('', alice), ('', bob)]) + self.assertEqual(utils.parseaddr([address]), + empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Real name between quotes containing comma. + address = '"Alice, alice@example.org" ' + expected_strict = ('Alice, alice@example.org', 'bob@example.com') + self.assertEqual(utils.getaddresses([address]), [expected_strict]) + self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) + self.assertEqual(utils.parseaddr([address]), expected_strict) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Valid parenthesis in comments. 
+ address = 'alice@example.org (Alice)' + expected_strict = ('Alice', 'alice@example.org') + self.assertEqual(utils.getaddresses([address]), [expected_strict]) + self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) + self.assertEqual(utils.parseaddr([address]), expected_strict) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Invalid parenthesis in comments. + address = 'alice@example.org )Alice(' + self.assertEqual(utils.getaddresses([address]), [empty]) + self.assertEqual(utils.getaddresses([address], strict=False), + [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) + self.assertEqual(utils.parseaddr([address]), empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Two addresses with quotes separated by comma. + address = '"Jane Doe" , "John Doe" ' + self.assertEqual(utils.getaddresses([address]), + [('Jane Doe', 'jane@example.net'), + ('John Doe', 'john@example.net')]) + self.assertEqual(utils.getaddresses([address], strict=False), + [('Jane Doe', 'jane@example.net'), + ('John Doe', 'john@example.net')]) + self.assertEqual(utils.parseaddr([address]), empty) + self.assertEqual(utils.parseaddr([address], strict=False), + ('', address)) + + # Test email.utils.supports_strict_parsing attribute + self.assertEqual(email.utils.supports_strict_parsing, True) + def test_getaddresses_nasty(self): - eq = self.assertEqual - eq(utils.getaddresses(['foo: ;']), [('', '')]) - eq(utils.getaddresses( - ['[]*-- =~$']), - [('', ''), ('', ''), ('', '*--')]) - eq(utils.getaddresses( - ['foo: ;', '"Jason R. Mastaler" ']), - [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) + for addresses, expected in ( + (['"Sürname, Firstname" '], + [('Sürname, Firstname', 'to@example.com')]), + + (['foo: ;'], + [('', '')]), + + (['foo: ;', '"Jason R. Mastaler" '], + [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]), + + ([r'Pete(A nice \) chap) '], + [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), + + (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], + [('', '')]), + + (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], + [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), + + (['John Doe '], + [('John Doe (comment)', 'jdoe@machine.example')]), + + (['"Mary Smith: Personal Account" '], + [('Mary Smith: Personal Account', 'smith@home.example')]), + + (['Undisclosed recipients:;'], + [('', '')]), + + ([r', "Giant; \"Big\" Box" '], + [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), + ): + with self.subTest(addresses=addresses): + self.assertEqual(utils.getaddresses(addresses), + expected) + self.assertEqual(utils.getaddresses(addresses, strict=False), + expected) + + addresses = ['[]*-- =~$'] + self.assertEqual(utils.getaddresses(addresses), + [('', '')]) + self.assertEqual(utils.getaddresses(addresses, strict=False), + [('', ''), ('', ''), ('', '*--')]) def test_getaddresses_embedded_comment(self): """Test proper handling of a nested comment""" @@ -3536,6 +3659,54 @@ def test_mime_classes_policy_argument(self): m = cls(*constructor, policy=email.policy.default) self.assertIs(m.policy, email.policy.default) + def test_iter_escaped_chars(self): + self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), + [(0, 'a'), + (2, '\\\\'), + (3, 'b'), + (5, '\\"'), + (6, 'c'), + (8, '\\\\'), + (9, '"'), + (10, 'd')]) + self.assertEqual(list(utils._iter_escaped_chars('a\\')), + [(0, 'a'), (1, '\\')]) + + def test_strip_quoted_realnames(self): + def check(addr, expected): + self.assertEqual(utils._strip_quoted_realnames(addr), expected) + + check('"Jane Doe" , "John Doe" ', + ' , ') + check(r'"Jane \"Doe\"." 
', + ' ') + + # special cases + check(r'before"name"after', 'beforeafter') + check(r'before"name"', 'before') + check(r'b"name"', 'b') # single char + check(r'"name"after', 'after') + check(r'"name"a', 'a') # single char + check(r'"name"', '') + + # no change + for addr in ( + 'Jane Doe , John Doe ', + 'lone " quote', + ): + self.assertEqual(utils._strip_quoted_realnames(addr), addr) + + + def test_check_parenthesis(self): + addr = 'alice@example.net' + self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) + self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) + self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) + self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) + + # Ignore real name between quotes + self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) + # Test the iterator/generators class TestIterators(TestEmailBase): diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst new file mode 100644 index 000000000000000..3d0e9e4078c9340 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst @@ -0,0 +1,8 @@ +:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now +return ``('', '')`` 2-tuples in more situations where invalid email +addresses are encountered instead of potentially inaccurate values. Add +optional *strict* parameter to these two functions: use ``strict=False`` to +get the old behavior and accept malformed inputs. +``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check +if the *strict* parameter is available. Patch by Thomas Dwyer and Victor +Stinner to improve the CVE-2023-27043 fix.