diff --git a/lib/dependencies/inspect-implementation.ts b/lib/dependencies/inspect-implementation.ts index 8d2b07c8..520d940a 100644 --- a/lib/dependencies/inspect-implementation.ts +++ b/lib/dependencies/inspect-implementation.ts @@ -70,6 +70,20 @@ function createAssets() { path.join(__dirname, '../../pysrc/pytoml/core.py'), path.join(__dirname, '../../pysrc/pytoml/parser.py'), path.join(__dirname, '../../pysrc/pytoml/writer.py'), + + path.join(__dirname, '../../pysrc/packaging/tags.py'), + path.join(__dirname, '../../pysrc/packaging/_typing.py'), + path.join(__dirname, '../../pysrc/packaging/version.py'), + path.join(__dirname, '../../pysrc/packaging/__init__.py'), + path.join(__dirname, '../../pysrc/packaging/utils.py'), + path.join(__dirname, '../../pysrc/packaging/requirements.py'), + path.join(__dirname, '../../pysrc/packaging/_structures.py'), + path.join(__dirname, '../../pysrc/packaging/markers.py'), + path.join(__dirname, '../../pysrc/packaging/__about__.py'), + path.join(__dirname, '../../pysrc/packaging/_compat.py'), + path.join(__dirname, '../../pysrc/packaging/specifiers.py'), + + path.join(__dirname, '../../pysrc/pyparsing.py'), ]; } diff --git a/pysrc/packaging/LICENSE b/pysrc/packaging/LICENSE new file mode 100644 index 00000000..6f62d44e --- /dev/null +++ b/pysrc/packaging/LICENSE @@ -0,0 +1,3 @@ +This software is made available under the terms of *either* of the licenses +found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made +under the terms of *both* these licenses. diff --git a/pysrc/packaging/LICENSE.APACHE b/pysrc/packaging/LICENSE.APACHE new file mode 100644 index 00000000..f433b1a5 --- /dev/null +++ b/pysrc/packaging/LICENSE.APACHE @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/pysrc/packaging/LICENSE.BSD b/pysrc/packaging/LICENSE.BSD new file mode 100644 index 00000000..42ce7b75 --- /dev/null +++ b/pysrc/packaging/LICENSE.BSD @@ -0,0 +1,23 @@ +Copyright (c) Donald Stufft and individual contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pysrc/packaging/__about__.py b/pysrc/packaging/__about__.py new file mode 100644 index 00000000..4c43a968 --- /dev/null +++ b/pysrc/packaging/__about__.py @@ -0,0 +1,27 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +__all__ = [ + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", +] + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "20.9" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014-2019 %s" % __author__ diff --git a/pysrc/packaging/__init__.py b/pysrc/packaging/__init__.py new file mode 100644 index 00000000..a0cf67df --- /dev/null +++ b/pysrc/packaging/__init__.py @@ -0,0 +1,26 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +from .__about__ import ( + __author__, + __copyright__, + __email__, + __license__, + __summary__, + __title__, + __uri__, + __version__, +) + +__all__ = [ + "__title__", + "__summary__", + "__uri__", + "__version__", + "__author__", + "__email__", + "__license__", + "__copyright__", +] diff --git a/pysrc/packaging/_compat.py b/pysrc/packaging/_compat.py new file mode 100644 index 00000000..e54bd4ed --- /dev/null +++ b/pysrc/packaging/_compat.py @@ -0,0 +1,38 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import sys + +from ._typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from typing import Any, Dict, Tuple, Type + + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + +# flake8: noqa + +if PY3: + string_types = (str,) +else: + string_types = (basestring,) + + +def with_metaclass(meta, *bases): + # type: (Type[Any], Tuple[Type[Any], ...]) -> Any + """ + Create a base class with a metaclass. + """ + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. + class metaclass(meta): # type: ignore + def __new__(cls, name, this_bases, d): + # type: (Type[Any], str, Tuple[Any], Dict[Any, Any]) -> Any + return meta(name, bases, d) + + return type.__new__(metaclass, "temporary_class", (), {}) diff --git a/pysrc/packaging/_structures.py b/pysrc/packaging/_structures.py new file mode 100644 index 00000000..800d5c55 --- /dev/null +++ b/pysrc/packaging/_structures.py @@ -0,0 +1,86 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + + +class InfinityType(object): + def __repr__(self): + # type: () -> str + return "Infinity" + + def __hash__(self): + # type: () -> int + return hash(repr(self)) + + def __lt__(self, other): + # type: (object) -> bool + return False + + def __le__(self, other): + # type: (object) -> bool + return False + + def __eq__(self, other): + # type: (object) -> bool + return isinstance(other, self.__class__) + + def __ne__(self, other): + # type: (object) -> bool + return not isinstance(other, self.__class__) + + def __gt__(self, other): + # type: (object) -> bool + return True + + def __ge__(self, other): + # type: (object) -> bool + return True + + def __neg__(self): + # type: (object) -> NegativeInfinityType + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType(object): + def __repr__(self): + # type: () -> str + return "-Infinity" + + def __hash__(self): + # type: () -> int + return hash(repr(self)) + + def __lt__(self, other): + # type: (object) -> bool + return True + + def __le__(self, other): + # type: (object) -> bool + return True + + def __eq__(self, other): + # type: (object) -> bool + return isinstance(other, self.__class__) + + def __ne__(self, other): + # type: (object) -> bool + return not isinstance(other, self.__class__) + + def __gt__(self, other): + # type: (object) -> bool + return False + + def __ge__(self, other): + # type: (object) -> bool + return False + + def __neg__(self): + # type: (object) -> InfinityType + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/pysrc/packaging/_typing.py b/pysrc/packaging/_typing.py new file mode 100644 index 00000000..77a8b918 --- /dev/null +++ b/pysrc/packaging/_typing.py @@ -0,0 +1,48 @@ +"""For neatly implementing static typing in packaging. + +`mypy` - the static type analysis tool we use - uses the `typing` module, which +provides core functionality fundamental to mypy's functioning. + +Generally, `typing` would be imported at runtime and used in that fashion - +it acts as a no-op at runtime and does not have any run-time overhead by +design. + +As it turns out, `typing` is not vendorable - it uses separate sources for +Python 2/Python 3. Thus, this codebase can not expect it to be present. +To work around this, mypy allows the typing import to be behind a False-y +optional to prevent it from running at runtime and type-comments can be used +to remove the need for the types to be accessible directly during runtime. + +This module provides the False-y guard in a nicely named fashion so that a +curious maintainer can reach here to read this. + +In packaging, all static-typing related imports should be guarded as follows: + + from packaging._typing import TYPE_CHECKING + + if TYPE_CHECKING: + from typing import ... + +Ref: https://github.com/python/mypy/issues/3216 +""" + +__all__ = ["TYPE_CHECKING", "cast"] + +# The TYPE_CHECKING constant defined by the typing module is False at runtime +# but True while type checking. +if False: # pragma: no cover + from typing import TYPE_CHECKING +else: + TYPE_CHECKING = False + +# typing's cast syntax requires calling typing.cast at runtime, but we don't +# want to import typing at runtime. Here, we inform the type checkers that +# we're importing `typing.cast` as `cast` and re-implement typing.cast's +# runtime behavior in a block that is ignored by type checkers. +if TYPE_CHECKING: # pragma: no cover + # not executed at runtime + from typing import cast +else: + # executed at runtime + def cast(type_, value): # noqa + return value diff --git a/pysrc/packaging/markers.py b/pysrc/packaging/markers.py new file mode 100644 index 00000000..e0330ab6 --- /dev/null +++ b/pysrc/packaging/markers.py @@ -0,0 +1,336 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import operator +import os +import platform +import sys + +from pyparsing import ( # noqa: N817 + Forward, + Group, + Literal as L, + ParseException, + ParseResults, + QuotedString, + ZeroOrMore, + stringEnd, + stringStart, +) + +from ._compat import string_types +from ._typing import TYPE_CHECKING +from .specifiers import InvalidSpecifier, Specifier + +if TYPE_CHECKING: # pragma: no cover + from typing import Any, Callable, Dict, List, Optional, Tuple, Union + + Operator = Callable[[str, str], bool] + + +__all__ = [ + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", +] + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +class Node(object): + def __init__(self, value): + # type: (Any) -> None + self.value = value + + def __str__(self): + # type: () -> str + return str(self.value) + + def __repr__(self): + # type: () -> str + return "<{0}({1!r})>".format(self.__class__.__name__, str(self)) + + def serialize(self): + # type: () -> str + raise NotImplementedError + + +class Variable(Node): + def serialize(self): + # type: () -> str + return str(self) + + +class Value(Node): + def serialize(self): + # type: () -> str + return '"{0}"'.format(self) + + +class Op(Node): + def serialize(self): + # type: () -> str + return str(self) + + +VARIABLE = ( + L("implementation_version") + | L("platform_python_implementation") + | L("implementation_name") + | L("python_full_version") + | L("platform_release") + | L("platform_version") + | L("platform_machine") + | L("platform_system") + | L("python_version") + | L("sys_platform") + | L("os_name") + | L("os.name") # PEP-345 + | L("sys.platform") # PEP-345 + | L("platform.version") # PEP-345 + | L("platform.machine") # PEP-345 + | L("platform.python_implementation") # PEP-345 + | L("python_implementation") # undocumented setuptools legacy + | L("extra") # PEP-508 +) +ALIASES = { + "os.name": "os_name", + "sys.platform": "sys_platform", + "platform.version": "platform_version", + "platform.machine": "platform_machine", + "platform.python_implementation": "platform_python_implementation", + "python_implementation": "platform_python_implementation", +} +VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) + +VERSION_CMP = ( + L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") +) + +MARKER_OP = VERSION_CMP | L("not in") | L("in") +MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) + +MARKER_VALUE = QuotedString("'") | QuotedString('"') +MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) + +BOOLOP = L("and") | L("or") + +MARKER_VAR = VARIABLE | MARKER_VALUE + +MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) +MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) + +LPAREN = L("(").suppress() +RPAREN = L(")").suppress() + +MARKER_EXPR = Forward() +MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) +MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) + +MARKER = stringStart + MARKER_EXPR + stringEnd + + +def _coerce_parse_result(results): + # type: (Union[ParseResults, List[Any]]) -> List[Any] + if isinstance(results, ParseResults): + return [_coerce_parse_result(i) for i in results] + else: + return results + + +def _format_marker(marker, first=True): + # type: (Union[List[str], Tuple[Node, ...], str], Optional[bool]) -> str + + assert isinstance(marker, (list, tuple, string_types)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} # type: Dict[str, Operator] + + +def _eval_op(lhs, op, rhs): + # type: (str, Op, str) -> bool + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs) + + oper = _operators.get(op.serialize()) # type: Optional[Operator] + if oper is None: + raise UndefinedComparison( + "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs) + ) + + return oper(lhs, rhs) + + +class Undefined(object): + pass + + +_undefined = Undefined() + + +def _get_env(environment, name): + # type: (Dict[str, str], str) -> str + value = environment.get(name, _undefined) # type: Union[str, Undefined] + + if isinstance(value, Undefined): + raise UndefinedEnvironmentName( + "{0!r} does not exist in evaluation environment.".format(name) + ) + + return value + + +def _evaluate_markers(markers, environment): + # type: (List[Any], Dict[str, str]) -> bool + groups = [[]] # type: List[List[bool]] + + for marker in markers: + assert isinstance(marker, (list, tuple, string_types)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + lhs_value = _get_env(environment, lhs.value) + rhs_value = rhs.value + else: + lhs_value = lhs.value + rhs_value = _get_env(environment, rhs.value) + + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info): + # type: (sys._version_info) -> str + version = "{0.major}.{0.minor}.{0.micro}".format(info) + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment(): + # type: () -> Dict[str, str] + if hasattr(sys, "implementation"): + # Ignoring the `sys.implementation` reference for type checking due to + # mypy not liking that the attribute doesn't exist in Python 2.7 when + # run with the `--py27` flag. + iver = format_full_version(sys.implementation.version) # type: ignore + implementation_name = sys.implementation.name # type: ignore + else: + iver = "0" + implementation_name = "" + + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker(object): + def __init__(self, marker): + # type: (str) -> None + try: + self._markers = _coerce_parse_result(MARKER.parseString(marker)) + except ParseException as e: + err_str = "Invalid marker: {0!r}, parse error at {1!r}".format( + marker, marker[e.loc : e.loc + 8] + ) + raise InvalidMarker(err_str) + + def __str__(self): + # type: () -> str + return _format_marker(self._markers) + + def __repr__(self): + # type: () -> str + return "".format(str(self)) + + def evaluate(self, environment=None): + # type: (Optional[Dict[str, str]]) -> bool + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. + + The environment is determined from the current Python process. + """ + current_environment = default_environment() + if environment is not None: + current_environment.update(environment) + + return _evaluate_markers(self._markers, current_environment) diff --git a/pysrc/packaging/py.typed b/pysrc/packaging/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/pysrc/packaging/requirements.py b/pysrc/packaging/requirements.py new file mode 100644 index 00000000..aa69d50d --- /dev/null +++ b/pysrc/packaging/requirements.py @@ -0,0 +1,160 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import re +import string +import sys + +from pyparsing import ( # noqa: N817 + Combine, + Literal as L, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + originalTextFor, + stringEnd, + stringStart, +) + +from ._typing import TYPE_CHECKING +from .markers import MARKER_EXPR, Marker +from .specifiers import LegacySpecifier, Specifier, SpecifierSet + +if sys.version_info[0] >= 3: + from urllib import parse as urlparse # pragma: no cover +else: # pragma: no cover + import urlparse + + +if TYPE_CHECKING: # pragma: no cover + from typing import List, Optional as TOptional, Set + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +ALPHANUM = Word(string.ascii_letters + string.digits) + +LBRACKET = L("[").suppress() +RBRACKET = L("]").suppress() +LPAREN = L("(").suppress() +RPAREN = L(")").suppress() +COMMA = L(",").suppress() +SEMICOLON = L(";").suppress() +AT = L("@").suppress() + +PUNCTUATION = Word("-_.") +IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) +IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) + +NAME = IDENTIFIER("name") +EXTRA = IDENTIFIER + +URI = Regex(r"[^ ]+")("url") +URL = AT + URI + +EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) +EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") + +VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) +VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) + +VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY +VERSION_MANY = Combine( + VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False +)("_raw_spec") +_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY)) +_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") + +VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") +VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) + +MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") +MARKER_EXPR.setParseAction( + lambda s, l, t: Marker(s[t._original_start : t._original_end]) +) +MARKER_SEPARATOR = SEMICOLON +MARKER = MARKER_SEPARATOR + MARKER_EXPR + +VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) +URL_AND_MARKER = URL + Optional(MARKER) + +NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) + +REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd +# pyparsing isn't thread safe during initialization, so we do it eagerly, see +# issue #104 +REQUIREMENT.parseString("x[]") + + +class Requirement(object): + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string): + # type: (str) -> None + try: + req = REQUIREMENT.parseString(requirement_string) + except ParseException as e: + raise InvalidRequirement( + 'Parse error at "{0!r}": {1}'.format( + requirement_string[e.loc : e.loc + 8], e.msg + ) + ) + + self.name = req.name # type: str + if req.url: + parsed_url = urlparse.urlparse(req.url) + if parsed_url.scheme == "file": + if urlparse.urlunparse(parsed_url) != req.url: + raise InvalidRequirement("Invalid URL given") + elif not (parsed_url.scheme and parsed_url.netloc) or ( + not parsed_url.scheme and not parsed_url.netloc + ): + raise InvalidRequirement("Invalid URL: {0}".format(req.url)) + self.url = req.url # type: TOptional[str] + else: + self.url = None + self.extras = set(req.extras.asList() if req.extras else []) # type: Set[str] + self.specifier = SpecifierSet(req.specifier) # type: SpecifierSet + self.marker = req.marker if req.marker else None # type: TOptional[Marker] + + def __str__(self): + # type: () -> str + parts = [self.name] # type: List[str] + + if self.extras: + parts.append("[{0}]".format(",".join(sorted(self.extras)))) + + if self.specifier: + parts.append(str(self.specifier)) + + if self.url: + parts.append("@ {0}".format(self.url)) + if self.marker: + parts.append(" ") + + if self.marker: + parts.append("; {0}".format(self.marker)) + + return "".join(parts) + + def __repr__(self): + # type: () -> str + return "".format(str(self)) diff --git a/pysrc/packaging/specifiers.py b/pysrc/packaging/specifiers.py new file mode 100644 index 00000000..a6a83c1f --- /dev/null +++ b/pysrc/packaging/specifiers.py @@ -0,0 +1,864 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import abc +import functools +import itertools +import re +import warnings + +from ._compat import string_types, with_metaclass +from ._typing import TYPE_CHECKING +from .utils import canonicalize_version +from .version import LegacyVersion, Version, parse + +if TYPE_CHECKING: # pragma: no cover + from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union + + ParsedVersion = Union[Version, LegacyVersion] + UnparsedVersion = Union[Version, LegacyVersion, str] + CallableOperator = Callable[[ParsedVersion, str], bool] + + +class InvalidSpecifier(ValueError): + """ + An invalid specifier was found, users should refer to PEP 440. + """ + + +class BaseSpecifier(with_metaclass(abc.ABCMeta, object)): # type: ignore + @abc.abstractmethod + def __str__(self): + # type: () -> str + """ + Returns the str representation of this Specifier like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self): + # type: () -> int + """ + Returns a hash value for this Specifier like object. + """ + + @abc.abstractmethod + def __eq__(self, other): + # type: (object) -> bool + """ + Returns a boolean representing whether or not the two Specifier like + objects are equal. + """ + + @abc.abstractmethod + def __ne__(self, other): + # type: (object) -> bool + """ + Returns a boolean representing whether or not the two Specifier like + objects are not equal. + """ + + @abc.abstractproperty + def prereleases(self): + # type: () -> Optional[bool] + """ + Returns whether or not pre-releases as a whole are allowed by this + specifier. + """ + + @prereleases.setter + def prereleases(self, value): + # type: (bool) -> None + """ + Sets whether or not pre-releases as a whole are allowed by this + specifier. + """ + + @abc.abstractmethod + def contains(self, item, prereleases=None): + # type: (str, Optional[bool]) -> bool + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter(self, iterable, prereleases=None): + # type: (Iterable[UnparsedVersion], Optional[bool]) -> Iterable[UnparsedVersion] + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class _IndividualSpecifier(BaseSpecifier): + + _operators = {} # type: Dict[str, str] + + def __init__(self, spec="", prereleases=None): + # type: (str, Optional[bool]) -> None + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec)) + + self._spec = ( + match.group("operator").strip(), + match.group("version").strip(), + ) # type: Tuple[str, str] + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + def __repr__(self): + # type: () -> str + pre = ( + ", prereleases={0!r}".format(self.prereleases) + if self._prereleases is not None + else "" + ) + + return "<{0}({1!r}{2})>".format(self.__class__.__name__, str(self), pre) + + def __str__(self): + # type: () -> str + return "{0}{1}".format(*self._spec) + + @property + def _canonical_spec(self): + # type: () -> Tuple[str, Union[Version, str]] + return self._spec[0], canonicalize_version(self._spec[1]) + + def __hash__(self): + # type: () -> int + return hash(self._canonical_spec) + + def __eq__(self, other): + # type: (object) -> bool + if isinstance(other, string_types): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def __ne__(self, other): + # type: (object) -> bool + if isinstance(other, string_types): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._spec != other._spec + + def _get_operator(self, op): + # type: (str) -> CallableOperator + operator_callable = getattr( + self, "_compare_{0}".format(self._operators[op]) + ) # type: CallableOperator + return operator_callable + + def _coerce_version(self, version): + # type: (UnparsedVersion) -> ParsedVersion + if not isinstance(version, (LegacyVersion, Version)): + version = parse(version) + return version + + @property + def operator(self): + # type: () -> str + return self._spec[0] + + @property + def version(self): + # type: () -> str + return self._spec[1] + + @property + def prereleases(self): + # type: () -> Optional[bool] + return self._prereleases + + @prereleases.setter + def prereleases(self, value): + # type: (bool) -> None + self._prereleases = value + + def __contains__(self, item): + # type: (str) -> bool + return self.contains(item) + + def contains(self, item, prereleases=None): + # type: (UnparsedVersion, Optional[bool]) -> bool + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version or LegacyVersion, this allows us to have + # a shortcut for ``"2.0" in Specifier(">=2") + normalized_item = self._coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable = self._get_operator(self.operator) # type: CallableOperator + return operator_callable(normalized_item, self.version) + + def filter(self, iterable, prereleases=None): + # type: (Iterable[UnparsedVersion], Optional[bool]) -> Iterable[UnparsedVersion] + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = self._coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later incase nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +class LegacySpecifier(_IndividualSpecifier): + + _regex_str = r""" + (?P(==|!=|<=|>=|<|>)) + \s* + (?P + [^,;\s)]* # Since this is a "legacy" specifier, and the version + # string can be just about anything, we match everything + # except for whitespace, a semi-colon for marker support, + # a closing paren since versions can be enclosed in + # them, and a comma since it's a version separator. + ) + """ + + _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + + _operators = { + "==": "equal", + "!=": "not_equal", + "<=": "less_than_equal", + ">=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + } + + def __init__(self, spec="", prereleases=None): + # type: (str, Optional[bool]) -> None + super(LegacySpecifier, self).__init__(spec, prereleases) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def _coerce_version(self, version): + # type: (Union[ParsedVersion, str]) -> LegacyVersion + if not isinstance(version, LegacyVersion): + version = LegacyVersion(str(version)) + return version + + def _compare_equal(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective == self._coerce_version(spec) + + def _compare_not_equal(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective != self._coerce_version(spec) + + def _compare_less_than_equal(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective <= self._coerce_version(spec) + + def _compare_greater_than_equal(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective >= self._coerce_version(spec) + + def _compare_less_than(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective < self._coerce_version(spec) + + def _compare_greater_than(self, prospective, spec): + # type: (LegacyVersion, str) -> bool + return prospective > self._coerce_version(spec) + + +def _require_version_compare( + fn, # type: (Callable[[Specifier, ParsedVersion, str], bool]) +): + # type: (...) -> Callable[[Specifier, ParsedVersion, str], bool] + @functools.wraps(fn) + def wrapped(self, prospective, spec): + # type: (Specifier, ParsedVersion, str) -> bool + if not isinstance(prospective, Version): + return False + return fn(self, prospective, spec) + + return wrapped + + +class Specifier(_IndividualSpecifier): + + _regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s]* # We just match everything, except for whitespace + # since we are only testing for strict identity. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + (?: # pre release + [-_\.]? + (a|b|c|rc|alpha|beta|pre|preview) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + + # You cannot use a wild card and a dev or local version + # together so group them with a | and make them optional. + (?: + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + | + \.\* # Wild card syntax of .* + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (a|b|c|rc|alpha|beta|pre|preview) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + @_require_version_compare + def _compare_compatible(self, prospective, spec): + # type: (ParsedVersion, str) -> bool + + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore post and dev releases and we want to treat the pre-release as + # it's own separate segment. + prefix = ".".join( + list( + itertools.takewhile( + lambda x: (not x.startswith("post") and not x.startswith("dev")), + _version_split(spec), + ) + )[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + @_require_version_compare + def _compare_equal(self, prospective, spec): + # type: (ParsedVersion, str) -> bool + + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + prospective = Version(prospective.public) + # Split the spec out by dots, and pretend that there is an implicit + # dot in between a release segment and a pre-release segment. + split_spec = _version_split(spec[:-2]) # Remove the trailing .* + + # Split the prospective version out by dots, and pretend that there + # is an implicit dot in between a release segment and a pre-release + # segment. + split_prospective = _version_split(str(prospective)) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = split_prospective[: len(split_spec)] + + # Pad out our two sides with zeros so that they both equal the same + # length. + padded_spec, padded_prospective = _pad_version( + split_spec, shortened_prospective + ) + + return padded_prospective == padded_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + @_require_version_compare + def _compare_not_equal(self, prospective, spec): + # type: (ParsedVersion, str) -> bool + return not self._compare_equal(prospective, spec) + + @_require_version_compare + def _compare_less_than_equal(self, prospective, spec): + # type: (ParsedVersion, str) -> bool + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + @_require_version_compare + def _compare_greater_than_equal(self, prospective, spec): + # type: (ParsedVersion, str) -> bool + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + @_require_version_compare + def _compare_less_than(self, prospective, spec_str): + # type: (ParsedVersion, str) -> bool + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + @_require_version_compare + def _compare_greater_than(self, prospective, spec_str): + # type: (ParsedVersion, str) -> bool + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective, spec): + # type: (Version, str) -> bool + return str(prospective).lower() == str(spec).lower() + + @property + def prereleases(self): + # type: () -> bool + + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if parse(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value): + # type: (bool) -> None + self._prereleases = value + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version): + # type: (str) -> List[str] + result = [] # type: List[str] + for item in version.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _pad_version(left, right): + # type: (List[str], List[str]) -> Tuple[List[str], List[str]] + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split))) + + +class SpecifierSet(BaseSpecifier): + def __init__(self, specifiers="", prereleases=None): + # type: (str, Optional[bool]) -> None + + # Split on , to break each individual specifier into it's own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Parsed each individual specifier, attempting first to make it a + # Specifier and falling back to a LegacySpecifier. + parsed = set() + for specifier in split_specifiers: + try: + parsed.add(Specifier(specifier)) + except InvalidSpecifier: + parsed.add(LegacySpecifier(specifier)) + + # Turn our parsed specifiers into a frozen set and save them for later. + self._specs = frozenset(parsed) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + def __repr__(self): + # type: () -> str + pre = ( + ", prereleases={0!r}".format(self.prereleases) + if self._prereleases is not None + else "" + ) + + return "".format(str(self), pre) + + def __str__(self): + # type: () -> str + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self): + # type: () -> int + return hash(self._specs) + + def __and__(self, other): + # type: (Union[SpecifierSet, str]) -> SpecifierSet + if isinstance(other, string_types): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease " + "overrides." + ) + + return specifier + + def __eq__(self, other): + # type: (object) -> bool + if isinstance(other, (string_types, _IndividualSpecifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __ne__(self, other): + # type: (object) -> bool + if isinstance(other, (string_types, _IndividualSpecifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs != other._specs + + def __len__(self): + # type: () -> int + return len(self._specs) + + def __iter__(self): + # type: () -> Iterator[_IndividualSpecifier] + return iter(self._specs) + + @property + def prereleases(self): + # type: () -> Optional[bool] + + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value): + # type: (bool) -> None + self._prereleases = value + + def __contains__(self, item): + # type: (Union[ParsedVersion, str]) -> bool + return self.contains(item) + + def contains(self, item, prereleases=None): + # type: (Union[ParsedVersion, str], Optional[bool]) -> bool + + # Ensure that our item is a Version or LegacyVersion instance. + if not isinstance(item, (LegacyVersion, Version)): + item = parse(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, + iterable, # type: Iterable[Union[ParsedVersion, str]] + prereleases=None, # type: Optional[bool] + ): + # type: (...) -> Iterable[Union[ParsedVersion, str]] + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iterable + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases, and which will filter out LegacyVersion in general. + else: + filtered = [] # type: List[Union[ParsedVersion, str]] + found_prereleases = [] # type: List[Union[ParsedVersion, str]] + + for item in iterable: + # Ensure that we some kind of Version class for this item. + if not isinstance(item, (LegacyVersion, Version)): + parsed_version = parse(item) + else: + parsed_version = item + + # Filter out any item which is parsed as a LegacyVersion + if isinstance(parsed_version, LegacyVersion): + continue + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return found_prereleases + + return filtered diff --git a/pysrc/packaging/tags.py b/pysrc/packaging/tags.py new file mode 100644 index 00000000..d637f1b6 --- /dev/null +++ b/pysrc/packaging/tags.py @@ -0,0 +1,866 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from __future__ import absolute_import + +import distutils.util + +try: + from importlib.machinery import EXTENSION_SUFFIXES +except ImportError: # pragma: no cover + import imp + + EXTENSION_SUFFIXES = [x[0] for x in imp.get_suffixes()] + del imp +import collections +import logging +import os +import platform +import re +import struct +import sys +import sysconfig +import warnings + +from ._typing import TYPE_CHECKING, cast + +if TYPE_CHECKING: # pragma: no cover + from typing import ( + IO, + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + ) + + PythonVersion = Sequence[int] + MacVersion = Tuple[int, int] + GlibcVersion = Tuple[int, int] + + +logger = logging.getLogger(__name__) + +INTERPRETER_SHORT_NAMES = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} # type: Dict[str, str] + + +_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR = collections.defaultdict(lambda: 50) # type: Dict[int, int] +glibcVersion = collections.namedtuple("Version", ["major", "minor"]) + + +class Tag(object): + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter, abi, platform): + # type: (str, str, str) -> None + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self): + # type: () -> str + return self._interpreter + + @property + def abi(self): + # type: () -> str + return self._abi + + @property + def platform(self): + # type: () -> str + return self._platform + + def __eq__(self, other): + # type: (object) -> bool + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self.platform == other.platform) + and (self.abi == other.abi) + and (self.interpreter == other.interpreter) + ) + + def __hash__(self): + # type: () -> int + return self._hash + + def __str__(self): + # type: () -> str + return "{}-{}-{}".format(self._interpreter, self._abi, self._platform) + + def __repr__(self): + # type: () -> str + return "<{self} @ {self_id}>".format(self=self, self_id=id(self)) + + +def parse_tag(tag): + # type: (str) -> FrozenSet[Tag] + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _warn_keyword_parameter(func_name, kwargs): + # type: (str, Dict[str, bool]) -> bool + """ + Backwards-compatibility with Python 2.7 to allow treating 'warn' as keyword-only. + """ + if not kwargs: + return False + elif len(kwargs) > 1 or "warn" not in kwargs: + kwargs.pop("warn", None) + arg = next(iter(kwargs.keys())) + raise TypeError( + "{}() got an unexpected keyword argument {!r}".format(func_name, arg) + ) + return kwargs["warn"] + + +def _get_config_var(name, warn=False): + # type: (str, bool) -> Union[int, str, None] + value = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string): + # type: (str) -> str + return string.replace(".", "_").replace("-", "_") + + +def _abi3_applies(python_version): + # type: (PythonVersion) -> bool + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) + + +def _cpython_abis(py_version, warn=False): + # type: (PythonVersion, bool) -> List[str] + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append("cp{version}".format(version=version)) + abis.insert( + 0, + "cp{version}{debug}{pymalloc}{ucs4}".format( + version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4 + ), + ) + return abis + + +def cpython_tags( + python_version=None, # type: Optional[PythonVersion] + abis=None, # type: Optional[Iterable[str]] + platforms=None, # type: Optional[Iterable[str]] + **kwargs # type: bool +): + # type: (...) -> Iterator[Tag] + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + warn = _warn_keyword_parameter("cpython_tags", kwargs) + if not python_version: + python_version = sys.version_info[:2] + + interpreter = "cp{}".format(_version_nodot(python_version[:2])) + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or _platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + if _abi3_applies(python_version): + for tag in (Tag(interpreter, "abi3", platform_) for platform_ in platforms): + yield tag + for tag in (Tag(interpreter, "none", platform_) for platform_ in platforms): + yield tag + + if _abi3_applies(python_version): + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi(): + # type: () -> Iterator[str] + abi = sysconfig.get_config_var("SOABI") + if abi: + yield _normalize_string(abi) + + +def generic_tags( + interpreter=None, # type: Optional[str] + abis=None, # type: Optional[Iterable[str]] + platforms=None, # type: Optional[Iterable[str]] + **kwargs # type: bool +): + # type: (...) -> Iterator[Tag] + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + warn = _warn_keyword_parameter("generic_tags", kwargs) + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + platforms = list(platforms or _platform_tags()) + abis = list(abis) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version): + # type: (PythonVersion) -> Iterator[str] + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield "py{version}".format(version=_version_nodot(py_version[:2])) + yield "py{major}".format(major=py_version[0]) + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield "py{version}".format(version=_version_nodot((py_version[0], minor))) + + +def compatible_tags( + python_version=None, # type: Optional[PythonVersion] + interpreter=None, # type: Optional[str] + platforms=None, # type: Optional[Iterable[str]] +): + # type: (...) -> Iterator[Tag] + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or _platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch, is_32bit=_32_BIT_INTERPRETER): + # type: (str, bool) -> str + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version, cpu_arch): + # type: (MacVersion, str) -> List[str] + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms(version=None, arch=None): + # type: (Optional[MacVersion], Optional[str]) -> Iterator[str] + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() # type: ignore + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +# From PEP 513, PEP 600 +def _is_manylinux_compatible(name, arch, glibc_version): + # type: (str, str, GlibcVersion) -> bool + sys_glibc = _get_glibc_version() + if sys_glibc < glibc_version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + pass + else: + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible( + glibc_version[0], glibc_version[1], arch + ) + if result is not None: + return bool(result) + else: + if glibc_version == (2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if glibc_version == (2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if glibc_version == (2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +def _glibc_version_string(): + # type: () -> Optional[str] + # Returns glibc version string, or None if not using glibc. + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _glibc_version_string_confstr(): + # type: () -> Optional[str] + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c9d0921ff3d70e1127ca1b71/Lib/platform.py#L175-L183 + try: + # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". + version_string = os.confstr( # type: ignore[attr-defined] # noqa: F821 + "CS_GNU_LIBC_VERSION" + ) + assert version_string is not None + _, version = version_string.split() # type: Tuple[str, str] + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes(): + # type: () -> Optional[str] + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + # Note: typeshed is wrong here so we are ignoring this line. + process_namespace = ctypes.CDLL(None) # type: ignore + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str = gnu_get_libc_version() # type: str + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _parse_glibc_version(version_str): + # type: (str) -> Tuple[int, int] + # Parse glibc version. + # + # We use a regexp instead of str.split because we want to discard any + # random junk that might come after the minor version -- this might happen + # in patched/forked versions of glibc (e.g. Linaro's version of glibc + # uses version strings like "2.20-2014.11"). See gh-3588. + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + "Expected glibc version with 2 components major.minor," + " got: %s" % version_str, + RuntimeWarning, + ) + return -1, -1 + return (int(m.group("major")), int(m.group("minor"))) + + +_glibc_version = [] # type: List[Tuple[int, int]] + + +def _get_glibc_version(): + # type: () -> Tuple[int, int] + if _glibc_version: + return _glibc_version[0] + version_str = _glibc_version_string() + if version_str is None: + _glibc_version.append((-1, -1)) + else: + _glibc_version.append(_parse_glibc_version(version_str)) + return _glibc_version[0] + + +# Python does not provide platform information at sufficient granularity to +# identify the architecture of the running executable in some cases, so we +# determine it dynamically by reading the information from the running +# process. This only applies on Linux, which uses the ELF format. +class _ELFFileHeader(object): + # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header + class _InvalidELFFileHeader(ValueError): + """ + An invalid ELF file header was found. + """ + + ELF_MAGIC_NUMBER = 0x7F454C46 + ELFCLASS32 = 1 + ELFCLASS64 = 2 + ELFDATA2LSB = 1 + ELFDATA2MSB = 2 + EM_386 = 3 + EM_S390 = 22 + EM_ARM = 40 + EM_X86_64 = 62 + EF_ARM_ABIMASK = 0xFF000000 + EF_ARM_ABI_VER5 = 0x05000000 + EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + def __init__(self, file): + # type: (IO[bytes]) -> None + def unpack(fmt): + # type: (str) -> int + try: + (result,) = struct.unpack( + fmt, file.read(struct.calcsize(fmt)) + ) # type: (int, ) + except struct.error: + raise _ELFFileHeader._InvalidELFFileHeader() + return result + + self.e_ident_magic = unpack(">I") + if self.e_ident_magic != self.ELF_MAGIC_NUMBER: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_class = unpack("B") + if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_data = unpack("B") + if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: + raise _ELFFileHeader._InvalidELFFileHeader() + self.e_ident_version = unpack("B") + self.e_ident_osabi = unpack("B") + self.e_ident_abiversion = unpack("B") + self.e_ident_pad = file.read(7) + format_h = "H" + format_i = "I" + format_q = "Q" + format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q + self.e_type = unpack(format_h) + self.e_machine = unpack(format_h) + self.e_version = unpack(format_i) + self.e_entry = unpack(format_p) + self.e_phoff = unpack(format_p) + self.e_shoff = unpack(format_p) + self.e_flags = unpack(format_i) + self.e_ehsize = unpack(format_h) + self.e_phentsize = unpack(format_h) + self.e_phnum = unpack(format_h) + self.e_shentsize = unpack(format_h) + self.e_shnum = unpack(format_h) + self.e_shstrndx = unpack(format_h) + + +def _get_elf_header(): + # type: () -> Optional[_ELFFileHeader] + try: + with open(sys.executable, "rb") as f: + elf_header = _ELFFileHeader(f) + except (IOError, OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): + return None + return elf_header + + +def _is_linux_armhf(): + # type: () -> bool + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_ARM + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABIMASK + ) == elf_header.EF_ARM_ABI_VER5 + result &= ( + elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD + ) == elf_header.EF_ARM_ABI_FLOAT_HARD + return result + + +def _is_linux_i686(): + # type: () -> bool + elf_header = _get_elf_header() + if elf_header is None: + return False + result = elf_header.e_ident_class == elf_header.ELFCLASS32 + result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB + result &= elf_header.e_machine == elf_header.EM_386 + return result + + +def _have_compatible_manylinux_abi(arch): + # type: (str) -> bool + if arch == "armv7l": + return _is_linux_armhf() + if arch == "i686": + return _is_linux_i686() + return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} + + +def _manylinux_tags(linux, arch): + # type: (str, str) -> Iterator[str] + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = glibcVersion(2, 16) + if arch in {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = glibcVersion(2, 4) + current_glibc = glibcVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_max_list.append(glibcVersion(glibc_major, _LAST_GLIBC_MINOR[glibc_major])) + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = (glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_manylinux_compatible(tag, arch, glibc_version): + yield linux.replace("linux", tag) + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_manylinux_compatible(legacy_tag, arch, glibc_version): + yield linux.replace("linux", legacy_tag) + + +def _linux_platforms(is_32bit=_32_BIT_INTERPRETER): + # type: (bool) -> Iterator[str] + linux = _normalize_string(distutils.util.get_platform()) + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv7l" + _, arch = linux.split("_", 1) + if _have_compatible_manylinux_abi(arch): + for tag in _manylinux_tags(linux, arch): + yield tag + yield linux + + +def _generic_platforms(): + # type: () -> Iterator[str] + yield _normalize_string(distutils.util.get_platform()) + + +def _platform_tags(): + # type: () -> Iterator[str] + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name(): + # type: () -> str + """ + Returns the name of the running interpreter. + """ + try: + name = sys.implementation.name # type: ignore + except AttributeError: # pragma: no cover + # Python 2.7 compatibility. + name = platform.python_implementation().lower() + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(**kwargs): + # type: (bool) -> str + """ + Returns the version of the running interpreter. + """ + warn = _warn_keyword_parameter("interpreter_version", kwargs) + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version): + # type: (PythonVersion) -> str + return "".join(map(str, version)) + + +def sys_tags(**kwargs): + # type: (bool) -> Iterator[Tag] + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + warn = _warn_keyword_parameter("sys_tags", kwargs) + + interp_name = interpreter_name() + if interp_name == "cp": + for tag in cpython_tags(warn=warn): + yield tag + else: + for tag in generic_tags(): + yield tag + + for tag in compatible_tags(): + yield tag diff --git a/pysrc/packaging/utils.py b/pysrc/packaging/utils.py new file mode 100644 index 00000000..6e8c2a3e --- /dev/null +++ b/pysrc/packaging/utils.py @@ -0,0 +1,138 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import re + +from ._typing import TYPE_CHECKING, cast +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +if TYPE_CHECKING: # pragma: no cover + from typing import FrozenSet, NewType, Tuple, Union + + BuildTag = Union[Tuple[()], Tuple[int, str]] + NormalizedName = NewType("NormalizedName", str) +else: + BuildTag = tuple + NormalizedName = str + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +_canonicalize_regex = re.compile(r"[-_.]+") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name): + # type: (str) -> NormalizedName + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def canonicalize_version(version): + # type: (Union[Version, str]) -> Union[Version, str] + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if not isinstance(version, Version): + try: + version = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + + parts = [] + + # Epoch + if version.epoch != 0: + parts.append("{0}!".format(version.epoch)) + + # Release segment + # NB: This strips trailing '.0's to normalize + parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in version.release))) + + # Pre-release + if version.pre is not None: + parts.append("".join(str(x) for x in version.pre)) + + # Post-release + if version.post is not None: + parts.append(".post{0}".format(version.post)) + + # Development release + if version.dev is not None: + parts.append(".dev{0}".format(version.dev)) + + # Local version segment + if version.local is not None: + parts.append("+{0}".format(version.local)) + + return "".join(parts) + + +def parse_wheel_filename(filename): + # type: (str) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]] + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + "Invalid wheel filename (extension must be '.whl'): {0}".format(filename) + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + "Invalid wheel filename (wrong number of parts): {0}".format(filename) + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename("Invalid project name: {0}".format(filename)) + name = canonicalize_name(name_part) + version = Version(parts[1]) + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + "Invalid build number: {0} in '{1}'".format(build_part, filename) + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename): + # type: (str) -> Tuple[NormalizedName, Version] + if not filename.endswith(".tar.gz"): + raise InvalidSdistFilename( + "Invalid sdist filename (extension must be '.tar.gz'): {0}".format(filename) + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = filename[:-7].rpartition("-") + if not sep: + raise InvalidSdistFilename("Invalid sdist filename: {0}".format(filename)) + + name = canonicalize_name(name_part) + version = Version(version_part) + return (name, version) diff --git a/pysrc/packaging/version.py b/pysrc/packaging/version.py new file mode 100644 index 00000000..517d91f2 --- /dev/null +++ b/pysrc/packaging/version.py @@ -0,0 +1,556 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +from __future__ import absolute_import, division, print_function + +import collections +import itertools +import re +import warnings + +from ._structures import Infinity, NegativeInfinity +from ._typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union + + from ._structures import InfinityType, NegativeInfinityType + + InfiniteTypes = Union[InfinityType, NegativeInfinityType] + PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] + SubLocalType = Union[InfiniteTypes, int, str] + LocalType = Union[ + NegativeInfinityType, + Tuple[ + Union[ + SubLocalType, + Tuple[SubLocalType, str], + Tuple[NegativeInfinityType, SubLocalType], + ], + ..., + ], + ] + CmpKey = Tuple[ + int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType + ] + LegacyCmpKey = Tuple[int, Tuple[str, ...]] + VersionComparisonMethod = Callable[ + [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + ] + +__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] + + +_Version = collections.namedtuple( + "_Version", ["epoch", "release", "dev", "pre", "post", "local"] +) + + +def parse(version): + # type: (str) -> Union[LegacyVersion, Version] + """ + Parse the given version string and return either a :class:`Version` object + or a :class:`LegacyVersion` object depending on if the given version is + a valid PEP 440 version or a legacy version. + """ + try: + return Version(version) + except InvalidVersion: + return LegacyVersion(version) + + +class InvalidVersion(ValueError): + """ + An invalid version was found, users should refer to PEP 440. + """ + + +class _BaseVersion(object): + _key = None # type: Union[CmpKey, LegacyCmpKey] + + def __hash__(self): + # type: () -> int + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other): + # type: (_BaseVersion) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other): + # type: (_BaseVersion) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other): + # type: (object) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other): + # type: (_BaseVersion) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other): + # type: (_BaseVersion) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other): + # type: (object) -> bool + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +class LegacyVersion(_BaseVersion): + def __init__(self, version): + # type: (str) -> None + self._version = str(version) + self._key = _legacy_cmpkey(self._version) + + warnings.warn( + "Creating a LegacyVersion has been deprecated and will be " + "removed in the next major release", + DeprecationWarning, + ) + + def __str__(self): + # type: () -> str + return self._version + + def __repr__(self): + # type: () -> str + return "".format(repr(str(self))) + + @property + def public(self): + # type: () -> str + return self._version + + @property + def base_version(self): + # type: () -> str + return self._version + + @property + def epoch(self): + # type: () -> int + return -1 + + @property + def release(self): + # type: () -> None + return None + + @property + def pre(self): + # type: () -> None + return None + + @property + def post(self): + # type: () -> None + return None + + @property + def dev(self): + # type: () -> None + return None + + @property + def local(self): + # type: () -> None + return None + + @property + def is_prerelease(self): + # type: () -> bool + return False + + @property + def is_postrelease(self): + # type: () -> bool + return False + + @property + def is_devrelease(self): + # type: () -> bool + return False + + +_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) + +_legacy_version_replacement_map = { + "pre": "c", + "preview": "c", + "-": "final-", + "rc": "c", + "dev": "@", +} + + +def _parse_version_parts(s): + # type: (str) -> Iterator[str] + for part in _legacy_version_component_re.split(s): + part = _legacy_version_replacement_map.get(part, part) + + if not part or part == ".": + continue + + if part[:1] in "0123456789": + # pad for numeric comparison + yield part.zfill(8) + else: + yield "*" + part + + # ensure that alpha/beta/candidate are before final + yield "*final" + + +def _legacy_cmpkey(version): + # type: (str) -> LegacyCmpKey + + # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch + # greater than or equal to 0. This will effectively put the LegacyVersion, + # which uses the defacto standard originally implemented by setuptools, + # as before all PEP 440 versions. + epoch = -1 + + # This scheme is taken from pkg_resources.parse_version setuptools prior to + # it's adoption of the packaging library. + parts = [] # type: List[str] + for part in _parse_version_parts(version.lower()): + if part.startswith("*"): + # remove "-" before a prerelease tag + if part < "*final": + while parts and parts[-1] == "*final-": + parts.pop() + + # remove trailing zeros from each series of numeric parts + while parts and parts[-1] == "00000000": + parts.pop() + + parts.append(part) + + return epoch, tuple(parts) + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    def __init__(self, version):
+        # type: (str) -> None
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion("Invalid version: '{0}'".format(version))
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self):
+        # type: () -> str
+        return "".format(repr(str(self)))
+
+    def __str__(self):
+        # type: () -> str
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append("{0}!".format(self.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(".post{0}".format(self.post))
+
+        # Development release
+        if self.dev is not None:
+            parts.append(".dev{0}".format(self.dev))
+
+        # Local version segment
+        if self.local is not None:
+            parts.append("+{0}".format(self.local))
+
+        return "".join(parts)
+
+    @property
+    def epoch(self):
+        # type: () -> int
+        _epoch = self._version.epoch  # type: int
+        return _epoch
+
+    @property
+    def release(self):
+        # type: () -> Tuple[int, ...]
+        _release = self._version.release  # type: Tuple[int, ...]
+        return _release
+
+    @property
+    def pre(self):
+        # type: () -> Optional[Tuple[str, int]]
+        _pre = self._version.pre  # type: Optional[Tuple[str, int]]
+        return _pre
+
+    @property
+    def post(self):
+        # type: () -> Optional[Tuple[str, int]]
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self):
+        # type: () -> Optional[Tuple[str, int]]
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self):
+        # type: () -> Optional[str]
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self):
+        # type: () -> str
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self):
+        # type: () -> str
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append("{0}!".format(self.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self):
+        # type: () -> bool
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self):
+        # type: () -> bool
+        return self.post is not None
+
+    @property
+    def is_devrelease(self):
+        # type: () -> bool
+        return self.dev is not None
+
+    @property
+    def major(self):
+        # type: () -> int
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self):
+        # type: () -> int
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self):
+        # type: () -> int
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(
+    letter,  # type: str
+    number,  # type: Union[str, bytes, SupportsInt]
+):
+    # type: (...) -> Optional[Tuple[str, int]]
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local):
+    # type: (str) -> Optional[LocalType]
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch,  # type: int
+    release,  # type: Tuple[int, ...]
+    pre,  # type: Optional[Tuple[str, int]]
+    post,  # type: Optional[Tuple[str, int]]
+    dev,  # type: Optional[Tuple[str, int]]
+    local,  # type: Optional[Tuple[SubLocalType]]
+):
+    # type: (...) -> CmpKey
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre = NegativeInfinity  # type: PrePostDevType
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post = NegativeInfinity  # type: PrePostDevType
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev = Infinity  # type: PrePostDevType
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local = NegativeInfinity  # type: LocalType
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/pysrc/pyparsing.py b/pysrc/pyparsing.py
new file mode 100644
index 00000000..581d5bbb
--- /dev/null
+++ b/pysrc/pyparsing.py
@@ -0,0 +1,7107 @@
+# -*- coding: utf-8 -*-
+# module pyparsing.py
+#
+# Copyright (c) 2003-2019  Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+=============================================================================
+
+The pyparsing module is an alternative approach to creating and
+executing simple grammars, vs. the traditional lex/yacc approach, or the
+use of regular expressions.  With pyparsing, you don't need to learn
+a new syntax for defining grammars or matching expressions - the parsing
+module provides a library of classes that you use to construct the
+grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form
+``", !"``), built up using :class:`Word`,
+:class:`Literal`, and :class:`And` elements
+(the :class:`'+'` operators create :class:`And` expressions,
+and the strings are auto-converted to :class:`Literal` expressions)::
+
+    from pyparsing import Word, alphas
+
+    # define grammar of a greeting
+    greet = Word(alphas) + "," + Word(alphas) + "!"
+
+    hello = "Hello, World!"
+    print (hello, "->", greet.parseString(hello))
+
+The program outputs the following::
+
+    Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the
+self-explanatory class names, and the use of '+', '|' and '^' operators.
+
+The :class:`ParseResults` object returned from
+:class:`ParserElement.parseString` can be
+accessed as a nested list, a dictionary, or an object with named
+attributes.
+
+The pyparsing module handles some of the problems that are typically
+vexing when writing text parsers:
+
+  - extra or missing whitespace (the above program will also handle
+    "Hello,World!", "Hello  ,  World  !", etc.)
+  - quoted strings
+  - embedded comments
+
+
+Getting Started -
+-----------------
+Visit the classes :class:`ParserElement` and :class:`ParseResults` to
+see the base classes that most other pyparsing
+classes inherit from. Use the docstrings for examples of how to:
+
+ - construct literal match expressions from :class:`Literal` and
+   :class:`CaselessLiteral` classes
+ - construct character word-group expressions using the :class:`Word`
+   class
+ - see how to create repetitive expressions using :class:`ZeroOrMore`
+   and :class:`OneOrMore` classes
+ - use :class:`'+'`, :class:`'|'`, :class:`'^'`,
+   and :class:`'&'` operators to combine simple expressions into
+   more complex ones
+ - associate names with your parsed results using
+   :class:`ParserElement.setResultsName`
+ - access the parsed data, which is returned as a :class:`ParseResults`
+   object
+ - find some helpful expression short-cuts like :class:`delimitedList`
+   and :class:`oneOf`
+ - find more useful common expressions in the :class:`pyparsing_common`
+   namespace class
+"""
+
+__version__ = "2.4.7"
+__versionTime__ = "30 Mar 2020 00:43 UTC"
+__author__ = "Paul McGuire "
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+import collections
+import pprint
+import traceback
+import types
+from datetime import datetime
+from operator import itemgetter
+import itertools
+from functools import wraps
+from contextlib import contextmanager
+
+try:
+    # Python 3
+    from itertools import filterfalse
+except ImportError:
+    from itertools import ifilterfalse as filterfalse
+
+try:
+    from _thread import RLock
+except ImportError:
+    from threading import RLock
+
+try:
+    # Python 3
+    from collections.abc import Iterable
+    from collections.abc import MutableMapping, Mapping
+except ImportError:
+    # Python 2.7
+    from collections import Iterable
+    from collections import MutableMapping, Mapping
+
+try:
+    from collections import OrderedDict as _OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict as _OrderedDict
+    except ImportError:
+        _OrderedDict = None
+
+try:
+    from types import SimpleNamespace
+except ImportError:
+    class SimpleNamespace: pass
+
+# version compatibility configuration
+__compat__ = SimpleNamespace()
+__compat__.__doc__ = """
+    A cross-version compatibility configuration for pyparsing features that will be
+    released in a future version. By setting values in this configuration to True,
+    those features can be enabled in prior versions for compatibility development
+    and testing.
+
+     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
+       of results names when an And expression is nested within an Or or MatchFirst; set to
+       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
+       pre-2.3.0 handling of named results
+"""
+__compat__.collect_all_And_tokens = True
+
+__diag__ = SimpleNamespace()
+__diag__.__doc__ = """
+Diagnostic configuration (all default to False)
+     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
+       name is defined on a MatchFirst or Or expression with one or more And subexpressions
+       (only warns if __compat__.collect_all_And_tokens is False)
+     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
+       name is defined on a containing expression with ungrouped subexpressions that also
+       have results names
+     - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined
+       with a results name, but has no contents defined
+     - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is
+       incorrectly called with multiple str arguments
+     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
+       calls to ParserElement.setName()
+"""
+__diag__.warn_multiple_tokens_in_named_alternation = False
+__diag__.warn_ungrouped_named_tokens_in_collection = False
+__diag__.warn_name_set_on_empty_Forward = False
+__diag__.warn_on_multiple_string_args_to_oneof = False
+__diag__.enable_debug_on_named_expressions = False
+__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")]
+
+def _enable_all_warnings():
+    __diag__.warn_multiple_tokens_in_named_alternation = True
+    __diag__.warn_ungrouped_named_tokens_in_collection = True
+    __diag__.warn_name_set_on_empty_Forward = True
+    __diag__.warn_on_multiple_string_args_to_oneof = True
+__diag__.enable_all_warnings = _enable_all_warnings
+
+
+__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
+           'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+           'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+           'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+           'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+           'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+           'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
+           'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
+           'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+           'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+           'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+           'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
+           'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+           'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+           'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
+           'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+           'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+           'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
+           'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
+           'conditionAsParseAction', 're',
+           ]
+
+system_version = tuple(sys.version_info)[:3]
+PY_3 = system_version[0] == 3
+if PY_3:
+    _MAX_INT = sys.maxsize
+    basestring = str
+    unichr = chr
+    unicode = str
+    _ustr = str
+
+    # build list of single arg builtins, that can be used as parse actions
+    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
+
+else:
+    _MAX_INT = sys.maxint
+    range = xrange
+
+    def _ustr(obj):
+        """Drop-in replacement for str(obj) that tries to be Unicode
+        friendly. It first tries str(obj). If that fails with
+        a UnicodeEncodeError, then it tries unicode(obj). It then
+        < returns the unicode object | encodes it with the default
+        encoding | ... >.
+        """
+        if isinstance(obj, unicode):
+            return obj
+
+        try:
+            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+            # it won't break any existing code.
+            return str(obj)
+
+        except UnicodeEncodeError:
+            # Else encode it
+            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+            xmlcharref = Regex(r'&#\d+;')
+            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+            return xmlcharref.transformString(ret)
+
+    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+    singleArgBuiltins = []
+    import __builtin__
+
+    for fname in "sum len sorted reversed list tuple set any all min max".split():
+        try:
+            singleArgBuiltins.append(getattr(__builtin__, fname))
+        except AttributeError:
+            continue
+
+_generatorType = type((y for y in range(1)))
+
+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ('&' + s + ';' for s in "amp gt lt quot apos".split())
+    for from_, to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
+
+alphas = string.ascii_uppercase + string.ascii_lowercase
+nums = "0123456789"
+hexnums = nums + "ABCDEFabcdef"
+alphanums = alphas + nums
+_bslash = chr(92)
+printables = "".join(c for c in string.printable if c not in string.whitespace)
+
+
+def conditionAsParseAction(fn, message=None, fatal=False):
+    msg = message if message is not None else "failed user-defined condition"
+    exc_type = ParseFatalException if fatal else ParseException
+    fn = _trim_arity(fn)
+
+    @wraps(fn)
+    def pa(s, l, t):
+        if not bool(fn(s, l, t)):
+            raise exc_type(s, l, msg)
+
+    return pa
+
+class ParseBaseException(Exception):
+    """base exception class for all parsing runtime exceptions"""
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__(self, pstr, loc=0, msg=None, elem=None):
+        self.loc = loc
+        if msg is None:
+            self.msg = pstr
+            self.pstr = ""
+        else:
+            self.msg = msg
+            self.pstr = pstr
+        self.parserElement = elem
+        self.args = (pstr, loc, msg)
+
+    @classmethod
+    def _from_exception(cls, pe):
+        """
+        internal factory method to simplify creating one type of ParseException
+        from another - avoids having __init__ signature conflicts among subclasses
+        """
+        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+    def __getattr__(self, aname):
+        """supported attributes by name are:
+           - lineno - returns the line number of the exception text
+           - col - returns the column number of the exception text
+           - line - returns the line containing the exception text
+        """
+        if aname == "lineno":
+            return lineno(self.loc, self.pstr)
+        elif aname in ("col", "column"):
+            return col(self.loc, self.pstr)
+        elif aname == "line":
+            return line(self.loc, self.pstr)
+        else:
+            raise AttributeError(aname)
+
+    def __str__(self):
+        if self.pstr:
+            if self.loc >= len(self.pstr):
+                foundstr = ', found end of text'
+            else:
+                foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\')
+        else:
+            foundstr = ''
+        return ("%s%s  (at char %d), (line:%d, col:%d)" %
+                   (self.msg, foundstr, self.loc, self.lineno, self.column))
+    def __repr__(self):
+        return _ustr(self)
+    def markInputline(self, markerString=">!<"):
+        """Extracts the exception line from the input string, and marks
+           the location of the exception with a special symbol.
+        """
+        line_str = self.line
+        line_column = self.column - 1
+        if markerString:
+            line_str = "".join((line_str[:line_column],
+                                markerString, line_str[line_column:]))
+        return line_str.strip()
+    def __dir__(self):
+        return "lineno col line".split() + dir(type(self))
+
+class ParseException(ParseBaseException):
+    """
+    Exception thrown when parse expressions don't match class;
+    supported attributes by name are:
+    - lineno - returns the line number of the exception text
+    - col - returns the column number of the exception text
+    - line - returns the line containing the exception text
+
+    Example::
+
+        try:
+            Word(nums).setName("integer").parseString("ABC")
+        except ParseException as pe:
+            print(pe)
+            print("column: {}".format(pe.col))
+
+    prints::
+
+       Expected integer (at char 0), (line:1, col:1)
+        column: 1
+
+    """
+
+    @staticmethod
+    def explain(exc, depth=16):
+        """
+        Method to take an exception and translate the Python internal traceback into a list
+        of the pyparsing expressions that caused the exception to be raised.
+
+        Parameters:
+
+         - exc - exception raised during parsing (need not be a ParseException, in support
+           of Python exceptions that might be raised in a parse action)
+         - depth (default=16) - number of levels back in the stack trace to list expression
+           and function names; if None, the full stack trace names will be listed; if 0, only
+           the failing input line, marker, and exception string will be shown
+
+        Returns a multi-line string listing the ParserElements and/or function names in the
+        exception's stack trace.
+
+        Note: the diagnostic output will include string representations of the expressions
+        that failed to parse. These representations will be more helpful if you use `setName` to
+        give identifiable names to your expressions. Otherwise they will use the default string
+        forms, which may be cryptic to read.
+
+        explain() is only supported under Python 3.
+        """
+        import inspect
+
+        if depth is None:
+            depth = sys.getrecursionlimit()
+        ret = []
+        if isinstance(exc, ParseBaseException):
+            ret.append(exc.line)
+            ret.append(' ' * (exc.col - 1) + '^')
+        ret.append("{0}: {1}".format(type(exc).__name__, exc))
+
+        if depth > 0:
+            callers = inspect.getinnerframes(exc.__traceback__, context=depth)
+            seen = set()
+            for i, ff in enumerate(callers[-depth:]):
+                frm = ff[0]
+
+                f_self = frm.f_locals.get('self', None)
+                if isinstance(f_self, ParserElement):
+                    if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
+                        continue
+                    if f_self in seen:
+                        continue
+                    seen.add(f_self)
+
+                    self_type = type(f_self)
+                    ret.append("{0}.{1} - {2}".format(self_type.__module__,
+                                                      self_type.__name__,
+                                                      f_self))
+                elif f_self is not None:
+                    self_type = type(f_self)
+                    ret.append("{0}.{1}".format(self_type.__module__,
+                                                self_type.__name__))
+                else:
+                    code = frm.f_code
+                    if code.co_name in ('wrapper', ''):
+                        continue
+
+                    ret.append("{0}".format(code.co_name))
+
+                depth -= 1
+                if not depth:
+                    break
+
+        return '\n'.join(ret)
+
+
+class ParseFatalException(ParseBaseException):
+    """user-throwable exception thrown when inconsistent parse content
+       is found; stops all parsing immediately"""
+    pass
+
+class ParseSyntaxException(ParseFatalException):
+    """just like :class:`ParseFatalException`, but thrown internally
+    when an :class:`ErrorStop` ('-' operator) indicates
+    that parsing is to stop immediately because an unbacktrackable
+    syntax error has been found.
+    """
+    pass
+
+#~ class ReparseException(ParseBaseException):
+    #~ """Experimental class - parse actions can raise this exception to cause
+       #~ pyparsing to reparse the input string:
+        #~ - with a modified input string, and/or
+        #~ - with a modified start location
+       #~ Set the values of the ReparseException in the constructor, and raise the
+       #~ exception in a parse action to cause pyparsing to use the new string/location.
+       #~ Setting the values as None causes no change to be made.
+       #~ """
+    #~ def __init_( self, newstring, restartLoc ):
+        #~ self.newParseText = newstring
+        #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+    """exception thrown by :class:`ParserElement.validate` if the
+    grammar could be improperly recursive
+    """
+    def __init__(self, parseElementList):
+        self.parseElementTrace = parseElementList
+
+    def __str__(self):
+        return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+    def __init__(self, p1, p2):
+        self.tup = (p1, p2)
+    def __getitem__(self, i):
+        return self.tup[i]
+    def __repr__(self):
+        return repr(self.tup[0])
+    def setOffset(self, i):
+        self.tup = (self.tup[0], i)
+
+class ParseResults(object):
+    """Structured parse results, to provide multiple means of access to
+    the parsed data:
+
+       - as a list (``len(results)``)
+       - by list index (``results[0], results[1]``, etc.)
+       - by attribute (``results.`` - see :class:`ParserElement.setResultsName`)
+
+    Example::
+
+        integer = Word(nums)
+        date_str = (integer.setResultsName("year") + '/'
+                        + integer.setResultsName("month") + '/'
+                        + integer.setResultsName("day"))
+        # equivalent form:
+        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+        # parseString returns a ParseResults object
+        result = date_str.parseString("1999/12/31")
+
+        def test(s, fn=repr):
+            print("%s -> %s" % (s, fn(eval(s))))
+        test("list(result)")
+        test("result[0]")
+        test("result['month']")
+        test("result.day")
+        test("'month' in result")
+        test("'minutes' in result")
+        test("result.dump()", str)
+
+    prints::
+
+        list(result) -> ['1999', '/', '12', '/', '31']
+        result[0] -> '1999'
+        result['month'] -> '12'
+        result.day -> '31'
+        'month' in result -> True
+        'minutes' in result -> False
+        result.dump() -> ['1999', '/', '12', '/', '31']
+        - day: 31
+        - month: 12
+        - year: 1999
+    """
+    def __new__(cls, toklist=None, name=None, asList=True, modal=True):
+        if isinstance(toklist, cls):
+            return toklist
+        retobj = object.__new__(cls)
+        retobj.__doinit = True
+        return retobj
+
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
+        if self.__doinit:
+            self.__doinit = False
+            self.__name = None
+            self.__parent = None
+            self.__accumNames = {}
+            self.__asList = asList
+            self.__modal = modal
+            if toklist is None:
+                toklist = []
+            if isinstance(toklist, list):
+                self.__toklist = toklist[:]
+            elif isinstance(toklist, _generatorType):
+                self.__toklist = list(toklist)
+            else:
+                self.__toklist = [toklist]
+            self.__tokdict = dict()
+
+        if name is not None and name:
+            if not modal:
+                self.__accumNames[name] = 0
+            if isinstance(name, int):
+                name = _ustr(name)  # will always return a str, but use _ustr for consistency
+            self.__name = name
+            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
+                if isinstance(toklist, basestring):
+                    toklist = [toklist]
+                if asList:
+                    if isinstance(toklist, ParseResults):
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
+                    else:
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
+                    self[name].__name = name
+                else:
+                    try:
+                        self[name] = toklist[0]
+                    except (KeyError, TypeError, IndexError):
+                        self[name] = toklist
+
+    def __getitem__(self, i):
+        if isinstance(i, (int, slice)):
+            return self.__toklist[i]
+        else:
+            if i not in self.__accumNames:
+                return self.__tokdict[i][-1][0]
+            else:
+                return ParseResults([v[0] for v in self.__tokdict[i]])
+
+    def __setitem__(self, k, v, isinstance=isinstance):
+        if isinstance(v, _ParseResultsWithOffset):
+            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
+            sub = v[0]
+        elif isinstance(k, (int, slice)):
+            self.__toklist[k] = v
+            sub = v
+        else:
+            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
+            sub = v
+        if isinstance(sub, ParseResults):
+            sub.__parent = wkref(self)
+
+    def __delitem__(self, i):
+        if isinstance(i, (int, slice)):
+            mylen = len(self.__toklist)
+            del self.__toklist[i]
+
+            # convert int to slice
+            if isinstance(i, int):
+                if i < 0:
+                    i += mylen
+                i = slice(i, i + 1)
+            # get removed indices
+            removed = list(range(*i.indices(mylen)))
+            removed.reverse()
+            # fixup indices in token dictionary
+            for name, occurrences in self.__tokdict.items():
+                for j in removed:
+                    for k, (value, position) in enumerate(occurrences):
+                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+        else:
+            del self.__tokdict[i]
+
+    def __contains__(self, k):
+        return k in self.__tokdict
+
+    def __len__(self):
+        return len(self.__toklist)
+
+    def __bool__(self):
+        return (not not self.__toklist)
+    __nonzero__ = __bool__
+
+    def __iter__(self):
+        return iter(self.__toklist)
+
+    def __reversed__(self):
+        return iter(self.__toklist[::-1])
+
+    def _iterkeys(self):
+        if hasattr(self.__tokdict, "iterkeys"):
+            return self.__tokdict.iterkeys()
+        else:
+            return iter(self.__tokdict)
+
+    def _itervalues(self):
+        return (self[k] for k in self._iterkeys())
+
+    def _iteritems(self):
+        return ((k, self[k]) for k in self._iterkeys())
+
+    if PY_3:
+        keys = _iterkeys
+        """Returns an iterator of all named result keys."""
+
+        values = _itervalues
+        """Returns an iterator of all named result values."""
+
+        items = _iteritems
+        """Returns an iterator of all named result key-value tuples."""
+
+    else:
+        iterkeys = _iterkeys
+        """Returns an iterator of all named result keys (Python 2.x only)."""
+
+        itervalues = _itervalues
+        """Returns an iterator of all named result values (Python 2.x only)."""
+
+        iteritems = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
+        def keys(self):
+            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iterkeys())
+
+        def values(self):
+            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.itervalues())
+
+        def items(self):
+            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iteritems())
+
+    def haskeys(self):
+        """Since keys() returns an iterator, this method is helpful in bypassing
+           code that looks for the existence of any defined results names."""
+        return bool(self.__tokdict)
+
+    def pop(self, *args, **kwargs):
+        """
+        Removes and returns item at specified index (default= ``last``).
+        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
+        passed no argument or an integer argument, it will use ``list``
+        semantics and pop tokens from the list of parsed tokens. If passed
+        a non-integer argument (most likely a string), it will use ``dict``
+        semantics and pop the corresponding value from any defined results
+        names. A second default return value argument is supported, just as in
+        ``dict.pop()``.
+
+        Example::
+
+            def remove_first(tokens):
+                tokens.pop(0)
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+            label = Word(alphas)
+            patt = label("LABEL") + OneOrMore(Word(nums))
+            print(patt.parseString("AAB 123 321").dump())
+
+            # Use pop() in a parse action to remove named result (note that corresponding value is not
+            # removed from list form of results)
+            def remove_LABEL(tokens):
+                tokens.pop("LABEL")
+                return tokens
+            patt.addParseAction(remove_LABEL)
+            print(patt.parseString("AAB 123 321").dump())
+
+        prints::
+
+            ['AAB', '123', '321']
+            - LABEL: AAB
+
+            ['AAB', '123', '321']
+        """
+        if not args:
+            args = [-1]
+        for k, v in kwargs.items():
+            if k == 'default':
+                args = (args[0], v)
+            else:
+                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
+        if (isinstance(args[0], int)
+                or len(args) == 1
+                or args[0] in self):
+            index = args[0]
+            ret = self[index]
+            del self[index]
+            return ret
+        else:
+            defaultvalue = args[1]
+            return defaultvalue
+
+    def get(self, key, defaultValue=None):
+        """
+        Returns named result matching the given key, or if there is no
+        such name, then returns the given ``defaultValue`` or ``None`` if no
+        ``defaultValue`` is specified.
+
+        Similar to ``dict.get()``.
+
+        Example::
+
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+            result = date_str.parseString("1999/12/31")
+            print(result.get("year")) # -> '1999'
+            print(result.get("hour", "not specified")) # -> 'not specified'
+            print(result.get("hour")) # -> None
+        """
+        if key in self:
+            return self[key]
+        else:
+            return defaultValue
+
+    def insert(self, index, insStr):
+        """
+        Inserts new element at location index in the list of parsed tokens.
+
+        Similar to ``list.insert()``.
+
+        Example::
+
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+            # use a parse action to insert the parse location in the front of the parsed results
+            def insert_locn(locn, tokens):
+                tokens.insert(0, locn)
+            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+        """
+        self.__toklist.insert(index, insStr)
+        # fixup indices in token dictionary
+        for name, occurrences in self.__tokdict.items():
+            for k, (value, position) in enumerate(occurrences):
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+    def append(self, item):
+        """
+        Add single element to end of ParseResults list of elements.
+
+        Example::
+
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+            # use a parse action to compute the sum of the parsed integers, and add it to the end
+            def append_sum(tokens):
+                tokens.append(sum(map(int, tokens)))
+            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+        """
+        self.__toklist.append(item)
+
+    def extend(self, itemseq):
+        """
+        Add sequence of elements to end of ParseResults list of elements.
+
+        Example::
+
+            patt = OneOrMore(Word(alphas))
+
+            # use a parse action to append the reverse of the matched strings, to make a palindrome
+            def make_palindrome(tokens):
+                tokens.extend(reversed([t[::-1] for t in tokens]))
+                return ''.join(tokens)
+            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+        """
+        if isinstance(itemseq, ParseResults):
+            self.__iadd__(itemseq)
+        else:
+            self.__toklist.extend(itemseq)
+
+    def clear(self):
+        """
+        Clear all elements and results names.
+        """
+        del self.__toklist[:]
+        self.__tokdict.clear()
+
+    def __getattr__(self, name):
+        try:
+            return self[name]
+        except KeyError:
+            return ""
+
+    def __add__(self, other):
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__(self, other):
+        if other.__tokdict:
+            offset = len(self.__toklist)
+            addoffset = lambda a: offset if a < 0 else a + offset
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
+                              for k, vlist in otheritems for v in vlist]
+            for k, v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0], ParseResults):
+                    v[0].__parent = wkref(self)
+
+        self.__toklist += other.__toklist
+        self.__accumNames.update(other.__accumNames)
+        return self
+
+    def __radd__(self, other):
+        if isinstance(other, int) and other == 0:
+            # useful for merging many ParseResults using sum() builtin
+            return self.copy()
+        else:
+            # this may raise a TypeError - so be it
+            return other + self
+
+    def __repr__(self):
+        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
+
+    def __str__(self):
+        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
+
+    def _asStringList(self, sep=''):
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance(item, ParseResults):
+                out += item._asStringList()
+            else:
+                out.append(_ustr(item))
+        return out
+
+    def asList(self):
+        """
+        Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+        Example::
+
+            patt = OneOrMore(Word(alphas))
+            result = patt.parseString("sldkj lsdkj sldkj")
+            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+            print(type(result), result) # ->  ['sldkj', 'lsdkj', 'sldkj']
+
+            # Use asList() to create an actual list
+            result_list = result.asList()
+            print(type(result_list), result_list) # ->  ['sldkj', 'lsdkj', 'sldkj']
+        """
+        return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]
+
+    def asDict(self):
+        """
+        Returns the named parse results as a nested dictionary.
+
+        Example::
+
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+            result = date_str.parseString('12/31/1999')
+            print(type(result), repr(result)) # ->  (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+
+            result_dict = result.asDict()
+            print(type(result_dict), repr(result_dict)) # ->  {'day': '1999', 'year': '12', 'month': '31'}
+
+            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
+            import json
+            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
+        """
+        if PY_3:
+            item_fn = self.items
+        else:
+            item_fn = self.iteritems
+
+        def toItem(obj):
+            if isinstance(obj, ParseResults):
+                if obj.haskeys():
+                    return obj.asDict()
+                else:
+                    return [toItem(v) for v in obj]
+            else:
+                return obj
+
+        return dict((k, toItem(v)) for k, v in item_fn())
+
+    def copy(self):
+        """
+        Returns a new copy of a :class:`ParseResults` object.
+        """
+        ret = ParseResults(self.__toklist)
+        ret.__tokdict = dict(self.__tokdict.items())
+        ret.__parent = self.__parent
+        ret.__accumNames.update(self.__accumNames)
+        ret.__name = self.__name
+        return ret
+
+    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
+        """
+        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+        """
+        nl = "\n"
+        out = []
+        namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
+                          for v in vlist)
+        nextLevelIndent = indent + "  "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [nl, indent, "<", selfTag, ">"]
+
+        for i, res in enumerate(self.__toklist):
+            if isinstance(res, ParseResults):
+                if i in namedItems:
+                    out += [res.asXML(namedItems[i],
+                                      namedItemsOnly and doctag is None,
+                                      nextLevelIndent,
+                                      formatted)]
+                else:
+                    out += [res.asXML(None,
+                                      namedItemsOnly and doctag is None,
+                                      nextLevelIndent,
+                                      formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [nl, nextLevelIndent, "<", resTag, ">",
+                        xmlBodyText,
+                                                ""]
+
+        out += [nl, indent, ""]
+        return "".join(out)
+
+    def __lookup(self, sub):
+        for k, vlist in self.__tokdict.items():
+            for v, loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        r"""
+        Returns the results name for this token expression. Useful when several
+        different expressions might match at a particular location.
+
+        Example::
+
+            integer = Word(nums)
+            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
+            house_number_expr = Suppress('#') + Word(nums, alphanums)
+            user_data = (Group(house_number_expr)("house_number")
+                        | Group(ssn_expr)("ssn")
+                        | Group(integer)("age"))
+            user_info = OneOrMore(user_data)
+
+            result = user_info.parseString("22 111-22-3333 #221B")
+            for item in result:
+                print(item.getName(), ':', item[0])
+
+        prints::
+
+            age : 22
+            ssn : 111-22-3333
+            house_number : 221B
+        """
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        elif (len(self) == 1
+              and len(self.__tokdict) == 1
+              and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
+            return next(iter(self.__tokdict.keys()))
+        else:
+            return None
+
+    def dump(self, indent='', full=True, include_list=True, _depth=0):
+        """
+        Diagnostic method for listing out the contents of
+        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
+        that this string can be embedded in a nested display of other data.
+
+        Example::
+
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+            result = date_str.parseString('12/31/1999')
+            print(result.dump())
+
+        prints::
+
+            ['12', '/', '31', '/', '1999']
+            - day: 1999
+            - month: 31
+            - year: 12
+        """
+        out = []
+        NL = '\n'
+        if include_list:
+            out.append(indent + _ustr(self.asList()))
+        else:
+            out.append('')
+
+        if full:
+            if self.haskeys():
+                items = sorted((str(k), v) for k, v in self.items())
+                for k, v in items:
+                    if out:
+                        out.append(NL)
+                    out.append("%s%s- %s: " % (indent, ('  ' * _depth), k))
+                    if isinstance(v, ParseResults):
+                        if v:
+                            out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
+                        else:
+                            out.append(_ustr(v))
+                    else:
+                        out.append(repr(v))
+            elif any(isinstance(vv, ParseResults) for vv in self):
+                v = self
+                for i, vv in enumerate(v):
+                    if isinstance(vv, ParseResults):
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
+                                                            ('  ' * (_depth)),
+                                                            i,
+                                                            indent,
+                                                            ('  ' * (_depth + 1)),
+                                                            vv.dump(indent=indent,
+                                                                    full=full,
+                                                                    include_list=include_list,
+                                                                    _depth=_depth + 1)))
+                    else:
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
+                                                            ('  ' * (_depth)),
+                                                            i,
+                                                            indent,
+                                                            ('  ' * (_depth + 1)),
+                                                            _ustr(vv)))
+
+        return "".join(out)
+
+    def pprint(self, *args, **kwargs):
+        """
+        Pretty-printer for parsed results as a list, using the
+        `pprint `_ module.
+        Accepts additional positional or keyword args as defined for
+        `pprint.pprint `_ .
+
+        Example::
+
+            ident = Word(alphas, alphanums)
+            num = Word(nums)
+            func = Forward()
+            term = ident | num | Group('(' + func + ')')
+            func <<= ident + Group(Optional(delimitedList(term)))
+            result = func.parseString("fna a,b,(fnb c,d,200),100")
+            result.pprint(width=40)
+
+        prints::
+
+            ['fna',
+             ['a',
+              'b',
+              ['(', 'fnb', ['c', 'd', '200'], ')'],
+              '100']]
+        """
+        pprint.pprint(self.asList(), *args, **kwargs)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        return (self.__toklist,
+                (self.__tokdict.copy(),
+                 self.__parent is not None and self.__parent() or None,
+                 self.__accumNames,
+                 self.__name))
+
+    def __setstate__(self, state):
+        self.__toklist = state[0]
+        self.__tokdict, par, inAccumNames, self.__name = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __getnewargs__(self):
+        return self.__toklist, self.__name, self.__asList, self.__modal
+
+    def __dir__(self):
+        return dir(type(self)) + list(self.keys())
+
+    @classmethod
+    def from_dict(cls, other, name=None):
+        """
+        Helper classmethod to construct a ParseResults from a dict, preserving the
+        name-value relations as results names. If an optional 'name' argument is
+        given, a nested ParseResults will be returned
+        """
+        def is_iterable(obj):
+            try:
+                iter(obj)
+            except Exception:
+                return False
+            else:
+                if PY_3:
+                    return not isinstance(obj, (str, bytes))
+                else:
+                    return not isinstance(obj, basestring)
+
+        ret = cls([])
+        for k, v in other.items():
+            if isinstance(v, Mapping):
+                ret += cls.from_dict(v, name=k)
+            else:
+                ret += cls([v], name=k, asList=is_iterable(v))
+        if name is not None:
+            ret = cls([ret], name=name)
+        return ret
+
+MutableMapping.register(ParseResults)
+
+def col (loc, strg):
+    """Returns current column within a string, counting newlines as line separators.
+   The first column is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See
+   :class:`ParserElement.parseString` for more
+   information on parsing strings containing ```` s, and suggested
+   methods to maintain a consistent view of the parsed string, the parse
+   location, and line and column positions within the parsed string.
+   """
+    s = strg
+    return 1 if 0 < loc < len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
+
+def lineno(loc, strg):
+    """Returns current line number within a string, counting newlines as line separators.
+    The first line is number 1.
+
+    Note - the default parsing behavior is to expand tabs in the input string
+    before starting the parsing process.  See :class:`ParserElement.parseString`
+    for more information on parsing strings containing ```` s, and
+    suggested methods to maintain a consistent view of the parsed string, the
+    parse location, and line and column positions within the parsed string.
+    """
+    return strg.count("\n", 0, loc) + 1
+
+def line(loc, strg):
+    """Returns the line of text containing loc within a string, counting newlines as line separators.
+       """
+    lastCR = strg.rfind("\n", 0, loc)
+    nextCR = strg.find("\n", loc)
+    if nextCR >= 0:
+        return strg[lastCR + 1:nextCR]
+    else:
+        return strg[lastCR + 1:]
+
+def _defaultStartDebugAction(instring, loc, expr):
+    print(("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring))))
+
+def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
+    print("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction(instring, loc, expr, exc):
+    print("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing."""
+    pass
+
+# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
+#~ 'decorator to trim function calls to match the arity of the target'
+#~ def _trim_arity(func, maxargs=3):
+    #~ if func in singleArgBuiltins:
+        #~ return lambda s,l,t: func(t)
+    #~ limit = 0
+    #~ foundArity = False
+    #~ def wrapper(*args):
+        #~ nonlocal limit,foundArity
+        #~ while 1:
+            #~ try:
+                #~ ret = func(*args[limit:])
+                #~ foundArity = True
+                #~ return ret
+            #~ except TypeError:
+                #~ if limit == maxargs or foundArity:
+                    #~ raise
+                #~ limit += 1
+                #~ continue
+    #~ return wrapper
+
+# this version is Python 2.x-3.x cross-compatible
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+    if func in singleArgBuiltins:
+        return lambda s, l, t: func(t)
+    limit = [0]
+    foundArity = [False]
+
+    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
+    if system_version[:2] >= (3, 5):
+        def extract_stack(limit=0):
+            # special handling for Python 3.5.0 - extra deep call stack by 1
+            offset = -3 if system_version == (3, 5, 0) else -2
+            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
+            return [frame_summary[:2]]
+        def extract_tb(tb, limit=0):
+            frames = traceback.extract_tb(tb, limit=limit)
+            frame_summary = frames[-1]
+            return [frame_summary[:2]]
+    else:
+        extract_stack = traceback.extract_stack
+        extract_tb = traceback.extract_tb
+
+    # synthesize what would be returned by traceback.extract_stack at the call to
+    # user's parse action 'func', so that we don't incur call penalty at parse time
+
+    LINE_DIFF = 6
+    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
+    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
+    this_line = extract_stack(limit=2)[-1]
+    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)
+
+    def wrapper(*args):
+        while 1:
+            try:
+                ret = func(*args[limit[0]:])
+                foundArity[0] = True
+                return ret
+            except TypeError:
+                # re-raise TypeErrors if they did not come from our arity testing
+                if foundArity[0]:
+                    raise
+                else:
+                    try:
+                        tb = sys.exc_info()[-1]
+                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
+                            raise
+                    finally:
+                        try:
+                            del tb
+                        except NameError:
+                            pass
+
+                if limit[0] <= maxargs:
+                    limit[0] += 1
+                    continue
+                raise
+
+    # copy func name to wrapper for sensible debug output
+    func_name = ""
+    try:
+        func_name = getattr(func, '__name__',
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    wrapper.__name__ = func_name
+
+    return wrapper
+
+
+class ParserElement(object):
+    """Abstract base level parser element class."""
+    DEFAULT_WHITE_CHARS = " \n\t\r"
+    verbose_stacktrace = False
+
+    @staticmethod
+    def setDefaultWhitespaceChars(chars):
+        r"""
+        Overrides the default whitespace chars
+
+        Example::
+
+            # default whitespace chars are space,  and newline
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
+
+            # change to just treat newline as significant
+            ParserElement.setDefaultWhitespaceChars(" \t")
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
+        """
+        ParserElement.DEFAULT_WHITE_CHARS = chars
+
+    @staticmethod
+    def inlineLiteralsUsing(cls):
+        """
+        Set class to be used for inclusion of string literals into a parser.
+
+        Example::
+
+            # default literal class used is Literal
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+
+            # change to Suppress
+            ParserElement.inlineLiteralsUsing(Suppress)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
+        """
+        ParserElement._literalStringClass = cls
+
+    @classmethod
+    def _trim_traceback(cls, tb):
+        while tb.tb_next:
+            tb = tb.tb_next
+        return tb
+
+    def __init__(self, savelist=False):
+        self.parseAction = list()
+        self.failAction = None
+        # ~ self.name = ""  # don't define self.name, let subclasses try/except upcall
+        self.strRepr = None
+        self.resultsName = None
+        self.saveAsList = savelist
+        self.skipWhitespace = True
+        self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
+        self.copyDefaultWhiteChars = True
+        self.mayReturnEmpty = False # used when checking for left-recursion
+        self.keepTabs = False
+        self.ignoreExprs = list()
+        self.debug = False
+        self.streamlined = False
+        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+        self.errmsg = ""
+        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+        self.debugActions = (None, None, None)  # custom debug actions
+        self.re = None
+        self.callPreparse = True # used to avoid redundant calls to preParse
+        self.callDuringTry = False
+
+    def copy(self):
+        """
+        Make a copy of this :class:`ParserElement`.  Useful for defining
+        different parse actions for the same parsing pattern, using copies of
+        the original parse element.
+
+        Example::
+
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
+            integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
+
+            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+
+        prints::
+
+            [5120, 100, 655360, 268435456]
+
+        Equivalent form of ``expr.copy()`` is just ``expr()``::
+
+            integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
+        """
+        cpy = copy.copy(self)
+        cpy.parseAction = self.parseAction[:]
+        cpy.ignoreExprs = self.ignoreExprs[:]
+        if self.copyDefaultWhiteChars:
+            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        return cpy
+
+    def setName(self, name):
+        """
+        Define name for this expression, makes debugging and exception messages clearer.
+
+        Example::
+
+            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
+        """
+        self.name = name
+        self.errmsg = "Expected " + self.name
+        if __diag__.enable_debug_on_named_expressions:
+            self.setDebug()
+        return self
+
+    def setResultsName(self, name, listAllMatches=False):
+        """
+        Define name for referencing matching tokens as a nested attribute
+        of the returned parse results.
+        NOTE: this returns a *copy* of the original :class:`ParserElement` object;
+        this is so that the client can define a basic element, such as an
+        integer, and reference it in multiple places with different names.
+
+        You can also set results names using the abbreviated syntax,
+        ``expr("name")`` in place of ``expr.setResultsName("name")``
+        - see :class:`__call__`.
+
+        Example::
+
+            date_str = (integer.setResultsName("year") + '/'
+                        + integer.setResultsName("month") + '/'
+                        + integer.setResultsName("day"))
+
+            # equivalent form:
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+        """
+        return self._setResultsName(name, listAllMatches)
+
+    def _setResultsName(self, name, listAllMatches=False):
+        newself = self.copy()
+        if name.endswith("*"):
+            name = name[:-1]
+            listAllMatches = True
+        newself.resultsName = name
+        newself.modalResults = not listAllMatches
+        return newself
+
+    def setBreak(self, breakFlag=True):
+        """Method to invoke the Python pdb debugger when this element is
+           about to be parsed. Set ``breakFlag`` to True to enable, False to
+           disable.
+        """
+        if breakFlag:
+            _parseMethod = self._parse
+            def breaker(instring, loc, doActions=True, callPreParse=True):
+                import pdb
+                # this call to pdb.set_trace() is intentional, not a checkin error
+                pdb.set_trace()
+                return _parseMethod(instring, loc, doActions, callPreParse)
+            breaker._originalParseMethod = _parseMethod
+            self._parse = breaker
+        else:
+            if hasattr(self._parse, "_originalParseMethod"):
+                self._parse = self._parse._originalParseMethod
+        return self
+
+    def setParseAction(self, *fns, **kwargs):
+        """
+        Define one or more actions to perform when successfully matching parse element definition.
+        Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
+        ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
+
+        - s   = the original string being parsed (see note below)
+        - loc = the location of the matching substring
+        - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
+
+        If the functions in fns modify the tokens, they can return them as the return
+        value from fn, and the modified list of tokens will replace the original.
+        Otherwise, fn does not need to return any value.
+
+        If None is passed as the parse action, all previously added parse actions for this
+        expression are cleared.
+
+        Optional keyword arguments:
+        - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing
+
+        Note: the default parsing behavior is to expand tabs in the input string
+        before starting the parsing process.  See :class:`parseString for more
+        information on parsing strings containing ```` s, and suggested
+        methods to maintain a consistent view of the parsed string, the parse
+        location, and line and column positions within the parsed string.
+
+        Example::
+
+            integer = Word(nums)
+            date_str = integer + '/' + integer + '/' + integer
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+            # use parse action to convert to ints at parse time
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            date_str = integer + '/' + integer + '/' + integer
+
+            # note that integer fields are now ints, not strings
+            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
+        """
+        if list(fns) == [None,]:
+            self.parseAction = []
+        else:
+            if not all(callable(fn) for fn in fns):
+                raise TypeError("parse actions must be callable")
+            self.parseAction = list(map(_trim_arity, list(fns)))
+            self.callDuringTry = kwargs.get("callDuringTry", False)
+        return self
+
+    def addParseAction(self, *fns, **kwargs):
+        """
+        Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
+
+        See examples in :class:`copy`.
+        """
+        self.parseAction += list(map(_trim_arity, list(fns)))
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def addCondition(self, *fns, **kwargs):
+        """Add a boolean predicate function to expression's list of parse actions. See
+        :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
+        functions passed to ``addCondition`` need to return boolean success/fail of the condition.
+
+        Optional keyword arguments:
+        - message = define a custom message to be used in the raised exception
+        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+
+        Example::
+
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            year_int = integer.copy()
+            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+            date_str = year_int + '/' + integer + '/' + integer
+
+            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
+        """
+        for fn in fns:
+            self.parseAction.append(conditionAsParseAction(fn, message=kwargs.get('message'),
+                                                           fatal=kwargs.get('fatal', False)))
+
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def setFailAction(self, fn):
+        """Define action to perform if parsing fails at this expression.
+           Fail acton fn is a callable function that takes the arguments
+           ``fn(s, loc, expr, err)`` where:
+           - s = string being parsed
+           - loc = location where expression match was attempted and failed
+           - expr = the parse expression that failed
+           - err = the exception thrown
+           The function returns no value.  It may throw :class:`ParseFatalException`
+           if it is desired to stop parsing immediately."""
+        self.failAction = fn
+        return self
+
+    def _skipIgnorables(self, instring, loc):
+        exprsFound = True
+        while exprsFound:
+            exprsFound = False
+            for e in self.ignoreExprs:
+                try:
+                    while 1:
+                        loc, dummy = e._parse(instring, loc)
+                        exprsFound = True
+                except ParseException:
+                    pass
+        return loc
+
+    def preParse(self, instring, loc):
+        if self.ignoreExprs:
+            loc = self._skipIgnorables(instring, loc)
+
+        if self.skipWhitespace:
+            wt = self.whiteChars
+            instrlen = len(instring)
+            while loc < instrlen and instring[loc] in wt:
+                loc += 1
+
+        return loc
+
+    def parseImpl(self, instring, loc, doActions=True):
+        return loc, []
+
+    def postParse(self, instring, loc, tokenlist):
+        return tokenlist
+
+    # ~ @profile
+    def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
+        TRY, MATCH, FAIL = 0, 1, 2
+        debugging = (self.debug)  # and doActions)
+
+        if debugging or self.failAction:
+            # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
+            if self.debugActions[TRY]:
+                self.debugActions[TRY](instring, loc, self)
+            try:
+                if callPreParse and self.callPreparse:
+                    preloc = self.preParse(instring, loc)
+                else:
+                    preloc = loc
+                tokensStart = preloc
+                if self.mayIndexError or preloc >= len(instring):
+                    try:
+                        loc, tokens = self.parseImpl(instring, preloc, doActions)
+                    except IndexError:
+                        raise ParseException(instring, len(instring), self.errmsg, self)
+                else:
+                    loc, tokens = self.parseImpl(instring, preloc, doActions)
+            except Exception as err:
+                # ~ print ("Exception raised:", err)
+                if self.debugActions[FAIL]:
+                    self.debugActions[FAIL](instring, tokensStart, self, err)
+                if self.failAction:
+                    self.failAction(instring, tokensStart, self, err)
+                raise
+        else:
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse(instring, loc)
+            else:
+                preloc = loc
+            tokensStart = preloc
+            if self.mayIndexError or preloc >= len(instring):
+                try:
+                    loc, tokens = self.parseImpl(instring, preloc, doActions)
+                except IndexError:
+                    raise ParseException(instring, len(instring), self.errmsg, self)
+            else:
+                loc, tokens = self.parseImpl(instring, preloc, doActions)
+
+        tokens = self.postParse(instring, loc, tokens)
+
+        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
+        if self.parseAction and (doActions or self.callDuringTry):
+            if debugging:
+                try:
+                    for fn in self.parseAction:
+                        try:
+                            tokens = fn(instring, tokensStart, retTokens)
+                        except IndexError as parse_action_exc:
+                            exc = ParseException("exception raised in parse action")
+                            exc.__cause__ = parse_action_exc
+                            raise exc
+
+                        if tokens is not None and tokens is not retTokens:
+                            retTokens = ParseResults(tokens,
+                                                      self.resultsName,
+                                                      asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
+                                                      modal=self.modalResults)
+                except Exception as err:
+                    # ~ print "Exception raised in user parse action:", err
+                    if self.debugActions[FAIL]:
+                        self.debugActions[FAIL](instring, tokensStart, self, err)
+                    raise
+            else:
+                for fn in self.parseAction:
+                    try:
+                        tokens = fn(instring, tokensStart, retTokens)
+                    except IndexError as parse_action_exc:
+                        exc = ParseException("exception raised in parse action")
+                        exc.__cause__ = parse_action_exc
+                        raise exc
+
+                    if tokens is not None and tokens is not retTokens:
+                        retTokens = ParseResults(tokens,
+                                                  self.resultsName,
+                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
+                                                  modal=self.modalResults)
+        if debugging:
+            # ~ print ("Matched", self, "->", retTokens.asList())
+            if self.debugActions[MATCH]:
+                self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)
+
+        return loc, retTokens
+
+    def tryParse(self, instring, loc):
+        try:
+            return self._parse(instring, loc, doActions=False)[0]
+        except ParseFatalException:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+    def canParseNext(self, instring, loc):
+        try:
+            self.tryParse(instring, loc)
+        except (ParseException, IndexError):
+            return False
+        else:
+            return True
+
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
+
+            def get(self, key):
+                return cache.get(key, not_in_cache)
+
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+
+            def cache_len(self):
+                return len(cache)
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+            self.__len__ = types.MethodType(cache_len, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    while len(cache) > size:
+                        try:
+                            cache.popitem(False)
+                        except KeyError:
+                            pass
+
+                def clear(self):
+                    cache.clear()
+
+                def cache_len(self):
+                    return len(cache)
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+                self.__len__ = types.MethodType(cache_len, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    while len(key_fifo) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                def cache_len(self):
+                    return len(cache)
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+                self.__len__ = types.MethodType(cache_len, self)
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
+
+    # this method gets repeatedly called during backtracking with the same arguments -
+    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
+        HIT, MISS = 0, 1
+        lookup = (self, instring, loc, callPreParse, doActions)
+        with ParserElement.packrat_cache_lock:
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
+                ParserElement.packrat_cache_stats[MISS] += 1
+                try:
+                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
+                except ParseBaseException as pe:
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
+                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return value[0], value[1].copy()
+
+    _parse = _parseNoCache
+
+    @staticmethod
+    def resetCache():
+        ParserElement.packrat_cache.clear()
+        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
+
+    _packratEnabled = False
+    @staticmethod
+    def enablePackrat(cache_size_limit=128):
+        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+           Repeated parse attempts at the same string location (which happens
+           often in many complex grammars) can immediately return a cached value,
+           instead of re-executing parsing/validating code.  Memoizing is done of
+           both valid results and parsing exceptions.
+
+           Parameters:
+
+           - cache_size_limit - (default= ``128``) - if an integer value is provided
+             will limit the size of the packrat cache; if None is passed, then
+             the cache size will be unbounded; if 0 is passed, the cache will
+             be effectively disabled.
+
+           This speedup may break existing programs that use parse actions that
+           have side-effects.  For this reason, packrat parsing is disabled when
+           you first import pyparsing.  To activate the packrat feature, your
+           program must call the class method :class:`ParserElement.enablePackrat`.
+           For best results, call ``enablePackrat()`` immediately after
+           importing pyparsing.
+
+           Example::
+
+               import pyparsing
+               pyparsing.ParserElement.enablePackrat()
+        """
+        if not ParserElement._packratEnabled:
+            ParserElement._packratEnabled = True
+            if cache_size_limit is None:
+                ParserElement.packrat_cache = ParserElement._UnboundedCache()
+            else:
+                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
+            ParserElement._parse = ParserElement._parseCache
+
+    def parseString(self, instring, parseAll=False):
+        """
+        Execute the parse expression with the given string.
+        This is the main interface to the client code, once the complete
+        expression has been built.
+
+        Returns the parsed data as a :class:`ParseResults` object, which may be
+        accessed as a list, or as a dict or object with attributes if the given parser
+        includes results names.
+
+        If you want the grammar to require that the entire input string be
+        successfully parsed, then set ``parseAll`` to True (equivalent to ending
+        the grammar with ``StringEnd()``).
+
+        Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
+        in order to report proper column numbers in parse actions.
+        If the input string contains tabs and
+        the grammar uses parse actions that use the ``loc`` argument to index into the
+        string being parsed, you can ensure you have a consistent view of the input
+        string by:
+
+        - calling ``parseWithTabs`` on your grammar before calling ``parseString``
+          (see :class:`parseWithTabs`)
+        - define your parse action using the full ``(s, loc, toks)`` signature, and
+          reference the input string using the parse action's ``s`` argument
+        - explictly expand the tabs in your input string before calling
+          ``parseString``
+
+        Example::
+
+            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
+            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
+        """
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+            # ~ self.saveAsList = True
+        for e in self.ignoreExprs:
+            e.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            loc, tokens = self._parse(instring, 0)
+            if parseAll:
+                loc = self.preParse(instring, loc)
+                se = Empty() + StringEnd()
+                se._parse(instring, loc)
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
+                if getattr(exc, '__traceback__', None) is not None:
+                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
+                raise exc
+        else:
+            return tokens
+
+    def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
+        """
+        Scan the input string for expression matches.  Each match will return the
+        matching tokens, start location, and end location.  May be called with optional
+        ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
+        ``overlap`` is specified, then overlapping matches will be reported.
+
+        Note that the start and end locations are reported relative to the string
+        being parsed.  See :class:`parseString` for more information on parsing
+        strings with embedded tabs.
+
+        Example::
+
+            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+            print(source)
+            for tokens, start, end in Word(alphas).scanString(source):
+                print(' '*start + '^'*(end-start))
+                print(' '*start + tokens[0])
+
+        prints::
+
+            sldjf123lsdjjkf345sldkjf879lkjsfd987
+            ^^^^^
+            sldjf
+                    ^^^^^^^
+                    lsdjjkf
+                              ^^^^^^
+                              sldkjf
+                                       ^^^^^^
+                                       lkjsfd
+        """
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn(instring, loc)
+                    nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
+                except ParseException:
+                    loc = preloc + 1
+                else:
+                    if nextLoc > loc:
+                        matches += 1
+                        yield tokens, preloc, nextLoc
+                        if overlap:
+                            nextloc = preparseFn(instring, loc)
+                            if nextloc > loc:
+                                loc = nextLoc
+                            else:
+                                loc += 1
+                        else:
+                            loc = nextLoc
+                    else:
+                        loc = preloc + 1
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
+                if getattr(exc, '__traceback__', None) is not None:
+                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
+                raise exc
+
+    def transformString(self, instring):
+        """
+        Extension to :class:`scanString`, to modify matching text with modified tokens that may
+        be returned from a parse action.  To use ``transformString``, define a grammar and
+        attach a parse action to it that modifies the returned token list.
+        Invoking ``transformString()`` on a target string will then scan for matches,
+        and replace the matched text patterns according to the logic in the parse
+        action.  ``transformString()`` returns the resulting transformed string.
+
+        Example::
+
+            wd = Word(alphas)
+            wd.setParseAction(lambda toks: toks[0].title())
+
+            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
+
+        prints::
+
+            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
+        """
+        out = []
+        lastE = 0
+        # force preservation of s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for t, s, e in self.scanString(instring):
+                out.append(instring[lastE:s])
+                if t:
+                    if isinstance(t, ParseResults):
+                        out += t.asList()
+                    elif isinstance(t, list):
+                        out += t
+                    else:
+                        out.append(t)
+                lastE = e
+            out.append(instring[lastE:])
+            out = [o for o in out if o]
+            return "".join(map(_ustr, _flatten(out)))
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
+                if getattr(exc, '__traceback__', None) is not None:
+                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
+                raise exc
+
+    def searchString(self, instring, maxMatches=_MAX_INT):
+        """
+        Another extension to :class:`scanString`, simplifying the access to the tokens found
+        to match the given parse expression.  May be called with optional
+        ``maxMatches`` argument, to clip searching after 'n' matches are found.
+
+        Example::
+
+            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
+            cap_word = Word(alphas.upper(), alphas.lower())
+
+            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
+
+            # the sum() builtin can be used to merge results into a single ParseResults object
+            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
+
+        prints::
+
+            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
+            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
+        """
+        try:
+            return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)])
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
+                if getattr(exc, '__traceback__', None) is not None:
+                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
+                raise exc
+
+    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+        """
+        Generator method to split a string using the given expression as a separator.
+        May be called with optional ``maxsplit`` argument, to limit the number of splits;
+        and the optional ``includeSeparators`` argument (default= ``False``), if the separating
+        matching text should be included in the split results.
+
+        Example::
+
+            punc = oneOf(list(".,;:/-!?"))
+            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
+
+        prints::
+
+            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
+        """
+        splits = 0
+        last = 0
+        for t, s, e in self.scanString(instring, maxMatches=maxsplit):
+            yield instring[last:s]
+            if includeSeparators:
+                yield t[0]
+            last = e
+        yield instring[last:]
+
+    def __add__(self, other):
+        """
+        Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
+        converts them to :class:`Literal`s by default.
+
+        Example::
+
+            greet = Word(alphas) + "," + Word(alphas) + "!"
+            hello = "Hello, World!"
+            print (hello, "->", greet.parseString(hello))
+
+        prints::
+
+            Hello, World! -> ['Hello', ',', 'World', '!']
+
+        ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
+
+            Literal('start') + ... + Literal('end')
+
+        is equivalent to:
+
+            Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
+
+        Note that the skipped text is returned with '_skipped' as a results name,
+        and to support having multiple skips in the same parser, the value returned is
+        a list of all skipped text.
+        """
+        if other is Ellipsis:
+            return _PendingSkip(self)
+
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return And([self, other])
+
+    def __radd__(self, other):
+        """
+        Implementation of + operator when left operand is not a :class:`ParserElement`
+        """
+        if other is Ellipsis:
+            return SkipTo(self)("_skipped*") + self
+
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return other + self
+
+    def __sub__(self, other):
+        """
+        Implementation of - operator, returns :class:`And` with error stop
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return self + And._ErrorStop() + other
+
+    def __rsub__(self, other):
+        """
+        Implementation of - operator when left operand is not a :class:`ParserElement`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return other - self
+
+    def __mul__(self, other):
+        """
+        Implementation of * operator, allows use of ``expr * 3`` in place of
+        ``expr + expr + expr``.  Expressions may also me multiplied by a 2-integer
+        tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
+        may also include ``None`` as in:
+         - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
+              to ``expr*n + ZeroOrMore(expr)``
+              (read as "at least n instances of ``expr``")
+         - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
+              (read as "0 to n instances of ``expr``")
+         - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
+         - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
+
+        Note that ``expr*(None, n)`` does not raise an exception if
+        more than n exprs exist in the input stream; that is,
+        ``expr*(None, n)`` does not enforce a maximum number of expr
+        occurrences.  If this behavior is desired, then write
+        ``expr*(None, n) + ~expr``
+        """
+        if other is Ellipsis:
+            other = (0, None)
+        elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
+            other = ((0, ) + other[1:] + (None,))[:2]
+
+        if isinstance(other, int):
+            minElements, optElements = other, 0
+        elif isinstance(other, tuple):
+            other = tuple(o if o is not Ellipsis else None for o in other)
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0], int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self * other[0] + ZeroOrMore(self)
+            elif isinstance(other[0], int) and isinstance(other[1], int):
+                minElements, optElements = other
+                optElements -= minElements
+            else:
+                raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects", type(other[0]), type(other[1]))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")
+
+        if optElements:
+            def makeOptionalList(n):
+                if n > 1:
+                    return Optional(self + makeOptionalList(n - 1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self] * minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self] * minElements)
+        return ret
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __or__(self, other):
+        """
+        Implementation of | operator - returns :class:`MatchFirst`
+        """
+        if other is Ellipsis:
+            return _PendingSkip(self, must_skip=True)
+
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return MatchFirst([self, other])
+
+    def __ror__(self, other):
+        """
+        Implementation of | operator when left operand is not a :class:`ParserElement`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return other | self
+
+    def __xor__(self, other):
+        """
+        Implementation of ^ operator - returns :class:`Or`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return Or([self, other])
+
+    def __rxor__(self, other):
+        """
+        Implementation of ^ operator when left operand is not a :class:`ParserElement`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return other ^ self
+
+    def __and__(self, other):
+        """
+        Implementation of & operator - returns :class:`Each`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return Each([self, other])
+
+    def __rand__(self, other):
+        """
+        Implementation of & operator when left operand is not a :class:`ParserElement`
+        """
+        if isinstance(other, basestring):
+            other = self._literalStringClass(other)
+        if not isinstance(other, ParserElement):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                          SyntaxWarning, stacklevel=2)
+            return None
+        return other & self
+
+    def __invert__(self):
+        """
+        Implementation of ~ operator - returns :class:`NotAny`
+        """
+        return NotAny(self)
+
+    def __iter__(self):
+        # must implement __iter__ to override legacy use of sequential access to __getitem__ to
+        # iterate over a sequence
+        raise TypeError('%r object is not iterable' % self.__class__.__name__)
+
+    def __getitem__(self, key):
+        """
+        use ``[]`` indexing notation as a short form for expression repetition:
+         - ``expr[n]`` is equivalent to ``expr*n``
+         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
+         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
+              to ``expr*n + ZeroOrMore(expr)``
+              (read as "at least n instances of ``expr``")
+         - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
+              (read as "0 to n instances of ``expr``")
+         - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
+         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
+         ``None`` may be used in place of ``...``.
+
+        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
+        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
+        desired, then write ``expr[..., n] + ~expr``.
+       """
+
+        # convert single arg keys to tuples
+        try:
+            if isinstance(key, str):
+                key = (key,)
+            iter(key)
+        except TypeError:
+            key = (key, key)
+
+        if len(key) > 2:
+            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
+                                                                                '... [{0}]'.format(len(key))
+                                                                                if len(key) > 5 else ''))
+
+        # clip to 2 elements
+        ret = self * tuple(key[:2])
+        return ret
+
+    def __call__(self, name=None):
+        """
+        Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
+
+        If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
+        passed as ``True``.
+
+        If ``name` is omitted, same as calling :class:`copy`.
+
+        Example::
+
+            # these are equivalent
+            userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
+            userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
+        """
+        if name is not None:
+            return self._setResultsName(name)
+        else:
+            return self.copy()
+
+    def suppress(self):
+        """
+        Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
+        cluttering up returned output.
+        """
+        return Suppress(self)
+
+    def leaveWhitespace(self):
+        """
+        Disables the skipping of whitespace before matching the characters in the
+        :class:`ParserElement`'s defined pattern.  This is normally only used internally by
+        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars(self, chars):
+        """
+        Overrides the default whitespace chars
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        self.copyDefaultWhiteChars = False
+        return self
+
+    def parseWithTabs(self):
+        """
+        Overrides default behavior to expand ````s to spaces before parsing the input string.
+        Must be called before ``parseString`` when the input grammar contains elements that
+        match ```` characters.
+        """
+        self.keepTabs = True
+        return self
+
+    def ignore(self, other):
+        """
+        Define expression to be ignored (e.g., comments) while doing pattern
+        matching; may be called repeatedly, to define multiple comment or other
+        ignorable patterns.
+
+        Example::
+
+            patt = OneOrMore(Word(alphas))
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+
+            patt.ignore(cStyleComment)
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
+        """
+        if isinstance(other, basestring):
+            other = Suppress(other)
+
+        if isinstance(other, Suppress):
+            if other not in self.ignoreExprs:
+                self.ignoreExprs.append(other)
+        else:
+            self.ignoreExprs.append(Suppress(other.copy()))
+        return self
+
+    def setDebugActions(self, startAction, successAction, exceptionAction):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        """
+        self.debugActions = (startAction or _defaultStartDebugAction,
+                             successAction or _defaultSuccessDebugAction,
+                             exceptionAction or _defaultExceptionDebugAction)
+        self.debug = True
+        return self
+
+    def setDebug(self, flag=True):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        Set ``flag`` to True to enable, False to disable.
+
+        Example::
+
+            wd = Word(alphas).setName("alphaword")
+            integer = Word(nums).setName("numword")
+            term = wd | integer
+
+            # turn on debugging for wd
+            wd.setDebug()
+
+            OneOrMore(term).parseString("abc 123 xyz 890")
+
+        prints::
+
+            Match alphaword at loc 0(1,1)
+            Matched alphaword -> ['abc']
+            Match alphaword at loc 3(1,4)
+            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
+            Match alphaword at loc 7(1,8)
+            Matched alphaword -> ['xyz']
+            Match alphaword at loc 11(1,12)
+            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
+            Match alphaword at loc 15(1,16)
+            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
+
+        The output shown is that produced by the default debug actions - custom debug actions can be
+        specified using :class:`setDebugActions`. Prior to attempting
+        to match the ``wd`` expression, the debugging message ``"Match  at loc (,)"``
+        is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
+        message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
+        which makes debugging and exception messages easier to understand - for instance, the default
+        name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
+        """
+        if flag:
+            self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
+        else:
+            self.debug = False
+        return self
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        return _ustr(self)
+
+    def streamline(self):
+        self.streamlined = True
+        self.strRepr = None
+        return self
+
+    def checkRecursion(self, parseElementList):
+        pass
+
+    def validate(self, validateTrace=None):
+        """
+        Check defined expressions for valid structure, check for infinite recursive definitions.
+        """
+        self.checkRecursion([])
+
+    def parseFile(self, file_or_filename, parseAll=False):
+        """
+        Execute the parse expression on the given file or filename.
+        If a filename is specified (instead of a file object),
+        the entire file is opened, read, and closed before parsing.
+        """
+        try:
+            file_contents = file_or_filename.read()
+        except AttributeError:
+            with open(file_or_filename, "r") as f:
+                file_contents = f.read()
+        try:
+            return self.parseString(file_contents, parseAll)
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
+                if getattr(exc, '__traceback__', None) is not None:
+                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
+                raise exc
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        elif isinstance(other, basestring):
+            return self.matches(other)
+        elif isinstance(other, ParserElement):
+            return vars(self) == vars(other)
+        return False
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self):
+        return id(self)
+
+    def __req__(self, other):
+        return self == other
+
+    def __rne__(self, other):
+        return not (self == other)
+
+    def matches(self, testString, parseAll=True):
+        """
+        Method for quick testing of a parser against a test string. Good for simple
+        inline microtests of sub expressions while building up larger parser.
+
+        Parameters:
+         - testString - to test against this expression for a match
+         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
+
+        Example::
+
+            expr = Word(nums)
+            assert expr.matches("100")
+        """
+        try:
+            self.parseString(_ustr(testString), parseAll=parseAll)
+            return True
+        except ParseBaseException:
+            return False
+
+    def runTests(self, tests, parseAll=True, comment='#',
+                 fullDump=True, printResults=True, failureTests=False, postParse=None,
+                 file=None):
+        """
+        Execute the parse expression on a series of test strings, showing each
+        test, the parsed results or where the parse failed. Quick and easy way to
+        run a parse expression against a list of sample strings.
+
+        Parameters:
+         - tests - a list of separate test strings, or a multiline string of test strings
+         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
+         - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
+              string; pass None to disable comment filtering
+         - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
+              if False, only dump nested list
+         - printResults - (default= ``True``) prints test output to stdout
+         - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
+         - postParse - (default= ``None``) optional callback for successful parse results; called as
+              `fn(test_string, parse_results)` and returns a string to be added to the test output
+         - file - (default=``None``) optional file-like object to which test output will be written;
+              if None, will default to ``sys.stdout``
+
+        Returns: a (success, results) tuple, where success indicates that all tests succeeded
+        (or failed if ``failureTests`` is True), and the results contain a list of lines of each
+        test's output
+
+        Example::
+
+            number_expr = pyparsing_common.number.copy()
+
+            result = number_expr.runTests('''
+                # unsigned integer
+                100
+                # negative integer
+                -100
+                # float with scientific notation
+                6.02e23
+                # integer with scientific notation
+                1e-12
+                ''')
+            print("Success" if result[0] else "Failed!")
+
+            result = number_expr.runTests('''
+                # stray character
+                100Z
+                # missing leading digit before '.'
+                -.100
+                # too many '.'
+                3.14.159
+                ''', failureTests=True)
+            print("Success" if result[0] else "Failed!")
+
+        prints::
+
+            # unsigned integer
+            100
+            [100]
+
+            # negative integer
+            -100
+            [-100]
+
+            # float with scientific notation
+            6.02e23
+            [6.02e+23]
+
+            # integer with scientific notation
+            1e-12
+            [1e-12]
+
+            Success
+
+            # stray character
+            100Z
+               ^
+            FAIL: Expected end of text (at char 3), (line:1, col:4)
+
+            # missing leading digit before '.'
+            -.100
+            ^
+            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
+
+            # too many '.'
+            3.14.159
+                ^
+            FAIL: Expected end of text (at char 4), (line:1, col:5)
+
+            Success
+
+        Each test string must be on a single line. If you want to test a string that spans multiple
+        lines, create a test like this::
+
+            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
+
+        (Note that this is a raw string literal, you must include the leading 'r'.)
+        """
+        if isinstance(tests, basestring):
+            tests = list(map(str.strip, tests.rstrip().splitlines()))
+        if isinstance(comment, basestring):
+            comment = Literal(comment)
+        if file is None:
+            file = sys.stdout
+        print_ = file.write
+
+        allResults = []
+        comments = []
+        success = True
+        NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
+        BOM = u'\ufeff'
+        for t in tests:
+            if comment is not None and comment.matches(t, False) or comments and not t:
+                comments.append(t)
+                continue
+            if not t:
+                continue
+            out = ['\n' + '\n'.join(comments) if comments else '', t]
+            comments = []
+            try:
+                # convert newline marks to actual newlines, and strip leading BOM if present
+                t = NL.transformString(t.lstrip(BOM))
+                result = self.parseString(t, parseAll=parseAll)
+            except ParseBaseException as pe:
+                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
+                if '\n' in t:
+                    out.append(line(pe.loc, t))
+                    out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
+                else:
+                    out.append(' ' * pe.loc + '^' + fatal)
+                out.append("FAIL: " + str(pe))
+                success = success and failureTests
+                result = pe
+            except Exception as exc:
+                out.append("FAIL-EXCEPTION: " + str(exc))
+                success = success and failureTests
+                result = exc
+            else:
+                success = success and not failureTests
+                if postParse is not None:
+                    try:
+                        pp_value = postParse(t, result)
+                        if pp_value is not None:
+                            if isinstance(pp_value, ParseResults):
+                                out.append(pp_value.dump())
+                            else:
+                                out.append(str(pp_value))
+                        else:
+                            out.append(result.dump())
+                    except Exception as e:
+                        out.append(result.dump(full=fullDump))
+                        out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
+                else:
+                    out.append(result.dump(full=fullDump))
+
+            if printResults:
+                if fullDump:
+                    out.append('')
+                print_('\n'.join(out))
+
+            allResults.append((t, result))
+
+        return success, allResults
+
+
+class _PendingSkip(ParserElement):
+    # internal placeholder class to hold a place were '...' is added to a parser element,
+    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
+    def __init__(self, expr, must_skip=False):
+        super(_PendingSkip, self).__init__()
+        self.strRepr = str(expr + Empty()).replace('Empty', '...')
+        self.name = self.strRepr
+        self.anchor = expr
+        self.must_skip = must_skip
+
+    def __add__(self, other):
+        skipper = SkipTo(other).setName("...")("_skipped*")
+        if self.must_skip:
+            def must_skip(t):
+                if not t._skipped or t._skipped.asList() == ['']:
+                    del t[0]
+                    t.pop("_skipped", None)
+            def show_skip(t):
+                if t._skipped.asList()[-1:] == ['']:
+                    skipped = t.pop('_skipped')
+                    t['_skipped'] = 'missing <' + repr(self.anchor) + '>'
+            return (self.anchor + skipper().addParseAction(must_skip)
+                    | skipper().addParseAction(show_skip)) + other
+
+        return self.anchor + skipper + other
+
+    def __repr__(self):
+        return self.strRepr
+
+    def parseImpl(self, *args):
+        raise Exception("use of `...` expression without following SkipTo target expression")
+
+
+class Token(ParserElement):
+    """Abstract :class:`ParserElement` subclass, for defining atomic
+    matching patterns.
+    """
+    def __init__(self):
+        super(Token, self).__init__(savelist=False)
+
+
+class Empty(Token):
+    """An empty token, will always match.
+    """
+    def __init__(self):
+        super(Empty, self).__init__()
+        self.name = "Empty"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+
+class NoMatch(Token):
+    """A token that will never match.
+    """
+    def __init__(self):
+        super(NoMatch, self).__init__()
+        self.name = "NoMatch"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.errmsg = "Unmatchable token"
+
+    def parseImpl(self, instring, loc, doActions=True):
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Literal(Token):
+    """Token to exactly match a specified string.
+
+    Example::
+
+        Literal('blah').parseString('blah')  # -> ['blah']
+        Literal('blah').parseString('blahfooblah')  # -> ['blah']
+        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"
+
+    For case-insensitive matching, use :class:`CaselessLiteral`.
+
+    For keyword matching (force word break before and after the matched string),
+    use :class:`Keyword` or :class:`CaselessKeyword`.
+    """
+    def __init__(self, matchString):
+        super(Literal, self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Literal; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+            self.__class__ = Empty
+        self.name = '"%s"' % _ustr(self.match)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+
+        # Performance tuning: modify __class__ to select
+        # a parseImpl optimized for single-character check
+        if self.matchLen == 1 and type(self) is Literal:
+            self.__class__ = _SingleCharLiteral
+
+    def parseImpl(self, instring, loc, doActions=True):
+        if instring[loc] == self.firstMatchChar and instring.startswith(self.match, loc):
+            return loc + self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class _SingleCharLiteral(Literal):
+    def parseImpl(self, instring, loc, doActions=True):
+        if instring[loc] == self.firstMatchChar:
+            return loc + 1, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+_L = Literal
+ParserElement._literalStringClass = Literal
+
+class Keyword(Token):
+    """Token to exactly match a specified string as a keyword, that is,
+    it must be immediately followed by a non-keyword character.  Compare
+    with :class:`Literal`:
+
+     - ``Literal("if")`` will match the leading ``'if'`` in
+       ``'ifAndOnlyIf'``.
+     - ``Keyword("if")`` will not; it will only match the leading
+       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``
+
+    Accepts two optional constructor arguments in addition to the
+    keyword string:
+
+     - ``identChars`` is a string of characters that would be valid
+       identifier characters, defaulting to all alphanumerics + "_" and
+       "$"
+     - ``caseless`` allows case-insensitive matching, default is ``False``.
+
+    Example::
+
+        Keyword("start").parseString("start")  # -> ['start']
+        Keyword("start").parseString("starting")  # -> Exception
+
+    For case-insensitive matching, use :class:`CaselessKeyword`.
+    """
+    DEFAULT_KEYWORD_CHARS = alphanums + "_$"
+
+    def __init__(self, matchString, identChars=None, caseless=False):
+        super(Keyword, self).__init__()
+        if identChars is None:
+            identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Keyword; use Empty() instead",
+                          SyntaxWarning, stacklevel=2)
+        self.name = '"%s"' % self.match
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+        self.caseless = caseless
+        if caseless:
+            self.caselessmatch = matchString.upper()
+            identChars = identChars.upper()
+        self.identChars = set(identChars)
+
+    def parseImpl(self, instring, loc, doActions=True):
+        if self.caseless:
+            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
+                    and (loc >= len(instring) - self.matchLen
+                         or instring[loc + self.matchLen].upper() not in self.identChars)
+                    and (loc == 0
+                         or instring[loc - 1].upper() not in self.identChars)):
+                return loc + self.matchLen, self.match
+
+        else:
+            if instring[loc] == self.firstMatchChar:
+                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
+                        and (loc >= len(instring) - self.matchLen
+                             or instring[loc + self.matchLen] not in self.identChars)
+                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
+                    return loc + self.matchLen, self.match
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+    def copy(self):
+        c = super(Keyword, self).copy()
+        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        return c
+
+    @staticmethod
+    def setDefaultKeywordChars(chars):
+        """Overrides the default Keyword chars
+        """
+        Keyword.DEFAULT_KEYWORD_CHARS = chars
+
+class CaselessLiteral(Literal):
+    """Token to match a specified string, ignoring case of letters.
+    Note: the matched results will always be in the case of the given
+    match string, NOT the case of the input text.
+
+    Example::
+
+        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+
+    (Contrast with example for :class:`CaselessKeyword`.)
+    """
+    def __init__(self, matchString):
+        super(CaselessLiteral, self).__init__(matchString.upper())
+        # Preserve the defining literal.
+        self.returnString = matchString
+        self.name = "'%s'" % self.returnString
+        self.errmsg = "Expected " + self.name
+
+    def parseImpl(self, instring, loc, doActions=True):
+        if instring[loc:loc + self.matchLen].upper() == self.match:
+            return loc + self.matchLen, self.returnString
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CaselessKeyword(Keyword):
+    """
+    Caseless version of :class:`Keyword`.
+
+    Example::
+
+        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+
+    (Contrast with example for :class:`CaselessLiteral`.)
+    """
+    def __init__(self, matchString, identChars=None):
+        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
+
+class CloseMatch(Token):
+    """A variation on :class:`Literal` which matches "close" matches,
+    that is, strings with at most 'n' mismatching characters.
+    :class:`CloseMatch` takes parameters:
+
+     - ``match_string`` - string to be matched
+     - ``maxMismatches`` - (``default=1``) maximum number of
+       mismatches allowed to count as a match
+
+    The results from a successful parse will contain the matched text
+    from the input string and the following named results:
+
+     - ``mismatches`` - a list of the positions within the
+       match_string where mismatches were found
+     - ``original`` - the original match_string used to compare
+       against the input string
+
+    If ``mismatches`` is an empty list, then the match was an exact
+    match.
+
+    Example::
+
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch, self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False
+        self.mayReturnEmpty = False
+
+    def parseImpl(self, instring, loc, doActions=True):
+        start = loc
+        instrlen = len(instring)
+        maxloc = start + len(self.match_string)
+
+        if maxloc <= instrlen:
+            match_string = self.match_string
+            match_stringloc = 0
+            mismatches = []
+            maxMismatches = self.maxMismatches
+
+            for match_stringloc, s_m in enumerate(zip(instring[loc:maxloc], match_string)):
+                src, mat = s_m
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                loc = match_stringloc + 1
+                results = ParseResults([instring[start:loc]])
+                results['original'] = match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Word(Token):
+    """Token for matching words composed of allowed character sets.
+    Defined with string containing all allowed initial characters, an
+    optional string containing allowed body characters (if omitted,
+    defaults to the initial character set), and an optional minimum,
+    maximum, and/or exact length.  The default value for ``min`` is
+    1 (a minimum value < 1 is not valid); the default values for
+    ``max`` and ``exact`` are 0, meaning no maximum or exact
+    length restriction. An optional ``excludeChars`` parameter can
+    list characters that might be found in the input ``bodyChars``
+    string; useful to define a word of all printables except for one or
+    two characters, for instance.
+
+    :class:`srange` is useful for defining custom character set strings
+    for defining ``Word`` expressions, using range notation from
+    regular expression character sets.
+
+    A common mistake is to use :class:`Word` to match a specific literal
+    string, as in ``Word("Address")``. Remember that :class:`Word`
+    uses the string argument to define *sets* of matchable characters.
+    This expression would match "Add", "AAA", "dAred", or any other word
+    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
+    exact literal string, use :class:`Literal` or :class:`Keyword`.
+
+    pyparsing includes helper strings for building Words:
+
+     - :class:`alphas`
+     - :class:`nums`
+     - :class:`alphanums`
+     - :class:`hexnums`
+     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
+       - accented, tilded, umlauted, etc.)
+     - :class:`punc8bit` (non-alphabetic characters in ASCII range
+       128-255 - currency, symbols, superscripts, diacriticals, etc.)
+     - :class:`printables` (any non-whitespace character)
+
+    Example::
+
+        # a word composed of digits
+        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
+
+        # a word with a leading capital, and zero or more lowercase
+        capital_word = Word(alphas.upper(), alphas.lower())
+
+        # hostnames are alphanumeric, with leading alpha, and '-'
+        hostname = Word(alphas, alphanums + '-')
+
+        # roman numeral (not a strict parser, accepts invalid mix of characters)
+        roman = Word("IVXLCDM")
+
+        # any string of non-whitespace characters, except for ','
+        csv_value = Word(printables, excludeChars=",")
+    """
+    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
+        super(Word, self).__init__()
+        if excludeChars:
+            excludeChars = set(excludeChars)
+            initChars = ''.join(c for c in initChars if c not in excludeChars)
+            if bodyChars:
+                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
+        self.initCharsOrig = initChars
+        self.initChars = set(initChars)
+        if bodyChars:
+            self.bodyCharsOrig = bodyChars
+            self.bodyChars = set(bodyChars)
+        else:
+            self.bodyCharsOrig = initChars
+            self.bodyChars = set(initChars)
+
+        self.maxSpecified = max > 0
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.asKeyword = asKeyword
+
+        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
+            if self.bodyCharsOrig == self.initCharsOrig:
+                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+            elif len(self.initCharsOrig) == 1:
+                self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
+                                             _escapeRegexRangeChars(self.bodyCharsOrig),)
+            else:
+                self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
+                                               _escapeRegexRangeChars(self.bodyCharsOrig),)
+            if self.asKeyword:
+                self.reString = r"\b" + self.reString + r"\b"
+
+            try:
+                self.re = re.compile(self.reString)
+            except Exception:
+                self.re = None
+            else:
+                self.re_match = self.re.match
+                self.__class__ = _WordRegex
+
+    def parseImpl(self, instring, loc, doActions=True):
+        if instring[loc] not in self.initChars:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        instrlen = len(instring)
+        bodychars = self.bodyChars
+        maxloc = start + self.maxLen
+        maxloc = min(maxloc, instrlen)
+        while loc < maxloc and instring[loc] in bodychars:
+            loc += 1
+
+        throwException = False
+        if loc - start < self.minLen:
+            throwException = True
+        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+            throwException = True
+        elif self.asKeyword:
+            if (start > 0 and instring[start - 1] in bodychars
+                    or loc < instrlen and instring[loc] in bodychars):
+                throwException = True
+
+        if throwException:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__(self):
+        try:
+            return super(Word, self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+
+            def charsAsStr(s):
+                if len(s) > 4:
+                    return s[:4] + "..."
+                else:
+                    return s
+
+            if self.initCharsOrig != self.bodyCharsOrig:
+                self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
+            else:
+                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+        return self.strRepr
+
+class _WordRegex(Word):
+    def parseImpl(self, instring, loc, doActions=True):
+        result = self.re_match(instring, loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        return loc, result.group()
+
+
+class Char(_WordRegex):
+    """A short-cut class for defining ``Word(characters, exact=1)``,
+    when defining a match of any single character in a string of
+    characters.
+    """
+    def __init__(self, charset, asKeyword=False, excludeChars=None):
+        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
+        self.reString = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
+        if asKeyword:
+            self.reString = r"\b%s\b" % self.reString
+        self.re = re.compile(self.reString)
+        self.re_match = self.re.match
+
+
+class Regex(Token):
+    r"""Token for matching strings that match a given regular
+    expression. Defined with string specifying the regular expression in
+    a form recognized by the stdlib Python  `re module `_.
+    If the given regex contains named groups (defined using ``(?P...)``),
+    these will be preserved as named parse results.
+
+    If instead of the Python stdlib re module you wish to use a different RE module
+    (such as the `regex` module), you can replace it by either building your
+    Regex object with a compiled RE that was compiled using regex:
+
+    Example::
+
+        realnum = Regex(r"[+-]?\d+\.\d*")
+        date = Regex(r'(?P\d{4})-(?P\d\d?)-(?P\d\d?)')
+        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
+        roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
+
+        # use regex module instead of stdlib re module to construct a Regex using
+        # a compiled regular expression
+        import regex
+        parser = pp.Regex(regex.compile(r'[0-9]'))
+
+    """
+    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
+        """The parameters ``pattern`` and ``flags`` are passed
+        to the ``re.compile()`` function as-is. See the Python
+        `re module `_ module for an
+        explanation of the acceptable patterns and flags.
+        """
+        super(Regex, self).__init__()
+
+        if isinstance(pattern, basestring):
+            if not pattern:
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                              SyntaxWarning, stacklevel=2)
+
+            self.pattern = pattern
+            self.flags = flags
+
+            try:
+                self.re = re.compile(self.pattern, self.flags)
+                self.reString = self.pattern
+            except sre_constants.error:
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                              SyntaxWarning, stacklevel=2)
+                raise
+
+        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
+            self.re = pattern
+            self.pattern = self.reString = pattern.pattern
+            self.flags = flags
+
+        else:
+            raise TypeError("Regex may only be constructed with a string or a compiled RE object")
+
+        self.re_match = self.re.match
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = self.re_match("") is not None
+        self.asGroupList = asGroupList
+        self.asMatch = asMatch
+        if self.asGroupList:
+            self.parseImpl = self.parseImplAsGroupList
+        if self.asMatch:
+            self.parseImpl = self.parseImplAsMatch
+
+    def parseImpl(self, instring, loc, doActions=True):
+        result = self.re_match(instring, loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = ParseResults(result.group())
+        d = result.groupdict()
+        if d:
+            for k, v in d.items():
+                ret[k] = v
+        return loc, ret
+
+    def parseImplAsGroupList(self, instring, loc, doActions=True):
+        result = self.re_match(instring, loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = result.groups()
+        return loc, ret
+
+    def parseImplAsMatch(self, instring, loc, doActions=True):
+        result = self.re_match(instring, loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = result
+        return loc, ret
+
+    def __str__(self):
+        try:
+            return super(Regex, self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+        return self.strRepr
+
+    def sub(self, repl):
+        r"""
+        Return Regex with an attached parse action to transform the parsed
+        result as if called using `re.sub(expr, repl, string) `_.
+
+        Example::
+
+            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2")
+            print(make_html.transformString("h1:main title:"))
+            # prints "

main title

" + """ + if self.asGroupList: + warnings.warn("cannot use sub() with Regex(asGroupList=True)", + SyntaxWarning, stacklevel=2) + raise SyntaxError() + + if self.asMatch and callable(repl): + warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)", + SyntaxWarning, stacklevel=2) + raise SyntaxError() + + if self.asMatch: + def pa(tokens): + return tokens[0].expand(repl) + else: + def pa(tokens): + return self.re.sub(repl, tokens[0]) + return self.addParseAction(pa) + +class QuotedString(Token): + r""" + Token for matching strings that are delimited by quoting characters. + + Defined with the following parameters: + + - quoteChar - string of one or more characters defining the + quote delimiting string + - escChar - character to escape quotes, typically backslash + (default= ``None``) + - escQuote - special quote sequence to escape an embedded quote + string (such as SQL's ``""`` to escape an embedded ``"``) + (default= ``None``) + - multiline - boolean indicating whether quotes can span + multiple lines (default= ``False``) + - unquoteResults - boolean indicating whether the matched text + should be unquoted (default= ``True``) + - endQuoteChar - string of one or more characters defining the + end of the quote delimited string (default= ``None`` => same as + quoteChar) + - convertWhitespaceEscapes - convert escaped whitespace + (``'\t'``, ``'\n'``, etc.) to actual whitespace + (default= ``True``) + + Example:: + + qs = QuotedString('"') + print(qs.searchString('lsjdf "This is the quote" sldjf')) + complex_qs = QuotedString('{{', endQuoteChar='}}') + print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) + sql_qs = QuotedString('"', escQuote='""') + print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) + + prints:: + + [['This is the quote']] + [['This is the "quote"']] + [['This is the quote with "embedded" quotes']] + """ + def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, + unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): + super(QuotedString, self).__init__() + + # remove white space from quote chars - wont work anyway + quoteChar = quoteChar.strip() + if not quoteChar: + warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) + raise SyntaxError() + + if endQuoteChar is None: + endQuoteChar = quoteChar + else: + endQuoteChar = endQuoteChar.strip() + if not endQuoteChar: + warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) + raise SyntaxError() + + self.quoteChar = quoteChar + self.quoteCharLen = len(quoteChar) + self.firstQuoteChar = quoteChar[0] + self.endQuoteChar = endQuoteChar + self.endQuoteCharLen = len(endQuoteChar) + self.escChar = escChar + self.escQuote = escQuote + self.unquoteResults = unquoteResults + self.convertWhitespaceEscapes = convertWhitespaceEscapes + + if multiline: + self.flags = re.MULTILINE | re.DOTALL + self.pattern = r'%s(?:[^%s%s]' % (re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '')) + else: + self.flags = 0 + self.pattern = r'%s(?:[^%s\n\r%s]' % (re.escape(self.quoteChar), + _escapeRegexRangeChars(self.endQuoteChar[0]), + (escChar is not None and _escapeRegexRangeChars(escChar) or '')) + if len(self.endQuoteChar) > 1: + self.pattern += ( + '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), + _escapeRegexRangeChars(self.endQuoteChar[i])) + for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')') + + if escQuote: + self.pattern += (r'|(?:%s)' % re.escape(escQuote)) + if escChar: + self.pattern += (r'|(?:%s.)' % re.escape(escChar)) + self.escCharReplacePattern = re.escape(self.escChar) + "(.)" + self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) + + try: + self.re = re.compile(self.pattern, self.flags) + self.reString = self.pattern + self.re_match = self.re.match + except sre_constants.error: + warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, + SyntaxWarning, stacklevel=2) + raise + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayIndexError = False + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + result = instring[loc] == self.firstQuoteChar and self.re_match(instring, loc) or None + if not result: + raise ParseException(instring, loc, self.errmsg, self) + + loc = result.end() + ret = result.group() + + if self.unquoteResults: + + # strip off quotes + ret = ret[self.quoteCharLen: -self.endQuoteCharLen] + + if isinstance(ret, basestring): + # replace escaped whitespace + if '\\' in ret and self.convertWhitespaceEscapes: + ws_map = { + r'\t': '\t', + r'\n': '\n', + r'\f': '\f', + r'\r': '\r', + } + for wslit, wschar in ws_map.items(): + ret = ret.replace(wslit, wschar) + + # replace escaped characters + if self.escChar: + ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) + + # replace escaped quotes + if self.escQuote: + ret = ret.replace(self.escQuote, self.endQuoteChar) + + return loc, ret + + def __str__(self): + try: + return super(QuotedString, self).__str__() + except Exception: + pass + + if self.strRepr is None: + self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) + + return self.strRepr + + +class CharsNotIn(Token): + """Token for matching words composed of characters *not* in a given + set (will include whitespace in matched characters if not listed in + the provided exclusion set - see example). Defined with string + containing all disallowed characters, and an optional minimum, + maximum, and/or exact length. The default value for ``min`` is + 1 (a minimum value < 1 is not valid); the default values for + ``max`` and ``exact`` are 0, meaning no maximum or exact + length restriction. + + Example:: + + # define a comma-separated-value as anything that is not a ',' + csv_value = CharsNotIn(',') + print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) + + prints:: + + ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] + """ + def __init__(self, notChars, min=1, max=0, exact=0): + super(CharsNotIn, self).__init__() + self.skipWhitespace = False + self.notChars = notChars + + if min < 1: + raise ValueError("cannot specify a minimum length < 1; use " + "Optional(CharsNotIn()) if zero-length char group is permitted") + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + self.name = _ustr(self) + self.errmsg = "Expected " + self.name + self.mayReturnEmpty = (self.minLen == 0) + self.mayIndexError = False + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] in self.notChars: + raise ParseException(instring, loc, self.errmsg, self) + + start = loc + loc += 1 + notchars = self.notChars + maxlen = min(start + self.maxLen, len(instring)) + while loc < maxlen and instring[loc] not in notchars: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + def __str__(self): + try: + return super(CharsNotIn, self).__str__() + except Exception: + pass + + if self.strRepr is None: + if len(self.notChars) > 4: + self.strRepr = "!W:(%s...)" % self.notChars[:4] + else: + self.strRepr = "!W:(%s)" % self.notChars + + return self.strRepr + +class White(Token): + """Special matching class for matching whitespace. Normally, + whitespace is ignored by pyparsing grammars. This class is included + when some whitespace structures are significant. Define with + a string containing the whitespace characters to be matched; default + is ``" \\t\\r\\n"``. Also takes optional ``min``, + ``max``, and ``exact`` arguments, as defined for the + :class:`Word` class. + """ + whiteStrs = { + ' ' : '', + '\t': '', + '\n': '', + '\r': '', + '\f': '', + u'\u00A0': '', + u'\u1680': '', + u'\u180E': '', + u'\u2000': '', + u'\u2001': '', + u'\u2002': '', + u'\u2003': '', + u'\u2004': '', + u'\u2005': '', + u'\u2006': '', + u'\u2007': '', + u'\u2008': '', + u'\u2009': '', + u'\u200A': '', + u'\u200B': '', + u'\u202F': '', + u'\u205F': '', + u'\u3000': '', + } + def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): + super(White, self).__init__() + self.matchWhite = ws + self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite)) + # ~ self.leaveWhitespace() + self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) + self.mayReturnEmpty = True + self.errmsg = "Expected " + self.name + + self.minLen = min + + if max > 0: + self.maxLen = max + else: + self.maxLen = _MAX_INT + + if exact > 0: + self.maxLen = exact + self.minLen = exact + + def parseImpl(self, instring, loc, doActions=True): + if instring[loc] not in self.matchWhite: + raise ParseException(instring, loc, self.errmsg, self) + start = loc + loc += 1 + maxloc = start + self.maxLen + maxloc = min(maxloc, len(instring)) + while loc < maxloc and instring[loc] in self.matchWhite: + loc += 1 + + if loc - start < self.minLen: + raise ParseException(instring, loc, self.errmsg, self) + + return loc, instring[start:loc] + + +class _PositionToken(Token): + def __init__(self): + super(_PositionToken, self).__init__() + self.name = self.__class__.__name__ + self.mayReturnEmpty = True + self.mayIndexError = False + +class GoToColumn(_PositionToken): + """Token to advance to a specific column of input text; useful for + tabular report scraping. + """ + def __init__(self, colno): + super(GoToColumn, self).__init__() + self.col = colno + + def preParse(self, instring, loc): + if col(loc, instring) != self.col: + instrlen = len(instring) + if self.ignoreExprs: + loc = self._skipIgnorables(instring, loc) + while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col: + loc += 1 + return loc + + def parseImpl(self, instring, loc, doActions=True): + thiscol = col(loc, instring) + if thiscol > self.col: + raise ParseException(instring, loc, "Text not in expected column", self) + newloc = loc + self.col - thiscol + ret = instring[loc: newloc] + return newloc, ret + + +class LineStart(_PositionToken): + r"""Matches if current position is at the beginning of a line within + the parse string + + Example:: + + test = '''\ + AAA this line + AAA and this line + AAA but not this one + B AAA and definitely not this one + ''' + + for t in (LineStart() + 'AAA' + restOfLine).searchString(test): + print(t) + + prints:: + + ['AAA', ' this line'] + ['AAA', ' and this line'] + + """ + def __init__(self): + super(LineStart, self).__init__() + self.errmsg = "Expected start of line" + + def parseImpl(self, instring, loc, doActions=True): + if col(loc, instring) == 1: + return loc, [] + raise ParseException(instring, loc, self.errmsg, self) + +class LineEnd(_PositionToken): + """Matches if current position is at the end of a line within the + parse string + """ + def __init__(self): + super(LineEnd, self).__init__() + self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) + self.errmsg = "Expected end of line" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + if instring[loc] == "\n": + return loc + 1, "\n" + else: + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + +class StringStart(_PositionToken): + """Matches if current position is at the beginning of the parse + string + """ + def __init__(self): + super(StringStart, self).__init__() + self.errmsg = "Expected start of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + # see if entire string up to here is just whitespace and ignoreables + if loc != self.preParse(instring, 0): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + +class StringEnd(_PositionToken): + """Matches if current position is at the end of the parse string + """ + def __init__(self): + super(StringEnd, self).__init__() + self.errmsg = "Expected end of text" + + def parseImpl(self, instring, loc, doActions=True): + if loc < len(instring): + raise ParseException(instring, loc, self.errmsg, self) + elif loc == len(instring): + return loc + 1, [] + elif loc > len(instring): + return loc, [] + else: + raise ParseException(instring, loc, self.errmsg, self) + +class WordStart(_PositionToken): + """Matches if the current position is at the beginning of a Word, + and is not preceded by any character in a given set of + ``wordChars`` (default= ``printables``). To emulate the + ``\b`` behavior of regular expressions, use + ``WordStart(alphanums)``. ``WordStart`` will also match at + the beginning of the string being parsed, or at the beginning of + a line. + """ + def __init__(self, wordChars=printables): + super(WordStart, self).__init__() + self.wordChars = set(wordChars) + self.errmsg = "Not at the start of a word" + + def parseImpl(self, instring, loc, doActions=True): + if loc != 0: + if (instring[loc - 1] in self.wordChars + or instring[loc] not in self.wordChars): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + +class WordEnd(_PositionToken): + """Matches if the current position is at the end of a Word, and is + not followed by any character in a given set of ``wordChars`` + (default= ``printables``). To emulate the ``\b`` behavior of + regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` + will also match at the end of the string being parsed, or at the end + of a line. + """ + def __init__(self, wordChars=printables): + super(WordEnd, self).__init__() + self.wordChars = set(wordChars) + self.skipWhitespace = False + self.errmsg = "Not at the end of a word" + + def parseImpl(self, instring, loc, doActions=True): + instrlen = len(instring) + if instrlen > 0 and loc < instrlen: + if (instring[loc] in self.wordChars or + instring[loc - 1] not in self.wordChars): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + +class ParseExpression(ParserElement): + """Abstract subclass of ParserElement, for combining and + post-processing parsed tokens. + """ + def __init__(self, exprs, savelist=False): + super(ParseExpression, self).__init__(savelist) + if isinstance(exprs, _generatorType): + exprs = list(exprs) + + if isinstance(exprs, basestring): + self.exprs = [self._literalStringClass(exprs)] + elif isinstance(exprs, ParserElement): + self.exprs = [exprs] + elif isinstance(exprs, Iterable): + exprs = list(exprs) + # if sequence of strings provided, wrap with Literal + if any(isinstance(expr, basestring) for expr in exprs): + exprs = (self._literalStringClass(e) if isinstance(e, basestring) else e for e in exprs) + self.exprs = list(exprs) + else: + try: + self.exprs = list(exprs) + except TypeError: + self.exprs = [exprs] + self.callPreparse = False + + def append(self, other): + self.exprs.append(other) + self.strRepr = None + return self + + def leaveWhitespace(self): + """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on + all contained expressions.""" + self.skipWhitespace = False + self.exprs = [e.copy() for e in self.exprs] + for e in self.exprs: + e.leaveWhitespace() + return self + + def ignore(self, other): + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super(ParseExpression, self).ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + else: + super(ParseExpression, self).ignore(other) + for e in self.exprs: + e.ignore(self.ignoreExprs[-1]) + return self + + def __str__(self): + try: + return super(ParseExpression, self).__str__() + except Exception: + pass + + if self.strRepr is None: + self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs)) + return self.strRepr + + def streamline(self): + super(ParseExpression, self).streamline() + + for e in self.exprs: + e.streamline() + + # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d) + # but only if there are no parse actions or resultsNames on the nested And's + # (likewise for Or's and MatchFirst's) + if len(self.exprs) == 2: + other = self.exprs[0] + if (isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug): + self.exprs = other.exprs[:] + [self.exprs[1]] + self.strRepr = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + other = self.exprs[-1] + if (isinstance(other, self.__class__) + and not other.parseAction + and other.resultsName is None + and not other.debug): + self.exprs = self.exprs[:-1] + other.exprs[:] + self.strRepr = None + self.mayReturnEmpty |= other.mayReturnEmpty + self.mayIndexError |= other.mayIndexError + + self.errmsg = "Expected " + _ustr(self) + + return self + + def validate(self, validateTrace=None): + tmp = (validateTrace if validateTrace is not None else [])[:] + [self] + for e in self.exprs: + e.validate(tmp) + self.checkRecursion([]) + + def copy(self): + ret = super(ParseExpression, self).copy() + ret.exprs = [e.copy() for e in self.exprs] + return ret + + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_ungrouped_named_tokens_in_collection: + for e in self.exprs: + if isinstance(e, ParserElement) and e.resultsName: + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName), + stacklevel=3) + + return super(ParseExpression, self)._setResultsName(name, listAllMatches) + + +class And(ParseExpression): + """ + Requires all given :class:`ParseExpression` s to be found in the given order. + Expressions may be separated by whitespace. + May be constructed using the ``'+'`` operator. + May also be constructed using the ``'-'`` operator, which will + suppress backtracking. + + Example:: + + integer = Word(nums) + name_expr = OneOrMore(Word(alphas)) + + expr = And([integer("id"), name_expr("name"), integer("age")]) + # more easily written as: + expr = integer("id") + name_expr("name") + integer("age") + """ + + class _ErrorStop(Empty): + def __init__(self, *args, **kwargs): + super(And._ErrorStop, self).__init__(*args, **kwargs) + self.name = '-' + self.leaveWhitespace() + + def __init__(self, exprs, savelist=True): + exprs = list(exprs) + if exprs and Ellipsis in exprs: + tmp = [] + for i, expr in enumerate(exprs): + if expr is Ellipsis: + if i < len(exprs) - 1: + skipto_arg = (Empty() + exprs[i + 1]).exprs[-1] + tmp.append(SkipTo(skipto_arg)("_skipped*")) + else: + raise Exception("cannot construct And with sequence ending in ...") + else: + tmp.append(expr) + exprs[:] = tmp + super(And, self).__init__(exprs, savelist) + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self.setWhitespaceChars(self.exprs[0].whiteChars) + self.skipWhitespace = self.exprs[0].skipWhitespace + self.callPreparse = True + + def streamline(self): + # collapse any _PendingSkip's + if self.exprs: + if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip) + for e in self.exprs[:-1]): + for i, e in enumerate(self.exprs[:-1]): + if e is None: + continue + if (isinstance(e, ParseExpression) + and e.exprs and isinstance(e.exprs[-1], _PendingSkip)): + e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] + self.exprs[i + 1] = None + self.exprs = [e for e in self.exprs if e is not None] + + super(And, self).streamline() + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + # pass False as last arg to _parse for first element, since we already + # pre-parsed the string as part of our And pre-parsing + loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False) + errorStop = False + for e in self.exprs[1:]: + if isinstance(e, And._ErrorStop): + errorStop = True + continue + if errorStop: + try: + loc, exprtokens = e._parse(instring, loc, doActions) + except ParseSyntaxException: + raise + except ParseBaseException as pe: + pe.__traceback__ = None + raise ParseSyntaxException._from_exception(pe) + except IndexError: + raise ParseSyntaxException(instring, len(instring), self.errmsg, self) + else: + loc, exprtokens = e._parse(instring, loc, doActions) + if exprtokens or exprtokens.haskeys(): + resultlist += exprtokens + return loc, resultlist + + def __iadd__(self, other): + if isinstance(other, basestring): + other = self._literalStringClass(other) + return self.append(other) # And([self, other]) + + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e.checkRecursion(subRecCheckList) + if not e.mayReturnEmpty: + break + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" + + return self.strRepr + + +class Or(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + two expressions match, the expression that matches the longest + string will be used. May be constructed using the ``'^'`` + operator. + + Example:: + + # construct Or using '^' operator + + number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) + print(number.searchString("123 3.1416 789")) + + prints:: + + [['123'], ['3.1416'], ['789']] + """ + def __init__(self, exprs, savelist=False): + super(Or, self).__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self): + super(Or, self).streamline() + if __compat__.collect_all_And_tokens: + self.saveAsList = any(e.saveAsList for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + matches = [] + for e in self.exprs: + try: + loc2 = e.tryParse(instring, loc) + except ParseException as err: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring, len(instring), e.errmsg, self) + maxExcLoc = len(instring) + else: + # save match among all matches, to retry longest to shortest + matches.append((loc2, e)) + + if matches: + # re-evaluate all matches in descending order of length of match, in case attached actions + # might change whether or how much they match of the input. + matches.sort(key=itemgetter(0), reverse=True) + + if not doActions: + # no further conditions or parse actions to change the selection of + # alternative, so the first match will be the best match + best_expr = matches[0][1] + return best_expr._parse(instring, loc, doActions) + + longest = -1, None + for loc1, expr1 in matches: + if loc1 <= longest[0]: + # already have a longer match than this one will deliver, we are done + return longest + + try: + loc2, toks = expr1._parse(instring, loc, doActions) + except ParseException as err: + err.__traceback__ = None + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + else: + if loc2 >= loc1: + return loc2, toks + # didn't match as much as before + elif loc2 > longest[0]: + longest = loc2, toks + + if longest != (-1, None): + return longest + + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + + def __ixor__(self, other): + if isinstance(other, basestring): + other = self._literalStringClass(other) + return self.append(other) # Or([self, other]) + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" + + return self.strRepr + + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e.checkRecursion(subRecCheckList) + + def _setResultsName(self, name, listAllMatches=False): + if (not __compat__.collect_all_And_tokens + and __diag__.warn_multiple_tokens_in_named_alternation): + if any(isinstance(e, And) for e in self.exprs): + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "may only return a single token for an And alternative, " + "in future will return the full list of tokens".format( + "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), + stacklevel=3) + + return super(Or, self)._setResultsName(name, listAllMatches) + + +class MatchFirst(ParseExpression): + """Requires that at least one :class:`ParseExpression` is found. If + two expressions match, the first one listed is the one that will + match. May be constructed using the ``'|'`` operator. + + Example:: + + # construct MatchFirst using '|' operator + + # watch the order of expressions to match + number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) + print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] + + # put more selective expression first + number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) + print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] + """ + def __init__(self, exprs, savelist=False): + super(MatchFirst, self).__init__(exprs, savelist) + if self.exprs: + self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) + else: + self.mayReturnEmpty = True + + def streamline(self): + super(MatchFirst, self).streamline() + if __compat__.collect_all_And_tokens: + self.saveAsList = any(e.saveAsList for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + maxExcLoc = -1 + maxException = None + for e in self.exprs: + try: + ret = e._parse(instring, loc, doActions) + return ret + except ParseException as err: + if err.loc > maxExcLoc: + maxException = err + maxExcLoc = err.loc + except IndexError: + if len(instring) > maxExcLoc: + maxException = ParseException(instring, len(instring), e.errmsg, self) + maxExcLoc = len(instring) + + # only got here if no expression matched, raise exception for match that made it the furthest + else: + if maxException is not None: + maxException.msg = self.errmsg + raise maxException + else: + raise ParseException(instring, loc, "no defined alternatives to match", self) + + def __ior__(self, other): + if isinstance(other, basestring): + other = self._literalStringClass(other) + return self.append(other) # MatchFirst([self, other]) + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" + + return self.strRepr + + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e.checkRecursion(subRecCheckList) + + def _setResultsName(self, name, listAllMatches=False): + if (not __compat__.collect_all_And_tokens + and __diag__.warn_multiple_tokens_in_named_alternation): + if any(isinstance(e, And) for e in self.exprs): + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "may only return a single token for an And alternative, " + "in future will return the full list of tokens".format( + "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), + stacklevel=3) + + return super(MatchFirst, self)._setResultsName(name, listAllMatches) + + +class Each(ParseExpression): + """Requires all given :class:`ParseExpression` s to be found, but in + any order. Expressions may be separated by whitespace. + + May be constructed using the ``'&'`` operator. + + Example:: + + color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") + shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") + integer = Word(nums) + shape_attr = "shape:" + shape_type("shape") + posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") + color_attr = "color:" + color("color") + size_attr = "size:" + integer("size") + + # use Each (using operator '&') to accept attributes in any order + # (shape and posn are required, color and size are optional) + shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) + + shape_spec.runTests(''' + shape: SQUARE color: BLACK posn: 100, 120 + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + color:GREEN size:20 shape:TRIANGLE posn:20,40 + ''' + ) + + prints:: + + shape: SQUARE color: BLACK posn: 100, 120 + ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] + - color: BLACK + - posn: ['100', ',', '120'] + - x: 100 + - y: 120 + - shape: SQUARE + + + shape: CIRCLE size: 50 color: BLUE posn: 50,80 + ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] + - color: BLUE + - posn: ['50', ',', '80'] + - x: 50 + - y: 80 + - shape: CIRCLE + - size: 50 + + + color: GREEN size: 20 shape: TRIANGLE posn: 20,40 + ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] + - color: GREEN + - posn: ['20', ',', '40'] + - x: 20 + - y: 40 + - shape: TRIANGLE + - size: 20 + """ + def __init__(self, exprs, savelist=True): + super(Each, self).__init__(exprs, savelist) + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + self.skipWhitespace = True + self.initExprGroups = True + self.saveAsList = True + + def streamline(self): + super(Each, self).streamline() + self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) + return self + + def parseImpl(self, instring, loc, doActions=True): + if self.initExprGroups: + self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional)) + opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)] + opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))] + self.optionals = opt1 + opt2 + self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)] + self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)] + self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] + self.required += self.multirequired + self.initExprGroups = False + tmpLoc = loc + tmpReqd = self.required[:] + tmpOpt = self.optionals[:] + matchOrder = [] + + keepMatching = True + while keepMatching: + tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired + failed = [] + for e in tmpExprs: + try: + tmpLoc = e.tryParse(instring, tmpLoc) + except ParseException: + failed.append(e) + else: + matchOrder.append(self.opt1map.get(id(e), e)) + if e in tmpReqd: + tmpReqd.remove(e) + elif e in tmpOpt: + tmpOpt.remove(e) + if len(failed) == len(tmpExprs): + keepMatching = False + + if tmpReqd: + missing = ", ".join(_ustr(e) for e in tmpReqd) + raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing) + + # add any unmatched Optionals, in case they have default values defined + matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt] + + resultlist = [] + for e in matchOrder: + loc, results = e._parse(instring, loc, doActions) + resultlist.append(results) + + finalResults = sum(resultlist, ParseResults([])) + return loc, finalResults + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" + + return self.strRepr + + def checkRecursion(self, parseElementList): + subRecCheckList = parseElementList[:] + [self] + for e in self.exprs: + e.checkRecursion(subRecCheckList) + + +class ParseElementEnhance(ParserElement): + """Abstract subclass of :class:`ParserElement`, for combining and + post-processing parsed tokens. + """ + def __init__(self, expr, savelist=False): + super(ParseElementEnhance, self).__init__(savelist) + if isinstance(expr, basestring): + if issubclass(self._literalStringClass, Token): + expr = self._literalStringClass(expr) + else: + expr = self._literalStringClass(Literal(expr)) + self.expr = expr + self.strRepr = None + if expr is not None: + self.mayIndexError = expr.mayIndexError + self.mayReturnEmpty = expr.mayReturnEmpty + self.setWhitespaceChars(expr.whiteChars) + self.skipWhitespace = expr.skipWhitespace + self.saveAsList = expr.saveAsList + self.callPreparse = expr.callPreparse + self.ignoreExprs.extend(expr.ignoreExprs) + + def parseImpl(self, instring, loc, doActions=True): + if self.expr is not None: + return self.expr._parse(instring, loc, doActions, callPreParse=False) + else: + raise ParseException("", loc, self.errmsg, self) + + def leaveWhitespace(self): + self.skipWhitespace = False + self.expr = self.expr.copy() + if self.expr is not None: + self.expr.leaveWhitespace() + return self + + def ignore(self, other): + if isinstance(other, Suppress): + if other not in self.ignoreExprs: + super(ParseElementEnhance, self).ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + else: + super(ParseElementEnhance, self).ignore(other) + if self.expr is not None: + self.expr.ignore(self.ignoreExprs[-1]) + return self + + def streamline(self): + super(ParseElementEnhance, self).streamline() + if self.expr is not None: + self.expr.streamline() + return self + + def checkRecursion(self, parseElementList): + if self in parseElementList: + raise RecursiveGrammarException(parseElementList + [self]) + subRecCheckList = parseElementList[:] + [self] + if self.expr is not None: + self.expr.checkRecursion(subRecCheckList) + + def validate(self, validateTrace=None): + if validateTrace is None: + validateTrace = [] + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__(self): + try: + return super(ParseElementEnhance, self).__str__() + except Exception: + pass + + if self.strRepr is None and self.expr is not None: + self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr)) + return self.strRepr + + +class FollowedBy(ParseElementEnhance): + """Lookahead matching of the given parse expression. + ``FollowedBy`` does *not* advance the parsing position within + the input string, it only verifies that the specified parse + expression matches at the current position. ``FollowedBy`` + always returns a null token list. If any results names are defined + in the lookahead expression, those *will* be returned for access by + name. + + Example:: + + # use FollowedBy to match a label only if it is followed by a ':' + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) + + OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() + + prints:: + + [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] + """ + def __init__(self, expr): + super(FollowedBy, self).__init__(expr) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + # by using self._expr.parse and deleting the contents of the returned ParseResults list + # we keep any named results that were defined in the FollowedBy expression + _, ret = self.expr._parse(instring, loc, doActions=doActions) + del ret[:] + + return loc, ret + + +class PrecededBy(ParseElementEnhance): + """Lookbehind matching of the given parse expression. + ``PrecededBy`` does not advance the parsing position within the + input string, it only verifies that the specified parse expression + matches prior to the current position. ``PrecededBy`` always + returns a null token list, but if a results name is defined on the + given expression, it is returned. + + Parameters: + + - expr - expression that must match prior to the current parse + location + - retreat - (default= ``None``) - (int) maximum number of characters + to lookbehind prior to the current parse location + + If the lookbehind expression is a string, Literal, Keyword, or + a Word or CharsNotIn with a specified exact or maximum length, then + the retreat parameter is not required. Otherwise, retreat must be + specified to give a maximum number of characters to look back from + the current parse position for a lookbehind match. + + Example:: + + # VB-style variable names with type prefixes + int_var = PrecededBy("#") + pyparsing_common.identifier + str_var = PrecededBy("$") + pyparsing_common.identifier + + """ + def __init__(self, expr, retreat=None): + super(PrecededBy, self).__init__(expr) + self.expr = self.expr().leaveWhitespace() + self.mayReturnEmpty = True + self.mayIndexError = False + self.exact = False + if isinstance(expr, str): + retreat = len(expr) + self.exact = True + elif isinstance(expr, (Literal, Keyword)): + retreat = expr.matchLen + self.exact = True + elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: + retreat = expr.maxLen + self.exact = True + elif isinstance(expr, _PositionToken): + retreat = 0 + self.exact = True + self.retreat = retreat + self.errmsg = "not preceded by " + str(expr) + self.skipWhitespace = False + self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) + + def parseImpl(self, instring, loc=0, doActions=True): + if self.exact: + if loc < self.retreat: + raise ParseException(instring, loc, self.errmsg) + start = loc - self.retreat + _, ret = self.expr._parse(instring, start) + else: + # retreat specified a maximum lookbehind window, iterate + test_expr = self.expr + StringEnd() + instring_slice = instring[max(0, loc - self.retreat):loc] + last_expr = ParseException(instring, loc, self.errmsg) + for offset in range(1, min(loc, self.retreat + 1)+1): + try: + # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) + _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset) + except ParseBaseException as pbe: + last_expr = pbe + else: + break + else: + raise last_expr + return loc, ret + + +class NotAny(ParseElementEnhance): + """Lookahead to disallow matching with the given parse expression. + ``NotAny`` does *not* advance the parsing position within the + input string, it only verifies that the specified parse expression + does *not* match at the current position. Also, ``NotAny`` does + *not* skip over leading whitespace. ``NotAny`` always returns + a null token list. May be constructed using the '~' operator. + + Example:: + + AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) + + # take care not to mistake keywords for identifiers + ident = ~(AND | OR | NOT) + Word(alphas) + boolean_term = Optional(NOT) + ident + + # very crude boolean expression - to support parenthesis groups and + # operation hierarchy, use infixNotation + boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term) + + # integers that are followed by "." are actually floats + integer = Word(nums) + ~Char(".") + """ + def __init__(self, expr): + super(NotAny, self).__init__(expr) + # ~ self.leaveWhitespace() + self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs + self.mayReturnEmpty = True + self.errmsg = "Found unwanted token, " + _ustr(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + if self.expr.canParseNext(instring, loc): + raise ParseException(instring, loc, self.errmsg, self) + return loc, [] + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "~{" + _ustr(self.expr) + "}" + + return self.strRepr + +class _MultipleMatch(ParseElementEnhance): + def __init__(self, expr, stopOn=None): + super(_MultipleMatch, self).__init__(expr) + self.saveAsList = True + ender = stopOn + if isinstance(ender, basestring): + ender = self._literalStringClass(ender) + self.stopOn(ender) + + def stopOn(self, ender): + if isinstance(ender, basestring): + ender = self._literalStringClass(ender) + self.not_ender = ~ender if ender is not None else None + return self + + def parseImpl(self, instring, loc, doActions=True): + self_expr_parse = self.expr._parse + self_skip_ignorables = self._skipIgnorables + check_ender = self.not_ender is not None + if check_ender: + try_not_ender = self.not_ender.tryParse + + # must be at least one (but first see if we are the stopOn sentinel; + # if so, fail) + if check_ender: + try_not_ender(instring, loc) + loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) + try: + hasIgnoreExprs = (not not self.ignoreExprs) + while 1: + if check_ender: + try_not_ender(instring, loc) + if hasIgnoreExprs: + preloc = self_skip_ignorables(instring, loc) + else: + preloc = loc + loc, tmptokens = self_expr_parse(instring, preloc, doActions) + if tmptokens or tmptokens.haskeys(): + tokens += tmptokens + except (ParseException, IndexError): + pass + + return loc, tokens + + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_ungrouped_named_tokens_in_collection: + for e in [self.expr] + getattr(self.expr, 'exprs', []): + if isinstance(e, ParserElement) and e.resultsName: + warnings.warn("{0}: setting results name {1!r} on {2} expression " + "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", + name, + type(self).__name__, + e.resultsName), + stacklevel=3) + + return super(_MultipleMatch, self)._setResultsName(name, listAllMatches) + + +class OneOrMore(_MultipleMatch): + """Repetition of one or more of the given expression. + + Parameters: + - expr - expression that must match one or more times + - stopOn - (default= ``None``) - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) + + text = "shape: SQUARE posn: upper left color: BLACK" + OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] + + # use stopOn attribute for OneOrMore to avoid reading label string as part of the data + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) + OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] + + # could also be written as + (attr_expr * (1,)).parseString(text).pprint() + """ + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "{" + _ustr(self.expr) + "}..." + + return self.strRepr + +class ZeroOrMore(_MultipleMatch): + """Optional repetition of zero or more of the given expression. + + Parameters: + - expr - expression that must match zero or more times + - stopOn - (default= ``None``) - expression for a terminating sentinel + (only required if the sentinel would ordinarily match the repetition + expression) + + Example: similar to :class:`OneOrMore` + """ + def __init__(self, expr, stopOn=None): + super(ZeroOrMore, self).__init__(expr, stopOn=stopOn) + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + try: + return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) + except (ParseException, IndexError): + return loc, [] + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]..." + + return self.strRepr + + +class _NullToken(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + def __str__(self): + return "" + +class Optional(ParseElementEnhance): + """Optional matching of the given expression. + + Parameters: + - expr - expression that must match zero or more times + - default (optional) - value to be returned if the optional expression is not found. + + Example:: + + # US postal code can be a 5-digit zip, plus optional 4-digit qualifier + zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) + zip.runTests(''' + # traditional ZIP code + 12345 + + # ZIP+4 form + 12101-0001 + + # invalid ZIP + 98765- + ''') + + prints:: + + # traditional ZIP code + 12345 + ['12345'] + + # ZIP+4 form + 12101-0001 + ['12101-0001'] + + # invalid ZIP + 98765- + ^ + FAIL: Expected end of text (at char 5), (line:1, col:6) + """ + __optionalNotMatched = _NullToken() + + def __init__(self, expr, default=__optionalNotMatched): + super(Optional, self).__init__(expr, savelist=False) + self.saveAsList = self.expr.saveAsList + self.defaultValue = default + self.mayReturnEmpty = True + + def parseImpl(self, instring, loc, doActions=True): + try: + loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) + except (ParseException, IndexError): + if self.defaultValue is not self.__optionalNotMatched: + if self.expr.resultsName: + tokens = ParseResults([self.defaultValue]) + tokens[self.expr.resultsName] = self.defaultValue + else: + tokens = [self.defaultValue] + else: + tokens = [] + return loc, tokens + + def __str__(self): + if hasattr(self, "name"): + return self.name + + if self.strRepr is None: + self.strRepr = "[" + _ustr(self.expr) + "]" + + return self.strRepr + +class SkipTo(ParseElementEnhance): + """Token for skipping over all undefined text until the matched + expression is found. + + Parameters: + - expr - target expression marking the end of the data to be skipped + - include - (default= ``False``) if True, the target expression is also parsed + (the skipped text and target expression are returned as a 2-element list). + - ignore - (default= ``None``) used to define grammars (typically quoted strings and + comments) that might contain false matches to the target expression + - failOn - (default= ``None``) define expressions that are not allowed to be + included in the skipped test; if found before the target expression is found, + the SkipTo is not a match + + Example:: + + report = ''' + Outstanding Issues Report - 1 Jan 2000 + + # | Severity | Description | Days Open + -----+----------+-------------------------------------------+----------- + 101 | Critical | Intermittent system crash | 6 + 94 | Cosmetic | Spelling error on Login ('log|n') | 14 + 79 | Minor | System slow when running too many reports | 47 + ''' + integer = Word(nums) + SEP = Suppress('|') + # use SkipTo to simply match everything up until the next SEP + # - ignore quoted strings, so that a '|' character inside a quoted string does not match + # - parse action will call token.strip() for each matched token, i.e., the description body + string_data = SkipTo(SEP, ignore=quotedString) + string_data.setParseAction(tokenMap(str.strip)) + ticket_expr = (integer("issue_num") + SEP + + string_data("sev") + SEP + + string_data("desc") + SEP + + integer("days_open")) + + for tkt in ticket_expr.searchString(report): + print tkt.dump() + + prints:: + + ['101', 'Critical', 'Intermittent system crash', '6'] + - days_open: 6 + - desc: Intermittent system crash + - issue_num: 101 + - sev: Critical + ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] + - days_open: 14 + - desc: Spelling error on Login ('log|n') + - issue_num: 94 + - sev: Cosmetic + ['79', 'Minor', 'System slow when running too many reports', '47'] + - days_open: 47 + - desc: System slow when running too many reports + - issue_num: 79 + - sev: Minor + """ + def __init__(self, other, include=False, ignore=None, failOn=None): + super(SkipTo, self).__init__(other) + self.ignoreExpr = ignore + self.mayReturnEmpty = True + self.mayIndexError = False + self.includeMatch = include + self.saveAsList = False + if isinstance(failOn, basestring): + self.failOn = self._literalStringClass(failOn) + else: + self.failOn = failOn + self.errmsg = "No match found for " + _ustr(self.expr) + + def parseImpl(self, instring, loc, doActions=True): + startloc = loc + instrlen = len(instring) + expr = self.expr + expr_parse = self.expr._parse + self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None + self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None + + tmploc = loc + while tmploc <= instrlen: + if self_failOn_canParseNext is not None: + # break if failOn expression matches + if self_failOn_canParseNext(instring, tmploc): + break + + if self_ignoreExpr_tryParse is not None: + # advance past ignore expressions + while 1: + try: + tmploc = self_ignoreExpr_tryParse(instring, tmploc) + except ParseBaseException: + break + + try: + expr_parse(instring, tmploc, doActions=False, callPreParse=False) + except (ParseException, IndexError): + # no match, advance loc in string + tmploc += 1 + else: + # matched skipto expr, done + break + + else: + # ran off the end of the input string without matching skipto expr, fail + raise ParseException(instring, loc, self.errmsg, self) + + # build up return values + loc = tmploc + skiptext = instring[startloc:loc] + skipresult = ParseResults(skiptext) + + if self.includeMatch: + loc, mat = expr_parse(instring, loc, doActions, callPreParse=False) + skipresult += mat + + return loc, skipresult + +class Forward(ParseElementEnhance): + """Forward declaration of an expression to be defined later - + used for recursive grammars, such as algebraic infix notation. + When the expression is known, it is assigned to the ``Forward`` + variable using the '<<' operator. + + Note: take care when assigning to ``Forward`` not to overlook + precedence of operators. + + Specifically, '|' has a lower precedence than '<<', so that:: + + fwdExpr << a | b | c + + will actually be evaluated as:: + + (fwdExpr << a) | b | c + + thereby leaving b and c out as parseable alternatives. It is recommended that you + explicitly group the values inserted into the ``Forward``:: + + fwdExpr << (a | b | c) + + Converting to use the '<<=' operator instead will avoid this problem. + + See :class:`ParseResults.pprint` for an example of a recursive + parser created using ``Forward``. + """ + def __init__(self, other=None): + super(Forward, self).__init__(other, savelist=False) + + def __lshift__(self, other): + if isinstance(other, basestring): + other = self._literalStringClass(other) + self.expr = other + self.strRepr = None + self.mayIndexError = self.expr.mayIndexError + self.mayReturnEmpty = self.expr.mayReturnEmpty + self.setWhitespaceChars(self.expr.whiteChars) + self.skipWhitespace = self.expr.skipWhitespace + self.saveAsList = self.expr.saveAsList + self.ignoreExprs.extend(self.expr.ignoreExprs) + return self + + def __ilshift__(self, other): + return self << other + + def leaveWhitespace(self): + self.skipWhitespace = False + return self + + def streamline(self): + if not self.streamlined: + self.streamlined = True + if self.expr is not None: + self.expr.streamline() + return self + + def validate(self, validateTrace=None): + if validateTrace is None: + validateTrace = [] + + if self not in validateTrace: + tmp = validateTrace[:] + [self] + if self.expr is not None: + self.expr.validate(tmp) + self.checkRecursion([]) + + def __str__(self): + if hasattr(self, "name"): + return self.name + if self.strRepr is not None: + return self.strRepr + + # Avoid infinite recursion by setting a temporary strRepr + self.strRepr = ": ..." + + # Use the string representation of main expression. + retString = '...' + try: + if self.expr is not None: + retString = _ustr(self.expr)[:1000] + else: + retString = "None" + finally: + self.strRepr = self.__class__.__name__ + ": " + retString + return self.strRepr + + def copy(self): + if self.expr is not None: + return super(Forward, self).copy() + else: + ret = Forward() + ret <<= self + return ret + + def _setResultsName(self, name, listAllMatches=False): + if __diag__.warn_name_set_on_empty_Forward: + if self.expr is None: + warnings.warn("{0}: setting results name {0!r} on {1} expression " + "that has no contained expression".format("warn_name_set_on_empty_Forward", + name, + type(self).__name__), + stacklevel=3) + + return super(Forward, self)._setResultsName(name, listAllMatches) + +class TokenConverter(ParseElementEnhance): + """ + Abstract subclass of :class:`ParseExpression`, for converting parsed results. + """ + def __init__(self, expr, savelist=False): + super(TokenConverter, self).__init__(expr) # , savelist) + self.saveAsList = False + +class Combine(TokenConverter): + """Converter to concatenate all matching tokens to a single string. + By default, the matching patterns must also be contiguous in the + input string; this can be disabled by specifying + ``'adjacent=False'`` in the constructor. + + Example:: + + real = Word(nums) + '.' + Word(nums) + print(real.parseString('3.1416')) # -> ['3', '.', '1416'] + # will also erroneously match the following + print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] + + real = Combine(Word(nums) + '.' + Word(nums)) + print(real.parseString('3.1416')) # -> ['3.1416'] + # no match when there are internal spaces + print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) + """ + def __init__(self, expr, joinString="", adjacent=True): + super(Combine, self).__init__(expr) + # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself + if adjacent: + self.leaveWhitespace() + self.adjacent = adjacent + self.skipWhitespace = True + self.joinString = joinString + self.callPreparse = True + + def ignore(self, other): + if self.adjacent: + ParserElement.ignore(self, other) + else: + super(Combine, self).ignore(other) + return self + + def postParse(self, instring, loc, tokenlist): + retToks = tokenlist.copy() + del retToks[:] + retToks += ParseResults(["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults) + + if self.resultsName and retToks.haskeys(): + return [retToks] + else: + return retToks + +class Group(TokenConverter): + """Converter to return the matched tokens as a list - useful for + returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. + + Example:: + + ident = Word(alphas) + num = Word(nums) + term = ident | num + func = ident + Optional(delimitedList(term)) + print(func.parseString("fn a, b, 100")) # -> ['fn', 'a', 'b', '100'] + + func = ident + Group(Optional(delimitedList(term))) + print(func.parseString("fn a, b, 100")) # -> ['fn', ['a', 'b', '100']] + """ + def __init__(self, expr): + super(Group, self).__init__(expr) + self.saveAsList = True + + def postParse(self, instring, loc, tokenlist): + return [tokenlist] + +class Dict(TokenConverter): + """Converter to return a repetitive expression as a list, but also + as a dictionary. Each element can also be referenced using the first + token in the expression as its key. Useful for tabular report + scraping when the first column can be used as a item key. + + Example:: + + data_word = Word(alphas) + label = data_word + FollowedBy(':') + attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) + + # print attributes as plain groups + print(OneOrMore(attr_expr).parseString(text).dump()) + + # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names + result = Dict(OneOrMore(Group(attr_expr))).parseString(text) + print(result.dump()) + + # access named fields as dict entries, or output as dict + print(result['shape']) + print(result.asDict()) + + prints:: + + ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: light blue + - posn: upper left + - shape: SQUARE + - texture: burlap + SQUARE + {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} + + See more examples at :class:`ParseResults` of accessing fields by results name. + """ + def __init__(self, expr): + super(Dict, self).__init__(expr) + self.saveAsList = True + + def postParse(self, instring, loc, tokenlist): + for i, tok in enumerate(tokenlist): + if len(tok) == 0: + continue + ikey = tok[0] + if isinstance(ikey, int): + ikey = _ustr(tok[0]).strip() + if len(tok) == 1: + tokenlist[ikey] = _ParseResultsWithOffset("", i) + elif len(tok) == 2 and not isinstance(tok[1], ParseResults): + tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) + else: + dictvalue = tok.copy() # ParseResults(i) + del dictvalue[0] + if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()): + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) + else: + tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) + + if self.resultsName: + return [tokenlist] + else: + return tokenlist + + +class Suppress(TokenConverter): + """Converter for ignoring the results of a parsed expression. + + Example:: + + source = "a, b, c,d" + wd = Word(alphas) + wd_list1 = wd + ZeroOrMore(',' + wd) + print(wd_list1.parseString(source)) + + # often, delimiters that are useful during parsing are just in the + # way afterward - use Suppress to keep them out of the parsed output + wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) + print(wd_list2.parseString(source)) + + prints:: + + ['a', ',', 'b', ',', 'c', ',', 'd'] + ['a', 'b', 'c', 'd'] + + (See also :class:`delimitedList`.) + """ + def postParse(self, instring, loc, tokenlist): + return [] + + def suppress(self): + return self + + +class OnlyOnce(object): + """Wrapper for parse actions, to ensure they are only called once. + """ + def __init__(self, methodCall): + self.callable = _trim_arity(methodCall) + self.called = False + def __call__(self, s, l, t): + if not self.called: + results = self.callable(s, l, t) + self.called = True + return results + raise ParseException(s, l, "") + def reset(self): + self.called = False + +def traceParseAction(f): + """Decorator for debugging parse actions. + + When the parse action is called, this decorator will print + ``">> entering method-name(line:, , )"``. + When the parse action completes, the decorator will print + ``"<<"`` followed by the returned value, or any exception that the parse action raised. + + Example:: + + wd = Word(alphas) + + @traceParseAction + def remove_duplicate_chars(tokens): + return ''.join(sorted(set(''.join(tokens)))) + + wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) + print(wds.parseString("slkdjs sld sldd sdlf sdljf")) + + prints:: + + >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) + < 3: + thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc + sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t)) + try: + ret = f(*paArgs) + except Exception as exc: + sys.stderr.write("< ['aa', 'bb', 'cc'] + delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] + """ + dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..." + if combine: + return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) + else: + return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) + +def countedArray(expr, intExpr=None): + """Helper to define a counted list of expressions. + + This helper defines a pattern of the form:: + + integer expr expr expr... + + where the leading integer tells how many expr expressions follow. + The matched tokens returns the array of expr tokens as a list - the + leading count token is suppressed. + + If ``intExpr`` is specified, it should be a pyparsing expression + that produces an integer value. + + Example:: + + countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] + + # in this parser, the leading integer value is given in binary, + # '10' indicating that 2 values are in the array + binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) + countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] + """ + arrayExpr = Forward() + def countFieldParseAction(s, l, t): + n = t[0] + arrayExpr << (n and Group(And([expr] * n)) or Group(empty)) + return [] + if intExpr is None: + intExpr = Word(nums).setParseAction(lambda t: int(t[0])) + else: + intExpr = intExpr.copy() + intExpr.setName("arrayLen") + intExpr.addParseAction(countFieldParseAction, callDuringTry=True) + return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...') + +def _flatten(L): + ret = [] + for i in L: + if isinstance(i, list): + ret.extend(_flatten(i)) + else: + ret.append(i) + return ret + +def matchPreviousLiteral(expr): + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = matchPreviousLiteral(first) + matchExpr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches a previous literal, will also match the leading + ``"1:1"`` in ``"1:10"``. If this is not desired, use + :class:`matchPreviousExpr`. Do *not* use with packrat parsing + enabled. + """ + rep = Forward() + def copyTokenToRepeater(s, l, t): + if t: + if len(t) == 1: + rep << t[0] + else: + # flatten t tokens + tflat = _flatten(t.asList()) + rep << And(Literal(tt) for tt in tflat) + else: + rep << Empty() + expr.addParseAction(copyTokenToRepeater, callDuringTry=True) + rep.setName('(prev) ' + _ustr(expr)) + return rep + +def matchPreviousExpr(expr): + """Helper to define an expression that is indirectly defined from + the tokens matched in a previous expression, that is, it looks for + a 'repeat' of a previous expression. For example:: + + first = Word(nums) + second = matchPreviousExpr(first) + matchExpr = first + ":" + second + + will match ``"1:1"``, but not ``"1:2"``. Because this + matches by expressions, will *not* match the leading ``"1:1"`` + in ``"1:10"``; the expressions are evaluated first, and then + compared, so ``"1"`` is compared with ``"10"``. Do *not* use + with packrat parsing enabled. + """ + rep = Forward() + e2 = expr.copy() + rep <<= e2 + def copyTokenToRepeater(s, l, t): + matchTokens = _flatten(t.asList()) + def mustMatchTheseTokens(s, l, t): + theseTokens = _flatten(t.asList()) + if theseTokens != matchTokens: + raise ParseException('', 0, '') + rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) + expr.addParseAction(copyTokenToRepeater, callDuringTry=True) + rep.setName('(prev) ' + _ustr(expr)) + return rep + +def _escapeRegexRangeChars(s): + # ~ escape these chars: ^-[] + for c in r"\^-[]": + s = s.replace(c, _bslash + c) + s = s.replace("\n", r"\n") + s = s.replace("\t", r"\t") + return _ustr(s) + +def oneOf(strs, caseless=False, useRegex=True, asKeyword=False): + """Helper to quickly define a set of alternative Literals, and makes + sure to do longest-first testing when there is a conflict, + regardless of the input order, but returns + a :class:`MatchFirst` for best performance. + + Parameters: + + - strs - a string of space-delimited literals, or a collection of + string literals + - caseless - (default= ``False``) - treat all literals as + caseless + - useRegex - (default= ``True``) - as an optimization, will + generate a Regex object; otherwise, will generate + a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if + creating a :class:`Regex` raises an exception) + - asKeyword - (default=``False``) - enforce Keyword-style matching on the + generated expressions + + Example:: + + comp_oper = oneOf("< = > <= >= !=") + var = Word(alphas) + number = Word(nums) + term = var | number + comparison_expr = term + comp_oper + term + print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) + + prints:: + + [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] + """ + if isinstance(caseless, basestring): + warnings.warn("More than one string argument passed to oneOf, pass " + "choices as a list or space-delimited string", stacklevel=2) + + if caseless: + isequal = (lambda a, b: a.upper() == b.upper()) + masks = (lambda a, b: b.upper().startswith(a.upper())) + parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral + else: + isequal = (lambda a, b: a == b) + masks = (lambda a, b: b.startswith(a)) + parseElementClass = Keyword if asKeyword else Literal + + symbols = [] + if isinstance(strs, basestring): + symbols = strs.split() + elif isinstance(strs, Iterable): + symbols = list(strs) + else: + warnings.warn("Invalid argument to oneOf, expected string or iterable", + SyntaxWarning, stacklevel=2) + if not symbols: + return NoMatch() + + if not asKeyword: + # if not producing keywords, need to reorder to take care to avoid masking + # longer choices with shorter ones + i = 0 + while i < len(symbols) - 1: + cur = symbols[i] + for j, other in enumerate(symbols[i + 1:]): + if isequal(other, cur): + del symbols[i + j + 1] + break + elif masks(cur, other): + del symbols[i + j + 1] + symbols.insert(i, other) + break + else: + i += 1 + + if not (caseless or asKeyword) and useRegex: + # ~ print (strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols])) + try: + if len(symbols) == len("".join(symbols)): + return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols)) + else: + return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols)) + except Exception: + warnings.warn("Exception creating Regex for oneOf, building MatchFirst", + SyntaxWarning, stacklevel=2) + + # last resort, just use MatchFirst + return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) + +def dictOf(key, value): + """Helper to easily and clearly define a dictionary by specifying + the respective patterns for the key and value. Takes care of + defining the :class:`Dict`, :class:`ZeroOrMore`, and + :class:`Group` tokens in the proper order. The key pattern + can include delimiting markers or punctuation, as long as they are + suppressed, thereby leaving the significant key text. The value + pattern can include named results, so that the :class:`Dict` results + can include named token fields. + + Example:: + + text = "shape: SQUARE posn: upper left color: light blue texture: burlap" + attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) + print(OneOrMore(attr_expr).parseString(text).dump()) + + attr_label = label + attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) + + # similar to Dict, but simpler call format + result = dictOf(attr_label, attr_value).parseString(text) + print(result.dump()) + print(result['shape']) + print(result.shape) # object attribute access works too + print(result.asDict()) + + prints:: + + [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] + - color: light blue + - posn: upper left + - shape: SQUARE + - texture: burlap + SQUARE + SQUARE + {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} + """ + return Dict(OneOrMore(Group(key + value))) + +def originalTextFor(expr, asString=True): + """Helper to return the original, untokenized text for a given + expression. Useful to restore the parsed fields of an HTML start + tag into the raw tag text itself, or to revert separate tokens with + intervening whitespace back to the original matching input text. By + default, returns astring containing the original parsed text. + + If the optional ``asString`` argument is passed as + ``False``, then the return value is + a :class:`ParseResults` containing any results names that + were originally matched, and a single token containing the original + matched text from the input string. So if the expression passed to + :class:`originalTextFor` contains expressions with defined + results names, you must set ``asString`` to ``False`` if you + want to preserve those results name values. + + Example:: + + src = "this is test bold text normal text " + for tag in ("b", "i"): + opener, closer = makeHTMLTags(tag) + patt = originalTextFor(opener + SkipTo(closer) + closer) + print(patt.searchString(src)[0]) + + prints:: + + [' bold text '] + ['text'] + """ + locMarker = Empty().setParseAction(lambda s, loc, t: loc) + endlocMarker = locMarker.copy() + endlocMarker.callPreparse = False + matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") + if asString: + extractText = lambda s, l, t: s[t._original_start: t._original_end] + else: + def extractText(s, l, t): + t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] + matchExpr.setParseAction(extractText) + matchExpr.ignoreExprs = expr.ignoreExprs + return matchExpr + +def ungroup(expr): + """Helper to undo pyparsing's default grouping of And expressions, + even if all but one are non-empty. + """ + return TokenConverter(expr).addParseAction(lambda t: t[0]) + +def locatedExpr(expr): + """Helper to decorate a returned token with its starting and ending + locations in the input string. + + This helper adds the following results names: + + - locn_start = location where matched expression begins + - locn_end = location where matched expression ends + - value = the actual parsed results + + Be careful if the input text contains ```` characters, you + may want to call :class:`ParserElement.parseWithTabs` + + Example:: + + wd = Word(alphas) + for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): + print(match) + + prints:: + + [[0, 'ljsdf', 5]] + [[8, 'lksdjjf', 15]] + [[18, 'lkkjj', 23]] + """ + locator = Empty().setParseAction(lambda s, l, t: l) + return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) + + +# convenience constants for positional expressions +empty = Empty().setName("empty") +lineStart = LineStart().setName("lineStart") +lineEnd = LineEnd().setName("lineEnd") +stringStart = StringStart().setName("stringStart") +stringEnd = StringEnd().setName("stringEnd") + +_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1]) +_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lstrip(r'\0x'), 16))) +_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8))) +_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1) +_charRange = Group(_singleChar + Suppress("-") + _singleChar) +_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]" + +def srange(s): + r"""Helper to easily define string ranges for use in Word + construction. Borrows syntax from regexp '[]' string range + definitions:: + + srange("[0-9]") -> "0123456789" + srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" + srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" + + The input string must be enclosed in []'s, and the returned string + is the expanded character set joined into a single string. The + values enclosed in the []'s may be: + + - a single character + - an escaped character with a leading backslash (such as ``\-`` + or ``\]``) + - an escaped hex character with a leading ``'\x'`` + (``\x21``, which is a ``'!'`` character) (``\0x##`` + is also supported for backwards compatibility) + - an escaped octal character with a leading ``'\0'`` + (``\041``, which is a ``'!'`` character) + - a range of any of the above, separated by a dash (``'a-z'``, + etc.) + - any combination of the above (``'aeiouy'``, + ``'a-zA-Z0-9_$'``, etc.) + """ + _expanded = lambda p: p if not isinstance(p, ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) + try: + return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) + except Exception: + return "" + +def matchOnlyAtCol(n): + """Helper method for defining parse actions that require matching at + a specific column in the input text. + """ + def verifyCol(strg, locn, toks): + if col(locn, strg) != n: + raise ParseException(strg, locn, "matched token not at column %d" % n) + return verifyCol + +def replaceWith(replStr): + """Helper method for common parse actions that simply return + a literal value. Especially useful when used with + :class:`transformString` (). + + Example:: + + num = Word(nums).setParseAction(lambda toks: int(toks[0])) + na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) + term = na | num + + OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] + """ + return lambda s, l, t: [replStr] + +def removeQuotes(s, l, t): + """Helper parse action for removing quotation marks from parsed + quoted strings. + + Example:: + + # by default, quotation marks are included in parsed results + quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] + + # use removeQuotes to strip quotation marks from parsed results + quotedString.setParseAction(removeQuotes) + quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] + """ + return t[0][1:-1] + +def tokenMap(func, *args): + """Helper to define a parse action by mapping a function to all + elements of a ParseResults list. If any additional args are passed, + they are forwarded to the given function as additional arguments + after the token, as in + ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``, + which will convert the parsed data to an integer using base 16. + + Example (compare the last to example in :class:`ParserElement.transformString`:: + + hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) + hex_ints.runTests(''' + 00 11 22 aa FF 0a 0d 1a + ''') + + upperword = Word(alphas).setParseAction(tokenMap(str.upper)) + OneOrMore(upperword).runTests(''' + my kingdom for a horse + ''') + + wd = Word(alphas).setParseAction(tokenMap(str.title)) + OneOrMore(wd).setParseAction(' '.join).runTests(''' + now is the winter of our discontent made glorious summer by this sun of york + ''') + + prints:: + + 00 11 22 aa FF 0a 0d 1a + [0, 17, 34, 170, 255, 10, 13, 26] + + my kingdom for a horse + ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] + + now is the winter of our discontent made glorious summer by this sun of york + ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] + """ + def pa(s, l, t): + return [func(tokn, *args) for tokn in t] + + try: + func_name = getattr(func, '__name__', + getattr(func, '__class__').__name__) + except Exception: + func_name = str(func) + pa.__name__ = func_name + + return pa + +upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) +"""(Deprecated) Helper parse action to convert tokens to upper case. +Deprecated in favor of :class:`pyparsing_common.upcaseTokens`""" + +downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) +"""(Deprecated) Helper parse action to convert tokens to lower case. +Deprecated in favor of :class:`pyparsing_common.downcaseTokens`""" + +def _makeTags(tagStr, xml, + suppress_LT=Suppress("<"), + suppress_GT=Suppress(">")): + """Internal helper to construct opening and closing tag expressions, given a tag name""" + if isinstance(tagStr, basestring): + resname = tagStr + tagStr = Keyword(tagStr, caseless=not xml) + else: + resname = tagStr.name + + tagAttrName = Word(alphas, alphanums + "_-:") + if xml: + tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) + openTag = (suppress_LT + + tagStr("tag") + + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') + + suppress_GT) + else: + tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">") + openTag = (suppress_LT + + tagStr("tag") + + Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) + + Optional(Suppress("=") + tagAttrValue)))) + + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') + + suppress_GT) + closeTag = Combine(_L("", adjacent=False) + + openTag.setName("<%s>" % resname) + # add start results name in parse action now that ungrouped names are not reported at two levels + openTag.addParseAction(lambda t: t.__setitem__("start" + "".join(resname.replace(":", " ").title().split()), t.copy())) + closeTag = closeTag("end" + "".join(resname.replace(":", " ").title().split())).setName("" % resname) + openTag.tag = resname + closeTag.tag = resname + openTag.tag_body = SkipTo(closeTag()) + return openTag, closeTag + +def makeHTMLTags(tagStr): + """Helper to construct opening and closing tag expressions for HTML, + given a tag name. Matches tags in either upper or lower case, + attributes with namespaces and with quoted or unquoted values. + + Example:: + + text = 'More info at the pyparsing wiki page' + # makeHTMLTags returns pyparsing expressions for the opening and + # closing tags as a 2-tuple + a, a_end = makeHTMLTags("A") + link_expr = a + SkipTo(a_end)("link_text") + a_end + + for link in link_expr.searchString(text): + # attributes in the tag (like "href" shown here) are + # also accessible as named results + print(link.link_text, '->', link.href) + + prints:: + + pyparsing -> https://github.com/pyparsing/pyparsing/wiki + """ + return _makeTags(tagStr, False) + +def makeXMLTags(tagStr): + """Helper to construct opening and closing tag expressions for XML, + given a tag name. Matches tags only in the given upper/lower case. + + Example: similar to :class:`makeHTMLTags` + """ + return _makeTags(tagStr, True) + +def withAttribute(*args, **attrDict): + """Helper to create a validating parse action to be used with start + tags created with :class:`makeXMLTags` or + :class:`makeHTMLTags`. Use ``withAttribute`` to qualify + a starting tag with a required attribute value, to avoid false + matches on common tags such as ```` or ``
``. + + Call ``withAttribute`` with a series of attribute names and + values. Specify the list of filter attributes names and values as: + + - keyword arguments, as in ``(align="right")``, or + - as an explicit dict with ``**`` operator, when an attribute + name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}`` + - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` + + For attribute names with a namespace prefix, you must use the second + form. Attribute names are matched insensitive to upper/lower case. + + If just testing for ``class`` (with or without a namespace), use + :class:`withClass`. + + To verify that the attribute exists, but without specifying a value, + pass ``withAttribute.ANY_VALUE`` as the value. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this has no type
+
+ + ''' + div,div_end = makeHTMLTags("div") + + # only match div tag having a type attribute with value "grid" + div_grid = div().setParseAction(withAttribute(type="grid")) + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.searchString(html): + print(grid_header.body) + + # construct a match with any div tag having a type attribute, regardless of the value + div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.searchString(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + if args: + attrs = args[:] + else: + attrs = attrDict.items() + attrs = [(k, v) for k, v in attrs] + def pa(s, l, tokens): + for attrName, attrValue in attrs: + if attrName not in tokens: + raise ParseException(s, l, "no matching attribute " + attrName) + if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: + raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" % + (attrName, tokens[attrName], attrValue)) + return pa +withAttribute.ANY_VALUE = object() + +def withClass(classname, namespace=''): + """Simplified version of :class:`withAttribute` when + matching on a div class - made difficult because ``class`` is + a reserved word in Python. + + Example:: + + html = ''' +
+ Some text +
1 4 0 1 0
+
1,3 2,3 1,1
+
this <div> has no class
+
+ + ''' + div,div_end = makeHTMLTags("div") + div_grid = div().setParseAction(withClass("grid")) + + grid_expr = div_grid + SkipTo(div | div_end)("body") + for grid_header in grid_expr.searchString(html): + print(grid_header.body) + + div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) + div_expr = div_any_type + SkipTo(div | div_end)("body") + for div_header in div_expr.searchString(html): + print(div_header.body) + + prints:: + + 1 4 0 1 0 + + 1 4 0 1 0 + 1,3 2,3 1,1 + """ + classattr = "%s:class" % namespace if namespace else "class" + return withAttribute(**{classattr: classname}) + +opAssoc = SimpleNamespace() +opAssoc.LEFT = object() +opAssoc.RIGHT = object() + +def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')): + """Helper method for constructing grammars of expressions made up of + operators working in a precedence hierarchy. Operators may be unary + or binary, left- or right-associative. Parse actions can also be + attached to operator expressions. The generated parser will also + recognize the use of parentheses to override operator precedences + (see example below). + + Note: if you define a deep operator list, you may see performance + issues when using infixNotation. See + :class:`ParserElement.enablePackrat` for a mechanism to potentially + improve your parser performance. + + Parameters: + - baseExpr - expression representing the most basic element for the + nested + - opList - list of tuples, one for each operator precedence level + in the expression grammar; each tuple is of the form ``(opExpr, + numTerms, rightLeftAssoc, parseAction)``, where: + + - opExpr is the pyparsing expression for the operator; may also + be a string, which will be converted to a Literal; if numTerms + is 3, opExpr is a tuple of two expressions, for the two + operators separating the 3 terms + - numTerms is the number of terms for this operator (must be 1, + 2, or 3) + - rightLeftAssoc is the indicator whether the operator is right + or left associative, using the pyparsing-defined constants + ``opAssoc.RIGHT`` and ``opAssoc.LEFT``. + - parseAction is the parse action to be associated with + expressions matching this operator expression (the parse action + tuple member may be omitted); if the parse action is passed + a tuple or list of functions, this is equivalent to calling + ``setParseAction(*fn)`` + (:class:`ParserElement.setParseAction`) + - lpar - expression for matching left-parentheses + (default= ``Suppress('(')``) + - rpar - expression for matching right-parentheses + (default= ``Suppress(')')``) + + Example:: + + # simple example of four-function arithmetic with ints and + # variable names + integer = pyparsing_common.signed_integer + varname = pyparsing_common.identifier + + arith_expr = infixNotation(integer | varname, + [ + ('-', 1, opAssoc.RIGHT), + (oneOf('* /'), 2, opAssoc.LEFT), + (oneOf('+ -'), 2, opAssoc.LEFT), + ]) + + arith_expr.runTests(''' + 5+3*6 + (5+3)*6 + -2--11 + ''', fullDump=False) + + prints:: + + 5+3*6 + [[5, '+', [3, '*', 6]]] + + (5+3)*6 + [[[5, '+', 3], '*', 6]] + + -2--11 + [[['-', 2], '-', ['-', 11]]] + """ + # captive version of FollowedBy that does not do parse actions or capture results names + class _FB(FollowedBy): + def parseImpl(self, instring, loc, doActions=True): + self.expr.tryParse(instring, loc) + return loc, [] + + ret = Forward() + lastExpr = baseExpr | (lpar + ret + rpar) + for i, operDef in enumerate(opList): + opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4] + termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr + if arity == 3: + if opExpr is None or len(opExpr) != 2: + raise ValueError( + "if numterms=3, opExpr must be a tuple or list of two expressions") + opExpr1, opExpr2 = opExpr + thisExpr = Forward().setName(termName) + if rightLeftAssoc == opAssoc.LEFT: + if arity == 1: + matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr)) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr)) + else: + matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) + elif arity == 3: + matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + elif rightLeftAssoc == opAssoc.RIGHT: + if arity == 1: + # try to avoid LR with this extra test + if not isinstance(opExpr, Optional): + opExpr = Optional(opExpr) + matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) + elif arity == 2: + if opExpr is not None: + matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr)) + else: + matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr)) + elif arity == 3: + matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)) + else: + raise ValueError("operator must be unary (1), binary (2), or ternary (3)") + else: + raise ValueError("operator must indicate right or left associativity") + if pa: + if isinstance(pa, (tuple, list)): + matchExpr.setParseAction(*pa) + else: + matchExpr.setParseAction(pa) + thisExpr <<= (matchExpr.setName(termName) | lastExpr) + lastExpr = thisExpr + ret <<= lastExpr + return ret + +operatorPrecedence = infixNotation +"""(Deprecated) Former name of :class:`infixNotation`, will be +dropped in a future release.""" + +dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes") +sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes") +quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' + | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes") +unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") + +def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): + """Helper method for defining nested lists enclosed in opening and + closing delimiters ("(" and ")" are the default). + + Parameters: + - opener - opening character for a nested list + (default= ``"("``); can also be a pyparsing expression + - closer - closing character for a nested list + (default= ``")"``); can also be a pyparsing expression + - content - expression for items within the nested lists + (default= ``None``) + - ignoreExpr - expression for ignoring opening and closing + delimiters (default= :class:`quotedString`) + + If an expression is not provided for the content argument, the + nested expression will capture all whitespace-delimited content + between delimiters as a list of separate values. + + Use the ``ignoreExpr`` argument to define expressions that may + contain opening or closing characters that should not be treated as + opening or closing characters for nesting, such as quotedString or + a comment expression. Specify multiple expressions using an + :class:`Or` or :class:`MatchFirst`. The default is + :class:`quotedString`, but if no expressions are to be ignored, then + pass ``None`` for this argument. + + Example:: + + data_type = oneOf("void int short long char float double") + decl_data_type = Combine(data_type + Optional(Word('*'))) + ident = Word(alphas+'_', alphanums+'_') + number = pyparsing_common.number + arg = Group(decl_data_type + ident) + LPAR, RPAR = map(Suppress, "()") + + code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) + + c_function = (decl_data_type("type") + + ident("name") + + LPAR + Optional(delimitedList(arg), [])("args") + RPAR + + code_body("body")) + c_function.ignore(cStyleComment) + + source_code = ''' + int is_odd(int x) { + return (x%2); + } + + int dec_to_hex(char hchar) { + if (hchar >= '0' && hchar <= '9') { + return (ord(hchar)-ord('0')); + } else { + return (10+ord(hchar)-ord('A')); + } + } + ''' + for func in c_function.searchString(source_code): + print("%(name)s (%(type)s) args: %(args)s" % func) + + + prints:: + + is_odd (int) args: [['int', 'x']] + dec_to_hex (int) args: [['char', 'hchar']] + """ + if opener == closer: + raise ValueError("opening and closing strings cannot be the same") + if content is None: + if isinstance(opener, basestring) and isinstance(closer, basestring): + if len(opener) == 1 and len(closer) == 1: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + CharsNotIn(opener + + closer + + ParserElement.DEFAULT_WHITE_CHARS, exact=1) + ) + ).setParseAction(lambda t: t[0].strip())) + else: + content = (empty.copy() + CharsNotIn(opener + + closer + + ParserElement.DEFAULT_WHITE_CHARS + ).setParseAction(lambda t: t[0].strip())) + else: + if ignoreExpr is not None: + content = (Combine(OneOrMore(~ignoreExpr + + ~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) + ).setParseAction(lambda t: t[0].strip())) + else: + content = (Combine(OneOrMore(~Literal(opener) + + ~Literal(closer) + + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) + ).setParseAction(lambda t: t[0].strip())) + else: + raise ValueError("opening and closing arguments must be strings if no content expression is given") + ret = Forward() + if ignoreExpr is not None: + ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) + else: + ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) + ret.setName('nested %s%s expression' % (opener, closer)) + return ret + +def indentedBlock(blockStatementExpr, indentStack, indent=True): + """Helper method for defining space-delimited indentation blocks, + such as those used to define block statements in Python source code. + + Parameters: + + - blockStatementExpr - expression defining syntax of statement that + is repeated within the indented block + - indentStack - list created by caller to manage indentation stack + (multiple statementWithIndentedBlock expressions within a single + grammar should share a common indentStack) + - indent - boolean indicating whether block must be indented beyond + the current level; set to False for block of left-most + statements (default= ``True``) + + A valid block must contain at least one ``blockStatement``. + + Example:: + + data = ''' + def A(z): + A1 + B = 100 + G = A2 + A2 + A3 + B + def BB(a,b,c): + BB1 + def BBA(): + bba1 + bba2 + bba3 + C + D + def spam(x,y): + def eggs(z): + pass + ''' + + + indentStack = [1] + stmt = Forward() + + identifier = Word(alphas, alphanums) + funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":") + func_body = indentedBlock(stmt, indentStack) + funcDef = Group(funcDecl + func_body) + + rvalue = Forward() + funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") + rvalue << (funcCall | identifier | Word(nums)) + assignment = Group(identifier + "=" + rvalue) + stmt << (funcDef | assignment | identifier) + + module_body = OneOrMore(stmt) + + parseTree = module_body.parseString(data) + parseTree.pprint() + + prints:: + + [['def', + 'A', + ['(', 'z', ')'], + ':', + [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], + 'B', + ['def', + 'BB', + ['(', 'a', 'b', 'c', ')'], + ':', + [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], + 'C', + 'D', + ['def', + 'spam', + ['(', 'x', 'y', ')'], + ':', + [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] + """ + backup_stack = indentStack[:] + + def reset_stack(): + indentStack[:] = backup_stack + + def checkPeerIndent(s, l, t): + if l >= len(s): return + curCol = col(l, s) + if curCol != indentStack[-1]: + if curCol > indentStack[-1]: + raise ParseException(s, l, "illegal nesting") + raise ParseException(s, l, "not a peer entry") + + def checkSubIndent(s, l, t): + curCol = col(l, s) + if curCol > indentStack[-1]: + indentStack.append(curCol) + else: + raise ParseException(s, l, "not a subentry") + + def checkUnindent(s, l, t): + if l >= len(s): return + curCol = col(l, s) + if not(indentStack and curCol in indentStack): + raise ParseException(s, l, "not an unindent") + if curCol < indentStack[-1]: + indentStack.pop() + + NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd()) + INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') + PEER = Empty().setParseAction(checkPeerIndent).setName('') + UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') + if indent: + smExpr = Group(Optional(NL) + + INDENT + + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) + + UNDENT) + else: + smExpr = Group(Optional(NL) + + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) + + UNDENT) + smExpr.setFailAction(lambda a, b, c, d: reset_stack()) + blockStatementExpr.ignore(_bslash + LineEnd()) + return smExpr.setName('indented block') + +alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") +punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") + +anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag')) +_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\'')) +commonHTMLEntity = Regex('&(?P' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") +def replaceHTMLEntity(t): + """Helper parser action to replace common HTML entities with their special characters""" + return _htmlEntityMap.get(t.entity) + +# it's easy to get these comment structures wrong - they're very common, so may as well make them available +cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") +"Comment of the form ``/* ... */``" + +htmlComment = Regex(r"").setName("HTML comment") +"Comment of the form ````" + +restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") +dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") +"Comment of the form ``// ... (to end of line)``" + +cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment") +"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`" + +javaStyleComment = cppStyleComment +"Same as :class:`cppStyleComment`" + +pythonStyleComment = Regex(r"#.*").setName("Python style comment") +"Comment of the form ``# ... (to end of line)``" + +_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') + + Optional(Word(" \t") + + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") +commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList") +"""(Deprecated) Predefined expression of 1 or more printable words or +quoted strings, separated by commas. + +This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`. +""" + +# some other useful expressions - using lower-case class name since we are really using this as a namespace +class pyparsing_common: + """Here are some common low-level expressions that may be useful in + jump-starting parser development: + + - numeric forms (:class:`integers`, :class:`reals`, + :class:`scientific notation`) + - common :class:`programming identifiers` + - network addresses (:class:`MAC`, + :class:`IPv4`, :class:`IPv6`) + - ISO8601 :class:`dates` and + :class:`datetime` + - :class:`UUID` + - :class:`comma-separated list` + + Parse actions: + + - :class:`convertToInteger` + - :class:`convertToFloat` + - :class:`convertToDate` + - :class:`convertToDatetime` + - :class:`stripHTMLTags` + - :class:`upcaseTokens` + - :class:`downcaseTokens` + + Example:: + + pyparsing_common.number.runTests(''' + # any int or real number, returned as the appropriate type + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.fnumber.runTests(''' + # any int or real number, returned as float + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + ''') + + pyparsing_common.hex_integer.runTests(''' + # hex numbers + 100 + FF + ''') + + pyparsing_common.fraction.runTests(''' + # fractions + 1/2 + -3/4 + ''') + + pyparsing_common.mixed_integer.runTests(''' + # mixed fractions + 1 + 1/2 + -3/4 + 1-3/4 + ''') + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(''' + # uuid + 12345678-1234-5678-1234-567812345678 + ''') + + prints:: + + # any int or real number, returned as the appropriate type + 100 + [100] + + -100 + [-100] + + +100 + [100] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # any int or real number, returned as float + 100 + [100.0] + + -100 + [-100.0] + + +100 + [100.0] + + 3.14159 + [3.14159] + + 6.02e23 + [6.02e+23] + + 1e-12 + [1e-12] + + # hex numbers + 100 + [256] + + FF + [255] + + # fractions + 1/2 + [0.5] + + -3/4 + [-0.75] + + # mixed fractions + 1 + [1] + + 1/2 + [0.5] + + -3/4 + [-0.75] + + 1-3/4 + [1.75] + + # uuid + 12345678-1234-5678-1234-567812345678 + [UUID('12345678-1234-5678-1234-567812345678')] + """ + + convertToInteger = tokenMap(int) + """ + Parse action for converting parsed integers to Python int + """ + + convertToFloat = tokenMap(float) + """ + Parse action for converting parsed numbers to Python float + """ + + integer = Word(nums).setName("integer").setParseAction(convertToInteger) + """expression that parses an unsigned integer, returns an int""" + + hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) + """expression that parses a hexadecimal integer, returns an int""" + + signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) + """expression that parses an integer with optional leading sign, returns an int""" + + fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") + """fractional expression of an integer divided by an integer, returns a float""" + fraction.addParseAction(lambda t: t[0]/t[-1]) + + mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") + """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" + mixed_integer.addParseAction(sum) + + real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat) + """expression that parses a floating point number and returns a float""" + + sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) + """expression that parses a floating point number with optional + scientific notation and returns a float""" + + # streamlining this expression makes the docs nicer-looking + number = (sci_real | real | signed_integer).streamline() + """any numeric expression, returns the corresponding Python type""" + + fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) + """any int or real number, returned as float""" + + identifier = Word(alphas + '_', alphanums + '_').setName("identifier") + """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" + + ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") + "IPv4 address (``0.0.0.0 - 255.255.255.255``)" + + _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") + _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address") + _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) + + "::" + + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) + ).setName("short IPv6 address") + _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) + _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") + ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") + "IPv6 address (long, short, or mixed form)" + + mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") + "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" + + @staticmethod + def convertToDate(fmt="%Y-%m-%d"): + """ + Helper to create a parse action for converting parsed date string to Python datetime.date + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) + + Example:: + + date_expr = pyparsing_common.iso8601_date.copy() + date_expr.setParseAction(pyparsing_common.convertToDate()) + print(date_expr.parseString("1999-12-31")) + + prints:: + + [datetime.date(1999, 12, 31)] + """ + def cvt_fn(s, l, t): + try: + return datetime.strptime(t[0], fmt).date() + except ValueError as ve: + raise ParseException(s, l, str(ve)) + return cvt_fn + + @staticmethod + def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): + """Helper to create a parse action for converting parsed + datetime string to Python datetime.datetime + + Params - + - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) + + Example:: + + dt_expr = pyparsing_common.iso8601_datetime.copy() + dt_expr.setParseAction(pyparsing_common.convertToDatetime()) + print(dt_expr.parseString("1999-12-31T23:59:59.999")) + + prints:: + + [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] + """ + def cvt_fn(s, l, t): + try: + return datetime.strptime(t[0], fmt) + except ValueError as ve: + raise ParseException(s, l, str(ve)) + return cvt_fn + + iso8601_date = Regex(r'(?P\d{4})(?:-(?P\d\d)(?:-(?P\d\d))?)?').setName("ISO8601 date") + "ISO8601 date (``yyyy-mm-dd``)" + + iso8601_datetime = Regex(r'(?P\d{4})-(?P\d\d)-(?P\d\d)[T ](?P\d\d):(?P\d\d)(:(?P\d\d(\.\d*)?)?)?(?PZ|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") + "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" + + uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") + "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" + + _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() + @staticmethod + def stripHTMLTags(s, l, tokens): + """Parse action to remove HTML tags from web page HTML source + + Example:: + + # strip HTML links from normal text + text = 'More info at the
pyparsing wiki page' + td, td_end = makeHTMLTags("TD") + table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end + print(table_text.parseString(text).body) + + Prints:: + + More info at the pyparsing wiki page + """ + return pyparsing_common._html_stripper.transformString(tokens[0]) + + _commasepitem = Combine(OneOrMore(~Literal(",") + + ~LineEnd() + + Word(printables, excludeChars=',') + + Optional(White(" \t")))).streamline().setName("commaItem") + comma_separated_list = delimitedList(Optional(quotedString.copy() + | _commasepitem, default='') + ).setName("comma separated list") + """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" + + upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) + """Parse action to convert tokens to upper case.""" + + downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) + """Parse action to convert tokens to lower case.""" + + +class _lazyclassproperty(object): + def __init__(self, fn): + self.fn = fn + self.__doc__ = fn.__doc__ + self.__name__ = fn.__name__ + + def __get__(self, obj, cls): + if cls is None: + cls = type(obj) + if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) + for superclass in cls.__mro__[1:]): + cls._intern = {} + attrname = self.fn.__name__ + if attrname not in cls._intern: + cls._intern[attrname] = self.fn(cls) + return cls._intern[attrname] + + +class unicode_set(object): + """ + A set of Unicode characters, for language-specific strings for + ``alphas``, ``nums``, ``alphanums``, and ``printables``. + A unicode_set is defined by a list of ranges in the Unicode character + set, in a class attribute ``_ranges``, such as:: + + _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] + + A unicode set can also be defined using multiple inheritance of other unicode sets:: + + class CJK(Chinese, Japanese, Korean): + pass + """ + _ranges = [] + + @classmethod + def _get_chars_for_ranges(cls): + ret = [] + for cc in cls.__mro__: + if cc is unicode_set: + break + for rr in cc._ranges: + ret.extend(range(rr[0], rr[-1] + 1)) + return [unichr(c) for c in sorted(set(ret))] + + @_lazyclassproperty + def printables(cls): + "all non-whitespace characters in this range" + return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges())) + + @_lazyclassproperty + def alphas(cls): + "all alphabetic characters in this range" + return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges())) + + @_lazyclassproperty + def nums(cls): + "all numeric digit characters in this range" + return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges())) + + @_lazyclassproperty + def alphanums(cls): + "all alphanumeric characters in this range" + return cls.alphas + cls.nums + + +class pyparsing_unicode(unicode_set): + """ + A namespace class for defining common language unicode_sets. + """ + _ranges = [(32, sys.maxunicode)] + + class Latin1(unicode_set): + "Unicode set for Latin-1 Unicode Character Range" + _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] + + class LatinA(unicode_set): + "Unicode set for Latin-A Unicode Character Range" + _ranges = [(0x0100, 0x017f),] + + class LatinB(unicode_set): + "Unicode set for Latin-B Unicode Character Range" + _ranges = [(0x0180, 0x024f),] + + class Greek(unicode_set): + "Unicode set for Greek Unicode Character Ranges" + _ranges = [ + (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), + (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4), + (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe), + ] + + class Cyrillic(unicode_set): + "Unicode set for Cyrillic Unicode Character Range" + _ranges = [(0x0400, 0x04ff)] + + class Chinese(unicode_set): + "Unicode set for Chinese Unicode Character Range" + _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),] + + class Japanese(unicode_set): + "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" + _ranges = [] + + class Kanji(unicode_set): + "Unicode set for Kanji Unicode Character Range" + _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),] + + class Hiragana(unicode_set): + "Unicode set for Hiragana Unicode Character Range" + _ranges = [(0x3040, 0x309f),] + + class Katakana(unicode_set): + "Unicode set for Katakana Unicode Character Range" + _ranges = [(0x30a0, 0x30ff),] + + class Korean(unicode_set): + "Unicode set for Korean Unicode Character Range" + _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),] + + class CJK(Chinese, Japanese, Korean): + "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" + pass + + class Thai(unicode_set): + "Unicode set for Thai Unicode Character Range" + _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),] + + class Arabic(unicode_set): + "Unicode set for Arabic Unicode Character Range" + _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),] + + class Hebrew(unicode_set): + "Unicode set for Hebrew Unicode Character Range" + _ranges = [(0x0590, 0x05ff),] + + class Devanagari(unicode_set): + "Unicode set for Devanagari Unicode Character Range" + _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)] + +pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges + + pyparsing_unicode.Japanese.Hiragana._ranges + + pyparsing_unicode.Japanese.Katakana._ranges) + +# define ranges in language character sets +if PY_3: + setattr(pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic) + setattr(pyparsing_unicode, u"中文", pyparsing_unicode.Chinese) + setattr(pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic) + setattr(pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek) + setattr(pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew) + setattr(pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese) + setattr(pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji) + setattr(pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana) + setattr(pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana) + setattr(pyparsing_unicode, u"한국어", pyparsing_unicode.Korean) + setattr(pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai) + setattr(pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari) + + +class pyparsing_test: + """ + namespace class for classes useful in writing unit tests + """ + + class reset_pyparsing_context: + """ + Context manager to be used when writing unit tests that modify pyparsing config values: + - packrat parsing + - default whitespace characters. + - default keyword characters + - literal string auto-conversion class + - __diag__ settings + + Example: + with reset_pyparsing_context(): + # test that literals used to construct a grammar are automatically suppressed + ParserElement.inlineLiteralsUsing(Suppress) + + term = Word(alphas) | Word(nums) + group = Group('(' + term[...] + ')') + + # assert that the '()' characters are not included in the parsed tokens + self.assertParseAndCheckLisst(group, "(abc 123 def)", ['abc', '123', 'def']) + + # after exiting context manager, literals are converted to Literal expressions again + """ + + def __init__(self): + self._save_context = {} + + def save(self): + self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS + self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS + self._save_context[ + "literal_string_class" + ] = ParserElement._literalStringClass + self._save_context["packrat_enabled"] = ParserElement._packratEnabled + self._save_context["packrat_parse"] = ParserElement._parse + self._save_context["__diag__"] = { + name: getattr(__diag__, name) for name in __diag__._all_names + } + self._save_context["__compat__"] = { + "collect_all_And_tokens": __compat__.collect_all_And_tokens + } + return self + + def restore(self): + # reset pyparsing global state + if ( + ParserElement.DEFAULT_WHITE_CHARS + != self._save_context["default_whitespace"] + ): + ParserElement.setDefaultWhitespaceChars( + self._save_context["default_whitespace"] + ) + Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] + ParserElement.inlineLiteralsUsing( + self._save_context["literal_string_class"] + ) + for name, value in self._save_context["__diag__"].items(): + setattr(__diag__, name, value) + ParserElement._packratEnabled = self._save_context["packrat_enabled"] + ParserElement._parse = self._save_context["packrat_parse"] + __compat__.collect_all_And_tokens = self._save_context["__compat__"] + + def __enter__(self): + return self.save() + + def __exit__(self, *args): + return self.restore() + + class TestParseResultsAsserts: + """ + A mixin class to add parse results assertion methods to normal unittest.TestCase classes. + """ + def assertParseResultsEquals( + self, result, expected_list=None, expected_dict=None, msg=None + ): + """ + Unit test assertion to compare a ParseResults object with an optional expected_list, + and compare any defined results names with an optional expected_dict. + """ + if expected_list is not None: + self.assertEqual(expected_list, result.asList(), msg=msg) + if expected_dict is not None: + self.assertEqual(expected_dict, result.asDict(), msg=msg) + + def assertParseAndCheckList( + self, expr, test_string, expected_list, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ParseResults.asList() is equal to the expected_list. + """ + result = expr.parseString(test_string, parseAll=True) + if verbose: + print(result.dump()) + self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) + + def assertParseAndCheckDict( + self, expr, test_string, expected_dict, msg=None, verbose=True + ): + """ + Convenience wrapper assert to test a parser element and input string, and assert that + the resulting ParseResults.asDict() is equal to the expected_dict. + """ + result = expr.parseString(test_string, parseAll=True) + if verbose: + print(result.dump()) + self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) + + def assertRunTestResults( + self, run_tests_report, expected_parse_results=None, msg=None + ): + """ + Unit test assertion to evaluate output of ParserElement.runTests(). If a list of + list-dict tuples is given as the expected_parse_results argument, then these are zipped + with the report tuples returned by runTests and evaluated using assertParseResultsEquals. + Finally, asserts that the overall runTests() success value is True. + + :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests + :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] + """ + run_test_success, run_test_results = run_tests_report + + if expected_parse_results is not None: + merged = [ + (rpt[0], rpt[1], expected) + for rpt, expected in zip(run_test_results, expected_parse_results) + ] + for test_string, result, expected in merged: + # expected should be a tuple containing a list and/or a dict or an exception, + # and optional failure message string + # an empty tuple will skip any result validation + fail_msg = next( + (exp for exp in expected if isinstance(exp, str)), None + ) + expected_exception = next( + ( + exp + for exp in expected + if isinstance(exp, type) and issubclass(exp, Exception) + ), + None, + ) + if expected_exception is not None: + with self.assertRaises( + expected_exception=expected_exception, msg=fail_msg or msg + ): + if isinstance(result, Exception): + raise result + else: + expected_list = next( + (exp for exp in expected if isinstance(exp, list)), None + ) + expected_dict = next( + (exp for exp in expected if isinstance(exp, dict)), None + ) + if (expected_list, expected_dict) != (None, None): + self.assertParseResultsEquals( + result, + expected_list=expected_list, + expected_dict=expected_dict, + msg=fail_msg or msg, + ) + else: + # warning here maybe? + print("no validation for {!r}".format(test_string)) + + # do this last, in case some specific test results can be reported instead + self.assertTrue( + run_test_success, msg=msg if msg is not None else "failed runTests" + ) + + @contextmanager + def assertRaisesParseException(self, exc_type=ParseException, msg=None): + with self.assertRaises(exc_type, msg=msg): + yield + + +if __name__ == "__main__": + + selectToken = CaselessLiteral("select") + fromToken = CaselessLiteral("from") + + ident = Word(alphas, alphanums + "_$") + + columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + columnNameList = Group(delimitedList(columnName)).setName("columns") + columnSpec = ('*' | columnNameList) + + tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) + tableNameList = Group(delimitedList(tableName)).setName("tables") + + simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") + + # demo runTests method, including embedded comments in test string + simpleSQL.runTests(""" + # '*' as column list and dotted table name + select * from SYS.XYZZY + + # caseless match on "SELECT", and casts back to "select" + SELECT * from XYZZY, ABC + + # list of column names, and mixed case SELECT keyword + Select AA,BB,CC from Sys.dual + + # multiple tables + Select A, B, C from Sys.dual, Table2 + + # invalid SELECT keyword - should fail + Xelect A, B, C from Sys.dual + + # incomplete command - should fail + Select + + # invalid column name - should fail + Select ^^^ frox Sys.dual + + """) + + pyparsing_common.number.runTests(""" + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + """) + + # any int or real number, returned as float + pyparsing_common.fnumber.runTests(""" + 100 + -100 + +100 + 3.14159 + 6.02e23 + 1e-12 + """) + + pyparsing_common.hex_integer.runTests(""" + 100 + FF + """) + + import uuid + pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) + pyparsing_common.uuid.runTests(""" + 12345678-1234-5678-1234-567812345678 + """) diff --git a/pysrc/requirements/requirement.py b/pysrc/requirements/requirement.py index 6ba1c314..77868b42 100644 --- a/pysrc/requirements/requirement.py +++ b/pysrc/requirements/requirement.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals import re -from pkg_resources import Requirement as Req +from packaging.requirements import Requirement as Req from .fragment import get_hash_info, parse_fragment, parse_extras_require from .vcs import VCS, VCS_SCHEMES @@ -33,6 +33,15 @@ EDITABLE_REGEX = re.compile(r'^(-e|--editable=?)\s*') +# The following was copied from pkg_resources in order to eliminate the usage of the package +# https://github.com/pypa/pkg_resources/blob/6f81a44010d1266494025647dd1e1f0befa5b26b/pkg_resources/__init__.py#L1435 +def safe_extra(extra): + """Convert an arbitrary string to a standard 'extra' name + Any runs of non-alphanumeric characters are replaced with a single '_', + and the result is always lowercased. + """ + return re.sub('[^A-Za-z0-9.-]+', '_', extra).lower() + class Requirement(object): """ Represents a single requirement @@ -215,10 +224,17 @@ def parse_line(cls, line): # and https://pip.pypa.io/en/stable/reference/pip_install/#hash-checking-mode line = PER_REQ_PARAM_REGEX.sub('', line) - pkg_req = Req.parse(line) - req.name = pkg_req.unsafe_name - req.extras = list(pkg_req.extras) - req.specs = pkg_req.specs + # The following was copied from pkg_resources in order to eliminate the usage of the package + # https://github.com/pypa/pkg_resources/blob/6f81a44010d1266494025647dd1e1f0befa5b26b/pkg_resources/__init__.py#L3032 + if ' #' in line: + line = line[:line.find(' #')] + + # The following was copied from pkg_resources in order to eliminate the usage of the package + # https://github.com/pypa/pkg_resources/blob/6f81a44010d1266494025647dd1e1f0befa5b26b/pkg_resources/__init__.py#L3051-L3055 + pkg_req = Req(line) + req.name = pkg_req.name + req.extras = list(tuple(map(safe_extra, req.extras))) + req.specs = [(spec.operator, spec.version) for spec in pkg_req.specifier] return req @classmethod