From 1dde09395102fcec65c6d7e925dbe103e9341e2c Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Sat, 2 Jul 2022 16:18:07 +1200 Subject: [PATCH] Refactor and document the optimize build process - Rebuilt the optimize_build helper to allow the assumption of a working version of ply to check against pre-generated modules, providing an option to skip the mandatory regeneration step in the other (i.e. the not `--build`) workflow. - Document the changes, make it clearer as to what are the requirements for using the setup.py. --- README.rst | 71 +++++++++++--- src/calmjs/parse/parsers/optimize.py | 91 +++++++++++++---- .../parse/tests/test_parsers_optimize.py | 97 +++++++++++++++++-- src/calmjs/parse/utils.py | 12 ++- 4 files changed, 233 insertions(+), 38 deletions(-) diff --git a/README.rst b/README.rst index 075884c..f4577f7 100644 --- a/README.rst +++ b/README.rst @@ -78,9 +78,8 @@ As this package uses |ply|, it requires the generation of optimization modules for its lexer. The wheel distribution of |calmjs.parse| does not require this extra step as it contains these pre-generated modules for |ply| up to version 3.11 (the latest version available at the time -of previous release), however the source tarball or if |ply| version -that is installed lies outside of the supported versions, the following -caveats will apply. +of previous release), however the version of |ply| that is installed is +beyond the supported version, the following caveats will apply. If a more recent release of |ply| becomes available and the environment upgrades to that version, those pre-generated modules may become @@ -89,11 +88,18 @@ A corrective action can be achieved through a `manual optimization`_ step if a newer version of |calmjs.parse| is not available, or |ply| may be downgraded back to version 3.11 if possible. +Alternatively, install a more recent version of |calmjs.parse| wheel +that has the most complete set of pre-generated modules built. + Once the package is installed, the installation may be `tested`_ or be `used directly`_. -Alternative installation methods (for developers, advanced users) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Manual installation and packaging requirements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*This section is for developers and advanced users; contains important +information for package maintainers for OS distributions (e.g. Linux) +that will prevent less than ideal experiences for downstream users.* Development is still ongoing with |calmjs.parse|, for the latest features and bug fixes, the development version may be installed through @@ -101,14 +107,43 @@ git like so: .. code:: console - $ pip install git+https://github.com/calmjs/calmjs.parse.git#egg=calmjs.parse + $ pip install ply # this MUST be done first; see below for reason + $ pip install -e git+https://github.com/calmjs/calmjs.parse.git#egg=calmjs.parse -Alternatively, the git repository can be cloned directly and execute -``python setup.py develop`` while inside the root of the source -directory. +Note that |ply| MUST be pre-installed for the ``setup.py build`` step to +run, otherwise the build step required to create the pre-generated +modules will result in the following failure condition, even when trying +to package this library: -A manual optimization step may need to be performed for platforms and -systems that do not have utf8 as their default encoding. +.. code:: console + + $ python setup.py sdist --format=zip + running sdist + ... + WARNING: cannot find distribution for 'ply'; using default value, + assuming 'ply==3.11' for pre-generated modules + ERROR: cannot find pre-generated modules for the assumed 'ply' + version from above and/or cannot `import ply` to build generated + modules, aborting build; please either ensure that the source + archive containing the pre-generate modules is being used, or that + the python package 'ply' is installed and available for import + before attempting to use the setup.py to build this package; please + refer to the top level README for further details + +Naturally, the git repository can be cloned directly and execute +``python setup.py develop`` while inside the root of the source +directory; again, |ply| MUST already be available. + +As the git repository does NOT contain any pre-generated modules or +code, the above message is likely to be seen by developers or distro +maintainers who are on their first try at interacting with this +software. However, the zip archives released on PyPI starting from +version 1.3.0 do contain these modules fully pre-generated, thus they +may be used as part of a standard installation step, i.e. without +requiring |ply| be available for import before usage of the ``setup.py`` +for any purpose. While the same warning message about |ply| being +missing may be shown, the pre-generated modules will allow the build +step to proceed as normal. Manual optimization ~~~~~~~~~~~~~~~~~~~ @@ -714,6 +749,7 @@ Object assignments from a given script file: Further details and example usage can be consulted from the various docstrings found within the module. + Limitations ----------- @@ -735,6 +771,7 @@ comments. Likewise, any comments before the ``:`` token in a ternary statement will also be discarded as that is the second token consumed by the production rule that produces a ``Conditional`` node. + Troubleshooting --------------- @@ -781,6 +818,18 @@ this will may require both the token and layout functions not having arguments with name collisions, and the new function will take in all of those arguments in one go. +ERROR message about `import ply` when trying to run setup.py +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As noted in the full message, the |ply|_) package must be installed +before attempting to build the package through ``setup.py`` in the +situation where the pre-generated modules are missing. This situation +may be caused by building directly using the source provided by the +source code repository, or where there is no matching pre-generated +module matching with the installed version of |ply|. Please ensure +that |ply| is installed and available first before installing from +source if this error message is sighted. + Contribute ---------- diff --git a/src/calmjs/parse/parsers/optimize.py b/src/calmjs/parse/parsers/optimize.py index d92f7d1..ec2f569 100644 --- a/src/calmjs/parse/parsers/optimize.py +++ b/src/calmjs/parse/parsers/optimize.py @@ -8,18 +8,36 @@ """ import codecs +import os import sys from functools import partial from os import unlink from os.path import exists from importlib import import_module +from calmjs.parse.utils import generate_tab_names +from calmjs.parse.utils import ply_dist + +_ASSUME_PLY_VERSION = '3.11' +_ASSUME_ENVVAR = 'CALMJS_PARSE_ASSUME_PLY_VERSION' + + +def validate_imports(*imports): + paths = [] + missing = [] + for name in imports: + try: + import_module(name) + except ImportError: + missing.append(name) + else: + paths.append(sys.modules.pop(name).__file__) + return paths, missing def find_tab_paths(module): # return a list of lextab/yacctab module paths and a list of missing # import names. - paths = [] - missing = [] + names = [] for entry in ('lextab', 'yacctab'): # we assume the specified entries are defined as such name = getattr(module, entry) @@ -30,13 +48,8 @@ def find_tab_paths(module): 'provided module `%s` does not export expected tab values ' % module.__name__ ) - try: - import_module(name) - except ImportError: - missing.append(name) - else: - paths.append(sys.modules.pop(name).__file__) - return paths, missing + names.append(name) + return validate_imports(*names) def purge_tabs(module): @@ -71,11 +84,42 @@ def reoptimize(module): module.Parser() -def optimize_build(module): - paths, missing = find_tab_paths(module) +def _assume_ply_version(): + version = os.environ.get(_ASSUME_ENVVAR, _ASSUME_PLY_VERSION) + if ply_dist is None: + if _ASSUME_ENVVAR in os.environ: + source = "using environment variable %r" % _ASSUME_ENVVAR + else: + source = "using default value" + sys.stderr.write( + u"WARNING: cannot find distribution for 'ply'; " + "%s, assuming 'ply==%s' for pre-generated modules\n" % ( + source, version)) + return version + + +def optimize_build(module_name, assume_ply_version=True): + """ + optimize build helper for first build + + assume_ply_version + This flag denotes whether or not to assume a ply version should + ply be NOT installed; this will either assume ply to be whatever + value assigned to _ASSUME_PLY_VERSION (i.e. 3.11), or read from + the environment variable `CALMJS_PARSE_ASSUME_PLY_VERSION`. + + Default: True + """ + + kws = {} + if assume_ply_version: + kws['_version'] = _assume_ply_version() + + paths, missing = validate_imports(*generate_tab_names(module_name, **kws)) if missing: - # only purge and regenerate if any are missing. + # only import, purge and regenerate if any are missing. unlink_modules(verify_paths(paths)) + module = import_module(module_name) module.Parser() @@ -109,19 +153,30 @@ def reoptimize_all(monkey_patch=False, first_build=False): modules = ('.es5',) try: for name in modules: - module = import_module(name, 'calmjs.parse.parsers') if first_build: - optimize_build(module) + # A consideration for modifying this flag to simply + # check for a marker file to denote none of this being + # needed (i.e. this tarball was fully prepared), but it + # will not solve the issue where the distro packager + # already got an even more recent version of ply + # installed (as unlikely as that is) and that build step + # then is completely skipped. + optimize_build('calmjs.parse.parsers' + name) else: + module = import_module(name, 'calmjs.parse.parsers') reoptimize(module) except ImportError as e: if not first_build or 'ply' not in str(e): raise sys.stderr.write( - u"ERROR: cannot import ply, aborting build; please ensure " - "that the python package 'ply' is installed before attempting to " - "build this package; please refer to the top level README for " - "further details\n" + u"ERROR: cannot find pre-generated modules for the assumed 'ply' " + "version from above and/or cannot `import ply` to build generated " + "modules, aborting build; please either ensure that the source " + "archive containing the pre-generate modules is being used, or " + "that the python package 'ply' is installed and available for " + "import before attempting to use the setup.py to build this " + "package; please refer to the top level README for further " + "details\n" ) sys.exit(1) diff --git a/src/calmjs/parse/tests/test_parsers_optimize.py b/src/calmjs/parse/tests/test_parsers_optimize.py index ca1b621..4cad624 100644 --- a/src/calmjs/parse/tests/test_parsers_optimize.py +++ b/src/calmjs/parse/tests/test_parsers_optimize.py @@ -12,6 +12,7 @@ from ply import lex from calmjs.parse.parsers import optimize from calmjs.parse.parsers import es5 +from calmjs.parse.utils import ply_dist class OptimizeTestCase(unittest.TestCase): @@ -23,6 +24,7 @@ def setUp(self): def tearDown(self): optimize.unlink = os.unlink optimize.import_module = importlib.import_module + optimize.ply_dist = ply_dist # undo whatever monkey patch that may have happened lex.open = open @@ -122,12 +124,16 @@ def test_optimize_build(self): def sentinel(): called.append(True) - fake_es5 = ModuleType('fake_es5') - fake_es5.lextab = 'some_lextab' - fake_es5.yacctab = 'some_yacctab' + fake_es5 = ModuleType('fake_namespace.fake_es5') fake_es5.Parser = sentinel - optimize.optimize_build(fake_es5) + # inject fake namespace and module + sys.modules['fake_namespace'] = ModuleType('fake_namespace') + self.addCleanup(sys.modules.pop, 'fake_namespace') + sys.modules['fake_namespace.fake_es5'] = fake_es5 + self.addCleanup(sys.modules.pop, 'fake_namespace.fake_es5') + + optimize.optimize_build('fake_namespace.fake_es5') self.assertEqual(len(self.purged), 0) self.assertTrue(called) @@ -142,8 +148,55 @@ def test_optimize_first_build_valid_with_broken_ply(self): # shouldn't have purged any modules self.assertEqual(len(self.purged), 0) + def test_assume_ply_version(self): + # only applicable if no ply_dist found + optimize.ply_dist = None + stderr = sys.stderr + self.addCleanup(setattr, sys, 'stderr', stderr) + + sys.stderr = StringIO() + optimize._assume_ply_version() + self.assertTrue(sys.stderr.getvalue().startswith( + "WARNING: cannot find distribution for 'ply'; using default " + "value, assuming 'ply==3.11' for pre-generated modules")) + + self.addCleanup(os.environ.pop, optimize._ASSUME_ENVVAR, None) + sys.stderr = StringIO() + os.environ[optimize._ASSUME_ENVVAR] = '0.9999' # should never exist + optimize._assume_ply_version() + self.assertTrue(sys.stderr.getvalue().startswith( + "WARNING: cannot find distribution for 'ply'; using environment " + "variable 'CALMJS_PARSE_ASSUME_PLY_VERSION', " + "assuming 'ply==0.9999' for pre-generated modules")) + def test_optimize_first_build_valid_with_broken_ply_error(self): - def fail_import(module, package): + def fail_import(*a, **kw): + raise ImportError('no module named ply') + + optimize.import_module = fail_import + + with self.assertRaises(ImportError): + optimize.reoptimize_all() + + stderr = sys.stderr + + def cleanup(): + sys.stderr = stderr + + self.addCleanup(cleanup) + + sys.stderr = StringIO() + with self.assertRaises(SystemExit): + optimize.reoptimize_all(first_build=True) + + self.assertTrue(sys.stderr.getvalue().startswith( + "ERROR: cannot find pre-generated modules for the assumed 'ply' " + "version")) + + def test_optimize_first_build_assume_broken_ply_error(self): + optimize.ply_dist = None + + def fail_import(*a, **kw): raise ImportError('no module named ply') optimize.import_module = fail_import @@ -162,5 +215,37 @@ def cleanup(): with self.assertRaises(SystemExit): optimize.reoptimize_all(first_build=True) + lines = sys.stderr.getvalue().splitlines() + self.assertTrue(lines[0].startswith( + "WARNING: cannot find distribution for 'ply'; using default value" + )) + self.assertTrue(lines[1].startswith( + "ERROR: cannot find pre-generated modules for the assumed 'ply' " + "version")) + + def test_optimize_build_assume_broken_ply(self): + optimize.ply_dist = None + called = [] + + def sentinel(): + called.append(True) + + fake_es5 = ModuleType('fake_namespace.fake_es5') + fake_es5.Parser = sentinel + + # inject fake namespace and module + sys.modules['fake_namespace'] = ModuleType('fake_namespace') + self.addCleanup(sys.modules.pop, 'fake_namespace') + sys.modules['fake_namespace.fake_es5'] = fake_es5 + self.addCleanup(sys.modules.pop, 'fake_namespace.fake_es5') + stderr = sys.stderr + self.addCleanup(setattr, sys, 'stderr', stderr) + sys.stderr = StringIO() + + optimize.optimize_build('fake_namespace.fake_es5') + self.assertEqual(len(self.purged), 0) + self.assertTrue(called) self.assertTrue(sys.stderr.getvalue().startswith( - 'ERROR: cannot import ply')) + "WARNING: cannot find distribution for 'ply'; using default value" + )) + self.assertNotIn('ERROR', sys.stderr.getvalue()) diff --git a/src/calmjs/parse/utils.py b/src/calmjs/parse/utils.py index 3dde4de..a76f7c8 100644 --- a/src/calmjs/parse/utils.py +++ b/src/calmjs/parse/utils.py @@ -13,6 +13,12 @@ from pkg_resources import working_set from pkg_resources import Requirement ply_dist = working_set.find(Requirement.parse('ply')) + # note that for **extremely** ancient versions of setuptools, e.g. + # setuptools<0.6c11, will require the following workaround... + # if ply_dist is None: + # from pkg_resources import Distribution + # import ply + # ply_dist = Distribution(project_name='ply', version=ply.__version__) except ImportError: # pragma: no cover ply_dist = None @@ -34,7 +40,7 @@ def repr_compat(s): return repr(s) -def generate_tab_names(name): +def generate_tab_names(name, _version='unknown'): """ Return the names to lextab and yacctab modules for the given module name. Typical usage should be like so:: @@ -44,8 +50,8 @@ def generate_tab_names(name): package_name, module_name = name.rsplit('.', 1) - version = ply_dist.version.replace( - '.', '_') if ply_dist is not None else 'unknown' + version = (ply_dist.version if ply_dist is not None else _version).replace( + '.', '_') data = (package_name, module_name, py_major, version) lextab = '%s.lextab_%s_py%d_ply%s' % data yacctab = '%s.yacctab_%s_py%d_ply%s' % data